net/mlx5: support e-switch flow count action
[dpdk.git] drivers/net/mlx5/mlx5_flow_tcf.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2018 6WIND S.A.
3  * Copyright 2018 Mellanox Technologies, Ltd
4  */
5
6 #include <assert.h>
7 #include <errno.h>
8 #include <libmnl/libmnl.h>
9 #include <linux/gen_stats.h>
10 #include <linux/if_ether.h>
11 #include <linux/netlink.h>
12 #include <linux/pkt_cls.h>
13 #include <linux/pkt_sched.h>
14 #include <linux/rtnetlink.h>
15 #include <linux/tc_act/tc_gact.h>
16 #include <linux/tc_act/tc_mirred.h>
17 #include <netinet/in.h>
18 #include <stdalign.h>
19 #include <stdbool.h>
20 #include <stddef.h>
21 #include <stdint.h>
22 #include <stdlib.h>
23 #include <sys/socket.h>
24
25 #include <rte_byteorder.h>
26 #include <rte_errno.h>
27 #include <rte_ether.h>
28 #include <rte_flow.h>
29 #include <rte_malloc.h>
30 #include <rte_common.h>
31
32 #include "mlx5.h"
33 #include "mlx5_flow.h"
34 #include "mlx5_autoconf.h"
35
36 #ifdef HAVE_TC_ACT_VLAN
37
38 #include <linux/tc_act/tc_vlan.h>
39
40 #else /* HAVE_TC_ACT_VLAN */
41
42 #define TCA_VLAN_ACT_POP 1
43 #define TCA_VLAN_ACT_PUSH 2
44 #define TCA_VLAN_ACT_MODIFY 3
45 #define TCA_VLAN_PARMS 2
46 #define TCA_VLAN_PUSH_VLAN_ID 3
47 #define TCA_VLAN_PUSH_VLAN_PROTOCOL 4
48 #define TCA_VLAN_PAD 5
49 #define TCA_VLAN_PUSH_VLAN_PRIORITY 6
50
51 struct tc_vlan {
52         tc_gen;
53         int v_action;
54 };
55
56 #endif /* HAVE_TC_ACT_VLAN */
57
58 #ifdef HAVE_TC_ACT_PEDIT
59
60 #include <linux/tc_act/tc_pedit.h>
61
62 #else /* HAVE_TC_ACT_PEDIT */
63
64 enum {
65         TCA_PEDIT_UNSPEC,
66         TCA_PEDIT_TM,
67         TCA_PEDIT_PARMS,
68         TCA_PEDIT_PAD,
69         TCA_PEDIT_PARMS_EX,
70         TCA_PEDIT_KEYS_EX,
71         TCA_PEDIT_KEY_EX,
72         __TCA_PEDIT_MAX
73 };
74
75 enum {
76         TCA_PEDIT_KEY_EX_HTYPE = 1,
77         TCA_PEDIT_KEY_EX_CMD = 2,
78         __TCA_PEDIT_KEY_EX_MAX
79 };
80
81 enum pedit_header_type {
82         TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK = 0,
83         TCA_PEDIT_KEY_EX_HDR_TYPE_ETH = 1,
84         TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 = 2,
85         TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 = 3,
86         TCA_PEDIT_KEY_EX_HDR_TYPE_TCP = 4,
87         TCA_PEDIT_KEY_EX_HDR_TYPE_UDP = 5,
88         __PEDIT_HDR_TYPE_MAX,
89 };
90
91 enum pedit_cmd {
92         TCA_PEDIT_KEY_EX_CMD_SET = 0,
93         TCA_PEDIT_KEY_EX_CMD_ADD = 1,
94         __PEDIT_CMD_MAX,
95 };
96
97 struct tc_pedit_key {
98         __u32 mask; /* AND */
99         __u32 val; /* XOR */
100         __u32 off; /* offset */
101         __u32 at;
102         __u32 offmask;
103         __u32 shift;
104 };
105
106 __extension__
107 struct tc_pedit_sel {
108         tc_gen;
109         unsigned char nkeys;
110         unsigned char flags;
111         struct tc_pedit_key keys[0];
112 };
113
114 #endif /* HAVE_TC_ACT_PEDIT */
115
116 /* Normally found in linux/netlink.h. */
117 #ifndef NETLINK_CAP_ACK
118 #define NETLINK_CAP_ACK 10
119 #endif
120
121 /* Normally found in linux/pkt_sched.h. */
122 #ifndef TC_H_MIN_INGRESS
123 #define TC_H_MIN_INGRESS 0xfff2u
124 #endif
125
126 /* Normally found in linux/pkt_cls.h. */
127 #ifndef TCA_CLS_FLAGS_SKIP_SW
128 #define TCA_CLS_FLAGS_SKIP_SW (1 << 1)
129 #endif
130 #ifndef HAVE_TCA_CHAIN
131 #define TCA_CHAIN 11
132 #endif
133 #ifndef HAVE_TCA_FLOWER_ACT
134 #define TCA_FLOWER_ACT 3
135 #endif
136 #ifndef HAVE_TCA_FLOWER_FLAGS
137 #define TCA_FLOWER_FLAGS 22
138 #endif
139 #ifndef HAVE_TCA_FLOWER_KEY_ETH_TYPE
140 #define TCA_FLOWER_KEY_ETH_TYPE 8
141 #endif
142 #ifndef HAVE_TCA_FLOWER_KEY_ETH_DST
143 #define TCA_FLOWER_KEY_ETH_DST 4
144 #endif
145 #ifndef HAVE_TCA_FLOWER_KEY_ETH_DST_MASK
146 #define TCA_FLOWER_KEY_ETH_DST_MASK 5
147 #endif
148 #ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC
149 #define TCA_FLOWER_KEY_ETH_SRC 6
150 #endif
151 #ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC_MASK
152 #define TCA_FLOWER_KEY_ETH_SRC_MASK 7
153 #endif
154 #ifndef HAVE_TCA_FLOWER_KEY_IP_PROTO
155 #define TCA_FLOWER_KEY_IP_PROTO 9
156 #endif
157 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC
158 #define TCA_FLOWER_KEY_IPV4_SRC 10
159 #endif
160 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC_MASK
161 #define TCA_FLOWER_KEY_IPV4_SRC_MASK 11
162 #endif
163 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST
164 #define TCA_FLOWER_KEY_IPV4_DST 12
165 #endif
166 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST_MASK
167 #define TCA_FLOWER_KEY_IPV4_DST_MASK 13
168 #endif
169 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC
170 #define TCA_FLOWER_KEY_IPV6_SRC 14
171 #endif
172 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC_MASK
173 #define TCA_FLOWER_KEY_IPV6_SRC_MASK 15
174 #endif
175 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST
176 #define TCA_FLOWER_KEY_IPV6_DST 16
177 #endif
178 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST_MASK
179 #define TCA_FLOWER_KEY_IPV6_DST_MASK 17
180 #endif
181 #ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC
182 #define TCA_FLOWER_KEY_TCP_SRC 18
183 #endif
184 #ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC_MASK
185 #define TCA_FLOWER_KEY_TCP_SRC_MASK 35
186 #endif
187 #ifndef HAVE_TCA_FLOWER_KEY_TCP_DST
188 #define TCA_FLOWER_KEY_TCP_DST 19
189 #endif
190 #ifndef HAVE_TCA_FLOWER_KEY_TCP_DST_MASK
191 #define TCA_FLOWER_KEY_TCP_DST_MASK 36
192 #endif
193 #ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC
194 #define TCA_FLOWER_KEY_UDP_SRC 20
195 #endif
196 #ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC_MASK
197 #define TCA_FLOWER_KEY_UDP_SRC_MASK 37
198 #endif
199 #ifndef HAVE_TCA_FLOWER_KEY_UDP_DST
200 #define TCA_FLOWER_KEY_UDP_DST 21
201 #endif
202 #ifndef HAVE_TCA_FLOWER_KEY_UDP_DST_MASK
203 #define TCA_FLOWER_KEY_UDP_DST_MASK 38
204 #endif
205 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_ID
206 #define TCA_FLOWER_KEY_VLAN_ID 23
207 #endif
208 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_PRIO
209 #define TCA_FLOWER_KEY_VLAN_PRIO 24
210 #endif
211 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_ETH_TYPE
212 #define TCA_FLOWER_KEY_VLAN_ETH_TYPE 25
213 #endif
214 #ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS
215 #define TCA_FLOWER_KEY_TCP_FLAGS 71
216 #endif
217 #ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS_MASK
218 #define TCA_FLOWER_KEY_TCP_FLAGS_MASK 72
219 #endif
220 #ifndef HAVE_TC_ACT_GOTO_CHAIN
221 #define TC_ACT_GOTO_CHAIN 0x20000000
222 #endif
223
224 #ifndef IPV6_ADDR_LEN
225 #define IPV6_ADDR_LEN 16
226 #endif
227
228 #ifndef IPV4_ADDR_LEN
229 #define IPV4_ADDR_LEN 4
230 #endif
231
232 #ifndef TP_PORT_LEN
233 #define TP_PORT_LEN 2 /* Transport Port (UDP/TCP) Length */
234 #endif
235
236 #ifndef TTL_LEN
237 #define TTL_LEN 1
238 #endif
239
240 #ifndef TCA_ACT_MAX_PRIO
241 #define TCA_ACT_MAX_PRIO 32
242 #endif
243
244 /**
245  * Structure for holding netlink context.
246  * Note the size of the message buffer which is MNL_SOCKET_BUFFER_SIZE.
247  * Using this (8KB) buffer size ensures that netlink messages will never be
248  * truncated.
249  */
250 struct mlx5_flow_tcf_context {
251         struct mnl_socket *nl; /* NETLINK_ROUTE libmnl socket. */
252         uint32_t seq; /* Message sequence number. */
253         uint32_t buf_size; /* Message buffer size. */
254         uint8_t *buf; /* Message buffer. */
255 };
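/*
 * A minimal setup sketch for this context (illustrative only, not the
 * driver's actual initialization code; error handling omitted):
 *
 *	struct mlx5_flow_tcf_context *ctx =
 *		rte_zmalloc(NULL, sizeof(*ctx) + MNL_SOCKET_BUFFER_SIZE, 0);
 *
 *	ctx->nl = mnl_socket_open(NETLINK_ROUTE);
 *	mnl_socket_bind(ctx->nl, 0, MNL_SOCKET_AUTOPID);
 *	ctx->buf_size = MNL_SOCKET_BUFFER_SIZE;
 *	ctx->buf = (uint8_t *)(ctx + 1); /* Assumes buffer follows the context. */
 *	ctx->seq = random();
 */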
256
257 /** Structure used when extracting the values of a flow counter
258  * from a netlink message.
259  */
260 struct flow_tcf_stats_basic {
261         bool valid;
262         struct gnet_stats_basic counters;
263 };
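/*
 * A sketch of how the parsed basic statistics would feed a flow counter
 * (struct mlx5_flow_counter); illustrative only, not the driver's actual
 * query path:
 *
 *	struct flow_tcf_stats_basic sb_data;
 *
 *	... parse TCA_STATS_BASIC from the netlink reply into sb_data ...
 *	if (sb_data.valid) {
 *		counter->hits = sb_data.counters.packets;
 *		counter->bytes = sb_data.counters.bytes;
 *	}
 */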
264
265 /** Empty masks for known item types. */
266 static const union {
267         struct rte_flow_item_port_id port_id;
268         struct rte_flow_item_eth eth;
269         struct rte_flow_item_vlan vlan;
270         struct rte_flow_item_ipv4 ipv4;
271         struct rte_flow_item_ipv6 ipv6;
272         struct rte_flow_item_tcp tcp;
273         struct rte_flow_item_udp udp;
274 } flow_tcf_mask_empty;
275
276 /** Supported masks for known item types. */
277 static const struct {
278         struct rte_flow_item_port_id port_id;
279         struct rte_flow_item_eth eth;
280         struct rte_flow_item_vlan vlan;
281         struct rte_flow_item_ipv4 ipv4;
282         struct rte_flow_item_ipv6 ipv6;
283         struct rte_flow_item_tcp tcp;
284         struct rte_flow_item_udp udp;
285 } flow_tcf_mask_supported = {
286         .port_id = {
287                 .id = 0xffffffff,
288         },
289         .eth = {
290                 .type = RTE_BE16(0xffff),
291                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
292                 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
293         },
294         .vlan = {
295                 /* PCP and VID only, no DEI. */
296                 .tci = RTE_BE16(0xefff),
297                 .inner_type = RTE_BE16(0xffff),
298         },
299         .ipv4.hdr = {
300                 .next_proto_id = 0xff,
301                 .src_addr = RTE_BE32(0xffffffff),
302                 .dst_addr = RTE_BE32(0xffffffff),
303         },
304         .ipv6.hdr = {
305                 .proto = 0xff,
306                 .src_addr =
307                         "\xff\xff\xff\xff\xff\xff\xff\xff"
308                         "\xff\xff\xff\xff\xff\xff\xff\xff",
309                 .dst_addr =
310                         "\xff\xff\xff\xff\xff\xff\xff\xff"
311                         "\xff\xff\xff\xff\xff\xff\xff\xff",
312         },
313         .tcp.hdr = {
314                 .src_port = RTE_BE16(0xffff),
315                 .dst_port = RTE_BE16(0xffff),
316                 .tcp_flags = 0xff,
317         },
318         .udp.hdr = {
319                 .src_port = RTE_BE16(0xffff),
320                 .dst_port = RTE_BE16(0xffff),
321         },
322 };
323
324 #define SZ_NLATTR_HDR MNL_ALIGN(sizeof(struct nlattr))
325 #define SZ_NLATTR_NEST SZ_NLATTR_HDR
326 #define SZ_NLATTR_DATA_OF(len) MNL_ALIGN(SZ_NLATTR_HDR + (len))
327 #define SZ_NLATTR_TYPE_OF(typ) SZ_NLATTR_DATA_OF(sizeof(typ))
328 #define SZ_NLATTR_STRZ_OF(str) SZ_NLATTR_DATA_OF(strlen(str) + 1)
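/*
 * Worked size example, assuming the usual 4-byte netlink attribute
 * header and alignment:
 *   SZ_NLATTR_HDR               = MNL_ALIGN(4)     = 4
 *   SZ_NLATTR_TYPE_OF(uint16_t) = MNL_ALIGN(4 + 2) = 8
 *   SZ_NLATTR_STRZ_OF("pedit")  = MNL_ALIGN(4 + 6) = 12
 */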
329
330 #define PTOI_TABLE_SZ_MAX(dev) (mlx5_dev_to_port_id((dev)->device, NULL, 0) + 2)
331
332 /** DPDK port to network interface index (ifindex) conversion. */
333 struct flow_tcf_ptoi {
334         uint16_t port_id; /**< DPDK port ID. */
335         unsigned int ifindex; /**< Network interface index. */
336 };
337
338 /* Due to a driver/FW limitation. */
339 #define MLX5_TCF_GROUP_ID_MAX 3
340 #define MLX5_TCF_GROUP_PRIORITY_MAX 14
341
342 #define MLX5_TCF_FATE_ACTIONS \
343         (MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_PORT_ID | \
344          MLX5_FLOW_ACTION_JUMP)
345
346 #define MLX5_TCF_VLAN_ACTIONS \
347         (MLX5_FLOW_ACTION_OF_POP_VLAN | MLX5_FLOW_ACTION_OF_PUSH_VLAN | \
348          MLX5_FLOW_ACTION_OF_SET_VLAN_VID | MLX5_FLOW_ACTION_OF_SET_VLAN_PCP)
349
350 #define MLX5_TCF_PEDIT_ACTIONS \
351         (MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST | \
352          MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST | \
353          MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST | \
354          MLX5_FLOW_ACTION_SET_TTL | MLX5_FLOW_ACTION_DEC_TTL | \
355          MLX5_FLOW_ACTION_SET_MAC_SRC | MLX5_FLOW_ACTION_SET_MAC_DST)
356
357 #define MLX5_TCF_CONFIG_ACTIONS \
358         (MLX5_FLOW_ACTION_PORT_ID | MLX5_FLOW_ACTION_JUMP | \
359          MLX5_FLOW_ACTION_OF_PUSH_VLAN | MLX5_FLOW_ACTION_OF_SET_VLAN_VID | \
360          MLX5_FLOW_ACTION_OF_SET_VLAN_PCP | \
361          (MLX5_TCF_PEDIT_ACTIONS & ~MLX5_FLOW_ACTION_DEC_TTL))
362
363 #define MAX_PEDIT_KEYS 128
364 #define SZ_PEDIT_KEY_VAL 4
365
366 #define NUM_OF_PEDIT_KEYS(sz) \
367         (((sz) / SZ_PEDIT_KEY_VAL) + (((sz) % SZ_PEDIT_KEY_VAL) ? 1 : 0))
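/*
 * Worked examples, rounding up to whole 4-byte pedit keys:
 *   NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN = 4)  = 1
 *   NUM_OF_PEDIT_KEYS(ETHER_ADDR_LEN = 6) = 2
 *   NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN = 16) = 4
 *   NUM_OF_PEDIT_KEYS(TTL_LEN = 1)        = 1
 */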
368
369 struct pedit_key_ex {
370         enum pedit_header_type htype;
371         enum pedit_cmd cmd;
372 };
373
374 struct pedit_parser {
375         struct tc_pedit_sel sel;
376         struct tc_pedit_key keys[MAX_PEDIT_KEYS];
377         struct pedit_key_ex keys_ex[MAX_PEDIT_KEYS];
378 };
379
380 /**
381  * Create space for using the implicitly created TC flow counter.
382  *
383  * The counter is created implicitly by the kernel as part of the TC
384  * rule itself, so this function takes no parameters.
385  *
386  * @return
387  *   A pointer to the counter data structure, NULL otherwise and
388  *   rte_errno is set.
389  */
390 static struct mlx5_flow_counter *
391 flow_tcf_counter_new(void)
392 {
393         struct mlx5_flow_counter *cnt;
394
395         /*
396          * E-switch counters cannot be shared and their IDs are unknown,
397          * so all of them are currently returned with ID 0.
398          * Switching to unique IDs may be better in the future.
399          */
400         struct mlx5_flow_counter tmpl = {
401                 .ref_cnt = 1,
402                 .shared = 0,
403                 .id = 0,
404                 .cs = NULL,
405                 .hits = 0,
406                 .bytes = 0,
407         };
408         cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0);
409         if (!cnt) {
410                 rte_errno = ENOMEM;
411                 return NULL;
412         }
413         *cnt = tmpl;
414         /* Implicit counter, do not add to list. */
415         return cnt;
416 }
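/*
 * A minimal sketch of the matching release path (hypothetical helper
 * name, not part of this file): the implicit counter is not on any list,
 * so dropping the last reference simply frees it.
 *
 *	static void
 *	flow_tcf_counter_release_sketch(struct mlx5_flow_counter *cnt)
 *	{
 *		if (cnt && --cnt->ref_cnt == 0)
 *			rte_free(cnt);
 *	}
 */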
417
418 /**
419  * Set pedit key of MAC address
420  *
421  * @param[in] actions
422  *   pointer to action specification
423  * @param[in,out] p_parser
424  *   pointer to pedit_parser
425  */
426 static void
427 flow_tcf_pedit_key_set_mac(const struct rte_flow_action *actions,
428                            struct pedit_parser *p_parser)
429 {
430         int idx = p_parser->sel.nkeys;
431         uint32_t off = actions->type == RTE_FLOW_ACTION_TYPE_SET_MAC_SRC ?
432                                         offsetof(struct ether_hdr, s_addr) :
433                                         offsetof(struct ether_hdr, d_addr);
434         const struct rte_flow_action_set_mac *conf =
435                 (const struct rte_flow_action_set_mac *)actions->conf;
436
437         p_parser->keys[idx].off = off;
438         p_parser->keys[idx].mask = ~UINT32_MAX;
439         p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH;
440         p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
441         memcpy(&p_parser->keys[idx].val,
442                 conf->mac_addr, SZ_PEDIT_KEY_VAL);
443         idx++;
444         p_parser->keys[idx].off = off + SZ_PEDIT_KEY_VAL;
445         p_parser->keys[idx].mask = 0xFFFF0000;
446         p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH;
447         p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
448         memcpy(&p_parser->keys[idx].val,
449                 conf->mac_addr + SZ_PEDIT_KEY_VAL,
450                 ETHER_ADDR_LEN - SZ_PEDIT_KEY_VAL);
451         p_parser->sel.nkeys = (++idx);
452 }
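/*
 * Worked example (illustrative): RTE_FLOW_ACTION_TYPE_SET_MAC_DST with
 * address 00:11:22:33:44:55 is translated into two 4-byte pedit keys:
 *
 *	keys[0]: off = 0 (d_addr), mask = 0x00000000, val = bytes 00 11 22 33
 *	keys[1]: off = 4,          mask = 0xFFFF0000, val = bytes 44 55 00 00
 *
 * The non-zero mask in keys[1] is meant to keep the two bytes following
 * the destination address (the start of s_addr) untouched.
 */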
453
454 /**
455  * Set pedit key of decrease/set ttl
456  *
457  * @param[in] actions
458  *   pointer to action specification
459  * @param[in,out] p_parser
460  *   pointer to pedit_parser
461  * @param[in] item_flags
462  *   flags of all items presented
463  */
464 static void
465 flow_tcf_pedit_key_set_dec_ttl(const struct rte_flow_action *actions,
466                                 struct pedit_parser *p_parser,
467                                 uint64_t item_flags)
468 {
469         int idx = p_parser->sel.nkeys;
470
471         p_parser->keys[idx].mask = 0xFFFFFF00;
472         if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4) {
473                 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
474                 p_parser->keys[idx].off =
475                         offsetof(struct ipv4_hdr, time_to_live);
476         }
477         if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6) {
478                 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
479                 p_parser->keys[idx].off =
480                         offsetof(struct ipv6_hdr, hop_limits);
481         }
482         if (actions->type == RTE_FLOW_ACTION_TYPE_DEC_TTL) {
483                 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_ADD;
484                 p_parser->keys[idx].val = 0x000000FF;
485         } else {
486                 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
487                 p_parser->keys[idx].val =
488                         (__u32)((const struct rte_flow_action_set_ttl *)
489                          actions->conf)->ttl_value;
490         }
491         p_parser->sel.nkeys = (++idx);
492 }
493
494 /**
495  * Set pedit key of transport (TCP/UDP) port value
496  *
497  * @param[in] actions
498  *   pointer to action specification
499  * @param[in,out] p_parser
500  *   pointer to pedit_parser
501  * @param[in] item_flags
502  *   flags of all items presented
503  */
504 static void
505 flow_tcf_pedit_key_set_tp_port(const struct rte_flow_action *actions,
506                                 struct pedit_parser *p_parser,
507                                 uint64_t item_flags)
508 {
509         int idx = p_parser->sel.nkeys;
510
511         if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
512                 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_UDP;
513         if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
514                 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_TCP;
515         p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
516         /* Offset of src/dst port is the same for TCP and UDP. */
517         p_parser->keys[idx].off =
518                 actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_SRC ?
519                 offsetof(struct tcp_hdr, src_port) :
520                 offsetof(struct tcp_hdr, dst_port);
521         p_parser->keys[idx].mask = 0xFFFF0000;
522         p_parser->keys[idx].val =
523                 (__u32)((const struct rte_flow_action_set_tp *)
524                                 actions->conf)->port;
525         p_parser->sel.nkeys = (++idx);
526 }
527
528 /**
529  * Set pedit key of ipv6 address
530  *
531  * @param[in] actions
532  *   pointer to action specification
533  * @param[in,out] p_parser
534  *   pointer to pedit_parser
535  */
536 static void
537 flow_tcf_pedit_key_set_ipv6_addr(const struct rte_flow_action *actions,
538                                  struct pedit_parser *p_parser)
539 {
540         int idx = p_parser->sel.nkeys;
541         int keys = NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
542         int off_base =
543                 actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC ?
544                 offsetof(struct ipv6_hdr, src_addr) :
545                 offsetof(struct ipv6_hdr, dst_addr);
546         const struct rte_flow_action_set_ipv6 *conf =
547                 (const struct rte_flow_action_set_ipv6 *)actions->conf;
548
549         for (int i = 0; i < keys; i++, idx++) {
550                 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
551                 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
552                 p_parser->keys[idx].off = off_base + i * SZ_PEDIT_KEY_VAL;
553                 p_parser->keys[idx].mask = ~UINT32_MAX;
554                 memcpy(&p_parser->keys[idx].val,
555                 conf->ipv6_addr + i * SZ_PEDIT_KEY_VAL,
556                         SZ_PEDIT_KEY_VAL);
557         }
558         p_parser->sel.nkeys += keys;
559 }
560
561 /**
562  * Set pedit key of ipv4 address
563  *
564  * @param[in] actions
565  *   pointer to action specification
566  * @param[in,out] p_parser
567  *   pointer to pedit_parser
568  */
569 static void
570 flow_tcf_pedit_key_set_ipv4_addr(const struct rte_flow_action *actions,
571                                  struct pedit_parser *p_parser)
572 {
573         int idx = p_parser->sel.nkeys;
574
575         p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
576         p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
577         p_parser->keys[idx].off =
578                 actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC ?
579                 offsetof(struct ipv4_hdr, src_addr) :
580                 offsetof(struct ipv4_hdr, dst_addr);
581         p_parser->keys[idx].mask = ~UINT32_MAX;
582         p_parser->keys[idx].val =
583                 ((const struct rte_flow_action_set_ipv4 *)
584                  actions->conf)->ipv4_addr;
585         p_parser->sel.nkeys = (++idx);
586 }
587
588 /**
589  * Create the pedit's netlink attributes in a netlink message
590  * on a pre-allocated message buffer.
591  *
592  * @param[in,out] nl
593  *   Pointer to the pre-allocated netlink message buffer.
594  * @param[in,out] actions
595  *   Pointer to a pointer to the actions specification; on return it
596  *   points to the last translated modify-header action so the caller
597  *   can continue from there.
598  * @param[in] item_flags
599  *   Flags of all items presented.
600  */
601 static void
602 flow_tcf_create_pedit_mnl_msg(struct nlmsghdr *nl,
603                               const struct rte_flow_action **actions,
604                               uint64_t item_flags)
605 {
606         struct pedit_parser p_parser;
607         struct nlattr *na_act_options;
608         struct nlattr *na_pedit_keys;
609
610         memset(&p_parser, 0, sizeof(p_parser));
611         mnl_attr_put_strz(nl, TCA_ACT_KIND, "pedit");
612         na_act_options = mnl_attr_nest_start(nl, TCA_ACT_OPTIONS);
613         /* all modify header actions should be in one tc-pedit action */
614         for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
615                 switch ((*actions)->type) {
616                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
617                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
618                         flow_tcf_pedit_key_set_ipv4_addr(*actions, &p_parser);
619                         break;
620                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
621                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
622                         flow_tcf_pedit_key_set_ipv6_addr(*actions, &p_parser);
623                         break;
624                 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
625                 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
626                         flow_tcf_pedit_key_set_tp_port(*actions,
627                                                         &p_parser, item_flags);
628                         break;
629                 case RTE_FLOW_ACTION_TYPE_SET_TTL:
630                 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
631                         flow_tcf_pedit_key_set_dec_ttl(*actions,
632                                                         &p_parser, item_flags);
633                         break;
634                 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
635                 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
636                         flow_tcf_pedit_key_set_mac(*actions, &p_parser);
637                         break;
638                 default:
639                         goto pedit_mnl_msg_done;
640                 }
641         }
642 pedit_mnl_msg_done:
643         p_parser.sel.action = TC_ACT_PIPE;
644         mnl_attr_put(nl, TCA_PEDIT_PARMS_EX,
645                      sizeof(p_parser.sel) +
646                      p_parser.sel.nkeys * sizeof(struct tc_pedit_key),
647                      &p_parser);
648         na_pedit_keys =
649                 mnl_attr_nest_start(nl, TCA_PEDIT_KEYS_EX | NLA_F_NESTED);
650         for (int i = 0; i < p_parser.sel.nkeys; i++) {
651                 struct nlattr *na_pedit_key =
652                         mnl_attr_nest_start(nl,
653                                             TCA_PEDIT_KEY_EX | NLA_F_NESTED);
654                 mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_HTYPE,
655                                  p_parser.keys_ex[i].htype);
656                 mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_CMD,
657                                  p_parser.keys_ex[i].cmd);
658                 mnl_attr_nest_end(nl, na_pedit_key);
659         }
660         mnl_attr_nest_end(nl, na_pedit_keys);
661         mnl_attr_nest_end(nl, na_act_options);
662         (*actions)--;
663 }
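/*
 * Netlink attribute layout produced by the function above (sketch):
 *
 *	TCA_ACT_KIND = "pedit"
 *	TCA_ACT_OPTIONS (nest)
 *		TCA_PEDIT_PARMS_EX = struct tc_pedit_sel + nkeys keys
 *		TCA_PEDIT_KEYS_EX (nest)
 *			TCA_PEDIT_KEY_EX (nest)
 *				TCA_PEDIT_KEY_EX_HTYPE
 *				TCA_PEDIT_KEY_EX_CMD
 *			(repeated once per key)
 */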
664
665 /**
666  * Calculate the maximum memory size of one TC pedit action.
667  * One TC pedit action can contain a set of keys, each defining
668  * a rewrite element (rte_flow action).
669  *
670  * @param[in,out] actions
671  *   Pointer to a pointer to the actions specification; on return it
672  *   points to the last accounted modify-header action.
673  * @param[in,out] action_flags
674  *   Pointer to the action flags, updated with the detected actions.
675  *
676  * @return
677  *   Maximum memory size of one TC pedit action.
678  */
679 static int
680 flow_tcf_get_pedit_actions_size(const struct rte_flow_action **actions,
681                                 uint64_t *action_flags)
682 {
683         int pedit_size = 0;
684         int keys = 0;
685         uint64_t flags = 0;
686
687         pedit_size += SZ_NLATTR_NEST + /* na_act_index. */
688                       SZ_NLATTR_STRZ_OF("pedit") +
689                       SZ_NLATTR_NEST; /* TCA_ACT_OPTIONS. */
690         for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
691                 switch ((*actions)->type) {
692                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
693                         keys += NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN);
694                         flags |= MLX5_FLOW_ACTION_SET_IPV4_SRC;
695                         break;
696                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
697                         keys += NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN);
698                         flags |= MLX5_FLOW_ACTION_SET_IPV4_DST;
699                         break;
700                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
701                         keys += NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
702                         flags |= MLX5_FLOW_ACTION_SET_IPV6_SRC;
703                         break;
704                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
705                         keys += NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
706                         flags |= MLX5_FLOW_ACTION_SET_IPV6_DST;
707                         break;
708                 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
709                         /* TCP is the same as UDP. */
710                         keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
711                         flags |= MLX5_FLOW_ACTION_SET_TP_SRC;
712                         break;
713                 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
714                         /* TCP is the same as UDP. */
715                         keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
716                         flags |= MLX5_FLOW_ACTION_SET_TP_DST;
717                         break;
718                 case RTE_FLOW_ACTION_TYPE_SET_TTL:
719                         keys += NUM_OF_PEDIT_KEYS(TTL_LEN);
720                         flags |= MLX5_FLOW_ACTION_SET_TTL;
721                         break;
722                 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
723                         keys += NUM_OF_PEDIT_KEYS(TTL_LEN);
724                         flags |= MLX5_FLOW_ACTION_DEC_TTL;
725                         break;
726                 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
727                         keys += NUM_OF_PEDIT_KEYS(ETHER_ADDR_LEN);
728                         flags |= MLX5_FLOW_ACTION_SET_MAC_SRC;
729                         break;
730                 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
731                         keys += NUM_OF_PEDIT_KEYS(ETHER_ADDR_LEN);
732                         flags |= MLX5_FLOW_ACTION_SET_MAC_DST;
733                         break;
734                 default:
735                         goto get_pedit_action_size_done;
736                 }
737         }
738 get_pedit_action_size_done:
739         /* TCA_PEDIT_PARMS_EX */
740         pedit_size +=
741                 SZ_NLATTR_DATA_OF(sizeof(struct tc_pedit_sel) +
742                                   keys * sizeof(struct tc_pedit_key));
743         pedit_size += SZ_NLATTR_NEST; /* TCA_PEDIT_KEYS_EX */
744         pedit_size += keys *
745                       /* TCA_PEDIT_KEY_EX + HTYPE + CMD */
746                       (SZ_NLATTR_NEST + SZ_NLATTR_DATA_OF(2) +
747                        SZ_NLATTR_DATA_OF(2));
748         (*action_flags) |= flags;
749         (*actions)--;
750         return pedit_size;
751 }
752
753 /**
754  * Retrieve mask for pattern item.
755  *
756  * This function does basic sanity checks on a pattern item in order to
757  * return the most appropriate mask for it.
758  *
759  * @param[in] item
760  *   Item specification.
761  * @param[in] mask_default
762  *   Default mask for pattern item as specified by the flow API.
763  * @param[in] mask_supported
764  *   Mask fields supported by the implementation.
765  * @param[in] mask_empty
766  *   Empty mask to return when there is no specification.
767  * @param[out] error
768  *   Perform verbose error reporting if not NULL.
769  *
770  * @return
771  *   Either @p item->mask or one of the mask parameters on success, NULL
772  *   otherwise and rte_errno is set.
773  */
774 static const void *
775 flow_tcf_item_mask(const struct rte_flow_item *item, const void *mask_default,
776                    const void *mask_supported, const void *mask_empty,
777                    size_t mask_size, struct rte_flow_error *error)
778 {
779         const uint8_t *mask;
780         size_t i;
781
782         /* item->last and item->mask cannot exist without item->spec. */
783         if (!item->spec && (item->mask || item->last)) {
784                 rte_flow_error_set(error, EINVAL,
785                                    RTE_FLOW_ERROR_TYPE_ITEM, item,
786                                    "\"mask\" or \"last\" field provided without"
787                                    " a corresponding \"spec\"");
788                 return NULL;
789         }
790         /* No spec, no mask, no problem. */
791         if (!item->spec)
792                 return mask_empty;
793         mask = item->mask ? item->mask : mask_default;
794         assert(mask);
795         /*
796          * Single-pass check to make sure that:
797          * - Mask is supported, no bits are set outside mask_supported.
798          * - Both item->spec and item->last are included in mask.
799          */
800         for (i = 0; i != mask_size; ++i) {
801                 if (!mask[i])
802                         continue;
803                 if ((mask[i] | ((const uint8_t *)mask_supported)[i]) !=
804                     ((const uint8_t *)mask_supported)[i]) {
805                         rte_flow_error_set(error, ENOTSUP,
806                                            RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
807                                            "unsupported field found"
808                                            " in \"mask\"");
809                         return NULL;
810                 }
811                 if (item->last &&
812                     (((const uint8_t *)item->spec)[i] & mask[i]) !=
813                     (((const uint8_t *)item->last)[i] & mask[i])) {
814                         rte_flow_error_set(error, EINVAL,
815                                            RTE_FLOW_ERROR_TYPE_ITEM_LAST,
816                                            item->last,
817                                            "range between \"spec\" and \"last\""
818                                            " not comprised in \"mask\"");
819                         return NULL;
820                 }
821         }
822         return mask;
823 }
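/*
 * Worked example for the range check above (illustrative): an IPv4 item
 * with spec 10.0.0.0, last 10.0.0.255 and mask 255.255.255.0 passes,
 * since every byte covered by the mask is identical in spec and last.
 * With mask 255.255.255.128 the last byte differs under the mask
 * (0x00 vs 0x80), so the item is rejected as a range not comprised
 * in the mask.
 */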
824
825 /**
826  * Build a conversion table between port ID and ifindex.
827  *
828  * @param[in] dev
829  *   Pointer to Ethernet device.
830  * @param[out] ptoi
831  *   Pointer to ptoi table.
832  * @param[in] len
833  *   Size of ptoi table provided.
834  *
835  * @return
836  *   Size of ptoi table filled.
837  */
838 static unsigned int
839 flow_tcf_build_ptoi_table(struct rte_eth_dev *dev, struct flow_tcf_ptoi *ptoi,
840                           unsigned int len)
841 {
842         unsigned int n = mlx5_dev_to_port_id(dev->device, NULL, 0);
843         uint16_t port_id[n + 1];
844         unsigned int i;
845         unsigned int own = 0;
846
847         /* At least one port is needed when no switch domain is present. */
848         if (!n) {
849                 n = 1;
850                 port_id[0] = dev->data->port_id;
851         } else {
852                 n = RTE_MIN(mlx5_dev_to_port_id(dev->device, port_id, n), n);
853         }
854         if (n > len)
855                 return 0;
856         for (i = 0; i != n; ++i) {
857                 struct rte_eth_dev_info dev_info;
858
859                 rte_eth_dev_info_get(port_id[i], &dev_info);
860                 if (port_id[i] == dev->data->port_id)
861                         own = i;
862                 ptoi[i].port_id = port_id[i];
863                 ptoi[i].ifindex = dev_info.if_index;
864         }
865         /* Ensure first entry of ptoi[] is the current device. */
866         if (own) {
867                 ptoi[n] = ptoi[0];
868                 ptoi[0] = ptoi[own];
869                 ptoi[own] = ptoi[n];
870         }
871         /* An entry with zero ifindex terminates ptoi[]. */
872         ptoi[n].port_id = 0;
873         ptoi[n].ifindex = 0;
874         return n;
875 }
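/*
 * Usage sketch (illustrative): the table is zero-terminated and its
 * first entry is always the caller's own port, so lookups are plain
 * scans:
 *
 *	struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
 *	unsigned int i;
 *
 *	flow_tcf_build_ptoi_table(dev, ptoi, PTOI_TABLE_SZ_MAX(dev));
 *	for (i = 0; ptoi[i].ifindex; ++i)
 *		if (ptoi[i].port_id == port_id)
 *			break; /* ptoi[i].ifindex is the match (if any). */
 */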
876
877 /**
878  * Verify the @p attr will be correctly understood by the E-switch.
879  *
880  * @param[in] attr
881  *   Pointer to flow attributes
882  * @param[out] error
883  *   Pointer to error structure.
884  *
885  * @return
886  *   0 on success, a negative errno value otherwise and rte_errno is set.
887  */
888 static int
889 flow_tcf_validate_attributes(const struct rte_flow_attr *attr,
890                              struct rte_flow_error *error)
891 {
892         /*
893          * Supported attributes: groups, some priorities and ingress only.
894          * Group is supported only if the kernel supports chains. Don't care
895          * about transfer as it is the caller's problem.
896          */
897         if (attr->group > MLX5_TCF_GROUP_ID_MAX)
898                 return rte_flow_error_set(error, ENOTSUP,
899                                           RTE_FLOW_ERROR_TYPE_ATTR_GROUP, attr,
900                                           "group ID larger than "
901                                           RTE_STR(MLX5_TCF_GROUP_ID_MAX)
902                                           " isn't supported");
903         else if (attr->group > 0 &&
904                  attr->priority > MLX5_TCF_GROUP_PRIORITY_MAX)
905                 return rte_flow_error_set(error, ENOTSUP,
906                                           RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
907                                           attr,
908                                           "lowest priority level is "
909                                           RTE_STR(MLX5_TCF_GROUP_PRIORITY_MAX)
910                                           " when group is configured");
911         else if (attr->priority > 0xfffe)
912                 return rte_flow_error_set(error, ENOTSUP,
913                                           RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
914                                           attr,
915                                           "lowest priority level is 0xfffe");
916         if (!attr->ingress)
917                 return rte_flow_error_set(error, EINVAL,
918                                           RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
919                                           attr, "only ingress is supported");
920         if (attr->egress)
921                 return rte_flow_error_set(error, ENOTSUP,
922                                           RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
923                                           attr, "egress is not supported");
924         return 0;
925 }
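/*
 * Worked examples (illustrative) for the checks above:
 *   - group = 2, priority = 3, ingress = 1                 -> accepted
 *   - group = 4 (> MLX5_TCF_GROUP_ID_MAX)                  -> ENOTSUP
 *   - group = 1, priority = 15
 *     (> MLX5_TCF_GROUP_PRIORITY_MAX)                      -> ENOTSUP
 *   - ingress = 0 or egress = 1                            -> rejected
 */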
926
927 /**
928  * Validate flow for E-Switch.
929  *
930  * @param[in] dev
931  *   Pointer to the Ethernet device structure.
932  * @param[in] attr
933  *   Pointer to the flow attributes.
934  * @param[in] items
935  *   Pointer to the list of items.
936  * @param[in] actions
937  *   Pointer to the list of actions.
938  * @param[out] error
939  *   Pointer to the error structure.
940  *
941  * @return
942  *   0 on success, a negative errno value otherwise and rte_errno is set.
943  */
944 static int
945 flow_tcf_validate(struct rte_eth_dev *dev,
946                   const struct rte_flow_attr *attr,
947                   const struct rte_flow_item items[],
948                   const struct rte_flow_action actions[],
949                   struct rte_flow_error *error)
950 {
951         union {
952                 const struct rte_flow_item_port_id *port_id;
953                 const struct rte_flow_item_eth *eth;
954                 const struct rte_flow_item_vlan *vlan;
955                 const struct rte_flow_item_ipv4 *ipv4;
956                 const struct rte_flow_item_ipv6 *ipv6;
957                 const struct rte_flow_item_tcp *tcp;
958                 const struct rte_flow_item_udp *udp;
959         } spec, mask;
960         union {
961                 const struct rte_flow_action_port_id *port_id;
962                 const struct rte_flow_action_jump *jump;
963                 const struct rte_flow_action_of_push_vlan *of_push_vlan;
964                 const struct rte_flow_action_of_set_vlan_vid *
965                         of_set_vlan_vid;
966                 const struct rte_flow_action_of_set_vlan_pcp *
967                         of_set_vlan_pcp;
968                 const struct rte_flow_action_set_ipv4 *set_ipv4;
969                 const struct rte_flow_action_set_ipv6 *set_ipv6;
970         } conf;
971         uint32_t item_flags = 0;
972         uint32_t action_flags = 0;
973         uint8_t next_protocol = -1;
974         unsigned int tcm_ifindex = 0;
975         uint8_t pedit_validated = 0;
976         struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
977         struct rte_eth_dev *port_id_dev = NULL;
978         bool in_port_id_set = false;
979         int ret;
980
981         claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
982                                                 PTOI_TABLE_SZ_MAX(dev)));
983         ret = flow_tcf_validate_attributes(attr, error);
984         if (ret < 0)
985                 return ret;
986         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
987                 unsigned int i;
988
989                 switch (items->type) {
990                 case RTE_FLOW_ITEM_TYPE_VOID:
991                         break;
992                 case RTE_FLOW_ITEM_TYPE_PORT_ID:
993                         mask.port_id = flow_tcf_item_mask
994                                 (items, &rte_flow_item_port_id_mask,
995                                  &flow_tcf_mask_supported.port_id,
996                                  &flow_tcf_mask_empty.port_id,
997                                  sizeof(flow_tcf_mask_supported.port_id),
998                                  error);
999                         if (!mask.port_id)
1000                                 return -rte_errno;
1001                         if (mask.port_id == &flow_tcf_mask_empty.port_id) {
1002                                 in_port_id_set = 1;
1003                                 break;
1004                         }
1005                         spec.port_id = items->spec;
1006                         if (mask.port_id->id && mask.port_id->id != 0xffffffff)
1007                                 return rte_flow_error_set
1008                                         (error, ENOTSUP,
1009                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
1010                                          mask.port_id,
1011                                          "no support for partial mask on"
1012                                          " \"id\" field");
1013                         if (!mask.port_id->id)
1014                                 i = 0;
1015                         else
1016                                 for (i = 0; ptoi[i].ifindex; ++i)
1017                                         if (ptoi[i].port_id == spec.port_id->id)
1018                                                 break;
1019                         if (!ptoi[i].ifindex)
1020                                 return rte_flow_error_set
1021                                         (error, ENODEV,
1022                                          RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
1023                                          spec.port_id,
1024                                          "missing data to convert port ID to"
1025                                          " ifindex");
1026                         if (in_port_id_set && ptoi[i].ifindex != tcm_ifindex)
1027                                 return rte_flow_error_set
1028                                         (error, ENOTSUP,
1029                                          RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
1030                                          spec.port_id,
1031                                          "cannot match traffic for"
1032                                          " several port IDs through"
1033                                          " a single flow rule");
1034                         tcm_ifindex = ptoi[i].ifindex;
1035                         in_port_id_set = 1;
1036                         break;
1037                 case RTE_FLOW_ITEM_TYPE_ETH:
1038                         ret = mlx5_flow_validate_item_eth(items, item_flags,
1039                                                           error);
1040                         if (ret < 0)
1041                                 return ret;
1042                         item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
1043                         /* TODO:
1044                          * Redundant check due to different supported mask.
1045                          * Same for the rest of items.
1046                          */
1047                         mask.eth = flow_tcf_item_mask
1048                                 (items, &rte_flow_item_eth_mask,
1049                                  &flow_tcf_mask_supported.eth,
1050                                  &flow_tcf_mask_empty.eth,
1051                                  sizeof(flow_tcf_mask_supported.eth),
1052                                  error);
1053                         if (!mask.eth)
1054                                 return -rte_errno;
1055                         if (mask.eth->type && mask.eth->type !=
1056                             RTE_BE16(0xffff))
1057                                 return rte_flow_error_set
1058                                         (error, ENOTSUP,
1059                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
1060                                          mask.eth,
1061                                          "no support for partial mask on"
1062                                          " \"type\" field");
1063                         break;
1064                 case RTE_FLOW_ITEM_TYPE_VLAN:
1065                         ret = mlx5_flow_validate_item_vlan(items, item_flags,
1066                                                            error);
1067                         if (ret < 0)
1068                                 return ret;
1069                         item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
1070                         mask.vlan = flow_tcf_item_mask
1071                                 (items, &rte_flow_item_vlan_mask,
1072                                  &flow_tcf_mask_supported.vlan,
1073                                  &flow_tcf_mask_empty.vlan,
1074                                  sizeof(flow_tcf_mask_supported.vlan),
1075                                  error);
1076                         if (!mask.vlan)
1077                                 return -rte_errno;
1078                         if ((mask.vlan->tci & RTE_BE16(0xe000) &&
1079                              (mask.vlan->tci & RTE_BE16(0xe000)) !=
1080                               RTE_BE16(0xe000)) ||
1081                             (mask.vlan->tci & RTE_BE16(0x0fff) &&
1082                              (mask.vlan->tci & RTE_BE16(0x0fff)) !=
1083                               RTE_BE16(0x0fff)) ||
1084                             (mask.vlan->inner_type &&
1085                              mask.vlan->inner_type != RTE_BE16(0xffff)))
1086                                 return rte_flow_error_set
1087                                         (error, ENOTSUP,
1088                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
1089                                          mask.vlan,
1090                                          "no support for partial masks on"
1091                                          " \"tci\" (PCP and VID parts) and"
1092                                          " \"inner_type\" fields");
1093                         break;
1094                 case RTE_FLOW_ITEM_TYPE_IPV4:
1095                         ret = mlx5_flow_validate_item_ipv4(items, item_flags,
1096                                                            error);
1097                         if (ret < 0)
1098                                 return ret;
1099                         item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1100                         mask.ipv4 = flow_tcf_item_mask
1101                                 (items, &rte_flow_item_ipv4_mask,
1102                                  &flow_tcf_mask_supported.ipv4,
1103                                  &flow_tcf_mask_empty.ipv4,
1104                                  sizeof(flow_tcf_mask_supported.ipv4),
1105                                  error);
1106                         if (!mask.ipv4)
1107                                 return -rte_errno;
1108                         if (mask.ipv4->hdr.next_proto_id &&
1109                             mask.ipv4->hdr.next_proto_id != 0xff)
1110                                 return rte_flow_error_set
1111                                         (error, ENOTSUP,
1112                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
1113                                          mask.ipv4,
1114                                          "no support for partial mask on"
1115                                          " \"hdr.next_proto_id\" field");
1116                         else if (mask.ipv4->hdr.next_proto_id)
1117                                 next_protocol =
1118                                         ((const struct rte_flow_item_ipv4 *)
1119                                          (items->spec))->hdr.next_proto_id;
1120                         break;
1121                 case RTE_FLOW_ITEM_TYPE_IPV6:
1122                         ret = mlx5_flow_validate_item_ipv6(items, item_flags,
1123                                                            error);
1124                         if (ret < 0)
1125                                 return ret;
1126                         item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1127                         mask.ipv6 = flow_tcf_item_mask
1128                                 (items, &rte_flow_item_ipv6_mask,
1129                                  &flow_tcf_mask_supported.ipv6,
1130                                  &flow_tcf_mask_empty.ipv6,
1131                                  sizeof(flow_tcf_mask_supported.ipv6),
1132                                  error);
1133                         if (!mask.ipv6)
1134                                 return -rte_errno;
1135                         if (mask.ipv6->hdr.proto &&
1136                             mask.ipv6->hdr.proto != 0xff)
1137                                 return rte_flow_error_set
1138                                         (error, ENOTSUP,
1139                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
1140                                          mask.ipv6,
1141                                          "no support for partial mask on"
1142                                          " \"hdr.proto\" field");
1143                         else if (mask.ipv6->hdr.proto)
1144                                 next_protocol =
1145                                         ((const struct rte_flow_item_ipv6 *)
1146                                          (items->spec))->hdr.proto;
1147                         break;
1148                 case RTE_FLOW_ITEM_TYPE_UDP:
1149                         ret = mlx5_flow_validate_item_udp(items, item_flags,
1150                                                           next_protocol, error);
1151                         if (ret < 0)
1152                                 return ret;
1153                         item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
1154                         mask.udp = flow_tcf_item_mask
1155                                 (items, &rte_flow_item_udp_mask,
1156                                  &flow_tcf_mask_supported.udp,
1157                                  &flow_tcf_mask_empty.udp,
1158                                  sizeof(flow_tcf_mask_supported.udp),
1159                                  error);
1160                         if (!mask.udp)
1161                                 return -rte_errno;
1162                         break;
1163                 case RTE_FLOW_ITEM_TYPE_TCP:
1164                         ret = mlx5_flow_validate_item_tcp
1165                                              (items, item_flags,
1166                                               next_protocol,
1167                                               &flow_tcf_mask_supported.tcp,
1168                                               error);
1169                         if (ret < 0)
1170                                 return ret;
1171                         item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
1172                         mask.tcp = flow_tcf_item_mask
1173                                 (items, &rte_flow_item_tcp_mask,
1174                                  &flow_tcf_mask_supported.tcp,
1175                                  &flow_tcf_mask_empty.tcp,
1176                                  sizeof(flow_tcf_mask_supported.tcp),
1177                                  error);
1178                         if (!mask.tcp)
1179                                 return -rte_errno;
1180                         break;
1181                 default:
1182                         return rte_flow_error_set(error, ENOTSUP,
1183                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1184                                                   NULL, "item not supported");
1185                 }
1186         }
1187         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1188                 unsigned int i;
1189                 uint32_t current_action_flag = 0;
1190
1191                 switch (actions->type) {
1192                 case RTE_FLOW_ACTION_TYPE_VOID:
1193                         break;
1194                 case RTE_FLOW_ACTION_TYPE_PORT_ID:
1195                         current_action_flag = MLX5_FLOW_ACTION_PORT_ID;
1196                         if (!actions->conf)
1197                                 break;
1198                         conf.port_id = actions->conf;
1199                         if (conf.port_id->original)
1200                                 i = 0;
1201                         else
1202                                 for (i = 0; ptoi[i].ifindex; ++i)
1203                                         if (ptoi[i].port_id == conf.port_id->id)
1204                                                 break;
1205                         if (!ptoi[i].ifindex)
1206                                 return rte_flow_error_set
1207                                         (error, ENODEV,
1208                                          RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1209                                          conf.port_id,
1210                                          "missing data to convert port ID to"
1211                                          " ifindex");
1212                         port_id_dev = &rte_eth_devices[conf.port_id->id];
1213                         break;
1214                 case RTE_FLOW_ACTION_TYPE_JUMP:
1215                         current_action_flag = MLX5_FLOW_ACTION_JUMP;
1216                         if (!actions->conf)
1217                                 break;
1218                         conf.jump = actions->conf;
1219                         if (attr->group >= conf.jump->group)
1220                                 return rte_flow_error_set
1221                                         (error, ENOTSUP,
1222                                          RTE_FLOW_ERROR_TYPE_ACTION,
1223                                          actions,
1224                                          "can jump only to a group forward");
1225                         break;
1226                 case RTE_FLOW_ACTION_TYPE_DROP:
1227                         current_action_flag = MLX5_FLOW_ACTION_DROP;
1228                         break;
1229                 case RTE_FLOW_ACTION_TYPE_COUNT:
1230                         break;
1231                 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
1232                         current_action_flag = MLX5_FLOW_ACTION_OF_POP_VLAN;
1233                         break;
1234                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
1235                         current_action_flag = MLX5_FLOW_ACTION_OF_PUSH_VLAN;
1236                         break;
1237                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
1238                         if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
1239                                 return rte_flow_error_set
1240                                         (error, ENOTSUP,
1241                                          RTE_FLOW_ERROR_TYPE_ACTION, actions,
1242                                          "vlan modify is not supported,"
1243                                          " set action must follow push action");
1244                         current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
1245                         break;
1246                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
1247                         if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
1248                                 return rte_flow_error_set
1249                                         (error, ENOTSUP,
1250                                          RTE_FLOW_ERROR_TYPE_ACTION, actions,
1251                                          "vlan modify is not supported,"
1252                                          " set action must follow push action");
1253                         current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
1254                         break;
1255                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
1256                         current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_SRC;
1257                         break;
1258                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
1259                         current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_DST;
1260                         break;
1261                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
1262                         current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_SRC;
1263                         break;
1264                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
1265                         current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_DST;
1266                         break;
1267                 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
1268                         current_action_flag = MLX5_FLOW_ACTION_SET_TP_SRC;
1269                         break;
1270                 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
1271                         current_action_flag = MLX5_FLOW_ACTION_SET_TP_DST;
1272                         break;
1273                 case RTE_FLOW_ACTION_TYPE_SET_TTL:
1274                         current_action_flag = MLX5_FLOW_ACTION_SET_TTL;
1275                         break;
1276                 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
1277                         current_action_flag = MLX5_FLOW_ACTION_DEC_TTL;
1278                         break;
1279                 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
1280                         current_action_flag = MLX5_FLOW_ACTION_SET_MAC_SRC;
1281                         break;
1282                 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
1283                         current_action_flag = MLX5_FLOW_ACTION_SET_MAC_DST;
1284                         break;
1285                 default:
1286                         return rte_flow_error_set(error, ENOTSUP,
1287                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1288                                                   actions,
1289                                                   "action not supported");
1290                 }
1291                 if (current_action_flag & MLX5_TCF_CONFIG_ACTIONS) {
1292                         if (!actions->conf)
1293                                 return rte_flow_error_set(error, EINVAL,
1294                                                 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1295                                                 actions,
1296                                                 "action configuration not set");
1297                 }
1298                 if ((current_action_flag & MLX5_TCF_PEDIT_ACTIONS) &&
1299                     pedit_validated)
1300                         return rte_flow_error_set(error, ENOTSUP,
1301                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1302                                                   actions,
1303                                                   "set actions should be "
1304                                                   "listed successively");
1305                 if ((current_action_flag & ~MLX5_TCF_PEDIT_ACTIONS) &&
1306                     (action_flags & MLX5_TCF_PEDIT_ACTIONS))
1307                         pedit_validated = 1;
1308                 if ((current_action_flag & MLX5_TCF_FATE_ACTIONS) &&
1309                     (action_flags & MLX5_TCF_FATE_ACTIONS))
1310                         return rte_flow_error_set(error, EINVAL,
1311                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1312                                                   actions,
1313                                                   "can't have multiple fate"
1314                                                   " actions");
1315                 action_flags |= current_action_flag;
1316         }
1317         if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
1318             (action_flags & MLX5_FLOW_ACTION_DROP))
1319                 return rte_flow_error_set(error, ENOTSUP,
1320                                           RTE_FLOW_ERROR_TYPE_ACTION,
1321                                           actions,
1322                                           "set action is not compatible with "
1323                                           "drop action");
1324         if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
1325             !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
1326                 return rte_flow_error_set(error, ENOTSUP,
1327                                           RTE_FLOW_ERROR_TYPE_ACTION,
1328                                           actions,
1329                                           "set action must be followed by "
1330                                           "port_id action");
1331         if (action_flags &
1332            (MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST)) {
1333                 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4))
1334                         return rte_flow_error_set(error, EINVAL,
1335                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1336                                                   actions,
1337                                                   "no ipv4 item found in"
1338                                                   " pattern");
1339         }
1340         if (action_flags &
1341            (MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST)) {
1342                 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6))
1343                         return rte_flow_error_set(error, EINVAL,
1344                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1345                                                   actions,
1346                                                   "no ipv6 item found in"
1347                                                   " pattern");
1348         }
1349         if (action_flags &
1350            (MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST)) {
1351                 if (!(item_flags &
1352                      (MLX5_FLOW_LAYER_OUTER_L4_UDP |
1353                       MLX5_FLOW_LAYER_OUTER_L4_TCP)))
1354                         return rte_flow_error_set(error, EINVAL,
1355                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1356                                                   actions,
1357                                                   "no TCP/UDP item found in"
1358                                                   " pattern");
1359         }
1360         /*
1361          * FW syndrome (0xA9C090):
1362          *     set_flow_table_entry: push vlan action fte in fdb can ONLY be
1363          *     forwarded to the uplink.
1364          */
1365         if ((action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN) &&
1366             (action_flags & MLX5_FLOW_ACTION_PORT_ID) &&
1367             ((struct priv *)port_id_dev->data->dev_private)->representor)
1368                 return rte_flow_error_set(error, ENOTSUP,
1369                                           RTE_FLOW_ERROR_TYPE_ACTION, actions,
1370                                           "vlan push can only be applied"
1371                                           " when forwarding to uplink port");
1372         /*
1373          * FW syndrome (0x294609):
1374          *     set_flow_table_entry: modify/pop/push actions in fdb flow table
1375          *     are supported only while forwarding to vport.
1376          */
1377         if ((action_flags & MLX5_TCF_VLAN_ACTIONS) &&
1378             !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
1379                 return rte_flow_error_set(error, ENOTSUP,
1380                                           RTE_FLOW_ERROR_TYPE_ACTION, actions,
1381                                           "vlan actions are supported"
1382                                           " only with port_id action");
1383         if (!(action_flags & MLX5_TCF_FATE_ACTIONS))
1384                 return rte_flow_error_set(error, EINVAL,
1385                                           RTE_FLOW_ERROR_TYPE_ACTION, actions,
1386                                           "no fate action is found");
1387         if (action_flags &
1388            (MLX5_FLOW_ACTION_SET_TTL | MLX5_FLOW_ACTION_DEC_TTL)) {
1389                 if (!(item_flags &
1390                      (MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
1391                       MLX5_FLOW_LAYER_OUTER_L3_IPV6)))
1392                         return rte_flow_error_set(error, EINVAL,
1393                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1394                                                   actions,
1395                                                   "no IP found in pattern");
1396         }
1397         if (action_flags &
1398             (MLX5_FLOW_ACTION_SET_MAC_SRC | MLX5_FLOW_ACTION_SET_MAC_DST)) {
1399                 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L2))
1400                         return rte_flow_error_set(error, ENOTSUP,
1401                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1402                                                   actions,
1403                                                   "no ethernet found in"
1404                                                   " pattern");
1405         }
1406         return 0;
1407 }
1408
1409 /**
1410  * Calculate maximum size of memory for flow items of Linux TC flower and
1411  * extract specified items.
1412  *
      * @param[in] attr
      *   Pointer to the flow attributes.
1413  * @param[in] items
1414  *   Pointer to the list of items.
1415  * @param[out] item_flags
1416  *   Pointer to the detected items.
1417  *
1418  * @return
1419  *   Maximum size of memory for items.
1420  */
1421 static int
1422 flow_tcf_get_items_and_size(const struct rte_flow_attr *attr,
1423                             const struct rte_flow_item items[],
1424                             uint64_t *item_flags)
1425 {
1426         int size = 0;
1427         uint64_t flags = 0;
1428
1429         size += SZ_NLATTR_STRZ_OF("flower") +
1430                 SZ_NLATTR_NEST + /* TCA_OPTIONS. */
1431                 SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CLS_FLAGS_SKIP_SW. */
1432         if (attr->group > 0)
1433                 size += SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CHAIN. */
1434         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1435                 switch (items->type) {
1436                 case RTE_FLOW_ITEM_TYPE_VOID:
1437                         break;
1438                 case RTE_FLOW_ITEM_TYPE_PORT_ID:
1439                         break;
1440                 case RTE_FLOW_ITEM_TYPE_ETH:
1441                         size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1442                                 SZ_NLATTR_DATA_OF(ETHER_ADDR_LEN) * 4;
1443                                 /* dst/src MAC addr and mask. */
1444                         flags |= MLX5_FLOW_LAYER_OUTER_L2;
1445                         break;
1446                 case RTE_FLOW_ITEM_TYPE_VLAN:
1447                         size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1448                                 SZ_NLATTR_TYPE_OF(uint16_t) +
1449                                 /* VLAN Ether type. */
1450                                 SZ_NLATTR_TYPE_OF(uint8_t) + /* VLAN prio. */
1451                                 SZ_NLATTR_TYPE_OF(uint16_t); /* VLAN ID. */
1452                         flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
1453                         break;
1454                 case RTE_FLOW_ITEM_TYPE_IPV4:
1455                         size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1456                                 SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1457                                 SZ_NLATTR_TYPE_OF(uint32_t) * 4;
1458                                 /* dst/src IP addr and mask. */
1459                         flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1460                         break;
1461                 case RTE_FLOW_ITEM_TYPE_IPV6:
1462                         size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1463                                 SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1464                                 SZ_NLATTR_TYPE_OF(IPV6_ADDR_LEN) * 4;
1465                                 /* dst/src IP addr and mask. */
1466                         flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1467                         break;
1468                 case RTE_FLOW_ITEM_TYPE_UDP:
1469                         size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1470                                 SZ_NLATTR_TYPE_OF(uint16_t) * 4;
1471                                 /* dst/src port and mask. */
1472                         flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
1473                         break;
1474                 case RTE_FLOW_ITEM_TYPE_TCP:
1475                         size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1476                                 SZ_NLATTR_TYPE_OF(uint16_t) * 4;
1477                                 /* dst/src port and mask. */
1478                         flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
1479                         break;
1480                 default:
1481                         DRV_LOG(WARNING,
1482                                 "unsupported item %p type %d,"
1483                                 " items must be validated before flow creation",
1484                                 (const void *)items, items->type);
1485                         break;
1486                 }
1487         }
1488         *item_flags = flags;
1489         return size;
1490 }
1491
1492 /**
1493  * Calculate maximum size of memory for flow actions of Linux TC flower and
1494  * extract specified actions.
1495  *
1496  * @param[in] actions
1497  *   Pointer to the list of actions.
1498  * @param[out] action_flags
1499  *   Pointer to the detected actions.
1500  *
1501  * @return
1502  *   Maximum size of memory for actions.
1503  */
1504 static int
1505 flow_tcf_get_actions_and_size(const struct rte_flow_action actions[],
1506                               uint64_t *action_flags)
1507 {
1508         int size = 0;
1509         uint64_t flags = 0;
1510
1511         size += SZ_NLATTR_NEST; /* TCA_FLOWER_ACT. */
1512         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1513                 switch (actions->type) {
1514                 case RTE_FLOW_ACTION_TYPE_VOID:
1515                         break;
1516                 case RTE_FLOW_ACTION_TYPE_PORT_ID:
1517                         size += SZ_NLATTR_NEST + /* na_act_index. */
1518                                 SZ_NLATTR_STRZ_OF("mirred") +
1519                                 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1520                                 SZ_NLATTR_TYPE_OF(struct tc_mirred);
1521                         flags |= MLX5_FLOW_ACTION_PORT_ID;
1522                         break;
1523                 case RTE_FLOW_ACTION_TYPE_JUMP:
1524                         size += SZ_NLATTR_NEST + /* na_act_index. */
1525                                 SZ_NLATTR_STRZ_OF("gact") +
1526                                 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1527                                 SZ_NLATTR_TYPE_OF(struct tc_gact);
1528                         flags |= MLX5_FLOW_ACTION_JUMP;
1529                         break;
1530                 case RTE_FLOW_ACTION_TYPE_DROP:
1531                         size += SZ_NLATTR_NEST + /* na_act_index. */
1532                                 SZ_NLATTR_STRZ_OF("gact") +
1533                                 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1534                                 SZ_NLATTR_TYPE_OF(struct tc_gact);
1535                         flags |= MLX5_FLOW_ACTION_DROP;
1536                         break;
1537                 case RTE_FLOW_ACTION_TYPE_COUNT:
1538                         break;
1539                 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
1540                         flags |= MLX5_FLOW_ACTION_OF_POP_VLAN;
1541                         goto action_of_vlan;
1542                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
1543                         flags |= MLX5_FLOW_ACTION_OF_PUSH_VLAN;
1544                         goto action_of_vlan;
1545                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
1546                         flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
1547                         goto action_of_vlan;
1548                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
1549                         flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
1550                         goto action_of_vlan;
1551 action_of_vlan:
1552                         size += SZ_NLATTR_NEST + /* na_act_index. */
1553                                 SZ_NLATTR_STRZ_OF("vlan") +
1554                                 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1555                                 SZ_NLATTR_TYPE_OF(struct tc_vlan) +
1556                                 SZ_NLATTR_TYPE_OF(uint16_t) +
1557                                 /* VLAN protocol. */
1558                                 SZ_NLATTR_TYPE_OF(uint16_t) + /* VLAN ID. */
1559                                 SZ_NLATTR_TYPE_OF(uint8_t); /* VLAN prio. */
1560                         break;
1561                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
1562                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
1563                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
1564                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
1565                 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
1566                 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
1567                 case RTE_FLOW_ACTION_TYPE_SET_TTL:
1568                 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
1569                 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
1570                 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
1571                         size += flow_tcf_get_pedit_actions_size(&actions,
1572                                                                 &flags);
1573                         break;
1574                 default:
1575                         DRV_LOG(WARNING,
1576                                 "unsupported action %p type %d,"
1577                                 " actions must be validated before flow creation",
1578                                 (const void *)actions, actions->type);
1579                         break;
1580                 }
1581         }
1582         *action_flags = flags;
1583         return size;
1584 }
1585
1586 /**
1587  * Brand rtnetlink buffer with unique handle.
1588  *
1589  * This handle should be unique for a given network interface to avoid
1590  * collisions.
1591  *
1592  * @param nlh
1593  *   Pointer to Netlink message.
1594  * @param handle
1595  *   Unique 32-bit handle to use.
1596  */
1597 static void
1598 flow_tcf_nl_brand(struct nlmsghdr *nlh, uint32_t handle)
1599 {
1600         struct tcmsg *tcm = mnl_nlmsg_get_payload(nlh);
1601
1602         tcm->tcm_handle = handle;
1603         DRV_LOG(DEBUG, "Netlink msg %p is branded with handle %x",
1604                 (void *)nlh, handle);
1605 }
1606
1607 /**
1608  * Prepare a flow object for Linux TC flower. It calculates the maximum size of
1609  * memory required, allocates the memory, initializes Netlink message headers
1610  * and sets a unique TC message handle.
1611  *
1612  * @param[in] attr
1613  *   Pointer to the flow attributes.
1614  * @param[in] items
1615  *   Pointer to the list of items.
1616  * @param[in] actions
1617  *   Pointer to the list of actions.
1618  * @param[out] item_flags
1619  *   Pointer to bit mask of all items detected.
1620  * @param[out] action_flags
1621  *   Pointer to bit mask of all actions detected.
1622  * @param[out] error
1623  *   Pointer to the error structure.
1624  *
1625  * @return
1626  *   Pointer to mlx5_flow object on success,
1627  *   otherwise NULL and rte_errno is set.
1628  */
1629 static struct mlx5_flow *
1630 flow_tcf_prepare(const struct rte_flow_attr *attr,
1631                  const struct rte_flow_item items[],
1632                  const struct rte_flow_action actions[],
1633                  uint64_t *item_flags, uint64_t *action_flags,
1634                  struct rte_flow_error *error)
1635 {
1636         size_t size = sizeof(struct mlx5_flow) +
1637                       MNL_ALIGN(sizeof(struct nlmsghdr)) +
1638                       MNL_ALIGN(sizeof(struct tcmsg));
1639         struct mlx5_flow *dev_flow;
1640         struct nlmsghdr *nlh;
1641         struct tcmsg *tcm;
1642
1643         size += flow_tcf_get_items_and_size(attr, items, item_flags);
1644         size += flow_tcf_get_actions_and_size(actions, action_flags);
1645         dev_flow = rte_zmalloc(__func__, size, MNL_ALIGNTO);
1646         if (!dev_flow) {
1647                 rte_flow_error_set(error, ENOMEM,
1648                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1649                                    "not enough memory to create E-Switch flow");
1650                 return NULL;
1651         }
1652         nlh = mnl_nlmsg_put_header((void *)(dev_flow + 1));
1653         tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
1654         *dev_flow = (struct mlx5_flow){
1655                 .tcf = (struct mlx5_flow_tcf){
1656                         .nlh = nlh,
1657                         .tcm = tcm,
1658                 },
1659         };
1660         /*
1661          * Generate a reasonably unique handle based on the address of the
1662          * target buffer.
1663          *
1664          * This is straightforward on 32-bit systems where the flow pointer can
1665          * be used directly. Otherwise, its least significant part is taken
1666          * after shifting it by the previous power of two of the pointed buffer
1667          * size.
1668          */
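        /*
         * Worked example (hypothetical numbers): if size were 464 bytes,
         * rte_align32prevpow2(464) is 256 and rte_log2_u32(256) is 8, so a
         * 64-bit pointer is shifted right by 8 bits before being truncated
         * to the 32-bit handle.
         */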
1669         if (sizeof(dev_flow) <= 4)
1670                 flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow);
1671         else
1672                 flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow >>
1673                                        rte_log2_u32(rte_align32prevpow2(size)));
1674         return dev_flow;
1675 }
1676
1677 /**
1678  * Make adjustments for supporting count actions.
1679  *
1680  * @param[in] dev
1681  *   Pointer to the Ethernet device structure.
1682  * @param[in] dev_flow
1683  *   Pointer to mlx5_flow.
1684  * @param[out] error
1685  *   Pointer to error structure.
1686  *
1687  * @return
1688  *   0 on success, a negative errno value otherwise and rte_errno is set.
1689  */
1690 static int
1691 flow_tcf_translate_action_count(struct rte_eth_dev *dev __rte_unused,
1692                                   struct mlx5_flow *dev_flow,
1693                                   struct rte_flow_error *error)
1694 {
1695         struct rte_flow *flow = dev_flow->flow;
1696
1697         if (!flow->counter) {
1698                 flow->counter = flow_tcf_counter_new();
1699                 if (!flow->counter)
1700                         return rte_flow_error_set(error, rte_errno,
1701                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1702                                                   NULL,
1703                                                   "cannot get counter"
1704                                                   " context.");
1705         }
1706         return 0;
1707 }
1708
1709 /**
1710  * Translate flow for Linux TC flower and construct Netlink message.
1711  *
1712  * @param[in] dev
1713  *   Pointer to the Ethernet device.
1714  * @param[in, out] dev_flow
1715  *   Pointer to the sub flow.
1716  * @param[in] attr
1717  *   Pointer to the flow attributes.
1718  * @param[in] items
1719  *   Pointer to the list of items.
1720  * @param[in] actions
1721  *   Pointer to the list of actions.
1722  * @param[out] error
1723  *   Pointer to the error structure.
1724  *
1725  * @return
1726  *   0 on success, a negative errno value otherwise and rte_errno is set.
1727  */
1728 static int
1729 flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
1730                    const struct rte_flow_attr *attr,
1731                    const struct rte_flow_item items[],
1732                    const struct rte_flow_action actions[],
1733                    struct rte_flow_error *error)
1734 {
1735         union {
1736                 const struct rte_flow_item_port_id *port_id;
1737                 const struct rte_flow_item_eth *eth;
1738                 const struct rte_flow_item_vlan *vlan;
1739                 const struct rte_flow_item_ipv4 *ipv4;
1740                 const struct rte_flow_item_ipv6 *ipv6;
1741                 const struct rte_flow_item_tcp *tcp;
1742                 const struct rte_flow_item_udp *udp;
1743         } spec, mask;
1744         union {
1745                 const struct rte_flow_action_port_id *port_id;
1746                 const struct rte_flow_action_jump *jump;
1747                 const struct rte_flow_action_of_push_vlan *of_push_vlan;
1748                 const struct rte_flow_action_of_set_vlan_vid *
1749                         of_set_vlan_vid;
1750                 const struct rte_flow_action_of_set_vlan_pcp *
1751                         of_set_vlan_pcp;
1752         } conf;
1753         struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
1754         struct nlmsghdr *nlh = dev_flow->tcf.nlh;
1755         struct tcmsg *tcm = dev_flow->tcf.tcm;
1756         uint32_t na_act_index_cur;
1757         bool eth_type_set = 0;
1758         bool vlan_present = 0;
1759         bool vlan_eth_type_set = 0;
1760         bool ip_proto_set = 0;
1761         struct nlattr *na_flower;
1762         struct nlattr *na_flower_act;
1763         struct nlattr *na_vlan_id = NULL;
1764         struct nlattr *na_vlan_priority = NULL;
1765         uint64_t item_flags = 0;
1766         int ret;
1767
1768         claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
1769                                                 PTOI_TABLE_SZ_MAX(dev)));
1770         nlh = dev_flow->tcf.nlh;
1771         tcm = dev_flow->tcf.tcm;
1772         /* Prepare API must have been called beforehand. */
1773         assert(nlh != NULL && tcm != NULL);
1774         tcm->tcm_family = AF_UNSPEC;
1775         tcm->tcm_ifindex = ptoi[0].ifindex;
1776         tcm->tcm_parent = TC_H_MAKE(TC_H_INGRESS, TC_H_MIN_INGRESS);
1777         /*
1778          * Priority cannot be zero to prevent the kernel from picking one
1779          * automatically.
1780          */
1781         tcm->tcm_info = TC_H_MAKE((attr->priority + 1) << 16,
1782                                   RTE_BE16(ETH_P_ALL));
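        /*
         * tcm_info combines the rule priority (upper 16 bits) with the
         * protocol to be matched, ETH_P_ALL in this case (lower 16 bits).
         */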
1783         if (attr->group > 0)
1784                 mnl_attr_put_u32(nlh, TCA_CHAIN, attr->group);
1785         mnl_attr_put_strz(nlh, TCA_KIND, "flower");
1786         na_flower = mnl_attr_nest_start(nlh, TCA_OPTIONS);
1787         mnl_attr_put_u32(nlh, TCA_FLOWER_FLAGS, TCA_CLS_FLAGS_SKIP_SW);
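        /*
         * TCA_CLS_FLAGS_SKIP_SW requests hardware-only classification; the
         * kernel rejects the rule if it cannot be offloaded.
         */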
1788         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1789                 unsigned int i;
1790
1791                 switch (items->type) {
1792                 case RTE_FLOW_ITEM_TYPE_VOID:
1793                         break;
1794                 case RTE_FLOW_ITEM_TYPE_PORT_ID:
1795                         mask.port_id = flow_tcf_item_mask
1796                                 (items, &rte_flow_item_port_id_mask,
1797                                  &flow_tcf_mask_supported.port_id,
1798                                  &flow_tcf_mask_empty.port_id,
1799                                  sizeof(flow_tcf_mask_supported.port_id),
1800                                  error);
1801                         assert(mask.port_id);
1802                         if (mask.port_id == &flow_tcf_mask_empty.port_id)
1803                                 break;
1804                         spec.port_id = items->spec;
1805                         if (!mask.port_id->id)
1806                                 i = 0;
1807                         else
1808                                 for (i = 0; ptoi[i].ifindex; ++i)
1809                                         if (ptoi[i].port_id == spec.port_id->id)
1810                                                 break;
1811                         assert(ptoi[i].ifindex);
1812                         tcm->tcm_ifindex = ptoi[i].ifindex;
1813                         break;
1814                 case RTE_FLOW_ITEM_TYPE_ETH:
1815                         item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
1816                         mask.eth = flow_tcf_item_mask
1817                                 (items, &rte_flow_item_eth_mask,
1818                                  &flow_tcf_mask_supported.eth,
1819                                  &flow_tcf_mask_empty.eth,
1820                                  sizeof(flow_tcf_mask_supported.eth),
1821                                  error);
1822                         assert(mask.eth);
1823                         if (mask.eth == &flow_tcf_mask_empty.eth)
1824                                 break;
1825                         spec.eth = items->spec;
1826                         if (mask.eth->type) {
1827                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
1828                                                  spec.eth->type);
1829                                 eth_type_set = 1;
1830                         }
1831                         if (!is_zero_ether_addr(&mask.eth->dst)) {
1832                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST,
1833                                              ETHER_ADDR_LEN,
1834                                              spec.eth->dst.addr_bytes);
1835                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST_MASK,
1836                                              ETHER_ADDR_LEN,
1837                                              mask.eth->dst.addr_bytes);
1838                         }
1839                         if (!is_zero_ether_addr(&mask.eth->src)) {
1840                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC,
1841                                              ETHER_ADDR_LEN,
1842                                              spec.eth->src.addr_bytes);
1843                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC_MASK,
1844                                              ETHER_ADDR_LEN,
1845                                              mask.eth->src.addr_bytes);
1846                         }
1847                         break;
1848                 case RTE_FLOW_ITEM_TYPE_VLAN:
1849                         item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
1850                         mask.vlan = flow_tcf_item_mask
1851                                 (items, &rte_flow_item_vlan_mask,
1852                                  &flow_tcf_mask_supported.vlan,
1853                                  &flow_tcf_mask_empty.vlan,
1854                                  sizeof(flow_tcf_mask_supported.vlan),
1855                                  error);
1856                         assert(mask.vlan);
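                        /*
                         * The flower classifier needs an explicit Ethernet
                         * type to accept VLAN keys, so default to 802.1Q
                         * when the ETH item did not provide one.
                         */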
1857                         if (!eth_type_set)
1858                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
1859                                                  RTE_BE16(ETH_P_8021Q));
1860                         eth_type_set = 1;
1861                         vlan_present = 1;
1862                         if (mask.vlan == &flow_tcf_mask_empty.vlan)
1863                                 break;
1864                         spec.vlan = items->spec;
1865                         if (mask.vlan->inner_type) {
1866                                 mnl_attr_put_u16(nlh,
1867                                                  TCA_FLOWER_KEY_VLAN_ETH_TYPE,
1868                                                  spec.vlan->inner_type);
1869                                 vlan_eth_type_set = 1;
1870                         }
1871                         if (mask.vlan->tci & RTE_BE16(0xe000))
1872                                 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_VLAN_PRIO,
1873                                                 (rte_be_to_cpu_16
1874                                                  (spec.vlan->tci) >> 13) & 0x7);
1875                         if (mask.vlan->tci & RTE_BE16(0x0fff))
1876                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_VLAN_ID,
1877                                                  rte_be_to_cpu_16
1878                                                  (spec.vlan->tci &
1879                                                   RTE_BE16(0x0fff)));
1880                         break;
1881                 case RTE_FLOW_ITEM_TYPE_IPV4:
1882                         item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1883                         mask.ipv4 = flow_tcf_item_mask
1884                                 (items, &rte_flow_item_ipv4_mask,
1885                                  &flow_tcf_mask_supported.ipv4,
1886                                  &flow_tcf_mask_empty.ipv4,
1887                                  sizeof(flow_tcf_mask_supported.ipv4),
1888                                  error);
1889                         assert(mask.ipv4);
1890                         if (!eth_type_set || !vlan_eth_type_set)
1891                                 mnl_attr_put_u16(nlh,
1892                                                  vlan_present ?
1893                                                  TCA_FLOWER_KEY_VLAN_ETH_TYPE :
1894                                                  TCA_FLOWER_KEY_ETH_TYPE,
1895                                                  RTE_BE16(ETH_P_IP));
1896                         eth_type_set = 1;
1897                         vlan_eth_type_set = 1;
1898                         if (mask.ipv4 == &flow_tcf_mask_empty.ipv4)
1899                                 break;
1900                         spec.ipv4 = items->spec;
1901                         if (mask.ipv4->hdr.next_proto_id) {
1902                                 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1903                                                 spec.ipv4->hdr.next_proto_id);
1904                                 ip_proto_set = 1;
1905                         }
1906                         if (mask.ipv4->hdr.src_addr) {
1907                                 mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_SRC,
1908                                                  spec.ipv4->hdr.src_addr);
1909                                 mnl_attr_put_u32(nlh,
1910                                                  TCA_FLOWER_KEY_IPV4_SRC_MASK,
1911                                                  mask.ipv4->hdr.src_addr);
1912                         }
1913                         if (mask.ipv4->hdr.dst_addr) {
1914                                 mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_DST,
1915                                                  spec.ipv4->hdr.dst_addr);
1916                                 mnl_attr_put_u32(nlh,
1917                                                  TCA_FLOWER_KEY_IPV4_DST_MASK,
1918                                                  mask.ipv4->hdr.dst_addr);
1919                         }
1920                         break;
1921                 case RTE_FLOW_ITEM_TYPE_IPV6:
1922                         item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1923                         mask.ipv6 = flow_tcf_item_mask
1924                                 (items, &rte_flow_item_ipv6_mask,
1925                                  &flow_tcf_mask_supported.ipv6,
1926                                  &flow_tcf_mask_empty.ipv6,
1927                                  sizeof(flow_tcf_mask_supported.ipv6),
1928                                  error);
1929                         assert(mask.ipv6);
1930                         if (!eth_type_set || !vlan_eth_type_set)
1931                                 mnl_attr_put_u16(nlh,
1932                                                  vlan_present ?
1933                                                  TCA_FLOWER_KEY_VLAN_ETH_TYPE :
1934                                                  TCA_FLOWER_KEY_ETH_TYPE,
1935                                                  RTE_BE16(ETH_P_IPV6));
1936                         eth_type_set = 1;
1937                         vlan_eth_type_set = 1;
1938                         if (mask.ipv6 == &flow_tcf_mask_empty.ipv6)
1939                                 break;
1940                         spec.ipv6 = items->spec;
1941                         if (mask.ipv6->hdr.proto) {
1942                                 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1943                                                 spec.ipv6->hdr.proto);
1944                                 ip_proto_set = 1;
1945                         }
1946                         if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.src_addr)) {
1947                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC,
1948                                              sizeof(spec.ipv6->hdr.src_addr),
1949                                              spec.ipv6->hdr.src_addr);
1950                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC_MASK,
1951                                              sizeof(mask.ipv6->hdr.src_addr),
1952                                              mask.ipv6->hdr.src_addr);
1953                         }
1954                         if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.dst_addr)) {
1955                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST,
1956                                              sizeof(spec.ipv6->hdr.dst_addr),
1957                                              spec.ipv6->hdr.dst_addr);
1958                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST_MASK,
1959                                              sizeof(mask.ipv6->hdr.dst_addr),
1960                                              mask.ipv6->hdr.dst_addr);
1961                         }
1962                         break;
1963                 case RTE_FLOW_ITEM_TYPE_UDP:
1964                         item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
1965                         mask.udp = flow_tcf_item_mask
1966                                 (items, &rte_flow_item_udp_mask,
1967                                  &flow_tcf_mask_supported.udp,
1968                                  &flow_tcf_mask_empty.udp,
1969                                  sizeof(flow_tcf_mask_supported.udp),
1970                                  error);
1971                         assert(mask.udp);
1972                         if (!ip_proto_set)
1973                                 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1974                                                 IPPROTO_UDP);
1975                         if (mask.udp == &flow_tcf_mask_empty.udp)
1976                                 break;
1977                         spec.udp = items->spec;
1978                         if (mask.udp->hdr.src_port) {
1979                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_SRC,
1980                                                  spec.udp->hdr.src_port);
1981                                 mnl_attr_put_u16(nlh,
1982                                                  TCA_FLOWER_KEY_UDP_SRC_MASK,
1983                                                  mask.udp->hdr.src_port);
1984                         }
1985                         if (mask.udp->hdr.dst_port) {
1986                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_DST,
1987                                                  spec.udp->hdr.dst_port);
1988                                 mnl_attr_put_u16(nlh,
1989                                                  TCA_FLOWER_KEY_UDP_DST_MASK,
1990                                                  mask.udp->hdr.dst_port);
1991                         }
1992                         break;
1993                 case RTE_FLOW_ITEM_TYPE_TCP:
1994                         item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
1995                         mask.tcp = flow_tcf_item_mask
1996                                 (items, &rte_flow_item_tcp_mask,
1997                                  &flow_tcf_mask_supported.tcp,
1998                                  &flow_tcf_mask_empty.tcp,
1999                                  sizeof(flow_tcf_mask_supported.tcp),
2000                                  error);
2001                         assert(mask.tcp);
2002                         if (!ip_proto_set)
2003                                 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
2004                                                 IPPROTO_TCP);
2005                         if (mask.tcp == &flow_tcf_mask_empty.tcp)
2006                                 break;
2007                         spec.tcp = items->spec;
2008                         if (mask.tcp->hdr.src_port) {
2009                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_SRC,
2010                                                  spec.tcp->hdr.src_port);
2011                                 mnl_attr_put_u16(nlh,
2012                                                  TCA_FLOWER_KEY_TCP_SRC_MASK,
2013                                                  mask.tcp->hdr.src_port);
2014                         }
2015                         if (mask.tcp->hdr.dst_port) {
2016                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_DST,
2017                                                  spec.tcp->hdr.dst_port);
2018                                 mnl_attr_put_u16(nlh,
2019                                                  TCA_FLOWER_KEY_TCP_DST_MASK,
2020                                                  mask.tcp->hdr.dst_port);
2021                         }
2022                         if (mask.tcp->hdr.tcp_flags) {
2023                                 mnl_attr_put_u16
2024                                         (nlh,
2025                                          TCA_FLOWER_KEY_TCP_FLAGS,
2026                                          rte_cpu_to_be_16
2027                                                 (spec.tcp->hdr.tcp_flags));
2028                                 mnl_attr_put_u16
2029                                         (nlh,
2030                                          TCA_FLOWER_KEY_TCP_FLAGS_MASK,
2031                                          rte_cpu_to_be_16
2032                                                 (mask.tcp->hdr.tcp_flags));
2033                         }
2034                         break;
2035                 default:
2036                         return rte_flow_error_set(error, ENOTSUP,
2037                                                   RTE_FLOW_ERROR_TYPE_ITEM,
2038                                                   NULL, "item not supported");
2039                 }
2040         }
2041         na_flower_act = mnl_attr_nest_start(nlh, TCA_FLOWER_ACT);
2042         na_act_index_cur = 1;
2043         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
2044                 struct nlattr *na_act_index;
2045                 struct nlattr *na_act;
2046                 unsigned int vlan_act;
2047                 unsigned int i;
2048
2049                 switch (actions->type) {
2050                 case RTE_FLOW_ACTION_TYPE_VOID:
2051                         break;
2052                 case RTE_FLOW_ACTION_TYPE_PORT_ID:
2053                         conf.port_id = actions->conf;
2054                         if (conf.port_id->original)
2055                                 i = 0;
2056                         else
2057                                 for (i = 0; ptoi[i].ifindex; ++i)
2058                                         if (ptoi[i].port_id == conf.port_id->id)
2059                                                 break;
2060                         assert(ptoi[i].ifindex);
2061                         na_act_index =
2062                                 mnl_attr_nest_start(nlh, na_act_index_cur++);
2063                         assert(na_act_index);
2064                         mnl_attr_put_strz(nlh, TCA_ACT_KIND, "mirred");
2065                         na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
2066                         assert(na_act);
2067                         mnl_attr_put(nlh, TCA_MIRRED_PARMS,
2068                                      sizeof(struct tc_mirred),
2069                                      &(struct tc_mirred){
2070                                         .action = TC_ACT_STOLEN,
2071                                         .eaction = TCA_EGRESS_REDIR,
2072                                         .ifindex = ptoi[i].ifindex,
2073                                      });
2074                         mnl_attr_nest_end(nlh, na_act);
2075                         mnl_attr_nest_end(nlh, na_act_index);
2076                         break;
2077                 case RTE_FLOW_ACTION_TYPE_JUMP:
2078                         conf.jump = actions->conf;
2079                         na_act_index =
2080                                 mnl_attr_nest_start(nlh, na_act_index_cur++);
2081                         assert(na_act_index);
2082                         mnl_attr_put_strz(nlh, TCA_ACT_KIND, "gact");
2083                         na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
2084                         assert(na_act);
2085                         mnl_attr_put(nlh, TCA_GACT_PARMS,
2086                                      sizeof(struct tc_gact),
2087                                      &(struct tc_gact){
2088                                         .action = TC_ACT_GOTO_CHAIN |
2089                                                   conf.jump->group,
2090                                      });
2091                         mnl_attr_nest_end(nlh, na_act);
2092                         mnl_attr_nest_end(nlh, na_act_index);
2093                         break;
2094                 case RTE_FLOW_ACTION_TYPE_DROP:
2095                         na_act_index =
2096                                 mnl_attr_nest_start(nlh, na_act_index_cur++);
2097                         assert(na_act_index);
2098                         mnl_attr_put_strz(nlh, TCA_ACT_KIND, "gact");
2099                         na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
2100                         assert(na_act);
2101                         mnl_attr_put(nlh, TCA_GACT_PARMS,
2102                                      sizeof(struct tc_gact),
2103                                      &(struct tc_gact){
2104                                         .action = TC_ACT_SHOT,
2105                                      });
2106                         mnl_attr_nest_end(nlh, na_act);
2107                         mnl_attr_nest_end(nlh, na_act_index);
2108                         break;
2109                 case RTE_FLOW_ACTION_TYPE_COUNT:
2110                         /*
2111                          * Driver adds the count action implicitly for
2112                          * each rule it creates.
2113                          */
2114                         ret = flow_tcf_translate_action_count(dev,
2115                                                               dev_flow, error);
2116                         if (ret < 0)
2117                                 return ret;
2118                         break;
2119                 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
2120                         conf.of_push_vlan = NULL;
2121                         vlan_act = TCA_VLAN_ACT_POP;
2122                         goto action_of_vlan;
2123                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
2124                         conf.of_push_vlan = actions->conf;
2125                         vlan_act = TCA_VLAN_ACT_PUSH;
2126                         goto action_of_vlan;
2127                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
2128                         conf.of_set_vlan_vid = actions->conf;
2129                         if (na_vlan_id)
2130                                 goto override_na_vlan_id;
2131                         vlan_act = TCA_VLAN_ACT_MODIFY;
2132                         goto action_of_vlan;
2133                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
2134                         conf.of_set_vlan_pcp = actions->conf;
2135                         if (na_vlan_priority)
2136                                 goto override_na_vlan_priority;
2137                         vlan_act = TCA_VLAN_ACT_MODIFY;
2138                         goto action_of_vlan;
2139 action_of_vlan:
2140                         na_act_index =
2141                                 mnl_attr_nest_start(nlh, na_act_index_cur++);
2142                         assert(na_act_index);
2143                         mnl_attr_put_strz(nlh, TCA_ACT_KIND, "vlan");
2144                         na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
2145                         assert(na_act);
2146                         mnl_attr_put(nlh, TCA_VLAN_PARMS,
2147                                      sizeof(struct tc_vlan),
2148                                      &(struct tc_vlan){
2149                                         .action = TC_ACT_PIPE,
2150                                         .v_action = vlan_act,
2151                                      });
2152                         if (vlan_act == TCA_VLAN_ACT_POP) {
2153                                 mnl_attr_nest_end(nlh, na_act);
2154                                 mnl_attr_nest_end(nlh, na_act_index);
2155                                 break;
2156                         }
2157                         if (vlan_act == TCA_VLAN_ACT_PUSH)
2158                                 mnl_attr_put_u16(nlh,
2159                                                  TCA_VLAN_PUSH_VLAN_PROTOCOL,
2160                                                  conf.of_push_vlan->ethertype);
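                        /*
                         * Append TCA_VLAN_PAD placeholders reserving room
                         * for the VLAN ID and priority attributes; they are
                         * retyped and filled in at the override labels below
                         * when the set_vlan_vid/set_vlan_pcp values become
                         * known.
                         */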
2161                         na_vlan_id = mnl_nlmsg_get_payload_tail(nlh);
2162                         mnl_attr_put_u16(nlh, TCA_VLAN_PAD, 0);
2163                         na_vlan_priority = mnl_nlmsg_get_payload_tail(nlh);
2164                         mnl_attr_put_u8(nlh, TCA_VLAN_PAD, 0);
2165                         mnl_attr_nest_end(nlh, na_act);
2166                         mnl_attr_nest_end(nlh, na_act_index);
2167                         if (actions->type ==
2168                             RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID) {
2169 override_na_vlan_id:
2170                                 na_vlan_id->nla_type = TCA_VLAN_PUSH_VLAN_ID;
2171                                 *(uint16_t *)mnl_attr_get_payload(na_vlan_id) =
2172                                         rte_be_to_cpu_16
2173                                         (conf.of_set_vlan_vid->vlan_vid);
2174                         } else if (actions->type ==
2175                                    RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP) {
2176 override_na_vlan_priority:
2177                                 na_vlan_priority->nla_type =
2178                                         TCA_VLAN_PUSH_VLAN_PRIORITY;
2179                                 *(uint8_t *)mnl_attr_get_payload
2180                                         (na_vlan_priority) =
2181                                         conf.of_set_vlan_pcp->vlan_pcp;
2182                         }
2183                         break;
2184                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
2185                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
2186                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
2187                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
2188                 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
2189                 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
2190                 case RTE_FLOW_ACTION_TYPE_SET_TTL:
2191                 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
2192                 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
2193                 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
2194                         na_act_index =
2195                                 mnl_attr_nest_start(nlh, na_act_index_cur++);
2196                         flow_tcf_create_pedit_mnl_msg(nlh,
2197                                                       &actions, item_flags);
2198                         mnl_attr_nest_end(nlh, na_act_index);
2199                         break;
2200                 default:
2201                         return rte_flow_error_set(error, ENOTSUP,
2202                                                   RTE_FLOW_ERROR_TYPE_ACTION,
2203                                                   actions,
2204                                                   "action not supported");
2205                 }
2206         }
2207         assert(na_flower);
2208         assert(na_flower_act);
2209         mnl_attr_nest_end(nlh, na_flower_act);
2210         mnl_attr_nest_end(nlh, na_flower);
2211         return 0;
2212 }
2213
2214 /**
2215  * Send Netlink message with acknowledgment.
2216  *
2217  * @param ctx
2218  *   Flow context to use.
2219  * @param nlh
2220  *   Message to send. This function always raises the NLM_F_ACK flag before
2221  *   sending.
2222  *
2223  * @return
2224  *   0 on success, a negative errno value otherwise and rte_errno is set.
2225  */
2226 static int
2227 flow_tcf_nl_ack(struct mlx5_flow_tcf_context *ctx, struct nlmsghdr *nlh)
2228 {
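        /*
         * The acknowledgment may echo the request, so the answer buffer is
         * conservatively sized for a struct nlmsgerr plus the original
         * message payload.
         */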
2229         alignas(struct nlmsghdr)
2230         uint8_t ans[mnl_nlmsg_size(sizeof(struct nlmsgerr)) +
2231                     nlh->nlmsg_len - sizeof(*nlh)];
2232         uint32_t seq = ctx->seq++;
2233         struct mnl_socket *nl = ctx->nl;
2234         int ret;
2235
2236         nlh->nlmsg_flags |= NLM_F_ACK;
2237         nlh->nlmsg_seq = seq;
2238         ret = mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
2239         if (ret != -1)
2240                 ret = mnl_socket_recvfrom(nl, ans, sizeof(ans));
2241         if (ret != -1)
2242                 ret = mnl_cb_run
2243                         (ans, ret, seq, mnl_socket_get_portid(nl), NULL, NULL);
2244         if (ret > 0)
2245                 return 0;
2246         rte_errno = errno;
2247         return -rte_errno;
2248 }
2249
2250 /**
2251  * Apply flow to E-Switch by sending Netlink message.
2252  *
2253  * @param[in] dev
2254  *   Pointer to Ethernet device.
2255  * @param[in, out] flow
2256  *   Pointer to the sub flow.
2257  * @param[out] error
2258  *   Pointer to the error structure.
2259  *
2260  * @return
2261  *   0 on success, a negative errno value otherwise and rte_errno is set.
2262  */
2263 static int
2264 flow_tcf_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
2265                struct rte_flow_error *error)
2266 {
2267         struct priv *priv = dev->data->dev_private;
2268         struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
2269         struct mlx5_flow *dev_flow;
2270         struct nlmsghdr *nlh;
2271
2272         dev_flow = LIST_FIRST(&flow->dev_flows);
2273         /* E-Switch flow can't be expanded. */
2274         assert(!LIST_NEXT(dev_flow, next));
2275         nlh = dev_flow->tcf.nlh;
2276         nlh->nlmsg_type = RTM_NEWTFILTER;
2277         nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
2278         if (!flow_tcf_nl_ack(ctx, nlh))
2279                 return 0;
2280         return rte_flow_error_set(error, rte_errno,
2281                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
2282                                   "netlink: failed to create TC flow rule");
2283 }
2284
2285 /**
2286  * Remove flow from E-Switch by sending Netlink message.
2287  *
2288  * @param[in] dev
2289  *   Pointer to Ethernet device.
2290  * @param[in, out] flow
2291  *   Pointer to the sub flow.
2292  */
2293 static void
2294 flow_tcf_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
2295 {
2296         struct priv *priv = dev->data->dev_private;
2297         struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
2298         struct mlx5_flow *dev_flow;
2299         struct nlmsghdr *nlh;
2300
2301         if (!flow)
2302                 return;
2303         if (flow->counter) {
2304                 if (--flow->counter->ref_cnt == 0) {
2305                         rte_free(flow->counter);
2306                         flow->counter = NULL;
2307                 }
2308         }
2309         dev_flow = LIST_FIRST(&flow->dev_flows);
2310         if (!dev_flow)
2311                 return;
2312         /* E-Switch flow can't be expanded. */
2313         assert(!LIST_NEXT(dev_flow, next));
2314         nlh = dev_flow->tcf.nlh;
2315         nlh->nlmsg_type = RTM_DELTFILTER;
2316         nlh->nlmsg_flags = NLM_F_REQUEST;
2317         flow_tcf_nl_ack(ctx, nlh);
2318 }
2319
2320 /**
2321  * Remove flow from E-Switch and release resources of the device flow.
2322  *
2323  * @param[in] dev
2324  *   Pointer to Ethernet device.
2325  * @param[in, out] flow
2326  *   Pointer to the sub flow.
2327  */
2328 static void
2329 flow_tcf_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
2330 {
2331         struct mlx5_flow *dev_flow;
2332
2333         if (!flow)
2334                 return;
2335         flow_tcf_remove(dev, flow);
2336         dev_flow = LIST_FIRST(&flow->dev_flows);
2337         if (!dev_flow)
2338                 return;
2339         /* E-Switch flow can't be expanded. */
2340         assert(!LIST_NEXT(dev_flow, next));
2341         LIST_REMOVE(dev_flow, next);
2342         rte_free(dev_flow);
2343 }
2344
2345 /**
2346  * Helper routine for computing the space size required for a parse buffer.
2347  *
2348  * @param array
2349  *   array of values to use.
2350  * @param idx
2351  *   Current location in array.
2352  * @param value
2353  *   Value to compare with.
2354  *
2355  * @return
2356  *   The maximum between the given value and the array value at the index.
2357  */
2358 static uint16_t
2359 flow_tcf_arr_val_max(uint16_t array[], int idx, uint16_t value)
2360 {
2361         return idx < 0 ? value : RTE_MAX(array[idx], value);
2362 }
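
/*
 * Example (hypothetical values): flow_tcf_arr_val_max(rta_type, 1,
 * TCA_ACT_STATS) yields RTE_MAX(rta_type[1], TCA_ACT_STATS); the callers
 * below use the result to size their on-stack rtattr tables.
 */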
2363
2364 /**
2365  * Parse rtnetlink message attributes filling the attribute table with the info
2366  * retrieved.
2367  *
2368  * @param tb
2369  *   Attribute table to be filled.
2370  * @param max
2371  *   Maximum entry in the attribute table.
2372  * @param rta
2373  *   The attributes section in the message to be parsed.
2374  * @param len
2375  *   The length of the attributes section in the message.
2376  */
2377 static void
2378 flow_tcf_nl_parse_rtattr(struct rtattr *tb[], int max,
2379                          struct rtattr *rta, int len)
2380 {
2381         unsigned short type;
2382         memset(tb, 0, sizeof(struct rtattr *) * (max + 1));
2383         while (RTA_OK(rta, len)) {
2384                 type = rta->rta_type;
2385                 if (type <= max && !tb[type])
2386                         tb[type] = rta;
2387                 rta = RTA_NEXT(rta, len);
2388         }
2389 }
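
/*
 * Minimal usage sketch of the parser above, kept disabled ("#if 0"); the
 * message "nlh" and the example function name are illustrative assumptions.
 */
#if 0
static void
flow_tcf_nl_parse_rtattr_example(struct nlmsghdr *nlh)
{
        struct rtattr *tb[TCA_MAX + 1];
        struct tcmsg *t = NLMSG_DATA(nlh);
        int len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t));

        /* Collect every TCA_* attribute of the TC message, indexed by type. */
        flow_tcf_nl_parse_rtattr(tb, TCA_MAX,
                                 (struct rtattr *)((char *)t +
                                                   NLMSG_ALIGN(sizeof(*t))),
                                 len);
        /* tb[TCA_OPTIONS], if present, points at the classifier options. */
}
#endif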
2390
2391 /**
2392  * Extract flow counters from flower action.
2393  *
2394  * @param rta
2395  *   flower action stats properties in the Netlink message received.
2396  * @param rta_type
2397  *   The backward sequence of rta_types, as written in the attribute table,
2398  *   that must be traversed in order to reach the requested object.
2399  * @param idx
2400  *   Current location in the rta_type table.
2401  * @param[out] data
2402  *   Data holding the count statistics of the rte_flow retrieved from
2403  *   the message.
2404  *
2405  * @return
2406  *   0 if data was found and retrieved, -1 otherwise.
2407  */
2408 static int
2409 flow_tcf_nl_action_stats_parse_and_get(struct rtattr *rta,
2410                                        uint16_t rta_type[], int idx,
2411                                        struct gnet_stats_basic *data)
2412 {
2413         int tca_stats_max = flow_tcf_arr_val_max(rta_type, idx,
2414                                                  TCA_STATS_BASIC);
2415         struct rtattr *tbs[tca_stats_max + 1];
2416
2417         if (rta == NULL || idx < 0)
2418                 return -1;
2419         flow_tcf_nl_parse_rtattr(tbs, tca_stats_max,
2420                                  RTA_DATA(rta), RTA_PAYLOAD(rta));
2421         switch (rta_type[idx]) {
2422         case TCA_STATS_BASIC:
2423                 if (tbs[TCA_STATS_BASIC]) {
2424                         memcpy(data, RTA_DATA(tbs[TCA_STATS_BASIC]),
2425                                RTE_MIN(RTA_PAYLOAD(tbs[TCA_STATS_BASIC]),
2426                                sizeof(*data)));
2427                         return 0;
2428                 }
2429                 break;
2430         default:
2431                 break;
2432         }
2433         return -1;
2434 }
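
/*
 * Note: struct gnet_stats_basic (linux/gen_stats.h) carries the cumulative
 * byte and packet counts TC maintains for the action; the RTE_MIN() above
 * guards the copy in case the kernel reports a shorter or padded attribute.
 */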
2435
2436 /**
2437  * Parse a single flower action, retrieving the requested action attribute
2438  * if found.
2439  *
2440  * @param arg
2441  *   flower action properties in the Netlink message received.
2442  * @param rta_type
2443  *   The backward sequence of rta_types, as written in the attribute table,
2444  *   that must be traversed in order to reach the requested object.
2445  * @param idx
2446  *   Current location in rta_type table.
2447  * @param[out] data
2448  *   Count statistics retrieved from the message query.
2449  *
2450  * @return
2451  *   0 if data was found and retrieved, -1 otherwise.
2452  */
2453 static int
2454 flow_tcf_nl_parse_one_action_and_get(struct rtattr *arg,
2455                                      uint16_t rta_type[], int idx, void *data)
2456 {
2457         int tca_act_max = flow_tcf_arr_val_max(rta_type, idx, TCA_ACT_STATS);
2458         struct rtattr *tb[tca_act_max + 1];
2459
2460         if (arg == NULL || idx < 0)
2461                 return -1;
2462         flow_tcf_nl_parse_rtattr(tb, tca_act_max,
2463                                  RTA_DATA(arg), RTA_PAYLOAD(arg));
2464         if (tb[TCA_ACT_KIND] == NULL)
2465                 return -1;
2466         switch (rta_type[idx]) {
2467         case TCA_ACT_STATS:
2468                 if (tb[TCA_ACT_STATS])
2469                         return flow_tcf_nl_action_stats_parse_and_get
2470                                         (tb[TCA_ACT_STATS],
2471                                          rta_type, --idx,
2472                                          (struct gnet_stats_basic *)data);
2473                 break;
2474         default:
2475                 break;
2476         }
2477         return -1;
2478 }
2479
2480 /**
2481  * Parse the flower action section in the message, retrieving the requested
2482  * attribute from the first action that provides it.
2483  *
2484  * @param arg
2485  *   flower section in the Netlink message received.
2486  * @param rta_type
2487  *   The backward sequence of rta_types, as written in the attribute table,
2488  *   that must be traversed in order to reach the requested object.
2489  * @param idx
2490  *   Current location in rta_type table.
2491  * @param[out] data
2492  *   data retrieved from the message query.
2493  *
2494  * @return
2495  *   0 if data was found and retrieved, -1 otherwise.
2496  */
2497 static int
2498 flow_tcf_nl_action_parse_and_get(struct rtattr *arg,
2499                                  uint16_t rta_type[], int idx, void *data)
2500 {
2501         struct rtattr *tb[TCA_ACT_MAX_PRIO + 1];
2502         int i;
2503
2504         if (arg == NULL || idx < 0)
2505                 return -1;
2506         flow_tcf_nl_parse_rtattr(tb, TCA_ACT_MAX_PRIO,
2507                                  RTA_DATA(arg), RTA_PAYLOAD(arg));
2508         switch (rta_type[idx]) {
2509         /*
2510          * Flow counters are stored in the actions defined by the flow
2511          * and not in the flow itself, therefore we need to traverse the
2512          * flower chain of actions in search of them.
2513          *
2514          * Note that the index is not decremented here.
2515          */
2516         case TCA_ACT_STATS:
2517                 for (i = 0; i <= TCA_ACT_MAX_PRIO; i++) {
2518                         if (tb[i] &&
2519                         !flow_tcf_nl_parse_one_action_and_get(tb[i],
2520                                                               rta_type,
2521                                                               idx, data))
2522                                 return 0;
2523                 }
2524                 break;
2525         default:
2526                 break;
2527         }
2528         return -1;
2529 }
2530
2531 /**
2532  * Parse flower classifier options in the message, retrieving the requested
2533  * attribute if found.
2534  *
2535  * @param opt
2536  *   flower section in the Netlink message received.
2537  * @param rta_type
2538  *   The backward sequence of rta_types, as written in the attribute table,
2539  *   that must be traversed in order to reach the requested object.
2540  * @param idx
2541  *   Current location in rta_type table.
2542  * @param[out] data
2543  *   data retrieved from the message query.
2544  *
2545  * @return
2546  *   0 if data was found and retrieved, -1 otherwise.
2547  */
2548 static int
2549 flow_tcf_nl_opts_parse_and_get(struct rtattr *opt,
2550                                uint16_t rta_type[], int idx, void *data)
2551 {
2552         int tca_flower_max = flow_tcf_arr_val_max(rta_type, idx,
2553                                                   TCA_FLOWER_ACT);
2554         struct rtattr *tb[tca_flower_max + 1];
2555
2556         if (!opt || idx < 0)
2557                 return -1;
2558         flow_tcf_nl_parse_rtattr(tb, tca_flower_max,
2559                                  RTA_DATA(opt), RTA_PAYLOAD(opt));
2560         switch (rta_type[idx]) {
2561         case TCA_FLOWER_ACT:
2562                 if (tb[TCA_FLOWER_ACT])
2563                         return flow_tcf_nl_action_parse_and_get
2564                                                         (tb[TCA_FLOWER_ACT],
2565                                                          rta_type, --idx, data);
2566                 break;
2567         default:
2568                 break;
2569         }
2570         return -1;
2571 }
2572
2573 /**
2574  * Parse Netlink reply on filter query, retrieving the flow counters.
2575  *
2576  * @param cnlh
2577  *   Message received from Netlink.
2578  * @param rta_type
2579  *   The backward sequence of rta_types, as written in the attribute table,
2580  *   that must be traversed in order to reach the requested object.
2581  * @param idx
2582  *   Current location in rta_type table.
2583  * @param[out] data
2584  *   data retrieved from the message query.
2585  *
2586  * @return
2587  *   0 if data was found and retrieved, -1 otherwise.
2588  */
2589 static int
2590 flow_tcf_nl_filter_parse_and_get(struct nlmsghdr *cnlh,
2591                                  uint16_t rta_type[], int idx, void *data)
2592 {
2593         struct nlmsghdr *nlh = cnlh;
2594         struct tcmsg *t = NLMSG_DATA(nlh);
2595         int len = nlh->nlmsg_len;
2596         int tca_max = flow_tcf_arr_val_max(rta_type, idx, TCA_OPTIONS);
2597         struct rtattr *tb[tca_max + 1];
2598
2599         if (idx < 0)
2600                 return -1;
2601         if (nlh->nlmsg_type != RTM_NEWTFILTER &&
2602             nlh->nlmsg_type != RTM_GETTFILTER &&
2603             nlh->nlmsg_type != RTM_DELTFILTER)
2604                 return -1;
2605         len -= NLMSG_LENGTH(sizeof(*t));
2606         if (len < 0)
2607                 return -1;
2608         flow_tcf_nl_parse_rtattr(tb, tca_max, TCA_RTA(t), len);
2609         /* Not a TC flower flow - bail out */
2610         if (!tb[TCA_KIND] ||
2611             strcmp(RTA_DATA(tb[TCA_KIND]), "flower"))
2612                 return -1;
2613         switch (rta_type[idx]) {
2614         case TCA_OPTIONS:
2615                 if (tb[TCA_OPTIONS])
2616                         return flow_tcf_nl_opts_parse_and_get(tb[TCA_OPTIONS],
2617                                                               rta_type,
2618                                                               --idx, data);
2619                 break;
2620         default:
2621                 break;
2622         }
2623         return -1;
2624 }
2625
2626 /**
2627  * A callback to parse Netlink reply on TC flower query.
2628  *
2629  * @param nlh
2630  *   Message received from Netlink.
2631  * @param[out] data
2632  *   Pointer to data area to be filled by the parsing routine.
2633  *   Assumed to be a pointer to struct flow_tcf_stats_basic.
2634  *
2635  * @return
2636  *   MNL_CB_OK value.
2637  */
2638 static int
2639 flow_tcf_nl_message_get_stats_basic(const struct nlmsghdr *nlh, void *data)
2640 {
2641         /*
2642          * The backward sequence of rta_types to pass in order to get
2643          * to the counters.
2644          */
2645         uint16_t rta_type[] = { TCA_STATS_BASIC, TCA_ACT_STATS,
2646                                 TCA_FLOWER_ACT, TCA_OPTIONS };
2647         struct flow_tcf_stats_basic *sb_data = data;
2648         union {
2649                 const struct nlmsghdr *c;
2650                 struct nlmsghdr *nc;
2651         } tnlh = { .c = nlh };
2652
2653         if (!flow_tcf_nl_filter_parse_and_get(tnlh.nc, rta_type,
2654                                               RTE_DIM(rta_type) - 1,
2655                                               (void *)&sb_data->counters))
2656                 sb_data->valid = true;
2657         return MNL_CB_OK;
2658 }
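
/*
 * The rta_type[] chain above is consumed from its last entry backwards:
 * flow_tcf_nl_filter_parse_and_get() resolves TCA_OPTIONS at the message
 * level, flow_tcf_nl_opts_parse_and_get() resolves TCA_FLOWER_ACT inside
 * the flower options, flow_tcf_nl_action_parse_and_get() scans the action
 * priorities for TCA_ACT_STATS, and flow_tcf_nl_action_stats_parse_and_get()
 * finally extracts TCA_STATS_BASIC holding the packet/byte counters.
 */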
2659
2660 /**
2661  * Query a TC flower rule for its statistics via Netlink.
2662  *
2663  * @param[in] dev
2664  *   Pointer to Ethernet device.
2665  * @param[in] flow
2666  *   Pointer to the flow.
2667  * @param[out] data
2668  *   data retrieved by the query.
2669  * @param[out] error
2670  *   Perform verbose error reporting if not NULL.
2671  *
2672  * @return
2673  *   0 on success, a negative errno value otherwise and rte_errno is set.
2674  */
2675 static int
2676 flow_tcf_query_count(struct rte_eth_dev *dev,
2677                           struct rte_flow *flow,
2678                           void *data,
2679                           struct rte_flow_error *error)
2680 {
2681         struct flow_tcf_stats_basic sb_data = { 0 };
2682         struct rte_flow_query_count *qc = data;
2683         struct priv *priv = dev->data->dev_private;
2684         struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
2685         struct mnl_socket *nl = ctx->nl;
2686         struct mlx5_flow *dev_flow;
2687         struct nlmsghdr *nlh;
2688         uint32_t seq = priv->tcf_context->seq++;
2689         ssize_t ret;
2690         assert(qc);
2691
2692         dev_flow = LIST_FIRST(&flow->dev_flows);
2693         /* E-Switch flow can't be expanded. */
2694         assert(!LIST_NEXT(dev_flow, next));
2695         if (!dev_flow->flow->counter)
2696                 goto notsup_exit;
2697         nlh = dev_flow->tcf.nlh;
2698         nlh->nlmsg_type = RTM_GETTFILTER;
2699         nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ECHO;
2700         nlh->nlmsg_seq = seq;
2701         if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) == -1)
2702                 goto error_exit;
2703         do {
2704                 ret = mnl_socket_recvfrom(nl, ctx->buf, ctx->buf_size);
2705                 if (ret <= 0)
2706                         break;
2707                 ret = mnl_cb_run(ctx->buf, ret, seq,
2708                                  mnl_socket_get_portid(nl),
2709                                  flow_tcf_nl_message_get_stats_basic,
2710                                  (void *)&sb_data);
2711         } while (ret > 0);
2712         /* Return the delta from last reset. */
2713         if (sb_data.valid) {
2715                 qc->hits_set = 1;
2716                 qc->bytes_set = 1;
2717                 qc->hits = sb_data.counters.packets - flow->counter->hits;
2718                 qc->bytes = sb_data.counters.bytes - flow->counter->bytes;
2719                 if (qc->reset) {
2720                         flow->counter->hits = sb_data.counters.packets;
2721                         flow->counter->bytes = sb_data.counters.bytes;
2722                 }
2723                 return 0;
2724         }
2725         return rte_flow_error_set(error, EINVAL,
2726                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2727                                   NULL,
2728                                   "flow does not have counter");
2729 error_exit:
2730         return rte_flow_error_set
2731                         (error, errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2732                          NULL, "netlink: failed to read flow rule counters");
2733 notsup_exit:
2734         return rte_flow_error_set
2735                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2736                          NULL, "counters are not available.");
2737 }
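
/*
 * Worked example of the delta semantics above (hypothetical numbers): if TC
 * reports packets=1000 while flow->counter->hits holds a baseline of 400,
 * the query returns hits=600; with qc->reset set, the baseline advances to
 * 1000 so the next query counts from zero again.
 */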
2738
2739 /**
2740  * Query a flow.
2741  *
2742  * @see rte_flow_query()
2743  * @see rte_flow_ops
2744  */
2745 static int
2746 flow_tcf_query(struct rte_eth_dev *dev,
2747                struct rte_flow *flow,
2748                const struct rte_flow_action *actions,
2749                void *data,
2750                struct rte_flow_error *error)
2751 {
2752         int ret = -EINVAL;
2753
2754         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
2755                 switch (actions->type) {
2756                 case RTE_FLOW_ACTION_TYPE_VOID:
2757                         break;
2758                 case RTE_FLOW_ACTION_TYPE_COUNT:
2759                         ret = flow_tcf_query_count(dev, flow, data, error);
2760                         break;
2761                 default:
2762                         return rte_flow_error_set(error, ENOTSUP,
2763                                                   RTE_FLOW_ERROR_TYPE_ACTION,
2764                                                   actions,
2765                                                   "action not supported");
2766                 }
2767         }
2768         return ret;
2769 }
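
/*
 * Application-level sketch (hypothetical helper, not part of this driver):
 * querying the COUNT action of a rule created with the transfer attribute
 * reaches flow_tcf_query() above through the .query callback of the
 * mlx5_flow_tcf_drv_ops table below. The port id and flow handle are
 * assumed to be valid and owned by the caller.
 */
static __rte_unused int
flow_tcf_query_count_example(uint16_t port_id, struct rte_flow *flow,
                             uint64_t *hits, uint64_t *bytes)
{
        struct rte_flow_query_count count = { .reset = 1 };
        const struct rte_flow_action action[] = {
                { .type = RTE_FLOW_ACTION_TYPE_COUNT },
                { .type = RTE_FLOW_ACTION_TYPE_END },
        };
        struct rte_flow_error error;

        /* Dispatched to the PMD through the registered rte_flow ops. */
        if (rte_flow_query(port_id, flow, action, &count, &error))
                return -rte_errno;
        *hits = count.hits_set ? count.hits : 0;
        *bytes = count.bytes_set ? count.bytes : 0;
        return 0;
}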
2770
2771 const struct mlx5_flow_driver_ops mlx5_flow_tcf_drv_ops = {
2772         .validate = flow_tcf_validate,
2773         .prepare = flow_tcf_prepare,
2774         .translate = flow_tcf_translate,
2775         .apply = flow_tcf_apply,
2776         .remove = flow_tcf_remove,
2777         .destroy = flow_tcf_destroy,
2778         .query = flow_tcf_query,
2779 };
2780
2781 /**
2782  * Create and configure a libmnl socket for Netlink flow rules.
2783  *
2784  * @return
2785  *   A valid libmnl socket object pointer on success, NULL otherwise and
2786  *   rte_errno is set.
2787  */
2788 static struct mnl_socket *
2789 flow_tcf_mnl_socket_create(void)
2790 {
2791         struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
2792
2793         if (nl) {
2794                 mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
2795                                       sizeof(int));
2796                 if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
2797                         return nl;
2798         }
2799         rte_errno = errno;
2800         if (nl)
2801                 mnl_socket_close(nl);
2802         return NULL;
2803 }
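
/*
 * Note: NETLINK_CAP_ACK asks the kernel to omit the original message payload
 * from acknowledgments, keeping the buffers needed to read ACKs small; the
 * setsockopt() result is not checked since the option is only an
 * optimization and may be absent on older kernels.
 */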
2804
2805 /**
2806  * Destroy a libmnl socket.
2807  *
2808  * @param nl
2809  *   Libmnl socket of the @p NETLINK_ROUTE kind.
2810  */
2811 static void
2812 flow_tcf_mnl_socket_destroy(struct mnl_socket *nl)
2813 {
2814         if (nl)
2815                 mnl_socket_close(nl);
2816 }
2817
2818 /**
2819  * Initialize ingress qdisc of a given network interface.
2820  *
2821  * @param ctx
2822  *   Pointer to tc-flower context to use.
2823  * @param ifindex
2824  *   Index of network interface to initialize.
2825  * @param[out] error
2826  *   Perform verbose error reporting if not NULL.
2827  *
2828  * @return
2829  *   0 on success, a negative errno value otherwise and rte_errno is set.
2830  */
2831 int
2832 mlx5_flow_tcf_init(struct mlx5_flow_tcf_context *ctx,
2833                    unsigned int ifindex, struct rte_flow_error *error)
2834 {
2835         struct nlmsghdr *nlh;
2836         struct tcmsg *tcm;
2837         alignas(struct nlmsghdr)
2838         uint8_t buf[mnl_nlmsg_size(sizeof(*tcm) + 128)];
2839
2840         /* Destroy existing ingress qdisc and everything attached to it. */
2841         nlh = mnl_nlmsg_put_header(buf);
2842         nlh->nlmsg_type = RTM_DELQDISC;
2843         nlh->nlmsg_flags = NLM_F_REQUEST;
2844         tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
2845         tcm->tcm_family = AF_UNSPEC;
2846         tcm->tcm_ifindex = ifindex;
2847         tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
2848         tcm->tcm_parent = TC_H_INGRESS;
2849         /* Ignore errors when qdisc is already absent. */
2850         if (flow_tcf_nl_ack(ctx, nlh) &&
2851             rte_errno != EINVAL && rte_errno != ENOENT)
2852                 return rte_flow_error_set(error, rte_errno,
2853                                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
2854                                           "netlink: failed to remove ingress"
2855                                           " qdisc");
2856         /* Create fresh ingress qdisc. */
2857         nlh = mnl_nlmsg_put_header(buf);
2858         nlh->nlmsg_type = RTM_NEWQDISC;
2859         nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
2860         tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
2861         tcm->tcm_family = AF_UNSPEC;
2862         tcm->tcm_ifindex = ifindex;
2863         tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
2864         tcm->tcm_parent = TC_H_INGRESS;
2865         mnl_attr_put_strz_check(nlh, sizeof(buf), TCA_KIND, "ingress");
2866         if (flow_tcf_nl_ack(ctx, nlh))
2867                 return rte_flow_error_set(error, rte_errno,
2868                                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
2869                                           "netlink: failed to create ingress"
2870                                           " qdisc");
2871         return 0;
2872 }
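
/*
 * For reference, the two Netlink requests above are roughly equivalent to
 * the following iproute2 commands (interface name resolved from @p ifindex,
 * shown for illustration only):
 *
 *   tc qdisc del dev <ifname> ingress 2>/dev/null || true
 *   tc qdisc add dev <ifname> ingress
 */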
2873
2874 /**
2875  * Create libmnl context for Netlink flow rules.
2876  *
2877  * @return
2878  *   A valid tc-flower context object pointer on success, NULL otherwise and
2879  *   rte_errno is set.
2880  */
2881 struct mlx5_flow_tcf_context *
2882 mlx5_flow_tcf_context_create(void)
2883 {
2884         struct mlx5_flow_tcf_context *ctx = rte_zmalloc(__func__,
2885                                                         sizeof(*ctx),
2886                                                         sizeof(uint32_t));
2887         if (!ctx)
2888                 goto error;
2889         ctx->nl = flow_tcf_mnl_socket_create();
2890         if (!ctx->nl)
2891                 goto error;
2892         ctx->buf_size = MNL_SOCKET_BUFFER_SIZE;
2893         ctx->buf = rte_zmalloc(__func__,
2894                                ctx->buf_size, sizeof(uint32_t));
2895         if (!ctx->buf)
2896                 goto error;
2897         ctx->seq = random();
2898         return ctx;
2899 error:
2900         mlx5_flow_tcf_context_destroy(ctx);
2901         return NULL;
2902 }
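
/*
 * Lifetime sketch (hypothetical caller, error handling trimmed): the context
 * is created first, the ingress qdisc of the target E-Switch interface is
 * then (re)initialized, and the context is released when no longer needed.
 *
 *   struct mlx5_flow_tcf_context *tcf = mlx5_flow_tcf_context_create();
 *
 *   if (tcf && !mlx5_flow_tcf_init(tcf, ifindex, &error))
 *           ... create/destroy rules through mlx5_flow_tcf_drv_ops ...
 *   mlx5_flow_tcf_context_destroy(tcf);
 */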
2903
2904 /**
2905  * Destroy a libmnl context.
2906  *
2907  * @param ctx
2908  *   Pointer to tc-flower context to destroy.
2909  */
2910 void
2911 mlx5_flow_tcf_context_destroy(struct mlx5_flow_tcf_context *ctx)
2912 {
2913         if (!ctx)
2914                 return;
2915         flow_tcf_mnl_socket_destroy(ctx->nl);
2916         rte_free(ctx->buf);
2917         rte_free(ctx);
2918 }