net/mlx5: rewrite TTL by E-Switch
[dpdk.git] / drivers / net / mlx5 / mlx5_flow_tcf.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2018 6WIND S.A.
3  * Copyright 2018 Mellanox Technologies, Ltd
4  */
5
6 #include <assert.h>
7 #include <errno.h>
8 #include <libmnl/libmnl.h>
9 #include <linux/if_ether.h>
10 #include <linux/netlink.h>
11 #include <linux/pkt_cls.h>
12 #include <linux/pkt_sched.h>
13 #include <linux/rtnetlink.h>
14 #include <linux/tc_act/tc_gact.h>
15 #include <linux/tc_act/tc_mirred.h>
16 #include <netinet/in.h>
17 #include <stdalign.h>
18 #include <stdbool.h>
19 #include <stddef.h>
20 #include <stdint.h>
21 #include <stdlib.h>
22 #include <sys/socket.h>
23
24 #include <rte_byteorder.h>
25 #include <rte_errno.h>
26 #include <rte_ether.h>
27 #include <rte_flow.h>
28 #include <rte_malloc.h>
29
30 #include "mlx5.h"
31 #include "mlx5_flow.h"
32 #include "mlx5_autoconf.h"
33
34 #ifdef HAVE_TC_ACT_VLAN
35
36 #include <linux/tc_act/tc_vlan.h>
37
38 #else /* HAVE_TC_ACT_VLAN */
39
40 #define TCA_VLAN_ACT_POP 1
41 #define TCA_VLAN_ACT_PUSH 2
42 #define TCA_VLAN_ACT_MODIFY 3
43 #define TCA_VLAN_PARMS 2
44 #define TCA_VLAN_PUSH_VLAN_ID 3
45 #define TCA_VLAN_PUSH_VLAN_PROTOCOL 4
46 #define TCA_VLAN_PAD 5
47 #define TCA_VLAN_PUSH_VLAN_PRIORITY 6
48
/*
 * Fallback declaration of the tc "vlan" action parameters, compiled only
 * when HAVE_TC_ACT_VLAN is not defined (no <linux/tc_act/tc_vlan.h>).
 */
49 struct tc_vlan {
50         tc_gen; /* Macro supplied by an included header (not visible here). */
51         int v_action; /* Presumably one of TCA_VLAN_ACT_*; TODO confirm. */
52 };
53
54 #endif /* HAVE_TC_ACT_VLAN */
55
56 #ifdef HAVE_TC_ACT_PEDIT
57
58 #include <linux/tc_act/tc_pedit.h>
59
60 #else /* HAVE_TC_ACT_PEDIT */
61
/*
 * Fallback netlink attribute types for the tc "pedit" action, compiled only
 * when HAVE_TC_ACT_PEDIT is not defined. This file emits TCA_PEDIT_PARMS_EX,
 * TCA_PEDIT_KEYS_EX and TCA_PEDIT_KEY_EX when building the pedit message.
 */
62 enum {
63         TCA_PEDIT_UNSPEC,
64         TCA_PEDIT_TM,
65         TCA_PEDIT_PARMS,
66         TCA_PEDIT_PAD,
67         TCA_PEDIT_PARMS_EX, /* Selector followed by the key array. */
68         TCA_PEDIT_KEYS_EX, /* Nest holding one TCA_PEDIT_KEY_EX per key. */
69         TCA_PEDIT_KEY_EX, /* Nest holding HTYPE and CMD of one key. */
70         __TCA_PEDIT_MAX
71 };
72
/*
 * Fallback attribute types placed inside each TCA_PEDIT_KEY_EX nest; both
 * are written as 16-bit attributes by this file.
 */
73 enum {
74         TCA_PEDIT_KEY_EX_HTYPE = 1, /* Carries an enum pedit_header_type. */
75         TCA_PEDIT_KEY_EX_CMD = 2, /* Carries an enum pedit_cmd. */
76         __TCA_PEDIT_KEY_EX_MAX
77 };
78
/*
 * Fallback header type identifiers stored in pedit_key_ex.htype and sent as
 * TCA_PEDIT_KEY_EX_HTYPE. Presumably selects the header a key offset is
 * relative to; TODO confirm against the kernel UAPI.
 */
79 enum pedit_header_type {
80         TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK = 0,
81         TCA_PEDIT_KEY_EX_HDR_TYPE_ETH = 1,
82         TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 = 2,
83         TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 = 3,
84         TCA_PEDIT_KEY_EX_HDR_TYPE_TCP = 4,
85         TCA_PEDIT_KEY_EX_HDR_TYPE_UDP = 5,
86         __PEDIT_HDR_TYPE_MAX,
87 };
88
/*
 * Fallback pedit commands stored in pedit_key_ex.cmd and sent as
 * TCA_PEDIT_KEY_EX_CMD.
 */
89 enum pedit_cmd {
90         TCA_PEDIT_KEY_EX_CMD_SET = 0, /* Used here for address/port/TTL set. */
91         TCA_PEDIT_KEY_EX_CMD_ADD = 1, /* Used here for TTL decrement. */
92         __PEDIT_CMD_MAX,
93 };
94
/*
 * Fallback layout of one pedit key, describing the rewrite of one 32-bit
 * word. The visible code only fills mask, val and off; the remaining fields
 * stay zero because the parser is cleared with memset() beforehand.
 */
95 struct tc_pedit_key {
96         __u32 mask; /* AND mask; this file uses 0 to rewrite a whole word. */
97         __u32 val; /* XOR (per the original note): value to apply. */
98         __u32 off; /* Offset of the word inside the selected header. */
99         __u32 at; /* Not set by the visible code. */
100         __u32 offmask; /* Not set by the visible code. */
101         __u32 shift; /* Not set by the visible code. */
102 };
103
/*
 * Fallback layout of the pedit selector: generic action fields followed by
 * the key count and a trailing key array. __extension__ presumably silences
 * the pedantic warning about the zero-length array.
 */
104 __extension__
105 struct tc_pedit_sel {
106         tc_gen; /* Provides, among others, the "action" field set to TC_ACT_PIPE. */
107         unsigned char nkeys; /* Number of keys that follow (8-bit counter). */
108         unsigned char flags; /* Not set by the visible code. */
109         struct tc_pedit_key keys[0]; /* Keys are laid out right after the selector. */
110 };
111
112 #endif /* HAVE_TC_ACT_PEDIT */
113
114 /* Normally found in linux/netlink.h. */
115 #ifndef NETLINK_CAP_ACK
116 #define NETLINK_CAP_ACK 10
117 #endif
118
119 /* Normally found in linux/pkt_sched.h. */
120 #ifndef TC_H_MIN_INGRESS
121 #define TC_H_MIN_INGRESS 0xfff2u
122 #endif
123
124 /* Normally found in linux/pkt_cls.h. */
125 #ifndef TCA_CLS_FLAGS_SKIP_SW
126 #define TCA_CLS_FLAGS_SKIP_SW (1 << 1)
127 #endif
128 #ifndef HAVE_TCA_CHAIN
129 #define TCA_CHAIN 11
130 #endif
131 #ifndef HAVE_TCA_FLOWER_ACT
132 #define TCA_FLOWER_ACT 3
133 #endif
134 #ifndef HAVE_TCA_FLOWER_FLAGS
135 #define TCA_FLOWER_FLAGS 22
136 #endif
137 #ifndef HAVE_TCA_FLOWER_KEY_ETH_TYPE
138 #define TCA_FLOWER_KEY_ETH_TYPE 8
139 #endif
140 #ifndef HAVE_TCA_FLOWER_KEY_ETH_DST
141 #define TCA_FLOWER_KEY_ETH_DST 4
142 #endif
143 #ifndef HAVE_TCA_FLOWER_KEY_ETH_DST_MASK
144 #define TCA_FLOWER_KEY_ETH_DST_MASK 5
145 #endif
146 #ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC
147 #define TCA_FLOWER_KEY_ETH_SRC 6
148 #endif
149 #ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC_MASK
150 #define TCA_FLOWER_KEY_ETH_SRC_MASK 7
151 #endif
152 #ifndef HAVE_TCA_FLOWER_KEY_IP_PROTO
153 #define TCA_FLOWER_KEY_IP_PROTO 9
154 #endif
155 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC
156 #define TCA_FLOWER_KEY_IPV4_SRC 10
157 #endif
158 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC_MASK
159 #define TCA_FLOWER_KEY_IPV4_SRC_MASK 11
160 #endif
161 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST
162 #define TCA_FLOWER_KEY_IPV4_DST 12
163 #endif
164 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST_MASK
165 #define TCA_FLOWER_KEY_IPV4_DST_MASK 13
166 #endif
167 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC
168 #define TCA_FLOWER_KEY_IPV6_SRC 14
169 #endif
170 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC_MASK
171 #define TCA_FLOWER_KEY_IPV6_SRC_MASK 15
172 #endif
173 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST
174 #define TCA_FLOWER_KEY_IPV6_DST 16
175 #endif
176 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST_MASK
177 #define TCA_FLOWER_KEY_IPV6_DST_MASK 17
178 #endif
179 #ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC
180 #define TCA_FLOWER_KEY_TCP_SRC 18
181 #endif
182 #ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC_MASK
183 #define TCA_FLOWER_KEY_TCP_SRC_MASK 35
184 #endif
185 #ifndef HAVE_TCA_FLOWER_KEY_TCP_DST
186 #define TCA_FLOWER_KEY_TCP_DST 19
187 #endif
188 #ifndef HAVE_TCA_FLOWER_KEY_TCP_DST_MASK
189 #define TCA_FLOWER_KEY_TCP_DST_MASK 36
190 #endif
191 #ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC
192 #define TCA_FLOWER_KEY_UDP_SRC 20
193 #endif
194 #ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC_MASK
195 #define TCA_FLOWER_KEY_UDP_SRC_MASK 37
196 #endif
197 #ifndef HAVE_TCA_FLOWER_KEY_UDP_DST
198 #define TCA_FLOWER_KEY_UDP_DST 21
199 #endif
200 #ifndef HAVE_TCA_FLOWER_KEY_UDP_DST_MASK
201 #define TCA_FLOWER_KEY_UDP_DST_MASK 38
202 #endif
203 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_ID
204 #define TCA_FLOWER_KEY_VLAN_ID 23
205 #endif
206 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_PRIO
207 #define TCA_FLOWER_KEY_VLAN_PRIO 24
208 #endif
209 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_ETH_TYPE
210 #define TCA_FLOWER_KEY_VLAN_ETH_TYPE 25
211 #endif
212 #ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS
213 #define TCA_FLOWER_KEY_TCP_FLAGS 71
214 #endif
215 #ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS_MASK
216 #define TCA_FLOWER_KEY_TCP_FLAGS_MASK 72
217 #endif
218 #ifndef HAVE_TC_ACT_GOTO_CHAIN
219 #define TC_ACT_GOTO_CHAIN 0x20000000
220 #endif
221
222 #ifndef IPV6_ADDR_LEN
223 #define IPV6_ADDR_LEN 16
224 #endif
225
226 #ifndef IPV4_ADDR_LEN
227 #define IPV4_ADDR_LEN 4
228 #endif
229
230 #ifndef TP_PORT_LEN
231 #define TP_PORT_LEN 2 /* Transport Port (UDP/TCP) Length */
232 #endif
233
234 #ifndef TTL_LEN
235 #define TTL_LEN 1
236 #endif
237
238 /**
 * Empty masks for known item types.
 *
 * The object has static storage and no initializer, so every byte is zero.
 * A member of this union is handed to flow_tcf_item_mask() as the mask to
 * return when an item carries no specification.
 */
239 static const union {
240         struct rte_flow_item_port_id port_id;
241         struct rte_flow_item_eth eth;
242         struct rte_flow_item_vlan vlan;
243         struct rte_flow_item_ipv4 ipv4;
244         struct rte_flow_item_ipv6 ipv6;
245         struct rte_flow_item_tcp tcp;
246         struct rte_flow_item_udp udp;
247 } flow_tcf_mask_empty;
248
249 /**
 * Supported masks for known item types.
 *
 * Every bit set here may be matched on; flow_tcf_item_mask() rejects a user
 * mask that has a bit set outside the corresponding member. Fields that are
 * not listed are zero and therefore cannot be matched.
 */
250 static const struct {
251         struct rte_flow_item_port_id port_id;
252         struct rte_flow_item_eth eth;
253         struct rte_flow_item_vlan vlan;
254         struct rte_flow_item_ipv4 ipv4;
255         struct rte_flow_item_ipv6 ipv6;
256         struct rte_flow_item_tcp tcp;
257         struct rte_flow_item_udp udp;
258 } flow_tcf_mask_supported = {
259         .port_id = {
260                 .id = 0xffffffff,
261         },
262         .eth = {
263                 .type = RTE_BE16(0xffff),
264                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
265                 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
266         },
267         .vlan = {
268                 /* PCP and VID only, no DEI (bit 0x1000 is left clear). */
269                 .tci = RTE_BE16(0xefff),
270                 .inner_type = RTE_BE16(0xffff),
271         },
272         .ipv4.hdr = {
                /* Only the protocol and both addresses can be matched. */
273                 .next_proto_id = 0xff,
274                 .src_addr = RTE_BE32(0xffffffff),
275                 .dst_addr = RTE_BE32(0xffffffff),
276         },
277         .ipv6.hdr = {
                /* Only the next header and both addresses can be matched. */
278                 .proto = 0xff,
279                 .src_addr =
280                         "\xff\xff\xff\xff\xff\xff\xff\xff"
281                         "\xff\xff\xff\xff\xff\xff\xff\xff",
282                 .dst_addr =
283                         "\xff\xff\xff\xff\xff\xff\xff\xff"
284                         "\xff\xff\xff\xff\xff\xff\xff\xff",
285         },
286         .tcp.hdr = {
287                 .src_port = RTE_BE16(0xffff),
288                 .dst_port = RTE_BE16(0xffff),
289                 .tcp_flags = 0xff,
290         },
291         .udp.hdr = {
292                 .src_port = RTE_BE16(0xffff),
293                 .dst_port = RTE_BE16(0xffff),
294         },
295 };
296
/*
 * Helpers computing the aligned space taken by netlink attributes in a
 * message buffer: a bare header, a nest (header only), a header plus a
 * payload of "len" bytes, a payload of a given type, and a NUL-terminated
 * string payload.
 */
297 #define SZ_NLATTR_HDR MNL_ALIGN(sizeof(struct nlattr))
298 #define SZ_NLATTR_NEST SZ_NLATTR_HDR
299 #define SZ_NLATTR_DATA_OF(len) MNL_ALIGN(SZ_NLATTR_HDR + (len))
300 #define SZ_NLATTR_TYPE_OF(typ) SZ_NLATTR_DATA_OF(sizeof(typ))
301 #define SZ_NLATTR_STRZ_OF(str) SZ_NLATTR_DATA_OF(strlen(str) + 1)
302
/*
 * Number of ptoi entries to reserve for a device: the port count reported by
 * mlx5_dev_to_port_id() plus two, which leaves room for the single own-port
 * entry used when that count is zero and for the terminating entry.
 */
303 #define PTOI_TABLE_SZ_MAX(dev) (mlx5_dev_to_port_id((dev)->device, NULL, 0) + 2)
304
305 /**
 * DPDK port to network interface index (ifindex) conversion.
 *
 * Tables of these entries are terminated by an entry whose ifindex is zero.
 */
306 struct flow_tcf_ptoi {
307         uint16_t port_id; /**< DPDK port ID. */
308         unsigned int ifindex; /**< Network interface index. */
309 };
310
311 /* Due to a limitation on driver/FW: highest group ID and, inside a group, lowest priority level accepted. */
312 #define MLX5_TCF_GROUP_ID_MAX 3
313 #define MLX5_TCF_GROUP_PRIORITY_MAX 14
314
/* Actions that decide the fate of a packet (drop, redirect, jump). */
315 #define MLX5_TCF_FATE_ACTIONS \
316         (MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_PORT_ID | \
317          MLX5_FLOW_ACTION_JUMP)
318
/* Actions operating on the VLAN tag. */
319 #define MLX5_TCF_VLAN_ACTIONS \
320         (MLX5_FLOW_ACTION_OF_POP_VLAN | MLX5_FLOW_ACTION_OF_PUSH_VLAN | \
321          MLX5_FLOW_ACTION_OF_SET_VLAN_VID | MLX5_FLOW_ACTION_OF_SET_VLAN_PCP)
322
/* Header rewrite actions, all carried by a single tc "pedit" action. */
323 #define MLX5_TCF_PEDIT_ACTIONS \
324         (MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST | \
325          MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST | \
326          MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST | \
327          MLX5_FLOW_ACTION_SET_TTL | MLX5_FLOW_ACTION_DEC_TTL)
328
/*
 * NOTE(review): presumably the actions that need a configuration structure;
 * DEC_TTL is explicitly excluded from the pedit set. Confirm against users.
 */
329 #define MLX5_TCF_CONFIG_ACTIONS \
330         (MLX5_FLOW_ACTION_PORT_ID | MLX5_FLOW_ACTION_JUMP | \
331          MLX5_FLOW_ACTION_OF_PUSH_VLAN | MLX5_FLOW_ACTION_OF_SET_VLAN_VID | \
332          MLX5_FLOW_ACTION_OF_SET_VLAN_PCP | \
333          (MLX5_TCF_PEDIT_ACTIONS & ~MLX5_FLOW_ACTION_DEC_TTL))
334
/* Capacity of the key arrays in struct pedit_parser. */
335 #define MAX_PEDIT_KEYS 128
/* Number of bytes rewritten by one pedit key. */
336 #define SZ_PEDIT_KEY_VAL 4
337
/* Number of keys needed to cover a field of "sz" bytes (rounded up). */
338 #define NUM_OF_PEDIT_KEYS(sz) \
339         (((sz) / SZ_PEDIT_KEY_VAL) + (((sz) % SZ_PEDIT_KEY_VAL) ? 1 : 0))
340
/* Extended description of one pedit key, kept at the same index as the key. */
341 struct pedit_key_ex {
342         enum pedit_header_type htype; /* Sent as TCA_PEDIT_KEY_EX_HTYPE. */
343         enum pedit_cmd cmd; /* Sent as TCA_PEDIT_KEY_EX_CMD. */
344 };
345
/*
 * Scratch area used to accumulate pedit keys before they are copied into the
 * netlink message. The selector and the first sel.nkeys entries of keys[]
 * are sent as one contiguous attribute, so keys[] must directly follow sel.
 */
346 struct pedit_parser {
347         struct tc_pedit_sel sel; /* Selector; sel.nkeys counts the keys in use. */
348         struct tc_pedit_key keys[MAX_PEDIT_KEYS]; /* Rewrite keys. */
349         struct pedit_key_ex keys_ex[MAX_PEDIT_KEYS]; /* Per-key header type/command. */
350 };
351
352
353 /**
354  * Set pedit key of decrease/set ttl
355  *
356  * @param[in] actions
357  *   pointer to action specification
358  * @param[in,out] p_parser
359  *   pointer to pedit_parser
360  * @param[in] item_flags
361  *   flags of all items presented
362  */
363 static void
364 flow_tcf_pedit_key_set_dec_ttl(const struct rte_flow_action *actions,
365                                 struct pedit_parser *p_parser,
366                                 uint64_t item_flags)
367 {
368         int idx = p_parser->sel.nkeys;
369
370         p_parser->keys[idx].mask = 0xFFFFFF00;
371         if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4) {
372                 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
373                 p_parser->keys[idx].off =
374                         offsetof(struct ipv4_hdr, time_to_live);
375         }
376         if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6) {
377                 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
378                 p_parser->keys[idx].off =
379                         offsetof(struct ipv6_hdr, hop_limits);
380         }
381         if (actions->type == RTE_FLOW_ACTION_TYPE_DEC_TTL) {
382                 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_ADD;
383                 p_parser->keys[idx].val = 0x000000FF;
384         } else {
385                 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
386                 p_parser->keys[idx].val =
387                         (__u32)((const struct rte_flow_action_set_ttl *)
388                          actions->conf)->ttl_value;
389         }
390         p_parser->sel.nkeys = (++idx);
391 }
392
393 /**
394  * Set pedit key of transport (TCP/UDP) port value
395  *
396  * @param[in] actions
397  *   pointer to action specification
398  * @param[in,out] p_parser
399  *   pointer to pedit_parser
400  * @param[in] item_flags
401  *   flags of all items presented
402  */
403 static void
404 flow_tcf_pedit_key_set_tp_port(const struct rte_flow_action *actions,
405                                 struct pedit_parser *p_parser,
406                                 uint64_t item_flags)
407 {
408         int idx = p_parser->sel.nkeys;
409
410         if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
411                 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_UDP;
412         if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
413                 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_TCP;
414         p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
415         /* offset of src/dst port is same for TCP and UDP */
416         p_parser->keys[idx].off =
417                 actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_SRC ?
418                 offsetof(struct tcp_hdr, src_port) :
419                 offsetof(struct tcp_hdr, dst_port);
420         p_parser->keys[idx].mask = 0xFFFF0000;
421         p_parser->keys[idx].val =
422                 (__u32)((const struct rte_flow_action_set_tp *)
423                                 actions->conf)->port;
424         p_parser->sel.nkeys = (++idx);
425 }
426
427 /**
428  * Set pedit key of ipv6 address
429  *
430  * @param[in] actions
431  *   pointer to action specification
432  * @param[in,out] p_parser
433  *   pointer to pedit_parser
434  */
435 static void
436 flow_tcf_pedit_key_set_ipv6_addr(const struct rte_flow_action *actions,
437                                  struct pedit_parser *p_parser)
438 {
439         int idx = p_parser->sel.nkeys;
440         int keys = NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
441         int off_base =
442                 actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC ?
443                 offsetof(struct ipv6_hdr, src_addr) :
444                 offsetof(struct ipv6_hdr, dst_addr);
445         const struct rte_flow_action_set_ipv6 *conf =
446                 (const struct rte_flow_action_set_ipv6 *)actions->conf;
447
448         for (int i = 0; i < keys; i++, idx++) {
449                 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
450                 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
451                 p_parser->keys[idx].off = off_base + i * SZ_PEDIT_KEY_VAL;
452                 p_parser->keys[idx].mask = ~UINT32_MAX;
453                 memcpy(&p_parser->keys[idx].val,
454                         conf->ipv6_addr + i *  SZ_PEDIT_KEY_VAL,
455                         SZ_PEDIT_KEY_VAL);
456         }
457         p_parser->sel.nkeys += keys;
458 }
459
460 /**
461  * Set pedit key of ipv4 address
462  *
463  * @param[in] actions
464  *   pointer to action specification
465  * @param[in,out] p_parser
466  *   pointer to pedit_parser
467  */
468 static void
469 flow_tcf_pedit_key_set_ipv4_addr(const struct rte_flow_action *actions,
470                                  struct pedit_parser *p_parser)
471 {
472         int idx = p_parser->sel.nkeys;
473
474         p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
475         p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
476         p_parser->keys[idx].off =
477                 actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC ?
478                 offsetof(struct ipv4_hdr, src_addr) :
479                 offsetof(struct ipv4_hdr, dst_addr);
480         p_parser->keys[idx].mask = ~UINT32_MAX;
481         p_parser->keys[idx].val =
482                 ((const struct rte_flow_action_set_ipv4 *)
483                  actions->conf)->ipv4_addr;
484         p_parser->sel.nkeys = (++idx);
485 }
486
487 /**
488  * Create the pedit's na attribute in netlink message
489  * on pre-allocate message buffer
490  *
491  * @param[in,out] nl
492  *   pointer to pre-allocated netlink message buffer
493  * @param[in,out] actions
494  *   pointer to pointer of actions specification.
495  * @param[in,out] action_flags
496  *   pointer to actions flags
497  * @param[in] item_flags
498  *   flags of all item presented
499  */
500 static void
501 flow_tcf_create_pedit_mnl_msg(struct nlmsghdr *nl,
502                               const struct rte_flow_action **actions,
503                               uint64_t item_flags)
504 {
505         struct pedit_parser p_parser;
506         struct nlattr *na_act_options;
507         struct nlattr *na_pedit_keys;
508
509         memset(&p_parser, 0, sizeof(p_parser));
510         mnl_attr_put_strz(nl, TCA_ACT_KIND, "pedit");
511         na_act_options = mnl_attr_nest_start(nl, TCA_ACT_OPTIONS);
512         /* all modify header actions should be in one tc-pedit action */
513         for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
514                 switch ((*actions)->type) {
515                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
516                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
517                         flow_tcf_pedit_key_set_ipv4_addr(*actions, &p_parser);
518                         break;
519                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
520                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
521                         flow_tcf_pedit_key_set_ipv6_addr(*actions, &p_parser);
522                         break;
523                 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
524                 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
525                         flow_tcf_pedit_key_set_tp_port(*actions,
526                                                         &p_parser, item_flags);
527                         break;
528                 case RTE_FLOW_ACTION_TYPE_SET_TTL:
529                 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
530                         flow_tcf_pedit_key_set_dec_ttl(*actions,
531                                                         &p_parser, item_flags);
532                         break;
533                 default:
534                         goto pedit_mnl_msg_done;
535                 }
536         }
537 pedit_mnl_msg_done:
538         p_parser.sel.action = TC_ACT_PIPE;
539         mnl_attr_put(nl, TCA_PEDIT_PARMS_EX,
540                      sizeof(p_parser.sel) +
541                      p_parser.sel.nkeys * sizeof(struct tc_pedit_key),
542                      &p_parser);
543         na_pedit_keys =
544                 mnl_attr_nest_start(nl, TCA_PEDIT_KEYS_EX | NLA_F_NESTED);
545         for (int i = 0; i < p_parser.sel.nkeys; i++) {
546                 struct nlattr *na_pedit_key =
547                         mnl_attr_nest_start(nl,
548                                             TCA_PEDIT_KEY_EX | NLA_F_NESTED);
549                 mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_HTYPE,
550                                  p_parser.keys_ex[i].htype);
551                 mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_CMD,
552                                  p_parser.keys_ex[i].cmd);
553                 mnl_attr_nest_end(nl, na_pedit_key);
554         }
555         mnl_attr_nest_end(nl, na_pedit_keys);
556         mnl_attr_nest_end(nl, na_act_options);
557         (*actions)--;
558 }
559
560 /**
561  * Calculate max memory size of one TC-pedit actions.
562  * One TC-pedit action can contain set of keys each defining
563  * a rewrite element (rte_flow action)
564  *
565  * @param[in,out] actions
566  *   actions specification.
567  * @param[in,out] action_flags
568  *   actions flags
569  * @param[in,out] size
570  *   accumulated size
571  * @return
572  *   Max memory size of one TC-pedit action
573  */
574 static int
575 flow_tcf_get_pedit_actions_size(const struct rte_flow_action **actions,
576                                 uint64_t *action_flags)
577 {
578         int pedit_size = 0;
579         int keys = 0;
580         uint64_t flags = 0;
581
582         pedit_size += SZ_NLATTR_NEST + /* na_act_index. */
583                       SZ_NLATTR_STRZ_OF("pedit") +
584                       SZ_NLATTR_NEST; /* TCA_ACT_OPTIONS. */
585         for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
586                 switch ((*actions)->type) {
587                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
588                         keys += NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN);
589                         flags |= MLX5_FLOW_ACTION_SET_IPV4_SRC;
590                         break;
591                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
592                         keys += NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN);
593                         flags |= MLX5_FLOW_ACTION_SET_IPV4_DST;
594                         break;
595                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
596                         keys += NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
597                         flags |= MLX5_FLOW_ACTION_SET_IPV6_SRC;
598                         break;
599                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
600                         keys += NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
601                         flags |= MLX5_FLOW_ACTION_SET_IPV6_DST;
602                         break;
603                 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
604                         /* TCP is as same as UDP */
605                         keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
606                         flags |= MLX5_FLOW_ACTION_SET_TP_SRC;
607                         break;
608                 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
609                         /* TCP is as same as UDP */
610                         keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
611                         flags |= MLX5_FLOW_ACTION_SET_TP_DST;
612                         break;
613                 case RTE_FLOW_ACTION_TYPE_SET_TTL:
614                         keys += NUM_OF_PEDIT_KEYS(TTL_LEN);
615                         flags |= MLX5_FLOW_ACTION_SET_TTL;
616                         break;
617                 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
618                         keys += NUM_OF_PEDIT_KEYS(TTL_LEN);
619                         flags |= MLX5_FLOW_ACTION_DEC_TTL;
620                         break;
621                 default:
622                         goto get_pedit_action_size_done;
623                 }
624         }
625 get_pedit_action_size_done:
626         /* TCA_PEDIT_PARAMS_EX */
627         pedit_size +=
628                 SZ_NLATTR_DATA_OF(sizeof(struct tc_pedit_sel) +
629                                   keys * sizeof(struct tc_pedit_key));
630         pedit_size += SZ_NLATTR_NEST; /* TCA_PEDIT_KEYS */
631         pedit_size += keys *
632                       /* TCA_PEDIT_KEY_EX + HTYPE + CMD */
633                       (SZ_NLATTR_NEST + SZ_NLATTR_DATA_OF(2) +
634                        SZ_NLATTR_DATA_OF(2));
635         (*action_flags) |= flags;
636         (*actions)--;
637         return pedit_size;
638 }
639
640 /**
641  * Retrieve mask for pattern item.
642  *
643  * This function does basic sanity checks on a pattern item in order to
644  * return the most appropriate mask for it.
645  *
646  * @param[in] item
647  *   Item specification.
648  * @param[in] mask_default
649  *   Default mask for pattern item as specified by the flow API.
650  * @param[in] mask_supported
651  *   Mask fields supported by the implementation.
652  * @param[in] mask_empty
653  *   Empty mask to return when there is no specification.
654  * @param[out] error
655  *   Perform verbose error reporting if not NULL.
656  *
657  * @return
658  *   Either @p item->mask or one of the mask parameters on success, NULL
659  *   otherwise and rte_errno is set.
660  */
661 static const void *
662 flow_tcf_item_mask(const struct rte_flow_item *item, const void *mask_default,
663                    const void *mask_supported, const void *mask_empty,
664                    size_t mask_size, struct rte_flow_error *error)
665 {
666         const uint8_t *mask;
667         size_t i;
668
669         /* item->last and item->mask cannot exist without item->spec. */
670         if (!item->spec && (item->mask || item->last)) {
671                 rte_flow_error_set(error, EINVAL,
672                                    RTE_FLOW_ERROR_TYPE_ITEM, item,
673                                    "\"mask\" or \"last\" field provided without"
674                                    " a corresponding \"spec\"");
675                 return NULL;
676         }
677         /* No spec, no mask, no problem. */
678         if (!item->spec)
679                 return mask_empty;
680         mask = item->mask ? item->mask : mask_default;
681         assert(mask);
682         /*
683          * Single-pass check to make sure that:
684          * - Mask is supported, no bits are set outside mask_supported.
685          * - Both item->spec and item->last are included in mask.
686          */
687         for (i = 0; i != mask_size; ++i) {
688                 if (!mask[i])
689                         continue;
690                 if ((mask[i] | ((const uint8_t *)mask_supported)[i]) !=
691                     ((const uint8_t *)mask_supported)[i]) {
692                         rte_flow_error_set(error, ENOTSUP,
693                                            RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
694                                            "unsupported field found"
695                                            " in \"mask\"");
696                         return NULL;
697                 }
698                 if (item->last &&
699                     (((const uint8_t *)item->spec)[i] & mask[i]) !=
700                     (((const uint8_t *)item->last)[i] & mask[i])) {
701                         rte_flow_error_set(error, EINVAL,
702                                            RTE_FLOW_ERROR_TYPE_ITEM_LAST,
703                                            item->last,
704                                            "range between \"spec\" and \"last\""
705                                            " not comprised in \"mask\"");
706                         return NULL;
707                 }
708         }
709         return mask;
710 }
711
712 /**
713  * Build a conversion table between port ID and ifindex.
714  *
715  * @param[in] dev
716  *   Pointer to Ethernet device.
717  * @param[out] ptoi
718  *   Pointer to ptoi table.
719  * @param[in] len
720  *   Size of ptoi table provided.
721  *
722  * @return
723  *   Size of ptoi table filled.
724  */
725 static unsigned int
726 flow_tcf_build_ptoi_table(struct rte_eth_dev *dev, struct flow_tcf_ptoi *ptoi,
727                           unsigned int len)
728 {
729         unsigned int n = mlx5_dev_to_port_id(dev->device, NULL, 0);
730         uint16_t port_id[n + 1];
731         unsigned int i;
732         unsigned int own = 0;
733
734         /* At least one port is needed when no switch domain is present. */
735         if (!n) {
736                 n = 1;
737                 port_id[0] = dev->data->port_id;
738         } else {
739                 n = RTE_MIN(mlx5_dev_to_port_id(dev->device, port_id, n), n);
740         }
741         if (n > len)
742                 return 0;
743         for (i = 0; i != n; ++i) {
744                 struct rte_eth_dev_info dev_info;
745
746                 rte_eth_dev_info_get(port_id[i], &dev_info);
747                 if (port_id[i] == dev->data->port_id)
748                         own = i;
749                 ptoi[i].port_id = port_id[i];
750                 ptoi[i].ifindex = dev_info.if_index;
751         }
752         /* Ensure first entry of ptoi[] is the current device. */
753         if (own) {
754                 ptoi[n] = ptoi[0];
755                 ptoi[0] = ptoi[own];
756                 ptoi[own] = ptoi[n];
757         }
758         /* An entry with zero ifindex terminates ptoi[]. */
759         ptoi[n].port_id = 0;
760         ptoi[n].ifindex = 0;
761         return n;
762 }
763
764 /**
765  * Verify the @p attr will be correctly understood by the E-switch.
766  *
767  * @param[in] attr
768  *   Pointer to flow attributes
769  * @param[out] error
770  *   Pointer to error structure.
771  *
772  * @return
773  *   0 on success, a negative errno value otherwise and rte_errno is set.
774  */
775 static int
776 flow_tcf_validate_attributes(const struct rte_flow_attr *attr,
777                              struct rte_flow_error *error)
778 {
779         /*
780          * Supported attributes: groups, some priorities and ingress only.
781          * group is supported only if kernel supports chain. Don't care about
782          * transfer as it is the caller's problem.
783          */
784         if (attr->group > MLX5_TCF_GROUP_ID_MAX)
785                 return rte_flow_error_set(error, ENOTSUP,
786                                           RTE_FLOW_ERROR_TYPE_ATTR_GROUP, attr,
787                                           "group ID larger than "
788                                           RTE_STR(MLX5_TCF_GROUP_ID_MAX)
789                                           " isn't supported");
790         else if (attr->group > 0 &&
791                  attr->priority > MLX5_TCF_GROUP_PRIORITY_MAX)
792                 return rte_flow_error_set(error, ENOTSUP,
793                                           RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
794                                           attr,
795                                           "lowest priority level is "
796                                           RTE_STR(MLX5_TCF_GROUP_PRIORITY_MAX)
797                                           " when group is configured");
798         else if (attr->priority > 0xfffe)
799                 return rte_flow_error_set(error, ENOTSUP,
800                                           RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
801                                           attr,
802                                           "lowest priority level is 0xfffe");
803         if (!attr->ingress)
804                 return rte_flow_error_set(error, EINVAL,
805                                           RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
806                                           attr, "only ingress is supported");
807         if (attr->egress)
808                 return rte_flow_error_set(error, ENOTSUP,
809                                           RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
810                                           attr, "egress is not supported");
811         return 0;
812 }
813
814 /**
815  * Validate flow for E-Switch.
816  *
817  * @param[in] dev
818  *   Pointer to Ethernet device.
819  * @param[in] attr
820  *   Pointer to the flow attributes.
821  * @param[in] items
822  *   Pointer to the list of items.
823  * @param[in] actions
824  *   Pointer to the list of actions.
825  * @param[out] error
826  *   Pointer to the error structure.
827  *
828  * @return
829  *   0 on success, a negative errno value otherwise and rte_errno is set.
830  */
831 static int
832 flow_tcf_validate(struct rte_eth_dev *dev,
833                   const struct rte_flow_attr *attr,
834                   const struct rte_flow_item items[],
835                   const struct rte_flow_action actions[],
836                   struct rte_flow_error *error)
837 {
838         union {
839                 const struct rte_flow_item_port_id *port_id;
840                 const struct rte_flow_item_eth *eth;
841                 const struct rte_flow_item_vlan *vlan;
842                 const struct rte_flow_item_ipv4 *ipv4;
843                 const struct rte_flow_item_ipv6 *ipv6;
844                 const struct rte_flow_item_tcp *tcp;
845                 const struct rte_flow_item_udp *udp;
846         } spec, mask;
847         union {
848                 const struct rte_flow_action_port_id *port_id;
849                 const struct rte_flow_action_jump *jump;
850                 const struct rte_flow_action_of_push_vlan *of_push_vlan;
851                 const struct rte_flow_action_of_set_vlan_vid *
852                         of_set_vlan_vid;
853                 const struct rte_flow_action_of_set_vlan_pcp *
854                         of_set_vlan_pcp;
855                 const struct rte_flow_action_set_ipv4 *set_ipv4;
856                 const struct rte_flow_action_set_ipv6 *set_ipv6;
857         } conf;
858         uint32_t item_flags = 0;
859         uint32_t action_flags = 0;
860         uint8_t next_protocol = -1;
861         unsigned int tcm_ifindex = 0;
862         uint8_t pedit_validated = 0;
863         struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
864         struct rte_eth_dev *port_id_dev = NULL;
865         bool in_port_id_set;
866         int ret;
867
868         claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
869                                                 PTOI_TABLE_SZ_MAX(dev)));
870         ret = flow_tcf_validate_attributes(attr, error);
871         if (ret < 0)
872                 return ret;
873         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
874                 unsigned int i;
875
876                 switch (items->type) {
877                 case RTE_FLOW_ITEM_TYPE_VOID:
878                         break;
879                 case RTE_FLOW_ITEM_TYPE_PORT_ID:
880                         mask.port_id = flow_tcf_item_mask
881                                 (items, &rte_flow_item_port_id_mask,
882                                  &flow_tcf_mask_supported.port_id,
883                                  &flow_tcf_mask_empty.port_id,
884                                  sizeof(flow_tcf_mask_supported.port_id),
885                                  error);
886                         if (!mask.port_id)
887                                 return -rte_errno;
888                         if (mask.port_id == &flow_tcf_mask_empty.port_id) {
889                                 in_port_id_set = 1;
890                                 break;
891                         }
892                         spec.port_id = items->spec;
893                         if (mask.port_id->id && mask.port_id->id != 0xffffffff)
894                                 return rte_flow_error_set
895                                         (error, ENOTSUP,
896                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
897                                          mask.port_id,
898                                          "no support for partial mask on"
899                                          " \"id\" field");
900                         if (!mask.port_id->id)
901                                 i = 0;
902                         else
903                                 for (i = 0; ptoi[i].ifindex; ++i)
904                                         if (ptoi[i].port_id == spec.port_id->id)
905                                                 break;
906                         if (!ptoi[i].ifindex)
907                                 return rte_flow_error_set
908                                         (error, ENODEV,
909                                          RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
910                                          spec.port_id,
911                                          "missing data to convert port ID to"
912                                          " ifindex");
913                         if (in_port_id_set && ptoi[i].ifindex != tcm_ifindex)
914                                 return rte_flow_error_set
915                                         (error, ENOTSUP,
916                                          RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
917                                          spec.port_id,
918                                          "cannot match traffic for"
919                                          " several port IDs through"
920                                          " a single flow rule");
921                         tcm_ifindex = ptoi[i].ifindex;
922                         in_port_id_set = 1;
923                         break;
924                 case RTE_FLOW_ITEM_TYPE_ETH:
925                         ret = mlx5_flow_validate_item_eth(items, item_flags,
926                                                           error);
927                         if (ret < 0)
928                                 return ret;
929                         item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
930                         /* TODO:
931                          * Redundant check due to different supported mask.
932                          * Same for the rest of items.
933                          */
934                         mask.eth = flow_tcf_item_mask
935                                 (items, &rte_flow_item_eth_mask,
936                                  &flow_tcf_mask_supported.eth,
937                                  &flow_tcf_mask_empty.eth,
938                                  sizeof(flow_tcf_mask_supported.eth),
939                                  error);
940                         if (!mask.eth)
941                                 return -rte_errno;
942                         if (mask.eth->type && mask.eth->type !=
943                             RTE_BE16(0xffff))
944                                 return rte_flow_error_set
945                                         (error, ENOTSUP,
946                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
947                                          mask.eth,
948                                          "no support for partial mask on"
949                                          " \"type\" field");
950                         break;
951                 case RTE_FLOW_ITEM_TYPE_VLAN:
952                         ret = mlx5_flow_validate_item_vlan(items, item_flags,
953                                                            error);
954                         if (ret < 0)
955                                 return ret;
956                         item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
957                         mask.vlan = flow_tcf_item_mask
958                                 (items, &rte_flow_item_vlan_mask,
959                                  &flow_tcf_mask_supported.vlan,
960                                  &flow_tcf_mask_empty.vlan,
961                                  sizeof(flow_tcf_mask_supported.vlan),
962                                  error);
963                         if (!mask.vlan)
964                                 return -rte_errno;
965                         if ((mask.vlan->tci & RTE_BE16(0xe000) &&
966                              (mask.vlan->tci & RTE_BE16(0xe000)) !=
967                               RTE_BE16(0xe000)) ||
968                             (mask.vlan->tci & RTE_BE16(0x0fff) &&
969                              (mask.vlan->tci & RTE_BE16(0x0fff)) !=
970                               RTE_BE16(0x0fff)) ||
971                             (mask.vlan->inner_type &&
972                              mask.vlan->inner_type != RTE_BE16(0xffff)))
973                                 return rte_flow_error_set
974                                         (error, ENOTSUP,
975                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
976                                          mask.vlan,
977                                          "no support for partial masks on"
978                                          " \"tci\" (PCP and VID parts) and"
979                                          " \"inner_type\" fields");
980                         break;
981                 case RTE_FLOW_ITEM_TYPE_IPV4:
982                         ret = mlx5_flow_validate_item_ipv4(items, item_flags,
983                                                            error);
984                         if (ret < 0)
985                                 return ret;
986                         item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
987                         mask.ipv4 = flow_tcf_item_mask
988                                 (items, &rte_flow_item_ipv4_mask,
989                                  &flow_tcf_mask_supported.ipv4,
990                                  &flow_tcf_mask_empty.ipv4,
991                                  sizeof(flow_tcf_mask_supported.ipv4),
992                                  error);
993                         if (!mask.ipv4)
994                                 return -rte_errno;
995                         if (mask.ipv4->hdr.next_proto_id &&
996                             mask.ipv4->hdr.next_proto_id != 0xff)
997                                 return rte_flow_error_set
998                                         (error, ENOTSUP,
999                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
1000                                          mask.ipv4,
1001                                          "no support for partial mask on"
1002                                          " \"hdr.next_proto_id\" field");
1003                         else if (mask.ipv4->hdr.next_proto_id)
1004                                 next_protocol =
1005                                         ((const struct rte_flow_item_ipv4 *)
1006                                          (items->spec))->hdr.next_proto_id;
1007                         break;
1008                 case RTE_FLOW_ITEM_TYPE_IPV6:
1009                         ret = mlx5_flow_validate_item_ipv6(items, item_flags,
1010                                                            error);
1011                         if (ret < 0)
1012                                 return ret;
1013                         item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1014                         mask.ipv6 = flow_tcf_item_mask
1015                                 (items, &rte_flow_item_ipv6_mask,
1016                                  &flow_tcf_mask_supported.ipv6,
1017                                  &flow_tcf_mask_empty.ipv6,
1018                                  sizeof(flow_tcf_mask_supported.ipv6),
1019                                  error);
1020                         if (!mask.ipv6)
1021                                 return -rte_errno;
1022                         if (mask.ipv6->hdr.proto &&
1023                             mask.ipv6->hdr.proto != 0xff)
1024                                 return rte_flow_error_set
1025                                         (error, ENOTSUP,
1026                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
1027                                          mask.ipv6,
1028                                          "no support for partial mask on"
1029                                          " \"hdr.proto\" field");
1030                         else if (mask.ipv6->hdr.proto)
1031                                 next_protocol =
1032                                         ((const struct rte_flow_item_ipv6 *)
1033                                          (items->spec))->hdr.proto;
1034                         break;
1035                 case RTE_FLOW_ITEM_TYPE_UDP:
1036                         ret = mlx5_flow_validate_item_udp(items, item_flags,
1037                                                           next_protocol, error);
1038                         if (ret < 0)
1039                                 return ret;
1040                         item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
1041                         mask.udp = flow_tcf_item_mask
1042                                 (items, &rte_flow_item_udp_mask,
1043                                  &flow_tcf_mask_supported.udp,
1044                                  &flow_tcf_mask_empty.udp,
1045                                  sizeof(flow_tcf_mask_supported.udp),
1046                                  error);
1047                         if (!mask.udp)
1048                                 return -rte_errno;
1049                         break;
1050                 case RTE_FLOW_ITEM_TYPE_TCP:
1051                         ret = mlx5_flow_validate_item_tcp
1052                                              (items, item_flags,
1053                                               next_protocol,
1054                                               &flow_tcf_mask_supported.tcp,
1055                                               error);
1056                         if (ret < 0)
1057                                 return ret;
1058                         item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
1059                         mask.tcp = flow_tcf_item_mask
1060                                 (items, &rte_flow_item_tcp_mask,
1061                                  &flow_tcf_mask_supported.tcp,
1062                                  &flow_tcf_mask_empty.tcp,
1063                                  sizeof(flow_tcf_mask_supported.tcp),
1064                                  error);
1065                         if (!mask.tcp)
1066                                 return -rte_errno;
1067                         break;
1068                 default:
1069                         return rte_flow_error_set(error, ENOTSUP,
1070                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1071                                                   NULL, "item not supported");
1072                 }
1073         }
1074         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1075                 unsigned int i;
1076                 uint32_t current_action_flag = 0;
1077
1078                 switch (actions->type) {
1079                 case RTE_FLOW_ACTION_TYPE_VOID:
1080                         break;
1081                 case RTE_FLOW_ACTION_TYPE_PORT_ID:
1082                         current_action_flag = MLX5_FLOW_ACTION_PORT_ID;
1083                         if (!actions->conf)
1084                                 break;
1085                         conf.port_id = actions->conf;
1086                         if (conf.port_id->original)
1087                                 i = 0;
1088                         else
1089                                 for (i = 0; ptoi[i].ifindex; ++i)
1090                                         if (ptoi[i].port_id == conf.port_id->id)
1091                                                 break;
1092                         if (!ptoi[i].ifindex)
1093                                 return rte_flow_error_set
1094                                         (error, ENODEV,
1095                                          RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1096                                          conf.port_id,
1097                                          "missing data to convert port ID to"
1098                                          " ifindex");
1099                         port_id_dev = &rte_eth_devices[conf.port_id->id];
1100                         break;
1101                 case RTE_FLOW_ACTION_TYPE_JUMP:
1102                         current_action_flag = MLX5_FLOW_ACTION_JUMP;
1103                         if (!actions->conf)
1104                                 break;
1105                         conf.jump = actions->conf;
1106                         if (attr->group >= conf.jump->group)
1107                                 return rte_flow_error_set
1108                                         (error, ENOTSUP,
1109                                          RTE_FLOW_ERROR_TYPE_ACTION,
1110                                          actions,
1111                                          "can jump only to a group forward");
1112                         break;
1113                 case RTE_FLOW_ACTION_TYPE_DROP:
1114                         current_action_flag = MLX5_FLOW_ACTION_DROP;
1115                         break;
1116                 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
1117                         current_action_flag = MLX5_FLOW_ACTION_OF_POP_VLAN;
1118                         break;
1119                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
1120                         current_action_flag = MLX5_FLOW_ACTION_OF_PUSH_VLAN;
1121                         break;
1122                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
1123                         if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
1124                                 return rte_flow_error_set
1125                                         (error, ENOTSUP,
1126                                          RTE_FLOW_ERROR_TYPE_ACTION, actions,
1127                                          "vlan modify is not supported,"
1128                                          " set action must follow push action");
1129                         current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
1130                         break;
1131                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
1132                         if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
1133                                 return rte_flow_error_set
1134                                         (error, ENOTSUP,
1135                                          RTE_FLOW_ERROR_TYPE_ACTION, actions,
1136                                          "vlan modify is not supported,"
1137                                          " set action must follow push action");
1138                         current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
1139                         break;
1140                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
1141                         current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_SRC;
1142                         break;
1143                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
1144                         current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_DST;
1145                         break;
1146                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
1147                         current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_SRC;
1148                         break;
1149                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
1150                         current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_DST;
1151                         break;
1152                 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
1153                         current_action_flag = MLX5_FLOW_ACTION_SET_TP_SRC;
1154                         break;
1155                 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
1156                         current_action_flag = MLX5_FLOW_ACTION_SET_TP_DST;
1157                         break;
1158                 case RTE_FLOW_ACTION_TYPE_SET_TTL:
1159                         current_action_flag = MLX5_FLOW_ACTION_SET_TTL;
1160                         break;
1161                 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
1162                         current_action_flag = MLX5_FLOW_ACTION_DEC_TTL;
1163                         break;
1164                 default:
1165                         return rte_flow_error_set(error, ENOTSUP,
1166                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1167                                                   actions,
1168                                                   "action not supported");
1169                 }
1170                 if (current_action_flag & MLX5_TCF_CONFIG_ACTIONS) {
1171                         if (!actions->conf)
1172                                 return rte_flow_error_set(error, EINVAL,
1173                                                 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1174                                                 actions,
1175                                                 "action configuration not set");
1176                 }
1177                 if ((current_action_flag & MLX5_TCF_PEDIT_ACTIONS) &&
1178                     pedit_validated)
1179                         return rte_flow_error_set(error, ENOTSUP,
1180                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1181                                                   actions,
1182                                                   "set actions should be "
1183                                                   "listed successively");
1184                 if ((current_action_flag & ~MLX5_TCF_PEDIT_ACTIONS) &&
1185                     (action_flags & MLX5_TCF_PEDIT_ACTIONS))
1186                         pedit_validated = 1;
1187                 if ((current_action_flag & MLX5_TCF_FATE_ACTIONS) &&
1188                     (action_flags & MLX5_TCF_FATE_ACTIONS))
1189                         return rte_flow_error_set(error, EINVAL,
1190                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1191                                                   actions,
1192                                                   "can't have multiple fate"
1193                                                   " actions");
1194                 action_flags |= current_action_flag;
1195         }
1196         if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
1197             (action_flags & MLX5_FLOW_ACTION_DROP))
1198                 return rte_flow_error_set(error, ENOTSUP,
1199                                           RTE_FLOW_ERROR_TYPE_ACTION,
1200                                           actions,
1201                                           "set action is not compatible with "
1202                                           "drop action");
1203         if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
1204             !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
1205                 return rte_flow_error_set(error, ENOTSUP,
1206                                           RTE_FLOW_ERROR_TYPE_ACTION,
1207                                           actions,
1208                                           "set action must be followed by "
1209                                           "port_id action");
1210         if (action_flags &
1211            (MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST)) {
1212                 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4))
1213                         return rte_flow_error_set(error, EINVAL,
1214                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1215                                                   actions,
1216                                                   "no ipv4 item found in"
1217                                                   " pattern");
1218         }
1219         if (action_flags &
1220            (MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST)) {
1221                 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6))
1222                         return rte_flow_error_set(error, EINVAL,
1223                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1224                                                   actions,
1225                                                   "no ipv6 item found in"
1226                                                   " pattern");
1227         }
1228         if (action_flags &
1229            (MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST)) {
1230                 if (!(item_flags &
1231                      (MLX5_FLOW_LAYER_OUTER_L4_UDP |
1232                       MLX5_FLOW_LAYER_OUTER_L4_TCP)))
1233                         return rte_flow_error_set(error, EINVAL,
1234                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1235                                                   actions,
1236                                                   "no TCP/UDP item found in"
1237                                                   " pattern");
1238         }
1239         /*
1240          * FW syndrome (0xA9C090):
1241          *     set_flow_table_entry: push vlan action fte in fdb can ONLY be
1242          *     forward to the uplink.
1243          */
1244         if ((action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN) &&
1245             (action_flags & MLX5_FLOW_ACTION_PORT_ID) &&
1246             ((struct priv *)port_id_dev->data->dev_private)->representor)
1247                 return rte_flow_error_set(error, ENOTSUP,
1248                                           RTE_FLOW_ERROR_TYPE_ACTION, actions,
1249                                           "vlan push can only be applied"
1250                                           " when forwarding to uplink port");
1251         /*
1252          * FW syndrome (0x294609):
1253          *     set_flow_table_entry: modify/pop/push actions in fdb flow table
1254          *     are supported only while forwarding to vport.
1255          */
1256         if ((action_flags & MLX5_TCF_VLAN_ACTIONS) &&
1257             !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
1258                 return rte_flow_error_set(error, ENOTSUP,
1259                                           RTE_FLOW_ERROR_TYPE_ACTION, actions,
1260                                           "vlan actions are supported"
1261                                           " only with port_id action");
1262         if (!(action_flags & MLX5_TCF_FATE_ACTIONS))
1263                 return rte_flow_error_set(error, EINVAL,
1264                                           RTE_FLOW_ERROR_TYPE_ACTION, actions,
1265                                           "no fate action is found");
1266         if (action_flags &
1267            (MLX5_FLOW_ACTION_SET_TTL | MLX5_FLOW_ACTION_DEC_TTL)) {
1268                 if (!(item_flags &
1269                      (MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
1270                       MLX5_FLOW_LAYER_OUTER_L3_IPV6)))
1271                         return rte_flow_error_set(error, EINVAL,
1272                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1273                                                   actions,
1274                                                   "no IP found in pattern");
1275         }
1276         return 0;
1277 }
1278
1279 /**
1280  * Calculate maximum size of memory for flow items of Linux TC flower and
1281  * extract specified items.
1282  *
1283  * @param[in] items
1284  *   Pointer to the list of items.
1285  * @param[out] item_flags
1286  *   Pointer to the detected items.
1287  *
1288  * @return
1289  *   Maximum size of memory for items.
1290  */
1291 static int
1292 flow_tcf_get_items_and_size(const struct rte_flow_attr *attr,
1293                             const struct rte_flow_item items[],
1294                             uint64_t *item_flags)
1295 {
1296         int size = 0;
1297         uint64_t flags = 0;
1298
1299         size += SZ_NLATTR_STRZ_OF("flower") +
1300                 SZ_NLATTR_NEST + /* TCA_OPTIONS. */
1301                 SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CLS_FLAGS_SKIP_SW. */
1302         if (attr->group > 0)
1303                 size += SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CHAIN. */
1304         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1305                 switch (items->type) {
1306                 case RTE_FLOW_ITEM_TYPE_VOID:
1307                         break;
1308                 case RTE_FLOW_ITEM_TYPE_PORT_ID:
1309                         break;
1310                 case RTE_FLOW_ITEM_TYPE_ETH:
1311                         size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1312                                 SZ_NLATTR_DATA_OF(ETHER_ADDR_LEN) * 4;
1313                                 /* dst/src MAC addr and mask. */
1314                         flags |= MLX5_FLOW_LAYER_OUTER_L2;
1315                         break;
1316                 case RTE_FLOW_ITEM_TYPE_VLAN:
1317                         size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1318                                 SZ_NLATTR_TYPE_OF(uint16_t) +
1319                                 /* VLAN Ether type. */
1320                                 SZ_NLATTR_TYPE_OF(uint8_t) + /* VLAN prio. */
1321                                 SZ_NLATTR_TYPE_OF(uint16_t); /* VLAN ID. */
1322                         flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
1323                         break;
1324                 case RTE_FLOW_ITEM_TYPE_IPV4:
1325                         size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1326                                 SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1327                                 SZ_NLATTR_TYPE_OF(uint32_t) * 4;
1328                                 /* dst/src IP addr and mask. */
1329                         flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1330                         break;
1331                 case RTE_FLOW_ITEM_TYPE_IPV6:
1332                         size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1333                                 SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1334                                 SZ_NLATTR_TYPE_OF(IPV6_ADDR_LEN) * 4;
1335                                 /* dst/src IP addr and mask. */
1336                         flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1337                         break;
1338                 case RTE_FLOW_ITEM_TYPE_UDP:
1339                         size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1340                                 SZ_NLATTR_TYPE_OF(uint16_t) * 4;
1341                                 /* dst/src port and mask. */
1342                         flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
1343                         break;
1344                 case RTE_FLOW_ITEM_TYPE_TCP:
1345                         size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1346                                 SZ_NLATTR_TYPE_OF(uint16_t) * 4;
1347                                 /* dst/src port and mask. */
1348                         flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
1349                         break;
1350                 default:
1351                         DRV_LOG(WARNING,
1352                                 "unsupported item %p type %d,"
1353                                 " items must be validated before flow creation",
1354                                 (const void *)items, items->type);
1355                         break;
1356                 }
1357         }
1358         *item_flags = flags;
1359         return size;
1360 }
1361
1362 /**
1363  * Calculate maximum size of memory for flow actions of Linux TC flower and
1364  * extract specified actions.
1365  *
1366  * @param[in] actions
1367  *   Pointer to the list of actions.
1368  * @param[out] action_flags
1369  *   Pointer to the detected actions.
1370  *
1371  * @return
1372  *   Maximum size of memory for actions.
1373  */
1374 static int
1375 flow_tcf_get_actions_and_size(const struct rte_flow_action actions[],
1376                               uint64_t *action_flags)
1377 {
1378         int size = 0;
1379         uint64_t flags = 0;
1380
1381         size += SZ_NLATTR_NEST; /* TCA_FLOWER_ACT. */
1382         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1383                 switch (actions->type) {
1384                 case RTE_FLOW_ACTION_TYPE_VOID:
1385                         break;
1386                 case RTE_FLOW_ACTION_TYPE_PORT_ID:
1387                         size += SZ_NLATTR_NEST + /* na_act_index. */
1388                                 SZ_NLATTR_STRZ_OF("mirred") +
1389                                 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1390                                 SZ_NLATTR_TYPE_OF(struct tc_mirred);
1391                         flags |= MLX5_FLOW_ACTION_PORT_ID;
1392                         break;
1393                 case RTE_FLOW_ACTION_TYPE_JUMP:
1394                         size += SZ_NLATTR_NEST + /* na_act_index. */
1395                                 SZ_NLATTR_STRZ_OF("gact") +
1396                                 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1397                                 SZ_NLATTR_TYPE_OF(struct tc_gact);
1398                         flags |= MLX5_FLOW_ACTION_JUMP;
1399                         break;
1400                 case RTE_FLOW_ACTION_TYPE_DROP:
1401                         size += SZ_NLATTR_NEST + /* na_act_index. */
1402                                 SZ_NLATTR_STRZ_OF("gact") +
1403                                 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1404                                 SZ_NLATTR_TYPE_OF(struct tc_gact);
1405                         flags |= MLX5_FLOW_ACTION_DROP;
1406                         break;
1407                 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
1408                         flags |= MLX5_FLOW_ACTION_OF_POP_VLAN;
1409                         goto action_of_vlan;
1410                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
1411                         flags |= MLX5_FLOW_ACTION_OF_PUSH_VLAN;
1412                         goto action_of_vlan;
1413                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
1414                         flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
1415                         goto action_of_vlan;
1416                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
1417                         flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
1418                         goto action_of_vlan;
1419 action_of_vlan:
1420                         size += SZ_NLATTR_NEST + /* na_act_index. */
1421                                 SZ_NLATTR_STRZ_OF("vlan") +
1422                                 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1423                                 SZ_NLATTR_TYPE_OF(struct tc_vlan) +
1424                                 SZ_NLATTR_TYPE_OF(uint16_t) +
1425                                 /* VLAN protocol. */
1426                                 SZ_NLATTR_TYPE_OF(uint16_t) + /* VLAN ID. */
1427                                 SZ_NLATTR_TYPE_OF(uint8_t); /* VLAN prio. */
1428                         break;
1429                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
1430                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
1431                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
1432                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
1433                 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
1434                 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
1435                 case RTE_FLOW_ACTION_TYPE_SET_TTL:
1436                 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
1437                         size += flow_tcf_get_pedit_actions_size(&actions,
1438                                                                 &flags);
1439                         break;
1440                 default:
1441                         DRV_LOG(WARNING,
1442                                 "unsupported action %p type %d,"
1443                                 " items must be validated before flow creation",
1444                                 (const void *)actions, actions->type);
1445                         break;
1446                 }
1447         }
1448         *action_flags = flags;
1449         return size;
1450 }
1451
1452 /**
1453  * Brand rtnetlink buffer with unique handle.
1454  *
1455  * This handle should be unique for a given network interface to avoid
1456  * collisions.
1457  *
1458  * @param nlh
1459  *   Pointer to Netlink message.
1460  * @param handle
1461  *   Unique 32-bit handle to use.
1462  */
1463 static void
1464 flow_tcf_nl_brand(struct nlmsghdr *nlh, uint32_t handle)
1465 {
1466         struct tcmsg *tcm = mnl_nlmsg_get_payload(nlh);
1467
1468         tcm->tcm_handle = handle;
1469         DRV_LOG(DEBUG, "Netlink msg %p is branded with handle %x",
1470                 (void *)nlh, handle);
1471 }
1472
1473 /**
1474  * Prepare a flow object for Linux TC flower. It calculates the maximum size of
1475  * memory required, allocates the memory, initializes Netlink message headers
1476  * and set unique TC message handle.
1477  *
1478  * @param[in] attr
1479  *   Pointer to the flow attributes.
1480  * @param[in] items
1481  *   Pointer to the list of items.
1482  * @param[in] actions
1483  *   Pointer to the list of actions.
1484  * @param[out] item_flags
1485  *   Pointer to bit mask of all items detected.
1486  * @param[out] action_flags
1487  *   Pointer to bit mask of all actions detected.
1488  * @param[out] error
1489  *   Pointer to the error structure.
1490  *
1491  * @return
1492  *   Pointer to mlx5_flow object on success,
1493  *   otherwise NULL and rte_ernno is set.
1494  */
1495 static struct mlx5_flow *
1496 flow_tcf_prepare(const struct rte_flow_attr *attr,
1497                  const struct rte_flow_item items[],
1498                  const struct rte_flow_action actions[],
1499                  uint64_t *item_flags, uint64_t *action_flags,
1500                  struct rte_flow_error *error)
1501 {
1502         size_t size = sizeof(struct mlx5_flow) +
1503                       MNL_ALIGN(sizeof(struct nlmsghdr)) +
1504                       MNL_ALIGN(sizeof(struct tcmsg));
1505         struct mlx5_flow *dev_flow;
1506         struct nlmsghdr *nlh;
1507         struct tcmsg *tcm;
1508
1509         size += flow_tcf_get_items_and_size(attr, items, item_flags);
1510         size += flow_tcf_get_actions_and_size(actions, action_flags);
1511         dev_flow = rte_zmalloc(__func__, size, MNL_ALIGNTO);
1512         if (!dev_flow) {
1513                 rte_flow_error_set(error, ENOMEM,
1514                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1515                                    "not enough memory to create E-Switch flow");
1516                 return NULL;
1517         }
1518         nlh = mnl_nlmsg_put_header((void *)(dev_flow + 1));
1519         tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
1520         *dev_flow = (struct mlx5_flow){
1521                 .tcf = (struct mlx5_flow_tcf){
1522                         .nlh = nlh,
1523                         .tcm = tcm,
1524                 },
1525         };
1526         /*
1527          * Generate a reasonably unique handle based on the address of the
1528          * target buffer.
1529          *
1530          * This is straightforward on 32-bit systems where the flow pointer can
1531          * be used directly. Otherwise, its least significant part is taken
1532          * after shifting it by the previous power of two of the pointed buffer
1533          * size.
1534          */
1535         if (sizeof(dev_flow) <= 4)
1536                 flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow);
1537         else
1538                 flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow >>
1539                                        rte_log2_u32(rte_align32prevpow2(size)));
1540         return dev_flow;
1541 }
1542
/**
 * Translate flow for Linux TC flower and construct Netlink message.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in, out] dev_flow
 *   Pointer to the sub flow.
 * @param[in] attr
 *   Pointer to the flow attributes.
 * @param[in] items
 *   Pointer to the list of items.
 * @param[in] actions
 *   Pointer to the list of actions.
 * @param[out] error
 *   Pointer to the error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
1562 static int
1563 flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
1564                    const struct rte_flow_attr *attr,
1565                    const struct rte_flow_item items[],
1566                    const struct rte_flow_action actions[],
1567                    struct rte_flow_error *error)
1568 {
1569         union {
1570                 const struct rte_flow_item_port_id *port_id;
1571                 const struct rte_flow_item_eth *eth;
1572                 const struct rte_flow_item_vlan *vlan;
1573                 const struct rte_flow_item_ipv4 *ipv4;
1574                 const struct rte_flow_item_ipv6 *ipv6;
1575                 const struct rte_flow_item_tcp *tcp;
1576                 const struct rte_flow_item_udp *udp;
1577         } spec, mask;
1578         union {
1579                 const struct rte_flow_action_port_id *port_id;
1580                 const struct rte_flow_action_jump *jump;
1581                 const struct rte_flow_action_of_push_vlan *of_push_vlan;
1582                 const struct rte_flow_action_of_set_vlan_vid *
1583                         of_set_vlan_vid;
1584                 const struct rte_flow_action_of_set_vlan_pcp *
1585                         of_set_vlan_pcp;
1586         } conf;
1587         struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
1588         struct nlmsghdr *nlh = dev_flow->tcf.nlh;
1589         struct tcmsg *tcm = dev_flow->tcf.tcm;
1590         uint32_t na_act_index_cur;
1591         bool eth_type_set = 0;
1592         bool vlan_present = 0;
1593         bool vlan_eth_type_set = 0;
1594         bool ip_proto_set = 0;
1595         struct nlattr *na_flower;
1596         struct nlattr *na_flower_act;
1597         struct nlattr *na_vlan_id = NULL;
1598         struct nlattr *na_vlan_priority = NULL;
1599         uint64_t item_flags = 0;
1600
1601         claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
1602                                                 PTOI_TABLE_SZ_MAX(dev)));
1603         nlh = dev_flow->tcf.nlh;
1604         tcm = dev_flow->tcf.tcm;
1605         /* Prepare API must have been called beforehand. */
1606         assert(nlh != NULL && tcm != NULL);
1607         tcm->tcm_family = AF_UNSPEC;
1608         tcm->tcm_ifindex = ptoi[0].ifindex;
1609         tcm->tcm_parent = TC_H_MAKE(TC_H_INGRESS, TC_H_MIN_INGRESS);
1610         /*
1611          * Priority cannot be zero to prevent the kernel from picking one
1612          * automatically.
1613          */
1614         tcm->tcm_info = TC_H_MAKE((attr->priority + 1) << 16,
1615                                   RTE_BE16(ETH_P_ALL));
1616         if (attr->group > 0)
1617                 mnl_attr_put_u32(nlh, TCA_CHAIN, attr->group);
1618         mnl_attr_put_strz(nlh, TCA_KIND, "flower");
1619         na_flower = mnl_attr_nest_start(nlh, TCA_OPTIONS);
1620         mnl_attr_put_u32(nlh, TCA_FLOWER_FLAGS, TCA_CLS_FLAGS_SKIP_SW);
1621         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1622                 unsigned int i;
1623
1624                 switch (items->type) {
1625                 case RTE_FLOW_ITEM_TYPE_VOID:
1626                         break;
1627                 case RTE_FLOW_ITEM_TYPE_PORT_ID:
1628                         mask.port_id = flow_tcf_item_mask
1629                                 (items, &rte_flow_item_port_id_mask,
1630                                  &flow_tcf_mask_supported.port_id,
1631                                  &flow_tcf_mask_empty.port_id,
1632                                  sizeof(flow_tcf_mask_supported.port_id),
1633                                  error);
1634                         assert(mask.port_id);
1635                         if (mask.port_id == &flow_tcf_mask_empty.port_id)
1636                                 break;
1637                         spec.port_id = items->spec;
1638                         if (!mask.port_id->id)
1639                                 i = 0;
1640                         else
1641                                 for (i = 0; ptoi[i].ifindex; ++i)
1642                                         if (ptoi[i].port_id == spec.port_id->id)
1643                                                 break;
1644                         assert(ptoi[i].ifindex);
1645                         tcm->tcm_ifindex = ptoi[i].ifindex;
1646                         break;
1647                 case RTE_FLOW_ITEM_TYPE_ETH:
1648                         item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
1649                         mask.eth = flow_tcf_item_mask
1650                                 (items, &rte_flow_item_eth_mask,
1651                                  &flow_tcf_mask_supported.eth,
1652                                  &flow_tcf_mask_empty.eth,
1653                                  sizeof(flow_tcf_mask_supported.eth),
1654                                  error);
1655                         assert(mask.eth);
1656                         if (mask.eth == &flow_tcf_mask_empty.eth)
1657                                 break;
1658                         spec.eth = items->spec;
1659                         if (mask.eth->type) {
1660                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
1661                                                  spec.eth->type);
1662                                 eth_type_set = 1;
1663                         }
1664                         if (!is_zero_ether_addr(&mask.eth->dst)) {
1665                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST,
1666                                              ETHER_ADDR_LEN,
1667                                              spec.eth->dst.addr_bytes);
1668                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST_MASK,
1669                                              ETHER_ADDR_LEN,
1670                                              mask.eth->dst.addr_bytes);
1671                         }
1672                         if (!is_zero_ether_addr(&mask.eth->src)) {
1673                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC,
1674                                              ETHER_ADDR_LEN,
1675                                              spec.eth->src.addr_bytes);
1676                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC_MASK,
1677                                              ETHER_ADDR_LEN,
1678                                              mask.eth->src.addr_bytes);
1679                         }
1680                         break;
1681                 case RTE_FLOW_ITEM_TYPE_VLAN:
1682                         item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
1683                         mask.vlan = flow_tcf_item_mask
1684                                 (items, &rte_flow_item_vlan_mask,
1685                                  &flow_tcf_mask_supported.vlan,
1686                                  &flow_tcf_mask_empty.vlan,
1687                                  sizeof(flow_tcf_mask_supported.vlan),
1688                                  error);
1689                         assert(mask.vlan);
1690                         if (!eth_type_set)
1691                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
1692                                                  RTE_BE16(ETH_P_8021Q));
1693                         eth_type_set = 1;
1694                         vlan_present = 1;
1695                         if (mask.vlan == &flow_tcf_mask_empty.vlan)
1696                                 break;
1697                         spec.vlan = items->spec;
1698                         if (mask.vlan->inner_type) {
1699                                 mnl_attr_put_u16(nlh,
1700                                                  TCA_FLOWER_KEY_VLAN_ETH_TYPE,
1701                                                  spec.vlan->inner_type);
1702                                 vlan_eth_type_set = 1;
1703                         }
1704                         if (mask.vlan->tci & RTE_BE16(0xe000))
1705                                 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_VLAN_PRIO,
1706                                                 (rte_be_to_cpu_16
1707                                                  (spec.vlan->tci) >> 13) & 0x7);
1708                         if (mask.vlan->tci & RTE_BE16(0x0fff))
1709                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_VLAN_ID,
1710                                                  rte_be_to_cpu_16
1711                                                  (spec.vlan->tci &
1712                                                   RTE_BE16(0x0fff)));
1713                         break;
1714                 case RTE_FLOW_ITEM_TYPE_IPV4:
1715                         item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1716                         mask.ipv4 = flow_tcf_item_mask
1717                                 (items, &rte_flow_item_ipv4_mask,
1718                                  &flow_tcf_mask_supported.ipv4,
1719                                  &flow_tcf_mask_empty.ipv4,
1720                                  sizeof(flow_tcf_mask_supported.ipv4),
1721                                  error);
1722                         assert(mask.ipv4);
1723                         if (!eth_type_set || !vlan_eth_type_set)
1724                                 mnl_attr_put_u16(nlh,
1725                                                  vlan_present ?
1726                                                  TCA_FLOWER_KEY_VLAN_ETH_TYPE :
1727                                                  TCA_FLOWER_KEY_ETH_TYPE,
1728                                                  RTE_BE16(ETH_P_IP));
1729                         eth_type_set = 1;
1730                         vlan_eth_type_set = 1;
1731                         if (mask.ipv4 == &flow_tcf_mask_empty.ipv4)
1732                                 break;
1733                         spec.ipv4 = items->spec;
1734                         if (mask.ipv4->hdr.next_proto_id) {
1735                                 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1736                                                 spec.ipv4->hdr.next_proto_id);
1737                                 ip_proto_set = 1;
1738                         }
1739                         if (mask.ipv4->hdr.src_addr) {
1740                                 mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_SRC,
1741                                                  spec.ipv4->hdr.src_addr);
1742                                 mnl_attr_put_u32(nlh,
1743                                                  TCA_FLOWER_KEY_IPV4_SRC_MASK,
1744                                                  mask.ipv4->hdr.src_addr);
1745                         }
1746                         if (mask.ipv4->hdr.dst_addr) {
1747                                 mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_DST,
1748                                                  spec.ipv4->hdr.dst_addr);
1749                                 mnl_attr_put_u32(nlh,
1750                                                  TCA_FLOWER_KEY_IPV4_DST_MASK,
1751                                                  mask.ipv4->hdr.dst_addr);
1752                         }
1753                         break;
1754                 case RTE_FLOW_ITEM_TYPE_IPV6:
1755                         item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1756                         mask.ipv6 = flow_tcf_item_mask
1757                                 (items, &rte_flow_item_ipv6_mask,
1758                                  &flow_tcf_mask_supported.ipv6,
1759                                  &flow_tcf_mask_empty.ipv6,
1760                                  sizeof(flow_tcf_mask_supported.ipv6),
1761                                  error);
1762                         assert(mask.ipv6);
1763                         if (!eth_type_set || !vlan_eth_type_set)
1764                                 mnl_attr_put_u16(nlh,
1765                                                  vlan_present ?
1766                                                  TCA_FLOWER_KEY_VLAN_ETH_TYPE :
1767                                                  TCA_FLOWER_KEY_ETH_TYPE,
1768                                                  RTE_BE16(ETH_P_IPV6));
1769                         eth_type_set = 1;
1770                         vlan_eth_type_set = 1;
1771                         if (mask.ipv6 == &flow_tcf_mask_empty.ipv6)
1772                                 break;
1773                         spec.ipv6 = items->spec;
1774                         if (mask.ipv6->hdr.proto) {
1775                                 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1776                                                 spec.ipv6->hdr.proto);
1777                                 ip_proto_set = 1;
1778                         }
1779                         if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.src_addr)) {
1780                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC,
1781                                              sizeof(spec.ipv6->hdr.src_addr),
1782                                              spec.ipv6->hdr.src_addr);
1783                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC_MASK,
1784                                              sizeof(mask.ipv6->hdr.src_addr),
1785                                              mask.ipv6->hdr.src_addr);
1786                         }
1787                         if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.dst_addr)) {
1788                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST,
1789                                              sizeof(spec.ipv6->hdr.dst_addr),
1790                                              spec.ipv6->hdr.dst_addr);
1791                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST_MASK,
1792                                              sizeof(mask.ipv6->hdr.dst_addr),
1793                                              mask.ipv6->hdr.dst_addr);
1794                         }
1795                         break;
1796                 case RTE_FLOW_ITEM_TYPE_UDP:
1797                         item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
1798                         mask.udp = flow_tcf_item_mask
1799                                 (items, &rte_flow_item_udp_mask,
1800                                  &flow_tcf_mask_supported.udp,
1801                                  &flow_tcf_mask_empty.udp,
1802                                  sizeof(flow_tcf_mask_supported.udp),
1803                                  error);
1804                         assert(mask.udp);
1805                         if (!ip_proto_set)
1806                                 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1807                                                 IPPROTO_UDP);
1808                         if (mask.udp == &flow_tcf_mask_empty.udp)
1809                                 break;
1810                         spec.udp = items->spec;
1811                         if (mask.udp->hdr.src_port) {
1812                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_SRC,
1813                                                  spec.udp->hdr.src_port);
1814                                 mnl_attr_put_u16(nlh,
1815                                                  TCA_FLOWER_KEY_UDP_SRC_MASK,
1816                                                  mask.udp->hdr.src_port);
1817                         }
1818                         if (mask.udp->hdr.dst_port) {
1819                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_DST,
1820                                                  spec.udp->hdr.dst_port);
1821                                 mnl_attr_put_u16(nlh,
1822                                                  TCA_FLOWER_KEY_UDP_DST_MASK,
1823                                                  mask.udp->hdr.dst_port);
1824                         }
1825                         break;
1826                 case RTE_FLOW_ITEM_TYPE_TCP:
1827                         item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
1828                         mask.tcp = flow_tcf_item_mask
1829                                 (items, &rte_flow_item_tcp_mask,
1830                                  &flow_tcf_mask_supported.tcp,
1831                                  &flow_tcf_mask_empty.tcp,
1832                                  sizeof(flow_tcf_mask_supported.tcp),
1833                                  error);
1834                         assert(mask.tcp);
1835                         if (!ip_proto_set)
1836                                 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1837                                                 IPPROTO_TCP);
1838                         if (mask.tcp == &flow_tcf_mask_empty.tcp)
1839                                 break;
1840                         spec.tcp = items->spec;
1841                         if (mask.tcp->hdr.src_port) {
1842                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_SRC,
1843                                                  spec.tcp->hdr.src_port);
1844                                 mnl_attr_put_u16(nlh,
1845                                                  TCA_FLOWER_KEY_TCP_SRC_MASK,
1846                                                  mask.tcp->hdr.src_port);
1847                         }
1848                         if (mask.tcp->hdr.dst_port) {
1849                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_DST,
1850                                                  spec.tcp->hdr.dst_port);
1851                                 mnl_attr_put_u16(nlh,
1852                                                  TCA_FLOWER_KEY_TCP_DST_MASK,
1853                                                  mask.tcp->hdr.dst_port);
1854                         }
1855                         if (mask.tcp->hdr.tcp_flags) {
1856                                 mnl_attr_put_u16
1857                                         (nlh,
1858                                          TCA_FLOWER_KEY_TCP_FLAGS,
1859                                          rte_cpu_to_be_16
1860                                                 (spec.tcp->hdr.tcp_flags));
1861                                 mnl_attr_put_u16
1862                                         (nlh,
1863                                          TCA_FLOWER_KEY_TCP_FLAGS_MASK,
1864                                          rte_cpu_to_be_16
1865                                                 (mask.tcp->hdr.tcp_flags));
1866                         }
1867                         break;
1868                 default:
1869                         return rte_flow_error_set(error, ENOTSUP,
1870                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1871                                                   NULL, "item not supported");
1872                 }
1873         }
1874         na_flower_act = mnl_attr_nest_start(nlh, TCA_FLOWER_ACT);
1875         na_act_index_cur = 1;
1876         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1877                 struct nlattr *na_act_index;
1878                 struct nlattr *na_act;
1879                 unsigned int vlan_act;
1880                 unsigned int i;
1881
1882                 switch (actions->type) {
1883                 case RTE_FLOW_ACTION_TYPE_VOID:
1884                         break;
1885                 case RTE_FLOW_ACTION_TYPE_PORT_ID:
1886                         conf.port_id = actions->conf;
1887                         if (conf.port_id->original)
1888                                 i = 0;
1889                         else
1890                                 for (i = 0; ptoi[i].ifindex; ++i)
1891                                         if (ptoi[i].port_id == conf.port_id->id)
1892                                                 break;
1893                         assert(ptoi[i].ifindex);
1894                         na_act_index =
1895                                 mnl_attr_nest_start(nlh, na_act_index_cur++);
1896                         assert(na_act_index);
1897                         mnl_attr_put_strz(nlh, TCA_ACT_KIND, "mirred");
1898                         na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
1899                         assert(na_act);
1900                         mnl_attr_put(nlh, TCA_MIRRED_PARMS,
1901                                      sizeof(struct tc_mirred),
1902                                      &(struct tc_mirred){
1903                                         .action = TC_ACT_STOLEN,
1904                                         .eaction = TCA_EGRESS_REDIR,
1905                                         .ifindex = ptoi[i].ifindex,
1906                                      });
1907                         mnl_attr_nest_end(nlh, na_act);
1908                         mnl_attr_nest_end(nlh, na_act_index);
1909                         break;
1910                 case RTE_FLOW_ACTION_TYPE_JUMP:
1911                         conf.jump = actions->conf;
1912                         na_act_index =
1913                                 mnl_attr_nest_start(nlh, na_act_index_cur++);
1914                         assert(na_act_index);
1915                         mnl_attr_put_strz(nlh, TCA_ACT_KIND, "gact");
1916                         na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
1917                         assert(na_act);
1918                         mnl_attr_put(nlh, TCA_GACT_PARMS,
1919                                      sizeof(struct tc_gact),
1920                                      &(struct tc_gact){
1921                                         .action = TC_ACT_GOTO_CHAIN |
1922                                                   conf.jump->group,
1923                                      });
1924                         mnl_attr_nest_end(nlh, na_act);
1925                         mnl_attr_nest_end(nlh, na_act_index);
1926                         break;
1927                 case RTE_FLOW_ACTION_TYPE_DROP:
1928                         na_act_index =
1929                                 mnl_attr_nest_start(nlh, na_act_index_cur++);
1930                         assert(na_act_index);
1931                         mnl_attr_put_strz(nlh, TCA_ACT_KIND, "gact");
1932                         na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
1933                         assert(na_act);
1934                         mnl_attr_put(nlh, TCA_GACT_PARMS,
1935                                      sizeof(struct tc_gact),
1936                                      &(struct tc_gact){
1937                                         .action = TC_ACT_SHOT,
1938                                      });
1939                         mnl_attr_nest_end(nlh, na_act);
1940                         mnl_attr_nest_end(nlh, na_act_index);
1941                         break;
1942                 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
1943                         conf.of_push_vlan = NULL;
1944                         vlan_act = TCA_VLAN_ACT_POP;
1945                         goto action_of_vlan;
1946                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
1947                         conf.of_push_vlan = actions->conf;
1948                         vlan_act = TCA_VLAN_ACT_PUSH;
1949                         goto action_of_vlan;
1950                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
1951                         conf.of_set_vlan_vid = actions->conf;
1952                         if (na_vlan_id)
1953                                 goto override_na_vlan_id;
1954                         vlan_act = TCA_VLAN_ACT_MODIFY;
1955                         goto action_of_vlan;
1956                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
1957                         conf.of_set_vlan_pcp = actions->conf;
1958                         if (na_vlan_priority)
1959                                 goto override_na_vlan_priority;
1960                         vlan_act = TCA_VLAN_ACT_MODIFY;
1961                         goto action_of_vlan;
1962 action_of_vlan:
1963                         na_act_index =
1964                                 mnl_attr_nest_start(nlh, na_act_index_cur++);
1965                         assert(na_act_index);
1966                         mnl_attr_put_strz(nlh, TCA_ACT_KIND, "vlan");
1967                         na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
1968                         assert(na_act);
1969                         mnl_attr_put(nlh, TCA_VLAN_PARMS,
1970                                      sizeof(struct tc_vlan),
1971                                      &(struct tc_vlan){
1972                                         .action = TC_ACT_PIPE,
1973                                         .v_action = vlan_act,
1974                                      });
1975                         if (vlan_act == TCA_VLAN_ACT_POP) {
1976                                 mnl_attr_nest_end(nlh, na_act);
1977                                 mnl_attr_nest_end(nlh, na_act_index);
1978                                 break;
1979                         }
1980                         if (vlan_act == TCA_VLAN_ACT_PUSH)
1981                                 mnl_attr_put_u16(nlh,
1982                                                  TCA_VLAN_PUSH_VLAN_PROTOCOL,
1983                                                  conf.of_push_vlan->ethertype);
1984                         na_vlan_id = mnl_nlmsg_get_payload_tail(nlh);
1985                         mnl_attr_put_u16(nlh, TCA_VLAN_PAD, 0);
1986                         na_vlan_priority = mnl_nlmsg_get_payload_tail(nlh);
1987                         mnl_attr_put_u8(nlh, TCA_VLAN_PAD, 0);
1988                         mnl_attr_nest_end(nlh, na_act);
1989                         mnl_attr_nest_end(nlh, na_act_index);
1990                         if (actions->type ==
1991                             RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID) {
1992 override_na_vlan_id:
1993                                 na_vlan_id->nla_type = TCA_VLAN_PUSH_VLAN_ID;
1994                                 *(uint16_t *)mnl_attr_get_payload(na_vlan_id) =
1995                                         rte_be_to_cpu_16
1996                                         (conf.of_set_vlan_vid->vlan_vid);
1997                         } else if (actions->type ==
1998                                    RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP) {
1999 override_na_vlan_priority:
2000                                 na_vlan_priority->nla_type =
2001                                         TCA_VLAN_PUSH_VLAN_PRIORITY;
2002                                 *(uint8_t *)mnl_attr_get_payload
2003                                         (na_vlan_priority) =
2004                                         conf.of_set_vlan_pcp->vlan_pcp;
2005                         }
2006                         break;
2007                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
2008                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
2009                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
2010                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
2011                 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
2012                 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
2013                 case RTE_FLOW_ACTION_TYPE_SET_TTL:
2014                 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
2015                         na_act_index =
2016                                 mnl_attr_nest_start(nlh, na_act_index_cur++);
2017                         flow_tcf_create_pedit_mnl_msg(nlh,
2018                                                       &actions, item_flags);
2019                         mnl_attr_nest_end(nlh, na_act_index);
2020                         break;
2021                 default:
2022                         return rte_flow_error_set(error, ENOTSUP,
2023                                                   RTE_FLOW_ERROR_TYPE_ACTION,
2024                                                   actions,
2025                                                   "action not supported");
2026                 }
2027         }
2028         assert(na_flower);
2029         assert(na_flower_act);
2030         mnl_attr_nest_end(nlh, na_flower_act);
2031         mnl_attr_nest_end(nlh, na_flower);
2032         return 0;
2033 }
2034
2035 /**
2036  * Send Netlink message with acknowledgment.
2037  *
2038  * @param nl
2039  *   Libmnl socket to use.
2040  * @param nlh
2041  *   Message to send. This function always raises the NLM_F_ACK flag before
2042  *   sending.
2043  *
2044  * @return
2045  *   0 on success, a negative errno value otherwise and rte_errno is set.
2046  */
2047 static int
2048 flow_tcf_nl_ack(struct mnl_socket *nl, struct nlmsghdr *nlh)
2049 {
2050         alignas(struct nlmsghdr)
2051         uint8_t ans[mnl_nlmsg_size(sizeof(struct nlmsgerr)) +
2052                     nlh->nlmsg_len - sizeof(*nlh)];
2053         uint32_t seq = random();
2054         int ret;
2055
2056         nlh->nlmsg_flags |= NLM_F_ACK;
2057         nlh->nlmsg_seq = seq;
2058         ret = mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
2059         if (ret != -1)
2060                 ret = mnl_socket_recvfrom(nl, ans, sizeof(ans));
2061         if (ret != -1)
2062                 ret = mnl_cb_run
2063                         (ans, ret, seq, mnl_socket_get_portid(nl), NULL, NULL);
2064         if (ret > 0)
2065                 return 0;
2066         rte_errno = errno;
2067         return -rte_errno;
2068 }
2069
2070 /**
2071  * Apply flow to E-Switch by sending Netlink message.
2072  *
2073  * @param[in] dev
2074  *   Pointer to Ethernet device.
2075  * @param[in, out] flow
2076  *   Pointer to the sub flow.
2077  * @param[out] error
2078  *   Pointer to the error structure.
2079  *
2080  * @return
2081  *   0 on success, a negative errno value otherwise and rte_ernno is set.
2082  */
2083 static int
2084 flow_tcf_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
2085                struct rte_flow_error *error)
2086 {
2087         struct priv *priv = dev->data->dev_private;
2088         struct mnl_socket *nl = priv->mnl_socket;
2089         struct mlx5_flow *dev_flow;
2090         struct nlmsghdr *nlh;
2091
2092         dev_flow = LIST_FIRST(&flow->dev_flows);
2093         /* E-Switch flow can't be expanded. */
2094         assert(!LIST_NEXT(dev_flow, next));
2095         nlh = dev_flow->tcf.nlh;
2096         nlh->nlmsg_type = RTM_NEWTFILTER;
2097         nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
2098         if (!flow_tcf_nl_ack(nl, nlh))
2099                 return 0;
2100         return rte_flow_error_set(error, rte_errno,
2101                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
2102                                   "netlink: failed to create TC flow rule");
2103 }
2104
2105 /**
2106  * Remove flow from E-Switch by sending Netlink message.
2107  *
2108  * @param[in] dev
2109  *   Pointer to Ethernet device.
2110  * @param[in, out] flow
2111  *   Pointer to the sub flow.
2112  */
2113 static void
2114 flow_tcf_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
2115 {
2116         struct priv *priv = dev->data->dev_private;
2117         struct mnl_socket *nl = priv->mnl_socket;
2118         struct mlx5_flow *dev_flow;
2119         struct nlmsghdr *nlh;
2120
2121         if (!flow)
2122                 return;
2123         dev_flow = LIST_FIRST(&flow->dev_flows);
2124         if (!dev_flow)
2125                 return;
2126         /* E-Switch flow can't be expanded. */
2127         assert(!LIST_NEXT(dev_flow, next));
2128         nlh = dev_flow->tcf.nlh;
2129         nlh->nlmsg_type = RTM_DELTFILTER;
2130         nlh->nlmsg_flags = NLM_F_REQUEST;
2131         flow_tcf_nl_ack(nl, nlh);
2132 }
2133
2134 /**
2135  * Remove flow from E-Switch and release resources of the device flow.
2136  *
2137  * @param[in] dev
2138  *   Pointer to Ethernet device.
2139  * @param[in, out] flow
2140  *   Pointer to the sub flow.
2141  */
2142 static void
2143 flow_tcf_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
2144 {
2145         struct mlx5_flow *dev_flow;
2146
2147         if (!flow)
2148                 return;
2149         flow_tcf_remove(dev, flow);
2150         dev_flow = LIST_FIRST(&flow->dev_flows);
2151         if (!dev_flow)
2152                 return;
2153         /* E-Switch flow can't be expanded. */
2154         assert(!LIST_NEXT(dev_flow, next));
2155         LIST_REMOVE(dev_flow, next);
2156         rte_free(dev_flow);
2157 }
2158
2159 const struct mlx5_flow_driver_ops mlx5_flow_tcf_drv_ops = {
2160         .validate = flow_tcf_validate,
2161         .prepare = flow_tcf_prepare,
2162         .translate = flow_tcf_translate,
2163         .apply = flow_tcf_apply,
2164         .remove = flow_tcf_remove,
2165         .destroy = flow_tcf_destroy,
2166 };
2167
2168 /**
2169  * Initialize ingress qdisc of a given network interface.
2170  *
2171  * @param nl
2172  *   Libmnl socket of the @p NETLINK_ROUTE kind.
2173  * @param ifindex
2174  *   Index of network interface to initialize.
2175  * @param[out] error
2176  *   Perform verbose error reporting if not NULL.
2177  *
2178  * @return
2179  *   0 on success, a negative errno value otherwise and rte_errno is set.
2180  */
2181 int
2182 mlx5_flow_tcf_init(struct mnl_socket *nl, unsigned int ifindex,
2183                    struct rte_flow_error *error)
2184 {
2185         struct nlmsghdr *nlh;
2186         struct tcmsg *tcm;
2187         alignas(struct nlmsghdr)
2188         uint8_t buf[mnl_nlmsg_size(sizeof(*tcm) + 128)];
2189
2190         /* Destroy existing ingress qdisc and everything attached to it. */
2191         nlh = mnl_nlmsg_put_header(buf);
2192         nlh->nlmsg_type = RTM_DELQDISC;
2193         nlh->nlmsg_flags = NLM_F_REQUEST;
2194         tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
2195         tcm->tcm_family = AF_UNSPEC;
2196         tcm->tcm_ifindex = ifindex;
2197         tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
2198         tcm->tcm_parent = TC_H_INGRESS;
2199         /* Ignore errors when qdisc is already absent. */
2200         if (flow_tcf_nl_ack(nl, nlh) &&
2201             rte_errno != EINVAL && rte_errno != ENOENT)
2202                 return rte_flow_error_set(error, rte_errno,
2203                                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
2204                                           "netlink: failed to remove ingress"
2205                                           " qdisc");
2206         /* Create fresh ingress qdisc. */
2207         nlh = mnl_nlmsg_put_header(buf);
2208         nlh->nlmsg_type = RTM_NEWQDISC;
2209         nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
2210         tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
2211         tcm->tcm_family = AF_UNSPEC;
2212         tcm->tcm_ifindex = ifindex;
2213         tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
2214         tcm->tcm_parent = TC_H_INGRESS;
2215         mnl_attr_put_strz_check(nlh, sizeof(buf), TCA_KIND, "ingress");
2216         if (flow_tcf_nl_ack(nl, nlh))
2217                 return rte_flow_error_set(error, rte_errno,
2218                                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
2219                                           "netlink: failed to create ingress"
2220                                           " qdisc");
2221         return 0;
2222 }
2223
2224 /**
2225  * Create and configure a libmnl socket for Netlink flow rules.
2226  *
2227  * @return
2228  *   A valid libmnl socket object pointer on success, NULL otherwise and
2229  *   rte_errno is set.
2230  */
2231 struct mnl_socket *
2232 mlx5_flow_tcf_socket_create(void)
2233 {
2234         struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
2235
2236         if (nl) {
2237                 mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
2238                                       sizeof(int));
2239                 if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
2240                         return nl;
2241         }
2242         rte_errno = errno;
2243         if (nl)
2244                 mnl_socket_close(nl);
2245         return NULL;
2246 }
2247
/**
 * Destroy a libmnl socket.
 *
 * @param nl
 *   Libmnl socket of the @p NETLINK_ROUTE kind. A NULL pointer, such as
 *   the one returned by a failed mlx5_flow_tcf_socket_create(), is
 *   accepted and ignored.
 */
void
mlx5_flow_tcf_socket_destroy(struct mnl_socket *nl)
{
        /* mnl_socket_close() dereferences its argument, so guard NULL here. */
        if (!nl)
                return;
        mnl_socket_close(nl);
}
2257         mnl_socket_close(nl);
2258 }