net/mlx5: rewrite MAC address by E-Switch
[dpdk.git] / drivers / net / mlx5 / mlx5_flow_tcf.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2018 6WIND S.A.
3  * Copyright 2018 Mellanox Technologies, Ltd
4  */
5
6 #include <assert.h>
7 #include <errno.h>
8 #include <libmnl/libmnl.h>
9 #include <linux/if_ether.h>
10 #include <linux/netlink.h>
11 #include <linux/pkt_cls.h>
12 #include <linux/pkt_sched.h>
13 #include <linux/rtnetlink.h>
14 #include <linux/tc_act/tc_gact.h>
15 #include <linux/tc_act/tc_mirred.h>
16 #include <netinet/in.h>
17 #include <stdalign.h>
18 #include <stdbool.h>
19 #include <stddef.h>
20 #include <stdint.h>
21 #include <stdlib.h>
22 #include <sys/socket.h>
23
24 #include <rte_byteorder.h>
25 #include <rte_errno.h>
26 #include <rte_ether.h>
27 #include <rte_flow.h>
28 #include <rte_malloc.h>
29
30 #include "mlx5.h"
31 #include "mlx5_flow.h"
32 #include "mlx5_autoconf.h"
33
/*
 * TC VLAN action definitions. When HAVE_TC_ACT_VLAN is defined the kernel
 * header is included; otherwise the attribute/action constants and
 * struct tc_vlan used by this file are declared locally.
 */
34 #ifdef HAVE_TC_ACT_VLAN
35
36 #include <linux/tc_act/tc_vlan.h>
37
38 #else /* HAVE_TC_ACT_VLAN */
39
40 #define TCA_VLAN_ACT_POP 1
41 #define TCA_VLAN_ACT_PUSH 2
42 #define TCA_VLAN_ACT_MODIFY 3
43 #define TCA_VLAN_PARMS 2
44 #define TCA_VLAN_PUSH_VLAN_ID 3
45 #define TCA_VLAN_PUSH_VLAN_PROTOCOL 4
46 #define TCA_VLAN_PAD 5
47 #define TCA_VLAN_PUSH_VLAN_PRIORITY 6
48
/* tc_gen is not defined in this file; presumably a <linux/pkt_cls.h> macro. */
49 struct tc_vlan {
50         tc_gen;
51         int v_action;
52 };
53
54 #endif /* HAVE_TC_ACT_VLAN */
55
/*
 * TC pedit action definitions. When HAVE_TC_ACT_PEDIT is defined the kernel
 * header is included; otherwise the attribute enums, header type and command
 * enums and the key/selector structures are declared locally.
 */
56 #ifdef HAVE_TC_ACT_PEDIT
57
58 #include <linux/tc_act/tc_pedit.h>
59
60 #else /* HAVE_TC_ACT_PEDIT */
61
62 enum {
63         TCA_PEDIT_UNSPEC,
64         TCA_PEDIT_TM,
65         TCA_PEDIT_PARMS,
66         TCA_PEDIT_PAD,
67         TCA_PEDIT_PARMS_EX,
68         TCA_PEDIT_KEYS_EX,
69         TCA_PEDIT_KEY_EX,
70         __TCA_PEDIT_MAX
71 };
72
73 enum {
74         TCA_PEDIT_KEY_EX_HTYPE = 1,
75         TCA_PEDIT_KEY_EX_CMD = 2,
76         __TCA_PEDIT_KEY_EX_MAX
77 };
78
79 enum pedit_header_type {
80         TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK = 0,
81         TCA_PEDIT_KEY_EX_HDR_TYPE_ETH = 1,
82         TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 = 2,
83         TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 = 3,
84         TCA_PEDIT_KEY_EX_HDR_TYPE_TCP = 4,
85         TCA_PEDIT_KEY_EX_HDR_TYPE_UDP = 5,
86         __PEDIT_HDR_TYPE_MAX,
87 };
88
89 enum pedit_cmd {
90         TCA_PEDIT_KEY_EX_CMD_SET = 0,
91         TCA_PEDIT_KEY_EX_CMD_ADD = 1,
92         __PEDIT_CMD_MAX,
93 };
94
95 struct tc_pedit_key {
96         __u32 mask; /* AND */
97         __u32 val; /* XOR */
98         __u32 off; /* offset */
99         __u32 at;
100         __u32 offmask;
101         __u32 shift;
102 };
103
/* keys[] is a zero-length trailing array, hence the __extension__ marker. */
104 __extension__
105 struct tc_pedit_sel {
106         tc_gen;
107         unsigned char nkeys;
108         unsigned char flags;
109         struct tc_pedit_key keys[0];
110 };
111
112 #endif /* HAVE_TC_ACT_PEDIT */
113
/*
 * Fallback values for kernel netlink/TC constants. Each value is defined
 * here only when its guard macro is not already defined; the HAVE_* guards
 * are presumably build-time detection results from mlx5_autoconf.h.
 */
114 /* Normally found in linux/netlink.h. */
115 #ifndef NETLINK_CAP_ACK
116 #define NETLINK_CAP_ACK 10
117 #endif
118
119 /* Normally found in linux/pkt_sched.h. */
120 #ifndef TC_H_MIN_INGRESS
121 #define TC_H_MIN_INGRESS 0xfff2u
122 #endif
123
124 /* Normally found in linux/pkt_cls.h. */
125 #ifndef TCA_CLS_FLAGS_SKIP_SW
126 #define TCA_CLS_FLAGS_SKIP_SW (1 << 1)
127 #endif
128 #ifndef HAVE_TCA_CHAIN
129 #define TCA_CHAIN 11
130 #endif
131 #ifndef HAVE_TCA_FLOWER_ACT
132 #define TCA_FLOWER_ACT 3
133 #endif
134 #ifndef HAVE_TCA_FLOWER_FLAGS
135 #define TCA_FLOWER_FLAGS 22
136 #endif
137 #ifndef HAVE_TCA_FLOWER_KEY_ETH_TYPE
138 #define TCA_FLOWER_KEY_ETH_TYPE 8
139 #endif
140 #ifndef HAVE_TCA_FLOWER_KEY_ETH_DST
141 #define TCA_FLOWER_KEY_ETH_DST 4
142 #endif
143 #ifndef HAVE_TCA_FLOWER_KEY_ETH_DST_MASK
144 #define TCA_FLOWER_KEY_ETH_DST_MASK 5
145 #endif
146 #ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC
147 #define TCA_FLOWER_KEY_ETH_SRC 6
148 #endif
149 #ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC_MASK
150 #define TCA_FLOWER_KEY_ETH_SRC_MASK 7
151 #endif
152 #ifndef HAVE_TCA_FLOWER_KEY_IP_PROTO
153 #define TCA_FLOWER_KEY_IP_PROTO 9
154 #endif
155 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC
156 #define TCA_FLOWER_KEY_IPV4_SRC 10
157 #endif
158 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC_MASK
159 #define TCA_FLOWER_KEY_IPV4_SRC_MASK 11
160 #endif
161 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST
162 #define TCA_FLOWER_KEY_IPV4_DST 12
163 #endif
164 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST_MASK
165 #define TCA_FLOWER_KEY_IPV4_DST_MASK 13
166 #endif
167 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC
168 #define TCA_FLOWER_KEY_IPV6_SRC 14
169 #endif
170 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC_MASK
171 #define TCA_FLOWER_KEY_IPV6_SRC_MASK 15
172 #endif
173 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST
174 #define TCA_FLOWER_KEY_IPV6_DST 16
175 #endif
176 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST_MASK
177 #define TCA_FLOWER_KEY_IPV6_DST_MASK 17
178 #endif
179 #ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC
180 #define TCA_FLOWER_KEY_TCP_SRC 18
181 #endif
182 #ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC_MASK
183 #define TCA_FLOWER_KEY_TCP_SRC_MASK 35
184 #endif
185 #ifndef HAVE_TCA_FLOWER_KEY_TCP_DST
186 #define TCA_FLOWER_KEY_TCP_DST 19
187 #endif
188 #ifndef HAVE_TCA_FLOWER_KEY_TCP_DST_MASK
189 #define TCA_FLOWER_KEY_TCP_DST_MASK 36
190 #endif
191 #ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC
192 #define TCA_FLOWER_KEY_UDP_SRC 20
193 #endif
194 #ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC_MASK
195 #define TCA_FLOWER_KEY_UDP_SRC_MASK 37
196 #endif
197 #ifndef HAVE_TCA_FLOWER_KEY_UDP_DST
198 #define TCA_FLOWER_KEY_UDP_DST 21
199 #endif
200 #ifndef HAVE_TCA_FLOWER_KEY_UDP_DST_MASK
201 #define TCA_FLOWER_KEY_UDP_DST_MASK 38
202 #endif
203 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_ID
204 #define TCA_FLOWER_KEY_VLAN_ID 23
205 #endif
206 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_PRIO
207 #define TCA_FLOWER_KEY_VLAN_PRIO 24
208 #endif
209 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_ETH_TYPE
210 #define TCA_FLOWER_KEY_VLAN_ETH_TYPE 25
211 #endif
212 #ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS
213 #define TCA_FLOWER_KEY_TCP_FLAGS 71
214 #endif
215 #ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS_MASK
216 #define TCA_FLOWER_KEY_TCP_FLAGS_MASK 72
217 #endif
218 #ifndef HAVE_TC_ACT_GOTO_CHAIN
219 #define TC_ACT_GOTO_CHAIN 0x20000000
220 #endif
221
/*
 * Byte lengths of the header fields that can be rewritten. They are passed
 * to NUM_OF_PEDIT_KEYS() to count the pedit keys each rewrite needs.
 */
222 #ifndef IPV6_ADDR_LEN
223 #define IPV6_ADDR_LEN 16
224 #endif
225
226 #ifndef IPV4_ADDR_LEN
227 #define IPV4_ADDR_LEN 4
228 #endif
229
230 #ifndef TP_PORT_LEN
231 #define TP_PORT_LEN 2 /* Transport Port (UDP/TCP) Length */
232 #endif
233
234 #ifndef TTL_LEN
235 #define TTL_LEN 1
236 #endif
237
238 /**
 * Empty masks for known item types.
 *
 * The object has static storage and no initializer, so it is all zeroes;
 * being a union, every member shares that same zeroed storage.
 */
239 static const union {
240         struct rte_flow_item_port_id port_id;
241         struct rte_flow_item_eth eth;
242         struct rte_flow_item_vlan vlan;
243         struct rte_flow_item_ipv4 ipv4;
244         struct rte_flow_item_ipv6 ipv6;
245         struct rte_flow_item_tcp tcp;
246         struct rte_flow_item_udp udp;
247 } flow_tcf_mask_empty;
248
249 /**
 * Supported masks for known item types.
 *
 * Fields not listed in the initializer are zero. flow_tcf_item_mask() takes
 * such a mask as its mask_supported argument and rejects any user mask bit
 * that is not set in it.
 */
250 static const struct {
251         struct rte_flow_item_port_id port_id;
252         struct rte_flow_item_eth eth;
253         struct rte_flow_item_vlan vlan;
254         struct rte_flow_item_ipv4 ipv4;
255         struct rte_flow_item_ipv6 ipv6;
256         struct rte_flow_item_tcp tcp;
257         struct rte_flow_item_udp udp;
258 } flow_tcf_mask_supported = {
259         .port_id = {
260                 .id = 0xffffffff,
261         },
262         .eth = {
263                 .type = RTE_BE16(0xffff),
264                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
265                 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
266         },
267         .vlan = {
268                 /* PCP and VID only, no DEI (bit 0x1000 is left clear). */
269                 .tci = RTE_BE16(0xefff),
270                 .inner_type = RTE_BE16(0xffff),
271         },
272         .ipv4.hdr = {
273                 .next_proto_id = 0xff,
274                 .src_addr = RTE_BE32(0xffffffff),
275                 .dst_addr = RTE_BE32(0xffffffff),
276         },
277         .ipv6.hdr = {
278                 .proto = 0xff,
279                 .src_addr =
280                         "\xff\xff\xff\xff\xff\xff\xff\xff"
281                         "\xff\xff\xff\xff\xff\xff\xff\xff",
282                 .dst_addr =
283                         "\xff\xff\xff\xff\xff\xff\xff\xff"
284                         "\xff\xff\xff\xff\xff\xff\xff\xff",
285         },
286         .tcp.hdr = {
287                 .src_port = RTE_BE16(0xffff),
288                 .dst_port = RTE_BE16(0xffff),
289                 .tcp_flags = 0xff,
290         },
291         .udp.hdr = {
292                 .src_port = RTE_BE16(0xffff),
293                 .dst_port = RTE_BE16(0xffff),
294         },
295 };
296
/*
 * Netlink attribute size helpers, all rounded with MNL_ALIGN():
 * - SZ_NLATTR_HDR: a bare attribute header.
 * - SZ_NLATTR_NEST: a nest opening, i.e. a header without payload.
 * - SZ_NLATTR_DATA_OF(len): header plus @p len payload bytes.
 * - SZ_NLATTR_TYPE_OF(typ): header plus an object of type @p typ.
 * - SZ_NLATTR_STRZ_OF(str): header plus @p str and its terminating NUL.
 */
297 #define SZ_NLATTR_HDR MNL_ALIGN(sizeof(struct nlattr))
298 #define SZ_NLATTR_NEST SZ_NLATTR_HDR
299 #define SZ_NLATTR_DATA_OF(len) MNL_ALIGN(SZ_NLATTR_HDR + (len))
300 #define SZ_NLATTR_TYPE_OF(typ) SZ_NLATTR_DATA_OF(sizeof(typ))
301 #define SZ_NLATTR_STRZ_OF(str) SZ_NLATTR_DATA_OF(strlen(str) + 1)
302
/*
 * Table size to pass to flow_tcf_build_ptoi_table(): the port count reported
 * by mlx5_dev_to_port_id() plus two. That function stores at least one port
 * even when the count is zero and appends a terminating entry.
 */
303 #define PTOI_TABLE_SZ_MAX(dev) (mlx5_dev_to_port_id((dev)->device, NULL, 0) + 2)
304
305 /** DPDK port to network interface index (ifindex) conversion. */
306 struct flow_tcf_ptoi {
307         uint16_t port_id; /**< DPDK port ID. */
308         unsigned int ifindex; /**< Network interface index. */
309 };
310
311 /* Due to a limitation on driver/FW. */
312 #define MLX5_TCF_GROUP_ID_MAX 3
313 #define MLX5_TCF_GROUP_PRIORITY_MAX 14
314
/* Action flags grouped as "fate" actions: drop, port ID redirect and jump. */
315 #define MLX5_TCF_FATE_ACTIONS \
316         (MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_PORT_ID | \
317          MLX5_FLOW_ACTION_JUMP)
318
/* Action flags of the VLAN pop/push/set actions. */
319 #define MLX5_TCF_VLAN_ACTIONS \
320         (MLX5_FLOW_ACTION_OF_POP_VLAN | MLX5_FLOW_ACTION_OF_PUSH_VLAN | \
321          MLX5_FLOW_ACTION_OF_SET_VLAN_VID | MLX5_FLOW_ACTION_OF_SET_VLAN_PCP)
322
/* Action flags of the header rewrite actions converted to TC pedit keys. */
323 #define MLX5_TCF_PEDIT_ACTIONS \
324         (MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST | \
325          MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST | \
326          MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST | \
327          MLX5_FLOW_ACTION_SET_TTL | MLX5_FLOW_ACTION_DEC_TTL | \
328          MLX5_FLOW_ACTION_SET_MAC_SRC | MLX5_FLOW_ACTION_SET_MAC_DST)
329
/*
 * Every pedit action except DEC_TTL, plus port ID, jump and the VLAN
 * push/set actions.
 * NOTE(review): presumably the actions that require a configuration
 * structure; confirm against the users of this macro.
 */
330 #define MLX5_TCF_CONFIG_ACTIONS \
331         (MLX5_FLOW_ACTION_PORT_ID | MLX5_FLOW_ACTION_JUMP | \
332          MLX5_FLOW_ACTION_OF_PUSH_VLAN | MLX5_FLOW_ACTION_OF_SET_VLAN_VID | \
333          MLX5_FLOW_ACTION_OF_SET_VLAN_PCP | \
334          (MLX5_TCF_PEDIT_ACTIONS & ~MLX5_FLOW_ACTION_DEC_TTL))
335
/* Capacity of the key arrays in struct pedit_parser. */
336 #define MAX_PEDIT_KEYS 128
/* Number of bytes carried by the value of one pedit key. */
337 #define SZ_PEDIT_KEY_VAL 4
338
/* Number of pedit keys needed to cover @p sz bytes, rounded up. */
339 #define NUM_OF_PEDIT_KEYS(sz) \
340         (((sz) / SZ_PEDIT_KEY_VAL) + (((sz) % SZ_PEDIT_KEY_VAL) ? 1 : 0))
341
/* Extended per-key data: header type and command, one per pedit key. */
342 struct pedit_key_ex {
343         enum pedit_header_type htype;
344         enum pedit_cmd cmd;
345 };
346
/*
 * Scratch area used while building one TC pedit action.
 *
 * flow_tcf_create_pedit_mnl_msg() sends sizeof(sel) plus sel.nkeys keys
 * starting at the address of this structure as a single attribute payload,
 * so keys[] must directly follow sel. keys_ex[i] describes keys[i].
 */
347 struct pedit_parser {
348         struct tc_pedit_sel sel;
349         struct tc_pedit_key keys[MAX_PEDIT_KEYS];
350         struct pedit_key_ex keys_ex[MAX_PEDIT_KEYS];
351 };
352
353
354 /**
355  * Set pedit key of MAC address
356  *
357  * @param[in] actions
358  *   pointer to action specification
359  * @param[in,out] p_parser
360  *   pointer to pedit_parser
361  */
362 static void
363 flow_tcf_pedit_key_set_mac(const struct rte_flow_action *actions,
364                            struct pedit_parser *p_parser)
365 {
366         int idx = p_parser->sel.nkeys;
367         uint32_t off = actions->type == RTE_FLOW_ACTION_TYPE_SET_MAC_SRC ?
368                                         offsetof(struct ether_hdr, s_addr) :
369                                         offsetof(struct ether_hdr, d_addr);
370         const struct rte_flow_action_set_mac *conf =
371                 (const struct rte_flow_action_set_mac *)actions->conf;
372
373         p_parser->keys[idx].off = off;
374         p_parser->keys[idx].mask = ~UINT32_MAX;
375         p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH;
376         p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
377         memcpy(&p_parser->keys[idx].val,
378                 conf->mac_addr, SZ_PEDIT_KEY_VAL);
379         idx++;
380         p_parser->keys[idx].off = off + SZ_PEDIT_KEY_VAL;
381         p_parser->keys[idx].mask = 0xFFFF0000;
382         p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH;
383         p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
384         memcpy(&p_parser->keys[idx].val,
385                 conf->mac_addr + SZ_PEDIT_KEY_VAL,
386                 ETHER_ADDR_LEN - SZ_PEDIT_KEY_VAL);
387         p_parser->sel.nkeys = (++idx);
388 }
389
390 /**
391  * Set pedit key of decrease/set ttl
392  *
393  * @param[in] actions
394  *   pointer to action specification
395  * @param[in,out] p_parser
396  *   pointer to pedit_parser
397  * @param[in] item_flags
398  *   flags of all items presented
399  */
400 static void
401 flow_tcf_pedit_key_set_dec_ttl(const struct rte_flow_action *actions,
402                                 struct pedit_parser *p_parser,
403                                 uint64_t item_flags)
404 {
405         int idx = p_parser->sel.nkeys;
406
407         p_parser->keys[idx].mask = 0xFFFFFF00;
408         if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4) {
409                 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
410                 p_parser->keys[idx].off =
411                         offsetof(struct ipv4_hdr, time_to_live);
412         }
413         if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6) {
414                 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
415                 p_parser->keys[idx].off =
416                         offsetof(struct ipv6_hdr, hop_limits);
417         }
418         if (actions->type == RTE_FLOW_ACTION_TYPE_DEC_TTL) {
419                 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_ADD;
420                 p_parser->keys[idx].val = 0x000000FF;
421         } else {
422                 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
423                 p_parser->keys[idx].val =
424                         (__u32)((const struct rte_flow_action_set_ttl *)
425                          actions->conf)->ttl_value;
426         }
427         p_parser->sel.nkeys = (++idx);
428 }
429
430 /**
431  * Set pedit key of transport (TCP/UDP) port value
432  *
433  * @param[in] actions
434  *   pointer to action specification
435  * @param[in,out] p_parser
436  *   pointer to pedit_parser
437  * @param[in] item_flags
438  *   flags of all items presented
439  */
440 static void
441 flow_tcf_pedit_key_set_tp_port(const struct rte_flow_action *actions,
442                                 struct pedit_parser *p_parser,
443                                 uint64_t item_flags)
444 {
445         int idx = p_parser->sel.nkeys;
446
447         if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
448                 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_UDP;
449         if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
450                 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_TCP;
451         p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
452         /* offset of src/dst port is same for TCP and UDP */
453         p_parser->keys[idx].off =
454                 actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_SRC ?
455                 offsetof(struct tcp_hdr, src_port) :
456                 offsetof(struct tcp_hdr, dst_port);
457         p_parser->keys[idx].mask = 0xFFFF0000;
458         p_parser->keys[idx].val =
459                 (__u32)((const struct rte_flow_action_set_tp *)
460                                 actions->conf)->port;
461         p_parser->sel.nkeys = (++idx);
462 }
463
464 /**
465  * Set pedit key of ipv6 address
466  *
467  * @param[in] actions
468  *   pointer to action specification
469  * @param[in,out] p_parser
470  *   pointer to pedit_parser
471  */
472 static void
473 flow_tcf_pedit_key_set_ipv6_addr(const struct rte_flow_action *actions,
474                                  struct pedit_parser *p_parser)
475 {
476         int idx = p_parser->sel.nkeys;
477         int keys = NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
478         int off_base =
479                 actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC ?
480                 offsetof(struct ipv6_hdr, src_addr) :
481                 offsetof(struct ipv6_hdr, dst_addr);
482         const struct rte_flow_action_set_ipv6 *conf =
483                 (const struct rte_flow_action_set_ipv6 *)actions->conf;
484
485         for (int i = 0; i < keys; i++, idx++) {
486                 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
487                 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
488                 p_parser->keys[idx].off = off_base + i * SZ_PEDIT_KEY_VAL;
489                 p_parser->keys[idx].mask = ~UINT32_MAX;
490                 memcpy(&p_parser->keys[idx].val,
491                         conf->ipv6_addr + i *  SZ_PEDIT_KEY_VAL,
492                         SZ_PEDIT_KEY_VAL);
493         }
494         p_parser->sel.nkeys += keys;
495 }
496
497 /**
498  * Set pedit key of ipv4 address
499  *
500  * @param[in] actions
501  *   pointer to action specification
502  * @param[in,out] p_parser
503  *   pointer to pedit_parser
504  */
505 static void
506 flow_tcf_pedit_key_set_ipv4_addr(const struct rte_flow_action *actions,
507                                  struct pedit_parser *p_parser)
508 {
509         int idx = p_parser->sel.nkeys;
510
511         p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
512         p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
513         p_parser->keys[idx].off =
514                 actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC ?
515                 offsetof(struct ipv4_hdr, src_addr) :
516                 offsetof(struct ipv4_hdr, dst_addr);
517         p_parser->keys[idx].mask = ~UINT32_MAX;
518         p_parser->keys[idx].val =
519                 ((const struct rte_flow_action_set_ipv4 *)
520                  actions->conf)->ipv4_addr;
521         p_parser->sel.nkeys = (++idx);
522 }
523
524 /**
525  * Create the pedit's na attribute in netlink message
526  * on pre-allocate message buffer
527  *
528  * @param[in,out] nl
529  *   pointer to pre-allocated netlink message buffer
530  * @param[in,out] actions
531  *   pointer to pointer of actions specification.
532  * @param[in,out] action_flags
533  *   pointer to actions flags
534  * @param[in] item_flags
535  *   flags of all item presented
536  */
537 static void
538 flow_tcf_create_pedit_mnl_msg(struct nlmsghdr *nl,
539                               const struct rte_flow_action **actions,
540                               uint64_t item_flags)
541 {
542         struct pedit_parser p_parser;
543         struct nlattr *na_act_options;
544         struct nlattr *na_pedit_keys;
545
546         memset(&p_parser, 0, sizeof(p_parser));
547         mnl_attr_put_strz(nl, TCA_ACT_KIND, "pedit");
548         na_act_options = mnl_attr_nest_start(nl, TCA_ACT_OPTIONS);
549         /* all modify header actions should be in one tc-pedit action */
550         for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
551                 switch ((*actions)->type) {
552                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
553                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
554                         flow_tcf_pedit_key_set_ipv4_addr(*actions, &p_parser);
555                         break;
556                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
557                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
558                         flow_tcf_pedit_key_set_ipv6_addr(*actions, &p_parser);
559                         break;
560                 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
561                 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
562                         flow_tcf_pedit_key_set_tp_port(*actions,
563                                                         &p_parser, item_flags);
564                         break;
565                 case RTE_FLOW_ACTION_TYPE_SET_TTL:
566                 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
567                         flow_tcf_pedit_key_set_dec_ttl(*actions,
568                                                         &p_parser, item_flags);
569                         break;
570                 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
571                 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
572                         flow_tcf_pedit_key_set_mac(*actions, &p_parser);
573                         break;
574                 default:
575                         goto pedit_mnl_msg_done;
576                 }
577         }
578 pedit_mnl_msg_done:
579         p_parser.sel.action = TC_ACT_PIPE;
580         mnl_attr_put(nl, TCA_PEDIT_PARMS_EX,
581                      sizeof(p_parser.sel) +
582                      p_parser.sel.nkeys * sizeof(struct tc_pedit_key),
583                      &p_parser);
584         na_pedit_keys =
585                 mnl_attr_nest_start(nl, TCA_PEDIT_KEYS_EX | NLA_F_NESTED);
586         for (int i = 0; i < p_parser.sel.nkeys; i++) {
587                 struct nlattr *na_pedit_key =
588                         mnl_attr_nest_start(nl,
589                                             TCA_PEDIT_KEY_EX | NLA_F_NESTED);
590                 mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_HTYPE,
591                                  p_parser.keys_ex[i].htype);
592                 mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_CMD,
593                                  p_parser.keys_ex[i].cmd);
594                 mnl_attr_nest_end(nl, na_pedit_key);
595         }
596         mnl_attr_nest_end(nl, na_pedit_keys);
597         mnl_attr_nest_end(nl, na_act_options);
598         (*actions)--;
599 }
600
601 /**
602  * Calculate max memory size of one TC-pedit actions.
603  * One TC-pedit action can contain set of keys each defining
604  * a rewrite element (rte_flow action)
605  *
606  * @param[in,out] actions
607  *   actions specification.
608  * @param[in,out] action_flags
609  *   actions flags
610  * @param[in,out] size
611  *   accumulated size
612  * @return
613  *   Max memory size of one TC-pedit action
614  */
615 static int
616 flow_tcf_get_pedit_actions_size(const struct rte_flow_action **actions,
617                                 uint64_t *action_flags)
618 {
619         int pedit_size = 0;
620         int keys = 0;
621         uint64_t flags = 0;
622
623         pedit_size += SZ_NLATTR_NEST + /* na_act_index. */
624                       SZ_NLATTR_STRZ_OF("pedit") +
625                       SZ_NLATTR_NEST; /* TCA_ACT_OPTIONS. */
626         for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
627                 switch ((*actions)->type) {
628                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
629                         keys += NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN);
630                         flags |= MLX5_FLOW_ACTION_SET_IPV4_SRC;
631                         break;
632                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
633                         keys += NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN);
634                         flags |= MLX5_FLOW_ACTION_SET_IPV4_DST;
635                         break;
636                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
637                         keys += NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
638                         flags |= MLX5_FLOW_ACTION_SET_IPV6_SRC;
639                         break;
640                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
641                         keys += NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
642                         flags |= MLX5_FLOW_ACTION_SET_IPV6_DST;
643                         break;
644                 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
645                         /* TCP is as same as UDP */
646                         keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
647                         flags |= MLX5_FLOW_ACTION_SET_TP_SRC;
648                         break;
649                 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
650                         /* TCP is as same as UDP */
651                         keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
652                         flags |= MLX5_FLOW_ACTION_SET_TP_DST;
653                         break;
654                 case RTE_FLOW_ACTION_TYPE_SET_TTL:
655                         keys += NUM_OF_PEDIT_KEYS(TTL_LEN);
656                         flags |= MLX5_FLOW_ACTION_SET_TTL;
657                         break;
658                 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
659                         keys += NUM_OF_PEDIT_KEYS(TTL_LEN);
660                         flags |= MLX5_FLOW_ACTION_DEC_TTL;
661                         break;
662                 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
663                         keys += NUM_OF_PEDIT_KEYS(ETHER_ADDR_LEN);
664                         flags |= MLX5_FLOW_ACTION_SET_MAC_SRC;
665                         break;
666                 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
667                         keys += NUM_OF_PEDIT_KEYS(ETHER_ADDR_LEN);
668                         flags |= MLX5_FLOW_ACTION_SET_MAC_DST;
669                         break;
670                 default:
671                         goto get_pedit_action_size_done;
672                 }
673         }
674 get_pedit_action_size_done:
675         /* TCA_PEDIT_PARAMS_EX */
676         pedit_size +=
677                 SZ_NLATTR_DATA_OF(sizeof(struct tc_pedit_sel) +
678                                   keys * sizeof(struct tc_pedit_key));
679         pedit_size += SZ_NLATTR_NEST; /* TCA_PEDIT_KEYS */
680         pedit_size += keys *
681                       /* TCA_PEDIT_KEY_EX + HTYPE + CMD */
682                       (SZ_NLATTR_NEST + SZ_NLATTR_DATA_OF(2) +
683                        SZ_NLATTR_DATA_OF(2));
684         (*action_flags) |= flags;
685         (*actions)--;
686         return pedit_size;
687 }
688
689 /**
690  * Retrieve mask for pattern item.
691  *
692  * This function does basic sanity checks on a pattern item in order to
693  * return the most appropriate mask for it.
694  *
695  * @param[in] item
696  *   Item specification.
697  * @param[in] mask_default
698  *   Default mask for pattern item as specified by the flow API.
699  * @param[in] mask_supported
700  *   Mask fields supported by the implementation.
701  * @param[in] mask_empty
702  *   Empty mask to return when there is no specification.
703  * @param[out] error
704  *   Perform verbose error reporting if not NULL.
705  *
706  * @return
707  *   Either @p item->mask or one of the mask parameters on success, NULL
708  *   otherwise and rte_errno is set.
709  */
710 static const void *
711 flow_tcf_item_mask(const struct rte_flow_item *item, const void *mask_default,
712                    const void *mask_supported, const void *mask_empty,
713                    size_t mask_size, struct rte_flow_error *error)
714 {
715         const uint8_t *mask;
716         size_t i;
717
718         /* item->last and item->mask cannot exist without item->spec. */
719         if (!item->spec && (item->mask || item->last)) {
720                 rte_flow_error_set(error, EINVAL,
721                                    RTE_FLOW_ERROR_TYPE_ITEM, item,
722                                    "\"mask\" or \"last\" field provided without"
723                                    " a corresponding \"spec\"");
724                 return NULL;
725         }
726         /* No spec, no mask, no problem. */
727         if (!item->spec)
728                 return mask_empty;
729         mask = item->mask ? item->mask : mask_default;
730         assert(mask);
731         /*
732          * Single-pass check to make sure that:
733          * - Mask is supported, no bits are set outside mask_supported.
734          * - Both item->spec and item->last are included in mask.
735          */
736         for (i = 0; i != mask_size; ++i) {
737                 if (!mask[i])
738                         continue;
739                 if ((mask[i] | ((const uint8_t *)mask_supported)[i]) !=
740                     ((const uint8_t *)mask_supported)[i]) {
741                         rte_flow_error_set(error, ENOTSUP,
742                                            RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
743                                            "unsupported field found"
744                                            " in \"mask\"");
745                         return NULL;
746                 }
747                 if (item->last &&
748                     (((const uint8_t *)item->spec)[i] & mask[i]) !=
749                     (((const uint8_t *)item->last)[i] & mask[i])) {
750                         rte_flow_error_set(error, EINVAL,
751                                            RTE_FLOW_ERROR_TYPE_ITEM_LAST,
752                                            item->last,
753                                            "range between \"spec\" and \"last\""
754                                            " not comprised in \"mask\"");
755                         return NULL;
756                 }
757         }
758         return mask;
759 }
760
761 /**
762  * Build a conversion table between port ID and ifindex.
763  *
764  * @param[in] dev
765  *   Pointer to Ethernet device.
766  * @param[out] ptoi
767  *   Pointer to ptoi table.
768  * @param[in] len
769  *   Size of ptoi table provided.
770  *
771  * @return
772  *   Size of ptoi table filled.
773  */
774 static unsigned int
775 flow_tcf_build_ptoi_table(struct rte_eth_dev *dev, struct flow_tcf_ptoi *ptoi,
776                           unsigned int len)
777 {
778         unsigned int n = mlx5_dev_to_port_id(dev->device, NULL, 0);
779         uint16_t port_id[n + 1];
780         unsigned int i;
781         unsigned int own = 0;
782
783         /* At least one port is needed when no switch domain is present. */
784         if (!n) {
785                 n = 1;
786                 port_id[0] = dev->data->port_id;
787         } else {
788                 n = RTE_MIN(mlx5_dev_to_port_id(dev->device, port_id, n), n);
789         }
790         if (n > len)
791                 return 0;
792         for (i = 0; i != n; ++i) {
793                 struct rte_eth_dev_info dev_info;
794
795                 rte_eth_dev_info_get(port_id[i], &dev_info);
796                 if (port_id[i] == dev->data->port_id)
797                         own = i;
798                 ptoi[i].port_id = port_id[i];
799                 ptoi[i].ifindex = dev_info.if_index;
800         }
801         /* Ensure first entry of ptoi[] is the current device. */
802         if (own) {
803                 ptoi[n] = ptoi[0];
804                 ptoi[0] = ptoi[own];
805                 ptoi[own] = ptoi[n];
806         }
807         /* An entry with zero ifindex terminates ptoi[]. */
808         ptoi[n].port_id = 0;
809         ptoi[n].ifindex = 0;
810         return n;
811 }
812
813 /**
814  * Verify the @p attr will be correctly understood by the E-switch.
815  *
816  * @param[in] attr
817  *   Pointer to flow attributes
818  * @param[out] error
819  *   Pointer to error structure.
820  *
821  * @return
822  *   0 on success, a negative errno value otherwise and rte_errno is set.
823  */
824 static int
825 flow_tcf_validate_attributes(const struct rte_flow_attr *attr,
826                              struct rte_flow_error *error)
827 {
828         /*
829          * Supported attributes: groups, some priorities and ingress only.
830          * group is supported only if kernel supports chain. Don't care about
831          * transfer as it is the caller's problem.
832          */
833         if (attr->group > MLX5_TCF_GROUP_ID_MAX)
834                 return rte_flow_error_set(error, ENOTSUP,
835                                           RTE_FLOW_ERROR_TYPE_ATTR_GROUP, attr,
836                                           "group ID larger than "
837                                           RTE_STR(MLX5_TCF_GROUP_ID_MAX)
838                                           " isn't supported");
839         else if (attr->group > 0 &&
840                  attr->priority > MLX5_TCF_GROUP_PRIORITY_MAX)
841                 return rte_flow_error_set(error, ENOTSUP,
842                                           RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
843                                           attr,
844                                           "lowest priority level is "
845                                           RTE_STR(MLX5_TCF_GROUP_PRIORITY_MAX)
846                                           " when group is configured");
847         else if (attr->priority > 0xfffe)
848                 return rte_flow_error_set(error, ENOTSUP,
849                                           RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
850                                           attr,
851                                           "lowest priority level is 0xfffe");
852         if (!attr->ingress)
853                 return rte_flow_error_set(error, EINVAL,
854                                           RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
855                                           attr, "only ingress is supported");
856         if (attr->egress)
857                 return rte_flow_error_set(error, ENOTSUP,
858                                           RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
859                                           attr, "egress is not supported");
860         return 0;
861 }
862
863 /**
864  * Validate flow for E-Switch.
865  *
866  * @param[in] priv
867  *   Pointer to the priv structure.
868  * @param[in] attr
869  *   Pointer to the flow attributes.
870  * @param[in] items
871  *   Pointer to the list of items.
872  * @param[in] actions
873  *   Pointer to the list of actions.
874  * @param[out] error
875  *   Pointer to the error structure.
876  *
877  * @return
878  *   0 on success, a negative errno value otherwise and rte_ernno is set.
879  */
880 static int
881 flow_tcf_validate(struct rte_eth_dev *dev,
882                   const struct rte_flow_attr *attr,
883                   const struct rte_flow_item items[],
884                   const struct rte_flow_action actions[],
885                   struct rte_flow_error *error)
886 {
887         union {
888                 const struct rte_flow_item_port_id *port_id;
889                 const struct rte_flow_item_eth *eth;
890                 const struct rte_flow_item_vlan *vlan;
891                 const struct rte_flow_item_ipv4 *ipv4;
892                 const struct rte_flow_item_ipv6 *ipv6;
893                 const struct rte_flow_item_tcp *tcp;
894                 const struct rte_flow_item_udp *udp;
895         } spec, mask;
896         union {
897                 const struct rte_flow_action_port_id *port_id;
898                 const struct rte_flow_action_jump *jump;
899                 const struct rte_flow_action_of_push_vlan *of_push_vlan;
900                 const struct rte_flow_action_of_set_vlan_vid *
901                         of_set_vlan_vid;
902                 const struct rte_flow_action_of_set_vlan_pcp *
903                         of_set_vlan_pcp;
904                 const struct rte_flow_action_set_ipv4 *set_ipv4;
905                 const struct rte_flow_action_set_ipv6 *set_ipv6;
906         } conf;
907         uint32_t item_flags = 0;
908         uint32_t action_flags = 0;
909         uint8_t next_protocol = -1;
910         unsigned int tcm_ifindex = 0;
911         uint8_t pedit_validated = 0;
912         struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
913         struct rte_eth_dev *port_id_dev = NULL;
914         bool in_port_id_set;
915         int ret;
916
917         claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
918                                                 PTOI_TABLE_SZ_MAX(dev)));
919         ret = flow_tcf_validate_attributes(attr, error);
920         if (ret < 0)
921                 return ret;
922         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
923                 unsigned int i;
924
925                 switch (items->type) {
926                 case RTE_FLOW_ITEM_TYPE_VOID:
927                         break;
928                 case RTE_FLOW_ITEM_TYPE_PORT_ID:
929                         mask.port_id = flow_tcf_item_mask
930                                 (items, &rte_flow_item_port_id_mask,
931                                  &flow_tcf_mask_supported.port_id,
932                                  &flow_tcf_mask_empty.port_id,
933                                  sizeof(flow_tcf_mask_supported.port_id),
934                                  error);
935                         if (!mask.port_id)
936                                 return -rte_errno;
937                         if (mask.port_id == &flow_tcf_mask_empty.port_id) {
938                                 in_port_id_set = 1;
939                                 break;
940                         }
941                         spec.port_id = items->spec;
942                         if (mask.port_id->id && mask.port_id->id != 0xffffffff)
943                                 return rte_flow_error_set
944                                         (error, ENOTSUP,
945                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
946                                          mask.port_id,
947                                          "no support for partial mask on"
948                                          " \"id\" field");
949                         if (!mask.port_id->id)
950                                 i = 0;
951                         else
952                                 for (i = 0; ptoi[i].ifindex; ++i)
953                                         if (ptoi[i].port_id == spec.port_id->id)
954                                                 break;
955                         if (!ptoi[i].ifindex)
956                                 return rte_flow_error_set
957                                         (error, ENODEV,
958                                          RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
959                                          spec.port_id,
960                                          "missing data to convert port ID to"
961                                          " ifindex");
962                         if (in_port_id_set && ptoi[i].ifindex != tcm_ifindex)
963                                 return rte_flow_error_set
964                                         (error, ENOTSUP,
965                                          RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
966                                          spec.port_id,
967                                          "cannot match traffic for"
968                                          " several port IDs through"
969                                          " a single flow rule");
970                         tcm_ifindex = ptoi[i].ifindex;
971                         in_port_id_set = 1;
972                         break;
973                 case RTE_FLOW_ITEM_TYPE_ETH:
974                         ret = mlx5_flow_validate_item_eth(items, item_flags,
975                                                           error);
976                         if (ret < 0)
977                                 return ret;
978                         item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
979                         /* TODO:
980                          * Redundant check due to different supported mask.
981                          * Same for the rest of items.
982                          */
983                         mask.eth = flow_tcf_item_mask
984                                 (items, &rte_flow_item_eth_mask,
985                                  &flow_tcf_mask_supported.eth,
986                                  &flow_tcf_mask_empty.eth,
987                                  sizeof(flow_tcf_mask_supported.eth),
988                                  error);
989                         if (!mask.eth)
990                                 return -rte_errno;
991                         if (mask.eth->type && mask.eth->type !=
992                             RTE_BE16(0xffff))
993                                 return rte_flow_error_set
994                                         (error, ENOTSUP,
995                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
996                                          mask.eth,
997                                          "no support for partial mask on"
998                                          " \"type\" field");
999                         break;
1000                 case RTE_FLOW_ITEM_TYPE_VLAN:
1001                         ret = mlx5_flow_validate_item_vlan(items, item_flags,
1002                                                            error);
1003                         if (ret < 0)
1004                                 return ret;
1005                         item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
1006                         mask.vlan = flow_tcf_item_mask
1007                                 (items, &rte_flow_item_vlan_mask,
1008                                  &flow_tcf_mask_supported.vlan,
1009                                  &flow_tcf_mask_empty.vlan,
1010                                  sizeof(flow_tcf_mask_supported.vlan),
1011                                  error);
1012                         if (!mask.vlan)
1013                                 return -rte_errno;
1014                         if ((mask.vlan->tci & RTE_BE16(0xe000) &&
1015                              (mask.vlan->tci & RTE_BE16(0xe000)) !=
1016                               RTE_BE16(0xe000)) ||
1017                             (mask.vlan->tci & RTE_BE16(0x0fff) &&
1018                              (mask.vlan->tci & RTE_BE16(0x0fff)) !=
1019                               RTE_BE16(0x0fff)) ||
1020                             (mask.vlan->inner_type &&
1021                              mask.vlan->inner_type != RTE_BE16(0xffff)))
1022                                 return rte_flow_error_set
1023                                         (error, ENOTSUP,
1024                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
1025                                          mask.vlan,
1026                                          "no support for partial masks on"
1027                                          " \"tci\" (PCP and VID parts) and"
1028                                          " \"inner_type\" fields");
1029                         break;
1030                 case RTE_FLOW_ITEM_TYPE_IPV4:
1031                         ret = mlx5_flow_validate_item_ipv4(items, item_flags,
1032                                                            error);
1033                         if (ret < 0)
1034                                 return ret;
1035                         item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1036                         mask.ipv4 = flow_tcf_item_mask
1037                                 (items, &rte_flow_item_ipv4_mask,
1038                                  &flow_tcf_mask_supported.ipv4,
1039                                  &flow_tcf_mask_empty.ipv4,
1040                                  sizeof(flow_tcf_mask_supported.ipv4),
1041                                  error);
1042                         if (!mask.ipv4)
1043                                 return -rte_errno;
1044                         if (mask.ipv4->hdr.next_proto_id &&
1045                             mask.ipv4->hdr.next_proto_id != 0xff)
1046                                 return rte_flow_error_set
1047                                         (error, ENOTSUP,
1048                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
1049                                          mask.ipv4,
1050                                          "no support for partial mask on"
1051                                          " \"hdr.next_proto_id\" field");
1052                         else if (mask.ipv4->hdr.next_proto_id)
1053                                 next_protocol =
1054                                         ((const struct rte_flow_item_ipv4 *)
1055                                          (items->spec))->hdr.next_proto_id;
1056                         break;
1057                 case RTE_FLOW_ITEM_TYPE_IPV6:
1058                         ret = mlx5_flow_validate_item_ipv6(items, item_flags,
1059                                                            error);
1060                         if (ret < 0)
1061                                 return ret;
1062                         item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1063                         mask.ipv6 = flow_tcf_item_mask
1064                                 (items, &rte_flow_item_ipv6_mask,
1065                                  &flow_tcf_mask_supported.ipv6,
1066                                  &flow_tcf_mask_empty.ipv6,
1067                                  sizeof(flow_tcf_mask_supported.ipv6),
1068                                  error);
1069                         if (!mask.ipv6)
1070                                 return -rte_errno;
1071                         if (mask.ipv6->hdr.proto &&
1072                             mask.ipv6->hdr.proto != 0xff)
1073                                 return rte_flow_error_set
1074                                         (error, ENOTSUP,
1075                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
1076                                          mask.ipv6,
1077                                          "no support for partial mask on"
1078                                          " \"hdr.proto\" field");
1079                         else if (mask.ipv6->hdr.proto)
1080                                 next_protocol =
1081                                         ((const struct rte_flow_item_ipv6 *)
1082                                          (items->spec))->hdr.proto;
1083                         break;
1084                 case RTE_FLOW_ITEM_TYPE_UDP:
1085                         ret = mlx5_flow_validate_item_udp(items, item_flags,
1086                                                           next_protocol, error);
1087                         if (ret < 0)
1088                                 return ret;
1089                         item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
1090                         mask.udp = flow_tcf_item_mask
1091                                 (items, &rte_flow_item_udp_mask,
1092                                  &flow_tcf_mask_supported.udp,
1093                                  &flow_tcf_mask_empty.udp,
1094                                  sizeof(flow_tcf_mask_supported.udp),
1095                                  error);
1096                         if (!mask.udp)
1097                                 return -rte_errno;
1098                         break;
1099                 case RTE_FLOW_ITEM_TYPE_TCP:
1100                         ret = mlx5_flow_validate_item_tcp
1101                                              (items, item_flags,
1102                                               next_protocol,
1103                                               &flow_tcf_mask_supported.tcp,
1104                                               error);
1105                         if (ret < 0)
1106                                 return ret;
1107                         item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
1108                         mask.tcp = flow_tcf_item_mask
1109                                 (items, &rte_flow_item_tcp_mask,
1110                                  &flow_tcf_mask_supported.tcp,
1111                                  &flow_tcf_mask_empty.tcp,
1112                                  sizeof(flow_tcf_mask_supported.tcp),
1113                                  error);
1114                         if (!mask.tcp)
1115                                 return -rte_errno;
1116                         break;
1117                 default:
1118                         return rte_flow_error_set(error, ENOTSUP,
1119                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1120                                                   NULL, "item not supported");
1121                 }
1122         }
1123         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1124                 unsigned int i;
1125                 uint32_t current_action_flag = 0;
1126
1127                 switch (actions->type) {
1128                 case RTE_FLOW_ACTION_TYPE_VOID:
1129                         break;
1130                 case RTE_FLOW_ACTION_TYPE_PORT_ID:
1131                         current_action_flag = MLX5_FLOW_ACTION_PORT_ID;
1132                         if (!actions->conf)
1133                                 break;
1134                         conf.port_id = actions->conf;
1135                         if (conf.port_id->original)
1136                                 i = 0;
1137                         else
1138                                 for (i = 0; ptoi[i].ifindex; ++i)
1139                                         if (ptoi[i].port_id == conf.port_id->id)
1140                                                 break;
1141                         if (!ptoi[i].ifindex)
1142                                 return rte_flow_error_set
1143                                         (error, ENODEV,
1144                                          RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1145                                          conf.port_id,
1146                                          "missing data to convert port ID to"
1147                                          " ifindex");
1148                         port_id_dev = &rte_eth_devices[conf.port_id->id];
1149                         break;
1150                 case RTE_FLOW_ACTION_TYPE_JUMP:
1151                         current_action_flag = MLX5_FLOW_ACTION_JUMP;
1152                         if (!actions->conf)
1153                                 break;
1154                         conf.jump = actions->conf;
1155                         if (attr->group >= conf.jump->group)
1156                                 return rte_flow_error_set
1157                                         (error, ENOTSUP,
1158                                          RTE_FLOW_ERROR_TYPE_ACTION,
1159                                          actions,
1160                                          "can jump only to a group forward");
1161                         break;
1162                 case RTE_FLOW_ACTION_TYPE_DROP:
1163                         current_action_flag = MLX5_FLOW_ACTION_DROP;
1164                         break;
1165                 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
1166                         current_action_flag = MLX5_FLOW_ACTION_OF_POP_VLAN;
1167                         break;
1168                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
1169                         current_action_flag = MLX5_FLOW_ACTION_OF_PUSH_VLAN;
1170                         break;
1171                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
1172                         if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
1173                                 return rte_flow_error_set
1174                                         (error, ENOTSUP,
1175                                          RTE_FLOW_ERROR_TYPE_ACTION, actions,
1176                                          "vlan modify is not supported,"
1177                                          " set action must follow push action");
1178                         current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
1179                         break;
1180                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
1181                         if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
1182                                 return rte_flow_error_set
1183                                         (error, ENOTSUP,
1184                                          RTE_FLOW_ERROR_TYPE_ACTION, actions,
1185                                          "vlan modify is not supported,"
1186                                          " set action must follow push action");
1187                         current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
1188                         break;
1189                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
1190                         current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_SRC;
1191                         break;
1192                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
1193                         current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_DST;
1194                         break;
1195                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
1196                         current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_SRC;
1197                         break;
1198                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
1199                         current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_DST;
1200                         break;
1201                 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
1202                         current_action_flag = MLX5_FLOW_ACTION_SET_TP_SRC;
1203                         break;
1204                 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
1205                         current_action_flag = MLX5_FLOW_ACTION_SET_TP_DST;
1206                         break;
1207                 case RTE_FLOW_ACTION_TYPE_SET_TTL:
1208                         current_action_flag = MLX5_FLOW_ACTION_SET_TTL;
1209                         break;
1210                 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
1211                         current_action_flag = MLX5_FLOW_ACTION_DEC_TTL;
1212                         break;
1213                 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
1214                         current_action_flag = MLX5_FLOW_ACTION_SET_MAC_SRC;
1215                         break;
1216                 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
1217                         current_action_flag = MLX5_FLOW_ACTION_SET_MAC_DST;
1218                         break;
1219                 default:
1220                         return rte_flow_error_set(error, ENOTSUP,
1221                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1222                                                   actions,
1223                                                   "action not supported");
1224                 }
1225                 if (current_action_flag & MLX5_TCF_CONFIG_ACTIONS) {
1226                         if (!actions->conf)
1227                                 return rte_flow_error_set(error, EINVAL,
1228                                                 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1229                                                 actions,
1230                                                 "action configuration not set");
1231                 }
1232                 if ((current_action_flag & MLX5_TCF_PEDIT_ACTIONS) &&
1233                     pedit_validated)
1234                         return rte_flow_error_set(error, ENOTSUP,
1235                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1236                                                   actions,
1237                                                   "set actions should be "
1238                                                   "listed successively");
1239                 if ((current_action_flag & ~MLX5_TCF_PEDIT_ACTIONS) &&
1240                     (action_flags & MLX5_TCF_PEDIT_ACTIONS))
1241                         pedit_validated = 1;
1242                 if ((current_action_flag & MLX5_TCF_FATE_ACTIONS) &&
1243                     (action_flags & MLX5_TCF_FATE_ACTIONS))
1244                         return rte_flow_error_set(error, EINVAL,
1245                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1246                                                   actions,
1247                                                   "can't have multiple fate"
1248                                                   " actions");
1249                 action_flags |= current_action_flag;
1250         }
1251         if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
1252             (action_flags & MLX5_FLOW_ACTION_DROP))
1253                 return rte_flow_error_set(error, ENOTSUP,
1254                                           RTE_FLOW_ERROR_TYPE_ACTION,
1255                                           actions,
1256                                           "set action is not compatible with "
1257                                           "drop action");
1258         if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
1259             !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
1260                 return rte_flow_error_set(error, ENOTSUP,
1261                                           RTE_FLOW_ERROR_TYPE_ACTION,
1262                                           actions,
1263                                           "set action must be followed by "
1264                                           "port_id action");
1265         if (action_flags &
1266            (MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST)) {
1267                 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4))
1268                         return rte_flow_error_set(error, EINVAL,
1269                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1270                                                   actions,
1271                                                   "no ipv4 item found in"
1272                                                   " pattern");
1273         }
1274         if (action_flags &
1275            (MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST)) {
1276                 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6))
1277                         return rte_flow_error_set(error, EINVAL,
1278                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1279                                                   actions,
1280                                                   "no ipv6 item found in"
1281                                                   " pattern");
1282         }
1283         if (action_flags &
1284            (MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST)) {
1285                 if (!(item_flags &
1286                      (MLX5_FLOW_LAYER_OUTER_L4_UDP |
1287                       MLX5_FLOW_LAYER_OUTER_L4_TCP)))
1288                         return rte_flow_error_set(error, EINVAL,
1289                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1290                                                   actions,
1291                                                   "no TCP/UDP item found in"
1292                                                   " pattern");
1293         }
1294         /*
1295          * FW syndrome (0xA9C090):
1296          *     set_flow_table_entry: push vlan action fte in fdb can ONLY be
1297          *     forward to the uplink.
1298          */
1299         if ((action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN) &&
1300             (action_flags & MLX5_FLOW_ACTION_PORT_ID) &&
1301             ((struct priv *)port_id_dev->data->dev_private)->representor)
1302                 return rte_flow_error_set(error, ENOTSUP,
1303                                           RTE_FLOW_ERROR_TYPE_ACTION, actions,
1304                                           "vlan push can only be applied"
1305                                           " when forwarding to uplink port");
1306         /*
1307          * FW syndrome (0x294609):
1308          *     set_flow_table_entry: modify/pop/push actions in fdb flow table
1309          *     are supported only while forwarding to vport.
1310          */
1311         if ((action_flags & MLX5_TCF_VLAN_ACTIONS) &&
1312             !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
1313                 return rte_flow_error_set(error, ENOTSUP,
1314                                           RTE_FLOW_ERROR_TYPE_ACTION, actions,
1315                                           "vlan actions are supported"
1316                                           " only with port_id action");
1317         if (!(action_flags & MLX5_TCF_FATE_ACTIONS))
1318                 return rte_flow_error_set(error, EINVAL,
1319                                           RTE_FLOW_ERROR_TYPE_ACTION, actions,
1320                                           "no fate action is found");
1321         if (action_flags &
1322            (MLX5_FLOW_ACTION_SET_TTL | MLX5_FLOW_ACTION_DEC_TTL)) {
1323                 if (!(item_flags &
1324                      (MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
1325                       MLX5_FLOW_LAYER_OUTER_L3_IPV6)))
1326                         return rte_flow_error_set(error, EINVAL,
1327                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1328                                                   actions,
1329                                                   "no IP found in pattern");
1330         }
1331         if (action_flags &
1332             (MLX5_FLOW_ACTION_SET_MAC_SRC | MLX5_FLOW_ACTION_SET_MAC_DST)) {
1333                 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L2))
1334                         return rte_flow_error_set(error, ENOTSUP,
1335                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1336                                                   actions,
1337                                                   "no ethernet found in"
1338                                                   " pattern");
1339         }
1340         return 0;
1341 }
1342
1343 /**
1344  * Calculate maximum size of memory for flow items of Linux TC flower and
1345  * extract specified items.
1346  *
1347  * @param[in] items
1348  *   Pointer to the list of items.
1349  * @param[out] item_flags
1350  *   Pointer to the detected items.
1351  *
1352  * @return
1353  *   Maximum size of memory for items.
1354  */
1355 static int
1356 flow_tcf_get_items_and_size(const struct rte_flow_attr *attr,
1357                             const struct rte_flow_item items[],
1358                             uint64_t *item_flags)
1359 {
1360         int size = 0;
1361         uint64_t flags = 0;
1362
1363         size += SZ_NLATTR_STRZ_OF("flower") +
1364                 SZ_NLATTR_NEST + /* TCA_OPTIONS. */
1365                 SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CLS_FLAGS_SKIP_SW. */
1366         if (attr->group > 0)
1367                 size += SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CHAIN. */
1368         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1369                 switch (items->type) {
1370                 case RTE_FLOW_ITEM_TYPE_VOID:
1371                         break;
1372                 case RTE_FLOW_ITEM_TYPE_PORT_ID:
1373                         break;
1374                 case RTE_FLOW_ITEM_TYPE_ETH:
1375                         size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1376                                 SZ_NLATTR_DATA_OF(ETHER_ADDR_LEN) * 4;
1377                                 /* dst/src MAC addr and mask. */
1378                         flags |= MLX5_FLOW_LAYER_OUTER_L2;
1379                         break;
1380                 case RTE_FLOW_ITEM_TYPE_VLAN:
1381                         size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1382                                 SZ_NLATTR_TYPE_OF(uint16_t) +
1383                                 /* VLAN Ether type. */
1384                                 SZ_NLATTR_TYPE_OF(uint8_t) + /* VLAN prio. */
1385                                 SZ_NLATTR_TYPE_OF(uint16_t); /* VLAN ID. */
1386                         flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
1387                         break;
1388                 case RTE_FLOW_ITEM_TYPE_IPV4:
1389                         size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1390                                 SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1391                                 SZ_NLATTR_TYPE_OF(uint32_t) * 4;
1392                                 /* dst/src IP addr and mask. */
1393                         flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1394                         break;
1395                 case RTE_FLOW_ITEM_TYPE_IPV6:
1396                         size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1397                                 SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1398                                 SZ_NLATTR_TYPE_OF(IPV6_ADDR_LEN) * 4;
1399                                 /* dst/src IP addr and mask. */
1400                         flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1401                         break;
1402                 case RTE_FLOW_ITEM_TYPE_UDP:
1403                         size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1404                                 SZ_NLATTR_TYPE_OF(uint16_t) * 4;
1405                                 /* dst/src port and mask. */
1406                         flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
1407                         break;
1408                 case RTE_FLOW_ITEM_TYPE_TCP:
1409                         size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1410                                 SZ_NLATTR_TYPE_OF(uint16_t) * 4;
1411                                 /* dst/src port and mask. */
1412                         flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
1413                         break;
1414                 default:
1415                         DRV_LOG(WARNING,
1416                                 "unsupported item %p type %d,"
1417                                 " items must be validated before flow creation",
1418                                 (const void *)items, items->type);
1419                         break;
1420                 }
1421         }
1422         *item_flags = flags;
1423         return size;
1424 }
1425
1426 /**
1427  * Calculate maximum size of memory for flow actions of Linux TC flower and
1428  * extract specified actions.
1429  *
1430  * @param[in] actions
1431  *   Pointer to the list of actions.
1432  * @param[out] action_flags
1433  *   Pointer to the detected actions.
1434  *
1435  * @return
1436  *   Maximum size of memory for actions.
1437  */
1438 static int
1439 flow_tcf_get_actions_and_size(const struct rte_flow_action actions[],
1440                               uint64_t *action_flags)
1441 {
1442         int size = 0;
1443         uint64_t flags = 0;
1444
1445         size += SZ_NLATTR_NEST; /* TCA_FLOWER_ACT. */
1446         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1447                 switch (actions->type) {
1448                 case RTE_FLOW_ACTION_TYPE_VOID:
1449                         break;
1450                 case RTE_FLOW_ACTION_TYPE_PORT_ID:
1451                         size += SZ_NLATTR_NEST + /* na_act_index. */
1452                                 SZ_NLATTR_STRZ_OF("mirred") +
1453                                 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1454                                 SZ_NLATTR_TYPE_OF(struct tc_mirred);
1455                         flags |= MLX5_FLOW_ACTION_PORT_ID;
1456                         break;
1457                 case RTE_FLOW_ACTION_TYPE_JUMP:
1458                         size += SZ_NLATTR_NEST + /* na_act_index. */
1459                                 SZ_NLATTR_STRZ_OF("gact") +
1460                                 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1461                                 SZ_NLATTR_TYPE_OF(struct tc_gact);
1462                         flags |= MLX5_FLOW_ACTION_JUMP;
1463                         break;
1464                 case RTE_FLOW_ACTION_TYPE_DROP:
1465                         size += SZ_NLATTR_NEST + /* na_act_index. */
1466                                 SZ_NLATTR_STRZ_OF("gact") +
1467                                 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1468                                 SZ_NLATTR_TYPE_OF(struct tc_gact);
1469                         flags |= MLX5_FLOW_ACTION_DROP;
1470                         break;
1471                 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
1472                         flags |= MLX5_FLOW_ACTION_OF_POP_VLAN;
1473                         goto action_of_vlan;
1474                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
1475                         flags |= MLX5_FLOW_ACTION_OF_PUSH_VLAN;
1476                         goto action_of_vlan;
1477                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
1478                         flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
1479                         goto action_of_vlan;
1480                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
1481                         flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
1482                         goto action_of_vlan;
1483 action_of_vlan:
1484                         size += SZ_NLATTR_NEST + /* na_act_index. */
1485                                 SZ_NLATTR_STRZ_OF("vlan") +
1486                                 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1487                                 SZ_NLATTR_TYPE_OF(struct tc_vlan) +
1488                                 SZ_NLATTR_TYPE_OF(uint16_t) +
1489                                 /* VLAN protocol. */
1490                                 SZ_NLATTR_TYPE_OF(uint16_t) + /* VLAN ID. */
1491                                 SZ_NLATTR_TYPE_OF(uint8_t); /* VLAN prio. */
1492                         break;
1493                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
1494                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
1495                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
1496                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
1497                 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
1498                 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
1499                 case RTE_FLOW_ACTION_TYPE_SET_TTL:
1500                 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
1501                 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
1502                 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
1503                         size += flow_tcf_get_pedit_actions_size(&actions,
1504                                                                 &flags);
1505                         break;
1506                 default:
1507                         DRV_LOG(WARNING,
1508                                 "unsupported action %p type %d,"
1509                                 " items must be validated before flow creation",
1510                                 (const void *)actions, actions->type);
1511                         break;
1512                 }
1513         }
1514         *action_flags = flags;
1515         return size;
1516 }
1517
1518 /**
1519  * Brand rtnetlink buffer with unique handle.
1520  *
1521  * This handle should be unique for a given network interface to avoid
1522  * collisions.
1523  *
1524  * @param nlh
1525  *   Pointer to Netlink message.
1526  * @param handle
1527  *   Unique 32-bit handle to use.
1528  */
1529 static void
1530 flow_tcf_nl_brand(struct nlmsghdr *nlh, uint32_t handle)
1531 {
1532         struct tcmsg *tcm = mnl_nlmsg_get_payload(nlh);
1533
1534         tcm->tcm_handle = handle;
1535         DRV_LOG(DEBUG, "Netlink msg %p is branded with handle %x",
1536                 (void *)nlh, handle);
1537 }
1538
1539 /**
1540  * Prepare a flow object for Linux TC flower. It calculates the maximum size of
1541  * memory required, allocates the memory, initializes Netlink message headers
1542  * and set unique TC message handle.
1543  *
1544  * @param[in] attr
1545  *   Pointer to the flow attributes.
1546  * @param[in] items
1547  *   Pointer to the list of items.
1548  * @param[in] actions
1549  *   Pointer to the list of actions.
1550  * @param[out] item_flags
1551  *   Pointer to bit mask of all items detected.
1552  * @param[out] action_flags
1553  *   Pointer to bit mask of all actions detected.
1554  * @param[out] error
1555  *   Pointer to the error structure.
1556  *
1557  * @return
1558  *   Pointer to mlx5_flow object on success,
1559  *   otherwise NULL and rte_ernno is set.
1560  */
1561 static struct mlx5_flow *
1562 flow_tcf_prepare(const struct rte_flow_attr *attr,
1563                  const struct rte_flow_item items[],
1564                  const struct rte_flow_action actions[],
1565                  uint64_t *item_flags, uint64_t *action_flags,
1566                  struct rte_flow_error *error)
1567 {
1568         size_t size = sizeof(struct mlx5_flow) +
1569                       MNL_ALIGN(sizeof(struct nlmsghdr)) +
1570                       MNL_ALIGN(sizeof(struct tcmsg));
1571         struct mlx5_flow *dev_flow;
1572         struct nlmsghdr *nlh;
1573         struct tcmsg *tcm;
1574
1575         size += flow_tcf_get_items_and_size(attr, items, item_flags);
1576         size += flow_tcf_get_actions_and_size(actions, action_flags);
1577         dev_flow = rte_zmalloc(__func__, size, MNL_ALIGNTO);
1578         if (!dev_flow) {
1579                 rte_flow_error_set(error, ENOMEM,
1580                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1581                                    "not enough memory to create E-Switch flow");
1582                 return NULL;
1583         }
1584         nlh = mnl_nlmsg_put_header((void *)(dev_flow + 1));
1585         tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
1586         *dev_flow = (struct mlx5_flow){
1587                 .tcf = (struct mlx5_flow_tcf){
1588                         .nlh = nlh,
1589                         .tcm = tcm,
1590                 },
1591         };
1592         /*
1593          * Generate a reasonably unique handle based on the address of the
1594          * target buffer.
1595          *
1596          * This is straightforward on 32-bit systems where the flow pointer can
1597          * be used directly. Otherwise, its least significant part is taken
1598          * after shifting it by the previous power of two of the pointed buffer
1599          * size.
1600          */
1601         if (sizeof(dev_flow) <= 4)
1602                 flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow);
1603         else
1604                 flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow >>
1605                                        rte_log2_u32(rte_align32prevpow2(size)));
1606         return dev_flow;
1607 }
1608
1609 /**
1610  * Translate flow for Linux TC flower and construct Netlink message.
1611  *
1612  * @param[in] dev
1613  *   Pointer to the Ethernet device structure.
1614  * @param[in, out] dev_flow
1615  *   Pointer to the sub flow.
1616  * @param[in] attr
1617  *   Pointer to the flow attributes.
1618  * @param[in] items
1619  *   Pointer to the list of items.
1620  * @param[in] actions
1621  *   Pointer to the list of actions.
1622  * @param[out] error
1623  *   Pointer to the error structure.
1624  *
1625  * @return
1626  *   0 on success, a negative errno value otherwise and rte_errno is set.
1627  */
1628 static int
1629 flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
1630                    const struct rte_flow_attr *attr,
1631                    const struct rte_flow_item items[],
1632                    const struct rte_flow_action actions[],
1633                    struct rte_flow_error *error)
1634 {
1635         union {
1636                 const struct rte_flow_item_port_id *port_id;
1637                 const struct rte_flow_item_eth *eth;
1638                 const struct rte_flow_item_vlan *vlan;
1639                 const struct rte_flow_item_ipv4 *ipv4;
1640                 const struct rte_flow_item_ipv6 *ipv6;
1641                 const struct rte_flow_item_tcp *tcp;
1642                 const struct rte_flow_item_udp *udp;
1643         } spec, mask;
1644         union {
1645                 const struct rte_flow_action_port_id *port_id;
1646                 const struct rte_flow_action_jump *jump;
1647                 const struct rte_flow_action_of_push_vlan *of_push_vlan;
1648                 const struct rte_flow_action_of_set_vlan_vid *
1649                         of_set_vlan_vid;
1650                 const struct rte_flow_action_of_set_vlan_pcp *
1651                         of_set_vlan_pcp;
1652         } conf;
1653         struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
1654         struct nlmsghdr *nlh = dev_flow->tcf.nlh;
1655         struct tcmsg *tcm = dev_flow->tcf.tcm;
1656         uint32_t na_act_index_cur;
1657         bool eth_type_set = 0;
1658         bool vlan_present = 0;
1659         bool vlan_eth_type_set = 0;
1660         bool ip_proto_set = 0;
1661         struct nlattr *na_flower;
1662         struct nlattr *na_flower_act;
1663         struct nlattr *na_vlan_id = NULL;
1664         struct nlattr *na_vlan_priority = NULL;
1665         uint64_t item_flags = 0;
1666
1667         claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
1668                                                 PTOI_TABLE_SZ_MAX(dev)));
1669         nlh = dev_flow->tcf.nlh;
1670         tcm = dev_flow->tcf.tcm;
1671         /* Prepare API must have been called beforehand. */
1672         assert(nlh != NULL && tcm != NULL);
1673         tcm->tcm_family = AF_UNSPEC;
1674         tcm->tcm_ifindex = ptoi[0].ifindex;
1675         tcm->tcm_parent = TC_H_MAKE(TC_H_INGRESS, TC_H_MIN_INGRESS);
1676         /*
1677          * Priority cannot be zero to prevent the kernel from picking one
1678          * automatically.
1679          */
1680         tcm->tcm_info = TC_H_MAKE((attr->priority + 1) << 16,
1681                                   RTE_BE16(ETH_P_ALL));
1682         if (attr->group > 0)
1683                 mnl_attr_put_u32(nlh, TCA_CHAIN, attr->group);
1684         mnl_attr_put_strz(nlh, TCA_KIND, "flower");
1685         na_flower = mnl_attr_nest_start(nlh, TCA_OPTIONS);
1686         mnl_attr_put_u32(nlh, TCA_FLOWER_FLAGS, TCA_CLS_FLAGS_SKIP_SW);
1687         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1688                 unsigned int i;
1689
1690                 switch (items->type) {
1691                 case RTE_FLOW_ITEM_TYPE_VOID:
1692                         break;
1693                 case RTE_FLOW_ITEM_TYPE_PORT_ID:
1694                         mask.port_id = flow_tcf_item_mask
1695                                 (items, &rte_flow_item_port_id_mask,
1696                                  &flow_tcf_mask_supported.port_id,
1697                                  &flow_tcf_mask_empty.port_id,
1698                                  sizeof(flow_tcf_mask_supported.port_id),
1699                                  error);
1700                         assert(mask.port_id);
1701                         if (mask.port_id == &flow_tcf_mask_empty.port_id)
1702                                 break;
1703                         spec.port_id = items->spec;
1704                         if (!mask.port_id->id)
1705                                 i = 0;
1706                         else
1707                                 for (i = 0; ptoi[i].ifindex; ++i)
1708                                         if (ptoi[i].port_id == spec.port_id->id)
1709                                                 break;
1710                         assert(ptoi[i].ifindex);
1711                         tcm->tcm_ifindex = ptoi[i].ifindex;
1712                         break;
1713                 case RTE_FLOW_ITEM_TYPE_ETH:
1714                         item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
1715                         mask.eth = flow_tcf_item_mask
1716                                 (items, &rte_flow_item_eth_mask,
1717                                  &flow_tcf_mask_supported.eth,
1718                                  &flow_tcf_mask_empty.eth,
1719                                  sizeof(flow_tcf_mask_supported.eth),
1720                                  error);
1721                         assert(mask.eth);
1722                         if (mask.eth == &flow_tcf_mask_empty.eth)
1723                                 break;
1724                         spec.eth = items->spec;
1725                         if (mask.eth->type) {
1726                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
1727                                                  spec.eth->type);
1728                                 eth_type_set = 1;
1729                         }
1730                         if (!is_zero_ether_addr(&mask.eth->dst)) {
1731                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST,
1732                                              ETHER_ADDR_LEN,
1733                                              spec.eth->dst.addr_bytes);
1734                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST_MASK,
1735                                              ETHER_ADDR_LEN,
1736                                              mask.eth->dst.addr_bytes);
1737                         }
1738                         if (!is_zero_ether_addr(&mask.eth->src)) {
1739                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC,
1740                                              ETHER_ADDR_LEN,
1741                                              spec.eth->src.addr_bytes);
1742                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC_MASK,
1743                                              ETHER_ADDR_LEN,
1744                                              mask.eth->src.addr_bytes);
1745                         }
1746                         break;
1747                 case RTE_FLOW_ITEM_TYPE_VLAN:
1748                         item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
1749                         mask.vlan = flow_tcf_item_mask
1750                                 (items, &rte_flow_item_vlan_mask,
1751                                  &flow_tcf_mask_supported.vlan,
1752                                  &flow_tcf_mask_empty.vlan,
1753                                  sizeof(flow_tcf_mask_supported.vlan),
1754                                  error);
1755                         assert(mask.vlan);
1756                         if (!eth_type_set)
1757                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
1758                                                  RTE_BE16(ETH_P_8021Q));
1759                         eth_type_set = 1;
1760                         vlan_present = 1;
1761                         if (mask.vlan == &flow_tcf_mask_empty.vlan)
1762                                 break;
1763                         spec.vlan = items->spec;
1764                         if (mask.vlan->inner_type) {
1765                                 mnl_attr_put_u16(nlh,
1766                                                  TCA_FLOWER_KEY_VLAN_ETH_TYPE,
1767                                                  spec.vlan->inner_type);
1768                                 vlan_eth_type_set = 1;
1769                         }
1770                         if (mask.vlan->tci & RTE_BE16(0xe000))
1771                                 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_VLAN_PRIO,
1772                                                 (rte_be_to_cpu_16
1773                                                  (spec.vlan->tci) >> 13) & 0x7);
1774                         if (mask.vlan->tci & RTE_BE16(0x0fff))
1775                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_VLAN_ID,
1776                                                  rte_be_to_cpu_16
1777                                                  (spec.vlan->tci &
1778                                                   RTE_BE16(0x0fff)));
1779                         break;
1780                 case RTE_FLOW_ITEM_TYPE_IPV4:
1781                         item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1782                         mask.ipv4 = flow_tcf_item_mask
1783                                 (items, &rte_flow_item_ipv4_mask,
1784                                  &flow_tcf_mask_supported.ipv4,
1785                                  &flow_tcf_mask_empty.ipv4,
1786                                  sizeof(flow_tcf_mask_supported.ipv4),
1787                                  error);
1788                         assert(mask.ipv4);
1789                         if (!eth_type_set || !vlan_eth_type_set)
1790                                 mnl_attr_put_u16(nlh,
1791                                                  vlan_present ?
1792                                                  TCA_FLOWER_KEY_VLAN_ETH_TYPE :
1793                                                  TCA_FLOWER_KEY_ETH_TYPE,
1794                                                  RTE_BE16(ETH_P_IP));
1795                         eth_type_set = 1;
1796                         vlan_eth_type_set = 1;
1797                         if (mask.ipv4 == &flow_tcf_mask_empty.ipv4)
1798                                 break;
1799                         spec.ipv4 = items->spec;
1800                         if (mask.ipv4->hdr.next_proto_id) {
1801                                 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1802                                                 spec.ipv4->hdr.next_proto_id);
1803                                 ip_proto_set = 1;
1804                         }
1805                         if (mask.ipv4->hdr.src_addr) {
1806                                 mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_SRC,
1807                                                  spec.ipv4->hdr.src_addr);
1808                                 mnl_attr_put_u32(nlh,
1809                                                  TCA_FLOWER_KEY_IPV4_SRC_MASK,
1810                                                  mask.ipv4->hdr.src_addr);
1811                         }
1812                         if (mask.ipv4->hdr.dst_addr) {
1813                                 mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_DST,
1814                                                  spec.ipv4->hdr.dst_addr);
1815                                 mnl_attr_put_u32(nlh,
1816                                                  TCA_FLOWER_KEY_IPV4_DST_MASK,
1817                                                  mask.ipv4->hdr.dst_addr);
1818                         }
1819                         break;
1820                 case RTE_FLOW_ITEM_TYPE_IPV6:
1821                         item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1822                         mask.ipv6 = flow_tcf_item_mask
1823                                 (items, &rte_flow_item_ipv6_mask,
1824                                  &flow_tcf_mask_supported.ipv6,
1825                                  &flow_tcf_mask_empty.ipv6,
1826                                  sizeof(flow_tcf_mask_supported.ipv6),
1827                                  error);
1828                         assert(mask.ipv6);
1829                         if (!eth_type_set || !vlan_eth_type_set)
1830                                 mnl_attr_put_u16(nlh,
1831                                                  vlan_present ?
1832                                                  TCA_FLOWER_KEY_VLAN_ETH_TYPE :
1833                                                  TCA_FLOWER_KEY_ETH_TYPE,
1834                                                  RTE_BE16(ETH_P_IPV6));
1835                         eth_type_set = 1;
1836                         vlan_eth_type_set = 1;
1837                         if (mask.ipv6 == &flow_tcf_mask_empty.ipv6)
1838                                 break;
1839                         spec.ipv6 = items->spec;
1840                         if (mask.ipv6->hdr.proto) {
1841                                 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1842                                                 spec.ipv6->hdr.proto);
1843                                 ip_proto_set = 1;
1844                         }
1845                         if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.src_addr)) {
1846                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC,
1847                                              sizeof(spec.ipv6->hdr.src_addr),
1848                                              spec.ipv6->hdr.src_addr);
1849                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC_MASK,
1850                                              sizeof(mask.ipv6->hdr.src_addr),
1851                                              mask.ipv6->hdr.src_addr);
1852                         }
1853                         if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.dst_addr)) {
1854                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST,
1855                                              sizeof(spec.ipv6->hdr.dst_addr),
1856                                              spec.ipv6->hdr.dst_addr);
1857                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST_MASK,
1858                                              sizeof(mask.ipv6->hdr.dst_addr),
1859                                              mask.ipv6->hdr.dst_addr);
1860                         }
1861                         break;
1862                 case RTE_FLOW_ITEM_TYPE_UDP:
1863                         item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
1864                         mask.udp = flow_tcf_item_mask
1865                                 (items, &rte_flow_item_udp_mask,
1866                                  &flow_tcf_mask_supported.udp,
1867                                  &flow_tcf_mask_empty.udp,
1868                                  sizeof(flow_tcf_mask_supported.udp),
1869                                  error);
1870                         assert(mask.udp);
1871                         if (!ip_proto_set)
1872                                 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1873                                                 IPPROTO_UDP);
1874                         if (mask.udp == &flow_tcf_mask_empty.udp)
1875                                 break;
1876                         spec.udp = items->spec;
1877                         if (mask.udp->hdr.src_port) {
1878                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_SRC,
1879                                                  spec.udp->hdr.src_port);
1880                                 mnl_attr_put_u16(nlh,
1881                                                  TCA_FLOWER_KEY_UDP_SRC_MASK,
1882                                                  mask.udp->hdr.src_port);
1883                         }
1884                         if (mask.udp->hdr.dst_port) {
1885                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_DST,
1886                                                  spec.udp->hdr.dst_port);
1887                                 mnl_attr_put_u16(nlh,
1888                                                  TCA_FLOWER_KEY_UDP_DST_MASK,
1889                                                  mask.udp->hdr.dst_port);
1890                         }
1891                         break;
1892                 case RTE_FLOW_ITEM_TYPE_TCP:
1893                         item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
1894                         mask.tcp = flow_tcf_item_mask
1895                                 (items, &rte_flow_item_tcp_mask,
1896                                  &flow_tcf_mask_supported.tcp,
1897                                  &flow_tcf_mask_empty.tcp,
1898                                  sizeof(flow_tcf_mask_supported.tcp),
1899                                  error);
1900                         assert(mask.tcp);
1901                         if (!ip_proto_set)
1902                                 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1903                                                 IPPROTO_TCP);
1904                         if (mask.tcp == &flow_tcf_mask_empty.tcp)
1905                                 break;
1906                         spec.tcp = items->spec;
1907                         if (mask.tcp->hdr.src_port) {
1908                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_SRC,
1909                                                  spec.tcp->hdr.src_port);
1910                                 mnl_attr_put_u16(nlh,
1911                                                  TCA_FLOWER_KEY_TCP_SRC_MASK,
1912                                                  mask.tcp->hdr.src_port);
1913                         }
1914                         if (mask.tcp->hdr.dst_port) {
1915                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_DST,
1916                                                  spec.tcp->hdr.dst_port);
1917                                 mnl_attr_put_u16(nlh,
1918                                                  TCA_FLOWER_KEY_TCP_DST_MASK,
1919                                                  mask.tcp->hdr.dst_port);
1920                         }
1921                         if (mask.tcp->hdr.tcp_flags) {
1922                                 mnl_attr_put_u16
1923                                         (nlh,
1924                                          TCA_FLOWER_KEY_TCP_FLAGS,
1925                                          rte_cpu_to_be_16
1926                                                 (spec.tcp->hdr.tcp_flags));
1927                                 mnl_attr_put_u16
1928                                         (nlh,
1929                                          TCA_FLOWER_KEY_TCP_FLAGS_MASK,
1930                                          rte_cpu_to_be_16
1931                                                 (mask.tcp->hdr.tcp_flags));
1932                         }
1933                         break;
1934                 default:
1935                         return rte_flow_error_set(error, ENOTSUP,
1936                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1937                                                   NULL, "item not supported");
1938                 }
1939         }
1940         na_flower_act = mnl_attr_nest_start(nlh, TCA_FLOWER_ACT);
1941         na_act_index_cur = 1;
1942         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1943                 struct nlattr *na_act_index;
1944                 struct nlattr *na_act;
1945                 unsigned int vlan_act;
1946                 unsigned int i;
1947
1948                 switch (actions->type) {
1949                 case RTE_FLOW_ACTION_TYPE_VOID:
1950                         break;
1951                 case RTE_FLOW_ACTION_TYPE_PORT_ID:
1952                         conf.port_id = actions->conf;
1953                         if (conf.port_id->original)
1954                                 i = 0;
1955                         else
1956                                 for (i = 0; ptoi[i].ifindex; ++i)
1957                                         if (ptoi[i].port_id == conf.port_id->id)
1958                                                 break;
1959                         assert(ptoi[i].ifindex);
1960                         na_act_index =
1961                                 mnl_attr_nest_start(nlh, na_act_index_cur++);
1962                         assert(na_act_index);
1963                         mnl_attr_put_strz(nlh, TCA_ACT_KIND, "mirred");
1964                         na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
1965                         assert(na_act);
1966                         mnl_attr_put(nlh, TCA_MIRRED_PARMS,
1967                                      sizeof(struct tc_mirred),
1968                                      &(struct tc_mirred){
1969                                         .action = TC_ACT_STOLEN,
1970                                         .eaction = TCA_EGRESS_REDIR,
1971                                         .ifindex = ptoi[i].ifindex,
1972                                      });
1973                         mnl_attr_nest_end(nlh, na_act);
1974                         mnl_attr_nest_end(nlh, na_act_index);
1975                         break;
1976                 case RTE_FLOW_ACTION_TYPE_JUMP:
1977                         conf.jump = actions->conf;
1978                         na_act_index =
1979                                 mnl_attr_nest_start(nlh, na_act_index_cur++);
1980                         assert(na_act_index);
1981                         mnl_attr_put_strz(nlh, TCA_ACT_KIND, "gact");
1982                         na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
1983                         assert(na_act);
1984                         mnl_attr_put(nlh, TCA_GACT_PARMS,
1985                                      sizeof(struct tc_gact),
1986                                      &(struct tc_gact){
1987                                         .action = TC_ACT_GOTO_CHAIN |
1988                                                   conf.jump->group,
1989                                      });
1990                         mnl_attr_nest_end(nlh, na_act);
1991                         mnl_attr_nest_end(nlh, na_act_index);
1992                         break;
1993                 case RTE_FLOW_ACTION_TYPE_DROP:
1994                         na_act_index =
1995                                 mnl_attr_nest_start(nlh, na_act_index_cur++);
1996                         assert(na_act_index);
1997                         mnl_attr_put_strz(nlh, TCA_ACT_KIND, "gact");
1998                         na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
1999                         assert(na_act);
2000                         mnl_attr_put(nlh, TCA_GACT_PARMS,
2001                                      sizeof(struct tc_gact),
2002                                      &(struct tc_gact){
2003                                         .action = TC_ACT_SHOT,
2004                                      });
2005                         mnl_attr_nest_end(nlh, na_act);
2006                         mnl_attr_nest_end(nlh, na_act_index);
2007                         break;
2008                 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
2009                         conf.of_push_vlan = NULL;
2010                         vlan_act = TCA_VLAN_ACT_POP;
2011                         goto action_of_vlan;
2012                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
2013                         conf.of_push_vlan = actions->conf;
2014                         vlan_act = TCA_VLAN_ACT_PUSH;
2015                         goto action_of_vlan;
2016                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
2017                         conf.of_set_vlan_vid = actions->conf;
2018                         if (na_vlan_id)
2019                                 goto override_na_vlan_id;
2020                         vlan_act = TCA_VLAN_ACT_MODIFY;
2021                         goto action_of_vlan;
2022                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
2023                         conf.of_set_vlan_pcp = actions->conf;
2024                         if (na_vlan_priority)
2025                                 goto override_na_vlan_priority;
2026                         vlan_act = TCA_VLAN_ACT_MODIFY;
2027                         goto action_of_vlan;
2028 action_of_vlan:
2029                         na_act_index =
2030                                 mnl_attr_nest_start(nlh, na_act_index_cur++);
2031                         assert(na_act_index);
2032                         mnl_attr_put_strz(nlh, TCA_ACT_KIND, "vlan");
2033                         na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
2034                         assert(na_act);
2035                         mnl_attr_put(nlh, TCA_VLAN_PARMS,
2036                                      sizeof(struct tc_vlan),
2037                                      &(struct tc_vlan){
2038                                         .action = TC_ACT_PIPE,
2039                                         .v_action = vlan_act,
2040                                      });
2041                         if (vlan_act == TCA_VLAN_ACT_POP) {
2042                                 mnl_attr_nest_end(nlh, na_act);
2043                                 mnl_attr_nest_end(nlh, na_act_index);
2044                                 break;
2045                         }
2046                         if (vlan_act == TCA_VLAN_ACT_PUSH)
2047                                 mnl_attr_put_u16(nlh,
2048                                                  TCA_VLAN_PUSH_VLAN_PROTOCOL,
2049                                                  conf.of_push_vlan->ethertype);
2050                         na_vlan_id = mnl_nlmsg_get_payload_tail(nlh);
2051                         mnl_attr_put_u16(nlh, TCA_VLAN_PAD, 0);
2052                         na_vlan_priority = mnl_nlmsg_get_payload_tail(nlh);
2053                         mnl_attr_put_u8(nlh, TCA_VLAN_PAD, 0);
2054                         mnl_attr_nest_end(nlh, na_act);
2055                         mnl_attr_nest_end(nlh, na_act_index);
2056                         if (actions->type ==
2057                             RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID) {
2058 override_na_vlan_id:
2059                                 na_vlan_id->nla_type = TCA_VLAN_PUSH_VLAN_ID;
2060                                 *(uint16_t *)mnl_attr_get_payload(na_vlan_id) =
2061                                         rte_be_to_cpu_16
2062                                         (conf.of_set_vlan_vid->vlan_vid);
2063                         } else if (actions->type ==
2064                                    RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP) {
2065 override_na_vlan_priority:
2066                                 na_vlan_priority->nla_type =
2067                                         TCA_VLAN_PUSH_VLAN_PRIORITY;
2068                                 *(uint8_t *)mnl_attr_get_payload
2069                                         (na_vlan_priority) =
2070                                         conf.of_set_vlan_pcp->vlan_pcp;
2071                         }
2072                         break;
2073                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
2074                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
2075                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
2076                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
2077                 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
2078                 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
2079                 case RTE_FLOW_ACTION_TYPE_SET_TTL:
2080                 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
2081                 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
2082                 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
2083                         na_act_index =
2084                                 mnl_attr_nest_start(nlh, na_act_index_cur++);
2085                         flow_tcf_create_pedit_mnl_msg(nlh,
2086                                                       &actions, item_flags);
2087                         mnl_attr_nest_end(nlh, na_act_index);
2088                         break;
2089                 default:
2090                         return rte_flow_error_set(error, ENOTSUP,
2091                                                   RTE_FLOW_ERROR_TYPE_ACTION,
2092                                                   actions,
2093                                                   "action not supported");
2094                 }
2095         }
2096         assert(na_flower);
2097         assert(na_flower_act);
2098         mnl_attr_nest_end(nlh, na_flower_act);
2099         mnl_attr_nest_end(nlh, na_flower);
2100         return 0;
2101 }
2102
2103 /**
2104  * Send Netlink message with acknowledgment.
2105  *
2106  * @param nl
2107  *   Libmnl socket to use.
2108  * @param nlh
2109  *   Message to send. This function always raises the NLM_F_ACK flag before
2110  *   sending.
2111  *
2112  * @return
2113  *   0 on success, a negative errno value otherwise and rte_errno is set.
2114  */
2115 static int
2116 flow_tcf_nl_ack(struct mnl_socket *nl, struct nlmsghdr *nlh)
2117 {
2118         alignas(struct nlmsghdr)
2119         uint8_t ans[mnl_nlmsg_size(sizeof(struct nlmsgerr)) +
2120                     nlh->nlmsg_len - sizeof(*nlh)];
2121         uint32_t seq = random();
2122         int ret;
2123
2124         nlh->nlmsg_flags |= NLM_F_ACK;
2125         nlh->nlmsg_seq = seq;
2126         ret = mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
2127         if (ret != -1)
2128                 ret = mnl_socket_recvfrom(nl, ans, sizeof(ans));
2129         if (ret != -1)
2130                 ret = mnl_cb_run
2131                         (ans, ret, seq, mnl_socket_get_portid(nl), NULL, NULL);
2132         if (ret > 0)
2133                 return 0;
2134         rte_errno = errno;
2135         return -rte_errno;
2136 }
2137
2138 /**
2139  * Apply flow to E-Switch by sending Netlink message.
2140  *
2141  * @param[in] dev
2142  *   Pointer to Ethernet device.
2143  * @param[in, out] flow
2144  *   Pointer to the sub flow.
2145  * @param[out] error
2146  *   Pointer to the error structure.
2147  *
2148  * @return
2149  *   0 on success, a negative errno value otherwise and rte_ernno is set.
2150  */
2151 static int
2152 flow_tcf_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
2153                struct rte_flow_error *error)
2154 {
2155         struct priv *priv = dev->data->dev_private;
2156         struct mnl_socket *nl = priv->mnl_socket;
2157         struct mlx5_flow *dev_flow;
2158         struct nlmsghdr *nlh;
2159
2160         dev_flow = LIST_FIRST(&flow->dev_flows);
2161         /* E-Switch flow can't be expanded. */
2162         assert(!LIST_NEXT(dev_flow, next));
2163         nlh = dev_flow->tcf.nlh;
2164         nlh->nlmsg_type = RTM_NEWTFILTER;
2165         nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
2166         if (!flow_tcf_nl_ack(nl, nlh))
2167                 return 0;
2168         return rte_flow_error_set(error, rte_errno,
2169                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
2170                                   "netlink: failed to create TC flow rule");
2171 }
2172
2173 /**
2174  * Remove flow from E-Switch by sending Netlink message.
2175  *
2176  * @param[in] dev
2177  *   Pointer to Ethernet device.
2178  * @param[in, out] flow
2179  *   Pointer to the sub flow.
2180  */
2181 static void
2182 flow_tcf_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
2183 {
2184         struct priv *priv = dev->data->dev_private;
2185         struct mnl_socket *nl = priv->mnl_socket;
2186         struct mlx5_flow *dev_flow;
2187         struct nlmsghdr *nlh;
2188
2189         if (!flow)
2190                 return;
2191         dev_flow = LIST_FIRST(&flow->dev_flows);
2192         if (!dev_flow)
2193                 return;
2194         /* E-Switch flow can't be expanded. */
2195         assert(!LIST_NEXT(dev_flow, next));
2196         nlh = dev_flow->tcf.nlh;
2197         nlh->nlmsg_type = RTM_DELTFILTER;
2198         nlh->nlmsg_flags = NLM_F_REQUEST;
2199         flow_tcf_nl_ack(nl, nlh);
2200 }
2201
2202 /**
2203  * Remove flow from E-Switch and release resources of the device flow.
2204  *
2205  * @param[in] dev
2206  *   Pointer to Ethernet device.
2207  * @param[in, out] flow
2208  *   Pointer to the sub flow.
2209  */
2210 static void
2211 flow_tcf_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
2212 {
2213         struct mlx5_flow *dev_flow;
2214
2215         if (!flow)
2216                 return;
2217         flow_tcf_remove(dev, flow);
2218         dev_flow = LIST_FIRST(&flow->dev_flows);
2219         if (!dev_flow)
2220                 return;
2221         /* E-Switch flow can't be expanded. */
2222         assert(!LIST_NEXT(dev_flow, next));
2223         LIST_REMOVE(dev_flow, next);
2224         rte_free(dev_flow);
2225 }
2226
2227 const struct mlx5_flow_driver_ops mlx5_flow_tcf_drv_ops = {
2228         .validate = flow_tcf_validate,
2229         .prepare = flow_tcf_prepare,
2230         .translate = flow_tcf_translate,
2231         .apply = flow_tcf_apply,
2232         .remove = flow_tcf_remove,
2233         .destroy = flow_tcf_destroy,
2234 };
2235
2236 /**
2237  * Initialize ingress qdisc of a given network interface.
2238  *
2239  * @param nl
2240  *   Libmnl socket of the @p NETLINK_ROUTE kind.
2241  * @param ifindex
2242  *   Index of network interface to initialize.
2243  * @param[out] error
2244  *   Perform verbose error reporting if not NULL.
2245  *
2246  * @return
2247  *   0 on success, a negative errno value otherwise and rte_errno is set.
2248  */
2249 int
2250 mlx5_flow_tcf_init(struct mnl_socket *nl, unsigned int ifindex,
2251                    struct rte_flow_error *error)
2252 {
2253         struct nlmsghdr *nlh;
2254         struct tcmsg *tcm;
2255         alignas(struct nlmsghdr)
2256         uint8_t buf[mnl_nlmsg_size(sizeof(*tcm) + 128)];
2257
2258         /* Destroy existing ingress qdisc and everything attached to it. */
2259         nlh = mnl_nlmsg_put_header(buf);
2260         nlh->nlmsg_type = RTM_DELQDISC;
2261         nlh->nlmsg_flags = NLM_F_REQUEST;
2262         tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
2263         tcm->tcm_family = AF_UNSPEC;
2264         tcm->tcm_ifindex = ifindex;
2265         tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
2266         tcm->tcm_parent = TC_H_INGRESS;
2267         /* Ignore errors when qdisc is already absent. */
2268         if (flow_tcf_nl_ack(nl, nlh) &&
2269             rte_errno != EINVAL && rte_errno != ENOENT)
2270                 return rte_flow_error_set(error, rte_errno,
2271                                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
2272                                           "netlink: failed to remove ingress"
2273                                           " qdisc");
2274         /* Create fresh ingress qdisc. */
2275         nlh = mnl_nlmsg_put_header(buf);
2276         nlh->nlmsg_type = RTM_NEWQDISC;
2277         nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
2278         tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
2279         tcm->tcm_family = AF_UNSPEC;
2280         tcm->tcm_ifindex = ifindex;
2281         tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
2282         tcm->tcm_parent = TC_H_INGRESS;
2283         mnl_attr_put_strz_check(nlh, sizeof(buf), TCA_KIND, "ingress");
2284         if (flow_tcf_nl_ack(nl, nlh))
2285                 return rte_flow_error_set(error, rte_errno,
2286                                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
2287                                           "netlink: failed to create ingress"
2288                                           " qdisc");
2289         return 0;
2290 }
2291
2292 /**
2293  * Create and configure a libmnl socket for Netlink flow rules.
2294  *
2295  * @return
2296  *   A valid libmnl socket object pointer on success, NULL otherwise and
2297  *   rte_errno is set.
2298  */
2299 struct mnl_socket *
2300 mlx5_flow_tcf_socket_create(void)
2301 {
2302         struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
2303
2304         if (nl) {
2305                 mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
2306                                       sizeof(int));
2307                 if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
2308                         return nl;
2309         }
2310         rte_errno = errno;
2311         if (nl)
2312                 mnl_socket_close(nl);
2313         return NULL;
2314 }
2315
/**
 * Destroy a libmnl socket.
 *
 * @param nl
 *   Libmnl socket of the @p NETLINK_ROUTE kind. A NULL pointer is accepted
 *   and ignored, so this can be called on a socket that was never created.
 */
void
mlx5_flow_tcf_socket_destroy(struct mnl_socket *nl)
{
        /* mnl_socket_close() dereferences its argument; guard against NULL. */
        if (nl == NULL)
                return;
        mnl_socket_close(nl);
}
2325         mnl_socket_close(nl);
2326 }