net/mlx5: fix build with zero-size array
drivers/net/mlx5/mlx5_flow_tcf.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2018 6WIND S.A.
3  * Copyright 2018 Mellanox Technologies, Ltd
4  */
5
6 #include <assert.h>
7 #include <errno.h>
8 #include <libmnl/libmnl.h>
9 #include <linux/if_ether.h>
10 #include <linux/netlink.h>
11 #include <linux/pkt_cls.h>
12 #include <linux/pkt_sched.h>
13 #include <linux/rtnetlink.h>
14 #include <linux/tc_act/tc_gact.h>
15 #include <linux/tc_act/tc_mirred.h>
16 #include <netinet/in.h>
17 #include <stdalign.h>
18 #include <stdbool.h>
19 #include <stddef.h>
20 #include <stdint.h>
21 #include <stdlib.h>
22 #include <sys/socket.h>
23
24 #include <rte_byteorder.h>
25 #include <rte_errno.h>
26 #include <rte_ether.h>
27 #include <rte_flow.h>
28 #include <rte_malloc.h>
29
30 #include "mlx5.h"
31 #include "mlx5_flow.h"
32 #include "mlx5_autoconf.h"
33
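/*
 * Compatibility fallbacks: when the kernel headers found at build time do
 * not provide the TC definitions used below, mlx5_autoconf.h leaves the
 * corresponding HAVE_* macros undefined and local copies of the needed
 * values and structures are supplied here instead.
 */
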
34 #ifdef HAVE_TC_ACT_VLAN
35
36 #include <linux/tc_act/tc_vlan.h>
37
38 #else /* HAVE_TC_ACT_VLAN */
39
40 #define TCA_VLAN_ACT_POP 1
41 #define TCA_VLAN_ACT_PUSH 2
42 #define TCA_VLAN_ACT_MODIFY 3
43 #define TCA_VLAN_PARMS 2
44 #define TCA_VLAN_PUSH_VLAN_ID 3
45 #define TCA_VLAN_PUSH_VLAN_PROTOCOL 4
46 #define TCA_VLAN_PAD 5
47 #define TCA_VLAN_PUSH_VLAN_PRIORITY 6
48
49 struct tc_vlan {
50         tc_gen;
51         int v_action;
52 };
53
54 #endif /* HAVE_TC_ACT_VLAN */
55
56 #ifdef HAVE_TC_ACT_PEDIT
57
58 #include <linux/tc_act/tc_pedit.h>
59
60 #else /* HAVE_TC_ACT_PEDIT */
61
62 enum {
63         TCA_PEDIT_UNSPEC,
64         TCA_PEDIT_TM,
65         TCA_PEDIT_PARMS,
66         TCA_PEDIT_PAD,
67         TCA_PEDIT_PARMS_EX,
68         TCA_PEDIT_KEYS_EX,
69         TCA_PEDIT_KEY_EX,
70         __TCA_PEDIT_MAX
71 };
72
73 enum {
74         TCA_PEDIT_KEY_EX_HTYPE = 1,
75         TCA_PEDIT_KEY_EX_CMD = 2,
76         __TCA_PEDIT_KEY_EX_MAX
77 };
78
79 enum pedit_header_type {
80         TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK = 0,
81         TCA_PEDIT_KEY_EX_HDR_TYPE_ETH = 1,
82         TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 = 2,
83         TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 = 3,
84         TCA_PEDIT_KEY_EX_HDR_TYPE_TCP = 4,
85         TCA_PEDIT_KEY_EX_HDR_TYPE_UDP = 5,
86         __PEDIT_HDR_TYPE_MAX,
87 };
88
89 enum pedit_cmd {
90         TCA_PEDIT_KEY_EX_CMD_SET = 0,
91         TCA_PEDIT_KEY_EX_CMD_ADD = 1,
92         __PEDIT_CMD_MAX,
93 };
94
95 struct tc_pedit_key {
96         __u32 mask; /* AND */
97         __u32 val; /* XOR */
98         __u32 off; /* offset */
99         __u32 at;
100         __u32 offmask;
101         __u32 shift;
102 };
103
104 __extension__
105 struct tc_pedit_sel {
106         tc_gen;
107         unsigned char nkeys;
108         unsigned char flags;
109         struct tc_pedit_key keys[0];
110 };
111
112 #endif /* HAVE_TC_ACT_PEDIT */
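
/*
 * The kernel pedit action rewrites one 32-bit word per key at byte offset
 * 'off', roughly as:
 *
 *     word = (word & mask) ^ val;
 *
 * hence the helpers below use a zero mask (~UINT32_MAX evaluates to 0) to
 * overwrite a whole word, and a partial mask such as 0xFFFF0000 to keep
 * part of the word when only 16 bits are rewritten.
 */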
113
114 /* Normally found in linux/netlink.h. */
115 #ifndef NETLINK_CAP_ACK
116 #define NETLINK_CAP_ACK 10
117 #endif
118
119 /* Normally found in linux/pkt_sched.h. */
120 #ifndef TC_H_MIN_INGRESS
121 #define TC_H_MIN_INGRESS 0xfff2u
122 #endif
123
124 /* Normally found in linux/pkt_cls.h. */
125 #ifndef TCA_CLS_FLAGS_SKIP_SW
126 #define TCA_CLS_FLAGS_SKIP_SW (1 << 1)
127 #endif
128 #ifndef HAVE_TCA_CHAIN
129 #define TCA_CHAIN 11
130 #endif
131 #ifndef HAVE_TCA_FLOWER_ACT
132 #define TCA_FLOWER_ACT 3
133 #endif
134 #ifndef HAVE_TCA_FLOWER_FLAGS
135 #define TCA_FLOWER_FLAGS 22
136 #endif
137 #ifndef HAVE_TCA_FLOWER_KEY_ETH_TYPE
138 #define TCA_FLOWER_KEY_ETH_TYPE 8
139 #endif
140 #ifndef HAVE_TCA_FLOWER_KEY_ETH_DST
141 #define TCA_FLOWER_KEY_ETH_DST 4
142 #endif
143 #ifndef HAVE_TCA_FLOWER_KEY_ETH_DST_MASK
144 #define TCA_FLOWER_KEY_ETH_DST_MASK 5
145 #endif
146 #ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC
147 #define TCA_FLOWER_KEY_ETH_SRC 6
148 #endif
149 #ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC_MASK
150 #define TCA_FLOWER_KEY_ETH_SRC_MASK 7
151 #endif
152 #ifndef HAVE_TCA_FLOWER_KEY_IP_PROTO
153 #define TCA_FLOWER_KEY_IP_PROTO 9
154 #endif
155 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC
156 #define TCA_FLOWER_KEY_IPV4_SRC 10
157 #endif
158 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC_MASK
159 #define TCA_FLOWER_KEY_IPV4_SRC_MASK 11
160 #endif
161 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST
162 #define TCA_FLOWER_KEY_IPV4_DST 12
163 #endif
164 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST_MASK
165 #define TCA_FLOWER_KEY_IPV4_DST_MASK 13
166 #endif
167 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC
168 #define TCA_FLOWER_KEY_IPV6_SRC 14
169 #endif
170 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC_MASK
171 #define TCA_FLOWER_KEY_IPV6_SRC_MASK 15
172 #endif
173 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST
174 #define TCA_FLOWER_KEY_IPV6_DST 16
175 #endif
176 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST_MASK
177 #define TCA_FLOWER_KEY_IPV6_DST_MASK 17
178 #endif
179 #ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC
180 #define TCA_FLOWER_KEY_TCP_SRC 18
181 #endif
182 #ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC_MASK
183 #define TCA_FLOWER_KEY_TCP_SRC_MASK 35
184 #endif
185 #ifndef HAVE_TCA_FLOWER_KEY_TCP_DST
186 #define TCA_FLOWER_KEY_TCP_DST 19
187 #endif
188 #ifndef HAVE_TCA_FLOWER_KEY_TCP_DST_MASK
189 #define TCA_FLOWER_KEY_TCP_DST_MASK 36
190 #endif
191 #ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC
192 #define TCA_FLOWER_KEY_UDP_SRC 20
193 #endif
194 #ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC_MASK
195 #define TCA_FLOWER_KEY_UDP_SRC_MASK 37
196 #endif
197 #ifndef HAVE_TCA_FLOWER_KEY_UDP_DST
198 #define TCA_FLOWER_KEY_UDP_DST 21
199 #endif
200 #ifndef HAVE_TCA_FLOWER_KEY_UDP_DST_MASK
201 #define TCA_FLOWER_KEY_UDP_DST_MASK 38
202 #endif
203 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_ID
204 #define TCA_FLOWER_KEY_VLAN_ID 23
205 #endif
206 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_PRIO
207 #define TCA_FLOWER_KEY_VLAN_PRIO 24
208 #endif
209 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_ETH_TYPE
210 #define TCA_FLOWER_KEY_VLAN_ETH_TYPE 25
211 #endif
212 #ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS
213 #define TCA_FLOWER_KEY_TCP_FLAGS 71
214 #endif
215 #ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS_MASK
216 #define TCA_FLOWER_KEY_TCP_FLAGS_MASK 72
217 #endif
218 #ifndef HAVE_TC_ACT_GOTO_CHAIN
219 #define TC_ACT_GOTO_CHAIN 0x20000000
220 #endif
221
222 #ifndef IPV6_ADDR_LEN
223 #define IPV6_ADDR_LEN 16
224 #endif
225
226 #ifndef IPV4_ADDR_LEN
227 #define IPV4_ADDR_LEN 4
228 #endif
229
230 #ifndef TP_PORT_LEN
231 #define TP_PORT_LEN 2 /* Transport Port (UDP/TCP) Length */
232 #endif
233
234 /** Empty masks for known item types. */
235 static const union {
236         struct rte_flow_item_port_id port_id;
237         struct rte_flow_item_eth eth;
238         struct rte_flow_item_vlan vlan;
239         struct rte_flow_item_ipv4 ipv4;
240         struct rte_flow_item_ipv6 ipv6;
241         struct rte_flow_item_tcp tcp;
242         struct rte_flow_item_udp udp;
243 } flow_tcf_mask_empty;
244
245 /** Supported masks for known item types. */
246 static const struct {
247         struct rte_flow_item_port_id port_id;
248         struct rte_flow_item_eth eth;
249         struct rte_flow_item_vlan vlan;
250         struct rte_flow_item_ipv4 ipv4;
251         struct rte_flow_item_ipv6 ipv6;
252         struct rte_flow_item_tcp tcp;
253         struct rte_flow_item_udp udp;
254 } flow_tcf_mask_supported = {
255         .port_id = {
256                 .id = 0xffffffff,
257         },
258         .eth = {
259                 .type = RTE_BE16(0xffff),
260                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
261                 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
262         },
263         .vlan = {
264                 /* PCP and VID only, no DEI. */
265                 .tci = RTE_BE16(0xefff),
266                 .inner_type = RTE_BE16(0xffff),
267         },
268         .ipv4.hdr = {
269                 .next_proto_id = 0xff,
270                 .src_addr = RTE_BE32(0xffffffff),
271                 .dst_addr = RTE_BE32(0xffffffff),
272         },
273         .ipv6.hdr = {
274                 .proto = 0xff,
275                 .src_addr =
276                         "\xff\xff\xff\xff\xff\xff\xff\xff"
277                         "\xff\xff\xff\xff\xff\xff\xff\xff",
278                 .dst_addr =
279                         "\xff\xff\xff\xff\xff\xff\xff\xff"
280                         "\xff\xff\xff\xff\xff\xff\xff\xff",
281         },
282         .tcp.hdr = {
283                 .src_port = RTE_BE16(0xffff),
284                 .dst_port = RTE_BE16(0xffff),
285                 .tcp_flags = 0xff,
286         },
287         .udp.hdr = {
288                 .src_port = RTE_BE16(0xffff),
289                 .dst_port = RTE_BE16(0xffff),
290         },
291 };
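
/*
 * flow_tcf_item_mask() below compares user-provided masks byte by byte
 * against the corresponding field of flow_tcf_mask_supported; any bit set
 * outside of it makes the flow rule fail validation with ENOTSUP.
 */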
292
293 #define SZ_NLATTR_HDR MNL_ALIGN(sizeof(struct nlattr))
294 #define SZ_NLATTR_NEST SZ_NLATTR_HDR
295 #define SZ_NLATTR_DATA_OF(len) MNL_ALIGN(SZ_NLATTR_HDR + (len))
296 #define SZ_NLATTR_TYPE_OF(typ) SZ_NLATTR_DATA_OF(sizeof(typ))
297 #define SZ_NLATTR_STRZ_OF(str) SZ_NLATTR_DATA_OF(strlen(str) + 1)
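
/*
 * These helpers estimate netlink attribute sizes, including the 4-byte
 * nlattr header and the 4-byte netlink alignment applied by MNL_ALIGN(),
 * e.g.:
 *
 *     SZ_NLATTR_TYPE_OF(uint16_t) = MNL_ALIGN(4 + 2) = 8
 *     SZ_NLATTR_STRZ_OF("pedit")  = MNL_ALIGN(4 + 6) = 12
 */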
298
299 #define PTOI_TABLE_SZ_MAX(dev) (mlx5_dev_to_port_id((dev)->device, NULL, 0) + 2)
300
301 /** DPDK port to network interface index (ifindex) conversion. */
302 struct flow_tcf_ptoi {
303         uint16_t port_id; /**< DPDK port ID. */
304         unsigned int ifindex; /**< Network interface index. */
305 };
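
/*
 * PTOI_TABLE_SZ_MAX() leaves room for two extra entries: one for the
 * zero-ifindex terminator appended by flow_tcf_build_ptoi_table() and one
 * so that the device's own port still fits when it is not part of a
 * switch domain.
 */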
306
307 /* Due to a driver/FW limitation. */
308 #define MLX5_TCF_GROUP_ID_MAX 3
309 #define MLX5_TCF_GROUP_PRIORITY_MAX 14
310
311 #define MLX5_TCF_FATE_ACTIONS \
312         (MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_PORT_ID | \
313          MLX5_FLOW_ACTION_JUMP)
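
/* Exactly one fate action per flow is accepted by flow_tcf_validate(). */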
314
315 #define MLX5_TCF_VLAN_ACTIONS \
316         (MLX5_FLOW_ACTION_OF_POP_VLAN | MLX5_FLOW_ACTION_OF_PUSH_VLAN | \
317          MLX5_FLOW_ACTION_OF_SET_VLAN_VID | MLX5_FLOW_ACTION_OF_SET_VLAN_PCP)
318
319 #define MLX5_TCF_PEDIT_ACTIONS \
320         (MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST | \
321          MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST | \
322          MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST)
323
324 #define MLX5_TCF_CONFIG_ACTIONS \
325         (MLX5_FLOW_ACTION_PORT_ID | MLX5_FLOW_ACTION_JUMP | \
326          MLX5_FLOW_ACTION_OF_PUSH_VLAN | MLX5_FLOW_ACTION_OF_SET_VLAN_VID | \
327          MLX5_FLOW_ACTION_OF_SET_VLAN_PCP | MLX5_TCF_PEDIT_ACTIONS)
328
329 #define MAX_PEDIT_KEYS 128
330 #define SZ_PEDIT_KEY_VAL 4
331
332 #define NUM_OF_PEDIT_KEYS(sz) \
333         (((sz) / SZ_PEDIT_KEY_VAL) + (((sz) % SZ_PEDIT_KEY_VAL) ? 1 : 0))
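
/*
 * For instance: NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN) = 16 / 4 = 4 keys,
 * NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN) = 4 / 4 = 1 key and
 * NUM_OF_PEDIT_KEYS(TP_PORT_LEN) = 1 key (2 bytes rounded up to one
 * 32-bit key).
 */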
334
335 struct pedit_key_ex {
336         enum pedit_header_type htype;
337         enum pedit_cmd cmd;
338 };
339
340 struct pedit_parser {
341         struct tc_pedit_sel sel;
342         struct tc_pedit_key keys[MAX_PEDIT_KEYS];
343         struct pedit_key_ex keys_ex[MAX_PEDIT_KEYS];
344 };
345
346
347 /**
348  * Set pedit key of transport (TCP/UDP) port value.
349  *
350  * @param[in] actions
351  *   Pointer to action specification.
352  * @param[in,out] p_parser
353  *   Pointer to pedit_parser.
354  * @param[in] item_flags
355  *   Flags of all items present.
356  */
357 static void
358 flow_tcf_pedit_key_set_tp_port(const struct rte_flow_action *actions,
359                                 struct pedit_parser *p_parser,
360                                 uint64_t item_flags)
361 {
362         int idx = p_parser->sel.nkeys;
363
364         if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
365                 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_UDP;
366         if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
367                 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_TCP;
368         p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
369         /* Offset of src/dst port is the same for TCP and UDP. */
370         p_parser->keys[idx].off =
371                 actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_SRC ?
372                 offsetof(struct tcp_hdr, src_port) :
373                 offsetof(struct tcp_hdr, dst_port);
374         p_parser->keys[idx].mask = 0xFFFF0000;
375         p_parser->keys[idx].val =
376                 (__u32)((const struct rte_flow_action_set_tp *)
377                                 actions->conf)->port;
378         p_parser->sel.nkeys = (++idx);
379 }
380
381 /**
382  * Set pedit key of IPv6 address.
383  *
384  * @param[in] actions
385  *   Pointer to action specification.
386  * @param[in,out] p_parser
387  *   Pointer to pedit_parser.
388  */
389 static void
390 flow_tcf_pedit_key_set_ipv6_addr(const struct rte_flow_action *actions,
391                                  struct pedit_parser *p_parser)
392 {
393         int idx = p_parser->sel.nkeys;
394         int keys = NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
395         int off_base =
396                 actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC ?
397                 offsetof(struct ipv6_hdr, src_addr) :
398                 offsetof(struct ipv6_hdr, dst_addr);
399         const struct rte_flow_action_set_ipv6 *conf =
400                 (const struct rte_flow_action_set_ipv6 *)actions->conf;
401
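        /*
         * The 16-byte address is split into four 4-byte pedit keys at
         * offsets off_base + 0/4/8/12, each overwriting one full 32-bit
         * word (mask 0) with the corresponding address bytes.
         */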
402         for (int i = 0; i < keys; i++, idx++) {
403                 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
404                 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
405                 p_parser->keys[idx].off = off_base + i * SZ_PEDIT_KEY_VAL;
406                 p_parser->keys[idx].mask = ~UINT32_MAX;
407                 memcpy(&p_parser->keys[idx].val,
408                         conf->ipv6_addr + i *  SZ_PEDIT_KEY_VAL,
409                         SZ_PEDIT_KEY_VAL);
410         }
411         p_parser->sel.nkeys += keys;
412 }
413
414 /**
415  * Set pedit key of IPv4 address.
416  *
417  * @param[in] actions
418  *   Pointer to action specification.
419  * @param[in,out] p_parser
420  *   Pointer to pedit_parser.
421  */
422 static void
423 flow_tcf_pedit_key_set_ipv4_addr(const struct rte_flow_action *actions,
424                                  struct pedit_parser *p_parser)
425 {
426         int idx = p_parser->sel.nkeys;
427
428         p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
429         p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
430         p_parser->keys[idx].off =
431                 actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC ?
432                 offsetof(struct ipv4_hdr, src_addr) :
433                 offsetof(struct ipv4_hdr, dst_addr);
434         p_parser->keys[idx].mask = ~UINT32_MAX;
435         p_parser->keys[idx].val =
436                 ((const struct rte_flow_action_set_ipv4 *)
437                  actions->conf)->ipv4_addr;
438         p_parser->sel.nkeys = (++idx);
439 }
440
441 /**
442  * Create the pedit netlink attributes in a pre-allocated netlink
443  * message buffer. All successive modify header (pedit) actions are
444  * merged into a single TC pedit action.
445  *
446  * @param[in,out] nl
447  *   Pointer to pre-allocated netlink message buffer.
448  * @param[in,out] actions
449  *   Pointer to pointer of actions specification, advanced to the
450  *   last pedit action processed.
451  * @param[in] item_flags
452  *   Flags of all items present.
453  */
454 static void
455 flow_tcf_create_pedit_mnl_msg(struct nlmsghdr *nl,
456                               const struct rte_flow_action **actions,
457                               uint64_t item_flags)
458 {
459         struct pedit_parser p_parser;
460         struct nlattr *na_act_options;
461         struct nlattr *na_pedit_keys;
462
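        /*
         * Resulting netlink attribute layout:
         *
         *   TCA_ACT_KIND = "pedit"
         *   TCA_ACT_OPTIONS (nest)
         *     TCA_PEDIT_PARMS_EX = struct tc_pedit_sel + tc_pedit_key[]
         *     TCA_PEDIT_KEYS_EX (nest)
         *       TCA_PEDIT_KEY_EX (nest, one per key)
         *         TCA_PEDIT_KEY_EX_HTYPE
         *         TCA_PEDIT_KEY_EX_CMD
         */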
463         memset(&p_parser, 0, sizeof(p_parser));
464         mnl_attr_put_strz(nl, TCA_ACT_KIND, "pedit");
465         na_act_options = mnl_attr_nest_start(nl, TCA_ACT_OPTIONS);
466         /* all modify header actions should be in one tc-pedit action */
467         for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
468                 switch ((*actions)->type) {
469                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
470                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
471                         flow_tcf_pedit_key_set_ipv4_addr(*actions, &p_parser);
472                         break;
473                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
474                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
475                         flow_tcf_pedit_key_set_ipv6_addr(*actions, &p_parser);
476                         break;
477                 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
478                 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
479                         flow_tcf_pedit_key_set_tp_port(*actions,
480                                                         &p_parser, item_flags);
481                         break;
482                 default:
483                         goto pedit_mnl_msg_done;
484                 }
485         }
486 pedit_mnl_msg_done:
487         p_parser.sel.action = TC_ACT_PIPE;
488         mnl_attr_put(nl, TCA_PEDIT_PARMS_EX,
489                      sizeof(p_parser.sel) +
490                      p_parser.sel.nkeys * sizeof(struct tc_pedit_key),
491                      &p_parser);
492         na_pedit_keys =
493                 mnl_attr_nest_start(nl, TCA_PEDIT_KEYS_EX | NLA_F_NESTED);
494         for (int i = 0; i < p_parser.sel.nkeys; i++) {
495                 struct nlattr *na_pedit_key =
496                         mnl_attr_nest_start(nl,
497                                             TCA_PEDIT_KEY_EX | NLA_F_NESTED);
498                 mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_HTYPE,
499                                  p_parser.keys_ex[i].htype);
500                 mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_CMD,
501                                  p_parser.keys_ex[i].cmd);
502                 mnl_attr_nest_end(nl, na_pedit_key);
503         }
504         mnl_attr_nest_end(nl, na_pedit_keys);
505         mnl_attr_nest_end(nl, na_act_options);
506         (*actions)--;
507 }
508
509 /**
510  * Calculate the maximum memory size of one TC-pedit action.
511  * One TC-pedit action can contain a set of keys, each defining
512  * a rewrite element (rte_flow action).
513  *
514  * @param[in,out] actions
515  *   Actions specification, advanced to the last pedit action
516  *   processed.
517  * @param[in,out] action_flags
518  *   Action flags, updated with the detected pedit actions.
519  *
520  * @return
521  *   Maximum memory size of one TC-pedit action.
522  */
523 static int
524 flow_tcf_get_pedit_actions_size(const struct rte_flow_action **actions,
525                                 uint64_t *action_flags)
526 {
527         int pedit_size = 0;
528         int keys = 0;
529         uint64_t flags = 0;
530
531         pedit_size += SZ_NLATTR_NEST + /* na_act_index. */
532                       SZ_NLATTR_STRZ_OF("pedit") +
533                       SZ_NLATTR_NEST; /* TCA_ACT_OPTIONS. */
534         for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
535                 switch ((*actions)->type) {
536                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
537                         keys += NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN);
538                         flags |= MLX5_FLOW_ACTION_SET_IPV4_SRC;
539                         break;
540                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
541                         keys += NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN);
542                         flags |= MLX5_FLOW_ACTION_SET_IPV4_DST;
543                         break;
544                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
545                         keys += NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
546                         flags |= MLX5_FLOW_ACTION_SET_IPV6_SRC;
547                         break;
548                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
549                         keys += NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
550                         flags |= MLX5_FLOW_ACTION_SET_IPV6_DST;
551                         break;
552                 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
553                         /* The port size is the same for TCP and UDP. */
554                         keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
555                         flags |= MLX5_FLOW_ACTION_SET_TP_SRC;
556                         break;
557                 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
558                         /* The port size is the same for TCP and UDP. */
559                         keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
560                         flags |= MLX5_FLOW_ACTION_SET_TP_DST;
561                         break;
562                 default:
563                         goto get_pedit_action_size_done;
564                 }
565         }
566 get_pedit_action_size_done:
567         /* TCA_PEDIT_PARMS_EX */
568         pedit_size +=
569                 SZ_NLATTR_DATA_OF(sizeof(struct tc_pedit_sel) +
570                                   keys * sizeof(struct tc_pedit_key));
571         pedit_size += SZ_NLATTR_NEST; /* TCA_PEDIT_KEYS_EX */
572         pedit_size += keys *
573                       /* TCA_PEDIT_KEY_EX + HTYPE + CMD */
574                       (SZ_NLATTR_NEST + SZ_NLATTR_DATA_OF(2) +
575                        SZ_NLATTR_DATA_OF(2));
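        /* I.e. 20 bytes per key: 4 (nest) + 8 (HTYPE) + 8 (CMD). */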
576         (*action_flags) |= flags;
577         (*actions)--;
578         return pedit_size;
579 }
580
581 /**
582  * Retrieve mask for pattern item.
583  *
584  * This function does basic sanity checks on a pattern item in order to
585  * return the most appropriate mask for it.
586  *
587  * @param[in] item
588  *   Item specification.
589  * @param[in] mask_default
590  *   Default mask for pattern item as specified by the flow API.
591  * @param[in] mask_supported
592  *   Mask fields supported by the implementation.
593  * @param[in] mask_empty
594  *   Empty mask to return when there is no specification.
595  * @param[out] error
596  *   Perform verbose error reporting if not NULL.
597  *
598  * @return
599  *   Either @p item->mask or one of the mask parameters on success, NULL
600  *   otherwise and rte_errno is set.
601  */
602 static const void *
603 flow_tcf_item_mask(const struct rte_flow_item *item, const void *mask_default,
604                    const void *mask_supported, const void *mask_empty,
605                    size_t mask_size, struct rte_flow_error *error)
606 {
607         const uint8_t *mask;
608         size_t i;
609
610         /* item->last and item->mask cannot exist without item->spec. */
611         if (!item->spec && (item->mask || item->last)) {
612                 rte_flow_error_set(error, EINVAL,
613                                    RTE_FLOW_ERROR_TYPE_ITEM, item,
614                                    "\"mask\" or \"last\" field provided without"
615                                    " a corresponding \"spec\"");
616                 return NULL;
617         }
618         /* No spec, no mask, no problem. */
619         if (!item->spec)
620                 return mask_empty;
621         mask = item->mask ? item->mask : mask_default;
622         assert(mask);
623         /*
624          * Single-pass check to make sure that:
625          * - Mask is supported, no bits are set outside mask_supported.
626          * - Both item->spec and item->last are included in mask.
627          */
628         for (i = 0; i != mask_size; ++i) {
629                 if (!mask[i])
630                         continue;
631                 if ((mask[i] | ((const uint8_t *)mask_supported)[i]) !=
632                     ((const uint8_t *)mask_supported)[i]) {
633                         rte_flow_error_set(error, ENOTSUP,
634                                            RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
635                                            "unsupported field found"
636                                            " in \"mask\"");
637                         return NULL;
638                 }
639                 if (item->last &&
640                     (((const uint8_t *)item->spec)[i] & mask[i]) !=
641                     (((const uint8_t *)item->last)[i] & mask[i])) {
642                         rte_flow_error_set(error, EINVAL,
643                                            RTE_FLOW_ERROR_TYPE_ITEM_LAST,
644                                            item->last,
645                                            "range between \"spec\" and \"last\""
646                                            " not comprised in \"mask\"");
647                         return NULL;
648                 }
649         }
650         return mask;
651 }
652
653 /**
654  * Build a conversion table between port ID and ifindex.
655  *
656  * @param[in] dev
657  *   Pointer to Ethernet device.
658  * @param[out] ptoi
659  *   Pointer to ptoi table.
660  * @param[in] len
661  *   Size of ptoi table provided.
662  *
663  * @return
664  *   Size of ptoi table filled.
665  */
666 static unsigned int
667 flow_tcf_build_ptoi_table(struct rte_eth_dev *dev, struct flow_tcf_ptoi *ptoi,
668                           unsigned int len)
669 {
670         unsigned int n = mlx5_dev_to_port_id(dev->device, NULL, 0);
671         uint16_t port_id[n + 1];
672         unsigned int i;
673         unsigned int own = 0;
674
675         /* At least one port is needed when no switch domain is present. */
676         if (!n) {
677                 n = 1;
678                 port_id[0] = dev->data->port_id;
679         } else {
680                 n = RTE_MIN(mlx5_dev_to_port_id(dev->device, port_id, n), n);
681         }
682         if (n > len)
683                 return 0;
684         for (i = 0; i != n; ++i) {
685                 struct rte_eth_dev_info dev_info;
686
687                 rte_eth_dev_info_get(port_id[i], &dev_info);
688                 if (port_id[i] == dev->data->port_id)
689                         own = i;
690                 ptoi[i].port_id = port_id[i];
691                 ptoi[i].ifindex = dev_info.if_index;
692         }
693         /* Ensure first entry of ptoi[] is the current device. */
694         if (own) {
695                 ptoi[n] = ptoi[0];
696                 ptoi[0] = ptoi[own];
697                 ptoi[own] = ptoi[n];
698         }
699         /* An entry with zero ifindex terminates ptoi[]. */
700         ptoi[n].port_id = 0;
701         ptoi[n].ifindex = 0;
702         return n;
703 }
704
705 /**
706  * Verify the @p attr will be correctly understood by the E-switch.
707  *
708  * @param[in] attr
709  *   Pointer to flow attributes
710  * @param[out] error
711  *   Pointer to error structure.
712  *
713  * @return
714  *   0 on success, a negative errno value otherwise and rte_errno is set.
715  */
716 static int
717 flow_tcf_validate_attributes(const struct rte_flow_attr *attr,
718                              struct rte_flow_error *error)
719 {
720         /*
721          * Supported attributes: groups, some priorities and ingress only.
722          * Group is supported only if the kernel supports chains. Transfer
723          * is not checked here as it is the caller's responsibility.
724          */
725         if (attr->group > MLX5_TCF_GROUP_ID_MAX)
726                 return rte_flow_error_set(error, ENOTSUP,
727                                           RTE_FLOW_ERROR_TYPE_ATTR_GROUP, attr,
728                                           "group ID larger than "
729                                           RTE_STR(MLX5_TCF_GROUP_ID_MAX)
730                                           " isn't supported");
731         else if (attr->group > 0 &&
732                  attr->priority > MLX5_TCF_GROUP_PRIORITY_MAX)
733                 return rte_flow_error_set(error, ENOTSUP,
734                                           RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
735                                           attr,
736                                           "lowest priority level is "
737                                           RTE_STR(MLX5_TCF_GROUP_PRIORITY_MAX)
738                                           " when group is configured");
739         else if (attr->priority > 0xfffe)
740                 return rte_flow_error_set(error, ENOTSUP,
741                                           RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
742                                           attr,
743                                           "lowest priority level is 0xfffe");
744         if (!attr->ingress)
745                 return rte_flow_error_set(error, EINVAL,
746                                           RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
747                                           attr, "only ingress is supported");
748         if (attr->egress)
749                 return rte_flow_error_set(error, ENOTSUP,
750                                           RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
751                                           attr, "egress is not supported");
752         return 0;
753 }
754
755 /**
756  * Validate flow for E-Switch.
757  *
758  * @param[in] dev
759  *   Pointer to the Ethernet device structure.
760  * @param[in] attr
761  *   Pointer to the flow attributes.
762  * @param[in] items
763  *   Pointer to the list of items.
764  * @param[in] actions
765  *   Pointer to the list of actions.
766  * @param[out] error
767  *   Pointer to the error structure.
768  *
769  * @return
770  *   0 on success, a negative errno value otherwise and rte_errno is set.
771  */
772 static int
773 flow_tcf_validate(struct rte_eth_dev *dev,
774                   const struct rte_flow_attr *attr,
775                   const struct rte_flow_item items[],
776                   const struct rte_flow_action actions[],
777                   struct rte_flow_error *error)
778 {
779         union {
780                 const struct rte_flow_item_port_id *port_id;
781                 const struct rte_flow_item_eth *eth;
782                 const struct rte_flow_item_vlan *vlan;
783                 const struct rte_flow_item_ipv4 *ipv4;
784                 const struct rte_flow_item_ipv6 *ipv6;
785                 const struct rte_flow_item_tcp *tcp;
786                 const struct rte_flow_item_udp *udp;
787         } spec, mask;
788         union {
789                 const struct rte_flow_action_port_id *port_id;
790                 const struct rte_flow_action_jump *jump;
791                 const struct rte_flow_action_of_push_vlan *of_push_vlan;
792                 const struct rte_flow_action_of_set_vlan_vid *
793                         of_set_vlan_vid;
794                 const struct rte_flow_action_of_set_vlan_pcp *
795                         of_set_vlan_pcp;
796                 const struct rte_flow_action_set_ipv4 *set_ipv4;
797                 const struct rte_flow_action_set_ipv6 *set_ipv6;
798         } conf;
799         uint32_t item_flags = 0;
800         uint32_t action_flags = 0;
801         uint8_t next_protocol = -1;
802         unsigned int tcm_ifindex = 0;
803         uint8_t pedit_validated = 0;
804         struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
805         struct rte_eth_dev *port_id_dev = NULL;
806         bool in_port_id_set;
807         int ret;
808
809         claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
810                                                 PTOI_TABLE_SZ_MAX(dev)));
811         ret = flow_tcf_validate_attributes(attr, error);
812         if (ret < 0)
813                 return ret;
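        /*
         * Two passes follow: the pattern items first (accumulating
         * item_flags and resolving the target ifindex), then the actions
         * (accumulating action_flags); the cross-checks between the two
         * flag sets come after both loops.
         */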
814         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
815                 unsigned int i;
816
817                 switch (items->type) {
818                 case RTE_FLOW_ITEM_TYPE_VOID:
819                         break;
820                 case RTE_FLOW_ITEM_TYPE_PORT_ID:
821                         mask.port_id = flow_tcf_item_mask
822                                 (items, &rte_flow_item_port_id_mask,
823                                  &flow_tcf_mask_supported.port_id,
824                                  &flow_tcf_mask_empty.port_id,
825                                  sizeof(flow_tcf_mask_supported.port_id),
826                                  error);
827                         if (!mask.port_id)
828                                 return -rte_errno;
829                         if (mask.port_id == &flow_tcf_mask_empty.port_id) {
830                                 in_port_id_set = 1;
831                                 break;
832                         }
833                         spec.port_id = items->spec;
834                         if (mask.port_id->id && mask.port_id->id != 0xffffffff)
835                                 return rte_flow_error_set
836                                         (error, ENOTSUP,
837                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
838                                          mask.port_id,
839                                          "no support for partial mask on"
840                                          " \"id\" field");
841                         if (!mask.port_id->id)
842                                 i = 0;
843                         else
844                                 for (i = 0; ptoi[i].ifindex; ++i)
845                                         if (ptoi[i].port_id == spec.port_id->id)
846                                                 break;
847                         if (!ptoi[i].ifindex)
848                                 return rte_flow_error_set
849                                         (error, ENODEV,
850                                          RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
851                                          spec.port_id,
852                                          "missing data to convert port ID to"
853                                          " ifindex");
854                         if (in_port_id_set && ptoi[i].ifindex != tcm_ifindex)
855                                 return rte_flow_error_set
856                                         (error, ENOTSUP,
857                                          RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
858                                          spec.port_id,
859                                          "cannot match traffic for"
860                                          " several port IDs through"
861                                          " a single flow rule");
862                         tcm_ifindex = ptoi[i].ifindex;
863                         in_port_id_set = 1;
864                         break;
865                 case RTE_FLOW_ITEM_TYPE_ETH:
866                         ret = mlx5_flow_validate_item_eth(items, item_flags,
867                                                           error);
868                         if (ret < 0)
869                                 return ret;
870                         item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
871                         /* TODO:
872                          * Redundant check due to different supported mask.
873                          * Same for the rest of items.
874                          */
875                         mask.eth = flow_tcf_item_mask
876                                 (items, &rte_flow_item_eth_mask,
877                                  &flow_tcf_mask_supported.eth,
878                                  &flow_tcf_mask_empty.eth,
879                                  sizeof(flow_tcf_mask_supported.eth),
880                                  error);
881                         if (!mask.eth)
882                                 return -rte_errno;
883                         if (mask.eth->type && mask.eth->type !=
884                             RTE_BE16(0xffff))
885                                 return rte_flow_error_set
886                                         (error, ENOTSUP,
887                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
888                                          mask.eth,
889                                          "no support for partial mask on"
890                                          " \"type\" field");
891                         break;
892                 case RTE_FLOW_ITEM_TYPE_VLAN:
893                         ret = mlx5_flow_validate_item_vlan(items, item_flags,
894                                                            error);
895                         if (ret < 0)
896                                 return ret;
897                         item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
898                         mask.vlan = flow_tcf_item_mask
899                                 (items, &rte_flow_item_vlan_mask,
900                                  &flow_tcf_mask_supported.vlan,
901                                  &flow_tcf_mask_empty.vlan,
902                                  sizeof(flow_tcf_mask_supported.vlan),
903                                  error);
904                         if (!mask.vlan)
905                                 return -rte_errno;
906                         if ((mask.vlan->tci & RTE_BE16(0xe000) &&
907                              (mask.vlan->tci & RTE_BE16(0xe000)) !=
908                               RTE_BE16(0xe000)) ||
909                             (mask.vlan->tci & RTE_BE16(0x0fff) &&
910                              (mask.vlan->tci & RTE_BE16(0x0fff)) !=
911                               RTE_BE16(0x0fff)) ||
912                             (mask.vlan->inner_type &&
913                              mask.vlan->inner_type != RTE_BE16(0xffff)))
914                                 return rte_flow_error_set
915                                         (error, ENOTSUP,
916                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
917                                          mask.vlan,
918                                          "no support for partial masks on"
919                                          " \"tci\" (PCP and VID parts) and"
920                                          " \"inner_type\" fields");
921                         break;
922                 case RTE_FLOW_ITEM_TYPE_IPV4:
923                         ret = mlx5_flow_validate_item_ipv4(items, item_flags,
924                                                            error);
925                         if (ret < 0)
926                                 return ret;
927                         item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
928                         mask.ipv4 = flow_tcf_item_mask
929                                 (items, &rte_flow_item_ipv4_mask,
930                                  &flow_tcf_mask_supported.ipv4,
931                                  &flow_tcf_mask_empty.ipv4,
932                                  sizeof(flow_tcf_mask_supported.ipv4),
933                                  error);
934                         if (!mask.ipv4)
935                                 return -rte_errno;
936                         if (mask.ipv4->hdr.next_proto_id &&
937                             mask.ipv4->hdr.next_proto_id != 0xff)
938                                 return rte_flow_error_set
939                                         (error, ENOTSUP,
940                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
941                                          mask.ipv4,
942                                          "no support for partial mask on"
943                                          " \"hdr.next_proto_id\" field");
944                         else if (mask.ipv4->hdr.next_proto_id)
945                                 next_protocol =
946                                         ((const struct rte_flow_item_ipv4 *)
947                                          (items->spec))->hdr.next_proto_id;
948                         break;
949                 case RTE_FLOW_ITEM_TYPE_IPV6:
950                         ret = mlx5_flow_validate_item_ipv6(items, item_flags,
951                                                            error);
952                         if (ret < 0)
953                                 return ret;
954                         item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
955                         mask.ipv6 = flow_tcf_item_mask
956                                 (items, &rte_flow_item_ipv6_mask,
957                                  &flow_tcf_mask_supported.ipv6,
958                                  &flow_tcf_mask_empty.ipv6,
959                                  sizeof(flow_tcf_mask_supported.ipv6),
960                                  error);
961                         if (!mask.ipv6)
962                                 return -rte_errno;
963                         if (mask.ipv6->hdr.proto &&
964                             mask.ipv6->hdr.proto != 0xff)
965                                 return rte_flow_error_set
966                                         (error, ENOTSUP,
967                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
968                                          mask.ipv6,
969                                          "no support for partial mask on"
970                                          " \"hdr.proto\" field");
971                         else if (mask.ipv6->hdr.proto)
972                                 next_protocol =
973                                         ((const struct rte_flow_item_ipv6 *)
974                                          (items->spec))->hdr.proto;
975                         break;
976                 case RTE_FLOW_ITEM_TYPE_UDP:
977                         ret = mlx5_flow_validate_item_udp(items, item_flags,
978                                                           next_protocol, error);
979                         if (ret < 0)
980                                 return ret;
981                         item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
982                         mask.udp = flow_tcf_item_mask
983                                 (items, &rte_flow_item_udp_mask,
984                                  &flow_tcf_mask_supported.udp,
985                                  &flow_tcf_mask_empty.udp,
986                                  sizeof(flow_tcf_mask_supported.udp),
987                                  error);
988                         if (!mask.udp)
989                                 return -rte_errno;
990                         break;
991                 case RTE_FLOW_ITEM_TYPE_TCP:
992                         ret = mlx5_flow_validate_item_tcp
993                                              (items, item_flags,
994                                               next_protocol,
995                                               &flow_tcf_mask_supported.tcp,
996                                               error);
997                         if (ret < 0)
998                                 return ret;
999                         item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
1000                         mask.tcp = flow_tcf_item_mask
1001                                 (items, &rte_flow_item_tcp_mask,
1002                                  &flow_tcf_mask_supported.tcp,
1003                                  &flow_tcf_mask_empty.tcp,
1004                                  sizeof(flow_tcf_mask_supported.tcp),
1005                                  error);
1006                         if (!mask.tcp)
1007                                 return -rte_errno;
1008                         break;
1009                 default:
1010                         return rte_flow_error_set(error, ENOTSUP,
1011                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1012                                                   NULL, "item not supported");
1013                 }
1014         }
1015         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1016                 unsigned int i;
1017                 uint32_t current_action_flag = 0;
1018
1019                 switch (actions->type) {
1020                 case RTE_FLOW_ACTION_TYPE_VOID:
1021                         break;
1022                 case RTE_FLOW_ACTION_TYPE_PORT_ID:
1023                         current_action_flag = MLX5_FLOW_ACTION_PORT_ID;
1024                         if (!actions->conf)
1025                                 break;
1026                         conf.port_id = actions->conf;
1027                         if (conf.port_id->original)
1028                                 i = 0;
1029                         else
1030                                 for (i = 0; ptoi[i].ifindex; ++i)
1031                                         if (ptoi[i].port_id == conf.port_id->id)
1032                                                 break;
1033                         if (!ptoi[i].ifindex)
1034                                 return rte_flow_error_set
1035                                         (error, ENODEV,
1036                                          RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1037                                          conf.port_id,
1038                                          "missing data to convert port ID to"
1039                                          " ifindex");
1040                         port_id_dev = &rte_eth_devices[conf.port_id->id];
1041                         break;
1042                 case RTE_FLOW_ACTION_TYPE_JUMP:
1043                         current_action_flag = MLX5_FLOW_ACTION_JUMP;
1044                         if (!actions->conf)
1045                                 break;
1046                         conf.jump = actions->conf;
1047                         if (attr->group >= conf.jump->group)
1048                                 return rte_flow_error_set
1049                                         (error, ENOTSUP,
1050                                          RTE_FLOW_ERROR_TYPE_ACTION,
1051                                          actions,
1052                                          "can jump only to a group forward");
1053                         break;
1054                 case RTE_FLOW_ACTION_TYPE_DROP:
1055                         current_action_flag = MLX5_FLOW_ACTION_DROP;
1056                         break;
1057                 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
1058                         current_action_flag = MLX5_FLOW_ACTION_OF_POP_VLAN;
1059                         break;
1060                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
1061                         current_action_flag = MLX5_FLOW_ACTION_OF_PUSH_VLAN;
1062                         break;
1063                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
1064                         if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
1065                                 return rte_flow_error_set
1066                                         (error, ENOTSUP,
1067                                          RTE_FLOW_ERROR_TYPE_ACTION, actions,
1068                                          "vlan modify is not supported,"
1069                                          " set action must follow push action");
1070                         current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
1071                         break;
1072                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
1073                         if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
1074                                 return rte_flow_error_set
1075                                         (error, ENOTSUP,
1076                                          RTE_FLOW_ERROR_TYPE_ACTION, actions,
1077                                          "vlan modify is not supported,"
1078                                          " set action must follow push action");
1079                         current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
1080                         break;
1081                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
1082                         current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_SRC;
1083                         break;
1084                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
1085                         current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_DST;
1086                         break;
1087                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
1088                         current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_SRC;
1089                         break;
1090                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
1091                         current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_DST;
1092                         break;
1093                 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
1094                         current_action_flag = MLX5_FLOW_ACTION_SET_TP_SRC;
1095                         break;
1096                 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
1097                         current_action_flag = MLX5_FLOW_ACTION_SET_TP_DST;
1098                         break;
1099                 default:
1100                         return rte_flow_error_set(error, ENOTSUP,
1101                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1102                                                   actions,
1103                                                   "action not supported");
1104                 }
1105                 if (current_action_flag & MLX5_TCF_CONFIG_ACTIONS) {
1106                         if (!actions->conf)
1107                                 return rte_flow_error_set(error, EINVAL,
1108                                                 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1109                                                 actions,
1110                                                 "action configuration not set");
1111                 }
1112                 if ((current_action_flag & MLX5_TCF_PEDIT_ACTIONS) &&
1113                     pedit_validated)
1114                         return rte_flow_error_set(error, ENOTSUP,
1115                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1116                                                   actions,
1117                                                   "set actions should be "
1118                                                   "listed successively");
1119                 if ((current_action_flag & ~MLX5_TCF_PEDIT_ACTIONS) &&
1120                     (action_flags & MLX5_TCF_PEDIT_ACTIONS))
1121                         pedit_validated = 1;
1122                 if ((current_action_flag & MLX5_TCF_FATE_ACTIONS) &&
1123                     (action_flags & MLX5_TCF_FATE_ACTIONS))
1124                         return rte_flow_error_set(error, EINVAL,
1125                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1126                                                   actions,
1127                                                   "can't have multiple fate"
1128                                                   " actions");
1129                 action_flags |= current_action_flag;
1130         }
1131         if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
1132             (action_flags & MLX5_FLOW_ACTION_DROP))
1133                 return rte_flow_error_set(error, ENOTSUP,
1134                                           RTE_FLOW_ERROR_TYPE_ACTION,
1135                                           actions,
1136                                           "set action is not compatible with "
1137                                           "drop action");
1138         if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
1139             !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
1140                 return rte_flow_error_set(error, ENOTSUP,
1141                                           RTE_FLOW_ERROR_TYPE_ACTION,
1142                                           actions,
1143                                           "set action must be followed by "
1144                                           "port_id action");
1145         if (action_flags &
1146            (MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST)) {
1147                 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4))
1148                         return rte_flow_error_set(error, EINVAL,
1149                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1150                                                   actions,
1151                                                   "no ipv4 item found in"
1152                                                   " pattern");
1153         }
1154         if (action_flags &
1155            (MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST)) {
1156                 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6))
1157                         return rte_flow_error_set(error, EINVAL,
1158                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1159                                                   actions,
1160                                                   "no ipv6 item found in"
1161                                                   " pattern");
1162         }
1163         if (action_flags &
1164            (MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST)) {
1165                 if (!(item_flags &
1166                      (MLX5_FLOW_LAYER_OUTER_L4_UDP |
1167                       MLX5_FLOW_LAYER_OUTER_L4_TCP)))
1168                         return rte_flow_error_set(error, EINVAL,
1169                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1170                                                   actions,
1171                                                   "no TCP/UDP item found in"
1172                                                   " pattern");
1173         }
1174         /*
1175          * FW syndrome (0xA9C090):
1176          *     set_flow_table_entry: push vlan action fte in fdb can ONLY be
1177          *     forward to the uplink.
1178          */
1179         if ((action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN) &&
1180             (action_flags & MLX5_FLOW_ACTION_PORT_ID) &&
1181             ((struct priv *)port_id_dev->data->dev_private)->representor)
1182                 return rte_flow_error_set(error, ENOTSUP,
1183                                           RTE_FLOW_ERROR_TYPE_ACTION, actions,
1184                                           "vlan push can only be applied"
1185                                           " when forwarding to uplink port");
1186         /*
1187          * FW syndrome (0x294609):
1188          *     set_flow_table_entry: modify/pop/push actions in fdb flow table
1189          *     are supported only while forwarding to vport.
1190          */
1191         if ((action_flags & MLX5_TCF_VLAN_ACTIONS) &&
1192             !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
1193                 return rte_flow_error_set(error, ENOTSUP,
1194                                           RTE_FLOW_ERROR_TYPE_ACTION, actions,
1195                                           "vlan actions are supported"
1196                                           " only with port_id action");
1197         if (!(action_flags & MLX5_TCF_FATE_ACTIONS))
1198                 return rte_flow_error_set(error, EINVAL,
1199                                           RTE_FLOW_ERROR_TYPE_ACTION, actions,
1200                                           "no fate action is found");
1201         return 0;
1202 }
1203
1204 /**
1205  * Calculate maximum size of memory for flow items of Linux TC flower and
1206  * extract specified items.
1207  *
1208  * @param[in] items
1209  *   Pointer to the list of items.
1210  * @param[out] item_flags
1211  *   Pointer to the detected items.
1212  *
1213  * @return
1214  *   Maximum size of memory for items.
1215  */
1216 static int
1217 flow_tcf_get_items_and_size(const struct rte_flow_attr *attr,
1218                             const struct rte_flow_item items[],
1219                             uint64_t *item_flags)
1220 {
1221         int size = 0;
1222         uint64_t flags = 0;
1223
1224         size += SZ_NLATTR_STRZ_OF("flower") +
1225                 SZ_NLATTR_NEST + /* TCA_OPTIONS. */
1226                 SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CLS_FLAGS_SKIP_SW. */
1227         if (attr->group > 0)
1228                 size += SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CHAIN. */
1229         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1230                 switch (items->type) {
1231                 case RTE_FLOW_ITEM_TYPE_VOID:
1232                         break;
1233                 case RTE_FLOW_ITEM_TYPE_PORT_ID:
1234                         break;
1235                 case RTE_FLOW_ITEM_TYPE_ETH:
1236                         size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1237                                 SZ_NLATTR_DATA_OF(ETHER_ADDR_LEN) * 4;
1238                                 /* dst/src MAC addr and mask. */
1239                         flags |= MLX5_FLOW_LAYER_OUTER_L2;
1240                         break;
1241                 case RTE_FLOW_ITEM_TYPE_VLAN:
1242                         size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1243                                 SZ_NLATTR_TYPE_OF(uint16_t) +
1244                                 /* VLAN Ether type. */
1245                                 SZ_NLATTR_TYPE_OF(uint8_t) + /* VLAN prio. */
1246                                 SZ_NLATTR_TYPE_OF(uint16_t); /* VLAN ID. */
1247                         flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
1248                         break;
1249                 case RTE_FLOW_ITEM_TYPE_IPV4:
1250                         size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1251                                 SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1252                                 SZ_NLATTR_TYPE_OF(uint32_t) * 4;
1253                                 /* dst/src IP addr and mask. */
1254                         flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1255                         break;
1256                 case RTE_FLOW_ITEM_TYPE_IPV6:
1257                         size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1258                                 SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1259                                 SZ_NLATTR_TYPE_OF(IPV6_ADDR_LEN) * 4;
1260                                 /* dst/src IP addr and mask. */
1261                         flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1262                         break;
1263                 case RTE_FLOW_ITEM_TYPE_UDP:
1264                         size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1265                                 SZ_NLATTR_TYPE_OF(uint16_t) * 4;
1266                                 /* dst/src port and mask. */
1267                         flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
1268                         break;
1269                 case RTE_FLOW_ITEM_TYPE_TCP:
1270                         size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1271                                 SZ_NLATTR_TYPE_OF(uint16_t) * 4;
1272                                 /* dst/src port and mask. */
1273                         flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
1274                         break;
1275                 default:
1276                         DRV_LOG(WARNING,
1277                                 "unsupported item %p type %d,"
1278                                 " items must be validated before flow creation",
1279                                 (const void *)items, items->type);
1280                         break;
1281                 }
1282         }
1283         *item_flags = flags;
1284         return size;
1285 }
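
/*
 * For illustration: with a pattern of ETH / IPV4 / END in group 0, the
 * function above returns the fixed flower overhead plus the per-item
 * attribute sizes, roughly
 *
 *   SZ_NLATTR_STRZ_OF("flower") + SZ_NLATTR_NEST +
 *   SZ_NLATTR_TYPE_OF(uint32_t) +            (flags)
 *   SZ_NLATTR_TYPE_OF(uint16_t) +            (Ether type)
 *   SZ_NLATTR_DATA_OF(ETHER_ADDR_LEN) * 4 +  (MAC addresses and masks)
 *   SZ_NLATTR_TYPE_OF(uint16_t) +            (Ether type again, IPv4)
 *   SZ_NLATTR_TYPE_OF(uint8_t) +             (IP proto)
 *   SZ_NLATTR_TYPE_OF(uint32_t) * 4          (IP addresses and masks)
 *
 * and sets *item_flags to MLX5_FLOW_LAYER_OUTER_L2 |
 * MLX5_FLOW_LAYER_OUTER_L3_IPV4. This is an upper bound: translation may
 * emit fewer attributes when item masks are empty.
 */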
1286
1287 /**
1288  * Calculate the maximum memory size needed for the flow actions of Linux TC
1289  * flower and collect the flags of the detected actions.
1290  *
1291  * @param[in] actions
1292  *   Pointer to the list of actions.
1293  * @param[out] action_flags
1294  *   Pointer to the bit mask of detected actions.
1295  *
1296  * @return
1297  *   Maximum size of memory for actions.
1298  */
1299 static int
1300 flow_tcf_get_actions_and_size(const struct rte_flow_action actions[],
1301                               uint64_t *action_flags)
1302 {
1303         int size = 0;
1304         uint64_t flags = 0;
1305
1306         size += SZ_NLATTR_NEST; /* TCA_FLOWER_ACT. */
1307         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1308                 switch (actions->type) {
1309                 case RTE_FLOW_ACTION_TYPE_VOID:
1310                         break;
1311                 case RTE_FLOW_ACTION_TYPE_PORT_ID:
1312                         size += SZ_NLATTR_NEST + /* na_act_index. */
1313                                 SZ_NLATTR_STRZ_OF("mirred") +
1314                                 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1315                                 SZ_NLATTR_TYPE_OF(struct tc_mirred);
1316                         flags |= MLX5_FLOW_ACTION_PORT_ID;
1317                         break;
1318                 case RTE_FLOW_ACTION_TYPE_JUMP:
1319                         size += SZ_NLATTR_NEST + /* na_act_index. */
1320                                 SZ_NLATTR_STRZ_OF("gact") +
1321                                 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1322                                 SZ_NLATTR_TYPE_OF(struct tc_gact);
1323                         flags |= MLX5_FLOW_ACTION_JUMP;
1324                         break;
1325                 case RTE_FLOW_ACTION_TYPE_DROP:
1326                         size += SZ_NLATTR_NEST + /* na_act_index. */
1327                                 SZ_NLATTR_STRZ_OF("gact") +
1328                                 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1329                                 SZ_NLATTR_TYPE_OF(struct tc_gact);
1330                         flags |= MLX5_FLOW_ACTION_DROP;
1331                         break;
1332                 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
1333                         flags |= MLX5_FLOW_ACTION_OF_POP_VLAN;
1334                         goto action_of_vlan;
1335                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
1336                         flags |= MLX5_FLOW_ACTION_OF_PUSH_VLAN;
1337                         goto action_of_vlan;
1338                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
1339                         flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
1340                         goto action_of_vlan;
1341                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
1342                         flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
1343                         goto action_of_vlan;
1344 action_of_vlan:
1345                         size += SZ_NLATTR_NEST + /* na_act_index. */
1346                                 SZ_NLATTR_STRZ_OF("vlan") +
1347                                 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1348                                 SZ_NLATTR_TYPE_OF(struct tc_vlan) +
1349                                 SZ_NLATTR_TYPE_OF(uint16_t) +
1350                                 /* VLAN protocol. */
1351                                 SZ_NLATTR_TYPE_OF(uint16_t) + /* VLAN ID. */
1352                                 SZ_NLATTR_TYPE_OF(uint8_t); /* VLAN prio. */
1353                         break;
1354                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
1355                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
1356                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
1357                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
1358                 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
1359                 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
1360                         size += flow_tcf_get_pedit_actions_size(&actions,
1361                                                                 &flags);
1362                         break;
1363                 default:
1364                         DRV_LOG(WARNING,
1365                                 "unsupported action %p type %d,"
1366                                 " actions must be validated before flow creation",
1367                                 (const void *)actions, actions->type);
1368                         break;
1369                 }
1370         }
1371         *action_flags = flags;
1372         return size;
1373 }
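
/*
 * For illustration: a single DROP action accounts for one action index
 * nest, the "gact" kind string, a TCA_ACT_OPTIONS nest and one
 * struct tc_gact attribute, on top of the TCA_FLOWER_ACT nest that is
 * always counted, and sets *action_flags to MLX5_FLOW_ACTION_DROP.
 */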
1374
1375 /**
1376  * Brand rtnetlink buffer with unique handle.
1377  *
1378  * This handle should be unique for a given network interface to avoid
1379  * collisions.
1380  *
1381  * @param nlh
1382  *   Pointer to Netlink message.
1383  * @param handle
1384  *   Unique 32-bit handle to use.
1385  */
1386 static void
1387 flow_tcf_nl_brand(struct nlmsghdr *nlh, uint32_t handle)
1388 {
1389         struct tcmsg *tcm = mnl_nlmsg_get_payload(nlh);
1390
1391         tcm->tcm_handle = handle;
1392         DRV_LOG(DEBUG, "Netlink msg %p is branded with handle %x",
1393                 (void *)nlh, handle);
1394 }
1395
1396 /**
1397  * Prepare a flow object for Linux TC flower. It calculates the maximum size of
1398  * memory required, allocates the memory, initializes Netlink message headers
1399  * and sets a unique TC message handle.
1400  *
1401  * @param[in] attr
1402  *   Pointer to the flow attributes.
1403  * @param[in] items
1404  *   Pointer to the list of items.
1405  * @param[in] actions
1406  *   Pointer to the list of actions.
1407  * @param[out] item_flags
1408  *   Pointer to bit mask of all items detected.
1409  * @param[out] action_flags
1410  *   Pointer to bit mask of all actions detected.
1411  * @param[out] error
1412  *   Pointer to the error structure.
1413  *
1414  * @return
1415  *   Pointer to mlx5_flow object on success,
1416  *   otherwise NULL and rte_errno is set.
1417  */
1418 static struct mlx5_flow *
1419 flow_tcf_prepare(const struct rte_flow_attr *attr,
1420                  const struct rte_flow_item items[],
1421                  const struct rte_flow_action actions[],
1422                  uint64_t *item_flags, uint64_t *action_flags,
1423                  struct rte_flow_error *error)
1424 {
1425         size_t size = sizeof(struct mlx5_flow) +
1426                       MNL_ALIGN(sizeof(struct nlmsghdr)) +
1427                       MNL_ALIGN(sizeof(struct tcmsg));
1428         struct mlx5_flow *dev_flow;
1429         struct nlmsghdr *nlh;
1430         struct tcmsg *tcm;
1431
1432         size += flow_tcf_get_items_and_size(attr, items, item_flags);
1433         size += flow_tcf_get_actions_and_size(actions, action_flags);
1434         dev_flow = rte_zmalloc(__func__, size, MNL_ALIGNTO);
1435         if (!dev_flow) {
1436                 rte_flow_error_set(error, ENOMEM,
1437                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1438                                    "not enough memory to create E-Switch flow");
1439                 return NULL;
1440         }
1441         nlh = mnl_nlmsg_put_header((void *)(dev_flow + 1));
1442         tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
1443         *dev_flow = (struct mlx5_flow){
1444                 .tcf = (struct mlx5_flow_tcf){
1445                         .nlh = nlh,
1446                         .tcm = tcm,
1447                 },
1448         };
1449         /*
1450          * Generate a reasonably unique handle based on the address of the
1451          * target buffer.
1452          *
1453          * This is straightforward on 32-bit systems where the flow pointer can
1454          * be used directly. Otherwise, its least significant part is taken
1455          * after shifting the pointer right by the base-2 logarithm of the
1456          * previous power of two of the buffer size.
1457          */
1458         if (sizeof(dev_flow) <= 4)
1459                 flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow);
1460         else
1461                 flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow >>
1462                                        rte_log2_u32(rte_align32prevpow2(size)));
1463         return dev_flow;
1464 }
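
/*
 * For illustration, on a 64-bit system: if the computed size is e.g. 600
 * bytes, rte_align32prevpow2(600) is 512 and rte_log2_u32(512) is 9, so the
 * brand becomes (uintptr_t)dev_flow >> 9. Two live buffers of that size
 * start at least 600 bytes apart, hence their shifted addresses differ,
 * which keeps the resulting 32-bit handles reasonably unique.
 */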
1465
1466 /**
1467  * Translate flow for Linux TC flower and construct Netlink message.
1468  *
1469  * @param[in] dev
1470  *   Pointer to the Ethernet device.
1471  * @param[in, out] flow
1472  *   Pointer to the sub flow.
1473  * @param[in] attr
1474  *   Pointer to the flow attributes.
1475  * @param[in] items
1476  *   Pointer to the list of items.
1477  * @param[in] actions
1478  *   Pointer to the list of actions.
1479  * @param[out] error
1480  *   Pointer to the error structure.
1481  *
1482  * @return
1483  *   0 on success, a negative errno value otherwise and rte_errno is set.
1484  */
1485 static int
1486 flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
1487                    const struct rte_flow_attr *attr,
1488                    const struct rte_flow_item items[],
1489                    const struct rte_flow_action actions[],
1490                    struct rte_flow_error *error)
1491 {
1492         union {
1493                 const struct rte_flow_item_port_id *port_id;
1494                 const struct rte_flow_item_eth *eth;
1495                 const struct rte_flow_item_vlan *vlan;
1496                 const struct rte_flow_item_ipv4 *ipv4;
1497                 const struct rte_flow_item_ipv6 *ipv6;
1498                 const struct rte_flow_item_tcp *tcp;
1499                 const struct rte_flow_item_udp *udp;
1500         } spec, mask;
1501         union {
1502                 const struct rte_flow_action_port_id *port_id;
1503                 const struct rte_flow_action_jump *jump;
1504                 const struct rte_flow_action_of_push_vlan *of_push_vlan;
1505                 const struct rte_flow_action_of_set_vlan_vid *
1506                         of_set_vlan_vid;
1507                 const struct rte_flow_action_of_set_vlan_pcp *
1508                         of_set_vlan_pcp;
1509         } conf;
1510         struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
1511         struct nlmsghdr *nlh = dev_flow->tcf.nlh;
1512         struct tcmsg *tcm = dev_flow->tcf.tcm;
1513         uint32_t na_act_index_cur;
1514         bool eth_type_set = 0;
1515         bool vlan_present = 0;
1516         bool vlan_eth_type_set = 0;
1517         bool ip_proto_set = 0;
1518         struct nlattr *na_flower;
1519         struct nlattr *na_flower_act;
1520         struct nlattr *na_vlan_id = NULL;
1521         struct nlattr *na_vlan_priority = NULL;
1522         uint64_t item_flags = 0;
1523
1524         claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
1525                                                 PTOI_TABLE_SZ_MAX(dev)));
1526         nlh = dev_flow->tcf.nlh;
1527         tcm = dev_flow->tcf.tcm;
1528         /* Prepare API must have been called beforehand. */
1529         assert(nlh != NULL && tcm != NULL);
1530         tcm->tcm_family = AF_UNSPEC;
1531         tcm->tcm_ifindex = ptoi[0].ifindex;
1532         tcm->tcm_parent = TC_H_MAKE(TC_H_INGRESS, TC_H_MIN_INGRESS);
1533         /*
1534          * Priority must not be zero, otherwise the kernel would pick one
1535          * automatically.
1536          */
1537         tcm->tcm_info = TC_H_MAKE((attr->priority + 1) << 16,
1538                                   RTE_BE16(ETH_P_ALL));
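              /*
               * For illustration: attr->priority 0 becomes TC priority 1 in
               * the major part of tcm_info, while the minor part carries the
               * matched protocol, here ETH_P_ALL.
               */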
1539         if (attr->group > 0)
1540                 mnl_attr_put_u32(nlh, TCA_CHAIN, attr->group);
1541         mnl_attr_put_strz(nlh, TCA_KIND, "flower");
1542         na_flower = mnl_attr_nest_start(nlh, TCA_OPTIONS);
1543         mnl_attr_put_u32(nlh, TCA_FLOWER_FLAGS, TCA_CLS_FLAGS_SKIP_SW);
1544         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1545                 unsigned int i;
1546
1547                 switch (items->type) {
1548                 case RTE_FLOW_ITEM_TYPE_VOID:
1549                         break;
1550                 case RTE_FLOW_ITEM_TYPE_PORT_ID:
1551                         mask.port_id = flow_tcf_item_mask
1552                                 (items, &rte_flow_item_port_id_mask,
1553                                  &flow_tcf_mask_supported.port_id,
1554                                  &flow_tcf_mask_empty.port_id,
1555                                  sizeof(flow_tcf_mask_supported.port_id),
1556                                  error);
1557                         assert(mask.port_id);
1558                         if (mask.port_id == &flow_tcf_mask_empty.port_id)
1559                                 break;
1560                         spec.port_id = items->spec;
1561                         if (!mask.port_id->id)
1562                                 i = 0;
1563                         else
1564                                 for (i = 0; ptoi[i].ifindex; ++i)
1565                                         if (ptoi[i].port_id == spec.port_id->id)
1566                                                 break;
1567                         assert(ptoi[i].ifindex);
1568                         tcm->tcm_ifindex = ptoi[i].ifindex;
1569                         break;
1570                 case RTE_FLOW_ITEM_TYPE_ETH:
1571                         item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
1572                         mask.eth = flow_tcf_item_mask
1573                                 (items, &rte_flow_item_eth_mask,
1574                                  &flow_tcf_mask_supported.eth,
1575                                  &flow_tcf_mask_empty.eth,
1576                                  sizeof(flow_tcf_mask_supported.eth),
1577                                  error);
1578                         assert(mask.eth);
1579                         if (mask.eth == &flow_tcf_mask_empty.eth)
1580                                 break;
1581                         spec.eth = items->spec;
1582                         if (mask.eth->type) {
1583                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
1584                                                  spec.eth->type);
1585                                 eth_type_set = 1;
1586                         }
1587                         if (!is_zero_ether_addr(&mask.eth->dst)) {
1588                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST,
1589                                              ETHER_ADDR_LEN,
1590                                              spec.eth->dst.addr_bytes);
1591                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST_MASK,
1592                                              ETHER_ADDR_LEN,
1593                                              mask.eth->dst.addr_bytes);
1594                         }
1595                         if (!is_zero_ether_addr(&mask.eth->src)) {
1596                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC,
1597                                              ETHER_ADDR_LEN,
1598                                              spec.eth->src.addr_bytes);
1599                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC_MASK,
1600                                              ETHER_ADDR_LEN,
1601                                              mask.eth->src.addr_bytes);
1602                         }
1603                         break;
1604                 case RTE_FLOW_ITEM_TYPE_VLAN:
1605                         item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
1606                         mask.vlan = flow_tcf_item_mask
1607                                 (items, &rte_flow_item_vlan_mask,
1608                                  &flow_tcf_mask_supported.vlan,
1609                                  &flow_tcf_mask_empty.vlan,
1610                                  sizeof(flow_tcf_mask_supported.vlan),
1611                                  error);
1612                         assert(mask.vlan);
1613                         if (!eth_type_set)
1614                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
1615                                                  RTE_BE16(ETH_P_8021Q));
1616                         eth_type_set = 1;
1617                         vlan_present = 1;
1618                         if (mask.vlan == &flow_tcf_mask_empty.vlan)
1619                                 break;
1620                         spec.vlan = items->spec;
1621                         if (mask.vlan->inner_type) {
1622                                 mnl_attr_put_u16(nlh,
1623                                                  TCA_FLOWER_KEY_VLAN_ETH_TYPE,
1624                                                  spec.vlan->inner_type);
1625                                 vlan_eth_type_set = 1;
1626                         }
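                              /*
                               * For illustration: the 16-bit TCI splits into
                               * PCP (bits 15-13) and VID (bits 11-0), e.g. a
                               * host-order TCI of 0xa00a yields priority 5
                               * and VLAN ID 10.
                               */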
1627                         if (mask.vlan->tci & RTE_BE16(0xe000))
1628                                 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_VLAN_PRIO,
1629                                                 (rte_be_to_cpu_16
1630                                                  (spec.vlan->tci) >> 13) & 0x7);
1631                         if (mask.vlan->tci & RTE_BE16(0x0fff))
1632                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_VLAN_ID,
1633                                                  rte_be_to_cpu_16
1634                                                  (spec.vlan->tci &
1635                                                   RTE_BE16(0x0fff)));
1636                         break;
1637                 case RTE_FLOW_ITEM_TYPE_IPV4:
1638                         item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1639                         mask.ipv4 = flow_tcf_item_mask
1640                                 (items, &rte_flow_item_ipv4_mask,
1641                                  &flow_tcf_mask_supported.ipv4,
1642                                  &flow_tcf_mask_empty.ipv4,
1643                                  sizeof(flow_tcf_mask_supported.ipv4),
1644                                  error);
1645                         assert(mask.ipv4);
1646                         if (!eth_type_set || !vlan_eth_type_set)
1647                                 mnl_attr_put_u16(nlh,
1648                                                  vlan_present ?
1649                                                  TCA_FLOWER_KEY_VLAN_ETH_TYPE :
1650                                                  TCA_FLOWER_KEY_ETH_TYPE,
1651                                                  RTE_BE16(ETH_P_IP));
1652                         eth_type_set = 1;
1653                         vlan_eth_type_set = 1;
1654                         if (mask.ipv4 == &flow_tcf_mask_empty.ipv4)
1655                                 break;
1656                         spec.ipv4 = items->spec;
1657                         if (mask.ipv4->hdr.next_proto_id) {
1658                                 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1659                                                 spec.ipv4->hdr.next_proto_id);
1660                                 ip_proto_set = 1;
1661                         }
1662                         if (mask.ipv4->hdr.src_addr) {
1663                                 mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_SRC,
1664                                                  spec.ipv4->hdr.src_addr);
1665                                 mnl_attr_put_u32(nlh,
1666                                                  TCA_FLOWER_KEY_IPV4_SRC_MASK,
1667                                                  mask.ipv4->hdr.src_addr);
1668                         }
1669                         if (mask.ipv4->hdr.dst_addr) {
1670                                 mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_DST,
1671                                                  spec.ipv4->hdr.dst_addr);
1672                                 mnl_attr_put_u32(nlh,
1673                                                  TCA_FLOWER_KEY_IPV4_DST_MASK,
1674                                                  mask.ipv4->hdr.dst_addr);
1675                         }
1676                         break;
1677                 case RTE_FLOW_ITEM_TYPE_IPV6:
1678                         item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1679                         mask.ipv6 = flow_tcf_item_mask
1680                                 (items, &rte_flow_item_ipv6_mask,
1681                                  &flow_tcf_mask_supported.ipv6,
1682                                  &flow_tcf_mask_empty.ipv6,
1683                                  sizeof(flow_tcf_mask_supported.ipv6),
1684                                  error);
1685                         assert(mask.ipv6);
1686                         if (!eth_type_set || !vlan_eth_type_set)
1687                                 mnl_attr_put_u16(nlh,
1688                                                  vlan_present ?
1689                                                  TCA_FLOWER_KEY_VLAN_ETH_TYPE :
1690                                                  TCA_FLOWER_KEY_ETH_TYPE,
1691                                                  RTE_BE16(ETH_P_IPV6));
1692                         eth_type_set = 1;
1693                         vlan_eth_type_set = 1;
1694                         if (mask.ipv6 == &flow_tcf_mask_empty.ipv6)
1695                                 break;
1696                         spec.ipv6 = items->spec;
1697                         if (mask.ipv6->hdr.proto) {
1698                                 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1699                                                 spec.ipv6->hdr.proto);
1700                                 ip_proto_set = 1;
1701                         }
1702                         if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.src_addr)) {
1703                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC,
1704                                              sizeof(spec.ipv6->hdr.src_addr),
1705                                              spec.ipv6->hdr.src_addr);
1706                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC_MASK,
1707                                              sizeof(mask.ipv6->hdr.src_addr),
1708                                              mask.ipv6->hdr.src_addr);
1709                         }
1710                         if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.dst_addr)) {
1711                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST,
1712                                              sizeof(spec.ipv6->hdr.dst_addr),
1713                                              spec.ipv6->hdr.dst_addr);
1714                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST_MASK,
1715                                              sizeof(mask.ipv6->hdr.dst_addr),
1716                                              mask.ipv6->hdr.dst_addr);
1717                         }
1718                         break;
1719                 case RTE_FLOW_ITEM_TYPE_UDP:
1720                         item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
1721                         mask.udp = flow_tcf_item_mask
1722                                 (items, &rte_flow_item_udp_mask,
1723                                  &flow_tcf_mask_supported.udp,
1724                                  &flow_tcf_mask_empty.udp,
1725                                  sizeof(flow_tcf_mask_supported.udp),
1726                                  error);
1727                         assert(mask.udp);
1728                         if (!ip_proto_set)
1729                                 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1730                                                 IPPROTO_UDP);
1731                         if (mask.udp == &flow_tcf_mask_empty.udp)
1732                                 break;
1733                         spec.udp = items->spec;
1734                         if (mask.udp->hdr.src_port) {
1735                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_SRC,
1736                                                  spec.udp->hdr.src_port);
1737                                 mnl_attr_put_u16(nlh,
1738                                                  TCA_FLOWER_KEY_UDP_SRC_MASK,
1739                                                  mask.udp->hdr.src_port);
1740                         }
1741                         if (mask.udp->hdr.dst_port) {
1742                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_DST,
1743                                                  spec.udp->hdr.dst_port);
1744                                 mnl_attr_put_u16(nlh,
1745                                                  TCA_FLOWER_KEY_UDP_DST_MASK,
1746                                                  mask.udp->hdr.dst_port);
1747                         }
1748                         break;
1749                 case RTE_FLOW_ITEM_TYPE_TCP:
1750                         item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
1751                         mask.tcp = flow_tcf_item_mask
1752                                 (items, &rte_flow_item_tcp_mask,
1753                                  &flow_tcf_mask_supported.tcp,
1754                                  &flow_tcf_mask_empty.tcp,
1755                                  sizeof(flow_tcf_mask_supported.tcp),
1756                                  error);
1757                         assert(mask.tcp);
1758                         if (!ip_proto_set)
1759                                 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1760                                                 IPPROTO_TCP);
1761                         if (mask.tcp == &flow_tcf_mask_empty.tcp)
1762                                 break;
1763                         spec.tcp = items->spec;
1764                         if (mask.tcp->hdr.src_port) {
1765                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_SRC,
1766                                                  spec.tcp->hdr.src_port);
1767                                 mnl_attr_put_u16(nlh,
1768                                                  TCA_FLOWER_KEY_TCP_SRC_MASK,
1769                                                  mask.tcp->hdr.src_port);
1770                         }
1771                         if (mask.tcp->hdr.dst_port) {
1772                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_DST,
1773                                                  spec.tcp->hdr.dst_port);
1774                                 mnl_attr_put_u16(nlh,
1775                                                  TCA_FLOWER_KEY_TCP_DST_MASK,
1776                                                  mask.tcp->hdr.dst_port);
1777                         }
1778                         if (mask.tcp->hdr.tcp_flags) {
1779                                 mnl_attr_put_u16
1780                                         (nlh,
1781                                          TCA_FLOWER_KEY_TCP_FLAGS,
1782                                          rte_cpu_to_be_16
1783                                                 (spec.tcp->hdr.tcp_flags));
1784                                 mnl_attr_put_u16
1785                                         (nlh,
1786                                          TCA_FLOWER_KEY_TCP_FLAGS_MASK,
1787                                          rte_cpu_to_be_16
1788                                                 (mask.tcp->hdr.tcp_flags));
1789                         }
1790                         break;
1791                 default:
1792                         return rte_flow_error_set(error, ENOTSUP,
1793                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1794                                                   NULL, "item not supported");
1795                 }
1796         }
1797         na_flower_act = mnl_attr_nest_start(nlh, TCA_FLOWER_ACT);
1798         na_act_index_cur = 1;
1799         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1800                 struct nlattr *na_act_index;
1801                 struct nlattr *na_act;
1802                 unsigned int vlan_act;
1803                 unsigned int i;
1804
1805                 switch (actions->type) {
1806                 case RTE_FLOW_ACTION_TYPE_VOID:
1807                         break;
1808                 case RTE_FLOW_ACTION_TYPE_PORT_ID:
1809                         conf.port_id = actions->conf;
1810                         if (conf.port_id->original)
1811                                 i = 0;
1812                         else
1813                                 for (i = 0; ptoi[i].ifindex; ++i)
1814                                         if (ptoi[i].port_id == conf.port_id->id)
1815                                                 break;
1816                         assert(ptoi[i].ifindex);
1817                         na_act_index =
1818                                 mnl_attr_nest_start(nlh, na_act_index_cur++);
1819                         assert(na_act_index);
1820                         mnl_attr_put_strz(nlh, TCA_ACT_KIND, "mirred");
1821                         na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
1822                         assert(na_act);
1823                         mnl_attr_put(nlh, TCA_MIRRED_PARMS,
1824                                      sizeof(struct tc_mirred),
1825                                      &(struct tc_mirred){
1826                                         .action = TC_ACT_STOLEN,
1827                                         .eaction = TCA_EGRESS_REDIR,
1828                                         .ifindex = ptoi[i].ifindex,
1829                                      });
1830                         mnl_attr_nest_end(nlh, na_act);
1831                         mnl_attr_nest_end(nlh, na_act_index);
1832                         break;
1833                 case RTE_FLOW_ACTION_TYPE_JUMP:
1834                         conf.jump = actions->conf;
1835                         na_act_index =
1836                                 mnl_attr_nest_start(nlh, na_act_index_cur++);
1837                         assert(na_act_index);
1838                         mnl_attr_put_strz(nlh, TCA_ACT_KIND, "gact");
1839                         na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
1840                         assert(na_act);
1841                         mnl_attr_put(nlh, TCA_GACT_PARMS,
1842                                      sizeof(struct tc_gact),
1843                                      &(struct tc_gact){
1844                                         .action = TC_ACT_GOTO_CHAIN |
1845                                                   conf.jump->group,
1846                                      });
1847                         mnl_attr_nest_end(nlh, na_act);
1848                         mnl_attr_nest_end(nlh, na_act_index);
1849                         break;
1850                 case RTE_FLOW_ACTION_TYPE_DROP:
1851                         na_act_index =
1852                                 mnl_attr_nest_start(nlh, na_act_index_cur++);
1853                         assert(na_act_index);
1854                         mnl_attr_put_strz(nlh, TCA_ACT_KIND, "gact");
1855                         na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
1856                         assert(na_act);
1857                         mnl_attr_put(nlh, TCA_GACT_PARMS,
1858                                      sizeof(struct tc_gact),
1859                                      &(struct tc_gact){
1860                                         .action = TC_ACT_SHOT,
1861                                      });
1862                         mnl_attr_nest_end(nlh, na_act);
1863                         mnl_attr_nest_end(nlh, na_act_index);
1864                         break;
1865                 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
1866                         conf.of_push_vlan = NULL;
1867                         vlan_act = TCA_VLAN_ACT_POP;
1868                         goto action_of_vlan;
1869                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
1870                         conf.of_push_vlan = actions->conf;
1871                         vlan_act = TCA_VLAN_ACT_PUSH;
1872                         goto action_of_vlan;
1873                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
1874                         conf.of_set_vlan_vid = actions->conf;
1875                         if (na_vlan_id)
1876                                 goto override_na_vlan_id;
1877                         vlan_act = TCA_VLAN_ACT_MODIFY;
1878                         goto action_of_vlan;
1879                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
1880                         conf.of_set_vlan_pcp = actions->conf;
1881                         if (na_vlan_priority)
1882                                 goto override_na_vlan_priority;
1883                         vlan_act = TCA_VLAN_ACT_MODIFY;
1884                         goto action_of_vlan;
1885 action_of_vlan:
1886                         na_act_index =
1887                                 mnl_attr_nest_start(nlh, na_act_index_cur++);
1888                         assert(na_act_index);
1889                         mnl_attr_put_strz(nlh, TCA_ACT_KIND, "vlan");
1890                         na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
1891                         assert(na_act);
1892                         mnl_attr_put(nlh, TCA_VLAN_PARMS,
1893                                      sizeof(struct tc_vlan),
1894                                      &(struct tc_vlan){
1895                                         .action = TC_ACT_PIPE,
1896                                         .v_action = vlan_act,
1897                                      });
1898                         if (vlan_act == TCA_VLAN_ACT_POP) {
1899                                 mnl_attr_nest_end(nlh, na_act);
1900                                 mnl_attr_nest_end(nlh, na_act_index);
1901                                 break;
1902                         }
1903                         if (vlan_act == TCA_VLAN_ACT_PUSH)
1904                                 mnl_attr_put_u16(nlh,
1905                                                  TCA_VLAN_PUSH_VLAN_PROTOCOL,
1906                                                  conf.of_push_vlan->ethertype);
1907                         na_vlan_id = mnl_nlmsg_get_payload_tail(nlh);
1908                         mnl_attr_put_u16(nlh, TCA_VLAN_PAD, 0);
1909                         na_vlan_priority = mnl_nlmsg_get_payload_tail(nlh);
1910                         mnl_attr_put_u8(nlh, TCA_VLAN_PAD, 0);
1911                         mnl_attr_nest_end(nlh, na_act);
1912                         mnl_attr_nest_end(nlh, na_act_index);
1913                         if (actions->type ==
1914                             RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID) {
1915 override_na_vlan_id:
1916                                 na_vlan_id->nla_type = TCA_VLAN_PUSH_VLAN_ID;
1917                                 *(uint16_t *)mnl_attr_get_payload(na_vlan_id) =
1918                                         rte_be_to_cpu_16
1919                                         (conf.of_set_vlan_vid->vlan_vid);
1920                         } else if (actions->type ==
1921                                    RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP) {
1922 override_na_vlan_priority:
1923                                 na_vlan_priority->nla_type =
1924                                         TCA_VLAN_PUSH_VLAN_PRIORITY;
1925                                 *(uint8_t *)mnl_attr_get_payload
1926                                         (na_vlan_priority) =
1927                                         conf.of_set_vlan_pcp->vlan_pcp;
1928                         }
1929                         break;
1930                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
1931                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
1932                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
1933                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
1934                 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
1935                 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
1936                         na_act_index =
1937                                 mnl_attr_nest_start(nlh, na_act_index_cur++);
1938                         flow_tcf_create_pedit_mnl_msg(nlh,
1939                                                       &actions, item_flags);
1940                         mnl_attr_nest_end(nlh, na_act_index);
1941                         break;
1942                 default:
1943                         return rte_flow_error_set(error, ENOTSUP,
1944                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1945                                                   actions,
1946                                                   "action not supported");
1947                 }
1948         }
1949         assert(na_flower);
1950         assert(na_flower_act);
1951         mnl_attr_nest_end(nlh, na_flower_act);
1952         mnl_attr_nest_end(nlh, na_flower);
1953         return 0;
1954 }
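
/*
 * For illustration (approximate, not authoritative): for a flow such as
 * "eth / ipv4 => port_id", the message built above nests as
 *
 *   tcmsg (tcm_ifindex, ingress parent, priority and ETH_P_ALL in tcm_info)
 *     TCA_KIND = "flower"
 *     TCA_OPTIONS
 *       TCA_FLOWER_FLAGS = TCA_CLS_FLAGS_SKIP_SW
 *       TCA_FLOWER_KEY_ETH_TYPE, TCA_FLOWER_KEY_IPV4_* ...
 *       TCA_FLOWER_ACT
 *         1 (action index)
 *           TCA_ACT_KIND = "mirred"
 *           TCA_ACT_OPTIONS
 *             TCA_MIRRED_PARMS (TC_ACT_STOLEN, TCA_EGRESS_REDIR, ifindex)
 */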
1955
1956 /**
1957  * Send Netlink message with acknowledgment.
1958  *
1959  * @param nl
1960  *   Libmnl socket to use.
1961  * @param nlh
1962  *   Message to send. This function always raises the NLM_F_ACK flag before
1963  *   sending.
1964  *
1965  * @return
1966  *   0 on success, a negative errno value otherwise and rte_errno is set.
1967  */
1968 static int
1969 flow_tcf_nl_ack(struct mnl_socket *nl, struct nlmsghdr *nlh)
1970 {
1971         alignas(struct nlmsghdr)
1972         uint8_t ans[mnl_nlmsg_size(sizeof(struct nlmsgerr)) +
1973                     nlh->nlmsg_len - sizeof(*nlh)];
1974         uint32_t seq = random();
1975         int ret;
1976
1977         nlh->nlmsg_flags |= NLM_F_ACK;
1978         nlh->nlmsg_seq = seq;
1979         ret = mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
1980         if (ret != -1)
1981                 ret = mnl_socket_recvfrom(nl, ans, sizeof(ans));
1982         if (ret != -1)
1983                 ret = mnl_cb_run
1984                         (ans, ret, seq, mnl_socket_get_portid(nl), NULL, NULL);
1985         if (ret > 0)
1986                 return 0;
1987         rte_errno = errno;
1988         return -rte_errno;
1989 }
1990
1991 /**
1992  * Apply flow to E-Switch by sending Netlink message.
1993  *
1994  * @param[in] dev
1995  *   Pointer to Ethernet device.
1996  * @param[in, out] flow
1997  *   Pointer to the sub flow.
1998  * @param[out] error
1999  *   Pointer to the error structure.
2000  *
2001  * @return
2002  *   0 on success, a negative errno value otherwise and rte_errno is set.
2003  */
2004 static int
2005 flow_tcf_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
2006                struct rte_flow_error *error)
2007 {
2008         struct priv *priv = dev->data->dev_private;
2009         struct mnl_socket *nl = priv->mnl_socket;
2010         struct mlx5_flow *dev_flow;
2011         struct nlmsghdr *nlh;
2012
2013         dev_flow = LIST_FIRST(&flow->dev_flows);
2014         /* E-Switch flow can't be expanded. */
2015         assert(!LIST_NEXT(dev_flow, next));
2016         nlh = dev_flow->tcf.nlh;
2017         nlh->nlmsg_type = RTM_NEWTFILTER;
2018         nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
2019         if (!flow_tcf_nl_ack(nl, nlh))
2020                 return 0;
2021         return rte_flow_error_set(error, rte_errno,
2022                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
2023                                   "netlink: failed to create TC flow rule");
2024 }
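
/*
 * For illustration: the request above is roughly what
 *
 *   tc filter add dev <ifname> ingress protocol all flower skip_sw \
 *      action mirred egress redirect dev <peer>
 *
 * would issue for a port_id flow, except that it is sent over Netlink
 * directly and NLM_F_EXCL makes an already existing rule an error.
 */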
2025
2026 /**
2027  * Remove flow from E-Switch by sending Netlink message.
2028  *
2029  * @param[in] dev
2030  *   Pointer to Ethernet device.
2031  * @param[in, out] flow
2032  *   Pointer to the sub flow.
2033  */
2034 static void
2035 flow_tcf_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
2036 {
2037         struct priv *priv = dev->data->dev_private;
2038         struct mnl_socket *nl = priv->mnl_socket;
2039         struct mlx5_flow *dev_flow;
2040         struct nlmsghdr *nlh;
2041
2042         if (!flow)
2043                 return;
2044         dev_flow = LIST_FIRST(&flow->dev_flows);
2045         if (!dev_flow)
2046                 return;
2047         /* E-Switch flow can't be expanded. */
2048         assert(!LIST_NEXT(dev_flow, next));
2049         nlh = dev_flow->tcf.nlh;
2050         nlh->nlmsg_type = RTM_DELTFILTER;
2051         nlh->nlmsg_flags = NLM_F_REQUEST;
2052         flow_tcf_nl_ack(nl, nlh);
2053 }
2054
2055 /**
2056  * Remove flow from E-Switch and release resources of the device flow.
2057  *
2058  * @param[in] dev
2059  *   Pointer to Ethernet device.
2060  * @param[in, out] flow
2061  *   Pointer to the sub flow.
2062  */
2063 static void
2064 flow_tcf_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
2065 {
2066         struct mlx5_flow *dev_flow;
2067
2068         if (!flow)
2069                 return;
2070         flow_tcf_remove(dev, flow);
2071         dev_flow = LIST_FIRST(&flow->dev_flows);
2072         if (!dev_flow)
2073                 return;
2074         /* E-Switch flow can't be expanded. */
2075         assert(!LIST_NEXT(dev_flow, next));
2076         LIST_REMOVE(dev_flow, next);
2077         rte_free(dev_flow);
2078 }
2079
2080 const struct mlx5_flow_driver_ops mlx5_flow_tcf_drv_ops = {
2081         .validate = flow_tcf_validate,
2082         .prepare = flow_tcf_prepare,
2083         .translate = flow_tcf_translate,
2084         .apply = flow_tcf_apply,
2085         .remove = flow_tcf_remove,
2086         .destroy = flow_tcf_destroy,
2087 };
2088
2089 /**
2090  * Initialize ingress qdisc of a given network interface.
2091  *
2092  * @param nl
2093  *   Libmnl socket of the @p NETLINK_ROUTE kind.
2094  * @param ifindex
2095  *   Index of network interface to initialize.
2096  * @param[out] error
2097  *   Perform verbose error reporting if not NULL.
2098  *
2099  * @return
2100  *   0 on success, a negative errno value otherwise and rte_errno is set.
2101  */
2102 int
2103 mlx5_flow_tcf_init(struct mnl_socket *nl, unsigned int ifindex,
2104                    struct rte_flow_error *error)
2105 {
2106         struct nlmsghdr *nlh;
2107         struct tcmsg *tcm;
2108         alignas(struct nlmsghdr)
2109         uint8_t buf[mnl_nlmsg_size(sizeof(*tcm) + 128)];
2110
2111         /* Destroy existing ingress qdisc and everything attached to it. */
2112         nlh = mnl_nlmsg_put_header(buf);
2113         nlh->nlmsg_type = RTM_DELQDISC;
2114         nlh->nlmsg_flags = NLM_F_REQUEST;
2115         tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
2116         tcm->tcm_family = AF_UNSPEC;
2117         tcm->tcm_ifindex = ifindex;
2118         tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
2119         tcm->tcm_parent = TC_H_INGRESS;
2120         /* Ignore errors when qdisc is already absent. */
2121         if (flow_tcf_nl_ack(nl, nlh) &&
2122             rte_errno != EINVAL && rte_errno != ENOENT)
2123                 return rte_flow_error_set(error, rte_errno,
2124                                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
2125                                           "netlink: failed to remove ingress"
2126                                           " qdisc");
2127         /* Create fresh ingress qdisc. */
2128         nlh = mnl_nlmsg_put_header(buf);
2129         nlh->nlmsg_type = RTM_NEWQDISC;
2130         nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
2131         tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
2132         tcm->tcm_family = AF_UNSPEC;
2133         tcm->tcm_ifindex = ifindex;
2134         tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
2135         tcm->tcm_parent = TC_H_INGRESS;
2136         mnl_attr_put_strz_check(nlh, sizeof(buf), TCA_KIND, "ingress");
2137         if (flow_tcf_nl_ack(nl, nlh))
2138                 return rte_flow_error_set(error, rte_errno,
2139                                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
2140                                           "netlink: failed to create ingress"
2141                                           " qdisc");
2142         return 0;
2143 }
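
/*
 * For illustration: the two requests above are the Netlink equivalent of
 *
 *   tc qdisc del dev <ifname> ingress   (ENOENT/EINVAL ignored)
 *   tc qdisc add dev <ifname> ingress
 *
 * so that the interface always starts with a clean ingress qdisc for the
 * flower rules installed later.
 */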
2144
2145 /**
2146  * Create and configure a libmnl socket for Netlink flow rules.
2147  *
2148  * @return
2149  *   A valid libmnl socket object pointer on success, NULL otherwise and
2150  *   rte_errno is set.
2151  */
2152 struct mnl_socket *
2153 mlx5_flow_tcf_socket_create(void)
2154 {
2155         struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
2156
2157         if (nl) {
2158                 mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
2159                                       sizeof(int));
2160                 if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
2161                         return nl;
2162         }
2163         rte_errno = errno;
2164         if (nl)
2165                 mnl_socket_close(nl);
2166         return NULL;
2167 }
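
/*
 * Illustration only (not part of the original driver, never compiled): a
 * typical setup sequence combining the helpers in this file. The function
 * name and the way "ifindex" is obtained are hypothetical.
 */
#ifdef MLX5_FLOW_TCF_DOC_EXAMPLES
static int
doc_tcf_setup(unsigned int ifindex, struct rte_flow_error *error)
{
	struct mnl_socket *nl = mlx5_flow_tcf_socket_create();

	if (!nl)
		return -rte_errno;
	if (mlx5_flow_tcf_init(nl, ifindex, error)) {
		mlx5_flow_tcf_socket_destroy(nl);
		return -rte_errno;
	}
	/* Keep "nl" (e.g. in priv->mnl_socket) for later rule management. */
	return 0;
}
#endif /* MLX5_FLOW_TCF_DOC_EXAMPLES */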
2168
2169 /**
2170  * Destroy a libmnl socket.
2171  *
2172  * @param nl
2173  *   Libmnl socket of the @p NETLINK_ROUTE kind.
2174  */
2175 void
2176 mlx5_flow_tcf_socket_destroy(struct mnl_socket *nl)
2177 {
2178         mnl_socket_close(nl);
2179 }