net/mlx5: support multiple groups and jump action
[dpdk.git] / drivers / net / mlx5 / mlx5_flow_tcf.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2018 6WIND S.A.
3  * Copyright 2018 Mellanox Technologies, Ltd
4  */
5
6 #include <assert.h>
7 #include <errno.h>
8 #include <libmnl/libmnl.h>
9 #include <linux/if_ether.h>
10 #include <linux/netlink.h>
11 #include <linux/pkt_cls.h>
12 #include <linux/pkt_sched.h>
13 #include <linux/rtnetlink.h>
14 #include <linux/tc_act/tc_gact.h>
15 #include <linux/tc_act/tc_mirred.h>
16 #include <netinet/in.h>
17 #include <stdalign.h>
18 #include <stdbool.h>
19 #include <stddef.h>
20 #include <stdint.h>
21 #include <stdlib.h>
22 #include <sys/socket.h>
23
24 #include <rte_byteorder.h>
25 #include <rte_errno.h>
26 #include <rte_ether.h>
27 #include <rte_flow.h>
28 #include <rte_malloc.h>
29
30 #include "mlx5.h"
31 #include "mlx5_flow.h"
32 #include "mlx5_autoconf.h"
33
#ifdef HAVE_TC_ACT_VLAN

#include <linux/tc_act/tc_vlan.h>

#else /* HAVE_TC_ACT_VLAN */

/*
 * Fallback definitions for hosts whose kernel headers predate the TC
 * "vlan" action; values mirror linux/tc_act/tc_vlan.h (UAPI).
 */
#define TCA_VLAN_ACT_POP 1
#define TCA_VLAN_ACT_PUSH 2
#define TCA_VLAN_ACT_MODIFY 3
#define TCA_VLAN_PARMS 2
#define TCA_VLAN_PUSH_VLAN_ID 3
#define TCA_VLAN_PUSH_VLAN_PROTOCOL 4
#define TCA_VLAN_PAD 5
#define TCA_VLAN_PUSH_VLAN_PRIORITY 6

/* Parameter block of the TC "vlan" action (mirrors the kernel struct). */
struct tc_vlan {
	tc_gen;
	int v_action; /* One of the TCA_VLAN_ACT_* values above. */
};

#endif /* HAVE_TC_ACT_VLAN */
55
#ifdef HAVE_TC_ACT_PEDIT

#include <linux/tc_act/tc_pedit.h>

#else /* HAVE_TC_ACT_PEDIT */

/*
 * Fallback definitions for hosts whose kernel headers predate the TC
 * "pedit" extended keys; values mirror linux/tc_act/tc_pedit.h (UAPI).
 */
enum {
	TCA_PEDIT_UNSPEC,
	TCA_PEDIT_TM,
	TCA_PEDIT_PARMS,
	TCA_PEDIT_PAD,
	TCA_PEDIT_PARMS_EX,
	TCA_PEDIT_KEYS_EX,
	TCA_PEDIT_KEY_EX,
	__TCA_PEDIT_MAX
};

enum {
	TCA_PEDIT_KEY_EX_HTYPE = 1,
	TCA_PEDIT_KEY_EX_CMD = 2,
	__TCA_PEDIT_KEY_EX_MAX
};

/* Header type an extended pedit key applies to. */
enum pedit_header_type {
	TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK = 0,
	TCA_PEDIT_KEY_EX_HDR_TYPE_ETH = 1,
	TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 = 2,
	TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 = 3,
	TCA_PEDIT_KEY_EX_HDR_TYPE_TCP = 4,
	TCA_PEDIT_KEY_EX_HDR_TYPE_UDP = 5,
	__PEDIT_HDR_TYPE_MAX,
};

enum pedit_cmd {
	TCA_PEDIT_KEY_EX_CMD_SET = 0,
	TCA_PEDIT_KEY_EX_CMD_ADD = 1,
	__PEDIT_CMD_MAX,
};

/* One 32-bit rewrite element of a pedit action. */
struct tc_pedit_key {
	__u32           mask;  /* AND */
	__u32           val;   /* XOR */
	__u32           off;   /* offset */
	__u32           at;
	__u32           offmask;
	__u32           shift;
};

struct tc_pedit_sel {
	tc_gen;
	unsigned char           nkeys; /* Number of entries in keys[]. */
	unsigned char           flags;
	struct tc_pedit_key     keys[0]; /* Variable-length key array. */
};

#endif /* HAVE_TC_ACT_PEDIT */
112
/*
 * Compatibility fallbacks: TC netlink attribute values normally provided
 * by kernel UAPI headers. Autoconf defines the HAVE_* guards when the
 * host headers already carry them; the literal values match the UAPI.
 */
/* Normally found in linux/netlink.h. */
#ifndef NETLINK_CAP_ACK
#define NETLINK_CAP_ACK 10
#endif

/* Normally found in linux/pkt_sched.h. */
#ifndef TC_H_MIN_INGRESS
#define TC_H_MIN_INGRESS 0xfff2u
#endif

/* Normally found in linux/pkt_cls.h. */
#ifndef TCA_CLS_FLAGS_SKIP_SW
#define TCA_CLS_FLAGS_SKIP_SW (1 << 1)
#endif
#ifndef HAVE_TCA_CHAIN
#define TCA_CHAIN 11
#endif
#ifndef HAVE_TCA_FLOWER_ACT
#define TCA_FLOWER_ACT 3
#endif
#ifndef HAVE_TCA_FLOWER_FLAGS
#define TCA_FLOWER_FLAGS 22
#endif
#ifndef HAVE_TCA_FLOWER_KEY_ETH_TYPE
#define TCA_FLOWER_KEY_ETH_TYPE 8
#endif
#ifndef HAVE_TCA_FLOWER_KEY_ETH_DST
#define TCA_FLOWER_KEY_ETH_DST 4
#endif
#ifndef HAVE_TCA_FLOWER_KEY_ETH_DST_MASK
#define TCA_FLOWER_KEY_ETH_DST_MASK 5
#endif
#ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC
#define TCA_FLOWER_KEY_ETH_SRC 6
#endif
#ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC_MASK
#define TCA_FLOWER_KEY_ETH_SRC_MASK 7
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IP_PROTO
#define TCA_FLOWER_KEY_IP_PROTO 9
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC
#define TCA_FLOWER_KEY_IPV4_SRC 10
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC_MASK
#define TCA_FLOWER_KEY_IPV4_SRC_MASK 11
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST
#define TCA_FLOWER_KEY_IPV4_DST 12
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST_MASK
#define TCA_FLOWER_KEY_IPV4_DST_MASK 13
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC
#define TCA_FLOWER_KEY_IPV6_SRC 14
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC_MASK
#define TCA_FLOWER_KEY_IPV6_SRC_MASK 15
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST
#define TCA_FLOWER_KEY_IPV6_DST 16
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST_MASK
#define TCA_FLOWER_KEY_IPV6_DST_MASK 17
#endif
#ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC
#define TCA_FLOWER_KEY_TCP_SRC 18
#endif
#ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC_MASK
#define TCA_FLOWER_KEY_TCP_SRC_MASK 35
#endif
#ifndef HAVE_TCA_FLOWER_KEY_TCP_DST
#define TCA_FLOWER_KEY_TCP_DST 19
#endif
#ifndef HAVE_TCA_FLOWER_KEY_TCP_DST_MASK
#define TCA_FLOWER_KEY_TCP_DST_MASK 36
#endif
#ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC
#define TCA_FLOWER_KEY_UDP_SRC 20
#endif
#ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC_MASK
#define TCA_FLOWER_KEY_UDP_SRC_MASK 37
#endif
#ifndef HAVE_TCA_FLOWER_KEY_UDP_DST
#define TCA_FLOWER_KEY_UDP_DST 21
#endif
#ifndef HAVE_TCA_FLOWER_KEY_UDP_DST_MASK
#define TCA_FLOWER_KEY_UDP_DST_MASK 38
#endif
#ifndef HAVE_TCA_FLOWER_KEY_VLAN_ID
#define TCA_FLOWER_KEY_VLAN_ID 23
#endif
#ifndef HAVE_TCA_FLOWER_KEY_VLAN_PRIO
#define TCA_FLOWER_KEY_VLAN_PRIO 24
#endif
#ifndef HAVE_TCA_FLOWER_KEY_VLAN_ETH_TYPE
#define TCA_FLOWER_KEY_VLAN_ETH_TYPE 25
#endif
#ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS
#define TCA_FLOWER_KEY_TCP_FLAGS 71
#endif
#ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS_MASK
#define TCA_FLOWER_KEY_TCP_FLAGS_MASK 72
#endif
/* TC action opcode: continue classification on another chain. */
#ifndef HAVE_TC_ACT_GOTO_CHAIN
#define TC_ACT_GOTO_CHAIN 0x20000000
#endif

#ifndef IPV6_ADDR_LEN
#define IPV6_ADDR_LEN 16
#endif

#ifndef IPV4_ADDR_LEN
#define IPV4_ADDR_LEN 4
#endif

#ifndef TP_PORT_LEN
#define TP_PORT_LEN 2 /* Transport Port (UDP/TCP) Length */
#endif
232
/** Empty masks for known item types. */
static const union {
	struct rte_flow_item_port_id port_id;
	struct rte_flow_item_eth eth;
	struct rte_flow_item_vlan vlan;
	struct rte_flow_item_ipv4 ipv4;
	struct rte_flow_item_ipv6 ipv6;
	struct rte_flow_item_tcp tcp;
	struct rte_flow_item_udp udp;
} flow_tcf_mask_empty;

/**
 * Supported masks for known item types. Only the fields set below can
 * be matched by this backend; flow_tcf_item_mask() rejects masks with
 * bits outside these patterns.
 */
static const struct {
	struct rte_flow_item_port_id port_id;
	struct rte_flow_item_eth eth;
	struct rte_flow_item_vlan vlan;
	struct rte_flow_item_ipv4 ipv4;
	struct rte_flow_item_ipv6 ipv6;
	struct rte_flow_item_tcp tcp;
	struct rte_flow_item_udp udp;
} flow_tcf_mask_supported = {
	.port_id = {
		.id = 0xffffffff,
	},
	.eth = {
		.type = RTE_BE16(0xffff),
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
		.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
	},
	.vlan = {
		/* PCP and VID only, no DEI. */
		.tci = RTE_BE16(0xefff),
		.inner_type = RTE_BE16(0xffff),
	},
	.ipv4.hdr = {
		.next_proto_id = 0xff,
		.src_addr = RTE_BE32(0xffffffff),
		.dst_addr = RTE_BE32(0xffffffff),
	},
	.ipv6.hdr = {
		.proto = 0xff,
		.src_addr =
			"\xff\xff\xff\xff\xff\xff\xff\xff"
			"\xff\xff\xff\xff\xff\xff\xff\xff",
		.dst_addr =
			"\xff\xff\xff\xff\xff\xff\xff\xff"
			"\xff\xff\xff\xff\xff\xff\xff\xff",
	},
	.tcp.hdr = {
		.src_port = RTE_BE16(0xffff),
		.dst_port = RTE_BE16(0xffff),
		.tcp_flags = 0xff,
	},
	.udp.hdr = {
		.src_port = RTE_BE16(0xffff),
		.dst_port = RTE_BE16(0xffff),
	},
};
291
/* Helpers to pre-compute netlink attribute buffer space requirements. */
#define SZ_NLATTR_HDR MNL_ALIGN(sizeof(struct nlattr))
#define SZ_NLATTR_NEST SZ_NLATTR_HDR
#define SZ_NLATTR_DATA_OF(len) MNL_ALIGN(SZ_NLATTR_HDR + (len))
#define SZ_NLATTR_TYPE_OF(typ) SZ_NLATTR_DATA_OF(sizeof(typ))
#define SZ_NLATTR_STRZ_OF(str) SZ_NLATTR_DATA_OF(strlen(str) + 1)

/* Upper bound of the ptoi[] table (+2: own-port fallback and terminator). */
#define PTOI_TABLE_SZ_MAX(dev) (mlx5_dev_to_port_id((dev)->device, NULL, 0) + 2)

/** DPDK port to network interface index (ifindex) conversion. */
struct flow_tcf_ptoi {
	uint16_t port_id; /**< DPDK port ID. */
	unsigned int ifindex; /**< Network interface index. */
};

/* Due to a limitation on driver/FW. */
#define MLX5_TCF_GROUP_ID_MAX 3
#define MLX5_TCF_GROUP_PRIORITY_MAX 14

/* Fate actions: drop, forward to port, or jump to another group. */
#define MLX5_TCF_FATE_ACTIONS \
	(MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_PORT_ID | \
	 MLX5_FLOW_ACTION_JUMP)

#define MLX5_TCF_VLAN_ACTIONS \
	(MLX5_FLOW_ACTION_OF_POP_VLAN | MLX5_FLOW_ACTION_OF_PUSH_VLAN | \
	 MLX5_FLOW_ACTION_OF_SET_VLAN_VID | MLX5_FLOW_ACTION_OF_SET_VLAN_PCP)

/* Modify-header actions; all of them translate to one TC pedit action. */
#define MLX5_TCF_PEDIT_ACTIONS \
	(MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST | \
	 MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST | \
	 MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST)

/* Actions that carry a configuration payload (actions->conf). */
#define MLX5_TCF_CONFIG_ACTIONS \
	(MLX5_FLOW_ACTION_PORT_ID | MLX5_FLOW_ACTION_JUMP | \
	 MLX5_FLOW_ACTION_OF_PUSH_VLAN | MLX5_FLOW_ACTION_OF_SET_VLAN_VID | \
	 MLX5_FLOW_ACTION_OF_SET_VLAN_PCP | MLX5_TCF_PEDIT_ACTIONS)

#define MAX_PEDIT_KEYS 128
#define SZ_PEDIT_KEY_VAL 4 /* Each pedit key rewrites 4 bytes. */

/* Number of 4-byte pedit keys needed to cover "sz" bytes, rounded up. */
#define NUM_OF_PEDIT_KEYS(sz) \
	(((sz) / SZ_PEDIT_KEY_VAL) + (((sz) % SZ_PEDIT_KEY_VAL) ? 1 : 0))

/* Extended attributes (header type and command) paired with each key. */
struct pedit_key_ex {
	enum pedit_header_type htype;
	enum pedit_cmd cmd;
};

/* Scratch area accumulating pedit keys before the netlink message is built. */
struct pedit_parser {
	struct tc_pedit_sel sel;
	struct tc_pedit_key keys[MAX_PEDIT_KEYS];
	struct pedit_key_ex keys_ex[MAX_PEDIT_KEYS];
};
344
345
346 /**
347  * Set pedit key of transport (TCP/UDP) port value
348  *
349  * @param[in] actions
350  *   pointer to action specification
351  * @param[in,out] p_parser
352  *   pointer to pedit_parser
353  * @param[in] item_flags
354  *   flags of all items presented
355  */
356 static void
357 flow_tcf_pedit_key_set_tp_port(const struct rte_flow_action *actions,
358                                 struct pedit_parser *p_parser,
359                                 uint64_t item_flags)
360 {
361         int idx = p_parser->sel.nkeys;
362
363         if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
364                 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_UDP;
365         if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
366                 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_TCP;
367         p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
368         /* offset of src/dst port is same for TCP and UDP */
369         p_parser->keys[idx].off =
370                 actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_SRC ?
371                 offsetof(struct tcp_hdr, src_port) :
372                 offsetof(struct tcp_hdr, dst_port);
373         p_parser->keys[idx].mask = 0xFFFF0000;
374         p_parser->keys[idx].val =
375                 (__u32)((const struct rte_flow_action_set_tp *)
376                                 actions->conf)->port;
377         p_parser->sel.nkeys = (++idx);
378 }
379
380 /**
381  * Set pedit key of ipv6 address
382  *
383  * @param[in] actions
384  *   pointer to action specification
385  * @param[in,out] p_parser
386  *   pointer to pedit_parser
387  */
388 static void
389 flow_tcf_pedit_key_set_ipv6_addr(const struct rte_flow_action *actions,
390                                  struct pedit_parser *p_parser)
391 {
392         int idx = p_parser->sel.nkeys;
393         int keys = NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
394         int off_base =
395                 actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC ?
396                 offsetof(struct ipv6_hdr, src_addr) :
397                 offsetof(struct ipv6_hdr, dst_addr);
398         const struct rte_flow_action_set_ipv6 *conf =
399                 (const struct rte_flow_action_set_ipv6 *)actions->conf;
400
401         for (int i = 0; i < keys; i++, idx++) {
402                 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
403                 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
404                 p_parser->keys[idx].off = off_base + i * SZ_PEDIT_KEY_VAL;
405                 p_parser->keys[idx].mask = ~UINT32_MAX;
406                 memcpy(&p_parser->keys[idx].val,
407                         conf->ipv6_addr + i *  SZ_PEDIT_KEY_VAL,
408                         SZ_PEDIT_KEY_VAL);
409         }
410         p_parser->sel.nkeys += keys;
411 }
412
413 /**
414  * Set pedit key of ipv4 address
415  *
416  * @param[in] actions
417  *   pointer to action specification
418  * @param[in,out] p_parser
419  *   pointer to pedit_parser
420  */
421 static void
422 flow_tcf_pedit_key_set_ipv4_addr(const struct rte_flow_action *actions,
423                                  struct pedit_parser *p_parser)
424 {
425         int idx = p_parser->sel.nkeys;
426
427         p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
428         p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
429         p_parser->keys[idx].off =
430                 actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC ?
431                 offsetof(struct ipv4_hdr, src_addr) :
432                 offsetof(struct ipv4_hdr, dst_addr);
433         p_parser->keys[idx].mask = ~UINT32_MAX;
434         p_parser->keys[idx].val =
435                 ((const struct rte_flow_action_set_ipv4 *)
436                  actions->conf)->ipv4_addr;
437         p_parser->sel.nkeys = (++idx);
438 }
439
440 /**
441  * Create the pedit's na attribute in netlink message
442  * on pre-allocate message buffer
443  *
444  * @param[in,out] nl
445  *   pointer to pre-allocated netlink message buffer
446  * @param[in,out] actions
447  *   pointer to pointer of actions specification.
448  * @param[in,out] action_flags
449  *   pointer to actions flags
450  * @param[in] item_flags
451  *   flags of all item presented
452  */
453 static void
454 flow_tcf_create_pedit_mnl_msg(struct nlmsghdr *nl,
455                               const struct rte_flow_action **actions,
456                               uint64_t item_flags)
457 {
458         struct pedit_parser p_parser;
459         struct nlattr *na_act_options;
460         struct nlattr *na_pedit_keys;
461
462         memset(&p_parser, 0, sizeof(p_parser));
463         mnl_attr_put_strz(nl, TCA_ACT_KIND, "pedit");
464         na_act_options = mnl_attr_nest_start(nl, TCA_ACT_OPTIONS);
465         /* all modify header actions should be in one tc-pedit action */
466         for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
467                 switch ((*actions)->type) {
468                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
469                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
470                         flow_tcf_pedit_key_set_ipv4_addr(*actions, &p_parser);
471                         break;
472                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
473                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
474                         flow_tcf_pedit_key_set_ipv6_addr(*actions, &p_parser);
475                         break;
476                 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
477                 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
478                         flow_tcf_pedit_key_set_tp_port(*actions,
479                                                         &p_parser, item_flags);
480                         break;
481                 default:
482                         goto pedit_mnl_msg_done;
483                 }
484         }
485 pedit_mnl_msg_done:
486         p_parser.sel.action = TC_ACT_PIPE;
487         mnl_attr_put(nl, TCA_PEDIT_PARMS_EX,
488                      sizeof(p_parser.sel) +
489                      p_parser.sel.nkeys * sizeof(struct tc_pedit_key),
490                      &p_parser);
491         na_pedit_keys =
492                 mnl_attr_nest_start(nl, TCA_PEDIT_KEYS_EX | NLA_F_NESTED);
493         for (int i = 0; i < p_parser.sel.nkeys; i++) {
494                 struct nlattr *na_pedit_key =
495                         mnl_attr_nest_start(nl,
496                                             TCA_PEDIT_KEY_EX | NLA_F_NESTED);
497                 mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_HTYPE,
498                                  p_parser.keys_ex[i].htype);
499                 mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_CMD,
500                                  p_parser.keys_ex[i].cmd);
501                 mnl_attr_nest_end(nl, na_pedit_key);
502         }
503         mnl_attr_nest_end(nl, na_pedit_keys);
504         mnl_attr_nest_end(nl, na_act_options);
505         (*actions)--;
506 }
507
508 /**
509  * Calculate max memory size of one TC-pedit actions.
510  * One TC-pedit action can contain set of keys each defining
511  * a rewrite element (rte_flow action)
512  *
513  * @param[in,out] actions
514  *   actions specification.
515  * @param[in,out] action_flags
516  *   actions flags
517  * @param[in,out] size
518  *   accumulated size
519  * @return
520  *   Max memory size of one TC-pedit action
521  */
522 static int
523 flow_tcf_get_pedit_actions_size(const struct rte_flow_action **actions,
524                                 uint64_t *action_flags)
525 {
526         int pedit_size = 0;
527         int keys = 0;
528         uint64_t flags = 0;
529
530         pedit_size += SZ_NLATTR_NEST + /* na_act_index. */
531                       SZ_NLATTR_STRZ_OF("pedit") +
532                       SZ_NLATTR_NEST; /* TCA_ACT_OPTIONS. */
533         for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
534                 switch ((*actions)->type) {
535                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
536                         keys += NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN);
537                         flags |= MLX5_FLOW_ACTION_SET_IPV4_SRC;
538                         break;
539                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
540                         keys += NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN);
541                         flags |= MLX5_FLOW_ACTION_SET_IPV4_DST;
542                         break;
543                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
544                         keys += NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
545                         flags |= MLX5_FLOW_ACTION_SET_IPV6_SRC;
546                         break;
547                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
548                         keys += NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
549                         flags |= MLX5_FLOW_ACTION_SET_IPV6_DST;
550                         break;
551                 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
552                         /* TCP is as same as UDP */
553                         keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
554                         flags |= MLX5_FLOW_ACTION_SET_TP_SRC;
555                         break;
556                 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
557                         /* TCP is as same as UDP */
558                         keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
559                         flags |= MLX5_FLOW_ACTION_SET_TP_DST;
560                         break;
561                 default:
562                         goto get_pedit_action_size_done;
563                 }
564         }
565 get_pedit_action_size_done:
566         /* TCA_PEDIT_PARAMS_EX */
567         pedit_size +=
568                 SZ_NLATTR_DATA_OF(sizeof(struct tc_pedit_sel) +
569                                   keys * sizeof(struct tc_pedit_key));
570         pedit_size += SZ_NLATTR_NEST; /* TCA_PEDIT_KEYS */
571         pedit_size += keys *
572                       /* TCA_PEDIT_KEY_EX + HTYPE + CMD */
573                       (SZ_NLATTR_NEST + SZ_NLATTR_DATA_OF(2) +
574                        SZ_NLATTR_DATA_OF(2));
575         (*action_flags) |= flags;
576         (*actions)--;
577         return pedit_size;
578 }
579
580 /**
581  * Retrieve mask for pattern item.
582  *
583  * This function does basic sanity checks on a pattern item in order to
584  * return the most appropriate mask for it.
585  *
586  * @param[in] item
587  *   Item specification.
588  * @param[in] mask_default
589  *   Default mask for pattern item as specified by the flow API.
590  * @param[in] mask_supported
591  *   Mask fields supported by the implementation.
592  * @param[in] mask_empty
593  *   Empty mask to return when there is no specification.
594  * @param[out] error
595  *   Perform verbose error reporting if not NULL.
596  *
597  * @return
598  *   Either @p item->mask or one of the mask parameters on success, NULL
599  *   otherwise and rte_errno is set.
600  */
601 static const void *
602 flow_tcf_item_mask(const struct rte_flow_item *item, const void *mask_default,
603                    const void *mask_supported, const void *mask_empty,
604                    size_t mask_size, struct rte_flow_error *error)
605 {
606         const uint8_t *mask;
607         size_t i;
608
609         /* item->last and item->mask cannot exist without item->spec. */
610         if (!item->spec && (item->mask || item->last)) {
611                 rte_flow_error_set(error, EINVAL,
612                                    RTE_FLOW_ERROR_TYPE_ITEM, item,
613                                    "\"mask\" or \"last\" field provided without"
614                                    " a corresponding \"spec\"");
615                 return NULL;
616         }
617         /* No spec, no mask, no problem. */
618         if (!item->spec)
619                 return mask_empty;
620         mask = item->mask ? item->mask : mask_default;
621         assert(mask);
622         /*
623          * Single-pass check to make sure that:
624          * - Mask is supported, no bits are set outside mask_supported.
625          * - Both item->spec and item->last are included in mask.
626          */
627         for (i = 0; i != mask_size; ++i) {
628                 if (!mask[i])
629                         continue;
630                 if ((mask[i] | ((const uint8_t *)mask_supported)[i]) !=
631                     ((const uint8_t *)mask_supported)[i]) {
632                         rte_flow_error_set(error, ENOTSUP,
633                                            RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
634                                            "unsupported field found"
635                                            " in \"mask\"");
636                         return NULL;
637                 }
638                 if (item->last &&
639                     (((const uint8_t *)item->spec)[i] & mask[i]) !=
640                     (((const uint8_t *)item->last)[i] & mask[i])) {
641                         rte_flow_error_set(error, EINVAL,
642                                            RTE_FLOW_ERROR_TYPE_ITEM_LAST,
643                                            item->last,
644                                            "range between \"spec\" and \"last\""
645                                            " not comprised in \"mask\"");
646                         return NULL;
647                 }
648         }
649         return mask;
650 }
651
652 /**
653  * Build a conversion table between port ID and ifindex.
654  *
655  * @param[in] dev
656  *   Pointer to Ethernet device.
657  * @param[out] ptoi
658  *   Pointer to ptoi table.
659  * @param[in] len
660  *   Size of ptoi table provided.
661  *
662  * @return
663  *   Size of ptoi table filled.
664  */
665 static unsigned int
666 flow_tcf_build_ptoi_table(struct rte_eth_dev *dev, struct flow_tcf_ptoi *ptoi,
667                           unsigned int len)
668 {
669         unsigned int n = mlx5_dev_to_port_id(dev->device, NULL, 0);
670         uint16_t port_id[n + 1];
671         unsigned int i;
672         unsigned int own = 0;
673
674         /* At least one port is needed when no switch domain is present. */
675         if (!n) {
676                 n = 1;
677                 port_id[0] = dev->data->port_id;
678         } else {
679                 n = RTE_MIN(mlx5_dev_to_port_id(dev->device, port_id, n), n);
680         }
681         if (n > len)
682                 return 0;
683         for (i = 0; i != n; ++i) {
684                 struct rte_eth_dev_info dev_info;
685
686                 rte_eth_dev_info_get(port_id[i], &dev_info);
687                 if (port_id[i] == dev->data->port_id)
688                         own = i;
689                 ptoi[i].port_id = port_id[i];
690                 ptoi[i].ifindex = dev_info.if_index;
691         }
692         /* Ensure first entry of ptoi[] is the current device. */
693         if (own) {
694                 ptoi[n] = ptoi[0];
695                 ptoi[0] = ptoi[own];
696                 ptoi[own] = ptoi[n];
697         }
698         /* An entry with zero ifindex terminates ptoi[]. */
699         ptoi[n].port_id = 0;
700         ptoi[n].ifindex = 0;
701         return n;
702 }
703
704 /**
705  * Verify the @p attr will be correctly understood by the E-switch.
706  *
707  * @param[in] attr
708  *   Pointer to flow attributes
709  * @param[out] error
710  *   Pointer to error structure.
711  *
712  * @return
713  *   0 on success, a negative errno value otherwise and rte_errno is set.
714  */
715 static int
716 flow_tcf_validate_attributes(const struct rte_flow_attr *attr,
717                              struct rte_flow_error *error)
718 {
719         /*
720          * Supported attributes: groups, some priorities and ingress only.
721          * group is supported only if kernel supports chain. Don't care about
722          * transfer as it is the caller's problem.
723          */
724         if (attr->group > MLX5_TCF_GROUP_ID_MAX)
725                 return rte_flow_error_set(error, ENOTSUP,
726                                           RTE_FLOW_ERROR_TYPE_ATTR_GROUP, attr,
727                                           "group ID larger than "
728                                           RTE_STR(MLX5_TCF_GROUP_ID_MAX)
729                                           " isn't supported");
730         else if (attr->group > 0 &&
731                  attr->priority > MLX5_TCF_GROUP_PRIORITY_MAX)
732                 return rte_flow_error_set(error, ENOTSUP,
733                                           RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
734                                           attr,
735                                           "lowest priority level is "
736                                           RTE_STR(MLX5_TCF_GROUP_PRIORITY_MAX)
737                                           " when group is configured");
738         else if (attr->priority > 0xfffe)
739                 return rte_flow_error_set(error, ENOTSUP,
740                                           RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
741                                           attr,
742                                           "lowest priority level is 0xfffe");
743         if (!attr->ingress)
744                 return rte_flow_error_set(error, EINVAL,
745                                           RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
746                                           attr, "only ingress is supported");
747         if (attr->egress)
748                 return rte_flow_error_set(error, ENOTSUP,
749                                           RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
750                                           attr, "egress is not supported");
751         return 0;
752 }
753
754 /**
755  * Validate flow for E-Switch.
756  *
757  * @param[in] priv
758  *   Pointer to the priv structure.
759  * @param[in] attr
760  *   Pointer to the flow attributes.
761  * @param[in] items
762  *   Pointer to the list of items.
763  * @param[in] actions
764  *   Pointer to the list of actions.
765  * @param[out] error
766  *   Pointer to the error structure.
767  *
768  * @return
769  *   0 on success, a negative errno value otherwise and rte_ernno is set.
770  */
771 static int
772 flow_tcf_validate(struct rte_eth_dev *dev,
773                   const struct rte_flow_attr *attr,
774                   const struct rte_flow_item items[],
775                   const struct rte_flow_action actions[],
776                   struct rte_flow_error *error)
777 {
778         union {
779                 const struct rte_flow_item_port_id *port_id;
780                 const struct rte_flow_item_eth *eth;
781                 const struct rte_flow_item_vlan *vlan;
782                 const struct rte_flow_item_ipv4 *ipv4;
783                 const struct rte_flow_item_ipv6 *ipv6;
784                 const struct rte_flow_item_tcp *tcp;
785                 const struct rte_flow_item_udp *udp;
786         } spec, mask;
787         union {
788                 const struct rte_flow_action_port_id *port_id;
789                 const struct rte_flow_action_jump *jump;
790                 const struct rte_flow_action_of_push_vlan *of_push_vlan;
791                 const struct rte_flow_action_of_set_vlan_vid *
792                         of_set_vlan_vid;
793                 const struct rte_flow_action_of_set_vlan_pcp *
794                         of_set_vlan_pcp;
795                 const struct rte_flow_action_set_ipv4 *set_ipv4;
796                 const struct rte_flow_action_set_ipv6 *set_ipv6;
797         } conf;
798         uint32_t item_flags = 0;
799         uint32_t action_flags = 0;
800         uint8_t next_protocol = -1;
801         unsigned int tcm_ifindex = 0;
802         uint8_t pedit_validated = 0;
803         struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
804         struct rte_eth_dev *port_id_dev = NULL;
805         bool in_port_id_set;
806         int ret;
807
808         claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
809                                                 PTOI_TABLE_SZ_MAX(dev)));
810         ret = flow_tcf_validate_attributes(attr, error);
811         if (ret < 0)
812                 return ret;
813         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
814                 unsigned int i;
815
816                 switch (items->type) {
817                 case RTE_FLOW_ITEM_TYPE_VOID:
818                         break;
819                 case RTE_FLOW_ITEM_TYPE_PORT_ID:
820                         mask.port_id = flow_tcf_item_mask
821                                 (items, &rte_flow_item_port_id_mask,
822                                  &flow_tcf_mask_supported.port_id,
823                                  &flow_tcf_mask_empty.port_id,
824                                  sizeof(flow_tcf_mask_supported.port_id),
825                                  error);
826                         if (!mask.port_id)
827                                 return -rte_errno;
828                         if (mask.port_id == &flow_tcf_mask_empty.port_id) {
829                                 in_port_id_set = 1;
830                                 break;
831                         }
832                         spec.port_id = items->spec;
833                         if (mask.port_id->id && mask.port_id->id != 0xffffffff)
834                                 return rte_flow_error_set
835                                         (error, ENOTSUP,
836                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
837                                          mask.port_id,
838                                          "no support for partial mask on"
839                                          " \"id\" field");
840                         if (!mask.port_id->id)
841                                 i = 0;
842                         else
843                                 for (i = 0; ptoi[i].ifindex; ++i)
844                                         if (ptoi[i].port_id == spec.port_id->id)
845                                                 break;
846                         if (!ptoi[i].ifindex)
847                                 return rte_flow_error_set
848                                         (error, ENODEV,
849                                          RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
850                                          spec.port_id,
851                                          "missing data to convert port ID to"
852                                          " ifindex");
853                         if (in_port_id_set && ptoi[i].ifindex != tcm_ifindex)
854                                 return rte_flow_error_set
855                                         (error, ENOTSUP,
856                                          RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
857                                          spec.port_id,
858                                          "cannot match traffic for"
859                                          " several port IDs through"
860                                          " a single flow rule");
861                         tcm_ifindex = ptoi[i].ifindex;
862                         in_port_id_set = 1;
863                         break;
864                 case RTE_FLOW_ITEM_TYPE_ETH:
865                         ret = mlx5_flow_validate_item_eth(items, item_flags,
866                                                           error);
867                         if (ret < 0)
868                                 return ret;
869                         item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
870                         /* TODO:
871                          * Redundant check due to different supported mask.
872                          * Same for the rest of items.
873                          */
874                         mask.eth = flow_tcf_item_mask
875                                 (items, &rte_flow_item_eth_mask,
876                                  &flow_tcf_mask_supported.eth,
877                                  &flow_tcf_mask_empty.eth,
878                                  sizeof(flow_tcf_mask_supported.eth),
879                                  error);
880                         if (!mask.eth)
881                                 return -rte_errno;
882                         if (mask.eth->type && mask.eth->type !=
883                             RTE_BE16(0xffff))
884                                 return rte_flow_error_set
885                                         (error, ENOTSUP,
886                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
887                                          mask.eth,
888                                          "no support for partial mask on"
889                                          " \"type\" field");
890                         break;
891                 case RTE_FLOW_ITEM_TYPE_VLAN:
892                         ret = mlx5_flow_validate_item_vlan(items, item_flags,
893                                                            error);
894                         if (ret < 0)
895                                 return ret;
896                         item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
897                         mask.vlan = flow_tcf_item_mask
898                                 (items, &rte_flow_item_vlan_mask,
899                                  &flow_tcf_mask_supported.vlan,
900                                  &flow_tcf_mask_empty.vlan,
901                                  sizeof(flow_tcf_mask_supported.vlan),
902                                  error);
903                         if (!mask.vlan)
904                                 return -rte_errno;
905                         if ((mask.vlan->tci & RTE_BE16(0xe000) &&
906                              (mask.vlan->tci & RTE_BE16(0xe000)) !=
907                               RTE_BE16(0xe000)) ||
908                             (mask.vlan->tci & RTE_BE16(0x0fff) &&
909                              (mask.vlan->tci & RTE_BE16(0x0fff)) !=
910                               RTE_BE16(0x0fff)) ||
911                             (mask.vlan->inner_type &&
912                              mask.vlan->inner_type != RTE_BE16(0xffff)))
913                                 return rte_flow_error_set
914                                         (error, ENOTSUP,
915                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
916                                          mask.vlan,
917                                          "no support for partial masks on"
918                                          " \"tci\" (PCP and VID parts) and"
919                                          " \"inner_type\" fields");
920                         break;
921                 case RTE_FLOW_ITEM_TYPE_IPV4:
922                         ret = mlx5_flow_validate_item_ipv4(items, item_flags,
923                                                            error);
924                         if (ret < 0)
925                                 return ret;
926                         item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
927                         mask.ipv4 = flow_tcf_item_mask
928                                 (items, &rte_flow_item_ipv4_mask,
929                                  &flow_tcf_mask_supported.ipv4,
930                                  &flow_tcf_mask_empty.ipv4,
931                                  sizeof(flow_tcf_mask_supported.ipv4),
932                                  error);
933                         if (!mask.ipv4)
934                                 return -rte_errno;
935                         if (mask.ipv4->hdr.next_proto_id &&
936                             mask.ipv4->hdr.next_proto_id != 0xff)
937                                 return rte_flow_error_set
938                                         (error, ENOTSUP,
939                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
940                                          mask.ipv4,
941                                          "no support for partial mask on"
942                                          " \"hdr.next_proto_id\" field");
943                         else if (mask.ipv4->hdr.next_proto_id)
944                                 next_protocol =
945                                         ((const struct rte_flow_item_ipv4 *)
946                                          (items->spec))->hdr.next_proto_id;
947                         break;
948                 case RTE_FLOW_ITEM_TYPE_IPV6:
949                         ret = mlx5_flow_validate_item_ipv6(items, item_flags,
950                                                            error);
951                         if (ret < 0)
952                                 return ret;
953                         item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
954                         mask.ipv6 = flow_tcf_item_mask
955                                 (items, &rte_flow_item_ipv6_mask,
956                                  &flow_tcf_mask_supported.ipv6,
957                                  &flow_tcf_mask_empty.ipv6,
958                                  sizeof(flow_tcf_mask_supported.ipv6),
959                                  error);
960                         if (!mask.ipv6)
961                                 return -rte_errno;
962                         if (mask.ipv6->hdr.proto &&
963                             mask.ipv6->hdr.proto != 0xff)
964                                 return rte_flow_error_set
965                                         (error, ENOTSUP,
966                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
967                                          mask.ipv6,
968                                          "no support for partial mask on"
969                                          " \"hdr.proto\" field");
970                         else if (mask.ipv6->hdr.proto)
971                                 next_protocol =
972                                         ((const struct rte_flow_item_ipv6 *)
973                                          (items->spec))->hdr.proto;
974                         break;
975                 case RTE_FLOW_ITEM_TYPE_UDP:
976                         ret = mlx5_flow_validate_item_udp(items, item_flags,
977                                                           next_protocol, error);
978                         if (ret < 0)
979                                 return ret;
980                         item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
981                         mask.udp = flow_tcf_item_mask
982                                 (items, &rte_flow_item_udp_mask,
983                                  &flow_tcf_mask_supported.udp,
984                                  &flow_tcf_mask_empty.udp,
985                                  sizeof(flow_tcf_mask_supported.udp),
986                                  error);
987                         if (!mask.udp)
988                                 return -rte_errno;
989                         break;
990                 case RTE_FLOW_ITEM_TYPE_TCP:
991                         ret = mlx5_flow_validate_item_tcp
992                                              (items, item_flags,
993                                               next_protocol,
994                                               &flow_tcf_mask_supported.tcp,
995                                               error);
996                         if (ret < 0)
997                                 return ret;
998                         item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
999                         mask.tcp = flow_tcf_item_mask
1000                                 (items, &rte_flow_item_tcp_mask,
1001                                  &flow_tcf_mask_supported.tcp,
1002                                  &flow_tcf_mask_empty.tcp,
1003                                  sizeof(flow_tcf_mask_supported.tcp),
1004                                  error);
1005                         if (!mask.tcp)
1006                                 return -rte_errno;
1007                         break;
1008                 default:
1009                         return rte_flow_error_set(error, ENOTSUP,
1010                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1011                                                   NULL, "item not supported");
1012                 }
1013         }
1014         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1015                 unsigned int i;
1016                 uint32_t current_action_flag = 0;
1017
1018                 switch (actions->type) {
1019                 case RTE_FLOW_ACTION_TYPE_VOID:
1020                         break;
1021                 case RTE_FLOW_ACTION_TYPE_PORT_ID:
1022                         current_action_flag = MLX5_FLOW_ACTION_PORT_ID;
1023                         if (!actions->conf)
1024                                 break;
1025                         conf.port_id = actions->conf;
1026                         if (conf.port_id->original)
1027                                 i = 0;
1028                         else
1029                                 for (i = 0; ptoi[i].ifindex; ++i)
1030                                         if (ptoi[i].port_id == conf.port_id->id)
1031                                                 break;
1032                         if (!ptoi[i].ifindex)
1033                                 return rte_flow_error_set
1034                                         (error, ENODEV,
1035                                          RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1036                                          conf.port_id,
1037                                          "missing data to convert port ID to"
1038                                          " ifindex");
1039                         port_id_dev = &rte_eth_devices[conf.port_id->id];
1040                         break;
1041                 case RTE_FLOW_ACTION_TYPE_JUMP:
1042                         current_action_flag = MLX5_FLOW_ACTION_JUMP;
1043                         if (!actions->conf)
1044                                 break;
1045                         conf.jump = actions->conf;
1046                         if (attr->group >= conf.jump->group)
1047                                 return rte_flow_error_set
1048                                         (error, ENOTSUP,
1049                                          RTE_FLOW_ERROR_TYPE_ACTION,
1050                                          actions,
1051                                          "can jump only to a group forward");
1052                         break;
1053                 case RTE_FLOW_ACTION_TYPE_DROP:
1054                         current_action_flag = MLX5_FLOW_ACTION_DROP;
1055                         break;
1056                 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
1057                         current_action_flag = MLX5_FLOW_ACTION_OF_POP_VLAN;
1058                         break;
1059                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
1060                         current_action_flag = MLX5_FLOW_ACTION_OF_PUSH_VLAN;
1061                         break;
1062                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
1063                         if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
1064                                 return rte_flow_error_set
1065                                         (error, ENOTSUP,
1066                                          RTE_FLOW_ERROR_TYPE_ACTION, actions,
1067                                          "vlan modify is not supported,"
1068                                          " set action must follow push action");
1069                         current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
1070                         break;
1071                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
1072                         if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
1073                                 return rte_flow_error_set
1074                                         (error, ENOTSUP,
1075                                          RTE_FLOW_ERROR_TYPE_ACTION, actions,
1076                                          "vlan modify is not supported,"
1077                                          " set action must follow push action");
1078                         current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
1079                         break;
1080                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
1081                         current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_SRC;
1082                         break;
1083                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
1084                         current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_DST;
1085                         break;
1086                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
1087                         current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_SRC;
1088                         break;
1089                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
1090                         current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_DST;
1091                         break;
1092                 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
1093                         current_action_flag = MLX5_FLOW_ACTION_SET_TP_SRC;
1094                         break;
1095                 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
1096                         current_action_flag = MLX5_FLOW_ACTION_SET_TP_DST;
1097                         break;
1098                 default:
1099                         return rte_flow_error_set(error, ENOTSUP,
1100                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1101                                                   actions,
1102                                                   "action not supported");
1103                 }
1104                 if (current_action_flag & MLX5_TCF_CONFIG_ACTIONS) {
1105                         if (!actions->conf)
1106                                 return rte_flow_error_set(error, EINVAL,
1107                                                 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1108                                                 actions,
1109                                                 "action configuration not set");
1110                 }
1111                 if ((current_action_flag & MLX5_TCF_PEDIT_ACTIONS) &&
1112                     pedit_validated)
1113                         return rte_flow_error_set(error, ENOTSUP,
1114                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1115                                                   actions,
1116                                                   "set actions should be "
1117                                                   "listed successively");
1118                 if ((current_action_flag & ~MLX5_TCF_PEDIT_ACTIONS) &&
1119                     (action_flags & MLX5_TCF_PEDIT_ACTIONS))
1120                         pedit_validated = 1;
1121                 if ((current_action_flag & MLX5_TCF_FATE_ACTIONS) &&
1122                     (action_flags & MLX5_TCF_FATE_ACTIONS))
1123                         return rte_flow_error_set(error, EINVAL,
1124                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1125                                                   actions,
1126                                                   "can't have multiple fate"
1127                                                   " actions");
1128                 action_flags |= current_action_flag;
1129         }
1130         if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
1131             (action_flags & MLX5_FLOW_ACTION_DROP))
1132                 return rte_flow_error_set(error, ENOTSUP,
1133                                           RTE_FLOW_ERROR_TYPE_ACTION,
1134                                           actions,
1135                                           "set action is not compatible with "
1136                                           "drop action");
1137         if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
1138             !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
1139                 return rte_flow_error_set(error, ENOTSUP,
1140                                           RTE_FLOW_ERROR_TYPE_ACTION,
1141                                           actions,
1142                                           "set action must be followed by "
1143                                           "port_id action");
1144         if (action_flags &
1145            (MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST)) {
1146                 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4))
1147                         return rte_flow_error_set(error, EINVAL,
1148                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1149                                                   actions,
1150                                                   "no ipv4 item found in"
1151                                                   " pattern");
1152         }
1153         if (action_flags &
1154            (MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST)) {
1155                 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6))
1156                         return rte_flow_error_set(error, EINVAL,
1157                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1158                                                   actions,
1159                                                   "no ipv6 item found in"
1160                                                   " pattern");
1161         }
1162         if (action_flags &
1163            (MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST)) {
1164                 if (!(item_flags &
1165                      (MLX5_FLOW_LAYER_OUTER_L4_UDP |
1166                       MLX5_FLOW_LAYER_OUTER_L4_TCP)))
1167                         return rte_flow_error_set(error, EINVAL,
1168                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1169                                                   actions,
1170                                                   "no TCP/UDP item found in"
1171                                                   " pattern");
1172         }
1173         /*
1174          * FW syndrome (0xA9C090):
1175          *     set_flow_table_entry: push vlan action fte in fdb can ONLY be
1176          *     forward to the uplink.
1177          */
1178         if ((action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN) &&
1179             (action_flags & MLX5_FLOW_ACTION_PORT_ID) &&
1180             ((struct priv *)port_id_dev->data->dev_private)->representor)
1181                 return rte_flow_error_set(error, ENOTSUP,
1182                                           RTE_FLOW_ERROR_TYPE_ACTION, actions,
1183                                           "vlan push can only be applied"
1184                                           " when forwarding to uplink port");
1185         /*
1186          * FW syndrome (0x294609):
1187          *     set_flow_table_entry: modify/pop/push actions in fdb flow table
1188          *     are supported only while forwarding to vport.
1189          */
1190         if ((action_flags & MLX5_TCF_VLAN_ACTIONS) &&
1191             !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
1192                 return rte_flow_error_set(error, ENOTSUP,
1193                                           RTE_FLOW_ERROR_TYPE_ACTION, actions,
1194                                           "vlan actions are supported"
1195                                           " only with port_id action");
1196         if (!(action_flags & MLX5_TCF_FATE_ACTIONS))
1197                 return rte_flow_error_set(error, EINVAL,
1198                                           RTE_FLOW_ERROR_TYPE_ACTION, actions,
1199                                           "no fate action is found");
1200         return 0;
1201 }
1202
/**
 * Calculate maximum size of memory for flow items of Linux TC flower and
 * extract specified items.
 *
 * @param[in] attr
 *   Pointer to the flow attributes.
 * @param[in] items
 *   Pointer to the list of items.
 * @param[out] item_flags
 *   Pointer to the detected items.
 *
 * @return
 *   Maximum size of memory for items.
 */
static int
flow_tcf_get_items_and_size(const struct rte_flow_attr *attr,
			    const struct rte_flow_item items[],
			    uint64_t *item_flags)
{
	int size = 0;
	uint64_t flags = 0;

	/* Fixed overhead common to every flower rule. */
	size += SZ_NLATTR_STRZ_OF("flower") +
		SZ_NLATTR_NEST + /* TCA_OPTIONS. */
		SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CLS_FLAGS_SKIP_SW. */
	if (attr->group > 0)
		size += SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CHAIN. */
	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
		switch (items->type) {
		case RTE_FLOW_ITEM_TYPE_VOID:
			break;
		case RTE_FLOW_ITEM_TYPE_PORT_ID:
			/* Port ID is conveyed by the message ifindex,
			 * no flower attribute is emitted for it. */
			break;
		case RTE_FLOW_ITEM_TYPE_ETH:
			size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
				SZ_NLATTR_DATA_OF(ETHER_ADDR_LEN) * 4;
				/* dst/src MAC addr and mask. */
			flags |= MLX5_FLOW_LAYER_OUTER_L2;
			break;
		case RTE_FLOW_ITEM_TYPE_VLAN:
			size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
				SZ_NLATTR_TYPE_OF(uint16_t) +
				/* VLAN Ether type. */
				SZ_NLATTR_TYPE_OF(uint8_t) + /* VLAN prio. */
				SZ_NLATTR_TYPE_OF(uint16_t); /* VLAN ID. */
			flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
			break;
		case RTE_FLOW_ITEM_TYPE_IPV4:
			size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
				SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
				SZ_NLATTR_TYPE_OF(uint32_t) * 4;
				/* dst/src IP addr and mask. */
			flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
			break;
		case RTE_FLOW_ITEM_TYPE_IPV6:
			size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
				SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
				SZ_NLATTR_TYPE_OF(IPV6_ADDR_LEN) * 4;
				/* dst/src IP addr and mask. */
			flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
			break;
		case RTE_FLOW_ITEM_TYPE_UDP:
			size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
				SZ_NLATTR_TYPE_OF(uint16_t) * 4;
				/* dst/src port and mask. */
			flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
			break;
		case RTE_FLOW_ITEM_TYPE_TCP:
			size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
				SZ_NLATTR_TYPE_OF(uint16_t) * 4;
				/* dst/src port and mask. */
			flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
			break;
		default:
			/* Items are validated before flow creation, so an
			 * unknown type here is a programming error; warn
			 * and keep going with a best-effort size. */
			DRV_LOG(WARNING,
				"unsupported item %p type %d,"
				" items must be validated before flow creation",
				(const void *)items, items->type);
			break;
		}
	}
	*item_flags = flags;
	return size;
}
1285
1286 /**
1287  * Calculate maximum size of memory for flow actions of Linux TC flower and
1288  * extract specified actions.
1289  *
1290  * @param[in] actions
1291  *   Pointer to the list of actions.
1292  * @param[out] action_flags
1293  *   Pointer to the detected actions.
1294  *
1295  * @return
1296  *   Maximum size of memory for actions.
1297  */
1298 static int
1299 flow_tcf_get_actions_and_size(const struct rte_flow_action actions[],
1300                               uint64_t *action_flags)
1301 {
1302         int size = 0;
1303         uint64_t flags = 0;
1304
1305         size += SZ_NLATTR_NEST; /* TCA_FLOWER_ACT. */
1306         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1307                 switch (actions->type) {
1308                 case RTE_FLOW_ACTION_TYPE_VOID:
1309                         break;
1310                 case RTE_FLOW_ACTION_TYPE_PORT_ID:
1311                         size += SZ_NLATTR_NEST + /* na_act_index. */
1312                                 SZ_NLATTR_STRZ_OF("mirred") +
1313                                 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1314                                 SZ_NLATTR_TYPE_OF(struct tc_mirred);
1315                         flags |= MLX5_FLOW_ACTION_PORT_ID;
1316                         break;
1317                 case RTE_FLOW_ACTION_TYPE_JUMP:
1318                         size += SZ_NLATTR_NEST + /* na_act_index. */
1319                                 SZ_NLATTR_STRZ_OF("gact") +
1320                                 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1321                                 SZ_NLATTR_TYPE_OF(struct tc_gact);
1322                         flags |= MLX5_FLOW_ACTION_JUMP;
1323                         break;
1324                 case RTE_FLOW_ACTION_TYPE_DROP:
1325                         size += SZ_NLATTR_NEST + /* na_act_index. */
1326                                 SZ_NLATTR_STRZ_OF("gact") +
1327                                 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1328                                 SZ_NLATTR_TYPE_OF(struct tc_gact);
1329                         flags |= MLX5_FLOW_ACTION_DROP;
1330                         break;
1331                 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
1332                         flags |= MLX5_FLOW_ACTION_OF_POP_VLAN;
1333                         goto action_of_vlan;
1334                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
1335                         flags |= MLX5_FLOW_ACTION_OF_PUSH_VLAN;
1336                         goto action_of_vlan;
1337                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
1338                         flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
1339                         goto action_of_vlan;
1340                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
1341                         flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
1342                         goto action_of_vlan;
1343 action_of_vlan:
1344                         size += SZ_NLATTR_NEST + /* na_act_index. */
1345                                 SZ_NLATTR_STRZ_OF("vlan") +
1346                                 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1347                                 SZ_NLATTR_TYPE_OF(struct tc_vlan) +
1348                                 SZ_NLATTR_TYPE_OF(uint16_t) +
1349                                 /* VLAN protocol. */
1350                                 SZ_NLATTR_TYPE_OF(uint16_t) + /* VLAN ID. */
1351                                 SZ_NLATTR_TYPE_OF(uint8_t); /* VLAN prio. */
1352                         break;
1353                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
1354                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
1355                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
1356                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
1357                 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
1358                 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
1359                         size += flow_tcf_get_pedit_actions_size(&actions,
1360                                                                 &flags);
1361                         break;
1362                 default:
1363                         DRV_LOG(WARNING,
1364                                 "unsupported action %p type %d,"
1365                                 " items must be validated before flow creation",
1366                                 (const void *)actions, actions->type);
1367                         break;
1368                 }
1369         }
1370         *action_flags = flags;
1371         return size;
1372 }
1373
1374 /**
1375  * Brand rtnetlink buffer with unique handle.
1376  *
1377  * This handle should be unique for a given network interface to avoid
1378  * collisions.
1379  *
1380  * @param nlh
1381  *   Pointer to Netlink message.
1382  * @param handle
1383  *   Unique 32-bit handle to use.
1384  */
1385 static void
1386 flow_tcf_nl_brand(struct nlmsghdr *nlh, uint32_t handle)
1387 {
1388         struct tcmsg *tcm = mnl_nlmsg_get_payload(nlh);
1389
1390         tcm->tcm_handle = handle;
1391         DRV_LOG(DEBUG, "Netlink msg %p is branded with handle %x",
1392                 (void *)nlh, handle);
1393 }
1394
1395 /**
1396  * Prepare a flow object for Linux TC flower. It calculates the maximum size of
1397  * memory required, allocates the memory, initializes Netlink message headers
1398  * and set unique TC message handle.
1399  *
1400  * @param[in] attr
1401  *   Pointer to the flow attributes.
1402  * @param[in] items
1403  *   Pointer to the list of items.
1404  * @param[in] actions
1405  *   Pointer to the list of actions.
1406  * @param[out] item_flags
1407  *   Pointer to bit mask of all items detected.
1408  * @param[out] action_flags
1409  *   Pointer to bit mask of all actions detected.
1410  * @param[out] error
1411  *   Pointer to the error structure.
1412  *
1413  * @return
1414  *   Pointer to mlx5_flow object on success,
 *   otherwise NULL and rte_errno is set.
1416  */
1417 static struct mlx5_flow *
1418 flow_tcf_prepare(const struct rte_flow_attr *attr,
1419                  const struct rte_flow_item items[],
1420                  const struct rte_flow_action actions[],
1421                  uint64_t *item_flags, uint64_t *action_flags,
1422                  struct rte_flow_error *error)
1423 {
1424         size_t size = sizeof(struct mlx5_flow) +
1425                       MNL_ALIGN(sizeof(struct nlmsghdr)) +
1426                       MNL_ALIGN(sizeof(struct tcmsg));
1427         struct mlx5_flow *dev_flow;
1428         struct nlmsghdr *nlh;
1429         struct tcmsg *tcm;
1430
1431         size += flow_tcf_get_items_and_size(attr, items, item_flags);
1432         size += flow_tcf_get_actions_and_size(actions, action_flags);
1433         dev_flow = rte_zmalloc(__func__, size, MNL_ALIGNTO);
1434         if (!dev_flow) {
1435                 rte_flow_error_set(error, ENOMEM,
1436                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1437                                    "not enough memory to create E-Switch flow");
1438                 return NULL;
1439         }
1440         nlh = mnl_nlmsg_put_header((void *)(dev_flow + 1));
1441         tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
1442         *dev_flow = (struct mlx5_flow){
1443                 .tcf = (struct mlx5_flow_tcf){
1444                         .nlh = nlh,
1445                         .tcm = tcm,
1446                 },
1447         };
1448         /*
1449          * Generate a reasonably unique handle based on the address of the
1450          * target buffer.
1451          *
1452          * This is straightforward on 32-bit systems where the flow pointer can
1453          * be used directly. Otherwise, its least significant part is taken
1454          * after shifting it by the previous power of two of the pointed buffer
1455          * size.
1456          */
1457         if (sizeof(dev_flow) <= 4)
1458                 flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow);
1459         else
1460                 flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow >>
1461                                        rte_log2_u32(rte_align32prevpow2(size)));
1462         return dev_flow;
1463 }
1464
1465 /**
1466  * Translate flow for Linux TC flower and construct Netlink message.
1467  *
 * @param[in] dev
 *   Pointer to Ethernet device.
1470  * @param[in, out] flow
1471  *   Pointer to the sub flow.
1472  * @param[in] attr
1473  *   Pointer to the flow attributes.
1474  * @param[in] items
1475  *   Pointer to the list of items.
1476  * @param[in] actions
1477  *   Pointer to the list of actions.
1478  * @param[out] error
1479  *   Pointer to the error structure.
1480  *
1481  * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
1483  */
static int
flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
		   const struct rte_flow_attr *attr,
		   const struct rte_flow_item items[],
		   const struct rte_flow_action actions[],
		   struct rte_flow_error *error)
{
	/* Overlay pointers for the current item's spec/mask (one at a time). */
	union {
		const struct rte_flow_item_port_id *port_id;
		const struct rte_flow_item_eth *eth;
		const struct rte_flow_item_vlan *vlan;
		const struct rte_flow_item_ipv4 *ipv4;
		const struct rte_flow_item_ipv6 *ipv6;
		const struct rte_flow_item_tcp *tcp;
		const struct rte_flow_item_udp *udp;
	} spec, mask;
	/* Overlay pointer for the current action's configuration. */
	union {
		const struct rte_flow_action_port_id *port_id;
		const struct rte_flow_action_jump *jump;
		const struct rte_flow_action_of_push_vlan *of_push_vlan;
		const struct rte_flow_action_of_set_vlan_vid *
			of_set_vlan_vid;
		const struct rte_flow_action_of_set_vlan_pcp *
			of_set_vlan_pcp;
	} conf;
	/* DPDK port ID to kernel ifindex translation table. */
	struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
	struct nlmsghdr *nlh = dev_flow->tcf.nlh;
	struct tcmsg *tcm = dev_flow->tcf.tcm;
	uint32_t na_act_index_cur;
	/* Track attributes already emitted to avoid duplicates. */
	bool eth_type_set = 0;
	bool vlan_present = 0;
	bool vlan_eth_type_set = 0;
	bool ip_proto_set = 0;
	struct nlattr *na_flower;
	struct nlattr *na_flower_act;
	/* Payload-tail pointers kept for later in-place override of the
	 * placeholder VLAN ID/priority attributes (see action_of_vlan). */
	struct nlattr *na_vlan_id = NULL;
	struct nlattr *na_vlan_priority = NULL;
	uint64_t item_flags = 0;

	claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
						PTOI_TABLE_SZ_MAX(dev)));
	nlh = dev_flow->tcf.nlh;
	tcm = dev_flow->tcf.tcm;
	/* Prepare API must have been called beforehand. */
	assert(nlh != NULL && tcm != NULL);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm_ifindex = ptoi[0].ifindex;
	tcm->tcm_parent = TC_H_MAKE(TC_H_INGRESS, TC_H_MIN_INGRESS);
	/*
	 * Priority cannot be zero to prevent the kernel from picking one
	 * automatically.
	 */
	tcm->tcm_info = TC_H_MAKE((attr->priority + 1) << 16,
				  RTE_BE16(ETH_P_ALL));
	/* rte_flow group maps to a TC chain; chain 0 is the default. */
	if (attr->group > 0)
		mnl_attr_put_u32(nlh, TCA_CHAIN, attr->group);
	mnl_attr_put_strz(nlh, TCA_KIND, "flower");
	na_flower = mnl_attr_nest_start(nlh, TCA_OPTIONS);
	/* Request hardware-only offload; no software fallback. */
	mnl_attr_put_u32(nlh, TCA_FLOWER_FLAGS, TCA_CLS_FLAGS_SKIP_SW);
	/* Translate each pattern item into flower match attributes. */
	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
		unsigned int i;

		switch (items->type) {
		case RTE_FLOW_ITEM_TYPE_VOID:
			break;
		case RTE_FLOW_ITEM_TYPE_PORT_ID:
			/* Retarget the message at the matched port's ifindex. */
			mask.port_id = flow_tcf_item_mask
				(items, &rte_flow_item_port_id_mask,
				 &flow_tcf_mask_supported.port_id,
				 &flow_tcf_mask_empty.port_id,
				 sizeof(flow_tcf_mask_supported.port_id),
				 error);
			assert(mask.port_id);
			if (mask.port_id == &flow_tcf_mask_empty.port_id)
				break;
			spec.port_id = items->spec;
			if (!mask.port_id->id)
				i = 0;
			else
				for (i = 0; ptoi[i].ifindex; ++i)
					if (ptoi[i].port_id == spec.port_id->id)
						break;
			/* Validation is assumed to have rejected unknown ports. */
			assert(ptoi[i].ifindex);
			tcm->tcm_ifindex = ptoi[i].ifindex;
			break;
		case RTE_FLOW_ITEM_TYPE_ETH:
			item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
			mask.eth = flow_tcf_item_mask
				(items, &rte_flow_item_eth_mask,
				 &flow_tcf_mask_supported.eth,
				 &flow_tcf_mask_empty.eth,
				 sizeof(flow_tcf_mask_supported.eth),
				 error);
			assert(mask.eth);
			if (mask.eth == &flow_tcf_mask_empty.eth)
				break;
			spec.eth = items->spec;
			if (mask.eth->type) {
				mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
						 spec.eth->type);
				eth_type_set = 1;
			}
			if (!is_zero_ether_addr(&mask.eth->dst)) {
				mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST,
					     ETHER_ADDR_LEN,
					     spec.eth->dst.addr_bytes);
				mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST_MASK,
					     ETHER_ADDR_LEN,
					     mask.eth->dst.addr_bytes);
			}
			if (!is_zero_ether_addr(&mask.eth->src)) {
				mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC,
					     ETHER_ADDR_LEN,
					     spec.eth->src.addr_bytes);
				mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC_MASK,
					     ETHER_ADDR_LEN,
					     mask.eth->src.addr_bytes);
			}
			break;
		case RTE_FLOW_ITEM_TYPE_VLAN:
			item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
			mask.vlan = flow_tcf_item_mask
				(items, &rte_flow_item_vlan_mask,
				 &flow_tcf_mask_supported.vlan,
				 &flow_tcf_mask_empty.vlan,
				 sizeof(flow_tcf_mask_supported.vlan),
				 error);
			assert(mask.vlan);
			/* Flower needs ETH_TYPE 802.1Q for VLAN matching. */
			if (!eth_type_set)
				mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
						 RTE_BE16(ETH_P_8021Q));
			eth_type_set = 1;
			vlan_present = 1;
			if (mask.vlan == &flow_tcf_mask_empty.vlan)
				break;
			spec.vlan = items->spec;
			if (mask.vlan->inner_type) {
				mnl_attr_put_u16(nlh,
						 TCA_FLOWER_KEY_VLAN_ETH_TYPE,
						 spec.vlan->inner_type);
				vlan_eth_type_set = 1;
			}
			/* TCI: PCP is bits 13-15, VID is bits 0-11. */
			if (mask.vlan->tci & RTE_BE16(0xe000))
				mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_VLAN_PRIO,
						(rte_be_to_cpu_16
						 (spec.vlan->tci) >> 13) & 0x7);
			if (mask.vlan->tci & RTE_BE16(0x0fff))
				mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_VLAN_ID,
						 rte_be_to_cpu_16
						 (spec.vlan->tci &
						  RTE_BE16(0x0fff)));
			break;
		case RTE_FLOW_ITEM_TYPE_IPV4:
			item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
			mask.ipv4 = flow_tcf_item_mask
				(items, &rte_flow_item_ipv4_mask,
				 &flow_tcf_mask_supported.ipv4,
				 &flow_tcf_mask_empty.ipv4,
				 sizeof(flow_tcf_mask_supported.ipv4),
				 error);
			assert(mask.ipv4);
			/* Emit the (inner, when VLAN is present) ethertype once. */
			if (!eth_type_set || !vlan_eth_type_set)
				mnl_attr_put_u16(nlh,
						 vlan_present ?
						 TCA_FLOWER_KEY_VLAN_ETH_TYPE :
						 TCA_FLOWER_KEY_ETH_TYPE,
						 RTE_BE16(ETH_P_IP));
			eth_type_set = 1;
			vlan_eth_type_set = 1;
			if (mask.ipv4 == &flow_tcf_mask_empty.ipv4)
				break;
			spec.ipv4 = items->spec;
			if (mask.ipv4->hdr.next_proto_id) {
				mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
						spec.ipv4->hdr.next_proto_id);
				ip_proto_set = 1;
			}
			if (mask.ipv4->hdr.src_addr) {
				mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_SRC,
						 spec.ipv4->hdr.src_addr);
				mnl_attr_put_u32(nlh,
						 TCA_FLOWER_KEY_IPV4_SRC_MASK,
						 mask.ipv4->hdr.src_addr);
			}
			if (mask.ipv4->hdr.dst_addr) {
				mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_DST,
						 spec.ipv4->hdr.dst_addr);
				mnl_attr_put_u32(nlh,
						 TCA_FLOWER_KEY_IPV4_DST_MASK,
						 mask.ipv4->hdr.dst_addr);
			}
			break;
		case RTE_FLOW_ITEM_TYPE_IPV6:
			item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
			mask.ipv6 = flow_tcf_item_mask
				(items, &rte_flow_item_ipv6_mask,
				 &flow_tcf_mask_supported.ipv6,
				 &flow_tcf_mask_empty.ipv6,
				 sizeof(flow_tcf_mask_supported.ipv6),
				 error);
			assert(mask.ipv6);
			if (!eth_type_set || !vlan_eth_type_set)
				mnl_attr_put_u16(nlh,
						 vlan_present ?
						 TCA_FLOWER_KEY_VLAN_ETH_TYPE :
						 TCA_FLOWER_KEY_ETH_TYPE,
						 RTE_BE16(ETH_P_IPV6));
			eth_type_set = 1;
			vlan_eth_type_set = 1;
			if (mask.ipv6 == &flow_tcf_mask_empty.ipv6)
				break;
			spec.ipv6 = items->spec;
			if (mask.ipv6->hdr.proto) {
				mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
						spec.ipv6->hdr.proto);
				ip_proto_set = 1;
			}
			if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.src_addr)) {
				mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC,
					     sizeof(spec.ipv6->hdr.src_addr),
					     spec.ipv6->hdr.src_addr);
				mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC_MASK,
					     sizeof(mask.ipv6->hdr.src_addr),
					     mask.ipv6->hdr.src_addr);
			}
			if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.dst_addr)) {
				mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST,
					     sizeof(spec.ipv6->hdr.dst_addr),
					     spec.ipv6->hdr.dst_addr);
				mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST_MASK,
					     sizeof(mask.ipv6->hdr.dst_addr),
					     mask.ipv6->hdr.dst_addr);
			}
			break;
		case RTE_FLOW_ITEM_TYPE_UDP:
			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
			mask.udp = flow_tcf_item_mask
				(items, &rte_flow_item_udp_mask,
				 &flow_tcf_mask_supported.udp,
				 &flow_tcf_mask_empty.udp,
				 sizeof(flow_tcf_mask_supported.udp),
				 error);
			assert(mask.udp);
			/* Emit IP proto if no L3 item already set it. */
			if (!ip_proto_set)
				mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
						IPPROTO_UDP);
			if (mask.udp == &flow_tcf_mask_empty.udp)
				break;
			spec.udp = items->spec;
			if (mask.udp->hdr.src_port) {
				mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_SRC,
						 spec.udp->hdr.src_port);
				mnl_attr_put_u16(nlh,
						 TCA_FLOWER_KEY_UDP_SRC_MASK,
						 mask.udp->hdr.src_port);
			}
			if (mask.udp->hdr.dst_port) {
				mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_DST,
						 spec.udp->hdr.dst_port);
				mnl_attr_put_u16(nlh,
						 TCA_FLOWER_KEY_UDP_DST_MASK,
						 mask.udp->hdr.dst_port);
			}
			break;
		case RTE_FLOW_ITEM_TYPE_TCP:
			item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
			mask.tcp = flow_tcf_item_mask
				(items, &rte_flow_item_tcp_mask,
				 &flow_tcf_mask_supported.tcp,
				 &flow_tcf_mask_empty.tcp,
				 sizeof(flow_tcf_mask_supported.tcp),
				 error);
			assert(mask.tcp);
			if (!ip_proto_set)
				mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
						IPPROTO_TCP);
			if (mask.tcp == &flow_tcf_mask_empty.tcp)
				break;
			spec.tcp = items->spec;
			if (mask.tcp->hdr.src_port) {
				mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_SRC,
						 spec.tcp->hdr.src_port);
				mnl_attr_put_u16(nlh,
						 TCA_FLOWER_KEY_TCP_SRC_MASK,
						 mask.tcp->hdr.src_port);
			}
			if (mask.tcp->hdr.dst_port) {
				mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_DST,
						 spec.tcp->hdr.dst_port);
				mnl_attr_put_u16(nlh,
						 TCA_FLOWER_KEY_TCP_DST_MASK,
						 mask.tcp->hdr.dst_port);
			}
			/* NOTE(review): tcp_flags is one byte in rte_flow;
			 * it is widened to a big-endian u16 here — presumably
			 * to match flower's 16-bit TCP flags key; confirm
			 * against kernel pkt_cls.h. */
			if (mask.tcp->hdr.tcp_flags) {
				mnl_attr_put_u16
					(nlh,
					 TCA_FLOWER_KEY_TCP_FLAGS,
					 rte_cpu_to_be_16
						(spec.tcp->hdr.tcp_flags));
				mnl_attr_put_u16
					(nlh,
					 TCA_FLOWER_KEY_TCP_FLAGS_MASK,
					 rte_cpu_to_be_16
						(mask.tcp->hdr.tcp_flags));
			}
			break;
		default:
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  NULL, "item not supported");
		}
	}
	/* Translate actions; each gets its own 1-based index nest. */
	na_flower_act = mnl_attr_nest_start(nlh, TCA_FLOWER_ACT);
	na_act_index_cur = 1;
	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
		struct nlattr *na_act_index;
		struct nlattr *na_act;
		unsigned int vlan_act;
		unsigned int i;

		switch (actions->type) {
		case RTE_FLOW_ACTION_TYPE_VOID:
			break;
		case RTE_FLOW_ACTION_TYPE_PORT_ID:
			/* Redirect to another port via mirred egress. */
			conf.port_id = actions->conf;
			if (conf.port_id->original)
				i = 0;
			else
				for (i = 0; ptoi[i].ifindex; ++i)
					if (ptoi[i].port_id == conf.port_id->id)
						break;
			assert(ptoi[i].ifindex);
			na_act_index =
				mnl_attr_nest_start(nlh, na_act_index_cur++);
			assert(na_act_index);
			mnl_attr_put_strz(nlh, TCA_ACT_KIND, "mirred");
			na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
			assert(na_act);
			mnl_attr_put(nlh, TCA_MIRRED_PARMS,
				     sizeof(struct tc_mirred),
				     &(struct tc_mirred){
					.action = TC_ACT_STOLEN,
					.eaction = TCA_EGRESS_REDIR,
					.ifindex = ptoi[i].ifindex,
				     });
			mnl_attr_nest_end(nlh, na_act);
			mnl_attr_nest_end(nlh, na_act_index);
			break;
		case RTE_FLOW_ACTION_TYPE_JUMP:
			/* Jump to another group == gact goto chain. */
			conf.jump = actions->conf;
			na_act_index =
				mnl_attr_nest_start(nlh, na_act_index_cur++);
			assert(na_act_index);
			mnl_attr_put_strz(nlh, TCA_ACT_KIND, "gact");
			na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
			assert(na_act);
			mnl_attr_put(nlh, TCA_GACT_PARMS,
				     sizeof(struct tc_gact),
				     &(struct tc_gact){
					.action = TC_ACT_GOTO_CHAIN |
						  conf.jump->group,
				     });
			mnl_attr_nest_end(nlh, na_act);
			mnl_attr_nest_end(nlh, na_act_index);
			break;
		case RTE_FLOW_ACTION_TYPE_DROP:
			na_act_index =
				mnl_attr_nest_start(nlh, na_act_index_cur++);
			assert(na_act_index);
			mnl_attr_put_strz(nlh, TCA_ACT_KIND, "gact");
			na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
			assert(na_act);
			mnl_attr_put(nlh, TCA_GACT_PARMS,
				     sizeof(struct tc_gact),
				     &(struct tc_gact){
					.action = TC_ACT_SHOT,
				     });
			mnl_attr_nest_end(nlh, na_act);
			mnl_attr_nest_end(nlh, na_act_index);
			break;
		/* The four VLAN actions share a single "vlan" TC action;
		 * SET_VLAN_VID/PCP reuse a previous nest when one exists. */
		case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
			conf.of_push_vlan = NULL;
			vlan_act = TCA_VLAN_ACT_POP;
			goto action_of_vlan;
		case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
			conf.of_push_vlan = actions->conf;
			vlan_act = TCA_VLAN_ACT_PUSH;
			goto action_of_vlan;
		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
			conf.of_set_vlan_vid = actions->conf;
			if (na_vlan_id)
				goto override_na_vlan_id;
			vlan_act = TCA_VLAN_ACT_MODIFY;
			goto action_of_vlan;
		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
			conf.of_set_vlan_pcp = actions->conf;
			if (na_vlan_priority)
				goto override_na_vlan_priority;
			vlan_act = TCA_VLAN_ACT_MODIFY;
			goto action_of_vlan;
action_of_vlan:
			na_act_index =
				mnl_attr_nest_start(nlh, na_act_index_cur++);
			assert(na_act_index);
			mnl_attr_put_strz(nlh, TCA_ACT_KIND, "vlan");
			na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
			assert(na_act);
			mnl_attr_put(nlh, TCA_VLAN_PARMS,
				     sizeof(struct tc_vlan),
				     &(struct tc_vlan){
					.action = TC_ACT_PIPE,
					.v_action = vlan_act,
				     });
			if (vlan_act == TCA_VLAN_ACT_POP) {
				mnl_attr_nest_end(nlh, na_act);
				mnl_attr_nest_end(nlh, na_act_index);
				break;
			}
			if (vlan_act == TCA_VLAN_ACT_PUSH)
				mnl_attr_put_u16(nlh,
						 TCA_VLAN_PUSH_VLAN_PROTOCOL,
						 conf.of_push_vlan->ethertype);
			/* Emit padding placeholders whose type/payload are
			 * patched in place below once the real VID/priority
			 * values are known. */
			na_vlan_id = mnl_nlmsg_get_payload_tail(nlh);
			mnl_attr_put_u16(nlh, TCA_VLAN_PAD, 0);
			na_vlan_priority = mnl_nlmsg_get_payload_tail(nlh);
			mnl_attr_put_u8(nlh, TCA_VLAN_PAD, 0);
			mnl_attr_nest_end(nlh, na_act);
			mnl_attr_nest_end(nlh, na_act_index);
			if (actions->type ==
			    RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID) {
override_na_vlan_id:
				na_vlan_id->nla_type = TCA_VLAN_PUSH_VLAN_ID;
				*(uint16_t *)mnl_attr_get_payload(na_vlan_id) =
					rte_be_to_cpu_16
					(conf.of_set_vlan_vid->vlan_vid);
			} else if (actions->type ==
				   RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP) {
override_na_vlan_priority:
				na_vlan_priority->nla_type =
					TCA_VLAN_PUSH_VLAN_PRIORITY;
				*(uint8_t *)mnl_attr_get_payload
					(na_vlan_priority) =
					conf.of_set_vlan_pcp->vlan_pcp;
			}
			break;
		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
			/* Header rewrites are folded into one pedit action;
			 * the helper advances the actions cursor itself. */
			na_act_index =
				mnl_attr_nest_start(nlh, na_act_index_cur++);
			flow_tcf_create_pedit_mnl_msg(nlh,
						      &actions, item_flags);
			mnl_attr_nest_end(nlh, na_act_index);
			break;
		default:
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ACTION,
						  actions,
						  "action not supported");
		}
	}
	assert(na_flower);
	assert(na_flower_act);
	mnl_attr_nest_end(nlh, na_flower_act);
	mnl_attr_nest_end(nlh, na_flower);
	return 0;
}
1954
/**
 * Send Netlink message with acknowledgment.
 *
 * The request is stamped with a random sequence number so the reply can be
 * matched against it, then sent and its acknowledgment (or error) collected
 * synchronously on the same socket.
 *
 * @param nl
 *   Libmnl socket to use.
 * @param nlh
 *   Message to send. This function always raises the NLM_F_ACK flag before
 *   sending.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
flow_tcf_nl_ack(struct mnl_socket *nl, struct nlmsghdr *nlh)
{
	/*
	 * Reply buffer: room for a nlmsgerr plus the request payload, which
	 * the kernel echoes back inside the error message (nlh->nlmsg_len
	 * minus the request's own header).
	 */
	alignas(struct nlmsghdr)
	uint8_t ans[mnl_nlmsg_size(sizeof(struct nlmsgerr)) +
		    nlh->nlmsg_len - sizeof(*nlh)];
	/* Sequence number used to pair the kernel reply with this request. */
	uint32_t seq = random();
	int ret;

	nlh->nlmsg_flags |= NLM_F_ACK;
	nlh->nlmsg_seq = seq;
	ret = mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
	if (ret != -1)
		ret = mnl_socket_recvfrom(nl, ans, sizeof(ans));
	if (ret != -1)
		/* Parses the reply; a nlmsgerr with nonzero error yields -1
		 * and sets errno accordingly. */
		ret = mnl_cb_run
			(ans, ret, seq, mnl_socket_get_portid(nl), NULL, NULL);
	if (ret > 0)
		/* MNL_CB_OK: positive ACK from the kernel. */
		return 0;
	/* errno comes from the failing libmnl call or the kernel nlmsgerr. */
	rte_errno = errno;
	return -rte_errno;
}
1989
1990 /**
1991  * Apply flow to E-Switch by sending Netlink message.
1992  *
1993  * @param[in] dev
1994  *   Pointer to Ethernet device.
1995  * @param[in, out] flow
1996  *   Pointer to the sub flow.
1997  * @param[out] error
1998  *   Pointer to the error structure.
1999  *
2000  * @return
2001  *   0 on success, a negative errno value otherwise and rte_ernno is set.
2002  */
2003 static int
2004 flow_tcf_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
2005                struct rte_flow_error *error)
2006 {
2007         struct priv *priv = dev->data->dev_private;
2008         struct mnl_socket *nl = priv->mnl_socket;
2009         struct mlx5_flow *dev_flow;
2010         struct nlmsghdr *nlh;
2011
2012         dev_flow = LIST_FIRST(&flow->dev_flows);
2013         /* E-Switch flow can't be expanded. */
2014         assert(!LIST_NEXT(dev_flow, next));
2015         nlh = dev_flow->tcf.nlh;
2016         nlh->nlmsg_type = RTM_NEWTFILTER;
2017         nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
2018         if (!flow_tcf_nl_ack(nl, nlh))
2019                 return 0;
2020         return rte_flow_error_set(error, rte_errno,
2021                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
2022                                   "netlink: failed to create TC flow rule");
2023 }
2024
2025 /**
2026  * Remove flow from E-Switch by sending Netlink message.
2027  *
2028  * @param[in] dev
2029  *   Pointer to Ethernet device.
2030  * @param[in, out] flow
2031  *   Pointer to the sub flow.
2032  */
2033 static void
2034 flow_tcf_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
2035 {
2036         struct priv *priv = dev->data->dev_private;
2037         struct mnl_socket *nl = priv->mnl_socket;
2038         struct mlx5_flow *dev_flow;
2039         struct nlmsghdr *nlh;
2040
2041         if (!flow)
2042                 return;
2043         dev_flow = LIST_FIRST(&flow->dev_flows);
2044         if (!dev_flow)
2045                 return;
2046         /* E-Switch flow can't be expanded. */
2047         assert(!LIST_NEXT(dev_flow, next));
2048         nlh = dev_flow->tcf.nlh;
2049         nlh->nlmsg_type = RTM_DELTFILTER;
2050         nlh->nlmsg_flags = NLM_F_REQUEST;
2051         flow_tcf_nl_ack(nl, nlh);
2052 }
2053
2054 /**
2055  * Remove flow from E-Switch and release resources of the device flow.
2056  *
2057  * @param[in] dev
2058  *   Pointer to Ethernet device.
2059  * @param[in, out] flow
2060  *   Pointer to the sub flow.
2061  */
2062 static void
2063 flow_tcf_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
2064 {
2065         struct mlx5_flow *dev_flow;
2066
2067         if (!flow)
2068                 return;
2069         flow_tcf_remove(dev, flow);
2070         dev_flow = LIST_FIRST(&flow->dev_flows);
2071         if (!dev_flow)
2072                 return;
2073         /* E-Switch flow can't be expanded. */
2074         assert(!LIST_NEXT(dev_flow, next));
2075         LIST_REMOVE(dev_flow, next);
2076         rte_free(dev_flow);
2077 }
2078
/* E-Switch flow driver callbacks (rte_flow implemented via TC flower). */
const struct mlx5_flow_driver_ops mlx5_flow_tcf_drv_ops = {
	.validate = flow_tcf_validate,
	.prepare = flow_tcf_prepare,
	.translate = flow_tcf_translate,
	.apply = flow_tcf_apply,
	.remove = flow_tcf_remove,
	.destroy = flow_tcf_destroy,
};
2087
2088 /**
2089  * Initialize ingress qdisc of a given network interface.
2090  *
2091  * @param nl
2092  *   Libmnl socket of the @p NETLINK_ROUTE kind.
2093  * @param ifindex
2094  *   Index of network interface to initialize.
2095  * @param[out] error
2096  *   Perform verbose error reporting if not NULL.
2097  *
2098  * @return
2099  *   0 on success, a negative errno value otherwise and rte_errno is set.
2100  */
2101 int
2102 mlx5_flow_tcf_init(struct mnl_socket *nl, unsigned int ifindex,
2103                    struct rte_flow_error *error)
2104 {
2105         struct nlmsghdr *nlh;
2106         struct tcmsg *tcm;
2107         alignas(struct nlmsghdr)
2108         uint8_t buf[mnl_nlmsg_size(sizeof(*tcm) + 128)];
2109
2110         /* Destroy existing ingress qdisc and everything attached to it. */
2111         nlh = mnl_nlmsg_put_header(buf);
2112         nlh->nlmsg_type = RTM_DELQDISC;
2113         nlh->nlmsg_flags = NLM_F_REQUEST;
2114         tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
2115         tcm->tcm_family = AF_UNSPEC;
2116         tcm->tcm_ifindex = ifindex;
2117         tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
2118         tcm->tcm_parent = TC_H_INGRESS;
2119         /* Ignore errors when qdisc is already absent. */
2120         if (flow_tcf_nl_ack(nl, nlh) &&
2121             rte_errno != EINVAL && rte_errno != ENOENT)
2122                 return rte_flow_error_set(error, rte_errno,
2123                                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
2124                                           "netlink: failed to remove ingress"
2125                                           " qdisc");
2126         /* Create fresh ingress qdisc. */
2127         nlh = mnl_nlmsg_put_header(buf);
2128         nlh->nlmsg_type = RTM_NEWQDISC;
2129         nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
2130         tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
2131         tcm->tcm_family = AF_UNSPEC;
2132         tcm->tcm_ifindex = ifindex;
2133         tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
2134         tcm->tcm_parent = TC_H_INGRESS;
2135         mnl_attr_put_strz_check(nlh, sizeof(buf), TCA_KIND, "ingress");
2136         if (flow_tcf_nl_ack(nl, nlh))
2137                 return rte_flow_error_set(error, rte_errno,
2138                                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
2139                                           "netlink: failed to create ingress"
2140                                           " qdisc");
2141         return 0;
2142 }
2143
2144 /**
2145  * Create and configure a libmnl socket for Netlink flow rules.
2146  *
2147  * @return
2148  *   A valid libmnl socket object pointer on success, NULL otherwise and
2149  *   rte_errno is set.
2150  */
2151 struct mnl_socket *
2152 mlx5_flow_tcf_socket_create(void)
2153 {
2154         struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
2155
2156         if (nl) {
2157                 mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
2158                                       sizeof(int));
2159                 if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
2160                         return nl;
2161         }
2162         rte_errno = errno;
2163         if (nl)
2164                 mnl_socket_close(nl);
2165         return NULL;
2166 }
2167
/**
 * Destroy a libmnl socket.
 *
 * @param nl
 *   Libmnl socket of the @p NETLINK_ROUTE kind. NOTE(review): passed straight
 *   to mnl_socket_close(), so it is presumably expected to be non-NULL —
 *   confirm against callers.
 */
void
mlx5_flow_tcf_socket_destroy(struct mnl_socket *nl)
{
	mnl_socket_close(nl);
}