net/mlx5: rewrite IP address UDP/TCP port by E-Switch
[dpdk.git] / drivers / net / mlx5 / mlx5_flow_tcf.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2018 6WIND S.A.
3  * Copyright 2018 Mellanox Technologies, Ltd
4  */
5
6 #include <assert.h>
7 #include <errno.h>
8 #include <libmnl/libmnl.h>
9 #include <linux/if_ether.h>
10 #include <linux/netlink.h>
11 #include <linux/pkt_cls.h>
12 #include <linux/pkt_sched.h>
13 #include <linux/rtnetlink.h>
14 #include <linux/tc_act/tc_gact.h>
15 #include <linux/tc_act/tc_mirred.h>
16 #include <netinet/in.h>
17 #include <stdalign.h>
18 #include <stdbool.h>
19 #include <stddef.h>
20 #include <stdint.h>
21 #include <stdlib.h>
22 #include <sys/socket.h>
23
24 #include <rte_byteorder.h>
25 #include <rte_errno.h>
26 #include <rte_ether.h>
27 #include <rte_flow.h>
28 #include <rte_malloc.h>
29
30 #include "mlx5.h"
31 #include "mlx5_flow.h"
32 #include "mlx5_autoconf.h"
33
34 #ifdef HAVE_TC_ACT_VLAN
35
36 #include <linux/tc_act/tc_vlan.h>
37
38 #else /* HAVE_TC_ACT_VLAN */
39
40 #define TCA_VLAN_ACT_POP 1
41 #define TCA_VLAN_ACT_PUSH 2
42 #define TCA_VLAN_ACT_MODIFY 3
43 #define TCA_VLAN_PARMS 2
44 #define TCA_VLAN_PUSH_VLAN_ID 3
45 #define TCA_VLAN_PUSH_VLAN_PROTOCOL 4
46 #define TCA_VLAN_PAD 5
47 #define TCA_VLAN_PUSH_VLAN_PRIORITY 6
48
49 struct tc_vlan {
50         tc_gen;
51         int v_action;
52 };
53
54 #endif /* HAVE_TC_ACT_VLAN */
55
56 #ifdef HAVE_TC_ACT_PEDIT
57
58 #include <linux/tc_act/tc_pedit.h>
59
60 #else /* HAVE_TC_ACT_PEDIT */
61
62 enum {
63         TCA_PEDIT_UNSPEC,
64         TCA_PEDIT_TM,
65         TCA_PEDIT_PARMS,
66         TCA_PEDIT_PAD,
67         TCA_PEDIT_PARMS_EX,
68         TCA_PEDIT_KEYS_EX,
69         TCA_PEDIT_KEY_EX,
70         __TCA_PEDIT_MAX
71 };
72
73 enum {
74         TCA_PEDIT_KEY_EX_HTYPE = 1,
75         TCA_PEDIT_KEY_EX_CMD = 2,
76         __TCA_PEDIT_KEY_EX_MAX
77 };
78
79 enum pedit_header_type {
80         TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK = 0,
81         TCA_PEDIT_KEY_EX_HDR_TYPE_ETH = 1,
82         TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 = 2,
83         TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 = 3,
84         TCA_PEDIT_KEY_EX_HDR_TYPE_TCP = 4,
85         TCA_PEDIT_KEY_EX_HDR_TYPE_UDP = 5,
86         __PEDIT_HDR_TYPE_MAX,
87 };
88
89 enum pedit_cmd {
90         TCA_PEDIT_KEY_EX_CMD_SET = 0,
91         TCA_PEDIT_KEY_EX_CMD_ADD = 1,
92         __PEDIT_CMD_MAX,
93 };
94
95 struct tc_pedit_key {
96         __u32           mask;  /* AND */
97         __u32           val;   /*XOR */
98         __u32           off;  /*offset */
99         __u32           at;
100         __u32           offmask;
101         __u32           shift;
102 };
103
104 struct tc_pedit_sel {
105         tc_gen;
106         unsigned char           nkeys;
107         unsigned char           flags;
108         struct tc_pedit_key     keys[0];
109 };
110
111 #endif /* HAVE_TC_ACT_PEDIT */
112
113 /* Normally found in linux/netlink.h. */
114 #ifndef NETLINK_CAP_ACK
115 #define NETLINK_CAP_ACK 10
116 #endif
117
118 /* Normally found in linux/pkt_sched.h. */
119 #ifndef TC_H_MIN_INGRESS
120 #define TC_H_MIN_INGRESS 0xfff2u
121 #endif
122
123 /* Normally found in linux/pkt_cls.h. */
124 #ifndef TCA_CLS_FLAGS_SKIP_SW
125 #define TCA_CLS_FLAGS_SKIP_SW (1 << 1)
126 #endif
127 #ifndef HAVE_TCA_FLOWER_ACT
128 #define TCA_FLOWER_ACT 3
129 #endif
130 #ifndef HAVE_TCA_FLOWER_FLAGS
131 #define TCA_FLOWER_FLAGS 22
132 #endif
133 #ifndef HAVE_TCA_FLOWER_KEY_ETH_TYPE
134 #define TCA_FLOWER_KEY_ETH_TYPE 8
135 #endif
136 #ifndef HAVE_TCA_FLOWER_KEY_ETH_DST
137 #define TCA_FLOWER_KEY_ETH_DST 4
138 #endif
139 #ifndef HAVE_TCA_FLOWER_KEY_ETH_DST_MASK
140 #define TCA_FLOWER_KEY_ETH_DST_MASK 5
141 #endif
142 #ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC
143 #define TCA_FLOWER_KEY_ETH_SRC 6
144 #endif
145 #ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC_MASK
146 #define TCA_FLOWER_KEY_ETH_SRC_MASK 7
147 #endif
148 #ifndef HAVE_TCA_FLOWER_KEY_IP_PROTO
149 #define TCA_FLOWER_KEY_IP_PROTO 9
150 #endif
151 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC
152 #define TCA_FLOWER_KEY_IPV4_SRC 10
153 #endif
154 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC_MASK
155 #define TCA_FLOWER_KEY_IPV4_SRC_MASK 11
156 #endif
157 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST
158 #define TCA_FLOWER_KEY_IPV4_DST 12
159 #endif
160 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST_MASK
161 #define TCA_FLOWER_KEY_IPV4_DST_MASK 13
162 #endif
163 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC
164 #define TCA_FLOWER_KEY_IPV6_SRC 14
165 #endif
166 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC_MASK
167 #define TCA_FLOWER_KEY_IPV6_SRC_MASK 15
168 #endif
169 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST
170 #define TCA_FLOWER_KEY_IPV6_DST 16
171 #endif
172 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST_MASK
173 #define TCA_FLOWER_KEY_IPV6_DST_MASK 17
174 #endif
175 #ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC
176 #define TCA_FLOWER_KEY_TCP_SRC 18
177 #endif
178 #ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC_MASK
179 #define TCA_FLOWER_KEY_TCP_SRC_MASK 35
180 #endif
181 #ifndef HAVE_TCA_FLOWER_KEY_TCP_DST
182 #define TCA_FLOWER_KEY_TCP_DST 19
183 #endif
184 #ifndef HAVE_TCA_FLOWER_KEY_TCP_DST_MASK
185 #define TCA_FLOWER_KEY_TCP_DST_MASK 36
186 #endif
187 #ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC
188 #define TCA_FLOWER_KEY_UDP_SRC 20
189 #endif
190 #ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC_MASK
191 #define TCA_FLOWER_KEY_UDP_SRC_MASK 37
192 #endif
193 #ifndef HAVE_TCA_FLOWER_KEY_UDP_DST
194 #define TCA_FLOWER_KEY_UDP_DST 21
195 #endif
196 #ifndef HAVE_TCA_FLOWER_KEY_UDP_DST_MASK
197 #define TCA_FLOWER_KEY_UDP_DST_MASK 38
198 #endif
199 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_ID
200 #define TCA_FLOWER_KEY_VLAN_ID 23
201 #endif
202 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_PRIO
203 #define TCA_FLOWER_KEY_VLAN_PRIO 24
204 #endif
205 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_ETH_TYPE
206 #define TCA_FLOWER_KEY_VLAN_ETH_TYPE 25
207 #endif
208 #ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS
209 #define TCA_FLOWER_KEY_TCP_FLAGS 71
210 #endif
211 #ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS_MASK
212 #define TCA_FLOWER_KEY_TCP_FLAGS_MASK 72
213 #endif
214
215 #ifndef IPV6_ADDR_LEN
216 #define IPV6_ADDR_LEN 16
217 #endif
218
219 #ifndef IPV4_ADDR_LEN
220 #define IPV4_ADDR_LEN 4
221 #endif
222
223 #ifndef TP_PORT_LEN
224 #define TP_PORT_LEN 2 /* Transport Port (UDP/TCP) Length */
225 #endif
226
227 /** Empty masks for known item types. */
228 static const union {
229         struct rte_flow_item_port_id port_id;
230         struct rte_flow_item_eth eth;
231         struct rte_flow_item_vlan vlan;
232         struct rte_flow_item_ipv4 ipv4;
233         struct rte_flow_item_ipv6 ipv6;
234         struct rte_flow_item_tcp tcp;
235         struct rte_flow_item_udp udp;
236 } flow_tcf_mask_empty;
237
238 /** Supported masks for known item types. */
239 static const struct {
240         struct rte_flow_item_port_id port_id;
241         struct rte_flow_item_eth eth;
242         struct rte_flow_item_vlan vlan;
243         struct rte_flow_item_ipv4 ipv4;
244         struct rte_flow_item_ipv6 ipv6;
245         struct rte_flow_item_tcp tcp;
246         struct rte_flow_item_udp udp;
247 } flow_tcf_mask_supported = {
248         .port_id = {
249                 .id = 0xffffffff,
250         },
251         .eth = {
252                 .type = RTE_BE16(0xffff),
253                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
254                 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
255         },
256         .vlan = {
257                 /* PCP and VID only, no DEI. */
258                 .tci = RTE_BE16(0xefff),
259                 .inner_type = RTE_BE16(0xffff),
260         },
261         .ipv4.hdr = {
262                 .next_proto_id = 0xff,
263                 .src_addr = RTE_BE32(0xffffffff),
264                 .dst_addr = RTE_BE32(0xffffffff),
265         },
266         .ipv6.hdr = {
267                 .proto = 0xff,
268                 .src_addr =
269                         "\xff\xff\xff\xff\xff\xff\xff\xff"
270                         "\xff\xff\xff\xff\xff\xff\xff\xff",
271                 .dst_addr =
272                         "\xff\xff\xff\xff\xff\xff\xff\xff"
273                         "\xff\xff\xff\xff\xff\xff\xff\xff",
274         },
275         .tcp.hdr = {
276                 .src_port = RTE_BE16(0xffff),
277                 .dst_port = RTE_BE16(0xffff),
278                 .tcp_flags = 0xff,
279         },
280         .udp.hdr = {
281                 .src_port = RTE_BE16(0xffff),
282                 .dst_port = RTE_BE16(0xffff),
283         },
284 };
285
286 #define SZ_NLATTR_HDR MNL_ALIGN(sizeof(struct nlattr))
287 #define SZ_NLATTR_NEST SZ_NLATTR_HDR
288 #define SZ_NLATTR_DATA_OF(len) MNL_ALIGN(SZ_NLATTR_HDR + (len))
289 #define SZ_NLATTR_TYPE_OF(typ) SZ_NLATTR_DATA_OF(sizeof(typ))
290 #define SZ_NLATTR_STRZ_OF(str) SZ_NLATTR_DATA_OF(strlen(str) + 1)
291
292 #define PTOI_TABLE_SZ_MAX(dev) (mlx5_dev_to_port_id((dev)->device, NULL, 0) + 2)
293
294 /** DPDK port to network interface index (ifindex) conversion. */
295 struct flow_tcf_ptoi {
296         uint16_t port_id; /**< DPDK port ID. */
297         unsigned int ifindex; /**< Network interface index. */
298 };
299
300 #define MLX5_TCF_FATE_ACTIONS (MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_PORT_ID)
301 #define MLX5_TCF_VLAN_ACTIONS \
302         (MLX5_FLOW_ACTION_OF_POP_VLAN | MLX5_FLOW_ACTION_OF_PUSH_VLAN | \
303          MLX5_FLOW_ACTION_OF_SET_VLAN_VID | MLX5_FLOW_ACTION_OF_SET_VLAN_PCP)
304
305 #define MLX5_TCF_PEDIT_ACTIONS \
306         (MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST | \
307          MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST | \
308          MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST)
309
310 #define MLX5_TCF_CONFIG_ACTIONS \
311         (MLX5_FLOW_ACTION_PORT_ID | MLX5_FLOW_ACTION_OF_PUSH_VLAN | \
312          MLX5_FLOW_ACTION_OF_SET_VLAN_VID | MLX5_FLOW_ACTION_OF_SET_VLAN_PCP | \
313          MLX5_TCF_PEDIT_ACTIONS)
314
/* Capacity of the keys[] and keys_ex[] arrays of struct pedit_parser. */
#define MAX_PEDIT_KEYS 128
/* Number of bytes rewritten by the value of a single pedit key. */
#define SZ_PEDIT_KEY_VAL 4

/* Number of pedit keys needed to cover sz bytes; a partial key counts as one. */
#define NUM_OF_PEDIT_KEYS(sz) \
        (((sz) / SZ_PEDIT_KEY_VAL) + (((sz) % SZ_PEDIT_KEY_VAL) != 0))
320
/** Extended attributes kept for each pedit key (one entry per key). */
321 struct pedit_key_ex {
322         enum pedit_header_type htype; /**< Sent as TCA_PEDIT_KEY_EX_HTYPE. */
323         enum pedit_cmd cmd; /**< Sent as TCA_PEDIT_KEY_EX_CMD. */
324 };
325
/**
 * Scratch area used while building one tc-pedit action.
 *
 * keys[] and keys_ex[] are indexed in parallel; sel.nkeys is the number of
 * entries filled so far. keys[] is declared right after sel because the
 * netlink payload is emitted from the start of this structure as
 * sizeof(sel) + nkeys * sizeof(struct tc_pedit_key) bytes.
 */
326 struct pedit_parser {
327         struct tc_pedit_sel sel; /**< Selector header, holds nkeys. */
328         struct tc_pedit_key keys[MAX_PEDIT_KEYS]; /**< Rewrite keys. */
329         struct pedit_key_ex keys_ex[MAX_PEDIT_KEYS]; /**< Per-key htype/cmd. */
330 };
331
332
333 /**
334  * Set pedit key of transport (TCP/UDP) port value
335  *
336  * @param[in] actions
337  *   pointer to action specification
338  * @param[in,out] p_parser
339  *   pointer to pedit_parser
340  * @param[in] item_flags
341  *   flags of all items presented
342  */
343 static void
344 flow_tcf_pedit_key_set_tp_port(const struct rte_flow_action *actions,
345                                 struct pedit_parser *p_parser,
346                                 uint64_t item_flags)
347 {
348         int idx = p_parser->sel.nkeys;
349
350         if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
351                 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_UDP;
352         if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
353                 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_TCP;
354         p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
355         /* offset of src/dst port is same for TCP and UDP */
356         p_parser->keys[idx].off =
357                 actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_SRC ?
358                 offsetof(struct tcp_hdr, src_port) :
359                 offsetof(struct tcp_hdr, dst_port);
360         p_parser->keys[idx].mask = 0xFFFF0000;
361         p_parser->keys[idx].val =
362                 (__u32)((const struct rte_flow_action_set_tp *)
363                                 actions->conf)->port;
364         p_parser->sel.nkeys = (++idx);
365 }
366
367 /**
368  * Set pedit key of ipv6 address
369  *
370  * @param[in] actions
371  *   pointer to action specification
372  * @param[in,out] p_parser
373  *   pointer to pedit_parser
374  */
375 static void
376 flow_tcf_pedit_key_set_ipv6_addr(const struct rte_flow_action *actions,
377                                  struct pedit_parser *p_parser)
378 {
379         int idx = p_parser->sel.nkeys;
380         int keys = NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
381         int off_base =
382                 actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC ?
383                 offsetof(struct ipv6_hdr, src_addr) :
384                 offsetof(struct ipv6_hdr, dst_addr);
385         const struct rte_flow_action_set_ipv6 *conf =
386                 (const struct rte_flow_action_set_ipv6 *)actions->conf;
387
388         for (int i = 0; i < keys; i++, idx++) {
389                 p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
390                 p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
391                 p_parser->keys[idx].off = off_base + i * SZ_PEDIT_KEY_VAL;
392                 p_parser->keys[idx].mask = ~UINT32_MAX;
393                 memcpy(&p_parser->keys[idx].val,
394                         conf->ipv6_addr + i *  SZ_PEDIT_KEY_VAL,
395                         SZ_PEDIT_KEY_VAL);
396         }
397         p_parser->sel.nkeys += keys;
398 }
399
400 /**
401  * Set pedit key of ipv4 address
402  *
403  * @param[in] actions
404  *   pointer to action specification
405  * @param[in,out] p_parser
406  *   pointer to pedit_parser
407  */
408 static void
409 flow_tcf_pedit_key_set_ipv4_addr(const struct rte_flow_action *actions,
410                                  struct pedit_parser *p_parser)
411 {
412         int idx = p_parser->sel.nkeys;
413
414         p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
415         p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
416         p_parser->keys[idx].off =
417                 actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC ?
418                 offsetof(struct ipv4_hdr, src_addr) :
419                 offsetof(struct ipv4_hdr, dst_addr);
420         p_parser->keys[idx].mask = ~UINT32_MAX;
421         p_parser->keys[idx].val =
422                 ((const struct rte_flow_action_set_ipv4 *)
423                  actions->conf)->ipv4_addr;
424         p_parser->sel.nkeys = (++idx);
425 }
426
427 /**
428  * Create the pedit's na attribute in netlink message
429  * on pre-allocate message buffer
430  *
431  * @param[in,out] nl
432  *   pointer to pre-allocated netlink message buffer
433  * @param[in,out] actions
434  *   pointer to pointer of actions specification.
435  * @param[in,out] action_flags
436  *   pointer to actions flags
437  * @param[in] item_flags
438  *   flags of all item presented
439  */
440 static void
441 flow_tcf_create_pedit_mnl_msg(struct nlmsghdr *nl,
442                               const struct rte_flow_action **actions,
443                               uint64_t item_flags)
444 {
445         struct pedit_parser p_parser;
446         struct nlattr *na_act_options;
447         struct nlattr *na_pedit_keys;
448
449         memset(&p_parser, 0, sizeof(p_parser));
450         mnl_attr_put_strz(nl, TCA_ACT_KIND, "pedit");
451         na_act_options = mnl_attr_nest_start(nl, TCA_ACT_OPTIONS);
452         /* all modify header actions should be in one tc-pedit action */
453         for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
454                 switch ((*actions)->type) {
455                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
456                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
457                         flow_tcf_pedit_key_set_ipv4_addr(*actions, &p_parser);
458                         break;
459                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
460                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
461                         flow_tcf_pedit_key_set_ipv6_addr(*actions, &p_parser);
462                         break;
463                 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
464                 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
465                         flow_tcf_pedit_key_set_tp_port(*actions,
466                                                         &p_parser, item_flags);
467                         break;
468                 default:
469                         goto pedit_mnl_msg_done;
470                 }
471         }
472 pedit_mnl_msg_done:
473         p_parser.sel.action = TC_ACT_PIPE;
474         mnl_attr_put(nl, TCA_PEDIT_PARMS_EX,
475                      sizeof(p_parser.sel) +
476                      p_parser.sel.nkeys * sizeof(struct tc_pedit_key),
477                      &p_parser);
478         na_pedit_keys =
479                 mnl_attr_nest_start(nl, TCA_PEDIT_KEYS_EX | NLA_F_NESTED);
480         for (int i = 0; i < p_parser.sel.nkeys; i++) {
481                 struct nlattr *na_pedit_key =
482                         mnl_attr_nest_start(nl,
483                                             TCA_PEDIT_KEY_EX | NLA_F_NESTED);
484                 mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_HTYPE,
485                                  p_parser.keys_ex[i].htype);
486                 mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_CMD,
487                                  p_parser.keys_ex[i].cmd);
488                 mnl_attr_nest_end(nl, na_pedit_key);
489         }
490         mnl_attr_nest_end(nl, na_pedit_keys);
491         mnl_attr_nest_end(nl, na_act_options);
492         (*actions)--;
493 }
494
495 /**
496  * Calculate max memory size of one TC-pedit actions.
497  * One TC-pedit action can contain set of keys each defining
498  * a rewrite element (rte_flow action)
499  *
500  * @param[in,out] actions
501  *   actions specification.
502  * @param[in,out] action_flags
503  *   actions flags
504  * @param[in,out] size
505  *   accumulated size
506  * @return
507  *   Max memory size of one TC-pedit action
508  */
509 static int
510 flow_tcf_get_pedit_actions_size(const struct rte_flow_action **actions,
511                                 uint64_t *action_flags)
512 {
513         int pedit_size = 0;
514         int keys = 0;
515         uint64_t flags = 0;
516
517         pedit_size += SZ_NLATTR_NEST + /* na_act_index. */
518                       SZ_NLATTR_STRZ_OF("pedit") +
519                       SZ_NLATTR_NEST; /* TCA_ACT_OPTIONS. */
520         for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
521                 switch ((*actions)->type) {
522                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
523                         keys += NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN);
524                         flags |= MLX5_FLOW_ACTION_SET_IPV4_SRC;
525                         break;
526                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
527                         keys += NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN);
528                         flags |= MLX5_FLOW_ACTION_SET_IPV4_DST;
529                         break;
530                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
531                         keys += NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
532                         flags |= MLX5_FLOW_ACTION_SET_IPV6_SRC;
533                         break;
534                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
535                         keys += NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
536                         flags |= MLX5_FLOW_ACTION_SET_IPV6_DST;
537                         break;
538                 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
539                         /* TCP is as same as UDP */
540                         keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
541                         flags |= MLX5_FLOW_ACTION_SET_TP_SRC;
542                         break;
543                 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
544                         /* TCP is as same as UDP */
545                         keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
546                         flags |= MLX5_FLOW_ACTION_SET_TP_DST;
547                         break;
548                 default:
549                         goto get_pedit_action_size_done;
550                 }
551         }
552 get_pedit_action_size_done:
553         /* TCA_PEDIT_PARAMS_EX */
554         pedit_size +=
555                 SZ_NLATTR_DATA_OF(sizeof(struct tc_pedit_sel) +
556                                   keys * sizeof(struct tc_pedit_key));
557         pedit_size += SZ_NLATTR_NEST; /* TCA_PEDIT_KEYS */
558         pedit_size += keys *
559                       /* TCA_PEDIT_KEY_EX + HTYPE + CMD */
560                       (SZ_NLATTR_NEST + SZ_NLATTR_DATA_OF(2) +
561                        SZ_NLATTR_DATA_OF(2));
562         (*action_flags) |= flags;
563         (*actions)--;
564         return pedit_size;
565 }
566
567 /**
568  * Retrieve mask for pattern item.
569  *
570  * This function does basic sanity checks on a pattern item in order to
571  * return the most appropriate mask for it.
572  *
573  * @param[in] item
574  *   Item specification.
575  * @param[in] mask_default
576  *   Default mask for pattern item as specified by the flow API.
577  * @param[in] mask_supported
578  *   Mask fields supported by the implementation.
579  * @param[in] mask_empty
580  *   Empty mask to return when there is no specification.
581  * @param[out] error
582  *   Perform verbose error reporting if not NULL.
583  *
584  * @return
585  *   Either @p item->mask or one of the mask parameters on success, NULL
586  *   otherwise and rte_errno is set.
587  */
588 static const void *
589 flow_tcf_item_mask(const struct rte_flow_item *item, const void *mask_default,
590                    const void *mask_supported, const void *mask_empty,
591                    size_t mask_size, struct rte_flow_error *error)
592 {
593         const uint8_t *mask;
594         size_t i;
595
596         /* item->last and item->mask cannot exist without item->spec. */
597         if (!item->spec && (item->mask || item->last)) {
598                 rte_flow_error_set(error, EINVAL,
599                                    RTE_FLOW_ERROR_TYPE_ITEM, item,
600                                    "\"mask\" or \"last\" field provided without"
601                                    " a corresponding \"spec\"");
602                 return NULL;
603         }
604         /* No spec, no mask, no problem. */
605         if (!item->spec)
606                 return mask_empty;
607         mask = item->mask ? item->mask : mask_default;
608         assert(mask);
609         /*
610          * Single-pass check to make sure that:
611          * - Mask is supported, no bits are set outside mask_supported.
612          * - Both item->spec and item->last are included in mask.
613          */
614         for (i = 0; i != mask_size; ++i) {
615                 if (!mask[i])
616                         continue;
617                 if ((mask[i] | ((const uint8_t *)mask_supported)[i]) !=
618                     ((const uint8_t *)mask_supported)[i]) {
619                         rte_flow_error_set(error, ENOTSUP,
620                                            RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
621                                            "unsupported field found"
622                                            " in \"mask\"");
623                         return NULL;
624                 }
625                 if (item->last &&
626                     (((const uint8_t *)item->spec)[i] & mask[i]) !=
627                     (((const uint8_t *)item->last)[i] & mask[i])) {
628                         rte_flow_error_set(error, EINVAL,
629                                            RTE_FLOW_ERROR_TYPE_ITEM_LAST,
630                                            item->last,
631                                            "range between \"spec\" and \"last\""
632                                            " not comprised in \"mask\"");
633                         return NULL;
634                 }
635         }
636         return mask;
637 }
638
639 /**
640  * Build a conversion table between port ID and ifindex.
641  *
642  * @param[in] dev
643  *   Pointer to Ethernet device.
644  * @param[out] ptoi
645  *   Pointer to ptoi table.
646  * @param[in] len
647  *   Size of ptoi table provided.
648  *
649  * @return
650  *   Size of ptoi table filled.
651  */
652 static unsigned int
653 flow_tcf_build_ptoi_table(struct rte_eth_dev *dev, struct flow_tcf_ptoi *ptoi,
654                           unsigned int len)
655 {
656         unsigned int n = mlx5_dev_to_port_id(dev->device, NULL, 0);
657         uint16_t port_id[n + 1];
658         unsigned int i;
659         unsigned int own = 0;
660
661         /* At least one port is needed when no switch domain is present. */
662         if (!n) {
663                 n = 1;
664                 port_id[0] = dev->data->port_id;
665         } else {
666                 n = RTE_MIN(mlx5_dev_to_port_id(dev->device, port_id, n), n);
667         }
668         if (n > len)
669                 return 0;
670         for (i = 0; i != n; ++i) {
671                 struct rte_eth_dev_info dev_info;
672
673                 rte_eth_dev_info_get(port_id[i], &dev_info);
674                 if (port_id[i] == dev->data->port_id)
675                         own = i;
676                 ptoi[i].port_id = port_id[i];
677                 ptoi[i].ifindex = dev_info.if_index;
678         }
679         /* Ensure first entry of ptoi[] is the current device. */
680         if (own) {
681                 ptoi[n] = ptoi[0];
682                 ptoi[0] = ptoi[own];
683                 ptoi[own] = ptoi[n];
684         }
685         /* An entry with zero ifindex terminates ptoi[]. */
686         ptoi[n].port_id = 0;
687         ptoi[n].ifindex = 0;
688         return n;
689 }
690
691 /**
692  * Verify the @p attr will be correctly understood by the E-switch.
693  *
694  * @param[in] attr
695  *   Pointer to flow attributes
696  * @param[out] error
697  *   Pointer to error structure.
698  *
699  * @return
700  *   0 on success, a negative errno value otherwise and rte_errno is set.
701  */
702 static int
703 flow_tcf_validate_attributes(const struct rte_flow_attr *attr,
704                              struct rte_flow_error *error)
705 {
706         /*
707          * Supported attributes: no groups, some priorities and ingress only.
708          * Don't care about transfer as it is the caller's problem.
709          */
710         if (attr->group)
711                 return rte_flow_error_set(error, ENOTSUP,
712                                           RTE_FLOW_ERROR_TYPE_ATTR_GROUP, attr,
713                                           "groups are not supported");
714         if (attr->priority > 0xfffe)
715                 return rte_flow_error_set(error, ENOTSUP,
716                                           RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
717                                           attr,
718                                           "lowest priority level is 0xfffe");
719         if (!attr->ingress)
720                 return rte_flow_error_set(error, EINVAL,
721                                           RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
722                                           attr, "only ingress is supported");
723         if (attr->egress)
724                 return rte_flow_error_set(error, ENOTSUP,
725                                           RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
726                                           attr, "egress is not supported");
727         return 0;
728 }
729
730 /**
731  * Validate flow for E-Switch.
732  *
733  * @param[in] dev
734  *   Pointer to Ethernet device.
735  * @param[in] attr
736  *   Pointer to the flow attributes.
737  * @param[in] items
738  *   Pointer to the list of items.
739  * @param[in] actions
740  *   Pointer to the list of actions.
741  * @param[out] error
742  *   Pointer to the error structure.
743  *
744  * @return
745  *   0 on success, a negative errno value otherwise and rte_errno is set.
746  */
747 static int
748 flow_tcf_validate(struct rte_eth_dev *dev,
749                   const struct rte_flow_attr *attr,
750                   const struct rte_flow_item items[],
751                   const struct rte_flow_action actions[],
752                   struct rte_flow_error *error)
753 {
754         union {
755                 const struct rte_flow_item_port_id *port_id;
756                 const struct rte_flow_item_eth *eth;
757                 const struct rte_flow_item_vlan *vlan;
758                 const struct rte_flow_item_ipv4 *ipv4;
759                 const struct rte_flow_item_ipv6 *ipv6;
760                 const struct rte_flow_item_tcp *tcp;
761                 const struct rte_flow_item_udp *udp;
762         } spec, mask;
763         union {
764                 const struct rte_flow_action_port_id *port_id;
765                 const struct rte_flow_action_of_push_vlan *of_push_vlan;
766                 const struct rte_flow_action_of_set_vlan_vid *
767                         of_set_vlan_vid;
768                 const struct rte_flow_action_of_set_vlan_pcp *
769                         of_set_vlan_pcp;
770                 const struct rte_flow_action_set_ipv4 *set_ipv4;
771                 const struct rte_flow_action_set_ipv6 *set_ipv6;
772         } conf;
773         uint32_t item_flags = 0;
774         uint32_t action_flags = 0;
775         uint8_t next_protocol = -1;
776         unsigned int tcm_ifindex = 0;
777         uint8_t pedit_validated = 0;
778         struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
779         struct rte_eth_dev *port_id_dev = NULL;
780         bool in_port_id_set;
781         int ret;
782
783         claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
784                                                 PTOI_TABLE_SZ_MAX(dev)));
785         ret = flow_tcf_validate_attributes(attr, error);
786         if (ret < 0)
787                 return ret;
788         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
789                 unsigned int i;
790
791                 switch (items->type) {
792                 case RTE_FLOW_ITEM_TYPE_VOID:
793                         break;
794                 case RTE_FLOW_ITEM_TYPE_PORT_ID:
795                         mask.port_id = flow_tcf_item_mask
796                                 (items, &rte_flow_item_port_id_mask,
797                                  &flow_tcf_mask_supported.port_id,
798                                  &flow_tcf_mask_empty.port_id,
799                                  sizeof(flow_tcf_mask_supported.port_id),
800                                  error);
801                         if (!mask.port_id)
802                                 return -rte_errno;
803                         if (mask.port_id == &flow_tcf_mask_empty.port_id) {
804                                 in_port_id_set = 1;
805                                 break;
806                         }
807                         spec.port_id = items->spec;
808                         if (mask.port_id->id && mask.port_id->id != 0xffffffff)
809                                 return rte_flow_error_set
810                                         (error, ENOTSUP,
811                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
812                                          mask.port_id,
813                                          "no support for partial mask on"
814                                          " \"id\" field");
815                         if (!mask.port_id->id)
816                                 i = 0;
817                         else
818                                 for (i = 0; ptoi[i].ifindex; ++i)
819                                         if (ptoi[i].port_id == spec.port_id->id)
820                                                 break;
821                         if (!ptoi[i].ifindex)
822                                 return rte_flow_error_set
823                                         (error, ENODEV,
824                                          RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
825                                          spec.port_id,
826                                          "missing data to convert port ID to"
827                                          " ifindex");
828                         if (in_port_id_set && ptoi[i].ifindex != tcm_ifindex)
829                                 return rte_flow_error_set
830                                         (error, ENOTSUP,
831                                          RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
832                                          spec.port_id,
833                                          "cannot match traffic for"
834                                          " several port IDs through"
835                                          " a single flow rule");
836                         tcm_ifindex = ptoi[i].ifindex;
837                         in_port_id_set = 1;
838                         break;
839                 case RTE_FLOW_ITEM_TYPE_ETH:
840                         ret = mlx5_flow_validate_item_eth(items, item_flags,
841                                                           error);
842                         if (ret < 0)
843                                 return ret;
844                         item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
845                         /* TODO:
846                          * Redundant check due to different supported mask.
847                          * Same for the rest of items.
848                          */
849                         mask.eth = flow_tcf_item_mask
850                                 (items, &rte_flow_item_eth_mask,
851                                  &flow_tcf_mask_supported.eth,
852                                  &flow_tcf_mask_empty.eth,
853                                  sizeof(flow_tcf_mask_supported.eth),
854                                  error);
855                         if (!mask.eth)
856                                 return -rte_errno;
857                         if (mask.eth->type && mask.eth->type !=
858                             RTE_BE16(0xffff))
859                                 return rte_flow_error_set
860                                         (error, ENOTSUP,
861                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
862                                          mask.eth,
863                                          "no support for partial mask on"
864                                          " \"type\" field");
865                         break;
866                 case RTE_FLOW_ITEM_TYPE_VLAN:
867                         ret = mlx5_flow_validate_item_vlan(items, item_flags,
868                                                            error);
869                         if (ret < 0)
870                                 return ret;
871                         item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
872                         mask.vlan = flow_tcf_item_mask
873                                 (items, &rte_flow_item_vlan_mask,
874                                  &flow_tcf_mask_supported.vlan,
875                                  &flow_tcf_mask_empty.vlan,
876                                  sizeof(flow_tcf_mask_supported.vlan),
877                                  error);
878                         if (!mask.vlan)
879                                 return -rte_errno;
880                         if ((mask.vlan->tci & RTE_BE16(0xe000) &&
881                              (mask.vlan->tci & RTE_BE16(0xe000)) !=
882                               RTE_BE16(0xe000)) ||
883                             (mask.vlan->tci & RTE_BE16(0x0fff) &&
884                              (mask.vlan->tci & RTE_BE16(0x0fff)) !=
885                               RTE_BE16(0x0fff)) ||
886                             (mask.vlan->inner_type &&
887                              mask.vlan->inner_type != RTE_BE16(0xffff)))
888                                 return rte_flow_error_set
889                                         (error, ENOTSUP,
890                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
891                                          mask.vlan,
892                                          "no support for partial masks on"
893                                          " \"tci\" (PCP and VID parts) and"
894                                          " \"inner_type\" fields");
895                         break;
896                 case RTE_FLOW_ITEM_TYPE_IPV4:
897                         ret = mlx5_flow_validate_item_ipv4(items, item_flags,
898                                                            error);
899                         if (ret < 0)
900                                 return ret;
901                         item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
902                         mask.ipv4 = flow_tcf_item_mask
903                                 (items, &rte_flow_item_ipv4_mask,
904                                  &flow_tcf_mask_supported.ipv4,
905                                  &flow_tcf_mask_empty.ipv4,
906                                  sizeof(flow_tcf_mask_supported.ipv4),
907                                  error);
908                         if (!mask.ipv4)
909                                 return -rte_errno;
910                         if (mask.ipv4->hdr.next_proto_id &&
911                             mask.ipv4->hdr.next_proto_id != 0xff)
912                                 return rte_flow_error_set
913                                         (error, ENOTSUP,
914                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
915                                          mask.ipv4,
916                                          "no support for partial mask on"
917                                          " \"hdr.next_proto_id\" field");
918                         else if (mask.ipv4->hdr.next_proto_id)
919                                 next_protocol =
920                                         ((const struct rte_flow_item_ipv4 *)
921                                          (items->spec))->hdr.next_proto_id;
922                         break;
923                 case RTE_FLOW_ITEM_TYPE_IPV6:
924                         ret = mlx5_flow_validate_item_ipv6(items, item_flags,
925                                                            error);
926                         if (ret < 0)
927                                 return ret;
928                         item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
929                         mask.ipv6 = flow_tcf_item_mask
930                                 (items, &rte_flow_item_ipv6_mask,
931                                  &flow_tcf_mask_supported.ipv6,
932                                  &flow_tcf_mask_empty.ipv6,
933                                  sizeof(flow_tcf_mask_supported.ipv6),
934                                  error);
935                         if (!mask.ipv6)
936                                 return -rte_errno;
937                         if (mask.ipv6->hdr.proto &&
938                             mask.ipv6->hdr.proto != 0xff)
939                                 return rte_flow_error_set
940                                         (error, ENOTSUP,
941                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
942                                          mask.ipv6,
943                                          "no support for partial mask on"
944                                          " \"hdr.proto\" field");
945                         else if (mask.ipv6->hdr.proto)
946                                 next_protocol =
947                                         ((const struct rte_flow_item_ipv6 *)
948                                          (items->spec))->hdr.proto;
949                         break;
950                 case RTE_FLOW_ITEM_TYPE_UDP:
951                         ret = mlx5_flow_validate_item_udp(items, item_flags,
952                                                           next_protocol, error);
953                         if (ret < 0)
954                                 return ret;
955                         item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
956                         mask.udp = flow_tcf_item_mask
957                                 (items, &rte_flow_item_udp_mask,
958                                  &flow_tcf_mask_supported.udp,
959                                  &flow_tcf_mask_empty.udp,
960                                  sizeof(flow_tcf_mask_supported.udp),
961                                  error);
962                         if (!mask.udp)
963                                 return -rte_errno;
964                         break;
965                 case RTE_FLOW_ITEM_TYPE_TCP:
966                         ret = mlx5_flow_validate_item_tcp
967                                              (items, item_flags,
968                                               next_protocol,
969                                               &flow_tcf_mask_supported.tcp,
970                                               error);
971                         if (ret < 0)
972                                 return ret;
973                         item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
974                         mask.tcp = flow_tcf_item_mask
975                                 (items, &rte_flow_item_tcp_mask,
976                                  &flow_tcf_mask_supported.tcp,
977                                  &flow_tcf_mask_empty.tcp,
978                                  sizeof(flow_tcf_mask_supported.tcp),
979                                  error);
980                         if (!mask.tcp)
981                                 return -rte_errno;
982                         break;
983                 default:
984                         return rte_flow_error_set(error, ENOTSUP,
985                                                   RTE_FLOW_ERROR_TYPE_ITEM,
986                                                   NULL, "item not supported");
987                 }
988         }
989         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
990                 unsigned int i;
991                 uint32_t current_action_flag = 0;
992
993                 switch (actions->type) {
994                 case RTE_FLOW_ACTION_TYPE_VOID:
995                         break;
996                 case RTE_FLOW_ACTION_TYPE_PORT_ID:
997                         current_action_flag = MLX5_FLOW_ACTION_PORT_ID;
998                         if (action_flags & MLX5_TCF_FATE_ACTIONS)
999                                 return rte_flow_error_set
1000                                         (error, EINVAL,
1001                                          RTE_FLOW_ERROR_TYPE_ACTION, actions,
1002                                          "can't have multiple fate actions");
1003                         if (!actions->conf)
1004                                 break;
1005                         conf.port_id = actions->conf;
1006                         if (conf.port_id->original)
1007                                 i = 0;
1008                         else
1009                                 for (i = 0; ptoi[i].ifindex; ++i)
1010                                         if (ptoi[i].port_id == conf.port_id->id)
1011                                                 break;
1012                         if (!ptoi[i].ifindex)
1013                                 return rte_flow_error_set
1014                                         (error, ENODEV,
1015                                          RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1016                                          conf.port_id,
1017                                          "missing data to convert port ID to"
1018                                          " ifindex");
1019                         port_id_dev = &rte_eth_devices[conf.port_id->id];
1020                         break;
1021                 case RTE_FLOW_ACTION_TYPE_DROP:
1022                         if (action_flags & MLX5_TCF_FATE_ACTIONS)
1023                                 return rte_flow_error_set
1024                                         (error, EINVAL,
1025                                          RTE_FLOW_ERROR_TYPE_ACTION, actions,
1026                                          "can't have multiple fate actions");
1027                         current_action_flag = MLX5_FLOW_ACTION_DROP;
1028                         break;
1029                 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
1030                         current_action_flag = MLX5_FLOW_ACTION_OF_POP_VLAN;
1031                         break;
1032                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
1033                         current_action_flag = MLX5_FLOW_ACTION_OF_PUSH_VLAN;
1034                         break;
1035                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
1036                         if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
1037                                 return rte_flow_error_set
1038                                         (error, ENOTSUP,
1039                                          RTE_FLOW_ERROR_TYPE_ACTION, actions,
1040                                          "vlan modify is not supported,"
1041                                          " set action must follow push action");
1042                         current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
1043                         break;
1044                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
1045                         if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
1046                                 return rte_flow_error_set
1047                                         (error, ENOTSUP,
1048                                          RTE_FLOW_ERROR_TYPE_ACTION, actions,
1049                                          "vlan modify is not supported,"
1050                                          " set action must follow push action");
1051                         current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
1052                         break;
1053                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
1054                         current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_SRC;
1055                         break;
1056                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
1057                         current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_DST;
1058                         break;
1059                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
1060                         current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_SRC;
1061                         break;
1062                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
1063                         current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_DST;
1064                         break;
1065                 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
1066                         current_action_flag = MLX5_FLOW_ACTION_SET_TP_SRC;
1067                         break;
1068                 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
1069                         current_action_flag = MLX5_FLOW_ACTION_SET_TP_DST;
1070                         break;
1071                 default:
1072                         return rte_flow_error_set(error, ENOTSUP,
1073                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1074                                                   actions,
1075                                                   "action not supported");
1076                 }
1077                 if (current_action_flag & MLX5_TCF_CONFIG_ACTIONS) {
1078                         if (!actions->conf)
1079                                 return rte_flow_error_set(error, EINVAL,
1080                                                 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1081                                                 actions,
1082                                                 "action configuration not set");
1083                 }
1084                 if ((current_action_flag & MLX5_TCF_PEDIT_ACTIONS) &&
1085                                 pedit_validated)
1086                         return rte_flow_error_set(error, ENOTSUP,
1087                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1088                                                   actions,
1089                                                   "set actions should be "
1090                                                   "listed successively");
1091                 if ((current_action_flag & ~MLX5_TCF_PEDIT_ACTIONS) &&
1092                     (action_flags & MLX5_TCF_PEDIT_ACTIONS))
1093                         pedit_validated = 1;
1094                 action_flags |= current_action_flag;
1095         }
1096         if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
1097             (action_flags & MLX5_FLOW_ACTION_DROP))
1098                 return rte_flow_error_set(error, ENOTSUP,
1099                                           RTE_FLOW_ERROR_TYPE_ACTION,
1100                                           actions,
1101                                           "set action is not compatible with "
1102                                           "drop action");
1103         if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
1104             !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
1105                 return rte_flow_error_set(error, ENOTSUP,
1106                                           RTE_FLOW_ERROR_TYPE_ACTION,
1107                                           actions,
1108                                           "set action must be followed by "
1109                                           "port_id action");
1110         if (action_flags &
1111            (MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST)) {
1112                 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4))
1113                         return rte_flow_error_set(error, EINVAL,
1114                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1115                                                   actions,
1116                                                   "no ipv4 item found in"
1117                                                   " pattern");
1118         }
1119         if (action_flags &
1120            (MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST)) {
1121                 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6))
1122                         return rte_flow_error_set(error, EINVAL,
1123                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1124                                                   actions,
1125                                                   "no ipv6 item found in"
1126                                                   " pattern");
1127         }
1128         if (action_flags &
1129            (MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST)) {
1130                 if (!(item_flags &
1131                      (MLX5_FLOW_LAYER_OUTER_L4_UDP |
1132                       MLX5_FLOW_LAYER_OUTER_L4_TCP)))
1133                         return rte_flow_error_set(error, EINVAL,
1134                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1135                                                   actions,
1136                                                   "no TCP/UDP item found in"
1137                                                   " pattern");
1138         }
1139         /*
1140          * FW syndrome (0xA9C090):
1141          *     set_flow_table_entry: push vlan action fte in fdb can ONLY be
1142          *     forward to the uplink.
1143          */
1144         if ((action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN) &&
1145             (action_flags & MLX5_FLOW_ACTION_PORT_ID) &&
1146             ((struct priv *)port_id_dev->data->dev_private)->representor)
1147                 return rte_flow_error_set(error, ENOTSUP,
1148                                           RTE_FLOW_ERROR_TYPE_ACTION, actions,
1149                                           "vlan push can only be applied"
1150                                           " when forwarding to uplink port");
1151         /*
1152          * FW syndrome (0x294609):
1153          *     set_flow_table_entry: modify/pop/push actions in fdb flow table
1154          *     are supported only while forwarding to vport.
1155          */
1156         if ((action_flags & MLX5_TCF_VLAN_ACTIONS) &&
1157             !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
1158                 return rte_flow_error_set(error, ENOTSUP,
1159                                           RTE_FLOW_ERROR_TYPE_ACTION, actions,
1160                                           "vlan actions are supported"
1161                                           " only with port_id action");
1162         if (!(action_flags & MLX5_TCF_FATE_ACTIONS))
1163                 return rte_flow_error_set(error, EINVAL,
1164                                           RTE_FLOW_ERROR_TYPE_ACTION, actions,
1165                                           "no fate action is found");
1166         return 0;
1167 }
1168
1169 /**
1170  * Calculate maximum size of memory for flow items of Linux TC flower and
1171  * extract specified items.
1172  *
1173  * @param[in] items
1174  *   Pointer to the list of items.
1175  * @param[out] item_flags
1176  *   Pointer to the detected items.
1177  *
1178  * @return
1179  *   Maximum size of memory for items.
1180  */
1181 static int
1182 flow_tcf_get_items_and_size(const struct rte_flow_item items[],
1183                             uint64_t *item_flags)
1184 {
1185         int size = 0;
1186         uint64_t flags = 0;
1187
1188         size += SZ_NLATTR_STRZ_OF("flower") +
1189                 SZ_NLATTR_NEST + /* TCA_OPTIONS. */
1190                 SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CLS_FLAGS_SKIP_SW. */
1191         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1192                 switch (items->type) {
1193                 case RTE_FLOW_ITEM_TYPE_VOID:
1194                         break;
1195                 case RTE_FLOW_ITEM_TYPE_PORT_ID:
1196                         break;
1197                 case RTE_FLOW_ITEM_TYPE_ETH:
1198                         size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1199                                 SZ_NLATTR_DATA_OF(ETHER_ADDR_LEN) * 4;
1200                                 /* dst/src MAC addr and mask. */
1201                         flags |= MLX5_FLOW_LAYER_OUTER_L2;
1202                         break;
1203                 case RTE_FLOW_ITEM_TYPE_VLAN:
1204                         size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1205                                 SZ_NLATTR_TYPE_OF(uint16_t) +
1206                                 /* VLAN Ether type. */
1207                                 SZ_NLATTR_TYPE_OF(uint8_t) + /* VLAN prio. */
1208                                 SZ_NLATTR_TYPE_OF(uint16_t); /* VLAN ID. */
1209                         flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
1210                         break;
1211                 case RTE_FLOW_ITEM_TYPE_IPV4:
1212                         size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1213                                 SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1214                                 SZ_NLATTR_TYPE_OF(uint32_t) * 4;
1215                                 /* dst/src IP addr and mask. */
1216                         flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1217                         break;
1218                 case RTE_FLOW_ITEM_TYPE_IPV6:
1219                         size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1220                                 SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1221                                 SZ_NLATTR_TYPE_OF(IPV6_ADDR_LEN) * 4;
1222                                 /* dst/src IP addr and mask. */
1223                         flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1224                         break;
1225                 case RTE_FLOW_ITEM_TYPE_UDP:
1226                         size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1227                                 SZ_NLATTR_TYPE_OF(uint16_t) * 4;
1228                                 /* dst/src port and mask. */
1229                         flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
1230                         break;
1231                 case RTE_FLOW_ITEM_TYPE_TCP:
1232                         size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1233                                 SZ_NLATTR_TYPE_OF(uint16_t) * 4;
1234                                 /* dst/src port and mask. */
1235                         flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
1236                         break;
1237                 default:
1238                         DRV_LOG(WARNING,
1239                                 "unsupported item %p type %d,"
1240                                 " items must be validated before flow creation",
1241                                 (const void *)items, items->type);
1242                         break;
1243                 }
1244         }
1245         *item_flags = flags;
1246         return size;
1247 }
1248
1249 /**
1250  * Calculate maximum size of memory for flow actions of Linux TC flower and
1251  * extract specified actions.
1252  *
1253  * @param[in] actions
1254  *   Pointer to the list of actions.
1255  * @param[out] action_flags
1256  *   Pointer to the detected actions.
1257  *
1258  * @return
1259  *   Maximum size of memory for actions.
1260  */
1261 static int
1262 flow_tcf_get_actions_and_size(const struct rte_flow_action actions[],
1263                               uint64_t *action_flags)
1264 {
1265         int size = 0;
1266         uint64_t flags = 0;
1267
1268         size += SZ_NLATTR_NEST; /* TCA_FLOWER_ACT. */
1269         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1270                 switch (actions->type) {
1271                 case RTE_FLOW_ACTION_TYPE_VOID:
1272                         break;
1273                 case RTE_FLOW_ACTION_TYPE_PORT_ID:
1274                         size += SZ_NLATTR_NEST + /* na_act_index. */
1275                                 SZ_NLATTR_STRZ_OF("mirred") +
1276                                 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1277                                 SZ_NLATTR_TYPE_OF(struct tc_mirred);
1278                         flags |= MLX5_FLOW_ACTION_PORT_ID;
1279                         break;
1280                 case RTE_FLOW_ACTION_TYPE_DROP:
1281                         size += SZ_NLATTR_NEST + /* na_act_index. */
1282                                 SZ_NLATTR_STRZ_OF("gact") +
1283                                 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1284                                 SZ_NLATTR_TYPE_OF(struct tc_gact);
1285                         flags |= MLX5_FLOW_ACTION_DROP;
1286                         break;
1287                 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
1288                         flags |= MLX5_FLOW_ACTION_OF_POP_VLAN;
1289                         goto action_of_vlan;
1290                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
1291                         flags |= MLX5_FLOW_ACTION_OF_PUSH_VLAN;
1292                         goto action_of_vlan;
1293                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
1294                         flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
1295                         goto action_of_vlan;
1296                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
1297                         flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
1298                         goto action_of_vlan;
1299 action_of_vlan:
1300                         size += SZ_NLATTR_NEST + /* na_act_index. */
1301                                 SZ_NLATTR_STRZ_OF("vlan") +
1302                                 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1303                                 SZ_NLATTR_TYPE_OF(struct tc_vlan) +
1304                                 SZ_NLATTR_TYPE_OF(uint16_t) +
1305                                 /* VLAN protocol. */
1306                                 SZ_NLATTR_TYPE_OF(uint16_t) + /* VLAN ID. */
1307                                 SZ_NLATTR_TYPE_OF(uint8_t); /* VLAN prio. */
1308                         break;
1309                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
1310                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
1311                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
1312                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
1313                 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
1314                 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
1315                         size += flow_tcf_get_pedit_actions_size(&actions,
1316                                                                 &flags);
1317                         break;
1318                 default:
1319                         DRV_LOG(WARNING,
1320                                 "unsupported action %p type %d,"
1321                                 " items must be validated before flow creation",
1322                                 (const void *)actions, actions->type);
1323                         break;
1324                 }
1325         }
1326         *action_flags = flags;
1327         return size;
1328 }
1329
1330 /**
1331  * Brand rtnetlink buffer with unique handle.
1332  *
1333  * This handle should be unique for a given network interface to avoid
1334  * collisions.
1335  *
1336  * @param nlh
1337  *   Pointer to Netlink message.
1338  * @param handle
1339  *   Unique 32-bit handle to use.
1340  */
1341 static void
1342 flow_tcf_nl_brand(struct nlmsghdr *nlh, uint32_t handle)
1343 {
1344         struct tcmsg *tcm = mnl_nlmsg_get_payload(nlh);
1345
1346         tcm->tcm_handle = handle;
1347         DRV_LOG(DEBUG, "Netlink msg %p is branded with handle %x",
1348                 (void *)nlh, handle);
1349 }
1350
1351 /**
1352  * Prepare a flow object for Linux TC flower. It calculates the maximum size of
1353  * memory required, allocates the memory, initializes Netlink message headers
1354  * and set unique TC message handle.
1355  *
1356  * @param[in] attr
1357  *   Pointer to the flow attributes.
1358  * @param[in] items
1359  *   Pointer to the list of items.
1360  * @param[in] actions
1361  *   Pointer to the list of actions.
1362  * @param[out] item_flags
1363  *   Pointer to bit mask of all items detected.
1364  * @param[out] action_flags
1365  *   Pointer to bit mask of all actions detected.
1366  * @param[out] error
1367  *   Pointer to the error structure.
1368  *
1369  * @return
1370  *   Pointer to mlx5_flow object on success,
1371  *   otherwise NULL and rte_ernno is set.
1372  */
1373 static struct mlx5_flow *
1374 flow_tcf_prepare(const struct rte_flow_attr *attr __rte_unused,
1375                  const struct rte_flow_item items[],
1376                  const struct rte_flow_action actions[],
1377                  uint64_t *item_flags, uint64_t *action_flags,
1378                  struct rte_flow_error *error)
1379 {
1380         size_t size = sizeof(struct mlx5_flow) +
1381                       MNL_ALIGN(sizeof(struct nlmsghdr)) +
1382                       MNL_ALIGN(sizeof(struct tcmsg));
1383         struct mlx5_flow *dev_flow;
1384         struct nlmsghdr *nlh;
1385         struct tcmsg *tcm;
1386
1387         size += flow_tcf_get_items_and_size(items, item_flags);
1388         size += flow_tcf_get_actions_and_size(actions, action_flags);
1389         dev_flow = rte_zmalloc(__func__, size, MNL_ALIGNTO);
1390         if (!dev_flow) {
1391                 rte_flow_error_set(error, ENOMEM,
1392                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1393                                    "not enough memory to create E-Switch flow");
1394                 return NULL;
1395         }
1396         nlh = mnl_nlmsg_put_header((void *)(dev_flow + 1));
1397         tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
1398         *dev_flow = (struct mlx5_flow){
1399                 .tcf = (struct mlx5_flow_tcf){
1400                         .nlh = nlh,
1401                         .tcm = tcm,
1402                 },
1403         };
1404         /*
1405          * Generate a reasonably unique handle based on the address of the
1406          * target buffer.
1407          *
1408          * This is straightforward on 32-bit systems where the flow pointer can
1409          * be used directly. Otherwise, its least significant part is taken
1410          * after shifting it by the previous power of two of the pointed buffer
1411          * size.
1412          */
1413         if (sizeof(dev_flow) <= 4)
1414                 flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow);
1415         else
1416                 flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow >>
1417                                        rte_log2_u32(rte_align32prevpow2(size)));
1418         return dev_flow;
1419 }
1420
1421 /**
1422  * Translate flow for Linux TC flower and construct Netlink message.
1423  *
1424  * @param[in] priv
1425  *   Pointer to the priv structure.
1426  * @param[in, out] flow
1427  *   Pointer to the sub flow.
1428  * @param[in] attr
1429  *   Pointer to the flow attributes.
1430  * @param[in] items
1431  *   Pointer to the list of items.
1432  * @param[in] actions
1433  *   Pointer to the list of actions.
1434  * @param[out] error
1435  *   Pointer to the error structure.
1436  *
1437  * @return
1438  *   0 on success, a negative errno value otherwise and rte_ernno is set.
1439  */
1440 static int
1441 flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
1442                    const struct rte_flow_attr *attr,
1443                    const struct rte_flow_item items[],
1444                    const struct rte_flow_action actions[],
1445                    struct rte_flow_error *error)
1446 {
1447         union {
1448                 const struct rte_flow_item_port_id *port_id;
1449                 const struct rte_flow_item_eth *eth;
1450                 const struct rte_flow_item_vlan *vlan;
1451                 const struct rte_flow_item_ipv4 *ipv4;
1452                 const struct rte_flow_item_ipv6 *ipv6;
1453                 const struct rte_flow_item_tcp *tcp;
1454                 const struct rte_flow_item_udp *udp;
1455         } spec, mask;
1456         union {
1457                 const struct rte_flow_action_port_id *port_id;
1458                 const struct rte_flow_action_of_push_vlan *of_push_vlan;
1459                 const struct rte_flow_action_of_set_vlan_vid *
1460                         of_set_vlan_vid;
1461                 const struct rte_flow_action_of_set_vlan_pcp *
1462                         of_set_vlan_pcp;
1463         } conf;
1464         struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
1465         struct nlmsghdr *nlh = dev_flow->tcf.nlh;
1466         struct tcmsg *tcm = dev_flow->tcf.tcm;
1467         uint32_t na_act_index_cur;
1468         bool eth_type_set = 0;
1469         bool vlan_present = 0;
1470         bool vlan_eth_type_set = 0;
1471         bool ip_proto_set = 0;
1472         struct nlattr *na_flower;
1473         struct nlattr *na_flower_act;
1474         struct nlattr *na_vlan_id = NULL;
1475         struct nlattr *na_vlan_priority = NULL;
1476         uint64_t item_flags = 0;
1477
1478         claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
1479                                                 PTOI_TABLE_SZ_MAX(dev)));
1480         nlh = dev_flow->tcf.nlh;
1481         tcm = dev_flow->tcf.tcm;
1482         /* Prepare API must have been called beforehand. */
1483         assert(nlh != NULL && tcm != NULL);
1484         tcm->tcm_family = AF_UNSPEC;
1485         tcm->tcm_ifindex = ptoi[0].ifindex;
1486         tcm->tcm_parent = TC_H_MAKE(TC_H_INGRESS, TC_H_MIN_INGRESS);
1487         /*
1488          * Priority cannot be zero to prevent the kernel from picking one
1489          * automatically.
1490          */
1491         tcm->tcm_info = TC_H_MAKE((attr->priority + 1) << 16,
1492                                   RTE_BE16(ETH_P_ALL));
1493         mnl_attr_put_strz(nlh, TCA_KIND, "flower");
1494         na_flower = mnl_attr_nest_start(nlh, TCA_OPTIONS);
1495         mnl_attr_put_u32(nlh, TCA_FLOWER_FLAGS, TCA_CLS_FLAGS_SKIP_SW);
1496         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1497                 unsigned int i;
1498
1499                 switch (items->type) {
1500                 case RTE_FLOW_ITEM_TYPE_VOID:
1501                         break;
1502                 case RTE_FLOW_ITEM_TYPE_PORT_ID:
1503                         mask.port_id = flow_tcf_item_mask
1504                                 (items, &rte_flow_item_port_id_mask,
1505                                  &flow_tcf_mask_supported.port_id,
1506                                  &flow_tcf_mask_empty.port_id,
1507                                  sizeof(flow_tcf_mask_supported.port_id),
1508                                  error);
1509                         assert(mask.port_id);
1510                         if (mask.port_id == &flow_tcf_mask_empty.port_id)
1511                                 break;
1512                         spec.port_id = items->spec;
1513                         if (!mask.port_id->id)
1514                                 i = 0;
1515                         else
1516                                 for (i = 0; ptoi[i].ifindex; ++i)
1517                                         if (ptoi[i].port_id == spec.port_id->id)
1518                                                 break;
1519                         assert(ptoi[i].ifindex);
1520                         tcm->tcm_ifindex = ptoi[i].ifindex;
1521                         break;
1522                 case RTE_FLOW_ITEM_TYPE_ETH:
1523                         item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
1524                         mask.eth = flow_tcf_item_mask
1525                                 (items, &rte_flow_item_eth_mask,
1526                                  &flow_tcf_mask_supported.eth,
1527                                  &flow_tcf_mask_empty.eth,
1528                                  sizeof(flow_tcf_mask_supported.eth),
1529                                  error);
1530                         assert(mask.eth);
1531                         if (mask.eth == &flow_tcf_mask_empty.eth)
1532                                 break;
1533                         spec.eth = items->spec;
1534                         if (mask.eth->type) {
1535                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
1536                                                  spec.eth->type);
1537                                 eth_type_set = 1;
1538                         }
1539                         if (!is_zero_ether_addr(&mask.eth->dst)) {
1540                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST,
1541                                              ETHER_ADDR_LEN,
1542                                              spec.eth->dst.addr_bytes);
1543                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST_MASK,
1544                                              ETHER_ADDR_LEN,
1545                                              mask.eth->dst.addr_bytes);
1546                         }
1547                         if (!is_zero_ether_addr(&mask.eth->src)) {
1548                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC,
1549                                              ETHER_ADDR_LEN,
1550                                              spec.eth->src.addr_bytes);
1551                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC_MASK,
1552                                              ETHER_ADDR_LEN,
1553                                              mask.eth->src.addr_bytes);
1554                         }
1555                         break;
1556                 case RTE_FLOW_ITEM_TYPE_VLAN:
1557                         item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
1558                         mask.vlan = flow_tcf_item_mask
1559                                 (items, &rte_flow_item_vlan_mask,
1560                                  &flow_tcf_mask_supported.vlan,
1561                                  &flow_tcf_mask_empty.vlan,
1562                                  sizeof(flow_tcf_mask_supported.vlan),
1563                                  error);
1564                         assert(mask.vlan);
1565                         if (!eth_type_set)
1566                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
1567                                                  RTE_BE16(ETH_P_8021Q));
1568                         eth_type_set = 1;
1569                         vlan_present = 1;
1570                         if (mask.vlan == &flow_tcf_mask_empty.vlan)
1571                                 break;
1572                         spec.vlan = items->spec;
1573                         if (mask.vlan->inner_type) {
1574                                 mnl_attr_put_u16(nlh,
1575                                                  TCA_FLOWER_KEY_VLAN_ETH_TYPE,
1576                                                  spec.vlan->inner_type);
1577                                 vlan_eth_type_set = 1;
1578                         }
1579                         if (mask.vlan->tci & RTE_BE16(0xe000))
1580                                 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_VLAN_PRIO,
1581                                                 (rte_be_to_cpu_16
1582                                                  (spec.vlan->tci) >> 13) & 0x7);
1583                         if (mask.vlan->tci & RTE_BE16(0x0fff))
1584                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_VLAN_ID,
1585                                                  rte_be_to_cpu_16
1586                                                  (spec.vlan->tci &
1587                                                   RTE_BE16(0x0fff)));
1588                         break;
1589                 case RTE_FLOW_ITEM_TYPE_IPV4:
1590                         item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1591                         mask.ipv4 = flow_tcf_item_mask
1592                                 (items, &rte_flow_item_ipv4_mask,
1593                                  &flow_tcf_mask_supported.ipv4,
1594                                  &flow_tcf_mask_empty.ipv4,
1595                                  sizeof(flow_tcf_mask_supported.ipv4),
1596                                  error);
1597                         assert(mask.ipv4);
1598                         if (!eth_type_set || !vlan_eth_type_set)
1599                                 mnl_attr_put_u16(nlh,
1600                                                  vlan_present ?
1601                                                  TCA_FLOWER_KEY_VLAN_ETH_TYPE :
1602                                                  TCA_FLOWER_KEY_ETH_TYPE,
1603                                                  RTE_BE16(ETH_P_IP));
1604                         eth_type_set = 1;
1605                         vlan_eth_type_set = 1;
1606                         if (mask.ipv4 == &flow_tcf_mask_empty.ipv4)
1607                                 break;
1608                         spec.ipv4 = items->spec;
1609                         if (mask.ipv4->hdr.next_proto_id) {
1610                                 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1611                                                 spec.ipv4->hdr.next_proto_id);
1612                                 ip_proto_set = 1;
1613                         }
1614                         if (mask.ipv4->hdr.src_addr) {
1615                                 mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_SRC,
1616                                                  spec.ipv4->hdr.src_addr);
1617                                 mnl_attr_put_u32(nlh,
1618                                                  TCA_FLOWER_KEY_IPV4_SRC_MASK,
1619                                                  mask.ipv4->hdr.src_addr);
1620                         }
1621                         if (mask.ipv4->hdr.dst_addr) {
1622                                 mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_DST,
1623                                                  spec.ipv4->hdr.dst_addr);
1624                                 mnl_attr_put_u32(nlh,
1625                                                  TCA_FLOWER_KEY_IPV4_DST_MASK,
1626                                                  mask.ipv4->hdr.dst_addr);
1627                         }
1628                         break;
1629                 case RTE_FLOW_ITEM_TYPE_IPV6:
1630                         item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1631                         mask.ipv6 = flow_tcf_item_mask
1632                                 (items, &rte_flow_item_ipv6_mask,
1633                                  &flow_tcf_mask_supported.ipv6,
1634                                  &flow_tcf_mask_empty.ipv6,
1635                                  sizeof(flow_tcf_mask_supported.ipv6),
1636                                  error);
1637                         assert(mask.ipv6);
1638                         if (!eth_type_set || !vlan_eth_type_set)
1639                                 mnl_attr_put_u16(nlh,
1640                                                  vlan_present ?
1641                                                  TCA_FLOWER_KEY_VLAN_ETH_TYPE :
1642                                                  TCA_FLOWER_KEY_ETH_TYPE,
1643                                                  RTE_BE16(ETH_P_IPV6));
1644                         eth_type_set = 1;
1645                         vlan_eth_type_set = 1;
1646                         if (mask.ipv6 == &flow_tcf_mask_empty.ipv6)
1647                                 break;
1648                         spec.ipv6 = items->spec;
1649                         if (mask.ipv6->hdr.proto) {
1650                                 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1651                                                 spec.ipv6->hdr.proto);
1652                                 ip_proto_set = 1;
1653                         }
1654                         if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.src_addr)) {
1655                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC,
1656                                              sizeof(spec.ipv6->hdr.src_addr),
1657                                              spec.ipv6->hdr.src_addr);
1658                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC_MASK,
1659                                              sizeof(mask.ipv6->hdr.src_addr),
1660                                              mask.ipv6->hdr.src_addr);
1661                         }
1662                         if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.dst_addr)) {
1663                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST,
1664                                              sizeof(spec.ipv6->hdr.dst_addr),
1665                                              spec.ipv6->hdr.dst_addr);
1666                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST_MASK,
1667                                              sizeof(mask.ipv6->hdr.dst_addr),
1668                                              mask.ipv6->hdr.dst_addr);
1669                         }
1670                         break;
1671                 case RTE_FLOW_ITEM_TYPE_UDP:
1672                         item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
1673                         mask.udp = flow_tcf_item_mask
1674                                 (items, &rte_flow_item_udp_mask,
1675                                  &flow_tcf_mask_supported.udp,
1676                                  &flow_tcf_mask_empty.udp,
1677                                  sizeof(flow_tcf_mask_supported.udp),
1678                                  error);
1679                         assert(mask.udp);
1680                         if (!ip_proto_set)
1681                                 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1682                                                 IPPROTO_UDP);
1683                         if (mask.udp == &flow_tcf_mask_empty.udp)
1684                                 break;
1685                         spec.udp = items->spec;
1686                         if (mask.udp->hdr.src_port) {
1687                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_SRC,
1688                                                  spec.udp->hdr.src_port);
1689                                 mnl_attr_put_u16(nlh,
1690                                                  TCA_FLOWER_KEY_UDP_SRC_MASK,
1691                                                  mask.udp->hdr.src_port);
1692                         }
1693                         if (mask.udp->hdr.dst_port) {
1694                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_DST,
1695                                                  spec.udp->hdr.dst_port);
1696                                 mnl_attr_put_u16(nlh,
1697                                                  TCA_FLOWER_KEY_UDP_DST_MASK,
1698                                                  mask.udp->hdr.dst_port);
1699                         }
1700                         break;
1701                 case RTE_FLOW_ITEM_TYPE_TCP:
1702                         item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
1703                         mask.tcp = flow_tcf_item_mask
1704                                 (items, &rte_flow_item_tcp_mask,
1705                                  &flow_tcf_mask_supported.tcp,
1706                                  &flow_tcf_mask_empty.tcp,
1707                                  sizeof(flow_tcf_mask_supported.tcp),
1708                                  error);
1709                         assert(mask.tcp);
1710                         if (!ip_proto_set)
1711                                 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1712                                                 IPPROTO_TCP);
1713                         if (mask.tcp == &flow_tcf_mask_empty.tcp)
1714                                 break;
1715                         spec.tcp = items->spec;
1716                         if (mask.tcp->hdr.src_port) {
1717                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_SRC,
1718                                                  spec.tcp->hdr.src_port);
1719                                 mnl_attr_put_u16(nlh,
1720                                                  TCA_FLOWER_KEY_TCP_SRC_MASK,
1721                                                  mask.tcp->hdr.src_port);
1722                         }
1723                         if (mask.tcp->hdr.dst_port) {
1724                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_DST,
1725                                                  spec.tcp->hdr.dst_port);
1726                                 mnl_attr_put_u16(nlh,
1727                                                  TCA_FLOWER_KEY_TCP_DST_MASK,
1728                                                  mask.tcp->hdr.dst_port);
1729                         }
1730                         if (mask.tcp->hdr.tcp_flags) {
1731                                 mnl_attr_put_u16
1732                                         (nlh,
1733                                          TCA_FLOWER_KEY_TCP_FLAGS,
1734                                          rte_cpu_to_be_16
1735                                                 (spec.tcp->hdr.tcp_flags));
1736                                 mnl_attr_put_u16
1737                                         (nlh,
1738                                          TCA_FLOWER_KEY_TCP_FLAGS_MASK,
1739                                          rte_cpu_to_be_16
1740                                                 (mask.tcp->hdr.tcp_flags));
1741                         }
1742                         break;
1743                 default:
1744                         return rte_flow_error_set(error, ENOTSUP,
1745                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1746                                                   NULL, "item not supported");
1747                 }
1748         }
1749         na_flower_act = mnl_attr_nest_start(nlh, TCA_FLOWER_ACT);
1750         na_act_index_cur = 1;
1751         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1752                 struct nlattr *na_act_index;
1753                 struct nlattr *na_act;
1754                 unsigned int vlan_act;
1755                 unsigned int i;
1756
1757                 switch (actions->type) {
1758                 case RTE_FLOW_ACTION_TYPE_VOID:
1759                         break;
1760                 case RTE_FLOW_ACTION_TYPE_PORT_ID:
1761                         conf.port_id = actions->conf;
1762                         if (conf.port_id->original)
1763                                 i = 0;
1764                         else
1765                                 for (i = 0; ptoi[i].ifindex; ++i)
1766                                         if (ptoi[i].port_id == conf.port_id->id)
1767                                                 break;
1768                         assert(ptoi[i].ifindex);
1769                         na_act_index =
1770                                 mnl_attr_nest_start(nlh, na_act_index_cur++);
1771                         assert(na_act_index);
1772                         mnl_attr_put_strz(nlh, TCA_ACT_KIND, "mirred");
1773                         na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
1774                         assert(na_act);
1775                         mnl_attr_put(nlh, TCA_MIRRED_PARMS,
1776                                      sizeof(struct tc_mirred),
1777                                      &(struct tc_mirred){
1778                                         .action = TC_ACT_STOLEN,
1779                                         .eaction = TCA_EGRESS_REDIR,
1780                                         .ifindex = ptoi[i].ifindex,
1781                                      });
1782                         mnl_attr_nest_end(nlh, na_act);
1783                         mnl_attr_nest_end(nlh, na_act_index);
1784                         break;
1785                 case RTE_FLOW_ACTION_TYPE_DROP:
1786                         na_act_index =
1787                                 mnl_attr_nest_start(nlh, na_act_index_cur++);
1788                         assert(na_act_index);
1789                         mnl_attr_put_strz(nlh, TCA_ACT_KIND, "gact");
1790                         na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
1791                         assert(na_act);
1792                         mnl_attr_put(nlh, TCA_GACT_PARMS,
1793                                      sizeof(struct tc_gact),
1794                                      &(struct tc_gact){
1795                                         .action = TC_ACT_SHOT,
1796                                      });
1797                         mnl_attr_nest_end(nlh, na_act);
1798                         mnl_attr_nest_end(nlh, na_act_index);
1799                         break;
1800                 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
1801                         conf.of_push_vlan = NULL;
1802                         vlan_act = TCA_VLAN_ACT_POP;
1803                         goto action_of_vlan;
1804                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
1805                         conf.of_push_vlan = actions->conf;
1806                         vlan_act = TCA_VLAN_ACT_PUSH;
1807                         goto action_of_vlan;
1808                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
1809                         conf.of_set_vlan_vid = actions->conf;
1810                         if (na_vlan_id)
1811                                 goto override_na_vlan_id;
1812                         vlan_act = TCA_VLAN_ACT_MODIFY;
1813                         goto action_of_vlan;
1814                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
1815                         conf.of_set_vlan_pcp = actions->conf;
1816                         if (na_vlan_priority)
1817                                 goto override_na_vlan_priority;
1818                         vlan_act = TCA_VLAN_ACT_MODIFY;
1819                         goto action_of_vlan;
1820 action_of_vlan:
1821                         na_act_index =
1822                                 mnl_attr_nest_start(nlh, na_act_index_cur++);
1823                         assert(na_act_index);
1824                         mnl_attr_put_strz(nlh, TCA_ACT_KIND, "vlan");
1825                         na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
1826                         assert(na_act);
1827                         mnl_attr_put(nlh, TCA_VLAN_PARMS,
1828                                      sizeof(struct tc_vlan),
1829                                      &(struct tc_vlan){
1830                                         .action = TC_ACT_PIPE,
1831                                         .v_action = vlan_act,
1832                                      });
1833                         if (vlan_act == TCA_VLAN_ACT_POP) {
1834                                 mnl_attr_nest_end(nlh, na_act);
1835                                 mnl_attr_nest_end(nlh, na_act_index);
1836                                 break;
1837                         }
1838                         if (vlan_act == TCA_VLAN_ACT_PUSH)
1839                                 mnl_attr_put_u16(nlh,
1840                                                  TCA_VLAN_PUSH_VLAN_PROTOCOL,
1841                                                  conf.of_push_vlan->ethertype);
1842                         na_vlan_id = mnl_nlmsg_get_payload_tail(nlh);
1843                         mnl_attr_put_u16(nlh, TCA_VLAN_PAD, 0);
1844                         na_vlan_priority = mnl_nlmsg_get_payload_tail(nlh);
1845                         mnl_attr_put_u8(nlh, TCA_VLAN_PAD, 0);
1846                         mnl_attr_nest_end(nlh, na_act);
1847                         mnl_attr_nest_end(nlh, na_act_index);
1848                         if (actions->type ==
1849                             RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID) {
1850 override_na_vlan_id:
1851                                 na_vlan_id->nla_type = TCA_VLAN_PUSH_VLAN_ID;
1852                                 *(uint16_t *)mnl_attr_get_payload(na_vlan_id) =
1853                                         rte_be_to_cpu_16
1854                                         (conf.of_set_vlan_vid->vlan_vid);
1855                         } else if (actions->type ==
1856                                    RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP) {
1857 override_na_vlan_priority:
1858                                 na_vlan_priority->nla_type =
1859                                         TCA_VLAN_PUSH_VLAN_PRIORITY;
1860                                 *(uint8_t *)mnl_attr_get_payload
1861                                         (na_vlan_priority) =
1862                                         conf.of_set_vlan_pcp->vlan_pcp;
1863                         }
1864                         break;
1865                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
1866                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
1867                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
1868                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
1869                 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
1870                 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
1871                         na_act_index =
1872                                 mnl_attr_nest_start(nlh, na_act_index_cur++);
1873                         flow_tcf_create_pedit_mnl_msg(nlh,
1874                                                       &actions, item_flags);
1875                         mnl_attr_nest_end(nlh, na_act_index);
1876                         break;
1877                 default:
1878                         return rte_flow_error_set(error, ENOTSUP,
1879                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1880                                                   actions,
1881                                                   "action not supported");
1882                 }
1883         }
1884         assert(na_flower);
1885         assert(na_flower_act);
1886         mnl_attr_nest_end(nlh, na_flower_act);
1887         mnl_attr_nest_end(nlh, na_flower);
1888         return 0;
1889 }
1890
1891 /**
1892  * Send Netlink message with acknowledgment.
1893  *
1894  * @param nl
1895  *   Libmnl socket to use.
1896  * @param nlh
1897  *   Message to send. This function always raises the NLM_F_ACK flag before
1898  *   sending.
1899  *
1900  * @return
1901  *   0 on success, a negative errno value otherwise and rte_errno is set.
1902  */
1903 static int
1904 flow_tcf_nl_ack(struct mnl_socket *nl, struct nlmsghdr *nlh)
1905 {
1906         alignas(struct nlmsghdr)
1907         uint8_t ans[mnl_nlmsg_size(sizeof(struct nlmsgerr)) +
1908                     nlh->nlmsg_len - sizeof(*nlh)];
1909         uint32_t seq = random();
1910         int ret;
1911
1912         nlh->nlmsg_flags |= NLM_F_ACK;
1913         nlh->nlmsg_seq = seq;
1914         ret = mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
1915         if (ret != -1)
1916                 ret = mnl_socket_recvfrom(nl, ans, sizeof(ans));
1917         if (ret != -1)
1918                 ret = mnl_cb_run
1919                         (ans, ret, seq, mnl_socket_get_portid(nl), NULL, NULL);
1920         if (ret > 0)
1921                 return 0;
1922         rte_errno = errno;
1923         return -rte_errno;
1924 }
1925
1926 /**
1927  * Apply flow to E-Switch by sending Netlink message.
1928  *
1929  * @param[in] dev
1930  *   Pointer to Ethernet device.
1931  * @param[in, out] flow
1932  *   Pointer to the sub flow.
1933  * @param[out] error
1934  *   Pointer to the error structure.
1935  *
1936  * @return
1937  *   0 on success, a negative errno value otherwise and rte_ernno is set.
1938  */
1939 static int
1940 flow_tcf_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
1941                struct rte_flow_error *error)
1942 {
1943         struct priv *priv = dev->data->dev_private;
1944         struct mnl_socket *nl = priv->mnl_socket;
1945         struct mlx5_flow *dev_flow;
1946         struct nlmsghdr *nlh;
1947
1948         dev_flow = LIST_FIRST(&flow->dev_flows);
1949         /* E-Switch flow can't be expanded. */
1950         assert(!LIST_NEXT(dev_flow, next));
1951         nlh = dev_flow->tcf.nlh;
1952         nlh->nlmsg_type = RTM_NEWTFILTER;
1953         nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
1954         if (!flow_tcf_nl_ack(nl, nlh))
1955                 return 0;
1956         return rte_flow_error_set(error, rte_errno,
1957                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1958                                   "netlink: failed to create TC flow rule");
1959 }
1960
1961 /**
1962  * Remove flow from E-Switch by sending Netlink message.
1963  *
1964  * @param[in] dev
1965  *   Pointer to Ethernet device.
1966  * @param[in, out] flow
1967  *   Pointer to the sub flow.
1968  */
1969 static void
1970 flow_tcf_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
1971 {
1972         struct priv *priv = dev->data->dev_private;
1973         struct mnl_socket *nl = priv->mnl_socket;
1974         struct mlx5_flow *dev_flow;
1975         struct nlmsghdr *nlh;
1976
1977         if (!flow)
1978                 return;
1979         dev_flow = LIST_FIRST(&flow->dev_flows);
1980         if (!dev_flow)
1981                 return;
1982         /* E-Switch flow can't be expanded. */
1983         assert(!LIST_NEXT(dev_flow, next));
1984         nlh = dev_flow->tcf.nlh;
1985         nlh->nlmsg_type = RTM_DELTFILTER;
1986         nlh->nlmsg_flags = NLM_F_REQUEST;
1987         flow_tcf_nl_ack(nl, nlh);
1988 }
1989
1990 /**
1991  * Remove flow from E-Switch and release resources of the device flow.
1992  *
1993  * @param[in] dev
1994  *   Pointer to Ethernet device.
1995  * @param[in, out] flow
1996  *   Pointer to the sub flow.
1997  */
1998 static void
1999 flow_tcf_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
2000 {
2001         struct mlx5_flow *dev_flow;
2002
2003         if (!flow)
2004                 return;
2005         flow_tcf_remove(dev, flow);
2006         dev_flow = LIST_FIRST(&flow->dev_flows);
2007         if (!dev_flow)
2008                 return;
2009         /* E-Switch flow can't be expanded. */
2010         assert(!LIST_NEXT(dev_flow, next));
2011         LIST_REMOVE(dev_flow, next);
2012         rte_free(dev_flow);
2013 }
2014
2015 const struct mlx5_flow_driver_ops mlx5_flow_tcf_drv_ops = {
2016         .validate = flow_tcf_validate,
2017         .prepare = flow_tcf_prepare,
2018         .translate = flow_tcf_translate,
2019         .apply = flow_tcf_apply,
2020         .remove = flow_tcf_remove,
2021         .destroy = flow_tcf_destroy,
2022 };
2023
2024 /**
2025  * Initialize ingress qdisc of a given network interface.
2026  *
2027  * @param nl
2028  *   Libmnl socket of the @p NETLINK_ROUTE kind.
2029  * @param ifindex
2030  *   Index of network interface to initialize.
2031  * @param[out] error
2032  *   Perform verbose error reporting if not NULL.
2033  *
2034  * @return
2035  *   0 on success, a negative errno value otherwise and rte_errno is set.
2036  */
2037 int
2038 mlx5_flow_tcf_init(struct mnl_socket *nl, unsigned int ifindex,
2039                    struct rte_flow_error *error)
2040 {
2041         struct nlmsghdr *nlh;
2042         struct tcmsg *tcm;
2043         alignas(struct nlmsghdr)
2044         uint8_t buf[mnl_nlmsg_size(sizeof(*tcm) + 128)];
2045
2046         /* Destroy existing ingress qdisc and everything attached to it. */
2047         nlh = mnl_nlmsg_put_header(buf);
2048         nlh->nlmsg_type = RTM_DELQDISC;
2049         nlh->nlmsg_flags = NLM_F_REQUEST;
2050         tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
2051         tcm->tcm_family = AF_UNSPEC;
2052         tcm->tcm_ifindex = ifindex;
2053         tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
2054         tcm->tcm_parent = TC_H_INGRESS;
2055         /* Ignore errors when qdisc is already absent. */
2056         if (flow_tcf_nl_ack(nl, nlh) &&
2057             rte_errno != EINVAL && rte_errno != ENOENT)
2058                 return rte_flow_error_set(error, rte_errno,
2059                                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
2060                                           "netlink: failed to remove ingress"
2061                                           " qdisc");
2062         /* Create fresh ingress qdisc. */
2063         nlh = mnl_nlmsg_put_header(buf);
2064         nlh->nlmsg_type = RTM_NEWQDISC;
2065         nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
2066         tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
2067         tcm->tcm_family = AF_UNSPEC;
2068         tcm->tcm_ifindex = ifindex;
2069         tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
2070         tcm->tcm_parent = TC_H_INGRESS;
2071         mnl_attr_put_strz_check(nlh, sizeof(buf), TCA_KIND, "ingress");
2072         if (flow_tcf_nl_ack(nl, nlh))
2073                 return rte_flow_error_set(error, rte_errno,
2074                                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
2075                                           "netlink: failed to create ingress"
2076                                           " qdisc");
2077         return 0;
2078 }
2079
2080 /**
2081  * Create and configure a libmnl socket for Netlink flow rules.
2082  *
2083  * @return
2084  *   A valid libmnl socket object pointer on success, NULL otherwise and
2085  *   rte_errno is set.
2086  */
2087 struct mnl_socket *
2088 mlx5_flow_tcf_socket_create(void)
2089 {
2090         struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
2091
2092         if (nl) {
2093                 mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
2094                                       sizeof(int));
2095                 if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
2096                         return nl;
2097         }
2098         rte_errno = errno;
2099         if (nl)
2100                 mnl_socket_close(nl);
2101         return NULL;
2102 }
2103
/**
 * Destroy a libmnl socket.
 *
 * @param nl
 *   Libmnl socket of the @p NETLINK_ROUTE kind. A NULL pointer, such as the
 *   one mlx5_flow_tcf_socket_create() returns on failure, is ignored.
 */
void
mlx5_flow_tcf_socket_destroy(struct mnl_socket *nl)
{
	/* mnl_socket_close() dereferences its argument: never hand it NULL. */
	if (nl == NULL)
		return;
	mnl_socket_close(nl);
}