net/mlx5: add flow query abstraction interface
[dpdk.git] / drivers / net / mlx5 / mlx5_flow_tcf.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2018 6WIND S.A.
 * Copyright 2018 Mellanox Technologies, Ltd
 */

#include <assert.h>
#include <errno.h>
#include <libmnl/libmnl.h>
#include <linux/if_ether.h>
#include <linux/netlink.h>
#include <linux/pkt_cls.h>
#include <linux/pkt_sched.h>
#include <linux/rtnetlink.h>
#include <linux/tc_act/tc_gact.h>
#include <linux/tc_act/tc_mirred.h>
#include <netinet/in.h>
#include <stdalign.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <sys/socket.h>

#include <rte_byteorder.h>
#include <rte_errno.h>
#include <rte_ether.h>
#include <rte_flow.h>
#include <rte_malloc.h>

#include "mlx5.h"
#include "mlx5_flow.h"
#include "mlx5_autoconf.h"

#ifdef HAVE_TC_ACT_VLAN

#include <linux/tc_act/tc_vlan.h>

#else /* HAVE_TC_ACT_VLAN */

#define TCA_VLAN_ACT_POP 1
#define TCA_VLAN_ACT_PUSH 2
#define TCA_VLAN_ACT_MODIFY 3
#define TCA_VLAN_PARMS 2
#define TCA_VLAN_PUSH_VLAN_ID 3
#define TCA_VLAN_PUSH_VLAN_PROTOCOL 4
#define TCA_VLAN_PAD 5
#define TCA_VLAN_PUSH_VLAN_PRIORITY 6

struct tc_vlan {
	tc_gen;
	int v_action;
};

#endif /* HAVE_TC_ACT_VLAN */

#ifdef HAVE_TC_ACT_PEDIT

#include <linux/tc_act/tc_pedit.h>

#else /* HAVE_TC_ACT_PEDIT */

enum {
	TCA_PEDIT_UNSPEC,
	TCA_PEDIT_TM,
	TCA_PEDIT_PARMS,
	TCA_PEDIT_PAD,
	TCA_PEDIT_PARMS_EX,
	TCA_PEDIT_KEYS_EX,
	TCA_PEDIT_KEY_EX,
	__TCA_PEDIT_MAX
};

enum {
	TCA_PEDIT_KEY_EX_HTYPE = 1,
	TCA_PEDIT_KEY_EX_CMD = 2,
	__TCA_PEDIT_KEY_EX_MAX
};

enum pedit_header_type {
	TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK = 0,
	TCA_PEDIT_KEY_EX_HDR_TYPE_ETH = 1,
	TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 = 2,
	TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 = 3,
	TCA_PEDIT_KEY_EX_HDR_TYPE_TCP = 4,
	TCA_PEDIT_KEY_EX_HDR_TYPE_UDP = 5,
	__PEDIT_HDR_TYPE_MAX,
};

enum pedit_cmd {
	TCA_PEDIT_KEY_EX_CMD_SET = 0,
	TCA_PEDIT_KEY_EX_CMD_ADD = 1,
	__PEDIT_CMD_MAX,
};

struct tc_pedit_key {
	__u32 mask; /* AND */
	__u32 val; /* XOR */
	__u32 off; /* offset */
	__u32 at;
	__u32 offmask;
	__u32 shift;
};

__extension__
struct tc_pedit_sel {
	tc_gen;
	unsigned char nkeys;
	unsigned char flags;
	struct tc_pedit_key keys[0];
};

#endif /* HAVE_TC_ACT_PEDIT */

/* Normally found in linux/netlink.h. */
#ifndef NETLINK_CAP_ACK
#define NETLINK_CAP_ACK 10
#endif

/* Normally found in linux/pkt_sched.h. */
#ifndef TC_H_MIN_INGRESS
#define TC_H_MIN_INGRESS 0xfff2u
#endif

/* Normally found in linux/pkt_cls.h. */
#ifndef TCA_CLS_FLAGS_SKIP_SW
#define TCA_CLS_FLAGS_SKIP_SW (1 << 1)
#endif
#ifndef HAVE_TCA_CHAIN
#define TCA_CHAIN 11
#endif
#ifndef HAVE_TCA_FLOWER_ACT
#define TCA_FLOWER_ACT 3
#endif
#ifndef HAVE_TCA_FLOWER_FLAGS
#define TCA_FLOWER_FLAGS 22
#endif
#ifndef HAVE_TCA_FLOWER_KEY_ETH_TYPE
#define TCA_FLOWER_KEY_ETH_TYPE 8
#endif
#ifndef HAVE_TCA_FLOWER_KEY_ETH_DST
#define TCA_FLOWER_KEY_ETH_DST 4
#endif
#ifndef HAVE_TCA_FLOWER_KEY_ETH_DST_MASK
#define TCA_FLOWER_KEY_ETH_DST_MASK 5
#endif
#ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC
#define TCA_FLOWER_KEY_ETH_SRC 6
#endif
#ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC_MASK
#define TCA_FLOWER_KEY_ETH_SRC_MASK 7
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IP_PROTO
#define TCA_FLOWER_KEY_IP_PROTO 9
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC
#define TCA_FLOWER_KEY_IPV4_SRC 10
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC_MASK
#define TCA_FLOWER_KEY_IPV4_SRC_MASK 11
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST
#define TCA_FLOWER_KEY_IPV4_DST 12
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST_MASK
#define TCA_FLOWER_KEY_IPV4_DST_MASK 13
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC
#define TCA_FLOWER_KEY_IPV6_SRC 14
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC_MASK
#define TCA_FLOWER_KEY_IPV6_SRC_MASK 15
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST
#define TCA_FLOWER_KEY_IPV6_DST 16
#endif
#ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST_MASK
#define TCA_FLOWER_KEY_IPV6_DST_MASK 17
#endif
#ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC
#define TCA_FLOWER_KEY_TCP_SRC 18
#endif
#ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC_MASK
#define TCA_FLOWER_KEY_TCP_SRC_MASK 35
#endif
#ifndef HAVE_TCA_FLOWER_KEY_TCP_DST
#define TCA_FLOWER_KEY_TCP_DST 19
#endif
#ifndef HAVE_TCA_FLOWER_KEY_TCP_DST_MASK
#define TCA_FLOWER_KEY_TCP_DST_MASK 36
#endif
#ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC
#define TCA_FLOWER_KEY_UDP_SRC 20
#endif
#ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC_MASK
#define TCA_FLOWER_KEY_UDP_SRC_MASK 37
#endif
#ifndef HAVE_TCA_FLOWER_KEY_UDP_DST
#define TCA_FLOWER_KEY_UDP_DST 21
#endif
#ifndef HAVE_TCA_FLOWER_KEY_UDP_DST_MASK
#define TCA_FLOWER_KEY_UDP_DST_MASK 38
#endif
#ifndef HAVE_TCA_FLOWER_KEY_VLAN_ID
#define TCA_FLOWER_KEY_VLAN_ID 23
#endif
#ifndef HAVE_TCA_FLOWER_KEY_VLAN_PRIO
#define TCA_FLOWER_KEY_VLAN_PRIO 24
#endif
#ifndef HAVE_TCA_FLOWER_KEY_VLAN_ETH_TYPE
#define TCA_FLOWER_KEY_VLAN_ETH_TYPE 25
#endif
#ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS
#define TCA_FLOWER_KEY_TCP_FLAGS 71
#endif
#ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS_MASK
#define TCA_FLOWER_KEY_TCP_FLAGS_MASK 72
#endif
#ifndef HAVE_TC_ACT_GOTO_CHAIN
#define TC_ACT_GOTO_CHAIN 0x20000000
#endif

#ifndef IPV6_ADDR_LEN
#define IPV6_ADDR_LEN 16
#endif

#ifndef IPV4_ADDR_LEN
#define IPV4_ADDR_LEN 4
#endif

#ifndef TP_PORT_LEN
#define TP_PORT_LEN 2 /* Transport Port (UDP/TCP) Length */
#endif

#ifndef TTL_LEN
#define TTL_LEN 1
#endif

/**
 * Structure for holding netlink context.
 * Note the size of the message buffer which is MNL_SOCKET_BUFFER_SIZE.
 * Using this (8KB) buffer size ensures that netlink messages will never be
 * truncated.
 */
struct mlx5_flow_tcf_context {
	struct mnl_socket *nl; /* NETLINK_ROUTE libmnl socket. */
	uint32_t seq; /* Message sequence number. */
	uint32_t buf_size; /* Message buffer size. */
	uint8_t *buf; /* Message buffer. */
};

/** Empty masks for known item types. */
static const union {
	struct rte_flow_item_port_id port_id;
	struct rte_flow_item_eth eth;
	struct rte_flow_item_vlan vlan;
	struct rte_flow_item_ipv4 ipv4;
	struct rte_flow_item_ipv6 ipv6;
	struct rte_flow_item_tcp tcp;
	struct rte_flow_item_udp udp;
} flow_tcf_mask_empty;

/** Supported masks for known item types. */
static const struct {
	struct rte_flow_item_port_id port_id;
	struct rte_flow_item_eth eth;
	struct rte_flow_item_vlan vlan;
	struct rte_flow_item_ipv4 ipv4;
	struct rte_flow_item_ipv6 ipv6;
	struct rte_flow_item_tcp tcp;
	struct rte_flow_item_udp udp;
} flow_tcf_mask_supported = {
	.port_id = {
		.id = 0xffffffff,
	},
	.eth = {
		.type = RTE_BE16(0xffff),
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
		.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
	},
	.vlan = {
		/* PCP and VID only, no DEI. */
		.tci = RTE_BE16(0xefff),
		.inner_type = RTE_BE16(0xffff),
	},
	.ipv4.hdr = {
		.next_proto_id = 0xff,
		.src_addr = RTE_BE32(0xffffffff),
		.dst_addr = RTE_BE32(0xffffffff),
	},
	.ipv6.hdr = {
		.proto = 0xff,
		.src_addr =
			"\xff\xff\xff\xff\xff\xff\xff\xff"
			"\xff\xff\xff\xff\xff\xff\xff\xff",
		.dst_addr =
			"\xff\xff\xff\xff\xff\xff\xff\xff"
			"\xff\xff\xff\xff\xff\xff\xff\xff",
	},
	.tcp.hdr = {
		.src_port = RTE_BE16(0xffff),
		.dst_port = RTE_BE16(0xffff),
		.tcp_flags = 0xff,
	},
	.udp.hdr = {
		.src_port = RTE_BE16(0xffff),
		.dst_port = RTE_BE16(0xffff),
	},
};

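/* Sizes of netlink attributes (header, nesting, data), aligned as libmnl expects. */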
#define SZ_NLATTR_HDR MNL_ALIGN(sizeof(struct nlattr))
#define SZ_NLATTR_NEST SZ_NLATTR_HDR
#define SZ_NLATTR_DATA_OF(len) MNL_ALIGN(SZ_NLATTR_HDR + (len))
#define SZ_NLATTR_TYPE_OF(typ) SZ_NLATTR_DATA_OF(sizeof(typ))
#define SZ_NLATTR_STRZ_OF(str) SZ_NLATTR_DATA_OF(strlen(str) + 1)

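/* Room for the terminating zero entry plus the device itself when no port is reported. */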
#define PTOI_TABLE_SZ_MAX(dev) (mlx5_dev_to_port_id((dev)->device, NULL, 0) + 2)

/** DPDK port to network interface index (ifindex) conversion. */
struct flow_tcf_ptoi {
	uint16_t port_id; /**< DPDK port ID. */
	unsigned int ifindex; /**< Network interface index. */
};

/* Due to a limitation on driver/FW. */
#define MLX5_TCF_GROUP_ID_MAX 3
#define MLX5_TCF_GROUP_PRIORITY_MAX 14

#define MLX5_TCF_FATE_ACTIONS \
	(MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_PORT_ID | \
	 MLX5_FLOW_ACTION_JUMP)

#define MLX5_TCF_VLAN_ACTIONS \
	(MLX5_FLOW_ACTION_OF_POP_VLAN | MLX5_FLOW_ACTION_OF_PUSH_VLAN | \
	 MLX5_FLOW_ACTION_OF_SET_VLAN_VID | MLX5_FLOW_ACTION_OF_SET_VLAN_PCP)

#define MLX5_TCF_PEDIT_ACTIONS \
	(MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST | \
	 MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST | \
	 MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST | \
	 MLX5_FLOW_ACTION_SET_TTL | MLX5_FLOW_ACTION_DEC_TTL | \
	 MLX5_FLOW_ACTION_SET_MAC_SRC | MLX5_FLOW_ACTION_SET_MAC_DST)

#define MLX5_TCF_CONFIG_ACTIONS \
	(MLX5_FLOW_ACTION_PORT_ID | MLX5_FLOW_ACTION_JUMP | \
	 MLX5_FLOW_ACTION_OF_PUSH_VLAN | MLX5_FLOW_ACTION_OF_SET_VLAN_VID | \
	 MLX5_FLOW_ACTION_OF_SET_VLAN_PCP | \
	 (MLX5_TCF_PEDIT_ACTIONS & ~MLX5_FLOW_ACTION_DEC_TTL))

#define MAX_PEDIT_KEYS 128
#define SZ_PEDIT_KEY_VAL 4

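/* Number of 32-bit pedit keys needed to cover sz bytes, rounded up. */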
#define NUM_OF_PEDIT_KEYS(sz) \
	(((sz) / SZ_PEDIT_KEY_VAL) + (((sz) % SZ_PEDIT_KEY_VAL) ? 1 : 0))

struct pedit_key_ex {
	enum pedit_header_type htype;
	enum pedit_cmd cmd;
};

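/* Scratch structure accumulating the pedit selector, keys and extended keys. */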
struct pedit_parser {
	struct tc_pedit_sel sel;
	struct tc_pedit_key keys[MAX_PEDIT_KEYS];
	struct pedit_key_ex keys_ex[MAX_PEDIT_KEYS];
};


/**
 * Set pedit key of MAC address
 *
 * @param[in] actions
 *   pointer to action specification
 * @param[in,out] p_parser
 *   pointer to pedit_parser
 */
static void
flow_tcf_pedit_key_set_mac(const struct rte_flow_action *actions,
			   struct pedit_parser *p_parser)
{
	int idx = p_parser->sel.nkeys;
	uint32_t off = actions->type == RTE_FLOW_ACTION_TYPE_SET_MAC_SRC ?
					offsetof(struct ether_hdr, s_addr) :
					offsetof(struct ether_hdr, d_addr);
	const struct rte_flow_action_set_mac *conf =
		(const struct rte_flow_action_set_mac *)actions->conf;

	p_parser->keys[idx].off = off;
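	/* Mask is ANDed with the old value; ~UINT32_MAX is 0, so the whole
	 * 32-bit word is replaced by the new value.
	 */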
	p_parser->keys[idx].mask = ~UINT32_MAX;
	p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH;
	p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
	memcpy(&p_parser->keys[idx].val,
		conf->mac_addr, SZ_PEDIT_KEY_VAL);
	idx++;
	p_parser->keys[idx].off = off + SZ_PEDIT_KEY_VAL;
	p_parser->keys[idx].mask = 0xFFFF0000;
	p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH;
	p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
	memcpy(&p_parser->keys[idx].val,
		conf->mac_addr + SZ_PEDIT_KEY_VAL,
		ETHER_ADDR_LEN - SZ_PEDIT_KEY_VAL);
	p_parser->sel.nkeys = (++idx);
}

/**
 * Set pedit key of decrease/set ttl
 *
 * @param[in] actions
 *   pointer to action specification
 * @param[in,out] p_parser
 *   pointer to pedit_parser
 * @param[in] item_flags
 *   flags of all items presented
 */
static void
flow_tcf_pedit_key_set_dec_ttl(const struct rte_flow_action *actions,
				struct pedit_parser *p_parser,
				uint64_t item_flags)
{
	int idx = p_parser->sel.nkeys;

	p_parser->keys[idx].mask = 0xFFFFFF00;
	if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4) {
		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
		p_parser->keys[idx].off =
			offsetof(struct ipv4_hdr, time_to_live);
	}
	if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6) {
		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
		p_parser->keys[idx].off =
			offsetof(struct ipv6_hdr, hop_limits);
	}
	if (actions->type == RTE_FLOW_ACTION_TYPE_DEC_TTL) {
		p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_ADD;
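		/* Adding 0xFF to the 8-bit TTL/hop-limit field decrements it by one. */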
		p_parser->keys[idx].val = 0x000000FF;
	} else {
		p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
		p_parser->keys[idx].val =
			(__u32)((const struct rte_flow_action_set_ttl *)
			 actions->conf)->ttl_value;
	}
	p_parser->sel.nkeys = (++idx);
}

/**
 * Set pedit key of transport (TCP/UDP) port value
 *
 * @param[in] actions
 *   pointer to action specification
 * @param[in,out] p_parser
 *   pointer to pedit_parser
 * @param[in] item_flags
 *   flags of all items presented
 */
static void
flow_tcf_pedit_key_set_tp_port(const struct rte_flow_action *actions,
				struct pedit_parser *p_parser,
				uint64_t item_flags)
{
	int idx = p_parser->sel.nkeys;

	if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_UDP;
	if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_TCP;
	p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
	/* The offset of src/dst port is the same for TCP and UDP. */
	p_parser->keys[idx].off =
		actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_SRC ?
		offsetof(struct tcp_hdr, src_port) :
		offsetof(struct tcp_hdr, dst_port);
	p_parser->keys[idx].mask = 0xFFFF0000;
	p_parser->keys[idx].val =
		(__u32)((const struct rte_flow_action_set_tp *)
				actions->conf)->port;
	p_parser->sel.nkeys = (++idx);
}

/**
 * Set pedit key of ipv6 address
 *
 * @param[in] actions
 *   pointer to action specification
 * @param[in,out] p_parser
 *   pointer to pedit_parser
 */
static void
flow_tcf_pedit_key_set_ipv6_addr(const struct rte_flow_action *actions,
				 struct pedit_parser *p_parser)
{
	int idx = p_parser->sel.nkeys;
	int keys = NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
	int off_base =
		actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC ?
		offsetof(struct ipv6_hdr, src_addr) :
		offsetof(struct ipv6_hdr, dst_addr);
	const struct rte_flow_action_set_ipv6 *conf =
		(const struct rte_flow_action_set_ipv6 *)actions->conf;

	for (int i = 0; i < keys; i++, idx++) {
		p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
		p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
		p_parser->keys[idx].off = off_base + i * SZ_PEDIT_KEY_VAL;
		p_parser->keys[idx].mask = ~UINT32_MAX;
		memcpy(&p_parser->keys[idx].val,
			conf->ipv6_addr + i *  SZ_PEDIT_KEY_VAL,
			SZ_PEDIT_KEY_VAL);
	}
	p_parser->sel.nkeys += keys;
}

/**
 * Set pedit key of ipv4 address
 *
 * @param[in] actions
 *   pointer to action specification
 * @param[in,out] p_parser
 *   pointer to pedit_parser
 */
static void
flow_tcf_pedit_key_set_ipv4_addr(const struct rte_flow_action *actions,
				 struct pedit_parser *p_parser)
{
	int idx = p_parser->sel.nkeys;

	p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
	p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
	p_parser->keys[idx].off =
		actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC ?
		offsetof(struct ipv4_hdr, src_addr) :
		offsetof(struct ipv4_hdr, dst_addr);
	p_parser->keys[idx].mask = ~UINT32_MAX;
	p_parser->keys[idx].val =
		((const struct rte_flow_action_set_ipv4 *)
		 actions->conf)->ipv4_addr;
	p_parser->sel.nkeys = (++idx);
}

/**
 * Create the pedit's nested attribute in the netlink message
 * using the pre-allocated message buffer.
 *
 * @param[in,out] nl
 *   pointer to pre-allocated netlink message buffer
 * @param[in,out] actions
 *   pointer to pointer of actions specification.
 * @param[in] item_flags
 *   flags of all items presented
 */
static void
flow_tcf_create_pedit_mnl_msg(struct nlmsghdr *nl,
			      const struct rte_flow_action **actions,
			      uint64_t item_flags)
{
	struct pedit_parser p_parser;
	struct nlattr *na_act_options;
	struct nlattr *na_pedit_keys;

	memset(&p_parser, 0, sizeof(p_parser));
	mnl_attr_put_strz(nl, TCA_ACT_KIND, "pedit");
	na_act_options = mnl_attr_nest_start(nl, TCA_ACT_OPTIONS);
	/* All modify-header actions should be in one tc-pedit action. */
	for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
		switch ((*actions)->type) {
		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
			flow_tcf_pedit_key_set_ipv4_addr(*actions, &p_parser);
			break;
		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
			flow_tcf_pedit_key_set_ipv6_addr(*actions, &p_parser);
			break;
		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
			flow_tcf_pedit_key_set_tp_port(*actions,
							&p_parser, item_flags);
			break;
		case RTE_FLOW_ACTION_TYPE_SET_TTL:
		case RTE_FLOW_ACTION_TYPE_DEC_TTL:
			flow_tcf_pedit_key_set_dec_ttl(*actions,
							&p_parser, item_flags);
			break;
		case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
		case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
			flow_tcf_pedit_key_set_mac(*actions, &p_parser);
			break;
		default:
			goto pedit_mnl_msg_done;
		}
	}
pedit_mnl_msg_done:
	p_parser.sel.action = TC_ACT_PIPE;
	mnl_attr_put(nl, TCA_PEDIT_PARMS_EX,
		     sizeof(p_parser.sel) +
		     p_parser.sel.nkeys * sizeof(struct tc_pedit_key),
		     &p_parser);
	na_pedit_keys =
		mnl_attr_nest_start(nl, TCA_PEDIT_KEYS_EX | NLA_F_NESTED);
	for (int i = 0; i < p_parser.sel.nkeys; i++) {
		struct nlattr *na_pedit_key =
			mnl_attr_nest_start(nl,
					    TCA_PEDIT_KEY_EX | NLA_F_NESTED);
		mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_HTYPE,
				 p_parser.keys_ex[i].htype);
		mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_CMD,
				 p_parser.keys_ex[i].cmd);
		mnl_attr_nest_end(nl, na_pedit_key);
	}
	mnl_attr_nest_end(nl, na_pedit_keys);
	mnl_attr_nest_end(nl, na_act_options);
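	/* Step back so the caller's loop increment lands on the first non-pedit action. */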
	(*actions)--;
}

/**
 * Calculate the maximum memory size of one TC-pedit action.
 * One TC-pedit action can contain a set of keys, each defining
 * a rewrite element (rte_flow action).
 *
 * @param[in,out] actions
 *   actions specification.
 * @param[in,out] action_flags
 *   actions flags
 * @return
 *   Max memory size of one TC-pedit action
 */
static int
flow_tcf_get_pedit_actions_size(const struct rte_flow_action **actions,
				uint64_t *action_flags)
{
	int pedit_size = 0;
	int keys = 0;
	uint64_t flags = 0;

	pedit_size += SZ_NLATTR_NEST + /* na_act_index. */
		      SZ_NLATTR_STRZ_OF("pedit") +
		      SZ_NLATTR_NEST; /* TCA_ACT_OPTIONS. */
	for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
		switch ((*actions)->type) {
		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
			keys += NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN);
			flags |= MLX5_FLOW_ACTION_SET_IPV4_SRC;
			break;
		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
			keys += NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN);
			flags |= MLX5_FLOW_ACTION_SET_IPV4_DST;
			break;
		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
			keys += NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
			flags |= MLX5_FLOW_ACTION_SET_IPV6_SRC;
			break;
		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
			keys += NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
			flags |= MLX5_FLOW_ACTION_SET_IPV6_DST;
			break;
		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
			/* TCP is the same as UDP. */
			keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
			flags |= MLX5_FLOW_ACTION_SET_TP_SRC;
			break;
		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
			/* TCP is the same as UDP. */
			keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
			flags |= MLX5_FLOW_ACTION_SET_TP_DST;
			break;
		case RTE_FLOW_ACTION_TYPE_SET_TTL:
			keys += NUM_OF_PEDIT_KEYS(TTL_LEN);
			flags |= MLX5_FLOW_ACTION_SET_TTL;
			break;
		case RTE_FLOW_ACTION_TYPE_DEC_TTL:
			keys += NUM_OF_PEDIT_KEYS(TTL_LEN);
			flags |= MLX5_FLOW_ACTION_DEC_TTL;
			break;
		case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
			keys += NUM_OF_PEDIT_KEYS(ETHER_ADDR_LEN);
			flags |= MLX5_FLOW_ACTION_SET_MAC_SRC;
			break;
		case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
			keys += NUM_OF_PEDIT_KEYS(ETHER_ADDR_LEN);
			flags |= MLX5_FLOW_ACTION_SET_MAC_DST;
			break;
		default:
			goto get_pedit_action_size_done;
		}
	}
get_pedit_action_size_done:
	/* TCA_PEDIT_PARAMS_EX */
	pedit_size +=
		SZ_NLATTR_DATA_OF(sizeof(struct tc_pedit_sel) +
				  keys * sizeof(struct tc_pedit_key));
	pedit_size += SZ_NLATTR_NEST; /* TCA_PEDIT_KEYS */
	pedit_size += keys *
		      /* TCA_PEDIT_KEY_EX + HTYPE + CMD */
		      (SZ_NLATTR_NEST + SZ_NLATTR_DATA_OF(2) +
		       SZ_NLATTR_DATA_OF(2));
	(*action_flags) |= flags;
	(*actions)--;
	return pedit_size;
}

/**
 * Retrieve mask for pattern item.
 *
 * This function does basic sanity checks on a pattern item in order to
 * return the most appropriate mask for it.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] mask_default
 *   Default mask for pattern item as specified by the flow API.
 * @param[in] mask_supported
 *   Mask fields supported by the implementation.
 * @param[in] mask_empty
 *   Empty mask to return when there is no specification.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   Either @p item->mask or one of the mask parameters on success, NULL
 *   otherwise and rte_errno is set.
 */
static const void *
flow_tcf_item_mask(const struct rte_flow_item *item, const void *mask_default,
		   const void *mask_supported, const void *mask_empty,
		   size_t mask_size, struct rte_flow_error *error)
{
	const uint8_t *mask;
	size_t i;

	/* item->last and item->mask cannot exist without item->spec. */
	if (!item->spec && (item->mask || item->last)) {
		rte_flow_error_set(error, EINVAL,
				   RTE_FLOW_ERROR_TYPE_ITEM, item,
				   "\"mask\" or \"last\" field provided without"
				   " a corresponding \"spec\"");
		return NULL;
	}
	/* No spec, no mask, no problem. */
	if (!item->spec)
		return mask_empty;
	mask = item->mask ? item->mask : mask_default;
	assert(mask);
	/*
	 * Single-pass check to make sure that:
	 * - Mask is supported, no bits are set outside mask_supported.
	 * - Both item->spec and item->last are included in mask.
	 */
	for (i = 0; i != mask_size; ++i) {
		if (!mask[i])
			continue;
		if ((mask[i] | ((const uint8_t *)mask_supported)[i]) !=
		    ((const uint8_t *)mask_supported)[i]) {
			rte_flow_error_set(error, ENOTSUP,
					   RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
					   "unsupported field found"
					   " in \"mask\"");
			return NULL;
		}
		if (item->last &&
		    (((const uint8_t *)item->spec)[i] & mask[i]) !=
		    (((const uint8_t *)item->last)[i] & mask[i])) {
			rte_flow_error_set(error, EINVAL,
					   RTE_FLOW_ERROR_TYPE_ITEM_LAST,
					   item->last,
					   "range between \"spec\" and \"last\""
					   " not comprised in \"mask\"");
			return NULL;
		}
	}
	return mask;
}

/**
 * Build a conversion table between port ID and ifindex.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[out] ptoi
 *   Pointer to ptoi table.
 * @param[in] len
 *   Size of ptoi table provided.
 *
 * @return
 *   Size of ptoi table filled.
 */
static unsigned int
flow_tcf_build_ptoi_table(struct rte_eth_dev *dev, struct flow_tcf_ptoi *ptoi,
			  unsigned int len)
{
	unsigned int n = mlx5_dev_to_port_id(dev->device, NULL, 0);
	uint16_t port_id[n + 1];
	unsigned int i;
	unsigned int own = 0;

	/* At least one port is needed when no switch domain is present. */
	if (!n) {
		n = 1;
		port_id[0] = dev->data->port_id;
	} else {
		n = RTE_MIN(mlx5_dev_to_port_id(dev->device, port_id, n), n);
	}
	if (n > len)
		return 0;
	for (i = 0; i != n; ++i) {
		struct rte_eth_dev_info dev_info;

		rte_eth_dev_info_get(port_id[i], &dev_info);
		if (port_id[i] == dev->data->port_id)
			own = i;
		ptoi[i].port_id = port_id[i];
		ptoi[i].ifindex = dev_info.if_index;
	}
	/* Ensure first entry of ptoi[] is the current device. */
	if (own) {
		ptoi[n] = ptoi[0];
		ptoi[0] = ptoi[own];
		ptoi[own] = ptoi[n];
	}
	/* An entry with zero ifindex terminates ptoi[]. */
	ptoi[n].port_id = 0;
	ptoi[n].ifindex = 0;
	return n;
}

/**
 * Verify the @p attr will be correctly understood by the E-switch.
 *
 * @param[in] attr
 *   Pointer to flow attributes
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
flow_tcf_validate_attributes(const struct rte_flow_attr *attr,
			     struct rte_flow_error *error)
{
	/*
	 * Supported attributes: groups, some priorities and ingress only.
	 * group is supported only if kernel supports chain. Don't care about
	 * transfer as it is the caller's problem.
	 */
	if (attr->group > MLX5_TCF_GROUP_ID_MAX)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_GROUP, attr,
					  "group ID larger than "
					  RTE_STR(MLX5_TCF_GROUP_ID_MAX)
					  " isn't supported");
	else if (attr->group > 0 &&
		 attr->priority > MLX5_TCF_GROUP_PRIORITY_MAX)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
					  attr,
					  "lowest priority level is "
					  RTE_STR(MLX5_TCF_GROUP_PRIORITY_MAX)
					  " when group is configured");
	else if (attr->priority > 0xfffe)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
					  attr,
					  "lowest priority level is 0xfffe");
	if (!attr->ingress)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
					  attr, "only ingress is supported");
	if (attr->egress)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
					  attr, "egress is not supported");
	return 0;
}

/**
 * Validate flow for E-Switch.
 *
 * @param[in] priv
 *   Pointer to the priv structure.
 * @param[in] attr
 *   Pointer to the flow attributes.
 * @param[in] items
 *   Pointer to the list of items.
 * @param[in] actions
 *   Pointer to the list of actions.
 * @param[out] error
 *   Pointer to the error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
893 static int
894 flow_tcf_validate(struct rte_eth_dev *dev,
895                   const struct rte_flow_attr *attr,
896                   const struct rte_flow_item items[],
897                   const struct rte_flow_action actions[],
898                   struct rte_flow_error *error)
899 {
900         union {
901                 const struct rte_flow_item_port_id *port_id;
902                 const struct rte_flow_item_eth *eth;
903                 const struct rte_flow_item_vlan *vlan;
904                 const struct rte_flow_item_ipv4 *ipv4;
905                 const struct rte_flow_item_ipv6 *ipv6;
906                 const struct rte_flow_item_tcp *tcp;
907                 const struct rte_flow_item_udp *udp;
908         } spec, mask;
909         union {
910                 const struct rte_flow_action_port_id *port_id;
911                 const struct rte_flow_action_jump *jump;
912                 const struct rte_flow_action_of_push_vlan *of_push_vlan;
913                 const struct rte_flow_action_of_set_vlan_vid *
914                         of_set_vlan_vid;
915                 const struct rte_flow_action_of_set_vlan_pcp *
916                         of_set_vlan_pcp;
917                 const struct rte_flow_action_set_ipv4 *set_ipv4;
918                 const struct rte_flow_action_set_ipv6 *set_ipv6;
919         } conf;
920         uint32_t item_flags = 0;
921         uint32_t action_flags = 0;
922         uint8_t next_protocol = -1;
923         unsigned int tcm_ifindex = 0;
924         uint8_t pedit_validated = 0;
925         struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
926         struct rte_eth_dev *port_id_dev = NULL;
927         bool in_port_id_set;
928         int ret;
929
930         claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
931                                                 PTOI_TABLE_SZ_MAX(dev)));
932         ret = flow_tcf_validate_attributes(attr, error);
933         if (ret < 0)
934                 return ret;
935         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
936                 unsigned int i;
937
938                 switch (items->type) {
939                 case RTE_FLOW_ITEM_TYPE_VOID:
940                         break;
941                 case RTE_FLOW_ITEM_TYPE_PORT_ID:
942                         mask.port_id = flow_tcf_item_mask
943                                 (items, &rte_flow_item_port_id_mask,
944                                  &flow_tcf_mask_supported.port_id,
945                                  &flow_tcf_mask_empty.port_id,
946                                  sizeof(flow_tcf_mask_supported.port_id),
947                                  error);
948                         if (!mask.port_id)
949                                 return -rte_errno;
950                         if (mask.port_id == &flow_tcf_mask_empty.port_id) {
951                                 in_port_id_set = 1;
952                                 break;
953                         }
954                         spec.port_id = items->spec;
955                         if (mask.port_id->id && mask.port_id->id != 0xffffffff)
956                                 return rte_flow_error_set
957                                         (error, ENOTSUP,
958                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
959                                          mask.port_id,
960                                          "no support for partial mask on"
961                                          " \"id\" field");
962                         if (!mask.port_id->id)
963                                 i = 0;
964                         else
965                                 for (i = 0; ptoi[i].ifindex; ++i)
966                                         if (ptoi[i].port_id == spec.port_id->id)
967                                                 break;
968                         if (!ptoi[i].ifindex)
969                                 return rte_flow_error_set
970                                         (error, ENODEV,
971                                          RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
972                                          spec.port_id,
973                                          "missing data to convert port ID to"
974                                          " ifindex");
975                         if (in_port_id_set && ptoi[i].ifindex != tcm_ifindex)
976                                 return rte_flow_error_set
977                                         (error, ENOTSUP,
978                                          RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
979                                          spec.port_id,
980                                          "cannot match traffic for"
981                                          " several port IDs through"
982                                          " a single flow rule");
983                         tcm_ifindex = ptoi[i].ifindex;
984                         in_port_id_set = 1;
985                         break;
986                 case RTE_FLOW_ITEM_TYPE_ETH:
987                         ret = mlx5_flow_validate_item_eth(items, item_flags,
988                                                           error);
989                         if (ret < 0)
990                                 return ret;
991                         item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
992                         /* TODO:
993                          * Redundant check due to different supported mask.
994                          * Same for the rest of items.
995                          */
996                         mask.eth = flow_tcf_item_mask
997                                 (items, &rte_flow_item_eth_mask,
998                                  &flow_tcf_mask_supported.eth,
999                                  &flow_tcf_mask_empty.eth,
1000                                  sizeof(flow_tcf_mask_supported.eth),
1001                                  error);
1002                         if (!mask.eth)
1003                                 return -rte_errno;
1004                         if (mask.eth->type && mask.eth->type !=
1005                             RTE_BE16(0xffff))
1006                                 return rte_flow_error_set
1007                                         (error, ENOTSUP,
1008                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
1009                                          mask.eth,
1010                                          "no support for partial mask on"
1011                                          " \"type\" field");
1012                         break;
1013                 case RTE_FLOW_ITEM_TYPE_VLAN:
1014                         ret = mlx5_flow_validate_item_vlan(items, item_flags,
1015                                                            error);
1016                         if (ret < 0)
1017                                 return ret;
1018                         item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
1019                         mask.vlan = flow_tcf_item_mask
1020                                 (items, &rte_flow_item_vlan_mask,
1021                                  &flow_tcf_mask_supported.vlan,
1022                                  &flow_tcf_mask_empty.vlan,
1023                                  sizeof(flow_tcf_mask_supported.vlan),
1024                                  error);
1025                         if (!mask.vlan)
1026                                 return -rte_errno;
1027                         if ((mask.vlan->tci & RTE_BE16(0xe000) &&
1028                              (mask.vlan->tci & RTE_BE16(0xe000)) !=
1029                               RTE_BE16(0xe000)) ||
1030                             (mask.vlan->tci & RTE_BE16(0x0fff) &&
1031                              (mask.vlan->tci & RTE_BE16(0x0fff)) !=
1032                               RTE_BE16(0x0fff)) ||
1033                             (mask.vlan->inner_type &&
1034                              mask.vlan->inner_type != RTE_BE16(0xffff)))
1035                                 return rte_flow_error_set
1036                                         (error, ENOTSUP,
1037                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
1038                                          mask.vlan,
1039                                          "no support for partial masks on"
1040                                          " \"tci\" (PCP and VID parts) and"
1041                                          " \"inner_type\" fields");
1042                         break;
1043                 case RTE_FLOW_ITEM_TYPE_IPV4:
1044                         ret = mlx5_flow_validate_item_ipv4(items, item_flags,
1045                                                            error);
1046                         if (ret < 0)
1047                                 return ret;
1048                         item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1049                         mask.ipv4 = flow_tcf_item_mask
1050                                 (items, &rte_flow_item_ipv4_mask,
1051                                  &flow_tcf_mask_supported.ipv4,
1052                                  &flow_tcf_mask_empty.ipv4,
1053                                  sizeof(flow_tcf_mask_supported.ipv4),
1054                                  error);
1055                         if (!mask.ipv4)
1056                                 return -rte_errno;
1057                         if (mask.ipv4->hdr.next_proto_id &&
1058                             mask.ipv4->hdr.next_proto_id != 0xff)
1059                                 return rte_flow_error_set
1060                                         (error, ENOTSUP,
1061                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
1062                                          mask.ipv4,
1063                                          "no support for partial mask on"
1064                                          " \"hdr.next_proto_id\" field");
1065                         else if (mask.ipv4->hdr.next_proto_id)
1066                                 next_protocol =
1067                                         ((const struct rte_flow_item_ipv4 *)
1068                                          (items->spec))->hdr.next_proto_id;
1069                         break;
1070                 case RTE_FLOW_ITEM_TYPE_IPV6:
1071                         ret = mlx5_flow_validate_item_ipv6(items, item_flags,
1072                                                            error);
1073                         if (ret < 0)
1074                                 return ret;
1075                         item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1076                         mask.ipv6 = flow_tcf_item_mask
1077                                 (items, &rte_flow_item_ipv6_mask,
1078                                  &flow_tcf_mask_supported.ipv6,
1079                                  &flow_tcf_mask_empty.ipv6,
1080                                  sizeof(flow_tcf_mask_supported.ipv6),
1081                                  error);
1082                         if (!mask.ipv6)
1083                                 return -rte_errno;
1084                         if (mask.ipv6->hdr.proto &&
1085                             mask.ipv6->hdr.proto != 0xff)
1086                                 return rte_flow_error_set
1087                                         (error, ENOTSUP,
1088                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
1089                                          mask.ipv6,
1090                                          "no support for partial mask on"
1091                                          " \"hdr.proto\" field");
1092                         else if (mask.ipv6->hdr.proto)
1093                                 next_protocol =
1094                                         ((const struct rte_flow_item_ipv6 *)
1095                                          (items->spec))->hdr.proto;
1096                         break;
1097                 case RTE_FLOW_ITEM_TYPE_UDP:
1098                         ret = mlx5_flow_validate_item_udp(items, item_flags,
1099                                                           next_protocol, error);
1100                         if (ret < 0)
1101                                 return ret;
1102                         item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
1103                         mask.udp = flow_tcf_item_mask
1104                                 (items, &rte_flow_item_udp_mask,
1105                                  &flow_tcf_mask_supported.udp,
1106                                  &flow_tcf_mask_empty.udp,
1107                                  sizeof(flow_tcf_mask_supported.udp),
1108                                  error);
1109                         if (!mask.udp)
1110                                 return -rte_errno;
1111                         break;
1112                 case RTE_FLOW_ITEM_TYPE_TCP:
1113                         ret = mlx5_flow_validate_item_tcp
1114                                              (items, item_flags,
1115                                               next_protocol,
1116                                               &flow_tcf_mask_supported.tcp,
1117                                               error);
1118                         if (ret < 0)
1119                                 return ret;
1120                         item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
1121                         mask.tcp = flow_tcf_item_mask
1122                                 (items, &rte_flow_item_tcp_mask,
1123                                  &flow_tcf_mask_supported.tcp,
1124                                  &flow_tcf_mask_empty.tcp,
1125                                  sizeof(flow_tcf_mask_supported.tcp),
1126                                  error);
1127                         if (!mask.tcp)
1128                                 return -rte_errno;
1129                         break;
1130                 default:
1131                         return rte_flow_error_set(error, ENOTSUP,
1132                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1133                                                   NULL, "item not supported");
1134                 }
1135         }
1136         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1137                 unsigned int i;
1138                 uint32_t current_action_flag = 0;
1139
1140                 switch (actions->type) {
1141                 case RTE_FLOW_ACTION_TYPE_VOID:
1142                         break;
1143                 case RTE_FLOW_ACTION_TYPE_PORT_ID:
1144                         current_action_flag = MLX5_FLOW_ACTION_PORT_ID;
1145                         if (!actions->conf)
1146                                 break;
1147                         conf.port_id = actions->conf;
1148                         if (conf.port_id->original)
1149                                 i = 0;
1150                         else
1151                                 for (i = 0; ptoi[i].ifindex; ++i)
1152                                         if (ptoi[i].port_id == conf.port_id->id)
1153                                                 break;
1154                         if (!ptoi[i].ifindex)
1155                                 return rte_flow_error_set
1156                                         (error, ENODEV,
1157                                          RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1158                                          conf.port_id,
1159                                          "missing data to convert port ID to"
1160                                          " ifindex");
1161                         port_id_dev = &rte_eth_devices[conf.port_id->id];
1162                         break;
1163                 case RTE_FLOW_ACTION_TYPE_JUMP:
1164                         current_action_flag = MLX5_FLOW_ACTION_JUMP;
1165                         if (!actions->conf)
1166                                 break;
1167                         conf.jump = actions->conf;
1168                         if (attr->group >= conf.jump->group)
1169                                 return rte_flow_error_set
1170                                         (error, ENOTSUP,
1171                                          RTE_FLOW_ERROR_TYPE_ACTION,
1172                                          actions,
1173                                          "can jump only to a group forward");
1174                         break;
1175                 case RTE_FLOW_ACTION_TYPE_DROP:
1176                         current_action_flag = MLX5_FLOW_ACTION_DROP;
1177                         break;
1178                 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
1179                         current_action_flag = MLX5_FLOW_ACTION_OF_POP_VLAN;
1180                         break;
1181                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
1182                         current_action_flag = MLX5_FLOW_ACTION_OF_PUSH_VLAN;
1183                         break;
1184                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
1185                         if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
1186                                 return rte_flow_error_set
1187                                         (error, ENOTSUP,
1188                                          RTE_FLOW_ERROR_TYPE_ACTION, actions,
1189                                          "vlan modify is not supported,"
1190                                          " set action must follow push action");
1191                         current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
1192                         break;
1193                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
1194                         if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
1195                                 return rte_flow_error_set
1196                                         (error, ENOTSUP,
1197                                          RTE_FLOW_ERROR_TYPE_ACTION, actions,
1198                                          "vlan modify is not supported,"
1199                                          " set action must follow push action");
1200                         current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
1201                         break;
1202                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
1203                         current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_SRC;
1204                         break;
1205                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
1206                         current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_DST;
1207                         break;
1208                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
1209                         current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_SRC;
1210                         break;
1211                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
1212                         current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_DST;
1213                         break;
1214                 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
1215                         current_action_flag = MLX5_FLOW_ACTION_SET_TP_SRC;
1216                         break;
1217                 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
1218                         current_action_flag = MLX5_FLOW_ACTION_SET_TP_DST;
1219                         break;
1220                 case RTE_FLOW_ACTION_TYPE_SET_TTL:
1221                         current_action_flag = MLX5_FLOW_ACTION_SET_TTL;
1222                         break;
1223                 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
1224                         current_action_flag = MLX5_FLOW_ACTION_DEC_TTL;
1225                         break;
1226                 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
1227                         current_action_flag = MLX5_FLOW_ACTION_SET_MAC_SRC;
1228                         break;
1229                 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
1230                         current_action_flag = MLX5_FLOW_ACTION_SET_MAC_DST;
1231                         break;
1232                 default:
1233                         return rte_flow_error_set(error, ENOTSUP,
1234                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1235                                                   actions,
1236                                                   "action not supported");
1237                 }
1238                 if (current_action_flag & MLX5_TCF_CONFIG_ACTIONS) {
1239                         if (!actions->conf)
1240                                 return rte_flow_error_set(error, EINVAL,
1241                                                 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1242                                                 actions,
1243                                                 "action configuration not set");
1244                 }
1245                 if ((current_action_flag & MLX5_TCF_PEDIT_ACTIONS) &&
1246                     pedit_validated)
1247                         return rte_flow_error_set(error, ENOTSUP,
1248                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1249                                                   actions,
1250                                                   "set actions should be "
1251                                                   "listed successively");
1252                 if ((current_action_flag & ~MLX5_TCF_PEDIT_ACTIONS) &&
1253                     (action_flags & MLX5_TCF_PEDIT_ACTIONS))
1254                         pedit_validated = 1;
1255                 if ((current_action_flag & MLX5_TCF_FATE_ACTIONS) &&
1256                     (action_flags & MLX5_TCF_FATE_ACTIONS))
1257                         return rte_flow_error_set(error, EINVAL,
1258                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1259                                                   actions,
1260                                                   "can't have multiple fate"
1261                                                   " actions");
1262                 action_flags |= current_action_flag;
1263         }
1264         if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
1265             (action_flags & MLX5_FLOW_ACTION_DROP))
1266                 return rte_flow_error_set(error, ENOTSUP,
1267                                           RTE_FLOW_ERROR_TYPE_ACTION,
1268                                           actions,
1269                                           "set action is not compatible with "
1270                                           "drop action");
1271         if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
1272             !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
1273                 return rte_flow_error_set(error, ENOTSUP,
1274                                           RTE_FLOW_ERROR_TYPE_ACTION,
1275                                           actions,
1276                                           "set action must be followed by "
1277                                           "port_id action");
1278         if (action_flags &
1279            (MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST)) {
1280                 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4))
1281                         return rte_flow_error_set(error, EINVAL,
1282                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1283                                                   actions,
1284                                                   "no ipv4 item found in"
1285                                                   " pattern");
1286         }
1287         if (action_flags &
1288            (MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST)) {
1289                 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6))
1290                         return rte_flow_error_set(error, EINVAL,
1291                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1292                                                   actions,
1293                                                   "no ipv6 item found in"
1294                                                   " pattern");
1295         }
1296         if (action_flags &
1297            (MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST)) {
1298                 if (!(item_flags &
1299                      (MLX5_FLOW_LAYER_OUTER_L4_UDP |
1300                       MLX5_FLOW_LAYER_OUTER_L4_TCP)))
1301                         return rte_flow_error_set(error, EINVAL,
1302                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1303                                                   actions,
1304                                                   "no TCP/UDP item found in"
1305                                                   " pattern");
1306         }
1307         /*
1308          * FW syndrome (0xA9C090):
1309          *     set_flow_table_entry: push vlan action fte in fdb can ONLY be
1310          *     forwarded to the uplink.
1311          */
1312         if ((action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN) &&
1313             (action_flags & MLX5_FLOW_ACTION_PORT_ID) &&
1314             ((struct priv *)port_id_dev->data->dev_private)->representor)
1315                 return rte_flow_error_set(error, ENOTSUP,
1316                                           RTE_FLOW_ERROR_TYPE_ACTION, actions,
1317                                           "vlan push can only be applied"
1318                                           " when forwarding to uplink port");
1319         /*
1320          * FW syndrome (0x294609):
1321          *     set_flow_table_entry: modify/pop/push actions in fdb flow table
1322          *     are supported only while forwarding to vport.
1323          */
1324         if ((action_flags & MLX5_TCF_VLAN_ACTIONS) &&
1325             !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
1326                 return rte_flow_error_set(error, ENOTSUP,
1327                                           RTE_FLOW_ERROR_TYPE_ACTION, actions,
1328                                           "vlan actions are supported"
1329                                           " only with port_id action");
1330         if (!(action_flags & MLX5_TCF_FATE_ACTIONS))
1331                 return rte_flow_error_set(error, EINVAL,
1332                                           RTE_FLOW_ERROR_TYPE_ACTION, actions,
1333                                           "no fate action is found");
1334         if (action_flags &
1335            (MLX5_FLOW_ACTION_SET_TTL | MLX5_FLOW_ACTION_DEC_TTL)) {
1336                 if (!(item_flags &
1337                      (MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
1338                       MLX5_FLOW_LAYER_OUTER_L3_IPV6)))
1339                         return rte_flow_error_set(error, EINVAL,
1340                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1341                                                   actions,
1342                                                   "no IP found in pattern");
1343         }
1344         if (action_flags &
1345             (MLX5_FLOW_ACTION_SET_MAC_SRC | MLX5_FLOW_ACTION_SET_MAC_DST)) {
1346                 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L2))
1347                         return rte_flow_error_set(error, ENOTSUP,
1348                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1349                                                   actions,
1350                                                   "no ethernet found in"
1351                                                   " pattern");
1352         }
1353         return 0;
1354 }
1355
1356 /**
1357  * Calculate maximum size of memory for flow items of Linux TC flower and
1358  * extract specified items.
1359  *
      * @param[in] attr
      *   Pointer to the flow attributes.
1360  * @param[in] items
1361  *   Pointer to the list of items.
1362  * @param[out] item_flags
1363  *   Pointer to the detected items.
1364  *
1365  * @return
1366  *   Maximum size of memory for items.
1367  */
1368 static int
1369 flow_tcf_get_items_and_size(const struct rte_flow_attr *attr,
1370                             const struct rte_flow_item items[],
1371                             uint64_t *item_flags)
1372 {
1373         int size = 0;
1374         uint64_t flags = 0;
1375
1376         size += SZ_NLATTR_STRZ_OF("flower") +
1377                 SZ_NLATTR_NEST + /* TCA_OPTIONS. */
1378                 SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CLS_FLAGS_SKIP_SW. */
1379         if (attr->group > 0)
1380                 size += SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CHAIN. */
1381         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1382                 switch (items->type) {
1383                 case RTE_FLOW_ITEM_TYPE_VOID:
1384                         break;
1385                 case RTE_FLOW_ITEM_TYPE_PORT_ID:
1386                         break;
1387                 case RTE_FLOW_ITEM_TYPE_ETH:
1388                         size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1389                                 SZ_NLATTR_DATA_OF(ETHER_ADDR_LEN) * 4;
1390                                 /* dst/src MAC addr and mask. */
1391                         flags |= MLX5_FLOW_LAYER_OUTER_L2;
1392                         break;
1393                 case RTE_FLOW_ITEM_TYPE_VLAN:
1394                         size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1395                                 SZ_NLATTR_TYPE_OF(uint16_t) +
1396                                 /* VLAN Ether type. */
1397                                 SZ_NLATTR_TYPE_OF(uint8_t) + /* VLAN prio. */
1398                                 SZ_NLATTR_TYPE_OF(uint16_t); /* VLAN ID. */
1399                         flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
1400                         break;
1401                 case RTE_FLOW_ITEM_TYPE_IPV4:
1402                         size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1403                                 SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1404                                 SZ_NLATTR_TYPE_OF(uint32_t) * 4;
1405                                 /* dst/src IP addr and mask. */
1406                         flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1407                         break;
1408                 case RTE_FLOW_ITEM_TYPE_IPV6:
1409                         size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
1410                                 SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1411                                 SZ_NLATTR_TYPE_OF(IPV6_ADDR_LEN) * 4;
1412                                 /* dst/src IP addr and mask. */
1413                         flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1414                         break;
1415                 case RTE_FLOW_ITEM_TYPE_UDP:
1416                         size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1417                                 SZ_NLATTR_TYPE_OF(uint16_t) * 4;
1418                                 /* dst/src port and mask. */
1419                         flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
1420                         break;
1421                 case RTE_FLOW_ITEM_TYPE_TCP:
1422                         size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
1423                                 SZ_NLATTR_TYPE_OF(uint16_t) * 4;
1424                                 /* dst/src port and mask. */
1425                         flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
1426                         break;
1427                 default:
1428                         DRV_LOG(WARNING,
1429                                 "unsupported item %p type %d,"
1430                                 " items must be validated before flow creation",
1431                                 (const void *)items, items->type);
1432                         break;
1433                 }
1434         }
1435         *item_flags = flags;
1436         return size;
1437 }
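
/*
 * Illustrative sketch, not part of the driver: for a simple
 * ETH -> IPV4 -> END pattern the value returned above roughly expands to
 *
 *   SZ_NLATTR_STRZ_OF("flower") + SZ_NLATTR_NEST +       // kind + options
 *   SZ_NLATTR_TYPE_OF(uint32_t) +                        // skip_sw flags
 *   SZ_NLATTR_TYPE_OF(uint16_t) +                        // ether type
 *   SZ_NLATTR_DATA_OF(ETHER_ADDR_LEN) * 4 +              // MAC addr/mask
 *   SZ_NLATTR_TYPE_OF(uint16_t) +                        // ether type
 *   SZ_NLATTR_TYPE_OF(uint8_t) +                         // IP proto
 *   SZ_NLATTR_TYPE_OF(uint32_t) * 4;                     // IPv4 addr/mask
 *
 * This is only an upper bound; attributes whose masks turn out to be empty
 * are not emitted by the translation step.
 */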
1438
1439 /**
1440  * Calculate maximum size of memory for flow actions of Linux TC flower and
1441  * extract specified actions.
1442  *
1443  * @param[in] actions
1444  *   Pointer to the list of actions.
1445  * @param[out] action_flags
1446  *   Pointer to the detected actions.
1447  *
1448  * @return
1449  *   Maximum size of memory for actions.
1450  */
1451 static int
1452 flow_tcf_get_actions_and_size(const struct rte_flow_action actions[],
1453                               uint64_t *action_flags)
1454 {
1455         int size = 0;
1456         uint64_t flags = 0;
1457
1458         size += SZ_NLATTR_NEST; /* TCA_FLOWER_ACT. */
1459         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1460                 switch (actions->type) {
1461                 case RTE_FLOW_ACTION_TYPE_VOID:
1462                         break;
1463                 case RTE_FLOW_ACTION_TYPE_PORT_ID:
1464                         size += SZ_NLATTR_NEST + /* na_act_index. */
1465                                 SZ_NLATTR_STRZ_OF("mirred") +
1466                                 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1467                                 SZ_NLATTR_TYPE_OF(struct tc_mirred);
1468                         flags |= MLX5_FLOW_ACTION_PORT_ID;
1469                         break;
1470                 case RTE_FLOW_ACTION_TYPE_JUMP:
1471                         size += SZ_NLATTR_NEST + /* na_act_index. */
1472                                 SZ_NLATTR_STRZ_OF("gact") +
1473                                 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1474                                 SZ_NLATTR_TYPE_OF(struct tc_gact);
1475                         flags |= MLX5_FLOW_ACTION_JUMP;
1476                         break;
1477                 case RTE_FLOW_ACTION_TYPE_DROP:
1478                         size += SZ_NLATTR_NEST + /* na_act_index. */
1479                                 SZ_NLATTR_STRZ_OF("gact") +
1480                                 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1481                                 SZ_NLATTR_TYPE_OF(struct tc_gact);
1482                         flags |= MLX5_FLOW_ACTION_DROP;
1483                         break;
1484                 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
1485                         flags |= MLX5_FLOW_ACTION_OF_POP_VLAN;
1486                         goto action_of_vlan;
1487                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
1488                         flags |= MLX5_FLOW_ACTION_OF_PUSH_VLAN;
1489                         goto action_of_vlan;
1490                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
1491                         flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
1492                         goto action_of_vlan;
1493                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
1494                         flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
1495                         goto action_of_vlan;
1496 action_of_vlan:
1497                         size += SZ_NLATTR_NEST + /* na_act_index. */
1498                                 SZ_NLATTR_STRZ_OF("vlan") +
1499                                 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
1500                                 SZ_NLATTR_TYPE_OF(struct tc_vlan) +
1501                                 SZ_NLATTR_TYPE_OF(uint16_t) +
1502                                 /* VLAN protocol. */
1503                                 SZ_NLATTR_TYPE_OF(uint16_t) + /* VLAN ID. */
1504                                 SZ_NLATTR_TYPE_OF(uint8_t); /* VLAN prio. */
1505                         break;
1506                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
1507                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
1508                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
1509                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
1510                 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
1511                 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
1512                 case RTE_FLOW_ACTION_TYPE_SET_TTL:
1513                 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
1514                 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
1515                 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
1516                         size += flow_tcf_get_pedit_actions_size(&actions,
1517                                                                 &flags);
1518                         break;
1519                 default:
1520                         DRV_LOG(WARNING,
1521                                 "unsupported action %p type %d,"
1522                                 " actions must be validated before flow creation",
1523                                 (const void *)actions, actions->type);
1524                         break;
1525                 }
1526         }
1527         *action_flags = flags;
1528         return size;
1529 }
1530
1531 /**
1532  * Brand rtnetlink buffer with unique handle.
1533  *
1534  * This handle should be unique for a given network interface to avoid
1535  * collisions.
1536  *
1537  * @param nlh
1538  *   Pointer to Netlink message.
1539  * @param handle
1540  *   Unique 32-bit handle to use.
1541  */
1542 static void
1543 flow_tcf_nl_brand(struct nlmsghdr *nlh, uint32_t handle)
1544 {
1545         struct tcmsg *tcm = mnl_nlmsg_get_payload(nlh);
1546
1547         tcm->tcm_handle = handle;
1548         DRV_LOG(DEBUG, "Netlink msg %p is branded with handle %x",
1549                 (void *)nlh, handle);
1550 }
1551
1552 /**
1553  * Prepare a flow object for Linux TC flower. It calculates the maximum size of
1554  * memory required, allocates the memory, initializes Netlink message headers
1555  * and sets a unique TC message handle.
1556  *
1557  * @param[in] attr
1558  *   Pointer to the flow attributes.
1559  * @param[in] items
1560  *   Pointer to the list of items.
1561  * @param[in] actions
1562  *   Pointer to the list of actions.
1563  * @param[out] item_flags
1564  *   Pointer to bit mask of all items detected.
1565  * @param[out] action_flags
1566  *   Pointer to bit mask of all actions detected.
1567  * @param[out] error
1568  *   Pointer to the error structure.
1569  *
1570  * @return
1571  *   Pointer to mlx5_flow object on success,
1572  *   otherwise NULL and rte_errno is set.
1573  */
1574 static struct mlx5_flow *
1575 flow_tcf_prepare(const struct rte_flow_attr *attr,
1576                  const struct rte_flow_item items[],
1577                  const struct rte_flow_action actions[],
1578                  uint64_t *item_flags, uint64_t *action_flags,
1579                  struct rte_flow_error *error)
1580 {
1581         size_t size = sizeof(struct mlx5_flow) +
1582                       MNL_ALIGN(sizeof(struct nlmsghdr)) +
1583                       MNL_ALIGN(sizeof(struct tcmsg));
1584         struct mlx5_flow *dev_flow;
1585         struct nlmsghdr *nlh;
1586         struct tcmsg *tcm;
1587
1588         size += flow_tcf_get_items_and_size(attr, items, item_flags);
1589         size += flow_tcf_get_actions_and_size(actions, action_flags);
1590         dev_flow = rte_zmalloc(__func__, size, MNL_ALIGNTO);
1591         if (!dev_flow) {
1592                 rte_flow_error_set(error, ENOMEM,
1593                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1594                                    "not enough memory to create E-Switch flow");
1595                 return NULL;
1596         }
1597         nlh = mnl_nlmsg_put_header((void *)(dev_flow + 1));
1598         tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
1599         *dev_flow = (struct mlx5_flow){
1600                 .tcf = (struct mlx5_flow_tcf){
1601                         .nlh = nlh,
1602                         .tcm = tcm,
1603                 },
1604         };
1605         /*
1606          * Generate a reasonably unique handle based on the address of the
1607          * target buffer.
1608          *
1609          * This is straightforward on 32-bit systems where the flow pointer can
1610          * be used directly. Otherwise, the pointer is shifted right by the
1611          * base-2 logarithm of the previous power of two of the allocated
1612          * buffer size and its least significant 32 bits are kept.
1613          */
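        /*
         * Illustrative example with assumed values: given a 64-bit pointer
         * such as 0x7f52a1c04000 and a total size of 1500 bytes,
         * rte_align32prevpow2(1500) yields 1024 and rte_log2_u32(1024)
         * yields 10, so the handle is the pointer shifted right by 10 bits,
         * truncated to 32 bits by flow_tcf_nl_brand().
         */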
1614         if (sizeof(dev_flow) <= 4)
1615                 flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow);
1616         else
1617                 flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow >>
1618                                        rte_log2_u32(rte_align32prevpow2(size)));
1619         return dev_flow;
1620 }
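
/*
 * Layout of the buffer allocated above (illustration only):
 *
 *   +------------------+-----------------+---------------+-----------------+
 *   | struct mlx5_flow | struct nlmsghdr | struct tcmsg  | TC attributes   |
 *   +------------------+-----------------+---------------+-----------------+
 *   ^ dev_flow          ^ dev_flow->tcf.nlh  ^ dev_flow->tcf.tcm
 *                                              (attributes appended later
 *                                               by the translate step)
 *
 * The Netlink message is built in place right after the mlx5_flow object,
 * which is why a single allocation of the computed maximum size is enough.
 */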
1621
1622 /**
1623  * Translate flow for Linux TC flower and construct Netlink message.
1624  *
1625  * @param[in] dev
1626  *   Pointer to the Ethernet device.
1627  * @param[in, out] dev_flow
1628  *   Pointer to the sub flow.
1629  * @param[in] attr
1630  *   Pointer to the flow attributes.
1631  * @param[in] items
1632  *   Pointer to the list of items.
1633  * @param[in] actions
1634  *   Pointer to the list of actions.
1635  * @param[out] error
1636  *   Pointer to the error structure.
1637  *
1638  * @return
1639  *   0 on success, a negative errno value otherwise and rte_errno is set.
1640  */
1641 static int
1642 flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
1643                    const struct rte_flow_attr *attr,
1644                    const struct rte_flow_item items[],
1645                    const struct rte_flow_action actions[],
1646                    struct rte_flow_error *error)
1647 {
1648         union {
1649                 const struct rte_flow_item_port_id *port_id;
1650                 const struct rte_flow_item_eth *eth;
1651                 const struct rte_flow_item_vlan *vlan;
1652                 const struct rte_flow_item_ipv4 *ipv4;
1653                 const struct rte_flow_item_ipv6 *ipv6;
1654                 const struct rte_flow_item_tcp *tcp;
1655                 const struct rte_flow_item_udp *udp;
1656         } spec, mask;
1657         union {
1658                 const struct rte_flow_action_port_id *port_id;
1659                 const struct rte_flow_action_jump *jump;
1660                 const struct rte_flow_action_of_push_vlan *of_push_vlan;
1661                 const struct rte_flow_action_of_set_vlan_vid *
1662                         of_set_vlan_vid;
1663                 const struct rte_flow_action_of_set_vlan_pcp *
1664                         of_set_vlan_pcp;
1665         } conf;
1666         struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
1667         struct nlmsghdr *nlh = dev_flow->tcf.nlh;
1668         struct tcmsg *tcm = dev_flow->tcf.tcm;
1669         uint32_t na_act_index_cur;
1670         bool eth_type_set = 0;
1671         bool vlan_present = 0;
1672         bool vlan_eth_type_set = 0;
1673         bool ip_proto_set = 0;
1674         struct nlattr *na_flower;
1675         struct nlattr *na_flower_act;
1676         struct nlattr *na_vlan_id = NULL;
1677         struct nlattr *na_vlan_priority = NULL;
1678         uint64_t item_flags = 0;
1679
1680         claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
1681                                                 PTOI_TABLE_SZ_MAX(dev)));
1682         nlh = dev_flow->tcf.nlh;
1683         tcm = dev_flow->tcf.tcm;
1684         /* Prepare API must have been called beforehand. */
1685         assert(nlh != NULL && tcm != NULL);
1686         tcm->tcm_family = AF_UNSPEC;
1687         tcm->tcm_ifindex = ptoi[0].ifindex;
1688         tcm->tcm_parent = TC_H_MAKE(TC_H_INGRESS, TC_H_MIN_INGRESS);
1689         /*
1690          * Priority cannot be zero to prevent the kernel from picking one
1691          * automatically.
1692          */
1693         tcm->tcm_info = TC_H_MAKE((attr->priority + 1) << 16,
1694                                   RTE_BE16(ETH_P_ALL));
1695         if (attr->group > 0)
1696                 mnl_attr_put_u32(nlh, TCA_CHAIN, attr->group);
1697         mnl_attr_put_strz(nlh, TCA_KIND, "flower");
1698         na_flower = mnl_attr_nest_start(nlh, TCA_OPTIONS);
1699         mnl_attr_put_u32(nlh, TCA_FLOWER_FLAGS, TCA_CLS_FLAGS_SKIP_SW);
1700         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1701                 unsigned int i;
1702
1703                 switch (items->type) {
1704                 case RTE_FLOW_ITEM_TYPE_VOID:
1705                         break;
1706                 case RTE_FLOW_ITEM_TYPE_PORT_ID:
1707                         mask.port_id = flow_tcf_item_mask
1708                                 (items, &rte_flow_item_port_id_mask,
1709                                  &flow_tcf_mask_supported.port_id,
1710                                  &flow_tcf_mask_empty.port_id,
1711                                  sizeof(flow_tcf_mask_supported.port_id),
1712                                  error);
1713                         assert(mask.port_id);
1714                         if (mask.port_id == &flow_tcf_mask_empty.port_id)
1715                                 break;
1716                         spec.port_id = items->spec;
1717                         if (!mask.port_id->id)
1718                                 i = 0;
1719                         else
1720                                 for (i = 0; ptoi[i].ifindex; ++i)
1721                                         if (ptoi[i].port_id == spec.port_id->id)
1722                                                 break;
1723                         assert(ptoi[i].ifindex);
1724                         tcm->tcm_ifindex = ptoi[i].ifindex;
1725                         break;
1726                 case RTE_FLOW_ITEM_TYPE_ETH:
1727                         item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
1728                         mask.eth = flow_tcf_item_mask
1729                                 (items, &rte_flow_item_eth_mask,
1730                                  &flow_tcf_mask_supported.eth,
1731                                  &flow_tcf_mask_empty.eth,
1732                                  sizeof(flow_tcf_mask_supported.eth),
1733                                  error);
1734                         assert(mask.eth);
1735                         if (mask.eth == &flow_tcf_mask_empty.eth)
1736                                 break;
1737                         spec.eth = items->spec;
1738                         if (mask.eth->type) {
1739                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
1740                                                  spec.eth->type);
1741                                 eth_type_set = 1;
1742                         }
1743                         if (!is_zero_ether_addr(&mask.eth->dst)) {
1744                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST,
1745                                              ETHER_ADDR_LEN,
1746                                              spec.eth->dst.addr_bytes);
1747                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST_MASK,
1748                                              ETHER_ADDR_LEN,
1749                                              mask.eth->dst.addr_bytes);
1750                         }
1751                         if (!is_zero_ether_addr(&mask.eth->src)) {
1752                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC,
1753                                              ETHER_ADDR_LEN,
1754                                              spec.eth->src.addr_bytes);
1755                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC_MASK,
1756                                              ETHER_ADDR_LEN,
1757                                              mask.eth->src.addr_bytes);
1758                         }
1759                         break;
1760                 case RTE_FLOW_ITEM_TYPE_VLAN:
1761                         item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
1762                         mask.vlan = flow_tcf_item_mask
1763                                 (items, &rte_flow_item_vlan_mask,
1764                                  &flow_tcf_mask_supported.vlan,
1765                                  &flow_tcf_mask_empty.vlan,
1766                                  sizeof(flow_tcf_mask_supported.vlan),
1767                                  error);
1768                         assert(mask.vlan);
1769                         if (!eth_type_set)
1770                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
1771                                                  RTE_BE16(ETH_P_8021Q));
1772                         eth_type_set = 1;
1773                         vlan_present = 1;
1774                         if (mask.vlan == &flow_tcf_mask_empty.vlan)
1775                                 break;
1776                         spec.vlan = items->spec;
1777                         if (mask.vlan->inner_type) {
1778                                 mnl_attr_put_u16(nlh,
1779                                                  TCA_FLOWER_KEY_VLAN_ETH_TYPE,
1780                                                  spec.vlan->inner_type);
1781                                 vlan_eth_type_set = 1;
1782                         }
1783                         if (mask.vlan->tci & RTE_BE16(0xe000))
1784                                 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_VLAN_PRIO,
1785                                                 (rte_be_to_cpu_16
1786                                                  (spec.vlan->tci) >> 13) & 0x7);
1787                         if (mask.vlan->tci & RTE_BE16(0x0fff))
1788                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_VLAN_ID,
1789                                                  rte_be_to_cpu_16
1790                                                  (spec.vlan->tci &
1791                                                   RTE_BE16(0x0fff)));
1792                         break;
1793                 case RTE_FLOW_ITEM_TYPE_IPV4:
1794                         item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1795                         mask.ipv4 = flow_tcf_item_mask
1796                                 (items, &rte_flow_item_ipv4_mask,
1797                                  &flow_tcf_mask_supported.ipv4,
1798                                  &flow_tcf_mask_empty.ipv4,
1799                                  sizeof(flow_tcf_mask_supported.ipv4),
1800                                  error);
1801                         assert(mask.ipv4);
1802                         if (!eth_type_set || !vlan_eth_type_set)
1803                                 mnl_attr_put_u16(nlh,
1804                                                  vlan_present ?
1805                                                  TCA_FLOWER_KEY_VLAN_ETH_TYPE :
1806                                                  TCA_FLOWER_KEY_ETH_TYPE,
1807                                                  RTE_BE16(ETH_P_IP));
1808                         eth_type_set = 1;
1809                         vlan_eth_type_set = 1;
1810                         if (mask.ipv4 == &flow_tcf_mask_empty.ipv4)
1811                                 break;
1812                         spec.ipv4 = items->spec;
1813                         if (mask.ipv4->hdr.next_proto_id) {
1814                                 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1815                                                 spec.ipv4->hdr.next_proto_id);
1816                                 ip_proto_set = 1;
1817                         }
1818                         if (mask.ipv4->hdr.src_addr) {
1819                                 mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_SRC,
1820                                                  spec.ipv4->hdr.src_addr);
1821                                 mnl_attr_put_u32(nlh,
1822                                                  TCA_FLOWER_KEY_IPV4_SRC_MASK,
1823                                                  mask.ipv4->hdr.src_addr);
1824                         }
1825                         if (mask.ipv4->hdr.dst_addr) {
1826                                 mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_DST,
1827                                                  spec.ipv4->hdr.dst_addr);
1828                                 mnl_attr_put_u32(nlh,
1829                                                  TCA_FLOWER_KEY_IPV4_DST_MASK,
1830                                                  mask.ipv4->hdr.dst_addr);
1831                         }
1832                         break;
1833                 case RTE_FLOW_ITEM_TYPE_IPV6:
1834                         item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1835                         mask.ipv6 = flow_tcf_item_mask
1836                                 (items, &rte_flow_item_ipv6_mask,
1837                                  &flow_tcf_mask_supported.ipv6,
1838                                  &flow_tcf_mask_empty.ipv6,
1839                                  sizeof(flow_tcf_mask_supported.ipv6),
1840                                  error);
1841                         assert(mask.ipv6);
1842                         if (!eth_type_set || !vlan_eth_type_set)
1843                                 mnl_attr_put_u16(nlh,
1844                                                  vlan_present ?
1845                                                  TCA_FLOWER_KEY_VLAN_ETH_TYPE :
1846                                                  TCA_FLOWER_KEY_ETH_TYPE,
1847                                                  RTE_BE16(ETH_P_IPV6));
1848                         eth_type_set = 1;
1849                         vlan_eth_type_set = 1;
1850                         if (mask.ipv6 == &flow_tcf_mask_empty.ipv6)
1851                                 break;
1852                         spec.ipv6 = items->spec;
1853                         if (mask.ipv6->hdr.proto) {
1854                                 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1855                                                 spec.ipv6->hdr.proto);
1856                                 ip_proto_set = 1;
1857                         }
1858                         if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.src_addr)) {
1859                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC,
1860                                              sizeof(spec.ipv6->hdr.src_addr),
1861                                              spec.ipv6->hdr.src_addr);
1862                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC_MASK,
1863                                              sizeof(mask.ipv6->hdr.src_addr),
1864                                              mask.ipv6->hdr.src_addr);
1865                         }
1866                         if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.dst_addr)) {
1867                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST,
1868                                              sizeof(spec.ipv6->hdr.dst_addr),
1869                                              spec.ipv6->hdr.dst_addr);
1870                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST_MASK,
1871                                              sizeof(mask.ipv6->hdr.dst_addr),
1872                                              mask.ipv6->hdr.dst_addr);
1873                         }
1874                         break;
1875                 case RTE_FLOW_ITEM_TYPE_UDP:
1876                         item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
1877                         mask.udp = flow_tcf_item_mask
1878                                 (items, &rte_flow_item_udp_mask,
1879                                  &flow_tcf_mask_supported.udp,
1880                                  &flow_tcf_mask_empty.udp,
1881                                  sizeof(flow_tcf_mask_supported.udp),
1882                                  error);
1883                         assert(mask.udp);
1884                         if (!ip_proto_set)
1885                                 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1886                                                 IPPROTO_UDP);
1887                         if (mask.udp == &flow_tcf_mask_empty.udp)
1888                                 break;
1889                         spec.udp = items->spec;
1890                         if (mask.udp->hdr.src_port) {
1891                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_SRC,
1892                                                  spec.udp->hdr.src_port);
1893                                 mnl_attr_put_u16(nlh,
1894                                                  TCA_FLOWER_KEY_UDP_SRC_MASK,
1895                                                  mask.udp->hdr.src_port);
1896                         }
1897                         if (mask.udp->hdr.dst_port) {
1898                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_DST,
1899                                                  spec.udp->hdr.dst_port);
1900                                 mnl_attr_put_u16(nlh,
1901                                                  TCA_FLOWER_KEY_UDP_DST_MASK,
1902                                                  mask.udp->hdr.dst_port);
1903                         }
1904                         break;
1905                 case RTE_FLOW_ITEM_TYPE_TCP:
1906                         item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
1907                         mask.tcp = flow_tcf_item_mask
1908                                 (items, &rte_flow_item_tcp_mask,
1909                                  &flow_tcf_mask_supported.tcp,
1910                                  &flow_tcf_mask_empty.tcp,
1911                                  sizeof(flow_tcf_mask_supported.tcp),
1912                                  error);
1913                         assert(mask.tcp);
1914                         if (!ip_proto_set)
1915                                 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1916                                                 IPPROTO_TCP);
1917                         if (mask.tcp == &flow_tcf_mask_empty.tcp)
1918                                 break;
1919                         spec.tcp = items->spec;
1920                         if (mask.tcp->hdr.src_port) {
1921                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_SRC,
1922                                                  spec.tcp->hdr.src_port);
1923                                 mnl_attr_put_u16(nlh,
1924                                                  TCA_FLOWER_KEY_TCP_SRC_MASK,
1925                                                  mask.tcp->hdr.src_port);
1926                         }
1927                         if (mask.tcp->hdr.dst_port) {
1928                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_DST,
1929                                                  spec.tcp->hdr.dst_port);
1930                                 mnl_attr_put_u16(nlh,
1931                                                  TCA_FLOWER_KEY_TCP_DST_MASK,
1932                                                  mask.tcp->hdr.dst_port);
1933                         }
1934                         if (mask.tcp->hdr.tcp_flags) {
1935                                 mnl_attr_put_u16
1936                                         (nlh,
1937                                          TCA_FLOWER_KEY_TCP_FLAGS,
1938                                          rte_cpu_to_be_16
1939                                                 (spec.tcp->hdr.tcp_flags));
1940                                 mnl_attr_put_u16
1941                                         (nlh,
1942                                          TCA_FLOWER_KEY_TCP_FLAGS_MASK,
1943                                          rte_cpu_to_be_16
1944                                                 (mask.tcp->hdr.tcp_flags));
1945                         }
1946                         break;
1947                 default:
1948                         return rte_flow_error_set(error, ENOTSUP,
1949                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1950                                                   NULL, "item not supported");
1951                 }
1952         }
1953         na_flower_act = mnl_attr_nest_start(nlh, TCA_FLOWER_ACT);
1954         na_act_index_cur = 1;
1955         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1956                 struct nlattr *na_act_index;
1957                 struct nlattr *na_act;
1958                 unsigned int vlan_act;
1959                 unsigned int i;
1960
1961                 switch (actions->type) {
1962                 case RTE_FLOW_ACTION_TYPE_VOID:
1963                         break;
1964                 case RTE_FLOW_ACTION_TYPE_PORT_ID:
1965                         conf.port_id = actions->conf;
1966                         if (conf.port_id->original)
1967                                 i = 0;
1968                         else
1969                                 for (i = 0; ptoi[i].ifindex; ++i)
1970                                         if (ptoi[i].port_id == conf.port_id->id)
1971                                                 break;
1972                         assert(ptoi[i].ifindex);
1973                         na_act_index =
1974                                 mnl_attr_nest_start(nlh, na_act_index_cur++);
1975                         assert(na_act_index);
1976                         mnl_attr_put_strz(nlh, TCA_ACT_KIND, "mirred");
1977                         na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
1978                         assert(na_act);
1979                         mnl_attr_put(nlh, TCA_MIRRED_PARMS,
1980                                      sizeof(struct tc_mirred),
1981                                      &(struct tc_mirred){
1982                                         .action = TC_ACT_STOLEN,
1983                                         .eaction = TCA_EGRESS_REDIR,
1984                                         .ifindex = ptoi[i].ifindex,
1985                                      });
1986                         mnl_attr_nest_end(nlh, na_act);
1987                         mnl_attr_nest_end(nlh, na_act_index);
1988                         break;
1989                 case RTE_FLOW_ACTION_TYPE_JUMP:
1990                         conf.jump = actions->conf;
1991                         na_act_index =
1992                                 mnl_attr_nest_start(nlh, na_act_index_cur++);
1993                         assert(na_act_index);
1994                         mnl_attr_put_strz(nlh, TCA_ACT_KIND, "gact");
1995                         na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
1996                         assert(na_act);
1997                         mnl_attr_put(nlh, TCA_GACT_PARMS,
1998                                      sizeof(struct tc_gact),
1999                                      &(struct tc_gact){
2000                                         .action = TC_ACT_GOTO_CHAIN |
2001                                                   conf.jump->group,
2002                                      });
2003                         mnl_attr_nest_end(nlh, na_act);
2004                         mnl_attr_nest_end(nlh, na_act_index);
2005                         break;
2006                 case RTE_FLOW_ACTION_TYPE_DROP:
2007                         na_act_index =
2008                                 mnl_attr_nest_start(nlh, na_act_index_cur++);
2009                         assert(na_act_index);
2010                         mnl_attr_put_strz(nlh, TCA_ACT_KIND, "gact");
2011                         na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
2012                         assert(na_act);
2013                         mnl_attr_put(nlh, TCA_GACT_PARMS,
2014                                      sizeof(struct tc_gact),
2015                                      &(struct tc_gact){
2016                                         .action = TC_ACT_SHOT,
2017                                      });
2018                         mnl_attr_nest_end(nlh, na_act);
2019                         mnl_attr_nest_end(nlh, na_act_index);
2020                         break;
2021                 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
2022                         conf.of_push_vlan = NULL;
2023                         vlan_act = TCA_VLAN_ACT_POP;
2024                         goto action_of_vlan;
2025                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
2026                         conf.of_push_vlan = actions->conf;
2027                         vlan_act = TCA_VLAN_ACT_PUSH;
2028                         goto action_of_vlan;
2029                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
2030                         conf.of_set_vlan_vid = actions->conf;
2031                         if (na_vlan_id)
2032                                 goto override_na_vlan_id;
2033                         vlan_act = TCA_VLAN_ACT_MODIFY;
2034                         goto action_of_vlan;
2035                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
2036                         conf.of_set_vlan_pcp = actions->conf;
2037                         if (na_vlan_priority)
2038                                 goto override_na_vlan_priority;
2039                         vlan_act = TCA_VLAN_ACT_MODIFY;
2040                         goto action_of_vlan;
2041 action_of_vlan:
2042                         na_act_index =
2043                                 mnl_attr_nest_start(nlh, na_act_index_cur++);
2044                         assert(na_act_index);
2045                         mnl_attr_put_strz(nlh, TCA_ACT_KIND, "vlan");
2046                         na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
2047                         assert(na_act);
2048                         mnl_attr_put(nlh, TCA_VLAN_PARMS,
2049                                      sizeof(struct tc_vlan),
2050                                      &(struct tc_vlan){
2051                                         .action = TC_ACT_PIPE,
2052                                         .v_action = vlan_act,
2053                                      });
2054                         if (vlan_act == TCA_VLAN_ACT_POP) {
2055                                 mnl_attr_nest_end(nlh, na_act);
2056                                 mnl_attr_nest_end(nlh, na_act_index);
2057                                 break;
2058                         }
2059                         if (vlan_act == TCA_VLAN_ACT_PUSH)
2060                                 mnl_attr_put_u16(nlh,
2061                                                  TCA_VLAN_PUSH_VLAN_PROTOCOL,
2062                                                  conf.of_push_vlan->ethertype);
2063                         na_vlan_id = mnl_nlmsg_get_payload_tail(nlh);
2064                         mnl_attr_put_u16(nlh, TCA_VLAN_PAD, 0);
2065                         na_vlan_priority = mnl_nlmsg_get_payload_tail(nlh);
2066                         mnl_attr_put_u8(nlh, TCA_VLAN_PAD, 0);
2067                         mnl_attr_nest_end(nlh, na_act);
2068                         mnl_attr_nest_end(nlh, na_act_index);
2069                         if (actions->type ==
2070                             RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID) {
2071 override_na_vlan_id:
2072                                 na_vlan_id->nla_type = TCA_VLAN_PUSH_VLAN_ID;
2073                                 *(uint16_t *)mnl_attr_get_payload(na_vlan_id) =
2074                                         rte_be_to_cpu_16
2075                                         (conf.of_set_vlan_vid->vlan_vid);
2076                         } else if (actions->type ==
2077                                    RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP) {
2078 override_na_vlan_priority:
2079                                 na_vlan_priority->nla_type =
2080                                         TCA_VLAN_PUSH_VLAN_PRIORITY;
2081                                 *(uint8_t *)mnl_attr_get_payload
2082                                         (na_vlan_priority) =
2083                                         conf.of_set_vlan_pcp->vlan_pcp;
2084                         }
2085                         break;
2086                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
2087                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
2088                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
2089                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
2090                 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
2091                 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
2092                 case RTE_FLOW_ACTION_TYPE_SET_TTL:
2093                 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
2094                 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
2095                 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
2096                         na_act_index =
2097                                 mnl_attr_nest_start(nlh, na_act_index_cur++);
2098                         flow_tcf_create_pedit_mnl_msg(nlh,
2099                                                       &actions, item_flags);
2100                         mnl_attr_nest_end(nlh, na_act_index);
2101                         break;
2102                 default:
2103                         return rte_flow_error_set(error, ENOTSUP,
2104                                                   RTE_FLOW_ERROR_TYPE_ACTION,
2105                                                   actions,
2106                                                   "action not supported");
2107                 }
2108         }
2109         assert(na_flower);
2110         assert(na_flower_act);
2111         mnl_attr_nest_end(nlh, na_flower_act);
2112         mnl_attr_nest_end(nlh, na_flower);
2113         return 0;
2114 }
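
/*
 * Rough shape of the message built above (illustration only, attribute
 * order simplified):
 *
 *   tcmsg { ifindex, parent = ingress, prio, protocol = ETH_P_ALL }
 *   [TCA_CHAIN]                    (only when attr->group > 0)
 *   TCA_KIND = "flower"
 *   TCA_OPTIONS
 *     TCA_FLOWER_FLAGS = TCA_CLS_FLAGS_SKIP_SW
 *     TCA_FLOWER_KEY_* ...         (one or more per matched item field)
 *     TCA_FLOWER_ACT
 *       1: TCA_ACT_KIND + TCA_ACT_OPTIONS   (mirred/gact/vlan/pedit)
 *       2: ...
 */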
2115
2116 /**
2117  * Send Netlink message with acknowledgment.
2118  *
2119  * @param ctx
2120  *   Flow context to use.
2121  * @param nlh
2122  *   Message to send. This function always raises the NLM_F_ACK flag before
2123  *   sending.
2124  *
2125  * @return
2126  *   0 on success, a negative errno value otherwise and rte_errno is set.
2127  */
2128 static int
2129 flow_tcf_nl_ack(struct mlx5_flow_tcf_context *ctx, struct nlmsghdr *nlh)
2130 {
2131         alignas(struct nlmsghdr)
2132         uint8_t ans[mnl_nlmsg_size(sizeof(struct nlmsgerr)) +
2133                     nlh->nlmsg_len - sizeof(*nlh)];
2134         uint32_t seq = ctx->seq++;
2135         struct mnl_socket *nl = ctx->nl;
2136         int ret;
2137
2138         nlh->nlmsg_flags |= NLM_F_ACK;
2139         nlh->nlmsg_seq = seq;
2140         ret = mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
2141         if (ret != -1)
2142                 ret = mnl_socket_recvfrom(nl, ans, sizeof(ans));
2143         if (ret != -1)
2144                 ret = mnl_cb_run
2145                         (ans, ret, seq, mnl_socket_get_portid(nl), NULL, NULL);
2146         if (ret > 0)
2147                 return 0;
2148         rte_errno = errno;
2149         return -rte_errno;
2150 }
2151
2152 /**
2153  * Apply flow to E-Switch by sending Netlink message.
2154  *
2155  * @param[in] dev
2156  *   Pointer to Ethernet device.
2157  * @param[in, out] flow
2158  *   Pointer to the sub flow.
2159  * @param[out] error
2160  *   Pointer to the error structure.
2161  *
2162  * @return
2163  *   0 on success, a negative errno value otherwise and rte_errno is set.
2164  */
2165 static int
2166 flow_tcf_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
2167                struct rte_flow_error *error)
2168 {
2169         struct priv *priv = dev->data->dev_private;
2170         struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
2171         struct mlx5_flow *dev_flow;
2172         struct nlmsghdr *nlh;
2173
2174         dev_flow = LIST_FIRST(&flow->dev_flows);
2175         /* E-Switch flow can't be expanded. */
2176         assert(!LIST_NEXT(dev_flow, next));
2177         nlh = dev_flow->tcf.nlh;
2178         nlh->nlmsg_type = RTM_NEWTFILTER;
2179         nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
2180         if (!flow_tcf_nl_ack(ctx, nlh))
2181                 return 0;
2182         return rte_flow_error_set(error, rte_errno,
2183                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
2184                                   "netlink: failed to create TC flow rule");
2185 }
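
/*
 * For reference, a rule with a single PORT_ID fate action is roughly
 * equivalent to the following tc(8) command (illustration only, interface
 * names assumed):
 *
 *   tc filter add dev <ifname> ingress protocol all pref <priority + 1> \
 *       flower skip_sw <matches> \
 *       action mirred egress redirect dev <peer ifname>
 *
 * Other rules use gact (drop, goto chain), vlan or pedit actions instead.
 */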
2186
2187 /**
2188  * Remove flow from E-Switch by sending Netlink message.
2189  *
2190  * @param[in] dev
2191  *   Pointer to Ethernet device.
2192  * @param[in, out] flow
2193  *   Pointer to the sub flow.
2194  */
2195 static void
2196 flow_tcf_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
2197 {
2198         struct priv *priv = dev->data->dev_private;
2199         struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
2200         struct mlx5_flow *dev_flow;
2201         struct nlmsghdr *nlh;
2202
2203         if (!flow)
2204                 return;
2205         dev_flow = LIST_FIRST(&flow->dev_flows);
2206         if (!dev_flow)
2207                 return;
2208         /* E-Switch flow can't be expanded. */
2209         assert(!LIST_NEXT(dev_flow, next));
2210         nlh = dev_flow->tcf.nlh;
2211         nlh->nlmsg_type = RTM_DELTFILTER;
2212         nlh->nlmsg_flags = NLM_F_REQUEST;
2213         flow_tcf_nl_ack(ctx, nlh);
2214 }
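
/*
 * Removal re-uses the very same branded message with RTM_DELTFILTER,
 * roughly like "tc filter del dev <ifname> ingress ... flower" for the
 * handle set by flow_tcf_nl_brand() (illustration only).
 */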
2215
2216 /**
2217  * Remove flow from E-Switch and release resources of the device flow.
2218  *
2219  * @param[in] dev
2220  *   Pointer to Ethernet device.
2221  * @param[in, out] flow
2222  *   Pointer to the sub flow.
2223  */
2224 static void
2225 flow_tcf_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
2226 {
2227         struct mlx5_flow *dev_flow;
2228
2229         if (!flow)
2230                 return;
2231         flow_tcf_remove(dev, flow);
2232         dev_flow = LIST_FIRST(&flow->dev_flows);
2233         if (!dev_flow)
2234                 return;
2235         /* E-Switch flow can't be expanded. */
2236         assert(!LIST_NEXT(dev_flow, next));
2237         LIST_REMOVE(dev_flow, next);
2238         rte_free(dev_flow);
2239 }
2240
2241 /**
2242  * Query a flow.
2243  *
2244  * @see rte_flow_query()
2245  * @see rte_flow_ops
2246  */
2247 static int
2248 flow_tcf_query(struct rte_eth_dev *dev __rte_unused,
2249                struct rte_flow *flow __rte_unused,
2250                const struct rte_flow_action *actions __rte_unused,
2251                void *data __rte_unused,
2252                struct rte_flow_error *error __rte_unused)
2253 {
2254         rte_errno = ENOTSUP;
2255         return -rte_errno;
2256 }
2257
2258 const struct mlx5_flow_driver_ops mlx5_flow_tcf_drv_ops = {
2259         .validate = flow_tcf_validate,
2260         .prepare = flow_tcf_prepare,
2261         .translate = flow_tcf_translate,
2262         .apply = flow_tcf_apply,
2263         .remove = flow_tcf_remove,
2264         .destroy = flow_tcf_destroy,
2265         .query = flow_tcf_query,
2266 };
2267
2268 /**
2269  * Create and configure a libmnl socket for Netlink flow rules.
2270  *
2271  * @return
2272  *   A valid libmnl socket object pointer on success, NULL otherwise and
2273  *   rte_errno is set.
2274  */
2275 static struct mnl_socket *
2276 flow_tcf_mnl_socket_create(void)
2277 {
2278         struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
2279
2280         if (nl) {
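                /*
                 * Ask the kernel to omit the original request payload from
                 * ACK messages, keeping acknowledgments small. Kernels
                 * lacking NETLINK_CAP_ACK fail this setsockopt, which is
                 * ignored here.
                 */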
2281                 mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
2282                                       sizeof(int));
2283                 if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
2284                         return nl;
2285         }
2286         rte_errno = errno;
2287         if (nl)
2288                 mnl_socket_close(nl);
2289         return NULL;
2290 }
2291
2292 /**
2293  * Destroy a libmnl socket.
2294  *
2295  * @param nl
2296  *   Libmnl socket of the @p NETLINK_ROUTE kind.
2297  */
2298 static void
2299 flow_tcf_mnl_socket_destroy(struct mnl_socket *nl)
2300 {
2301         if (nl)
2302                 mnl_socket_close(nl);
2303 }
2304
2305 /**
2306  * Initialize ingress qdisc of a given network interface.
2307  *
2308  * @param ctx
2309  *   Pointer to tc-flower context to use.
2310  * @param ifindex
2311  *   Index of network interface to initialize.
2312  * @param[out] error
2313  *   Perform verbose error reporting if not NULL.
2314  *
2315  * @return
2316  *   0 on success, a negative errno value otherwise and rte_errno is set.
2317  */
2318 int
2319 mlx5_flow_tcf_init(struct mlx5_flow_tcf_context *ctx,
2320                    unsigned int ifindex, struct rte_flow_error *error)
2321 {
2322         struct nlmsghdr *nlh;
2323         struct tcmsg *tcm;
2324         alignas(struct nlmsghdr)
2325         uint8_t buf[mnl_nlmsg_size(sizeof(*tcm) + 128)];
2326
2327         /* Destroy existing ingress qdisc and everything attached to it. */
2328         nlh = mnl_nlmsg_put_header(buf);
2329         nlh->nlmsg_type = RTM_DELQDISC;
2330         nlh->nlmsg_flags = NLM_F_REQUEST;
2331         tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
2332         tcm->tcm_family = AF_UNSPEC;
2333         tcm->tcm_ifindex = ifindex;
2334         tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
2335         tcm->tcm_parent = TC_H_INGRESS;
2336         /* Ignore errors when qdisc is already absent. */
2337         if (flow_tcf_nl_ack(ctx, nlh) &&
2338             rte_errno != EINVAL && rte_errno != ENOENT)
2339                 return rte_flow_error_set(error, rte_errno,
2340                                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
2341                                           "netlink: failed to remove ingress"
2342                                           " qdisc");
2343         /* Create fresh ingress qdisc. */
2344         nlh = mnl_nlmsg_put_header(buf);
2345         nlh->nlmsg_type = RTM_NEWQDISC;
2346         nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
2347         tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
2348         tcm->tcm_family = AF_UNSPEC;
2349         tcm->tcm_ifindex = ifindex;
2350         tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
2351         tcm->tcm_parent = TC_H_INGRESS;
2352         mnl_attr_put_strz_check(nlh, sizeof(buf), TCA_KIND, "ingress");
2353         if (flow_tcf_nl_ack(ctx, nlh))
2354                 return rte_flow_error_set(error, rte_errno,
2355                                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
2356                                           "netlink: failed to create ingress"
2357                                           " qdisc");
2358         return 0;
2359 }
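
/*
 * The two requests above are roughly equivalent to (illustration only):
 *
 *   tc qdisc del dev <ifname> ingress   # failure ignored if already absent
 *   tc qdisc add dev <ifname> ingress
 */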
2360
2361 /**
2362  * Create a tc-flower context for Netlink flow rules.
2363  *
2364  * @return
2365  *   A valid tc-flower context pointer on success, NULL otherwise and
2366  *   rte_errno is set.
2367  */
2368 struct mlx5_flow_tcf_context *
2369 mlx5_flow_tcf_context_create(void)
2370 {
2371         struct mlx5_flow_tcf_context *ctx = rte_zmalloc(__func__,
2372                                                         sizeof(*ctx),
2373                                                         sizeof(uint32_t));
2374         if (!ctx)
2375                 goto error;
2376         ctx->nl = flow_tcf_mnl_socket_create();
2377         if (!ctx->nl)
2378                 goto error;
2379         ctx->buf_size = MNL_SOCKET_BUFFER_SIZE;
2380         ctx->buf = rte_zmalloc(__func__,
2381                                ctx->buf_size, sizeof(uint32_t));
2382         if (!ctx->buf)
2383                 goto error;
2384         ctx->seq = random();
2385         return ctx;
2386 error:
2387         mlx5_flow_tcf_context_destroy(ctx);
2388         return NULL;
2389 }
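
/*
 * Sketch of the intended call sequence; the actual call sites live
 * elsewhere in the PMD and error handling is simplified here:
 *
 *   struct rte_flow_error error;
 *   struct mlx5_flow_tcf_context *tcf = mlx5_flow_tcf_context_create();
 *
 *   if (tcf && !mlx5_flow_tcf_init(tcf, ifindex, &error))
 *       priv->tcf_context = tcf; // flows now go through mlx5_flow_tcf_drv_ops
 *   ...
 *   mlx5_flow_tcf_context_destroy(priv->tcf_context);
 */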
2390
2391 /**
2392  * Destroy a tc-flower context.
2393  *
2394  * @param ctx
2395  *   Pointer to the tc-flower context to destroy.
2396  */
2397 void
2398 mlx5_flow_tcf_context_destroy(struct mlx5_flow_tcf_context *ctx)
2399 {
2400         if (!ctx)
2401                 return;
2402         flow_tcf_mnl_socket_destroy(ctx->nl);
2403         rte_free(ctx->buf);
2404         rte_free(ctx);
2405 }