net/mlx5: add Linux TC flower driver for E-Switch flow
[dpdk.git] / drivers / net / mlx5 / mlx5_flow_tcf.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2018 6WIND S.A.
3  * Copyright 2018 Mellanox Technologies, Ltd
4  */
5
6 #include <assert.h>
7 #include <errno.h>
8 #include <libmnl/libmnl.h>
9 #include <linux/if_ether.h>
10 #include <linux/netlink.h>
11 #include <linux/pkt_cls.h>
12 #include <linux/pkt_sched.h>
13 #include <linux/rtnetlink.h>
14 #include <linux/tc_act/tc_gact.h>
15 #include <linux/tc_act/tc_mirred.h>
16 #include <netinet/in.h>
17 #include <stdalign.h>
18 #include <stdbool.h>
19 #include <stddef.h>
20 #include <stdint.h>
21 #include <stdlib.h>
22 #include <sys/socket.h>
23
24 #include <rte_byteorder.h>
25 #include <rte_errno.h>
26 #include <rte_ether.h>
27 #include <rte_flow.h>
28 #include <rte_malloc.h>
29
30 #include "mlx5.h"
31 #include "mlx5_flow.h"
32 #include "mlx5_autoconf.h"
33
34 #ifdef HAVE_TC_ACT_VLAN
35
36 #include <linux/tc_act/tc_vlan.h>
37
38 #else /* HAVE_TC_ACT_VLAN */
39
40 #define TCA_VLAN_ACT_POP 1
41 #define TCA_VLAN_ACT_PUSH 2
42 #define TCA_VLAN_ACT_MODIFY 3
43 #define TCA_VLAN_PARMS 2
44 #define TCA_VLAN_PUSH_VLAN_ID 3
45 #define TCA_VLAN_PUSH_VLAN_PROTOCOL 4
46 #define TCA_VLAN_PAD 5
47 #define TCA_VLAN_PUSH_VLAN_PRIORITY 6
48
49 struct tc_vlan {
50         tc_gen;
51         int v_action;
52 };
53
54 #endif /* HAVE_TC_ACT_VLAN */
55
56 /* Normally found in linux/netlink.h. */
57 #ifndef NETLINK_CAP_ACK
58 #define NETLINK_CAP_ACK 10
59 #endif
60
61 /* Normally found in linux/pkt_sched.h. */
62 #ifndef TC_H_MIN_INGRESS
63 #define TC_H_MIN_INGRESS 0xfff2u
64 #endif
65
66 /* Normally found in linux/pkt_cls.h. */
67 #ifndef TCA_CLS_FLAGS_SKIP_SW
68 #define TCA_CLS_FLAGS_SKIP_SW (1 << 1)
69 #endif
70 #ifndef HAVE_TCA_FLOWER_ACT
71 #define TCA_FLOWER_ACT 3
72 #endif
73 #ifndef HAVE_TCA_FLOWER_FLAGS
74 #define TCA_FLOWER_FLAGS 22
75 #endif
76 #ifndef HAVE_TCA_FLOWER_KEY_ETH_TYPE
77 #define TCA_FLOWER_KEY_ETH_TYPE 8
78 #endif
79 #ifndef HAVE_TCA_FLOWER_KEY_ETH_DST
80 #define TCA_FLOWER_KEY_ETH_DST 4
81 #endif
82 #ifndef HAVE_TCA_FLOWER_KEY_ETH_DST_MASK
83 #define TCA_FLOWER_KEY_ETH_DST_MASK 5
84 #endif
85 #ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC
86 #define TCA_FLOWER_KEY_ETH_SRC 6
87 #endif
88 #ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC_MASK
89 #define TCA_FLOWER_KEY_ETH_SRC_MASK 7
90 #endif
91 #ifndef HAVE_TCA_FLOWER_KEY_IP_PROTO
92 #define TCA_FLOWER_KEY_IP_PROTO 9
93 #endif
94 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC
95 #define TCA_FLOWER_KEY_IPV4_SRC 10
96 #endif
97 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC_MASK
98 #define TCA_FLOWER_KEY_IPV4_SRC_MASK 11
99 #endif
100 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST
101 #define TCA_FLOWER_KEY_IPV4_DST 12
102 #endif
103 #ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST_MASK
104 #define TCA_FLOWER_KEY_IPV4_DST_MASK 13
105 #endif
106 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC
107 #define TCA_FLOWER_KEY_IPV6_SRC 14
108 #endif
109 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC_MASK
110 #define TCA_FLOWER_KEY_IPV6_SRC_MASK 15
111 #endif
112 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST
113 #define TCA_FLOWER_KEY_IPV6_DST 16
114 #endif
115 #ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST_MASK
116 #define TCA_FLOWER_KEY_IPV6_DST_MASK 17
117 #endif
118 #ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC
119 #define TCA_FLOWER_KEY_TCP_SRC 18
120 #endif
121 #ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC_MASK
122 #define TCA_FLOWER_KEY_TCP_SRC_MASK 35
123 #endif
124 #ifndef HAVE_TCA_FLOWER_KEY_TCP_DST
125 #define TCA_FLOWER_KEY_TCP_DST 19
126 #endif
127 #ifndef HAVE_TCA_FLOWER_KEY_TCP_DST_MASK
128 #define TCA_FLOWER_KEY_TCP_DST_MASK 36
129 #endif
130 #ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC
131 #define TCA_FLOWER_KEY_UDP_SRC 20
132 #endif
133 #ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC_MASK
134 #define TCA_FLOWER_KEY_UDP_SRC_MASK 37
135 #endif
136 #ifndef HAVE_TCA_FLOWER_KEY_UDP_DST
137 #define TCA_FLOWER_KEY_UDP_DST 21
138 #endif
139 #ifndef HAVE_TCA_FLOWER_KEY_UDP_DST_MASK
140 #define TCA_FLOWER_KEY_UDP_DST_MASK 38
141 #endif
142 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_ID
143 #define TCA_FLOWER_KEY_VLAN_ID 23
144 #endif
145 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_PRIO
146 #define TCA_FLOWER_KEY_VLAN_PRIO 24
147 #endif
148 #ifndef HAVE_TCA_FLOWER_KEY_VLAN_ETH_TYPE
149 #define TCA_FLOWER_KEY_VLAN_ETH_TYPE 25
150 #endif
151
152 #ifndef IPV6_ADDR_LEN
153 #define IPV6_ADDR_LEN 16
154 #endif
155
/**
 * Empty masks for known item types.
 *
 * Zero-initialized on purpose: flow_tcf_item_mask() returns the address of
 * the matching member when an item carries no "spec", so callers can detect
 * the "nothing specified" case by comparing pointers (see the PORT_ID item
 * handling in flow_tcf_validate()).
 */
static const union {
	struct rte_flow_item_port_id port_id;
	struct rte_flow_item_eth eth;
	struct rte_flow_item_vlan vlan;
	struct rte_flow_item_ipv4 ipv4;
	struct rte_flow_item_ipv6 ipv6;
	struct rte_flow_item_tcp tcp;
	struct rte_flow_item_udp udp;
} flow_tcf_mask_empty;
166
/**
 * Supported masks for known item types.
 *
 * Widest mask this driver can handle for each field; flow_tcf_item_mask()
 * rejects any user-provided mask with bits set outside these. Only
 * full-field (all-ones) masks are listed because the TC flower interface
 * used here offers no partial matching on these fields.
 */
static const struct {
	struct rte_flow_item_port_id port_id;
	struct rte_flow_item_eth eth;
	struct rte_flow_item_vlan vlan;
	struct rte_flow_item_ipv4 ipv4;
	struct rte_flow_item_ipv6 ipv6;
	struct rte_flow_item_tcp tcp;
	struct rte_flow_item_udp udp;
} flow_tcf_mask_supported = {
	.port_id = {
		.id = 0xffffffff,
	},
	.eth = {
		.type = RTE_BE16(0xffff),
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
		.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
	},
	.vlan = {
		/* PCP and VID only, no DEI. */
		.tci = RTE_BE16(0xefff),
		.inner_type = RTE_BE16(0xffff),
	},
	.ipv4.hdr = {
		.next_proto_id = 0xff,
		.src_addr = RTE_BE32(0xffffffff),
		.dst_addr = RTE_BE32(0xffffffff),
	},
	.ipv6.hdr = {
		.proto = 0xff,
		.src_addr =
			"\xff\xff\xff\xff\xff\xff\xff\xff"
			"\xff\xff\xff\xff\xff\xff\xff\xff",
		.dst_addr =
			"\xff\xff\xff\xff\xff\xff\xff\xff"
			"\xff\xff\xff\xff\xff\xff\xff\xff",
	},
	.tcp.hdr = {
		.src_port = RTE_BE16(0xffff),
		.dst_port = RTE_BE16(0xffff),
	},
	.udp.hdr = {
		.src_port = RTE_BE16(0xffff),
		.dst_port = RTE_BE16(0xffff),
	},
};
213
/* Aligned size of a bare netlink attribute header. */
#define SZ_NLATTR_HDR MNL_ALIGN(sizeof(struct nlattr))
/* A nested attribute contributes only its own header to the size. */
#define SZ_NLATTR_NEST SZ_NLATTR_HDR
/* Aligned size of an attribute carrying a (len)-byte payload. */
#define SZ_NLATTR_DATA_OF(len) MNL_ALIGN(SZ_NLATTR_HDR + (len))
/* Aligned size of an attribute carrying one object of the given type. */
#define SZ_NLATTR_TYPE_OF(typ) SZ_NLATTR_DATA_OF(sizeof(typ))
/* Aligned size of an attribute carrying the given NUL-terminated string. */
#define SZ_NLATTR_STRZ_OF(str) SZ_NLATTR_DATA_OF(strlen(str) + 1)

/*
 * Upper bound on flow_tcf_ptoi[] entries: every port sharing the device,
 * plus one in case no switch domain is reported (stand-alone port) and
 * one for the zero-ifindex terminator (see flow_tcf_build_ptoi_table()).
 */
#define PTOI_TABLE_SZ_MAX(dev) (mlx5_dev_to_port_id((dev)->device, NULL, 0) + 2)

/** DPDK port to network interface index (ifindex) conversion. */
struct flow_tcf_ptoi {
	uint16_t port_id; /**< DPDK port ID. */
	unsigned int ifindex; /**< Network interface index. */
};

/* Actions that decide a packet's fate; at most one allowed per rule. */
#define MLX5_TCF_FATE_ACTIONS (MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_PORT_ID)
229
230 /**
231  * Retrieve mask for pattern item.
232  *
233  * This function does basic sanity checks on a pattern item in order to
234  * return the most appropriate mask for it.
235  *
236  * @param[in] item
237  *   Item specification.
238  * @param[in] mask_default
239  *   Default mask for pattern item as specified by the flow API.
240  * @param[in] mask_supported
241  *   Mask fields supported by the implementation.
242  * @param[in] mask_empty
243  *   Empty mask to return when there is no specification.
244  * @param[out] error
245  *   Perform verbose error reporting if not NULL.
246  *
247  * @return
248  *   Either @p item->mask or one of the mask parameters on success, NULL
249  *   otherwise and rte_errno is set.
250  */
251 static const void *
252 flow_tcf_item_mask(const struct rte_flow_item *item, const void *mask_default,
253                    const void *mask_supported, const void *mask_empty,
254                    size_t mask_size, struct rte_flow_error *error)
255 {
256         const uint8_t *mask;
257         size_t i;
258
259         /* item->last and item->mask cannot exist without item->spec. */
260         if (!item->spec && (item->mask || item->last)) {
261                 rte_flow_error_set(error, EINVAL,
262                                    RTE_FLOW_ERROR_TYPE_ITEM, item,
263                                    "\"mask\" or \"last\" field provided without"
264                                    " a corresponding \"spec\"");
265                 return NULL;
266         }
267         /* No spec, no mask, no problem. */
268         if (!item->spec)
269                 return mask_empty;
270         mask = item->mask ? item->mask : mask_default;
271         assert(mask);
272         /*
273          * Single-pass check to make sure that:
274          * - Mask is supported, no bits are set outside mask_supported.
275          * - Both item->spec and item->last are included in mask.
276          */
277         for (i = 0; i != mask_size; ++i) {
278                 if (!mask[i])
279                         continue;
280                 if ((mask[i] | ((const uint8_t *)mask_supported)[i]) !=
281                     ((const uint8_t *)mask_supported)[i]) {
282                         rte_flow_error_set(error, ENOTSUP,
283                                            RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
284                                            "unsupported field found"
285                                            " in \"mask\"");
286                         return NULL;
287                 }
288                 if (item->last &&
289                     (((const uint8_t *)item->spec)[i] & mask[i]) !=
290                     (((const uint8_t *)item->last)[i] & mask[i])) {
291                         rte_flow_error_set(error, EINVAL,
292                                            RTE_FLOW_ERROR_TYPE_ITEM_LAST,
293                                            item->last,
294                                            "range between \"spec\" and \"last\""
295                                            " not comprised in \"mask\"");
296                         return NULL;
297                 }
298         }
299         return mask;
300 }
301
302 /**
303  * Build a conversion table between port ID and ifindex.
304  *
305  * @param[in] dev
306  *   Pointer to Ethernet device.
307  * @param[out] ptoi
308  *   Pointer to ptoi table.
309  * @param[in] len
310  *   Size of ptoi table provided.
311  *
312  * @return
313  *   Size of ptoi table filled.
314  */
315 static unsigned int
316 flow_tcf_build_ptoi_table(struct rte_eth_dev *dev, struct flow_tcf_ptoi *ptoi,
317                           unsigned int len)
318 {
319         unsigned int n = mlx5_dev_to_port_id(dev->device, NULL, 0);
320         uint16_t port_id[n + 1];
321         unsigned int i;
322         unsigned int own = 0;
323
324         /* At least one port is needed when no switch domain is present. */
325         if (!n) {
326                 n = 1;
327                 port_id[0] = dev->data->port_id;
328         } else {
329                 n = RTE_MIN(mlx5_dev_to_port_id(dev->device, port_id, n), n);
330         }
331         if (n > len)
332                 return 0;
333         for (i = 0; i != n; ++i) {
334                 struct rte_eth_dev_info dev_info;
335
336                 rte_eth_dev_info_get(port_id[i], &dev_info);
337                 if (port_id[i] == dev->data->port_id)
338                         own = i;
339                 ptoi[i].port_id = port_id[i];
340                 ptoi[i].ifindex = dev_info.if_index;
341         }
342         /* Ensure first entry of ptoi[] is the current device. */
343         if (own) {
344                 ptoi[n] = ptoi[0];
345                 ptoi[0] = ptoi[own];
346                 ptoi[own] = ptoi[n];
347         }
348         /* An entry with zero ifindex terminates ptoi[]. */
349         ptoi[n].port_id = 0;
350         ptoi[n].ifindex = 0;
351         return n;
352 }
353
354 /**
355  * Verify the @p attr will be correctly understood by the E-switch.
356  *
357  * @param[in] attr
358  *   Pointer to flow attributes
359  * @param[out] error
360  *   Pointer to error structure.
361  *
362  * @return
363  *   0 on success, a negative errno value otherwise and rte_errno is set.
364  */
365 static int
366 flow_tcf_validate_attributes(const struct rte_flow_attr *attr,
367                              struct rte_flow_error *error)
368 {
369         /*
370          * Supported attributes: no groups, some priorities and ingress only.
371          * Don't care about transfer as it is the caller's problem.
372          */
373         if (attr->group)
374                 return rte_flow_error_set(error, ENOTSUP,
375                                           RTE_FLOW_ERROR_TYPE_ATTR_GROUP, attr,
376                                           "groups are not supported");
377         if (attr->priority > 0xfffe)
378                 return rte_flow_error_set(error, ENOTSUP,
379                                           RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
380                                           attr,
381                                           "lowest priority level is 0xfffe");
382         if (!attr->ingress)
383                 return rte_flow_error_set(error, EINVAL,
384                                           RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
385                                           attr, "only ingress is supported");
386         if (attr->egress)
387                 return rte_flow_error_set(error, ENOTSUP,
388                                           RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
389                                           attr, "egress is not supported");
390         return 0;
391 }
392
393 /**
394  * Validate flow for E-Switch.
395  *
396  * @param[in] priv
397  *   Pointer to the priv structure.
398  * @param[in] attr
399  *   Pointer to the flow attributes.
400  * @param[in] items
401  *   Pointer to the list of items.
402  * @param[in] actions
403  *   Pointer to the list of actions.
404  * @param[out] error
405  *   Pointer to the error structure.
406  *
407  * @return
408  *   0 on success, a negative errno value otherwise and rte_ernno is set.
409  */
410 static int
411 flow_tcf_validate(struct rte_eth_dev *dev,
412                   const struct rte_flow_attr *attr,
413                   const struct rte_flow_item items[],
414                   const struct rte_flow_action actions[],
415                   struct rte_flow_error *error)
416 {
417         union {
418                 const struct rte_flow_item_port_id *port_id;
419                 const struct rte_flow_item_eth *eth;
420                 const struct rte_flow_item_vlan *vlan;
421                 const struct rte_flow_item_ipv4 *ipv4;
422                 const struct rte_flow_item_ipv6 *ipv6;
423                 const struct rte_flow_item_tcp *tcp;
424                 const struct rte_flow_item_udp *udp;
425         } spec, mask;
426         union {
427                 const struct rte_flow_action_port_id *port_id;
428                 const struct rte_flow_action_of_push_vlan *of_push_vlan;
429                 const struct rte_flow_action_of_set_vlan_vid *
430                         of_set_vlan_vid;
431                 const struct rte_flow_action_of_set_vlan_pcp *
432                         of_set_vlan_pcp;
433         } conf;
434         uint32_t item_flags = 0;
435         uint32_t action_flags = 0;
436         uint8_t next_protocol = -1;
437         unsigned int tcm_ifindex = 0;
438         struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
439         bool in_port_id_set;
440         int ret;
441
442         claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
443                                                 PTOI_TABLE_SZ_MAX(dev)));
444         ret = flow_tcf_validate_attributes(attr, error);
445         if (ret < 0)
446                 return ret;
447         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
448                 unsigned int i;
449
450                 switch (items->type) {
451                 case RTE_FLOW_ITEM_TYPE_VOID:
452                         break;
453                 case RTE_FLOW_ITEM_TYPE_PORT_ID:
454                         mask.port_id = flow_tcf_item_mask
455                                 (items, &rte_flow_item_port_id_mask,
456                                  &flow_tcf_mask_supported.port_id,
457                                  &flow_tcf_mask_empty.port_id,
458                                  sizeof(flow_tcf_mask_supported.port_id),
459                                  error);
460                         if (!mask.port_id)
461                                 return -rte_errno;
462                         if (mask.port_id == &flow_tcf_mask_empty.port_id) {
463                                 in_port_id_set = 1;
464                                 break;
465                         }
466                         spec.port_id = items->spec;
467                         if (mask.port_id->id && mask.port_id->id != 0xffffffff)
468                                 return rte_flow_error_set
469                                         (error, ENOTSUP,
470                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
471                                          mask.port_id,
472                                          "no support for partial mask on"
473                                          " \"id\" field");
474                         if (!mask.port_id->id)
475                                 i = 0;
476                         else
477                                 for (i = 0; ptoi[i].ifindex; ++i)
478                                         if (ptoi[i].port_id == spec.port_id->id)
479                                                 break;
480                         if (!ptoi[i].ifindex)
481                                 return rte_flow_error_set
482                                         (error, ENODEV,
483                                          RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
484                                          spec.port_id,
485                                          "missing data to convert port ID to"
486                                          " ifindex");
487                         if (in_port_id_set && ptoi[i].ifindex != tcm_ifindex)
488                                 return rte_flow_error_set
489                                         (error, ENOTSUP,
490                                          RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
491                                          spec.port_id,
492                                          "cannot match traffic for"
493                                          " several port IDs through"
494                                          " a single flow rule");
495                         tcm_ifindex = ptoi[i].ifindex;
496                         in_port_id_set = 1;
497                         break;
498                 case RTE_FLOW_ITEM_TYPE_ETH:
499                         ret = mlx5_flow_validate_item_eth(items, item_flags,
500                                                           error);
501                         if (ret < 0)
502                                 return ret;
503                         item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
504                         /* TODO:
505                          * Redundant check due to different supported mask.
506                          * Same for the rest of items.
507                          */
508                         mask.eth = flow_tcf_item_mask
509                                 (items, &rte_flow_item_eth_mask,
510                                  &flow_tcf_mask_supported.eth,
511                                  &flow_tcf_mask_empty.eth,
512                                  sizeof(flow_tcf_mask_supported.eth),
513                                  error);
514                         if (!mask.eth)
515                                 return -rte_errno;
516                         if (mask.eth->type && mask.eth->type !=
517                             RTE_BE16(0xffff))
518                                 return rte_flow_error_set
519                                         (error, ENOTSUP,
520                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
521                                          mask.eth,
522                                          "no support for partial mask on"
523                                          " \"type\" field");
524                         break;
525                 case RTE_FLOW_ITEM_TYPE_VLAN:
526                         ret = mlx5_flow_validate_item_vlan(items, item_flags,
527                                                            error);
528                         if (ret < 0)
529                                 return ret;
530                         item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
531                         mask.vlan = flow_tcf_item_mask
532                                 (items, &rte_flow_item_vlan_mask,
533                                  &flow_tcf_mask_supported.vlan,
534                                  &flow_tcf_mask_empty.vlan,
535                                  sizeof(flow_tcf_mask_supported.vlan),
536                                  error);
537                         if (!mask.vlan)
538                                 return -rte_errno;
539                         if ((mask.vlan->tci & RTE_BE16(0xe000) &&
540                              (mask.vlan->tci & RTE_BE16(0xe000)) !=
541                               RTE_BE16(0xe000)) ||
542                             (mask.vlan->tci & RTE_BE16(0x0fff) &&
543                              (mask.vlan->tci & RTE_BE16(0x0fff)) !=
544                               RTE_BE16(0x0fff)) ||
545                             (mask.vlan->inner_type &&
546                              mask.vlan->inner_type != RTE_BE16(0xffff)))
547                                 return rte_flow_error_set
548                                         (error, ENOTSUP,
549                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
550                                          mask.vlan,
551                                          "no support for partial masks on"
552                                          " \"tci\" (PCP and VID parts) and"
553                                          " \"inner_type\" fields");
554                         break;
555                 case RTE_FLOW_ITEM_TYPE_IPV4:
556                         ret = mlx5_flow_validate_item_ipv4(items, item_flags,
557                                                            error);
558                         if (ret < 0)
559                                 return ret;
560                         item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
561                         mask.ipv4 = flow_tcf_item_mask
562                                 (items, &rte_flow_item_ipv4_mask,
563                                  &flow_tcf_mask_supported.ipv4,
564                                  &flow_tcf_mask_empty.ipv4,
565                                  sizeof(flow_tcf_mask_supported.ipv4),
566                                  error);
567                         if (!mask.ipv4)
568                                 return -rte_errno;
569                         if (mask.ipv4->hdr.next_proto_id &&
570                             mask.ipv4->hdr.next_proto_id != 0xff)
571                                 return rte_flow_error_set
572                                         (error, ENOTSUP,
573                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
574                                          mask.ipv4,
575                                          "no support for partial mask on"
576                                          " \"hdr.next_proto_id\" field");
577                         else if (mask.ipv4->hdr.next_proto_id)
578                                 next_protocol =
579                                         ((const struct rte_flow_item_ipv4 *)
580                                          (items->spec))->hdr.next_proto_id;
581                         break;
582                 case RTE_FLOW_ITEM_TYPE_IPV6:
583                         ret = mlx5_flow_validate_item_ipv6(items, item_flags,
584                                                            error);
585                         if (ret < 0)
586                                 return ret;
587                         item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
588                         mask.ipv6 = flow_tcf_item_mask
589                                 (items, &rte_flow_item_ipv6_mask,
590                                  &flow_tcf_mask_supported.ipv6,
591                                  &flow_tcf_mask_empty.ipv6,
592                                  sizeof(flow_tcf_mask_supported.ipv6),
593                                  error);
594                         if (!mask.ipv6)
595                                 return -rte_errno;
596                         if (mask.ipv6->hdr.proto &&
597                             mask.ipv6->hdr.proto != 0xff)
598                                 return rte_flow_error_set
599                                         (error, ENOTSUP,
600                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK,
601                                          mask.ipv6,
602                                          "no support for partial mask on"
603                                          " \"hdr.proto\" field");
604                         else if (mask.ipv6->hdr.proto)
605                                 next_protocol =
606                                         ((const struct rte_flow_item_ipv6 *)
607                                          (items->spec))->hdr.proto;
608                         break;
609                 case RTE_FLOW_ITEM_TYPE_UDP:
610                         ret = mlx5_flow_validate_item_udp(items, item_flags,
611                                                           next_protocol, error);
612                         if (ret < 0)
613                                 return ret;
614                         item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
615                         mask.udp = flow_tcf_item_mask
616                                 (items, &rte_flow_item_udp_mask,
617                                  &flow_tcf_mask_supported.udp,
618                                  &flow_tcf_mask_empty.udp,
619                                  sizeof(flow_tcf_mask_supported.udp),
620                                  error);
621                         if (!mask.udp)
622                                 return -rte_errno;
623                         break;
624                 case RTE_FLOW_ITEM_TYPE_TCP:
625                         ret = mlx5_flow_validate_item_tcp(items, item_flags,
626                                                           next_protocol, error);
627                         if (ret < 0)
628                                 return ret;
629                         item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
630                         mask.tcp = flow_tcf_item_mask
631                                 (items, &rte_flow_item_tcp_mask,
632                                  &flow_tcf_mask_supported.tcp,
633                                  &flow_tcf_mask_empty.tcp,
634                                  sizeof(flow_tcf_mask_supported.tcp),
635                                  error);
636                         if (!mask.tcp)
637                                 return -rte_errno;
638                         break;
639                 default:
640                         return rte_flow_error_set(error, ENOTSUP,
641                                                   RTE_FLOW_ERROR_TYPE_ITEM,
642                                                   NULL, "item not supported");
643                 }
644         }
645         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
646                 unsigned int i;
647
648                 switch (actions->type) {
649                 case RTE_FLOW_ACTION_TYPE_VOID:
650                         break;
651                 case RTE_FLOW_ACTION_TYPE_PORT_ID:
652                         if (action_flags & MLX5_TCF_FATE_ACTIONS)
653                                 return rte_flow_error_set
654                                         (error, EINVAL,
655                                          RTE_FLOW_ERROR_TYPE_ACTION, actions,
656                                          "can't have multiple fate actions");
657                         conf.port_id = actions->conf;
658                         if (conf.port_id->original)
659                                 i = 0;
660                         else
661                                 for (i = 0; ptoi[i].ifindex; ++i)
662                                         if (ptoi[i].port_id == conf.port_id->id)
663                                                 break;
664                         if (!ptoi[i].ifindex)
665                                 return rte_flow_error_set
666                                         (error, ENODEV,
667                                          RTE_FLOW_ERROR_TYPE_ACTION_CONF,
668                                          conf.port_id,
669                                          "missing data to convert port ID to"
670                                          " ifindex");
671                         action_flags |= MLX5_FLOW_ACTION_PORT_ID;
672                         break;
673                 case RTE_FLOW_ACTION_TYPE_DROP:
674                         if (action_flags & MLX5_TCF_FATE_ACTIONS)
675                                 return rte_flow_error_set
676                                         (error, EINVAL,
677                                          RTE_FLOW_ERROR_TYPE_ACTION, actions,
678                                          "can't have multiple fate actions");
679                         action_flags |= MLX5_FLOW_ACTION_DROP;
680                         break;
681                 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
682                         action_flags |= MLX5_FLOW_ACTION_OF_POP_VLAN;
683                         break;
684                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
685                         action_flags |= MLX5_FLOW_ACTION_OF_PUSH_VLAN;
686                         break;
687                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
688                         action_flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
689                         break;
690                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
691                         action_flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
692                         break;
693                 default:
694                         return rte_flow_error_set(error, ENOTSUP,
695                                                   RTE_FLOW_ERROR_TYPE_ACTION,
696                                                   actions,
697                                                   "action not supported");
698                 }
699         }
700         if (!(action_flags & MLX5_TCF_FATE_ACTIONS))
701                 return rte_flow_error_set(error, EINVAL,
702                                           RTE_FLOW_ERROR_TYPE_ACTION, actions,
703                                           "no fate action is found");
704         return 0;
705 }
706
707 /**
708  * Calculate maximum size of memory for flow items of Linux TC flower and
709  * extract specified items.
710  *
711  * @param[in] items
712  *   Pointer to the list of items.
713  * @param[out] item_flags
714  *   Pointer to the detected items.
715  *
716  * @return
717  *   Maximum size of memory for items.
718  */
719 static int
720 flow_tcf_get_items_and_size(const struct rte_flow_item items[],
721                             uint64_t *item_flags)
722 {
723         int size = 0;
724         uint64_t flags = 0;
725
726         size += SZ_NLATTR_STRZ_OF("flower") +
727                 SZ_NLATTR_NEST + /* TCA_OPTIONS. */
728                 SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CLS_FLAGS_SKIP_SW. */
729         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
730                 switch (items->type) {
731                 case RTE_FLOW_ITEM_TYPE_VOID:
732                         break;
733                 case RTE_FLOW_ITEM_TYPE_PORT_ID:
734                         break;
735                 case RTE_FLOW_ITEM_TYPE_ETH:
736                         size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
737                                 SZ_NLATTR_DATA_OF(ETHER_ADDR_LEN) * 4;
738                                 /* dst/src MAC addr and mask. */
739                         flags |= MLX5_FLOW_LAYER_OUTER_L2;
740                         break;
741                 case RTE_FLOW_ITEM_TYPE_VLAN:
742                         size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
743                                 SZ_NLATTR_TYPE_OF(uint16_t) +
744                                 /* VLAN Ether type. */
745                                 SZ_NLATTR_TYPE_OF(uint8_t) + /* VLAN prio. */
746                                 SZ_NLATTR_TYPE_OF(uint16_t); /* VLAN ID. */
747                         flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
748                         break;
749                 case RTE_FLOW_ITEM_TYPE_IPV4:
750                         size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
751                                 SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
752                                 SZ_NLATTR_TYPE_OF(uint32_t) * 4;
753                                 /* dst/src IP addr and mask. */
754                         flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
755                         break;
756                 case RTE_FLOW_ITEM_TYPE_IPV6:
757                         size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
758                                 SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
759                                 SZ_NLATTR_TYPE_OF(IPV6_ADDR_LEN) * 4;
760                                 /* dst/src IP addr and mask. */
761                         flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
762                         break;
763                 case RTE_FLOW_ITEM_TYPE_UDP:
764                         size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
765                                 SZ_NLATTR_TYPE_OF(uint16_t) * 4;
766                                 /* dst/src port and mask. */
767                         flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
768                         break;
769                 case RTE_FLOW_ITEM_TYPE_TCP:
770                         size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
771                                 SZ_NLATTR_TYPE_OF(uint16_t) * 4;
772                                 /* dst/src port and mask. */
773                         flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
774                         break;
775                 default:
776                         DRV_LOG(WARNING,
777                                 "unsupported item %p type %d,"
778                                 " items must be validated before flow creation",
779                                 (const void *)items, items->type);
780                         break;
781                 }
782         }
783         *item_flags = flags;
784         return size;
785 }
786
787 /**
788  * Calculate maximum size of memory for flow actions of Linux TC flower and
789  * extract specified actions.
790  *
791  * @param[in] actions
792  *   Pointer to the list of actions.
793  * @param[out] action_flags
794  *   Pointer to the detected actions.
795  *
796  * @return
797  *   Maximum size of memory for actions.
798  */
799 static int
800 flow_tcf_get_actions_and_size(const struct rte_flow_action actions[],
801                               uint64_t *action_flags)
802 {
803         int size = 0;
804         uint64_t flags = 0;
805
806         size += SZ_NLATTR_NEST; /* TCA_FLOWER_ACT. */
807         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
808                 switch (actions->type) {
809                 case RTE_FLOW_ACTION_TYPE_VOID:
810                         break;
811                 case RTE_FLOW_ACTION_TYPE_PORT_ID:
812                         size += SZ_NLATTR_NEST + /* na_act_index. */
813                                 SZ_NLATTR_STRZ_OF("mirred") +
814                                 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
815                                 SZ_NLATTR_TYPE_OF(struct tc_mirred);
816                         flags |= MLX5_FLOW_ACTION_PORT_ID;
817                         break;
818                 case RTE_FLOW_ACTION_TYPE_DROP:
819                         size += SZ_NLATTR_NEST + /* na_act_index. */
820                                 SZ_NLATTR_STRZ_OF("gact") +
821                                 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
822                                 SZ_NLATTR_TYPE_OF(struct tc_gact);
823                         flags |= MLX5_FLOW_ACTION_DROP;
824                         break;
825                 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
826                         flags |= MLX5_FLOW_ACTION_OF_POP_VLAN;
827                         goto action_of_vlan;
828                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
829                         flags |= MLX5_FLOW_ACTION_OF_PUSH_VLAN;
830                         goto action_of_vlan;
831                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
832                         flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
833                         goto action_of_vlan;
834                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
835                         flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
836                         goto action_of_vlan;
837 action_of_vlan:
838                         size += SZ_NLATTR_NEST + /* na_act_index. */
839                                 SZ_NLATTR_STRZ_OF("vlan") +
840                                 SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
841                                 SZ_NLATTR_TYPE_OF(struct tc_vlan) +
842                                 SZ_NLATTR_TYPE_OF(uint16_t) +
843                                 /* VLAN protocol. */
844                                 SZ_NLATTR_TYPE_OF(uint16_t) + /* VLAN ID. */
845                                 SZ_NLATTR_TYPE_OF(uint8_t); /* VLAN prio. */
846                         break;
847                 default:
848                         DRV_LOG(WARNING,
849                                 "unsupported action %p type %d,"
850                                 " items must be validated before flow creation",
851                                 (const void *)actions, actions->type);
852                         break;
853                 }
854         }
855         *action_flags = flags;
856         return size;
857 }
858
859 /**
860  * Brand rtnetlink buffer with unique handle.
861  *
862  * This handle should be unique for a given network interface to avoid
863  * collisions.
864  *
865  * @param nlh
866  *   Pointer to Netlink message.
867  * @param handle
868  *   Unique 32-bit handle to use.
869  */
870 static void
871 flow_tcf_nl_brand(struct nlmsghdr *nlh, uint32_t handle)
872 {
873         struct tcmsg *tcm = mnl_nlmsg_get_payload(nlh);
874
875         tcm->tcm_handle = handle;
876         DRV_LOG(DEBUG, "Netlink msg %p is branded with handle %x",
877                 (void *)nlh, handle);
878 }
879
880 /**
881  * Prepare a flow object for Linux TC flower. It calculates the maximum size of
882  * memory required, allocates the memory, initializes Netlink message headers
883  * and set unique TC message handle.
884  *
885  * @param[in] attr
886  *   Pointer to the flow attributes.
887  * @param[in] items
888  *   Pointer to the list of items.
889  * @param[in] actions
890  *   Pointer to the list of actions.
891  * @param[out] item_flags
892  *   Pointer to bit mask of all items detected.
893  * @param[out] action_flags
894  *   Pointer to bit mask of all actions detected.
895  * @param[out] error
896  *   Pointer to the error structure.
897  *
898  * @return
899  *   Pointer to mlx5_flow object on success,
900  *   otherwise NULL and rte_ernno is set.
901  */
902 static struct mlx5_flow *
903 flow_tcf_prepare(const struct rte_flow_attr *attr __rte_unused,
904                  const struct rte_flow_item items[],
905                  const struct rte_flow_action actions[],
906                  uint64_t *item_flags, uint64_t *action_flags,
907                  struct rte_flow_error *error)
908 {
909         size_t size = sizeof(struct mlx5_flow) +
910                       MNL_ALIGN(sizeof(struct nlmsghdr)) +
911                       MNL_ALIGN(sizeof(struct tcmsg));
912         struct mlx5_flow *dev_flow;
913         struct nlmsghdr *nlh;
914         struct tcmsg *tcm;
915
916         size += flow_tcf_get_items_and_size(items, item_flags);
917         size += flow_tcf_get_actions_and_size(actions, action_flags);
918         dev_flow = rte_zmalloc(__func__, size, MNL_ALIGNTO);
919         if (!dev_flow) {
920                 rte_flow_error_set(error, ENOMEM,
921                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
922                                    "not enough memory to create E-Switch flow");
923                 return NULL;
924         }
925         nlh = mnl_nlmsg_put_header((void *)(dev_flow + 1));
926         tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
927         *dev_flow = (struct mlx5_flow){
928                 .tcf = (struct mlx5_flow_tcf){
929                         .nlh = nlh,
930                         .tcm = tcm,
931                 },
932         };
933         /*
934          * Generate a reasonably unique handle based on the address of the
935          * target buffer.
936          *
937          * This is straightforward on 32-bit systems where the flow pointer can
938          * be used directly. Otherwise, its least significant part is taken
939          * after shifting it by the previous power of two of the pointed buffer
940          * size.
941          */
942         if (sizeof(dev_flow) <= 4)
943                 flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow);
944         else
945                 flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow >>
946                                        rte_log2_u32(rte_align32prevpow2(size)));
947         return dev_flow;
948 }
949
950 /**
951  * Translate flow for Linux TC flower and construct Netlink message.
952  *
953  * @param[in] priv
954  *   Pointer to the priv structure.
955  * @param[in, out] flow
956  *   Pointer to the sub flow.
957  * @param[in] attr
958  *   Pointer to the flow attributes.
959  * @param[in] items
960  *   Pointer to the list of items.
961  * @param[in] actions
962  *   Pointer to the list of actions.
963  * @param[out] error
964  *   Pointer to the error structure.
965  *
966  * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
968  */
969 static int
970 flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
971                    const struct rte_flow_attr *attr,
972                    const struct rte_flow_item items[],
973                    const struct rte_flow_action actions[],
974                    struct rte_flow_error *error)
975 {
976         union {
977                 const struct rte_flow_item_port_id *port_id;
978                 const struct rte_flow_item_eth *eth;
979                 const struct rte_flow_item_vlan *vlan;
980                 const struct rte_flow_item_ipv4 *ipv4;
981                 const struct rte_flow_item_ipv6 *ipv6;
982                 const struct rte_flow_item_tcp *tcp;
983                 const struct rte_flow_item_udp *udp;
984         } spec, mask;
985         union {
986                 const struct rte_flow_action_port_id *port_id;
987                 const struct rte_flow_action_of_push_vlan *of_push_vlan;
988                 const struct rte_flow_action_of_set_vlan_vid *
989                         of_set_vlan_vid;
990                 const struct rte_flow_action_of_set_vlan_pcp *
991                         of_set_vlan_pcp;
992         } conf;
993         struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
994         struct nlmsghdr *nlh = dev_flow->tcf.nlh;
995         struct tcmsg *tcm = dev_flow->tcf.tcm;
996         uint32_t na_act_index_cur;
997         bool eth_type_set = 0;
998         bool vlan_present = 0;
999         bool vlan_eth_type_set = 0;
1000         bool ip_proto_set = 0;
1001         struct nlattr *na_flower;
1002         struct nlattr *na_flower_act;
1003         struct nlattr *na_vlan_id = NULL;
1004         struct nlattr *na_vlan_priority = NULL;
1005
1006         claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
1007                                                 PTOI_TABLE_SZ_MAX(dev)));
1008         nlh = dev_flow->tcf.nlh;
1009         tcm = dev_flow->tcf.tcm;
1010         /* Prepare API must have been called beforehand. */
1011         assert(nlh != NULL && tcm != NULL);
1012         tcm->tcm_family = AF_UNSPEC;
1013         tcm->tcm_ifindex = ptoi[0].ifindex;
1014         tcm->tcm_parent = TC_H_MAKE(TC_H_INGRESS, TC_H_MIN_INGRESS);
1015         /*
1016          * Priority cannot be zero to prevent the kernel from picking one
1017          * automatically.
1018          */
1019         tcm->tcm_info = TC_H_MAKE((attr->priority + 1) << 16,
1020                                   RTE_BE16(ETH_P_ALL));
1021         mnl_attr_put_strz(nlh, TCA_KIND, "flower");
1022         na_flower = mnl_attr_nest_start(nlh, TCA_OPTIONS);
1023         mnl_attr_put_u32(nlh, TCA_FLOWER_FLAGS, TCA_CLS_FLAGS_SKIP_SW);
1024         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
1025                 unsigned int i;
1026
1027                 switch (items->type) {
1028                 case RTE_FLOW_ITEM_TYPE_VOID:
1029                         break;
1030                 case RTE_FLOW_ITEM_TYPE_PORT_ID:
1031                         mask.port_id = flow_tcf_item_mask
1032                                 (items, &rte_flow_item_port_id_mask,
1033                                  &flow_tcf_mask_supported.port_id,
1034                                  &flow_tcf_mask_empty.port_id,
1035                                  sizeof(flow_tcf_mask_supported.port_id),
1036                                  error);
1037                         assert(mask.port_id);
1038                         if (mask.port_id == &flow_tcf_mask_empty.port_id)
1039                                 break;
1040                         spec.port_id = items->spec;
1041                         if (!mask.port_id->id)
1042                                 i = 0;
1043                         else
1044                                 for (i = 0; ptoi[i].ifindex; ++i)
1045                                         if (ptoi[i].port_id == spec.port_id->id)
1046                                                 break;
1047                         assert(ptoi[i].ifindex);
1048                         tcm->tcm_ifindex = ptoi[i].ifindex;
1049                         break;
1050                 case RTE_FLOW_ITEM_TYPE_ETH:
1051                         mask.eth = flow_tcf_item_mask
1052                                 (items, &rte_flow_item_eth_mask,
1053                                  &flow_tcf_mask_supported.eth,
1054                                  &flow_tcf_mask_empty.eth,
1055                                  sizeof(flow_tcf_mask_supported.eth),
1056                                  error);
1057                         assert(mask.eth);
1058                         if (mask.eth == &flow_tcf_mask_empty.eth)
1059                                 break;
1060                         spec.eth = items->spec;
1061                         if (mask.eth->type) {
1062                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
1063                                                  spec.eth->type);
1064                                 eth_type_set = 1;
1065                         }
1066                         if (!is_zero_ether_addr(&mask.eth->dst)) {
1067                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST,
1068                                              ETHER_ADDR_LEN,
1069                                              spec.eth->dst.addr_bytes);
1070                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST_MASK,
1071                                              ETHER_ADDR_LEN,
1072                                              mask.eth->dst.addr_bytes);
1073                         }
1074                         if (!is_zero_ether_addr(&mask.eth->src)) {
1075                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC,
1076                                              ETHER_ADDR_LEN,
1077                                              spec.eth->src.addr_bytes);
1078                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC_MASK,
1079                                              ETHER_ADDR_LEN,
1080                                              mask.eth->src.addr_bytes);
1081                         }
1082                         break;
1083                 case RTE_FLOW_ITEM_TYPE_VLAN:
1084                         mask.vlan = flow_tcf_item_mask
1085                                 (items, &rte_flow_item_vlan_mask,
1086                                  &flow_tcf_mask_supported.vlan,
1087                                  &flow_tcf_mask_empty.vlan,
1088                                  sizeof(flow_tcf_mask_supported.vlan),
1089                                  error);
1090                         assert(mask.vlan);
1091                         if (!eth_type_set)
1092                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
1093                                                  RTE_BE16(ETH_P_8021Q));
1094                         eth_type_set = 1;
1095                         vlan_present = 1;
1096                         if (mask.vlan == &flow_tcf_mask_empty.vlan)
1097                                 break;
1098                         spec.vlan = items->spec;
1099                         if (mask.vlan->inner_type) {
1100                                 mnl_attr_put_u16(nlh,
1101                                                  TCA_FLOWER_KEY_VLAN_ETH_TYPE,
1102                                                  spec.vlan->inner_type);
1103                                 vlan_eth_type_set = 1;
1104                         }
1105                         if (mask.vlan->tci & RTE_BE16(0xe000))
1106                                 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_VLAN_PRIO,
1107                                                 (rte_be_to_cpu_16
1108                                                  (spec.vlan->tci) >> 13) & 0x7);
1109                         if (mask.vlan->tci & RTE_BE16(0x0fff))
1110                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_VLAN_ID,
1111                                                  rte_be_to_cpu_16
1112                                                  (spec.vlan->tci &
1113                                                   RTE_BE16(0x0fff)));
1114                         break;
1115                 case RTE_FLOW_ITEM_TYPE_IPV4:
1116                         mask.ipv4 = flow_tcf_item_mask
1117                                 (items, &rte_flow_item_ipv4_mask,
1118                                  &flow_tcf_mask_supported.ipv4,
1119                                  &flow_tcf_mask_empty.ipv4,
1120                                  sizeof(flow_tcf_mask_supported.ipv4),
1121                                  error);
1122                         assert(mask.ipv4);
1123                         if (!eth_type_set || !vlan_eth_type_set)
1124                                 mnl_attr_put_u16(nlh,
1125                                                  vlan_present ?
1126                                                  TCA_FLOWER_KEY_VLAN_ETH_TYPE :
1127                                                  TCA_FLOWER_KEY_ETH_TYPE,
1128                                                  RTE_BE16(ETH_P_IP));
1129                         eth_type_set = 1;
1130                         vlan_eth_type_set = 1;
1131                         if (mask.ipv4 == &flow_tcf_mask_empty.ipv4)
1132                                 break;
1133                         spec.ipv4 = items->spec;
1134                         if (mask.ipv4->hdr.next_proto_id) {
1135                                 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1136                                                 spec.ipv4->hdr.next_proto_id);
1137                                 ip_proto_set = 1;
1138                         }
1139                         if (mask.ipv4->hdr.src_addr) {
1140                                 mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_SRC,
1141                                                  spec.ipv4->hdr.src_addr);
1142                                 mnl_attr_put_u32(nlh,
1143                                                  TCA_FLOWER_KEY_IPV4_SRC_MASK,
1144                                                  mask.ipv4->hdr.src_addr);
1145                         }
1146                         if (mask.ipv4->hdr.dst_addr) {
1147                                 mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_DST,
1148                                                  spec.ipv4->hdr.dst_addr);
1149                                 mnl_attr_put_u32(nlh,
1150                                                  TCA_FLOWER_KEY_IPV4_DST_MASK,
1151                                                  mask.ipv4->hdr.dst_addr);
1152                         }
1153                         break;
1154                 case RTE_FLOW_ITEM_TYPE_IPV6:
1155                         mask.ipv6 = flow_tcf_item_mask
1156                                 (items, &rte_flow_item_ipv6_mask,
1157                                  &flow_tcf_mask_supported.ipv6,
1158                                  &flow_tcf_mask_empty.ipv6,
1159                                  sizeof(flow_tcf_mask_supported.ipv6),
1160                                  error);
1161                         assert(mask.ipv6);
1162                         if (!eth_type_set || !vlan_eth_type_set)
1163                                 mnl_attr_put_u16(nlh,
1164                                                  vlan_present ?
1165                                                  TCA_FLOWER_KEY_VLAN_ETH_TYPE :
1166                                                  TCA_FLOWER_KEY_ETH_TYPE,
1167                                                  RTE_BE16(ETH_P_IPV6));
1168                         eth_type_set = 1;
1169                         vlan_eth_type_set = 1;
1170                         if (mask.ipv6 == &flow_tcf_mask_empty.ipv6)
1171                                 break;
1172                         spec.ipv6 = items->spec;
1173                         if (mask.ipv6->hdr.proto) {
1174                                 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1175                                                 spec.ipv6->hdr.proto);
1176                                 ip_proto_set = 1;
1177                         }
1178                         if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.src_addr)) {
1179                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC,
1180                                              sizeof(spec.ipv6->hdr.src_addr),
1181                                              spec.ipv6->hdr.src_addr);
1182                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC_MASK,
1183                                              sizeof(mask.ipv6->hdr.src_addr),
1184                                              mask.ipv6->hdr.src_addr);
1185                         }
1186                         if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.dst_addr)) {
1187                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST,
1188                                              sizeof(spec.ipv6->hdr.dst_addr),
1189                                              spec.ipv6->hdr.dst_addr);
1190                                 mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST_MASK,
1191                                              sizeof(mask.ipv6->hdr.dst_addr),
1192                                              mask.ipv6->hdr.dst_addr);
1193                         }
1194                         break;
1195                 case RTE_FLOW_ITEM_TYPE_UDP:
1196                         mask.udp = flow_tcf_item_mask
1197                                 (items, &rte_flow_item_udp_mask,
1198                                  &flow_tcf_mask_supported.udp,
1199                                  &flow_tcf_mask_empty.udp,
1200                                  sizeof(flow_tcf_mask_supported.udp),
1201                                  error);
1202                         assert(mask.udp);
1203                         if (!ip_proto_set)
1204                                 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1205                                                 IPPROTO_UDP);
1206                         if (mask.udp == &flow_tcf_mask_empty.udp)
1207                                 break;
1208                         spec.udp = items->spec;
1209                         if (mask.udp->hdr.src_port) {
1210                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_SRC,
1211                                                  spec.udp->hdr.src_port);
1212                                 mnl_attr_put_u16(nlh,
1213                                                  TCA_FLOWER_KEY_UDP_SRC_MASK,
1214                                                  mask.udp->hdr.src_port);
1215                         }
1216                         if (mask.udp->hdr.dst_port) {
1217                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_DST,
1218                                                  spec.udp->hdr.dst_port);
1219                                 mnl_attr_put_u16(nlh,
1220                                                  TCA_FLOWER_KEY_UDP_DST_MASK,
1221                                                  mask.udp->hdr.dst_port);
1222                         }
1223                         break;
1224                 case RTE_FLOW_ITEM_TYPE_TCP:
1225                         mask.tcp = flow_tcf_item_mask
1226                                 (items, &rte_flow_item_tcp_mask,
1227                                  &flow_tcf_mask_supported.tcp,
1228                                  &flow_tcf_mask_empty.tcp,
1229                                  sizeof(flow_tcf_mask_supported.tcp),
1230                                  error);
1231                         assert(mask.tcp);
1232                         if (!ip_proto_set)
1233                                 mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
1234                                                 IPPROTO_TCP);
1235                         if (mask.tcp == &flow_tcf_mask_empty.tcp)
1236                                 break;
1237                         spec.tcp = items->spec;
1238                         if (mask.tcp->hdr.src_port) {
1239                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_SRC,
1240                                                  spec.tcp->hdr.src_port);
1241                                 mnl_attr_put_u16(nlh,
1242                                                  TCA_FLOWER_KEY_TCP_SRC_MASK,
1243                                                  mask.tcp->hdr.src_port);
1244                         }
1245                         if (mask.tcp->hdr.dst_port) {
1246                                 mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_DST,
1247                                                  spec.tcp->hdr.dst_port);
1248                                 mnl_attr_put_u16(nlh,
1249                                                  TCA_FLOWER_KEY_TCP_DST_MASK,
1250                                                  mask.tcp->hdr.dst_port);
1251                         }
1252                         break;
1253                 default:
1254                         return rte_flow_error_set(error, ENOTSUP,
1255                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1256                                                   NULL, "item not supported");
1257                 }
1258         }
1259         na_flower_act = mnl_attr_nest_start(nlh, TCA_FLOWER_ACT);
1260         na_act_index_cur = 1;
1261         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1262                 struct nlattr *na_act_index;
1263                 struct nlattr *na_act;
1264                 unsigned int vlan_act;
1265                 unsigned int i;
1266
1267                 switch (actions->type) {
1268                 case RTE_FLOW_ACTION_TYPE_VOID:
1269                         break;
1270                 case RTE_FLOW_ACTION_TYPE_PORT_ID:
1271                         conf.port_id = actions->conf;
1272                         if (conf.port_id->original)
1273                                 i = 0;
1274                         else
1275                                 for (i = 0; ptoi[i].ifindex; ++i)
1276                                         if (ptoi[i].port_id == conf.port_id->id)
1277                                                 break;
1278                         assert(ptoi[i].ifindex);
1279                         na_act_index =
1280                                 mnl_attr_nest_start(nlh, na_act_index_cur++);
1281                         assert(na_act_index);
1282                         mnl_attr_put_strz(nlh, TCA_ACT_KIND, "mirred");
1283                         na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
1284                         assert(na_act);
1285                         mnl_attr_put(nlh, TCA_MIRRED_PARMS,
1286                                      sizeof(struct tc_mirred),
1287                                      &(struct tc_mirred){
1288                                         .action = TC_ACT_STOLEN,
1289                                         .eaction = TCA_EGRESS_REDIR,
1290                                         .ifindex = ptoi[i].ifindex,
1291                                      });
1292                         mnl_attr_nest_end(nlh, na_act);
1293                         mnl_attr_nest_end(nlh, na_act_index);
1294                         break;
1295                 case RTE_FLOW_ACTION_TYPE_DROP:
1296                         na_act_index =
1297                                 mnl_attr_nest_start(nlh, na_act_index_cur++);
1298                         assert(na_act_index);
1299                         mnl_attr_put_strz(nlh, TCA_ACT_KIND, "gact");
1300                         na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
1301                         assert(na_act);
1302                         mnl_attr_put(nlh, TCA_GACT_PARMS,
1303                                      sizeof(struct tc_gact),
1304                                      &(struct tc_gact){
1305                                         .action = TC_ACT_SHOT,
1306                                      });
1307                         mnl_attr_nest_end(nlh, na_act);
1308                         mnl_attr_nest_end(nlh, na_act_index);
1309                         break;
1310                 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
1311                         conf.of_push_vlan = NULL;
1312                         vlan_act = TCA_VLAN_ACT_POP;
1313                         goto action_of_vlan;
1314                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
1315                         conf.of_push_vlan = actions->conf;
1316                         vlan_act = TCA_VLAN_ACT_PUSH;
1317                         goto action_of_vlan;
1318                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
1319                         conf.of_set_vlan_vid = actions->conf;
1320                         if (na_vlan_id)
1321                                 goto override_na_vlan_id;
1322                         vlan_act = TCA_VLAN_ACT_MODIFY;
1323                         goto action_of_vlan;
1324                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
1325                         conf.of_set_vlan_pcp = actions->conf;
1326                         if (na_vlan_priority)
1327                                 goto override_na_vlan_priority;
1328                         vlan_act = TCA_VLAN_ACT_MODIFY;
1329                         goto action_of_vlan;
1330 action_of_vlan:
1331                         na_act_index =
1332                                 mnl_attr_nest_start(nlh, na_act_index_cur++);
1333                         assert(na_act_index);
1334                         mnl_attr_put_strz(nlh, TCA_ACT_KIND, "vlan");
1335                         na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
1336                         assert(na_act);
1337                         mnl_attr_put(nlh, TCA_VLAN_PARMS,
1338                                      sizeof(struct tc_vlan),
1339                                      &(struct tc_vlan){
1340                                         .action = TC_ACT_PIPE,
1341                                         .v_action = vlan_act,
1342                                      });
1343                         if (vlan_act == TCA_VLAN_ACT_POP) {
1344                                 mnl_attr_nest_end(nlh, na_act);
1345                                 mnl_attr_nest_end(nlh, na_act_index);
1346                                 break;
1347                         }
1348                         if (vlan_act == TCA_VLAN_ACT_PUSH)
1349                                 mnl_attr_put_u16(nlh,
1350                                                  TCA_VLAN_PUSH_VLAN_PROTOCOL,
1351                                                  conf.of_push_vlan->ethertype);
1352                         na_vlan_id = mnl_nlmsg_get_payload_tail(nlh);
1353                         mnl_attr_put_u16(nlh, TCA_VLAN_PAD, 0);
1354                         na_vlan_priority = mnl_nlmsg_get_payload_tail(nlh);
1355                         mnl_attr_put_u8(nlh, TCA_VLAN_PAD, 0);
1356                         mnl_attr_nest_end(nlh, na_act);
1357                         mnl_attr_nest_end(nlh, na_act_index);
1358                         if (actions->type ==
1359                             RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID) {
1360 override_na_vlan_id:
1361                                 na_vlan_id->nla_type = TCA_VLAN_PUSH_VLAN_ID;
1362                                 *(uint16_t *)mnl_attr_get_payload(na_vlan_id) =
1363                                         rte_be_to_cpu_16
1364                                         (conf.of_set_vlan_vid->vlan_vid);
1365                         } else if (actions->type ==
1366                                    RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP) {
1367 override_na_vlan_priority:
1368                                 na_vlan_priority->nla_type =
1369                                         TCA_VLAN_PUSH_VLAN_PRIORITY;
1370                                 *(uint8_t *)mnl_attr_get_payload
1371                                         (na_vlan_priority) =
1372                                         conf.of_set_vlan_pcp->vlan_pcp;
1373                         }
1374                         break;
1375                 default:
1376                         return rte_flow_error_set(error, ENOTSUP,
1377                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1378                                                   actions,
1379                                                   "action not supported");
1380                 }
1381         }
1382         assert(na_flower);
1383         assert(na_flower_act);
1384         mnl_attr_nest_end(nlh, na_flower_act);
1385         mnl_attr_nest_end(nlh, na_flower);
1386         return 0;
1387 }
1388
/**
 * Send Netlink message with acknowledgment.
 *
 * @param nl
 *   Libmnl socket to use.
 * @param nlh
 *   Message to send. This function always raises the NLM_F_ACK flag before
 *   sending.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
flow_tcf_nl_ack(struct mnl_socket *nl, struct nlmsghdr *nlh)
{
	/* Answer buffer: room for an error/ACK header plus an echo of the
	 * request payload, aligned as a Netlink message. */
	alignas(struct nlmsghdr)
	uint8_t ans[mnl_nlmsg_size(sizeof(struct nlmsgerr)) +
		    nlh->nlmsg_len - sizeof(*nlh)];
	/* Sequence number used to match the kernel reply to this request. */
	uint32_t seq = random();
	int ret;

	nlh->nlmsg_flags |= NLM_F_ACK;
	nlh->nlmsg_seq = seq;
	ret = mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
	/* Each step runs only if the previous one did not fail (-1). */
	if (ret != -1)
		ret = mnl_socket_recvfrom(nl, ans, sizeof(ans));
	if (ret != -1)
		ret = mnl_cb_run
			(ans, ret, seq, mnl_socket_get_portid(nl), NULL, NULL);
	if (ret > 0)
		return 0;
	/* NOTE(review): when mnl_cb_run() returns 0 (stop, e.g. on a kernel
	 * ACK carrying error code 0), this path reports -errno as left by
	 * libmnl's NLMSG_ERROR handling — confirm this yields the intended
	 * result (errno 0 makes the return value 0). */
	rte_errno = errno;
	return -rte_errno;
}
1423
1424 /**
1425  * Apply flow to E-Switch by sending Netlink message.
1426  *
1427  * @param[in] dev
1428  *   Pointer to Ethernet device.
1429  * @param[in, out] flow
1430  *   Pointer to the sub flow.
1431  * @param[out] error
1432  *   Pointer to the error structure.
1433  *
1434  * @return
1435  *   0 on success, a negative errno value otherwise and rte_ernno is set.
1436  */
1437 static int
1438 flow_tcf_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
1439                struct rte_flow_error *error)
1440 {
1441         struct priv *priv = dev->data->dev_private;
1442         struct mnl_socket *nl = priv->mnl_socket;
1443         struct mlx5_flow *dev_flow;
1444         struct nlmsghdr *nlh;
1445
1446         dev_flow = LIST_FIRST(&flow->dev_flows);
1447         /* E-Switch flow can't be expanded. */
1448         assert(!LIST_NEXT(dev_flow, next));
1449         nlh = dev_flow->tcf.nlh;
1450         nlh->nlmsg_type = RTM_NEWTFILTER;
1451         nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
1452         if (!flow_tcf_nl_ack(nl, nlh))
1453                 return 0;
1454         return rte_flow_error_set(error, rte_errno,
1455                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1456                                   "netlink: failed to create TC flow rule");
1457 }
1458
1459 /**
1460  * Remove flow from E-Switch by sending Netlink message.
1461  *
1462  * @param[in] dev
1463  *   Pointer to Ethernet device.
1464  * @param[in, out] flow
1465  *   Pointer to the sub flow.
1466  */
1467 static void
1468 flow_tcf_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
1469 {
1470         struct priv *priv = dev->data->dev_private;
1471         struct mnl_socket *nl = priv->mnl_socket;
1472         struct mlx5_flow *dev_flow;
1473         struct nlmsghdr *nlh;
1474
1475         if (!flow)
1476                 return;
1477         dev_flow = LIST_FIRST(&flow->dev_flows);
1478         if (!dev_flow)
1479                 return;
1480         /* E-Switch flow can't be expanded. */
1481         assert(!LIST_NEXT(dev_flow, next));
1482         nlh = dev_flow->tcf.nlh;
1483         nlh->nlmsg_type = RTM_DELTFILTER;
1484         nlh->nlmsg_flags = NLM_F_REQUEST;
1485         flow_tcf_nl_ack(nl, nlh);
1486 }
1487
1488 /**
1489  * Remove flow from E-Switch and release resources of the device flow.
1490  *
1491  * @param[in] dev
1492  *   Pointer to Ethernet device.
1493  * @param[in, out] flow
1494  *   Pointer to the sub flow.
1495  */
1496 static void
1497 flow_tcf_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
1498 {
1499         struct mlx5_flow *dev_flow;
1500
1501         if (!flow)
1502                 return;
1503         flow_tcf_remove(dev, flow);
1504         dev_flow = LIST_FIRST(&flow->dev_flows);
1505         if (!dev_flow)
1506                 return;
1507         /* E-Switch flow can't be expanded. */
1508         assert(!LIST_NEXT(dev_flow, next));
1509         LIST_REMOVE(dev_flow, next);
1510         rte_free(dev_flow);
1511 }
1512
/* Table of flow driver callbacks implementing E-Switch flows through
 * Linux TC flower ("tcf"), exposed to the generic mlx5 flow layer. */
const struct mlx5_flow_driver_ops mlx5_flow_tcf_drv_ops = {
	.validate = flow_tcf_validate,
	.prepare = flow_tcf_prepare,
	.translate = flow_tcf_translate,
	.apply = flow_tcf_apply,
	.remove = flow_tcf_remove,
	.destroy = flow_tcf_destroy,
};
1521
1522 /**
1523  * Initialize ingress qdisc of a given network interface.
1524  *
1525  * @param nl
1526  *   Libmnl socket of the @p NETLINK_ROUTE kind.
1527  * @param ifindex
1528  *   Index of network interface to initialize.
1529  * @param[out] error
1530  *   Perform verbose error reporting if not NULL.
1531  *
1532  * @return
1533  *   0 on success, a negative errno value otherwise and rte_errno is set.
1534  */
1535 int
1536 mlx5_flow_tcf_init(struct mnl_socket *nl, unsigned int ifindex,
1537                    struct rte_flow_error *error)
1538 {
1539         struct nlmsghdr *nlh;
1540         struct tcmsg *tcm;
1541         alignas(struct nlmsghdr)
1542         uint8_t buf[mnl_nlmsg_size(sizeof(*tcm) + 128)];
1543
1544         /* Destroy existing ingress qdisc and everything attached to it. */
1545         nlh = mnl_nlmsg_put_header(buf);
1546         nlh->nlmsg_type = RTM_DELQDISC;
1547         nlh->nlmsg_flags = NLM_F_REQUEST;
1548         tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
1549         tcm->tcm_family = AF_UNSPEC;
1550         tcm->tcm_ifindex = ifindex;
1551         tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
1552         tcm->tcm_parent = TC_H_INGRESS;
1553         /* Ignore errors when qdisc is already absent. */
1554         if (flow_tcf_nl_ack(nl, nlh) &&
1555             rte_errno != EINVAL && rte_errno != ENOENT)
1556                 return rte_flow_error_set(error, rte_errno,
1557                                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1558                                           "netlink: failed to remove ingress"
1559                                           " qdisc");
1560         /* Create fresh ingress qdisc. */
1561         nlh = mnl_nlmsg_put_header(buf);
1562         nlh->nlmsg_type = RTM_NEWQDISC;
1563         nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
1564         tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
1565         tcm->tcm_family = AF_UNSPEC;
1566         tcm->tcm_ifindex = ifindex;
1567         tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
1568         tcm->tcm_parent = TC_H_INGRESS;
1569         mnl_attr_put_strz_check(nlh, sizeof(buf), TCA_KIND, "ingress");
1570         if (flow_tcf_nl_ack(nl, nlh))
1571                 return rte_flow_error_set(error, rte_errno,
1572                                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1573                                           "netlink: failed to create ingress"
1574                                           " qdisc");
1575         return 0;
1576 }
1577
1578 /**
1579  * Create and configure a libmnl socket for Netlink flow rules.
1580  *
1581  * @return
1582  *   A valid libmnl socket object pointer on success, NULL otherwise and
1583  *   rte_errno is set.
1584  */
1585 struct mnl_socket *
1586 mlx5_flow_tcf_socket_create(void)
1587 {
1588         struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
1589
1590         if (nl) {
1591                 mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
1592                                       sizeof(int));
1593                 if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
1594                         return nl;
1595         }
1596         rte_errno = errno;
1597         if (nl)
1598                 mnl_socket_close(nl);
1599         return NULL;
1600 }
1601
/**
 * Destroy a libmnl socket.
 *
 * @param nl
 *   Libmnl socket of the @p NETLINK_ROUTE kind.
 */
void
mlx5_flow_tcf_socket_destroy(struct mnl_socket *nl)
{
	/* Close failures cannot be reported here; the result is ignored. */
	mnl_socket_close(nl);
}