ethdev: add transfer attribute to flow API
[dpdk.git] / drivers/net/mlx4/mlx4_flow.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2017 6WIND S.A.
3  * Copyright 2017 Mellanox Technologies, Ltd
4  */
5
6 /**
7  * @file
8  * Flow API operations for mlx4 driver.
9  */
10
11 #include <arpa/inet.h>
12 #include <assert.h>
13 #include <errno.h>
14 #include <stdalign.h>
15 #include <stddef.h>
16 #include <stdint.h>
17 #include <string.h>
18 #include <sys/queue.h>
19
20 /* Verbs headers do not support -pedantic. */
21 #ifdef PEDANTIC
22 #pragma GCC diagnostic ignored "-Wpedantic"
23 #endif
24 #include <infiniband/verbs.h>
25 #ifdef PEDANTIC
26 #pragma GCC diagnostic error "-Wpedantic"
27 #endif
28
29 #include <rte_byteorder.h>
30 #include <rte_errno.h>
31 #include <rte_eth_ctrl.h>
32 #include <rte_ethdev_driver.h>
33 #include <rte_ether.h>
34 #include <rte_flow.h>
35 #include <rte_flow_driver.h>
36 #include <rte_malloc.h>
37
38 /* PMD headers. */
39 #include "mlx4.h"
40 #include "mlx4_glue.h"
41 #include "mlx4_flow.h"
42 #include "mlx4_rxtx.h"
43 #include "mlx4_utils.h"
44
45 /** Static initializer for a list of subsequent item types. */
46 #define NEXT_ITEM(...) \
47         (const enum rte_flow_item_type []){ \
48                 __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
49         }
50
51 /** Processor structure associated with a flow item. */
52 struct mlx4_flow_proc_item {
53         /** Bit-mask for fields supported by this PMD. */
54         const void *mask_support;
55         /** Bit-mask to use when @p item->mask is not provided. */
56         const void *mask_default;
57         /** Size in bytes for @p mask_support and @p mask_default. */
58         const unsigned int mask_sz;
59         /** Merge a pattern item into a flow rule handle. */
60         int (*merge)(struct rte_flow *flow,
61                      const struct rte_flow_item *item,
62                      const struct mlx4_flow_proc_item *proc,
63                      struct rte_flow_error *error);
64         /** Size in bytes of the destination structure. */
65         const unsigned int dst_sz;
66         /** List of possible subsequent items. */
67         const enum rte_flow_item_type *const next_item;
68 };
69
70 /** Shared resources for drop flow rules. */
71 struct mlx4_drop {
72         struct ibv_qp *qp; /**< QP target. */
73         struct ibv_cq *cq; /**< CQ associated with above QP. */
74         struct priv *priv; /**< Back pointer to private data. */
75         uint32_t refcnt; /**< Reference count. */
76 };
77
78 /**
79  * Convert DPDK RSS hash types to their Verbs equivalent.
80  *
81  * This function returns the supported (default) set when @p types has
82  * special value (uint64_t)-1.
83  *
84  * @param priv
85  *   Pointer to private structure.
86  * @param types
87  *   Hash types in DPDK format (see struct rte_eth_rss_conf).
88  *
89  * @return
90  *   A valid Verbs RSS hash fields mask for mlx4 on success, (uint64_t)-1
91  *   otherwise and rte_errno is set.
92  */
93 uint64_t
94 mlx4_conv_rss_types(struct priv *priv, uint64_t types)
95 {
96         enum { IPV4, IPV6, TCP, UDP, };
97         const uint64_t in[] = {
98                 [IPV4] = (ETH_RSS_IPV4 |
99                           ETH_RSS_FRAG_IPV4 |
100                           ETH_RSS_NONFRAG_IPV4_TCP |
101                           ETH_RSS_NONFRAG_IPV4_UDP |
102                           ETH_RSS_NONFRAG_IPV4_OTHER),
103                 [IPV6] = (ETH_RSS_IPV6 |
104                           ETH_RSS_FRAG_IPV6 |
105                           ETH_RSS_NONFRAG_IPV6_TCP |
106                           ETH_RSS_NONFRAG_IPV6_UDP |
107                           ETH_RSS_NONFRAG_IPV6_OTHER |
108                           ETH_RSS_IPV6_EX |
109                           ETH_RSS_IPV6_TCP_EX |
110                           ETH_RSS_IPV6_UDP_EX),
111                 [TCP] = (ETH_RSS_NONFRAG_IPV4_TCP |
112                          ETH_RSS_NONFRAG_IPV6_TCP |
113                          ETH_RSS_IPV6_TCP_EX),
114                 [UDP] = (ETH_RSS_NONFRAG_IPV4_UDP |
115                          ETH_RSS_NONFRAG_IPV6_UDP |
116                          ETH_RSS_IPV6_UDP_EX),
117         };
118         const uint64_t out[RTE_DIM(in)] = {
119                 [IPV4] = IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4,
120                 [IPV6] = IBV_RX_HASH_SRC_IPV6 | IBV_RX_HASH_DST_IPV6,
121                 [TCP] = IBV_RX_HASH_SRC_PORT_TCP | IBV_RX_HASH_DST_PORT_TCP,
122                 [UDP] = IBV_RX_HASH_SRC_PORT_UDP | IBV_RX_HASH_DST_PORT_UDP,
123         };
124         uint64_t seen = 0;
125         uint64_t conv = 0;
126         unsigned int i;
127
128         for (i = 0; i != RTE_DIM(in); ++i)
129                 if (types & in[i]) {
130                         seen |= types & in[i];
131                         conv |= out[i];
132                 }
133         if ((conv & priv->hw_rss_sup) == conv) {
134                 if (types == (uint64_t)-1) {
135                         /* Include inner RSS by default if supported. */
136                         conv |= priv->hw_rss_sup & IBV_RX_HASH_INNER;
137                         return conv;
138                 }
139                 if (!(types & ~seen))
140                         return conv;
141         }
142         rte_errno = ENOTSUP;
143         return (uint64_t)-1;
144 }
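/*
 * Illustration of the mapping above: requesting ETH_RSS_NONFRAG_IPV4_TCP
 * selects both the IPV4 and TCP rows, so the function returns
 *
 *   IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4 |
 *   IBV_RX_HASH_SRC_PORT_TCP | IBV_RX_HASH_DST_PORT_TCP
 *
 * as long as priv->hw_rss_sup covers those bits; otherwise rte_errno is
 * set to ENOTSUP and (uint64_t)-1 is returned.
 */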
145
146 /**
147  * Merge Ethernet pattern item into flow rule handle.
148  *
149  * Additional mlx4-specific constraints on supported fields:
150  *
151  * - No support for partial masks, except in the specific case of matching
152  *   all multicast traffic (@p spec->dst and @p mask->dst equal to
153  *   01:00:00:00:00:00).
154  * - Not providing @p item->spec or providing an empty @p mask->dst is
155  *   *only* supported if the rule doesn't specify additional matching
156  *   criteria (i.e. rule is promiscuous-like).
157  *
158  * @param[in, out] flow
159  *   Flow rule handle to update.
160  * @param[in] item
161  *   Pattern item to merge.
162  * @param[in] proc
163  *   Associated item-processing object.
164  * @param[out] error
165  *   Perform verbose error reporting if not NULL.
166  *
167  * @return
168  *   0 on success, a negative errno value otherwise and rte_errno is set.
169  */
170 static int
171 mlx4_flow_merge_eth(struct rte_flow *flow,
172                     const struct rte_flow_item *item,
173                     const struct mlx4_flow_proc_item *proc,
174                     struct rte_flow_error *error)
175 {
176         const struct rte_flow_item_eth *spec = item->spec;
177         const struct rte_flow_item_eth *mask =
178                 spec ? (item->mask ? item->mask : proc->mask_default) : NULL;
179         struct ibv_flow_spec_eth *eth;
180         const char *msg;
181         unsigned int i;
182
183         if (!mask) {
184                 flow->promisc = 1;
185         } else {
186                 uint32_t sum_dst = 0;
187                 uint32_t sum_src = 0;
188
189                 for (i = 0; i != sizeof(mask->dst.addr_bytes); ++i) {
190                         sum_dst += mask->dst.addr_bytes[i];
191                         sum_src += mask->src.addr_bytes[i];
192                 }
193                 if (sum_src) {
194                         msg = "mlx4 does not support source MAC matching";
195                         goto error;
196                 } else if (!sum_dst) {
197                         flow->promisc = 1;
198                 } else if (sum_dst == 1 && mask->dst.addr_bytes[0] == 1) {
199                         if (!(spec->dst.addr_bytes[0] & 1)) {
200                                 msg = "mlx4 does not support the explicit"
201                                         " exclusion of all multicast traffic";
202                                 goto error;
203                         }
204                         flow->allmulti = 1;
205                 } else if (sum_dst != (UINT8_C(0xff) * ETHER_ADDR_LEN)) {
206                         msg = "mlx4 does not support matching partial"
207                                 " Ethernet fields";
208                         goto error;
209                 }
210         }
211         if (!flow->ibv_attr)
212                 return 0;
213         if (flow->promisc) {
214                 flow->ibv_attr->type = IBV_FLOW_ATTR_ALL_DEFAULT;
215                 return 0;
216         }
217         if (flow->allmulti) {
218                 flow->ibv_attr->type = IBV_FLOW_ATTR_MC_DEFAULT;
219                 return 0;
220         }
221         ++flow->ibv_attr->num_of_specs;
222         eth = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size);
223         *eth = (struct ibv_flow_spec_eth) {
224                 .type = IBV_FLOW_SPEC_ETH,
225                 .size = sizeof(*eth),
226         };
227         memcpy(eth->val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
228         memcpy(eth->mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
229         /* Remove unwanted bits from values. */
230         for (i = 0; i < ETHER_ADDR_LEN; ++i) {
231                 eth->val.dst_mac[i] &= eth->mask.dst_mac[i];
232         }
233         return 0;
234 error:
235         return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
236                                   item, msg);
237 }
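/*
 * Quick reference for the constraints above (illustrative, not exhaustive):
 *
 * - dst 00:11:22:33:44:55 with mask ff:ff:ff:ff:ff:ff -> accepted.
 * - dst 01:00:00:00:00:00 with mask 01:00:00:00:00:00 -> accepted (allmulti).
 * - No spec/mask at all -> accepted as promiscuous-like, provided the rule
 *   carries no other matching criteria.
 * - dst with partial mask ff:ff:ff:00:00:00 -> rejected.
 * - Any nonzero src mask -> rejected.
 */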
238
239 /**
240  * Merge VLAN pattern item into flow rule handle.
241  *
242  * Additional mlx4-specific constraints on supported fields:
243  *
244  * - Matching *all* VLAN traffic by omitting @p item->spec or providing an
245  *   empty @p item->mask would also include non-VLAN traffic. Doing so is
246  *   therefore unsupported.
247  * - No support for partial masks.
248  *
249  * @param[in, out] flow
250  *   Flow rule handle to update.
251  * @param[in] item
252  *   Pattern item to merge.
253  * @param[in] proc
254  *   Associated item-processing object.
255  * @param[out] error
256  *   Perform verbose error reporting if not NULL.
257  *
258  * @return
259  *   0 on success, a negative errno value otherwise and rte_errno is set.
260  */
261 static int
262 mlx4_flow_merge_vlan(struct rte_flow *flow,
263                      const struct rte_flow_item *item,
264                      const struct mlx4_flow_proc_item *proc,
265                      struct rte_flow_error *error)
266 {
267         const struct rte_flow_item_vlan *spec = item->spec;
268         const struct rte_flow_item_vlan *mask =
269                 spec ? (item->mask ? item->mask : proc->mask_default) : NULL;
270         struct ibv_flow_spec_eth *eth;
271         const char *msg;
272
273         if (!mask || !mask->tci) {
274                 msg = "mlx4 cannot match all VLAN traffic while excluding"
275                         " non-VLAN traffic, TCI VID must be specified";
276                 goto error;
277         }
278         if (mask->tci != RTE_BE16(0x0fff)) {
279                 msg = "mlx4 does not support partial TCI VID matching";
280                 goto error;
281         }
282         if (!flow->ibv_attr)
283                 return 0;
284         eth = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size -
285                        sizeof(*eth));
286         eth->val.vlan_tag = spec->tci;
287         eth->mask.vlan_tag = mask->tci;
288         eth->val.vlan_tag &= eth->mask.vlan_tag;
289         return 0;
290 error:
291         return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
292                                   item, msg);
293 }
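/*
 * For instance, matching VLAN ID 100 requires spec->tci = RTE_BE16(100)
 * together with mask->tci = RTE_BE16(0x0fff); any other mask value,
 * including an empty one, is rejected by the checks above.
 */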
294
295 /**
296  * Merge IPv4 pattern item into flow rule handle.
297  *
298  * Additional mlx4-specific constraints on supported fields:
299  *
300  * - No support for partial masks.
301  *
302  * @param[in, out] flow
303  *   Flow rule handle to update.
304  * @param[in] item
305  *   Pattern item to merge.
306  * @param[in] proc
307  *   Associated item-processing object.
308  * @param[out] error
309  *   Perform verbose error reporting if not NULL.
310  *
311  * @return
312  *   0 on success, a negative errno value otherwise and rte_errno is set.
313  */
314 static int
315 mlx4_flow_merge_ipv4(struct rte_flow *flow,
316                      const struct rte_flow_item *item,
317                      const struct mlx4_flow_proc_item *proc,
318                      struct rte_flow_error *error)
319 {
320         const struct rte_flow_item_ipv4 *spec = item->spec;
321         const struct rte_flow_item_ipv4 *mask =
322                 spec ? (item->mask ? item->mask : proc->mask_default) : NULL;
323         struct ibv_flow_spec_ipv4 *ipv4;
324         const char *msg;
325
326         if (mask &&
327             ((uint32_t)(mask->hdr.src_addr + 1) > UINT32_C(1) ||
328              (uint32_t)(mask->hdr.dst_addr + 1) > UINT32_C(1))) {
329                 msg = "mlx4 does not support matching partial IPv4 fields";
330                 goto error;
331         }
332         if (!flow->ibv_attr)
333                 return 0;
334         ++flow->ibv_attr->num_of_specs;
335         ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size);
336         *ipv4 = (struct ibv_flow_spec_ipv4) {
337                 .type = IBV_FLOW_SPEC_IPV4,
338                 .size = sizeof(*ipv4),
339         };
340         if (!spec)
341                 return 0;
342         ipv4->val = (struct ibv_flow_ipv4_filter) {
343                 .src_ip = spec->hdr.src_addr,
344                 .dst_ip = spec->hdr.dst_addr,
345         };
346         ipv4->mask = (struct ibv_flow_ipv4_filter) {
347                 .src_ip = mask->hdr.src_addr,
348                 .dst_ip = mask->hdr.dst_addr,
349         };
350         /* Remove unwanted bits from values. */
351         ipv4->val.src_ip &= ipv4->mask.src_ip;
352         ipv4->val.dst_ip &= ipv4->mask.dst_ip;
353         return 0;
354 error:
355         return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
356                                   item, msg);
357 }
358
359 /**
360  * Merge UDP pattern item into flow rule handle.
361  *
362  * Additional mlx4-specific constraints on supported fields:
363  *
364  * - No support for partial masks.
365  *
366  * @param[in, out] flow
367  *   Flow rule handle to update.
368  * @param[in] item
369  *   Pattern item to merge.
370  * @param[in] proc
371  *   Associated item-processing object.
372  * @param[out] error
373  *   Perform verbose error reporting if not NULL.
374  *
375  * @return
376  *   0 on success, a negative errno value otherwise and rte_errno is set.
377  */
378 static int
379 mlx4_flow_merge_udp(struct rte_flow *flow,
380                     const struct rte_flow_item *item,
381                     const struct mlx4_flow_proc_item *proc,
382                     struct rte_flow_error *error)
383 {
384         const struct rte_flow_item_udp *spec = item->spec;
385         const struct rte_flow_item_udp *mask =
386                 spec ? (item->mask ? item->mask : proc->mask_default) : NULL;
387         struct ibv_flow_spec_tcp_udp *udp;
388         const char *msg;
389
390         if (mask &&
391             ((uint16_t)(mask->hdr.src_port + 1) > UINT16_C(1) ||
392              (uint16_t)(mask->hdr.dst_port + 1) > UINT16_C(1))) {
393                 msg = "mlx4 does not support matching partial UDP fields";
394                 goto error;
395         }
396         if (!flow->ibv_attr)
397                 return 0;
398         ++flow->ibv_attr->num_of_specs;
399         udp = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size);
400         *udp = (struct ibv_flow_spec_tcp_udp) {
401                 .type = IBV_FLOW_SPEC_UDP,
402                 .size = sizeof(*udp),
403         };
404         if (!spec)
405                 return 0;
406         udp->val.dst_port = spec->hdr.dst_port;
407         udp->val.src_port = spec->hdr.src_port;
408         udp->mask.dst_port = mask->hdr.dst_port;
409         udp->mask.src_port = mask->hdr.src_port;
410         /* Remove unwanted bits from values. */
411         udp->val.src_port &= udp->mask.src_port;
412         udp->val.dst_port &= udp->mask.dst_port;
413         return 0;
414 error:
415         return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
416                                   item, msg);
417 }
418
419 /**
420  * Merge TCP pattern item into flow rule handle.
421  *
422  * Additional mlx4-specific constraints on supported fields:
423  *
424  * - No support for partial masks.
425  *
426  * @param[in, out] flow
427  *   Flow rule handle to update.
428  * @param[in] item
429  *   Pattern item to merge.
430  * @param[in] proc
431  *   Associated item-processing object.
432  * @param[out] error
433  *   Perform verbose error reporting if not NULL.
434  *
435  * @return
436  *   0 on success, a negative errno value otherwise and rte_errno is set.
437  */
438 static int
439 mlx4_flow_merge_tcp(struct rte_flow *flow,
440                     const struct rte_flow_item *item,
441                     const struct mlx4_flow_proc_item *proc,
442                     struct rte_flow_error *error)
443 {
444         const struct rte_flow_item_tcp *spec = item->spec;
445         const struct rte_flow_item_tcp *mask =
446                 spec ? (item->mask ? item->mask : proc->mask_default) : NULL;
447         struct ibv_flow_spec_tcp_udp *tcp;
448         const char *msg;
449
450         if (mask &&
451             ((uint16_t)(mask->hdr.src_port + 1) > UINT16_C(1) ||
452              (uint16_t)(mask->hdr.dst_port + 1) > UINT16_C(1))) {
453                 msg = "mlx4 does not support matching partial TCP fields";
454                 goto error;
455         }
456         if (!flow->ibv_attr)
457                 return 0;
458         ++flow->ibv_attr->num_of_specs;
459         tcp = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size);
460         *tcp = (struct ibv_flow_spec_tcp_udp) {
461                 .type = IBV_FLOW_SPEC_TCP,
462                 .size = sizeof(*tcp),
463         };
464         if (!spec)
465                 return 0;
466         tcp->val.dst_port = spec->hdr.dst_port;
467         tcp->val.src_port = spec->hdr.src_port;
468         tcp->mask.dst_port = mask->hdr.dst_port;
469         tcp->mask.src_port = mask->hdr.src_port;
470         /* Remove unwanted bits from values. */
471         tcp->val.src_port &= tcp->mask.src_port;
472         tcp->val.dst_port &= tcp->mask.dst_port;
473         return 0;
474 error:
475         return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
476                                   item, msg);
477 }
478
479 /**
480  * Perform basic sanity checks on a pattern item.
481  *
482  * @param[in] item
483  *   Item specification.
484  * @param[in] proc
485  *   Associated item-processing object.
486  * @param[out] error
487  *   Perform verbose error reporting if not NULL.
488  *
489  * @return
490  *   0 on success, a negative errno value otherwise and rte_errno is set.
491  */
492 static int
493 mlx4_flow_item_check(const struct rte_flow_item *item,
494                      const struct mlx4_flow_proc_item *proc,
495                      struct rte_flow_error *error)
496 {
497         const uint8_t *mask;
498         unsigned int i;
499
500         /* item->last and item->mask cannot exist without item->spec. */
501         if (!item->spec && (item->mask || item->last))
502                 return rte_flow_error_set
503                         (error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, item,
504                          "\"mask\" or \"last\" field provided without a"
505                          " corresponding \"spec\"");
506         /* No spec, no mask, no problem. */
507         if (!item->spec)
508                 return 0;
509         mask = item->mask ?
510                 (const uint8_t *)item->mask :
511                 (const uint8_t *)proc->mask_default;
512         assert(mask);
513         /*
514          * Single-pass check to make sure that:
515          * - Mask is supported, no bits are set outside proc->mask_support.
516          * - Both item->spec and item->last are included in mask.
517          */
518         for (i = 0; i != proc->mask_sz; ++i) {
519                 if (!mask[i])
520                         continue;
521                 if ((mask[i] | ((const uint8_t *)proc->mask_support)[i]) !=
522                     ((const uint8_t *)proc->mask_support)[i])
523                         return rte_flow_error_set
524                                 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
525                                  item, "unsupported field found in \"mask\"");
526                 if (item->last &&
527                     (((const uint8_t *)item->spec)[i] & mask[i]) !=
528                     (((const uint8_t *)item->last)[i] & mask[i]))
529                         return rte_flow_error_set
530                                 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
531                                  item,
532                                  "range between \"spec\" and \"last\""
533                                  " is larger than \"mask\"");
534         }
535         return 0;
536 }
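/*
 * Example of the spec/last check above: a UDP item with
 * spec->hdr.dst_port = 80, last->hdr.dst_port = 90 and a full 0xffff mask
 * is rejected because the [80, 90] range cannot be expressed once the mask
 * is applied; identical spec/last values (or zeroed mask bytes) pass.
 */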
537
538 /** Graph of supported items and associated actions. */
539 static const struct mlx4_flow_proc_item mlx4_flow_proc_item_list[] = {
540         [RTE_FLOW_ITEM_TYPE_END] = {
541                 .next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_ETH),
542         },
543         [RTE_FLOW_ITEM_TYPE_ETH] = {
544                 .next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_VLAN,
545                                        RTE_FLOW_ITEM_TYPE_IPV4),
546                 .mask_support = &(const struct rte_flow_item_eth){
547                         /* Only destination MAC can be matched. */
548                         .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
549                 },
550                 .mask_default = &rte_flow_item_eth_mask,
551                 .mask_sz = sizeof(struct rte_flow_item_eth),
552                 .merge = mlx4_flow_merge_eth,
553                 .dst_sz = sizeof(struct ibv_flow_spec_eth),
554         },
555         [RTE_FLOW_ITEM_TYPE_VLAN] = {
556                 .next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_IPV4),
557                 .mask_support = &(const struct rte_flow_item_vlan){
558                         /* Only TCI VID matching is supported. */
559                         .tci = RTE_BE16(0x0fff),
560                 },
561                 .mask_default = &rte_flow_item_vlan_mask,
562                 .mask_sz = sizeof(struct rte_flow_item_vlan),
563                 .merge = mlx4_flow_merge_vlan,
564                 .dst_sz = 0,
565         },
566         [RTE_FLOW_ITEM_TYPE_IPV4] = {
567                 .next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_UDP,
568                                        RTE_FLOW_ITEM_TYPE_TCP),
569                 .mask_support = &(const struct rte_flow_item_ipv4){
570                         .hdr = {
571                                 .src_addr = RTE_BE32(0xffffffff),
572                                 .dst_addr = RTE_BE32(0xffffffff),
573                         },
574                 },
575                 .mask_default = &rte_flow_item_ipv4_mask,
576                 .mask_sz = sizeof(struct rte_flow_item_ipv4),
577                 .merge = mlx4_flow_merge_ipv4,
578                 .dst_sz = sizeof(struct ibv_flow_spec_ipv4),
579         },
580         [RTE_FLOW_ITEM_TYPE_UDP] = {
581                 .mask_support = &(const struct rte_flow_item_udp){
582                         .hdr = {
583                                 .src_port = RTE_BE16(0xffff),
584                                 .dst_port = RTE_BE16(0xffff),
585                         },
586                 },
587                 .mask_default = &rte_flow_item_udp_mask,
588                 .mask_sz = sizeof(struct rte_flow_item_udp),
589                 .merge = mlx4_flow_merge_udp,
590                 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
591         },
592         [RTE_FLOW_ITEM_TYPE_TCP] = {
593                 .mask_support = &(const struct rte_flow_item_tcp){
594                         .hdr = {
595                                 .src_port = RTE_BE16(0xffff),
596                                 .dst_port = RTE_BE16(0xffff),
597                         },
598                 },
599                 .mask_default = &rte_flow_item_tcp_mask,
600                 .mask_sz = sizeof(struct rte_flow_item_tcp),
601                 .merge = mlx4_flow_merge_tcp,
602                 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
603         },
604 };
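/*
 * Per the graph above, ETH -> VLAN -> IPV4 -> UDP (or TCP) is the deepest
 * supported chain. A pattern accepted by this PMD could look like the
 * following sketch (spec/mask variables are placeholders chosen by the
 * application):
 *
 *   struct rte_flow_item pattern[] = {
 *           { .type = RTE_FLOW_ITEM_TYPE_ETH,
 *             .spec = &eth_spec, .mask = &eth_mask },
 *           { .type = RTE_FLOW_ITEM_TYPE_VLAN,
 *             .spec = &vlan_spec, .mask = &vlan_mask },
 *           { .type = RTE_FLOW_ITEM_TYPE_IPV4,
 *             .spec = &ipv4_spec, .mask = &ipv4_mask },
 *           { .type = RTE_FLOW_ITEM_TYPE_UDP,
 *             .spec = &udp_spec, .mask = &udp_mask },
 *           { .type = RTE_FLOW_ITEM_TYPE_END },
 *   };
 *
 * with each mask restricted to what mask_support allows (destination MAC
 * only, TCI VID only, full IPv4 addresses, full L4 ports).
 */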
605
606 /**
607  * Make sure a flow rule is supported and initialize associated structure.
608  *
609  * @param priv
610  *   Pointer to private structure.
611  * @param[in] attr
612  *   Flow rule attributes.
613  * @param[in] pattern
614  *   Pattern specification (list terminated by the END pattern item).
615  * @param[in] actions
616  *   Associated actions (list terminated by the END action).
617  * @param[out] error
618  *   Perform verbose error reporting if not NULL.
619  * @param[in, out] addr
620  *   Buffer where the resulting flow rule handle pointer must be stored.
621  *   If NULL, stop processing after validation stage.
622  *
623  * @return
624  *   0 on success, a negative errno value otherwise and rte_errno is set.
625  */
626 static int
627 mlx4_flow_prepare(struct priv *priv,
628                   const struct rte_flow_attr *attr,
629                   const struct rte_flow_item pattern[],
630                   const struct rte_flow_action actions[],
631                   struct rte_flow_error *error,
632                   struct rte_flow **addr)
633 {
634         const struct rte_flow_item *item;
635         const struct rte_flow_action *action;
636         const struct mlx4_flow_proc_item *proc;
637         struct rte_flow temp = { .ibv_attr_size = sizeof(*temp.ibv_attr) };
638         struct rte_flow *flow = &temp;
639         const char *msg = NULL;
640         int overlap;
641
642         if (attr->group)
643                 return rte_flow_error_set
644                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
645                          NULL, "groups are not supported");
646         if (attr->priority > MLX4_FLOW_PRIORITY_LAST)
647                 return rte_flow_error_set
648                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
649                          NULL, "maximum priority level is "
650                          MLX4_STR_EXPAND(MLX4_FLOW_PRIORITY_LAST));
651         if (attr->egress)
652                 return rte_flow_error_set
653                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
654                          NULL, "egress is not supported");
655         if (attr->transfer)
656                 return rte_flow_error_set
657                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
658                          NULL, "transfer is not supported");
659         if (!attr->ingress)
660                 return rte_flow_error_set
661                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
662                          NULL, "only ingress is supported");
663 fill:
664         overlap = 0;
665         proc = mlx4_flow_proc_item_list;
666         /* Go over pattern. */
667         for (item = pattern; item->type; ++item) {
668                 const struct mlx4_flow_proc_item *next = NULL;
669                 unsigned int i;
670                 int err;
671
672                 if (item->type == RTE_FLOW_ITEM_TYPE_VOID)
673                         continue;
674                 if (item->type == MLX4_FLOW_ITEM_TYPE_INTERNAL) {
675                         flow->internal = 1;
676                         continue;
677                 }
678                 if (flow->promisc || flow->allmulti) {
679                         msg = "mlx4 does not support additional matching"
680                                 " criteria combined with indiscriminate"
681                                 " matching on Ethernet headers";
682                         goto exit_item_not_supported;
683                 }
684                 for (i = 0; proc->next_item && proc->next_item[i]; ++i) {
685                         if (proc->next_item[i] == item->type) {
686                                 next = &mlx4_flow_proc_item_list[item->type];
687                                 break;
688                         }
689                 }
690                 if (!next)
691                         goto exit_item_not_supported;
692                 proc = next;
693                 /*
694                  * Perform basic sanity checks only once, while handle is
695                  * not allocated.
696                  */
697                 if (flow == &temp) {
698                         err = mlx4_flow_item_check(item, proc, error);
699                         if (err)
700                                 return err;
701                 }
702                 if (proc->merge) {
703                         err = proc->merge(flow, item, proc, error);
704                         if (err)
705                                 return err;
706                 }
707                 flow->ibv_attr_size += proc->dst_sz;
708         }
709         /* Go over actions list. */
710         for (action = actions; action->type; ++action) {
711                 /* This one may appear anywhere multiple times. */
712                 if (action->type == RTE_FLOW_ACTION_TYPE_VOID)
713                         continue;
714                 /* Fate-deciding actions may appear exactly once. */
715                 if (overlap) {
716                         msg = "cannot combine several fate-deciding actions,"
717                                 " choose between DROP, QUEUE or RSS";
718                         goto exit_action_not_supported;
719                 }
720                 overlap = 1;
721                 switch (action->type) {
722                         const struct rte_flow_action_queue *queue;
723                         const struct rte_flow_action_rss *rss;
724                         const uint8_t *rss_key;
725                         uint32_t rss_key_len;
726                         uint64_t fields;
727                         unsigned int i;
728
729                 case RTE_FLOW_ACTION_TYPE_DROP:
730                         flow->drop = 1;
731                         break;
732                 case RTE_FLOW_ACTION_TYPE_QUEUE:
733                         if (flow->rss)
734                                 break;
735                         queue = action->conf;
736                         if (queue->index >= priv->dev->data->nb_rx_queues) {
737                                 msg = "queue target index beyond number of"
738                                         " configured Rx queues";
739                                 goto exit_action_not_supported;
740                         }
741                         flow->rss = mlx4_rss_get
742                                 (priv, 0, mlx4_rss_hash_key_default, 1,
743                                  &queue->index);
744                         if (!flow->rss) {
745                                 msg = "not enough resources for additional"
746                                         " single-queue RSS context";
747                                 goto exit_action_not_supported;
748                         }
749                         break;
750                 case RTE_FLOW_ACTION_TYPE_RSS:
751                         if (flow->rss)
752                                 break;
753                         rss = action->conf;
754                         /* Default RSS configuration if none is provided. */
755                         if (rss->key_len) {
756                                 rss_key = rss->key;
757                                 rss_key_len = rss->key_len;
758                         } else {
759                                 rss_key = mlx4_rss_hash_key_default;
760                                 rss_key_len = MLX4_RSS_HASH_KEY_SIZE;
761                         }
762                         /* Sanity checks. */
763                         for (i = 0; i < rss->queue_num; ++i)
764                                 if (rss->queue[i] >=
765                                     priv->dev->data->nb_rx_queues)
766                                         break;
767                         if (i != rss->queue_num) {
768                 msg = "queue target index beyond number of"
769                                         " configured Rx queues";
770                                 goto exit_action_not_supported;
771                         }
772                         if (!rte_is_power_of_2(rss->queue_num)) {
773                                 msg = "for RSS, mlx4 requires the number of"
774                                         " queues to be a power of two";
775                                 goto exit_action_not_supported;
776                         }
777                         if (rss_key_len != sizeof(flow->rss->key)) {
778                                 msg = "mlx4 supports exactly one RSS hash key"
779                                         " length: "
780                                         MLX4_STR_EXPAND(MLX4_RSS_HASH_KEY_SIZE);
781                                 goto exit_action_not_supported;
782                         }
783                         for (i = 1; i < rss->queue_num; ++i)
784                                 if (rss->queue[i] - rss->queue[i - 1] != 1)
785                                         break;
786                         if (i != rss->queue_num) {
787                                 msg = "mlx4 requires RSS contexts to use"
788                                         " consecutive queue indices only";
789                                 goto exit_action_not_supported;
790                         }
791                         if (rss->queue[0] % rss->queue_num) {
792                                 msg = "mlx4 requires the first queue of a RSS"
793                                         " context to be aligned on a multiple"
794                                         " of the context size";
795                                 goto exit_action_not_supported;
796                         }
797                         if (rss->func &&
798                             rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ) {
799                                 msg = "the only supported RSS hash function"
800                                         " is Toeplitz";
801                                 goto exit_action_not_supported;
802                         }
803                         if (rss->level) {
804                                 msg = "a nonzero RSS encapsulation level is"
805                                         " not supported";
806                                 goto exit_action_not_supported;
807                         }
808                         rte_errno = 0;
809                         fields = mlx4_conv_rss_types(priv, rss->types);
810                         if (fields == (uint64_t)-1 && rte_errno) {
811                                 msg = "unsupported RSS hash type requested";
812                                 goto exit_action_not_supported;
813                         }
814                         flow->rss = mlx4_rss_get
815                                 (priv, fields, rss_key, rss->queue_num,
816                                  rss->queue);
817                         if (!flow->rss) {
818                                 msg = "either invalid parameters or not enough"
819                                         " resources for additional multi-queue"
820                                         " RSS context";
821                                 goto exit_action_not_supported;
822                         }
823                         break;
824                 default:
825                         goto exit_action_not_supported;
826                 }
827         }
828         /* When fate is unknown, drop traffic. */
829         if (!overlap)
830                 flow->drop = 1;
831         /* Validation ends here. */
832         if (!addr) {
833                 if (flow->rss)
834                         mlx4_rss_put(flow->rss);
835                 return 0;
836         }
837         if (flow == &temp) {
838                 /* Allocate proper handle based on collected data. */
839                 const struct mlx4_malloc_vec vec[] = {
840                         {
841                                 .align = alignof(struct rte_flow),
842                                 .size = sizeof(*flow),
843                                 .addr = (void **)&flow,
844                         },
845                         {
846                                 .align = alignof(struct ibv_flow_attr),
847                                 .size = temp.ibv_attr_size,
848                                 .addr = (void **)&temp.ibv_attr,
849                         },
850                 };
851
852                 if (!mlx4_zmallocv(__func__, vec, RTE_DIM(vec))) {
853                         if (temp.rss)
854                                 mlx4_rss_put(temp.rss);
855                         return rte_flow_error_set
856                                 (error, -rte_errno,
857                                  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
858                                  "flow rule handle allocation failure");
859                 }
860                 /* Most fields will be updated by second pass. */
861                 *flow = (struct rte_flow){
862                         .ibv_attr = temp.ibv_attr,
863                         .ibv_attr_size = sizeof(*flow->ibv_attr),
864                         .rss = temp.rss,
865                 };
866                 *flow->ibv_attr = (struct ibv_flow_attr){
867                         .type = IBV_FLOW_ATTR_NORMAL,
868                         .size = sizeof(*flow->ibv_attr),
869                         .priority = attr->priority,
870                         .port = priv->port,
871                 };
872                 goto fill;
873         }
874         *addr = flow;
875         return 0;
876 exit_item_not_supported:
877         return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
878                                   item, msg ? msg : "item not supported");
879 exit_action_not_supported:
880         return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
881                                   action, msg ? msg : "action not supported");
882 }
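/*
 * The RSS action checks above boil down to the following (examples):
 *
 * - queues { 4, 5, 6, 7 } -> valid: power-of-two count, consecutive
 *   indices, first index aligned on the context size.
 * - queues { 0, 1, 2 } -> rejected: count is not a power of two.
 * - queues { 2, 3, 4, 5 } -> rejected: first index not a multiple of 4.
 * - queues { 0, 2, 4, 6 } -> rejected: indices not consecutive.
 */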
883
884 /**
885  * Validate a flow supported by the NIC.
886  *
887  * @see rte_flow_validate()
888  * @see rte_flow_ops
889  */
890 static int
891 mlx4_flow_validate(struct rte_eth_dev *dev,
892                    const struct rte_flow_attr *attr,
893                    const struct rte_flow_item pattern[],
894                    const struct rte_flow_action actions[],
895                    struct rte_flow_error *error)
896 {
897         struct priv *priv = dev->data->dev_private;
898
899         return mlx4_flow_prepare(priv, attr, pattern, actions, error, NULL);
900 }
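/*
 * Applications reach the validation path above through rte_flow_validate().
 * Minimal sketch, assuming port_id refers to this PMD and Rx queue 0 is
 * configured:
 *
 *   struct rte_flow_attr attr = { .ingress = 1 };
 *   struct rte_flow_item pattern[] = {
 *           { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *           { .type = RTE_FLOW_ITEM_TYPE_END },
 *   };
 *   struct rte_flow_action_queue queue = { .index = 0 };
 *   struct rte_flow_action actions[] = {
 *           { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *           { .type = RTE_FLOW_ACTION_TYPE_END },
 *   };
 *   struct rte_flow_error flow_err;
 *
 *   if (rte_flow_validate(port_id, &attr, pattern, actions, &flow_err))
 *           printf("unsupported: %s\n",
 *                  flow_err.message ? flow_err.message : "(no details)");
 */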
901
902 /**
903  * Get a drop flow rule resources instance.
904  *
905  * @param priv
906  *   Pointer to private structure.
907  *
908  * @return
909  *   Pointer to drop flow resources on success, NULL otherwise and rte_errno
910  *   is set.
911  */
912 static struct mlx4_drop *
913 mlx4_drop_get(struct priv *priv)
914 {
915         struct mlx4_drop *drop = priv->drop;
916
917         if (drop) {
918                 assert(drop->refcnt);
919                 assert(drop->priv == priv);
920                 ++drop->refcnt;
921                 return drop;
922         }
923         drop = rte_malloc(__func__, sizeof(*drop), 0);
924         if (!drop)
925                 goto error;
926         *drop = (struct mlx4_drop){
927                 .priv = priv,
928                 .refcnt = 1,
929         };
930         drop->cq = mlx4_glue->create_cq(priv->ctx, 1, NULL, NULL, 0);
931         if (!drop->cq)
932                 goto error;
933         drop->qp = mlx4_glue->create_qp
934                 (priv->pd,
935                  &(struct ibv_qp_init_attr){
936                         .send_cq = drop->cq,
937                         .recv_cq = drop->cq,
938                         .qp_type = IBV_QPT_RAW_PACKET,
939                  });
940         if (!drop->qp)
941                 goto error;
942         priv->drop = drop;
943         return drop;
944 error:
945         if (drop && drop->qp)
946                 claim_zero(mlx4_glue->destroy_qp(drop->qp));
947         if (drop && drop->cq)
948                 claim_zero(mlx4_glue->destroy_cq(drop->cq));
949         if (drop)
950                 rte_free(drop);
951         rte_errno = ENOMEM;
952         return NULL;
953 }
954
955 /**
956  * Give back a drop flow rule resources instance.
957  *
958  * @param drop
959  *   Pointer to drop flow rule resources.
960  */
961 static void
962 mlx4_drop_put(struct mlx4_drop *drop)
963 {
964         assert(drop->refcnt);
965         if (--drop->refcnt)
966                 return;
967         drop->priv->drop = NULL;
968         claim_zero(mlx4_glue->destroy_qp(drop->qp));
969         claim_zero(mlx4_glue->destroy_cq(drop->cq));
970         rte_free(drop);
971 }
972
973 /**
974  * Toggle a configured flow rule.
975  *
976  * @param priv
977  *   Pointer to private structure.
978  * @param flow
979  *   Flow rule handle to toggle.
980  * @param enable
981  *   Whether associated Verbs flow must be created or removed.
982  * @param[out] error
983  *   Perform verbose error reporting if not NULL.
984  *
985  * @return
986  *   0 on success, a negative errno value otherwise and rte_errno is set.
987  */
988 static int
989 mlx4_flow_toggle(struct priv *priv,
990                  struct rte_flow *flow,
991                  int enable,
992                  struct rte_flow_error *error)
993 {
994         struct ibv_qp *qp = NULL;
995         const char *msg;
996         int err;
997
998         if (!enable) {
999                 if (!flow->ibv_flow)
1000                         return 0;
1001                 claim_zero(mlx4_glue->destroy_flow(flow->ibv_flow));
1002                 flow->ibv_flow = NULL;
1003                 if (flow->drop)
1004                         mlx4_drop_put(priv->drop);
1005                 else if (flow->rss)
1006                         mlx4_rss_detach(flow->rss);
1007                 return 0;
1008         }
1009         assert(flow->ibv_attr);
1010         if (!flow->internal &&
1011             !priv->isolated &&
1012             flow->ibv_attr->priority == MLX4_FLOW_PRIORITY_LAST) {
1013                 if (flow->ibv_flow) {
1014                         claim_zero(mlx4_glue->destroy_flow(flow->ibv_flow));
1015                         flow->ibv_flow = NULL;
1016                         if (flow->drop)
1017                                 mlx4_drop_put(priv->drop);
1018                         else if (flow->rss)
1019                                 mlx4_rss_detach(flow->rss);
1020                 }
1021                 err = EACCES;
1022                 msg = ("priority level "
1023                        MLX4_STR_EXPAND(MLX4_FLOW_PRIORITY_LAST)
1024                        " is reserved when not in isolated mode");
1025                 goto error;
1026         }
1027         if (flow->rss) {
1028                 struct mlx4_rss *rss = flow->rss;
1029                 int missing = 0;
1030                 unsigned int i;
1031
1032                 /* Stop at the first nonexistent target queue. */
1033                 for (i = 0; i != rss->queues; ++i)
1034                         if (rss->queue_id[i] >=
1035                             priv->dev->data->nb_rx_queues ||
1036                             !priv->dev->data->rx_queues[rss->queue_id[i]]) {
1037                                 missing = 1;
1038                                 break;
1039                         }
1040                 if (flow->ibv_flow) {
1041                         if (missing ^ !flow->drop)
1042                                 return 0;
1043                         /* Verbs flow needs updating. */
1044                         claim_zero(mlx4_glue->destroy_flow(flow->ibv_flow));
1045                         flow->ibv_flow = NULL;
1046                         if (flow->drop)
1047                                 mlx4_drop_put(priv->drop);
1048                         else
1049                                 mlx4_rss_detach(rss);
1050                 }
1051                 if (!missing) {
1052                         err = mlx4_rss_attach(rss);
1053                         if (err) {
1054                                 err = -err;
1055                                 msg = "cannot create indirection table or hash"
1056                                         " QP to associate flow rule with";
1057                                 goto error;
1058                         }
1059                         qp = rss->qp;
1060                 }
1061                 /* A missing target queue drops traffic implicitly. */
1062                 flow->drop = missing;
1063         }
1064         if (flow->drop) {
1065                 if (flow->ibv_flow)
1066                         return 0;
1067                 mlx4_drop_get(priv);
1068                 if (!priv->drop) {
1069                         err = rte_errno;
1070                         msg = "resources for drop flow rule cannot be created";
1071                         goto error;
1072                 }
1073                 qp = priv->drop->qp;
1074         }
1075         assert(qp);
1076         if (flow->ibv_flow)
1077                 return 0;
1078         flow->ibv_flow = mlx4_glue->create_flow(qp, flow->ibv_attr);
1079         if (flow->ibv_flow)
1080                 return 0;
1081         if (flow->drop)
1082                 mlx4_drop_put(priv->drop);
1083         else if (flow->rss)
1084                 mlx4_rss_detach(flow->rss);
1085         err = errno;
1086         msg = "flow rule rejected by device";
1087 error:
1088         return rte_flow_error_set
1089                 (error, err, RTE_FLOW_ERROR_TYPE_HANDLE, flow, msg);
1090 }
1091
1092 /**
1093  * Create a flow.
1094  *
1095  * @see rte_flow_create()
1096  * @see rte_flow_ops
1097  */
1098 static struct rte_flow *
1099 mlx4_flow_create(struct rte_eth_dev *dev,
1100                  const struct rte_flow_attr *attr,
1101                  const struct rte_flow_item pattern[],
1102                  const struct rte_flow_action actions[],
1103                  struct rte_flow_error *error)
1104 {
1105         struct priv *priv = dev->data->dev_private;
1106         struct rte_flow *flow;
1107         int err;
1108
1109         err = mlx4_flow_prepare(priv, attr, pattern, actions, error, &flow);
1110         if (err)
1111                 return NULL;
1112         err = mlx4_flow_toggle(priv, flow, priv->started, error);
1113         if (!err) {
1114                 struct rte_flow *curr = LIST_FIRST(&priv->flows);
1115
1116                 /* New rules are inserted after internal ones. */
1117                 if (!curr || !curr->internal) {
1118                         LIST_INSERT_HEAD(&priv->flows, flow, next);
1119                 } else {
1120                         while (LIST_NEXT(curr, next) &&
1121                                LIST_NEXT(curr, next)->internal)
1122                                 curr = LIST_NEXT(curr, next);
1123                         LIST_INSERT_AFTER(curr, flow, next);
1124                 }
1125                 return flow;
1126         }
1127         if (flow->rss)
1128                 mlx4_rss_put(flow->rss);
1129         rte_flow_error_set(error, -err, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1130                            error->message);
1131         rte_free(flow);
1132         return NULL;
1133 }
1134
1135 /**
1136  * Configure isolated mode.
1137  *
1138  * @see rte_flow_isolate()
1139  * @see rte_flow_ops
1140  */
1141 static int
1142 mlx4_flow_isolate(struct rte_eth_dev *dev,
1143                   int enable,
1144                   struct rte_flow_error *error)
1145 {
1146         struct priv *priv = dev->data->dev_private;
1147
1148         if (!!enable == !!priv->isolated)
1149                 return 0;
1150         priv->isolated = !!enable;
1151         if (mlx4_flow_sync(priv, error)) {
1152                 priv->isolated = !enable;
1153                 return -rte_errno;
1154         }
1155         return 0;
1156 }
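/*
 * Applications toggle this through rte_flow_isolate(), e.g.
 * rte_flow_isolate(port_id, 1, &error). While isolated, only explicitly
 * created flow rules are supposed to receive traffic, which is why
 * mlx4_flow_sync() is invoked to refresh the internal rules.
 */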
1157
1158 /**
1159  * Destroy a flow rule.
1160  *
1161  * @see rte_flow_destroy()
1162  * @see rte_flow_ops
1163  */
1164 static int
1165 mlx4_flow_destroy(struct rte_eth_dev *dev,
1166                   struct rte_flow *flow,
1167                   struct rte_flow_error *error)
1168 {
1169         struct priv *priv = dev->data->dev_private;
1170         int err = mlx4_flow_toggle(priv, flow, 0, error);
1171
1172         if (err)
1173                 return err;
1174         LIST_REMOVE(flow, next);
1175         if (flow->rss)
1176                 mlx4_rss_put(flow->rss);
1177         rte_free(flow);
1178         return 0;
1179 }
1180
1181 /**
1182  * Destroy user-configured flow rules.
1183  *
1184  * This function skips internal flow rules.
1185  *
1186  * @see rte_flow_flush()
1187  * @see rte_flow_ops
1188  */
1189 static int
1190 mlx4_flow_flush(struct rte_eth_dev *dev,
1191                 struct rte_flow_error *error)
1192 {
1193         struct priv *priv = dev->data->dev_private;
1194         struct rte_flow *flow = LIST_FIRST(&priv->flows);
1195
1196         while (flow) {
1197                 struct rte_flow *next = LIST_NEXT(flow, next);
1198
1199                 if (!flow->internal)
1200                         mlx4_flow_destroy(dev, flow, error);
1201                 flow = next;
1202         }
1203         return 0;
1204 }
1205
1206 /**
1207  * Helper function to determine the next configured VLAN filter.
1208  *
1209  * @param priv
1210  *   Pointer to private structure.
1211  * @param vlan
1212  *   VLAN ID to use as a starting point.
1213  *
1214  * @return
1215  *   Next configured VLAN ID or a high value (>= 4096) if there is none.
1216  */
1217 static uint16_t
1218 mlx4_flow_internal_next_vlan(struct priv *priv, uint16_t vlan)
1219 {
1220         while (vlan < 4096) {
1221                 if (priv->dev->data->vlan_filter_conf.ids[vlan / 64] &
1222                     (UINT64_C(1) << (vlan % 64)))
1223                         return vlan;
1224                 ++vlan;
1225         }
1226         return vlan;
1227 }
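/*
 * The VLAN filter bitmap is an array of 64-bit words: VLAN ID 100 lives in
 * ids[100 / 64] at bit (100 % 64). With only IDs 100 and 300 configured,
 * scanning from 0 returns 100, scanning from 101 returns 300, and scanning
 * from 301 returns 4096 (no more entries).
 */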
1228
1229 /**
1230  * Generate internal flow rules.
1231  *
1232  * Various flow rules are created depending on the mode the device is in:
1233  *
1234  * 1. Promiscuous:
1235  *       port MAC + broadcast + catch-all (VLAN filtering is ignored).
1236  * 2. All multicast:
1237  *       port MAC/VLAN + broadcast + catch-all multicast.
1238  * 3. Otherwise:
1239  *       port MAC/VLAN + broadcast MAC/VLAN.
1240  *
1241  * About MAC flow rules:
1242  *
1243  * - MAC flow rules are generated from @p dev->data->mac_addrs
1244  *   (@p priv->mac array).
1245  * - An additional flow rule for Ethernet broadcasts is also generated.
1246  * - All these are per-VLAN if @p DEV_RX_OFFLOAD_VLAN_FILTER
1247  *   is enabled and VLAN filters are configured.
1248  *
1249  * @param priv
1250  *   Pointer to private structure.
1251  * @param[out] error
1252  *   Perform verbose error reporting if not NULL.
1253  *
1254  * @return
1255  *   0 on success, a negative errno value otherwise and rte_errno is set.
1256  */
1257 static int
1258 mlx4_flow_internal(struct priv *priv, struct rte_flow_error *error)
1259 {
1260         struct rte_flow_attr attr = {
1261                 .priority = MLX4_FLOW_PRIORITY_LAST,
1262                 .ingress = 1,
1263         };
1264         struct rte_flow_item_eth eth_spec;
1265         const struct rte_flow_item_eth eth_mask = {
1266                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1267         };
1268         const struct rte_flow_item_eth eth_allmulti = {
1269                 .dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
1270         };
1271         struct rte_flow_item_vlan vlan_spec;
1272         const struct rte_flow_item_vlan vlan_mask = {
1273                 .tci = RTE_BE16(0x0fff),
1274         };
1275         struct rte_flow_item pattern[] = {
1276                 {
1277                         .type = MLX4_FLOW_ITEM_TYPE_INTERNAL,
1278                 },
1279                 {
1280                         .type = RTE_FLOW_ITEM_TYPE_ETH,
1281                         .spec = &eth_spec,
1282                         .mask = &eth_mask,
1283                 },
1284                 {
1285                         /* Replaced with VLAN if filtering is enabled. */
1286                         .type = RTE_FLOW_ITEM_TYPE_END,
1287                 },
1288                 {
1289                         .type = RTE_FLOW_ITEM_TYPE_END,
1290                 },
1291         };
1292         /*
1293          * Round number of queues down to their previous power of 2 to
1294          * comply with RSS context limitations. Extra queues silently do not
1295          * get RSS by default.
1296          */
1297         uint32_t queues =
1298                 rte_align32pow2(priv->dev->data->nb_rx_queues + 1) >> 1;
1299         uint16_t queue[queues];
1300         struct rte_flow_action_rss action_rss = {
1301                 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
1302                 .level = 0,
1303                 .types = -1,
1304                 .key_len = MLX4_RSS_HASH_KEY_SIZE,
1305                 .queue_num = queues,
1306                 .key = mlx4_rss_hash_key_default,
1307                 .queue = queue,
1308         };
1309         struct rte_flow_action actions[] = {
1310                 {
1311                         .type = RTE_FLOW_ACTION_TYPE_RSS,
1312                         .conf = &action_rss,
1313                 },
1314                 {
1315                         .type = RTE_FLOW_ACTION_TYPE_END,
1316                 },
1317         };
1318         struct ether_addr *rule_mac = &eth_spec.dst;
1319         rte_be16_t *rule_vlan =
1320                 (priv->dev->data->dev_conf.rxmode.offloads &
1321                  DEV_RX_OFFLOAD_VLAN_FILTER) &&
1322                 !priv->dev->data->promiscuous ?
1323                 &vlan_spec.tci :
1324                 NULL;
1325         uint16_t vlan = 0;
1326         struct rte_flow *flow;
1327         unsigned int i;
1328         int err = 0;
1329
1330         /* Nothing to be done if there are no Rx queues. */
1331         if (!queues)
1332                 goto error;
1333         /* Prepare default RSS configuration. */
1334         for (i = 0; i != queues; ++i)
1335                 queue[i] = i;
1336         /*
1337          * Set up VLAN item if filtering is enabled and at least one VLAN
1338          * filter is configured.
1339          */
1340         if (rule_vlan) {
1341                 vlan = mlx4_flow_internal_next_vlan(priv, 0);
1342                 if (vlan < 4096) {
1343                         pattern[2] = (struct rte_flow_item){
1344                                 .type = RTE_FLOW_ITEM_TYPE_VLAN,
1345                                 .spec = &vlan_spec,
1346                                 .mask = &vlan_mask,
1347                         };
1348 next_vlan:
1349                         *rule_vlan = rte_cpu_to_be_16(vlan);
1350                 } else {
1351                         rule_vlan = NULL;
1352                 }
1353         }
1354         for (i = 0; i != RTE_DIM(priv->mac) + 1; ++i) {
1355                 const struct ether_addr *mac;
1356
1357                 /* Broadcasts are handled by an extra iteration. */
1358                 if (i < RTE_DIM(priv->mac))
1359                         mac = &priv->mac[i];
1360                 else
1361                         mac = &eth_mask.dst;
1362                 if (is_zero_ether_addr(mac))
1363                         continue;
1364                 /* Check if MAC flow rule is already present. */
1365                 for (flow = LIST_FIRST(&priv->flows);
1366                      flow && flow->internal;
1367                      flow = LIST_NEXT(flow, next)) {
1368                         const struct ibv_flow_spec_eth *eth =
1369                                 (const void *)((uintptr_t)flow->ibv_attr +
1370                                                sizeof(*flow->ibv_attr));
1371                         unsigned int j;
1372
1373                         if (!flow->mac)
1374                                 continue;
1375                         assert(flow->ibv_attr->type == IBV_FLOW_ATTR_NORMAL);
1376                         assert(flow->ibv_attr->num_of_specs == 1);
1377                         assert(eth->type == IBV_FLOW_SPEC_ETH);
1378                         assert(flow->rss);
1379                         if (rule_vlan &&
1380                             (eth->val.vlan_tag != *rule_vlan ||
1381                              eth->mask.vlan_tag != RTE_BE16(0x0fff)))
1382                                 continue;
1383                         if (!rule_vlan && eth->mask.vlan_tag)
1384                                 continue;
1385                         for (j = 0; j != sizeof(mac->addr_bytes); ++j)
1386                                 if (eth->val.dst_mac[j] != mac->addr_bytes[j] ||
1387                                     eth->mask.dst_mac[j] != UINT8_C(0xff) ||
1388                                     eth->val.src_mac[j] != UINT8_C(0x00) ||
1389                                     eth->mask.src_mac[j] != UINT8_C(0x00))
1390                                         break;
1391                         if (j != sizeof(mac->addr_bytes))
1392                                 continue;
1393                         if (flow->rss->queues != queues ||
1394                             memcmp(flow->rss->queue_id, action_rss.queue,
1395                                    queues * sizeof(flow->rss->queue_id[0])))
1396                                 continue;
1397                         break;
1398                 }
1399                 if (!flow || !flow->internal) {
1400                         /* Not found, create a new flow rule. */
1401                         memcpy(rule_mac, mac, sizeof(*mac));
1402                         flow = mlx4_flow_create(priv->dev, &attr, pattern,
1403                                                 actions, error);
1404                         if (!flow) {
1405                                 err = -rte_errno;
1406                                 goto error;
1407                         }
1408                 }
1409                 flow->select = 1;
1410                 flow->mac = 1;
1411         }
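        /*
         * When VLAN filtering applies, the block below jumps back to the
         * next_vlan label above so the same set of MAC rules is re-created
         * for every remaining configured VLAN ID.
         */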
1412         if (rule_vlan) {
1413                 vlan = mlx4_flow_internal_next_vlan(priv, vlan + 1);
1414                 if (vlan < 4096)
1415                         goto next_vlan;
1416         }
1417         /* Take care of promiscuous and all-multicast flow rules. */
1418         if (priv->dev->data->promiscuous || priv->dev->data->all_multicast) {
1419                 for (flow = LIST_FIRST(&priv->flows);
1420                      flow && flow->internal;
1421                      flow = LIST_NEXT(flow, next)) {
1422                         if (priv->dev->data->promiscuous) {
1423                                 if (flow->promisc)
1424                                         break;
1425                         } else {
1426                                 assert(priv->dev->data->all_multicast);
1427                                 if (flow->allmulti)
1428                                         break;
1429                         }
1430                 }
1431                 if (flow && flow->internal) {
1432                         assert(flow->rss);
1433                         if (flow->rss->queues != queues ||
1434                             memcmp(flow->rss->queue_id, action_rss.queue,
1435                                    queues * sizeof(flow->rss->queue_id[0])))
1436                                 flow = NULL;
1437                 }
1438                 if (!flow || !flow->internal) {
1439                         /* Not found, create a new flow rule. */
1440                         if (priv->dev->data->promiscuous) {
1441                                 pattern[1].spec = NULL;
1442                                 pattern[1].mask = NULL;
1443                         } else {
1444                                 assert(priv->dev->data->all_multicast);
1445                                 pattern[1].spec = &eth_allmulti;
1446                                 pattern[1].mask = &eth_allmulti;
1447                         }
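                        /*
                         * The VLAN item (if any) does not apply to
                         * promiscuous/all-multicast rules: overwrite it with
                         * the terminating END item taken from pattern[3].
                         */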
1448                         pattern[2] = pattern[3];
1449                         flow = mlx4_flow_create(priv->dev, &attr, pattern,
1450                                                 actions, error);
1451                         if (!flow) {
1452                                 err = -rte_errno;
1453                                 goto error;
1454                         }
1455                 }
1456                 assert(flow->promisc || flow->allmulti);
1457                 flow->select = 1;
1458         }
1459 error:
1460         /* Clear selection and clean up stale internal flow rules. */
1461         flow = LIST_FIRST(&priv->flows);
1462         while (flow && flow->internal) {
1463                 struct rte_flow *next = LIST_NEXT(flow, next);
1464
1465                 if (!flow->select)
1466                         claim_zero(mlx4_flow_destroy(priv->dev, flow, error));
1467                 else
1468                         flow->select = 0;
1469                 flow = next;
1470         }
1471         return err;
1472 }
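
/*
 * Illustrative sketch, not part of the driver: the internal rules above use a
 * default RSS action spread over the largest power-of-two subset of Rx
 * queues, which is what the rte_align32pow2(nb_rx_queues + 1) >> 1 expression
 * at the top of mlx4_flow_internal() computes. The helper name below is
 * hypothetical and only restates that computation.
 */
static inline unsigned int
example_default_rss_queues(unsigned int nb_rx_queues)
{
        /*
         * Round nb_rx_queues + 1 up to the next power of two, then halve it:
         * the result is the largest power of two <= nb_rx_queues, e.g.
         * 5 Rx queues -> 4 RSS queues, 8 -> 8, 1 -> 1, 0 -> 0.
         */
        return rte_align32pow2(nb_rx_queues + 1) >> 1;
}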
1473
1474 /**
1475  * Synchronize flow rules.
1476  *
1477  * This function synchronizes flow rules with the state of the device by
1478  * taking into account isolated mode and whether target queues are
1479  * configured.
1480  *
1481  * @param priv
1482  *   Pointer to private structure.
1483  * @param[out] error
1484  *   Perform verbose error reporting if not NULL.
1485  *
1486  * @return
1487  *   0 on success, a negative errno value otherwise and rte_errno is set.
1488  */
1489 int
1490 mlx4_flow_sync(struct priv *priv, struct rte_flow_error *error)
1491 {
1492         struct rte_flow *flow;
1493         int ret;
1494
1495         /* Internal flow rules are guaranteed to come first in the list. */
1496         if (priv->isolated) {
1497                 /*
1498                  * Get rid of them in isolated mode, stopping at the first
1499                  * non-internal rule found.
1500                  */
1501                 for (flow = LIST_FIRST(&priv->flows);
1502                      flow && flow->internal;
1503                      flow = LIST_FIRST(&priv->flows))
1504                         claim_zero(mlx4_flow_destroy(priv->dev, flow, error));
1505         } else {
1506                 /* Refresh internal rules. */
1507                 ret = mlx4_flow_internal(priv, error);
1508                 if (ret)
1509                         return ret;
1510         }
1511         /* Toggle the remaining flow rules. */
1512         LIST_FOREACH(flow, &priv->flows, next) {
1513                 ret = mlx4_flow_toggle(priv, flow, priv->started, error);
1514                 if (ret)
1515                         return ret;
1516         }
1517         if (!priv->started)
1518                 assert(!priv->drop);
1519         return 0;
1520 }
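
/*
 * Hedged usage sketch (assumed, simplified): mlx4_flow_sync() is meant to be
 * called again whenever the device state it depends on changes, e.g. from the
 * start/stop handlers once priv->started has been updated. The helper name
 * and error handling below are illustrative only, not the actual mlx4.c code.
 */
static int
example_start_flows(struct priv *priv)
{
        struct rte_flow_error error;
        int ret;

        priv->started = 1;
        ret = mlx4_flow_sync(priv, &error);
        if (ret) {
                /* Roll back and report why flow rules could not be attached. */
                priv->started = 0;
                ERROR("cannot attach flow rules: %s",
                      error.message ? error.message : strerror(rte_errno));
        }
        return ret;
}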
1521
1522 /**
1523  * Clean up all flow rules.
1524  *
1525  * Unlike mlx4_flow_flush(), this function takes care of all remaining flow
1526  * rules regardless of whether they are internal or user-configured.
1527  *
1528  * @param priv
1529  *   Pointer to private structure.
1530  */
1531 void
1532 mlx4_flow_clean(struct priv *priv)
1533 {
1534         struct rte_flow *flow;
1535
1536         while ((flow = LIST_FIRST(&priv->flows)))
1537                 mlx4_flow_destroy(priv->dev, flow, NULL);
1538         assert(LIST_EMPTY(&priv->rss));
1539 }
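
/*
 * Hedged illustration (hypothetical helper name): how the two tear-down paths
 * are expected to differ. mlx4_flow_flush() is the rte_flow callback and, per
 * the comment above, leaves internal rules in place, whereas mlx4_flow_clean()
 * removes everything and is therefore reserved for final device close.
 */
static void
example_teardown(struct priv *priv)
{
        /* rte_flow API path: user rules are destroyed, internal rules stay. */
        claim_zero(mlx4_flow_flush(priv->dev, NULL));
        /* Final close path: everything is destroyed, internal rules included. */
        mlx4_flow_clean(priv);
}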
1540
1541 static const struct rte_flow_ops mlx4_flow_ops = {
1542         .validate = mlx4_flow_validate,
1543         .create = mlx4_flow_create,
1544         .destroy = mlx4_flow_destroy,
1545         .flush = mlx4_flow_flush,
1546         .isolate = mlx4_flow_isolate,
1547 };
1548
1549 /**
1550  * Manage filter operations.
1551  *
1552  * @param dev
1553  *   Pointer to Ethernet device structure.
1554  * @param filter_type
1555  *   Filter type.
1556  * @param filter_op
1557  *   Operation to perform.
1558  * @param arg
1559  *   Pointer to operation-specific structure.
1560  *
1561  * @return
1562  *   0 on success, a negative errno value otherwise and rte_errno is set.
1563  */
1564 int
1565 mlx4_filter_ctrl(struct rte_eth_dev *dev,
1566                  enum rte_filter_type filter_type,
1567                  enum rte_filter_op filter_op,
1568                  void *arg)
1569 {
1570         switch (filter_type) {
1571         case RTE_ETH_FILTER_GENERIC:
1572                 if (filter_op != RTE_ETH_FILTER_GET)
1573                         break;
1574                 *(const void **)arg = &mlx4_flow_ops;
1575                 return 0;
1576         default:
1577                 ERROR("%p: filter type (%d) not supported",
1578                       (void *)dev, filter_type);
1579                 break;
1580         }
1581         rte_errno = ENOTSUP;
1582         return -rte_errno;
1583 }
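
/*
 * Hedged sketch of how the generic filter hook above is meant to be consumed:
 * the rte_flow layer requests the PMD flow ops through filter_ctrl() with
 * RTE_ETH_FILTER_GENERIC / RTE_ETH_FILTER_GET. The helper name below is
 * hypothetical and simply replays that query against this driver.
 */
static const struct rte_flow_ops *
example_get_flow_ops(struct rte_eth_dev *dev)
{
        const struct rte_flow_ops *ops = NULL;

        if (mlx4_filter_ctrl(dev, RTE_ETH_FILTER_GENERIC,
                             RTE_ETH_FILTER_GET, &ops))
                return NULL; /* rte_errno is set to ENOTSUP in that case. */
        return ops;
}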