ethdev: add hash function to RSS flow API action
[dpdk.git] drivers/net/mlx4/mlx4_flow.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2017 6WIND S.A.
3  * Copyright 2017 Mellanox Technologies, Ltd
4  */
5
6 /**
7  * @file
8  * Flow API operations for mlx4 driver.
9  */
10
11 #include <arpa/inet.h>
12 #include <assert.h>
13 #include <errno.h>
14 #include <stdalign.h>
15 #include <stddef.h>
16 #include <stdint.h>
17 #include <string.h>
18 #include <sys/queue.h>
19
20 /* Verbs headers do not support -pedantic. */
21 #ifdef PEDANTIC
22 #pragma GCC diagnostic ignored "-Wpedantic"
23 #endif
24 #include <infiniband/verbs.h>
25 #ifdef PEDANTIC
26 #pragma GCC diagnostic error "-Wpedantic"
27 #endif
28
29 #include <rte_byteorder.h>
30 #include <rte_errno.h>
31 #include <rte_eth_ctrl.h>
32 #include <rte_ethdev_driver.h>
33 #include <rte_ether.h>
34 #include <rte_flow.h>
35 #include <rte_flow_driver.h>
36 #include <rte_malloc.h>
37
38 /* PMD headers. */
39 #include "mlx4.h"
40 #include "mlx4_glue.h"
41 #include "mlx4_flow.h"
42 #include "mlx4_rxtx.h"
43 #include "mlx4_utils.h"
44
45 /** Static initializer for a list of subsequent item types. */
46 #define NEXT_ITEM(...) \
47         (const enum rte_flow_item_type []){ \
48                 __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
49         }
50
51 /** Processor structure associated with a flow item. */
52 struct mlx4_flow_proc_item {
53         /** Bit-mask for fields supported by this PMD. */
54         const void *mask_support;
55         /** Bit-mask to use when @p item->mask is not provided. */
56         const void *mask_default;
57         /** Size in bytes for @p mask_support and @p mask_default. */
58         const unsigned int mask_sz;
59         /** Merge a pattern item into a flow rule handle. */
60         int (*merge)(struct rte_flow *flow,
61                      const struct rte_flow_item *item,
62                      const struct mlx4_flow_proc_item *proc,
63                      struct rte_flow_error *error);
64         /** Size in bytes of the destination structure. */
65         const unsigned int dst_sz;
66         /** List of possible subsequent items. */
67         const enum rte_flow_item_type *const next_item;
68 };
69
70 /** Shared resources for drop flow rules. */
71 struct mlx4_drop {
72         struct ibv_qp *qp; /**< QP target. */
73         struct ibv_cq *cq; /**< CQ associated with above QP. */
74         struct priv *priv; /**< Back pointer to private data. */
75         uint32_t refcnt; /**< Reference count. */
76 };
77
78 /**
79  * Convert DPDK RSS hash types to their Verbs equivalent.
80  *
81  * This function returns the supported (default) set when @p types has
82  * the special value (uint64_t)-1.
83  *
84  * @param priv
85  *   Pointer to private structure.
86  * @param types
87  *   Hash types in DPDK format (see struct rte_eth_rss_conf).
88  *
89  * @return
90  *   A valid Verbs RSS hash fields mask for mlx4 on success, (uint64_t)-1
91  *   otherwise and rte_errno is set.
92  */
93 uint64_t
94 mlx4_conv_rss_types(struct priv *priv, uint64_t types)
95 {
96         enum { IPV4, IPV6, TCP, UDP, };
97         const uint64_t in[] = {
98                 [IPV4] = (ETH_RSS_IPV4 |
99                           ETH_RSS_FRAG_IPV4 |
100                           ETH_RSS_NONFRAG_IPV4_TCP |
101                           ETH_RSS_NONFRAG_IPV4_UDP |
102                           ETH_RSS_NONFRAG_IPV4_OTHER),
103                 [IPV6] = (ETH_RSS_IPV6 |
104                           ETH_RSS_FRAG_IPV6 |
105                           ETH_RSS_NONFRAG_IPV6_TCP |
106                           ETH_RSS_NONFRAG_IPV6_UDP |
107                           ETH_RSS_NONFRAG_IPV6_OTHER |
108                           ETH_RSS_IPV6_EX |
109                           ETH_RSS_IPV6_TCP_EX |
110                           ETH_RSS_IPV6_UDP_EX),
111                 [TCP] = (ETH_RSS_NONFRAG_IPV4_TCP |
112                          ETH_RSS_NONFRAG_IPV6_TCP |
113                          ETH_RSS_IPV6_TCP_EX),
114                 [UDP] = (ETH_RSS_NONFRAG_IPV4_UDP |
115                          ETH_RSS_NONFRAG_IPV6_UDP |
116                          ETH_RSS_IPV6_UDP_EX),
117         };
118         const uint64_t out[RTE_DIM(in)] = {
119                 [IPV4] = IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4,
120                 [IPV6] = IBV_RX_HASH_SRC_IPV6 | IBV_RX_HASH_DST_IPV6,
121                 [TCP] = IBV_RX_HASH_SRC_PORT_TCP | IBV_RX_HASH_DST_PORT_TCP,
122                 [UDP] = IBV_RX_HASH_SRC_PORT_UDP | IBV_RX_HASH_DST_PORT_UDP,
123         };
124         uint64_t seen = 0;
125         uint64_t conv = 0;
126         unsigned int i;
127
128         for (i = 0; i != RTE_DIM(in); ++i)
129                 if (types & in[i]) {
130                         seen |= types & in[i];
131                         conv |= out[i];
132                 }
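            /*
             * All translated fields must be supported by the device and,
             * unless the default set was requested, @p types must not
             * include anything the loop above did not recognize.
             */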
133         if ((conv & priv->hw_rss_sup) == conv) {
134                 if (types == (uint64_t)-1) {
135                         /* Include inner RSS by default if supported. */
136                         conv |= priv->hw_rss_sup & IBV_RX_HASH_INNER;
137                         return conv;
138                 }
139                 if (!(types & ~seen))
140                         return conv;
141         }
142         rte_errno = ENOTSUP;
143         return (uint64_t)-1;
144 }
145
146 /**
147  * Merge Ethernet pattern item into flow rule handle.
148  *
149  * Additional mlx4-specific constraints on supported fields:
150  *
151  * - No support for partial masks, except in the specific case of matching
152  *   all multicast traffic (@p mask->dst equal to 01:00:00:00:00:00
153  *   with the multicast bit set in @p spec->dst).
154  * - Not providing @p item->spec or providing an empty @p mask->dst is
155  *   *only* supported if the rule doesn't specify additional matching
156  *   criteria (i.e. rule is promiscuous-like).
157  *
158  * @param[in, out] flow
159  *   Flow rule handle to update.
160  * @param[in] item
161  *   Pattern item to merge.
162  * @param[in] proc
163  *   Associated item-processing object.
164  * @param[out] error
165  *   Perform verbose error reporting if not NULL.
166  *
167  * @return
168  *   0 on success, a negative errno value otherwise and rte_errno is set.
169  */
170 static int
171 mlx4_flow_merge_eth(struct rte_flow *flow,
172                     const struct rte_flow_item *item,
173                     const struct mlx4_flow_proc_item *proc,
174                     struct rte_flow_error *error)
175 {
176         const struct rte_flow_item_eth *spec = item->spec;
177         const struct rte_flow_item_eth *mask =
178                 spec ? (item->mask ? item->mask : proc->mask_default) : NULL;
179         struct ibv_flow_spec_eth *eth;
180         const char *msg;
181         unsigned int i;
182
183         if (!mask) {
184                 flow->promisc = 1;
185         } else {
186                 uint32_t sum_dst = 0;
187                 uint32_t sum_src = 0;
188
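                    /*
                     * Sum mask bytes to tell empty, multicast-bit-only and
                     * full destination masks apart; any source MAC mask is
                     * rejected below.
                     */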
189                 for (i = 0; i != sizeof(mask->dst.addr_bytes); ++i) {
190                         sum_dst += mask->dst.addr_bytes[i];
191                         sum_src += mask->src.addr_bytes[i];
192                 }
193                 if (sum_src) {
194                         msg = "mlx4 does not support source MAC matching";
195                         goto error;
196                 } else if (!sum_dst) {
197                         flow->promisc = 1;
198                 } else if (sum_dst == 1 && mask->dst.addr_bytes[0] == 1) {
199                         if (!(spec->dst.addr_bytes[0] & 1)) {
200                                 msg = "mlx4 does not support the explicit"
201                                         " exclusion of all multicast traffic";
202                                 goto error;
203                         }
204                         flow->allmulti = 1;
205                 } else if (sum_dst != (UINT8_C(0xff) * ETHER_ADDR_LEN)) {
206                         msg = "mlx4 does not support matching partial"
207                                 " Ethernet fields";
208                         goto error;
209                 }
210         }
211         if (!flow->ibv_attr)
212                 return 0;
213         if (flow->promisc) {
214                 flow->ibv_attr->type = IBV_FLOW_ATTR_ALL_DEFAULT;
215                 return 0;
216         }
217         if (flow->allmulti) {
218                 flow->ibv_attr->type = IBV_FLOW_ATTR_MC_DEFAULT;
219                 return 0;
220         }
221         ++flow->ibv_attr->num_of_specs;
222         eth = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size);
223         *eth = (struct ibv_flow_spec_eth) {
224                 .type = IBV_FLOW_SPEC_ETH,
225                 .size = sizeof(*eth),
226         };
227         memcpy(eth->val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
228         memcpy(eth->mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
229         /* Remove unwanted bits from values. */
230         for (i = 0; i < ETHER_ADDR_LEN; ++i) {
231                 eth->val.dst_mac[i] &= eth->mask.dst_mac[i];
232         }
233         return 0;
234 error:
235         return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
236                                   item, msg);
237 }
238
239 /**
240  * Merge VLAN pattern item into flow rule handle.
241  *
242  * Additional mlx4-specific constraints on supported fields:
243  *
244  * - Matching *all* VLAN traffic by omitting @p item->spec or providing an
245  *   empty @p item->mask would also include non-VLAN traffic. Doing so is
246  *   therefore unsupported.
247  * - No support for partial masks.
248  *
249  * @param[in, out] flow
250  *   Flow rule handle to update.
251  * @param[in] item
252  *   Pattern item to merge.
253  * @param[in] proc
254  *   Associated item-processing object.
255  * @param[out] error
256  *   Perform verbose error reporting if not NULL.
257  *
258  * @return
259  *   0 on success, a negative errno value otherwise and rte_errno is set.
260  */
261 static int
262 mlx4_flow_merge_vlan(struct rte_flow *flow,
263                      const struct rte_flow_item *item,
264                      const struct mlx4_flow_proc_item *proc,
265                      struct rte_flow_error *error)
266 {
267         const struct rte_flow_item_vlan *spec = item->spec;
268         const struct rte_flow_item_vlan *mask =
269                 spec ? (item->mask ? item->mask : proc->mask_default) : NULL;
270         struct ibv_flow_spec_eth *eth;
271         const char *msg;
272
273         if (!mask || !mask->tci) {
274                 msg = "mlx4 cannot match all VLAN traffic while excluding"
275                         " non-VLAN traffic; the TCI VID must be specified";
276                 goto error;
277         }
278         if (mask->tci != RTE_BE16(0x0fff)) {
279                 msg = "mlx4 does not support partial TCI VID matching";
280                 goto error;
281         }
282         if (!flow->ibv_attr)
283                 return 0;
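            /*
             * VLAN does not produce a Verbs specification of its own (its
             * dst_sz is 0); the TCI is folded into the Ethernet spec
             * written by the previous item, located just before the
             * current write offset.
             */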
284         eth = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size -
285                        sizeof(*eth));
286         eth->val.vlan_tag = spec->tci;
287         eth->mask.vlan_tag = mask->tci;
288         eth->val.vlan_tag &= eth->mask.vlan_tag;
289         return 0;
290 error:
291         return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
292                                   item, msg);
293 }
294
295 /**
296  * Merge IPv4 pattern item into flow rule handle.
297  *
298  * Additional mlx4-specific constraints on supported fields:
299  *
300  * - No support for partial masks.
301  *
302  * @param[in, out] flow
303  *   Flow rule handle to update.
304  * @param[in] item
305  *   Pattern item to merge.
306  * @param[in] proc
307  *   Associated item-processing object.
308  * @param[out] error
309  *   Perform verbose error reporting if not NULL.
310  *
311  * @return
312  *   0 on success, a negative errno value otherwise and rte_errno is set.
313  */
314 static int
315 mlx4_flow_merge_ipv4(struct rte_flow *flow,
316                      const struct rte_flow_item *item,
317                      const struct mlx4_flow_proc_item *proc,
318                      struct rte_flow_error *error)
319 {
320         const struct rte_flow_item_ipv4 *spec = item->spec;
321         const struct rte_flow_item_ipv4 *mask =
322                 spec ? (item->mask ? item->mask : proc->mask_default) : NULL;
323         struct ibv_flow_spec_ipv4 *ipv4;
324         const char *msg;
325
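            /*
             * Addresses may only be matched in full: adding 1 to a mask
             * yields 0 for all-ones, 1 for all-zeroes, and anything larger
             * reveals an unsupported partial mask.
             */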
326         if (mask &&
327             ((uint32_t)(mask->hdr.src_addr + 1) > UINT32_C(1) ||
328              (uint32_t)(mask->hdr.dst_addr + 1) > UINT32_C(1))) {
329                 msg = "mlx4 does not support matching partial IPv4 fields";
330                 goto error;
331         }
332         if (!flow->ibv_attr)
333                 return 0;
334         ++flow->ibv_attr->num_of_specs;
335         ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size);
336         *ipv4 = (struct ibv_flow_spec_ipv4) {
337                 .type = IBV_FLOW_SPEC_IPV4,
338                 .size = sizeof(*ipv4),
339         };
340         if (!spec)
341                 return 0;
342         ipv4->val = (struct ibv_flow_ipv4_filter) {
343                 .src_ip = spec->hdr.src_addr,
344                 .dst_ip = spec->hdr.dst_addr,
345         };
346         ipv4->mask = (struct ibv_flow_ipv4_filter) {
347                 .src_ip = mask->hdr.src_addr,
348                 .dst_ip = mask->hdr.dst_addr,
349         };
350         /* Remove unwanted bits from values. */
351         ipv4->val.src_ip &= ipv4->mask.src_ip;
352         ipv4->val.dst_ip &= ipv4->mask.dst_ip;
353         return 0;
354 error:
355         return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
356                                   item, msg);
357 }
358
359 /**
360  * Merge UDP pattern item into flow rule handle.
361  *
362  * Additional mlx4-specific constraints on supported fields:
363  *
364  * - No support for partial masks.
365  *
366  * @param[in, out] flow
367  *   Flow rule handle to update.
368  * @param[in] item
369  *   Pattern item to merge.
370  * @param[in] proc
371  *   Associated item-processing object.
372  * @param[out] error
373  *   Perform verbose error reporting if not NULL.
374  *
375  * @return
376  *   0 on success, a negative errno value otherwise and rte_errno is set.
377  */
378 static int
379 mlx4_flow_merge_udp(struct rte_flow *flow,
380                     const struct rte_flow_item *item,
381                     const struct mlx4_flow_proc_item *proc,
382                     struct rte_flow_error *error)
383 {
384         const struct rte_flow_item_udp *spec = item->spec;
385         const struct rte_flow_item_udp *mask =
386                 spec ? (item->mask ? item->mask : proc->mask_default) : NULL;
387         struct ibv_flow_spec_tcp_udp *udp;
388         const char *msg;
389
390         if (mask &&
391             ((uint16_t)(mask->hdr.src_port + 1) > UINT16_C(1) ||
392              (uint16_t)(mask->hdr.dst_port + 1) > UINT16_C(1))) {
393                 msg = "mlx4 does not support matching partial UDP fields";
394                 goto error;
395         }
396         if (!flow->ibv_attr)
397                 return 0;
398         ++flow->ibv_attr->num_of_specs;
399         udp = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size);
400         *udp = (struct ibv_flow_spec_tcp_udp) {
401                 .type = IBV_FLOW_SPEC_UDP,
402                 .size = sizeof(*udp),
403         };
404         if (!spec)
405                 return 0;
406         udp->val.dst_port = spec->hdr.dst_port;
407         udp->val.src_port = spec->hdr.src_port;
408         udp->mask.dst_port = mask->hdr.dst_port;
409         udp->mask.src_port = mask->hdr.src_port;
410         /* Remove unwanted bits from values. */
411         udp->val.src_port &= udp->mask.src_port;
412         udp->val.dst_port &= udp->mask.dst_port;
413         return 0;
414 error:
415         return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
416                                   item, msg);
417 }
418
419 /**
420  * Merge TCP pattern item into flow rule handle.
421  *
422  * Additional mlx4-specific constraints on supported fields:
423  *
424  * - No support for partial masks.
425  *
426  * @param[in, out] flow
427  *   Flow rule handle to update.
428  * @param[in] item
429  *   Pattern item to merge.
430  * @param[in] proc
431  *   Associated item-processing object.
432  * @param[out] error
433  *   Perform verbose error reporting if not NULL.
434  *
435  * @return
436  *   0 on success, a negative errno value otherwise and rte_errno is set.
437  */
438 static int
439 mlx4_flow_merge_tcp(struct rte_flow *flow,
440                     const struct rte_flow_item *item,
441                     const struct mlx4_flow_proc_item *proc,
442                     struct rte_flow_error *error)
443 {
444         const struct rte_flow_item_tcp *spec = item->spec;
445         const struct rte_flow_item_tcp *mask =
446                 spec ? (item->mask ? item->mask : proc->mask_default) : NULL;
447         struct ibv_flow_spec_tcp_udp *tcp;
448         const char *msg;
449
450         if (mask &&
451             ((uint16_t)(mask->hdr.src_port + 1) > UINT16_C(1) ||
452              (uint16_t)(mask->hdr.dst_port + 1) > UINT16_C(1))) {
453                 msg = "mlx4 does not support matching partial TCP fields";
454                 goto error;
455         }
456         if (!flow->ibv_attr)
457                 return 0;
458         ++flow->ibv_attr->num_of_specs;
459         tcp = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size);
460         *tcp = (struct ibv_flow_spec_tcp_udp) {
461                 .type = IBV_FLOW_SPEC_TCP,
462                 .size = sizeof(*tcp),
463         };
464         if (!spec)
465                 return 0;
466         tcp->val.dst_port = spec->hdr.dst_port;
467         tcp->val.src_port = spec->hdr.src_port;
468         tcp->mask.dst_port = mask->hdr.dst_port;
469         tcp->mask.src_port = mask->hdr.src_port;
470         /* Remove unwanted bits from values. */
471         tcp->val.src_port &= tcp->mask.src_port;
472         tcp->val.dst_port &= tcp->mask.dst_port;
473         return 0;
474 error:
475         return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
476                                   item, msg);
477 }
478
479 /**
480  * Perform basic sanity checks on a pattern item.
481  *
482  * @param[in] item
483  *   Item specification.
484  * @param[in] proc
485  *   Associated item-processing object.
486  * @param[out] error
487  *   Perform verbose error reporting if not NULL.
488  *
489  * @return
490  *   0 on success, a negative errno value otherwise and rte_errno is set.
491  */
492 static int
493 mlx4_flow_item_check(const struct rte_flow_item *item,
494                      const struct mlx4_flow_proc_item *proc,
495                      struct rte_flow_error *error)
496 {
497         const uint8_t *mask;
498         unsigned int i;
499
500         /* item->last and item->mask cannot exist without item->spec. */
501         if (!item->spec && (item->mask || item->last))
502                 return rte_flow_error_set
503                         (error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, item,
504                          "\"mask\" or \"last\" field provided without a"
505                          " corresponding \"spec\"");
506         /* No spec, no mask, no problem. */
507         if (!item->spec)
508                 return 0;
509         mask = item->mask ?
510                 (const uint8_t *)item->mask :
511                 (const uint8_t *)proc->mask_default;
512         assert(mask);
513         /*
514          * Single-pass check to make sure that:
515          * - Mask is supported, no bits are set outside proc->mask_support.
516          * - Both item->spec and item->last are included in mask.
517          */
518         for (i = 0; i != proc->mask_sz; ++i) {
519                 if (!mask[i])
520                         continue;
521                 if ((mask[i] | ((const uint8_t *)proc->mask_support)[i]) !=
522                     ((const uint8_t *)proc->mask_support)[i])
523                         return rte_flow_error_set
524                                 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
525                                  item, "unsupported field found in \"mask\"");
526                 if (item->last &&
527                     (((const uint8_t *)item->spec)[i] & mask[i]) !=
528                     (((const uint8_t *)item->last)[i] & mask[i]))
529                         return rte_flow_error_set
530                                 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
531                                  item,
532                                  "range between \"spec\" and \"last\""
533                                  " is larger than \"mask\"");
534         }
535         return 0;
536 }
537
538 /** Graph of supported items and associated actions. */
539 static const struct mlx4_flow_proc_item mlx4_flow_proc_item_list[] = {
540         [RTE_FLOW_ITEM_TYPE_END] = {
541                 .next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_ETH),
542         },
543         [RTE_FLOW_ITEM_TYPE_ETH] = {
544                 .next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_VLAN,
545                                        RTE_FLOW_ITEM_TYPE_IPV4),
546                 .mask_support = &(const struct rte_flow_item_eth){
547                         /* Only destination MAC can be matched. */
548                         .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
549                 },
550                 .mask_default = &rte_flow_item_eth_mask,
551                 .mask_sz = sizeof(struct rte_flow_item_eth),
552                 .merge = mlx4_flow_merge_eth,
553                 .dst_sz = sizeof(struct ibv_flow_spec_eth),
554         },
555         [RTE_FLOW_ITEM_TYPE_VLAN] = {
556                 .next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_IPV4),
557                 .mask_support = &(const struct rte_flow_item_vlan){
558                         /* Only TCI VID matching is supported. */
559                         .tci = RTE_BE16(0x0fff),
560                 },
561                 .mask_default = &rte_flow_item_vlan_mask,
562                 .mask_sz = sizeof(struct rte_flow_item_vlan),
563                 .merge = mlx4_flow_merge_vlan,
564                 .dst_sz = 0,
565         },
566         [RTE_FLOW_ITEM_TYPE_IPV4] = {
567                 .next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_UDP,
568                                        RTE_FLOW_ITEM_TYPE_TCP),
569                 .mask_support = &(const struct rte_flow_item_ipv4){
570                         .hdr = {
571                                 .src_addr = RTE_BE32(0xffffffff),
572                                 .dst_addr = RTE_BE32(0xffffffff),
573                         },
574                 },
575                 .mask_default = &rte_flow_item_ipv4_mask,
576                 .mask_sz = sizeof(struct rte_flow_item_ipv4),
577                 .merge = mlx4_flow_merge_ipv4,
578                 .dst_sz = sizeof(struct ibv_flow_spec_ipv4),
579         },
580         [RTE_FLOW_ITEM_TYPE_UDP] = {
581                 .mask_support = &(const struct rte_flow_item_udp){
582                         .hdr = {
583                                 .src_port = RTE_BE16(0xffff),
584                                 .dst_port = RTE_BE16(0xffff),
585                         },
586                 },
587                 .mask_default = &rte_flow_item_udp_mask,
588                 .mask_sz = sizeof(struct rte_flow_item_udp),
589                 .merge = mlx4_flow_merge_udp,
590                 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
591         },
592         [RTE_FLOW_ITEM_TYPE_TCP] = {
593                 .mask_support = &(const struct rte_flow_item_tcp){
594                         .hdr = {
595                                 .src_port = RTE_BE16(0xffff),
596                                 .dst_port = RTE_BE16(0xffff),
597                         },
598                 },
599                 .mask_default = &rte_flow_item_tcp_mask,
600                 .mask_sz = sizeof(struct rte_flow_item_tcp),
601                 .merge = mlx4_flow_merge_tcp,
602                 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
603         },
604 };
605
606 /**
607  * Make sure a flow rule is supported and initialize associated structure.
608  *
609  * @param priv
610  *   Pointer to private structure.
611  * @param[in] attr
612  *   Flow rule attributes.
613  * @param[in] pattern
614  *   Pattern specification (list terminated by the END pattern item).
615  * @param[in] actions
616  *   Associated actions (list terminated by the END action).
617  * @param[out] error
618  *   Perform verbose error reporting if not NULL.
619  * @param[in, out] addr
620  *   Buffer where the resulting flow rule handle pointer must be stored.
621  *   If NULL, stop processing after validation stage.
622  *
623  * @return
624  *   0 on success, a negative errno value otherwise and rte_errno is set.
625  */
626 static int
627 mlx4_flow_prepare(struct priv *priv,
628                   const struct rte_flow_attr *attr,
629                   const struct rte_flow_item pattern[],
630                   const struct rte_flow_action actions[],
631                   struct rte_flow_error *error,
632                   struct rte_flow **addr)
633 {
634         const struct rte_flow_item *item;
635         const struct rte_flow_action *action;
636         const struct mlx4_flow_proc_item *proc;
637         struct rte_flow temp = { .ibv_attr_size = sizeof(*temp.ibv_attr) };
638         struct rte_flow *flow = &temp;
639         const char *msg = NULL;
640         int overlap;
641
642         if (attr->group)
643                 return rte_flow_error_set
644                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
645                          NULL, "groups are not supported");
646         if (attr->priority > MLX4_FLOW_PRIORITY_LAST)
647                 return rte_flow_error_set
648                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
649                          NULL, "maximum priority level is "
650                          MLX4_STR_EXPAND(MLX4_FLOW_PRIORITY_LAST));
651         if (attr->egress)
652                 return rte_flow_error_set
653                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
654                          NULL, "egress is not supported");
655         if (!attr->ingress)
656                 return rte_flow_error_set
657                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
658                          NULL, "only ingress is supported");
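            /*
             * The pattern and action lists are parsed twice: the first
             * pass, on the temporary handle, validates them and computes
             * the size of the Verbs flow attribute; after allocation, the
             * second pass fills the final handle.
             */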
659 fill:
660         overlap = 0;
661         proc = mlx4_flow_proc_item_list;
662         /* Go over pattern. */
663         for (item = pattern; item->type; ++item) {
664                 const struct mlx4_flow_proc_item *next = NULL;
665                 unsigned int i;
666                 int err;
667
668                 if (item->type == RTE_FLOW_ITEM_TYPE_VOID)
669                         continue;
670                 if (item->type == MLX4_FLOW_ITEM_TYPE_INTERNAL) {
671                         flow->internal = 1;
672                         continue;
673                 }
674                 if (flow->promisc || flow->allmulti) {
675                         msg = "mlx4 does not support additional matching"
676                                 " criteria combined with indiscriminate"
677                                 " matching on Ethernet headers";
678                         goto exit_item_not_supported;
679                 }
680                 for (i = 0; proc->next_item && proc->next_item[i]; ++i) {
681                         if (proc->next_item[i] == item->type) {
682                                 next = &mlx4_flow_proc_item_list[item->type];
683                                 break;
684                         }
685                 }
686                 if (!next)
687                         goto exit_item_not_supported;
688                 proc = next;
689                 /*
690                  * Perform basic sanity checks only once, while handle is
691                  * not allocated.
692                  */
693                 if (flow == &temp) {
694                         err = mlx4_flow_item_check(item, proc, error);
695                         if (err)
696                                 return err;
697                 }
698                 if (proc->merge) {
699                         err = proc->merge(flow, item, proc, error);
700                         if (err)
701                                 return err;
702                 }
703                 flow->ibv_attr_size += proc->dst_sz;
704         }
705         /* Go over actions list. */
706         for (action = actions; action->type; ++action) {
707                 /* This one may appear anywhere multiple times. */
708                 if (action->type == RTE_FLOW_ACTION_TYPE_VOID)
709                         continue;
710                 /* Fate-deciding actions may appear at most once. */
711                 if (overlap) {
712                         msg = "cannot combine several fate-deciding actions,"
713                                 " choose between DROP, QUEUE or RSS";
714                         goto exit_action_not_supported;
715                 }
716                 overlap = 1;
717                 switch (action->type) {
718                         const struct rte_flow_action_queue *queue;
719                         const struct rte_flow_action_rss *rss;
720                         const uint8_t *rss_key;
721                         uint32_t rss_key_len;
722                         uint64_t fields;
723                         unsigned int i;
724
725                 case RTE_FLOW_ACTION_TYPE_DROP:
726                         flow->drop = 1;
727                         break;
728                 case RTE_FLOW_ACTION_TYPE_QUEUE:
729                         if (flow->rss)
730                                 break;
731                         queue = action->conf;
732                         if (queue->index >= priv->dev->data->nb_rx_queues) {
733                                 msg = "queue target index beyond number of"
734                                         " configured Rx queues";
735                                 goto exit_action_not_supported;
736                         }
737                         flow->rss = mlx4_rss_get
738                                 (priv, 0, mlx4_rss_hash_key_default, 1,
739                                  &queue->index);
740                         if (!flow->rss) {
741                                 msg = "not enough resources for additional"
742                                         " single-queue RSS context";
743                                 goto exit_action_not_supported;
744                         }
745                         break;
746                 case RTE_FLOW_ACTION_TYPE_RSS:
747                         if (flow->rss)
748                                 break;
749                         rss = action->conf;
750                         /* Default RSS configuration if none is provided. */
751                         if (rss->key_len) {
752                                 rss_key = rss->key;
753                                 rss_key_len = rss->key_len;
754                         } else {
755                                 rss_key = mlx4_rss_hash_key_default;
756                                 rss_key_len = MLX4_RSS_HASH_KEY_SIZE;
757                         }
758                         /* Sanity checks. */
759                         for (i = 0; i < rss->queue_num; ++i)
760                                 if (rss->queue[i] >=
761                                     priv->dev->data->nb_rx_queues)
762                                         break;
763                         if (i != rss->queue_num) {
764                                 msg = "queue target index beyond number of"
765                                         " configured Rx queues";
766                                 goto exit_action_not_supported;
767                         }
768                         if (!rte_is_power_of_2(rss->queue_num)) {
769                                 msg = "for RSS, mlx4 requires the number of"
770                                         " queues to be a power of two";
771                                 goto exit_action_not_supported;
772                         }
773                         if (rss_key_len != sizeof(flow->rss->key)) {
774                                 msg = "mlx4 supports exactly one RSS hash key"
775                                         " length: "
776                                         MLX4_STR_EXPAND(MLX4_RSS_HASH_KEY_SIZE);
777                                 goto exit_action_not_supported;
778                         }
779                         for (i = 1; i < rss->queue_num; ++i)
780                                 if (rss->queue[i] - rss->queue[i - 1] != 1)
781                                         break;
782                         if (i != rss->queue_num) {
783                                 msg = "mlx4 requires RSS contexts to use"
784                                         " consecutive queue indices only";
785                                 goto exit_action_not_supported;
786                         }
787                         if (rss->queue[0] % rss->queue_num) {
788                                 msg = "mlx4 requires the first queue of a RSS"
789                                         " context to be aligned on a multiple"
790                                         " of the context size";
791                                 goto exit_action_not_supported;
792                         }
793                         if (rss->func &&
794                             rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ) {
795                                 msg = "the only supported RSS hash function"
796                                         " is Toeplitz";
797                                 goto exit_action_not_supported;
798                         }
799                         rte_errno = 0;
800                         fields = mlx4_conv_rss_types(priv, rss->types);
801                         if (fields == (uint64_t)-1 && rte_errno) {
802                                 msg = "unsupported RSS hash type requested";
803                                 goto exit_action_not_supported;
804                         }
805                         flow->rss = mlx4_rss_get
806                                 (priv, fields, rss_key, rss->queue_num,
807                                  rss->queue);
808                         if (!flow->rss) {
809                                 msg = "either invalid parameters or not enough"
810                                         " resources for additional multi-queue"
811                                         " RSS context";
812                                 goto exit_action_not_supported;
813                         }
814                         break;
815                 default:
816                         goto exit_action_not_supported;
817                 }
818         }
819         /* When fate is unknown, drop traffic. */
820         if (!overlap)
821                 flow->drop = 1;
822         /* Validation ends here. */
823         if (!addr) {
824                 if (flow->rss)
825                         mlx4_rss_put(flow->rss);
826                 return 0;
827         }
828         if (flow == &temp) {
829                 /* Allocate proper handle based on collected data. */
830                 const struct mlx4_malloc_vec vec[] = {
831                         {
832                                 .align = alignof(struct rte_flow),
833                                 .size = sizeof(*flow),
834                                 .addr = (void **)&flow,
835                         },
836                         {
837                                 .align = alignof(struct ibv_flow_attr),
838                                 .size = temp.ibv_attr_size,
839                                 .addr = (void **)&temp.ibv_attr,
840                         },
841                 };
842
843                 if (!mlx4_zmallocv(__func__, vec, RTE_DIM(vec))) {
844                         if (temp.rss)
845                                 mlx4_rss_put(temp.rss);
846                         return rte_flow_error_set
847                                 (error, -rte_errno,
848                                  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
849                                  "flow rule handle allocation failure");
850                 }
851                 /* Most fields will be updated by second pass. */
852                 *flow = (struct rte_flow){
853                         .ibv_attr = temp.ibv_attr,
854                         .ibv_attr_size = sizeof(*flow->ibv_attr),
855                         .rss = temp.rss,
856                 };
857                 *flow->ibv_attr = (struct ibv_flow_attr){
858                         .type = IBV_FLOW_ATTR_NORMAL,
859                         .size = sizeof(*flow->ibv_attr),
860                         .priority = attr->priority,
861                         .port = priv->port,
862                 };
863                 goto fill;
864         }
865         *addr = flow;
866         return 0;
867 exit_item_not_supported:
868         return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
869                                   item, msg ? msg : "item not supported");
870 exit_action_not_supported:
871         return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
872                                   action, msg ? msg : "action not supported");
873 }
874
875 /**
876  * Validate a flow supported by the NIC.
877  *
878  * @see rte_flow_validate()
879  * @see rte_flow_ops
880  */
881 static int
882 mlx4_flow_validate(struct rte_eth_dev *dev,
883                    const struct rte_flow_attr *attr,
884                    const struct rte_flow_item pattern[],
885                    const struct rte_flow_action actions[],
886                    struct rte_flow_error *error)
887 {
888         struct priv *priv = dev->data->dev_private;
889
890         return mlx4_flow_prepare(priv, attr, pattern, actions, error, NULL);
891 }
892
893 /**
894  * Get a drop flow rule resources instance.
895  *
896  * @param priv
897  *   Pointer to private structure.
898  *
899  * @return
900  *   Pointer to drop flow resources on success, NULL otherwise and rte_errno
901  *   is set.
902  */
903 static struct mlx4_drop *
904 mlx4_drop_get(struct priv *priv)
905 {
906         struct mlx4_drop *drop = priv->drop;
907
908         if (drop) {
909                 assert(drop->refcnt);
910                 assert(drop->priv == priv);
911                 ++drop->refcnt;
912                 return drop;
913         }
914         drop = rte_malloc(__func__, sizeof(*drop), 0);
915         if (!drop)
916                 goto error;
917         *drop = (struct mlx4_drop){
918                 .priv = priv,
919                 .refcnt = 1,
920         };
921         drop->cq = mlx4_glue->create_cq(priv->ctx, 1, NULL, NULL, 0);
922         if (!drop->cq)
923                 goto error;
924         drop->qp = mlx4_glue->create_qp
925                 (priv->pd,
926                  &(struct ibv_qp_init_attr){
927                         .send_cq = drop->cq,
928                         .recv_cq = drop->cq,
929                         .qp_type = IBV_QPT_RAW_PACKET,
930                  });
931         if (!drop->qp)
932                 goto error;
933         priv->drop = drop;
934         return drop;
935 error:
936         if (drop && drop->qp)
937                 claim_zero(mlx4_glue->destroy_qp(drop->qp));
938         if (drop && drop->cq)
939                 claim_zero(mlx4_glue->destroy_cq(drop->cq));
940         if (drop)
941                 rte_free(drop);
942         rte_errno = ENOMEM;
943         return NULL;
944 }
945
946 /**
947  * Give back a drop flow rule resources instance.
948  *
949  * @param drop
950  *   Pointer to drop flow rule resources.
951  */
952 static void
953 mlx4_drop_put(struct mlx4_drop *drop)
954 {
955         assert(drop->refcnt);
956         if (--drop->refcnt)
957                 return;
958         drop->priv->drop = NULL;
959         claim_zero(mlx4_glue->destroy_qp(drop->qp));
960         claim_zero(mlx4_glue->destroy_cq(drop->cq));
961         rte_free(drop);
962 }
963
964 /**
965  * Toggle a configured flow rule.
966  *
967  * @param priv
968  *   Pointer to private structure.
969  * @param flow
970  *   Flow rule handle to toggle.
971  * @param enable
972  *   Whether associated Verbs flow must be created or removed.
973  * @param[out] error
974  *   Perform verbose error reporting if not NULL.
975  *
976  * @return
977  *   0 on success, a negative errno value otherwise and rte_errno is set.
978  */
979 static int
980 mlx4_flow_toggle(struct priv *priv,
981                  struct rte_flow *flow,
982                  int enable,
983                  struct rte_flow_error *error)
984 {
985         struct ibv_qp *qp = NULL;
986         const char *msg;
987         int err;
988
989         if (!enable) {
990                 if (!flow->ibv_flow)
991                         return 0;
992                 claim_zero(mlx4_glue->destroy_flow(flow->ibv_flow));
993                 flow->ibv_flow = NULL;
994                 if (flow->drop)
995                         mlx4_drop_put(priv->drop);
996                 else if (flow->rss)
997                         mlx4_rss_detach(flow->rss);
998                 return 0;
999         }
1000         assert(flow->ibv_attr);
1001         if (!flow->internal &&
1002             !priv->isolated &&
1003             flow->ibv_attr->priority == MLX4_FLOW_PRIORITY_LAST) {
1004                 if (flow->ibv_flow) {
1005                         claim_zero(mlx4_glue->destroy_flow(flow->ibv_flow));
1006                         flow->ibv_flow = NULL;
1007                         if (flow->drop)
1008                                 mlx4_drop_put(priv->drop);
1009                         else if (flow->rss)
1010                                 mlx4_rss_detach(flow->rss);
1011                 }
1012                 err = EACCES;
1013                 msg = ("priority level "
1014                        MLX4_STR_EXPAND(MLX4_FLOW_PRIORITY_LAST)
1015                        " is reserved when not in isolated mode");
1016                 goto error;
1017         }
1018         if (flow->rss) {
1019                 struct mlx4_rss *rss = flow->rss;
1020                 int missing = 0;
1021                 unsigned int i;
1022
1023                 /* Stop at the first nonexistent target queue. */
1024                 for (i = 0; i != rss->queues; ++i)
1025                         if (rss->queue_id[i] >=
1026                             priv->dev->data->nb_rx_queues ||
1027                             !priv->dev->data->rx_queues[rss->queue_id[i]]) {
1028                                 missing = 1;
1029                                 break;
1030                         }
1031                 if (flow->ibv_flow) {
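                             /*
                              * Nothing to do if the existing Verbs flow
                              * already matches the situation: attached to
                              * the RSS context while no queue is missing,
                              * or dropping traffic while one is.
                              */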
1032                         if (missing ^ !flow->drop)
1033                                 return 0;
1034                         /* Verbs flow needs updating. */
1035                         claim_zero(mlx4_glue->destroy_flow(flow->ibv_flow));
1036                         flow->ibv_flow = NULL;
1037                         if (flow->drop)
1038                                 mlx4_drop_put(priv->drop);
1039                         else
1040                                 mlx4_rss_detach(rss);
1041                 }
1042                 if (!missing) {
1043                         err = mlx4_rss_attach(rss);
1044                         if (err) {
1045                                 err = -err;
1046                                 msg = "cannot create indirection table or hash"
1047                                         " QP to associate flow rule with";
1048                                 goto error;
1049                         }
1050                         qp = rss->qp;
1051                 }
1052                 /* A missing target queue drops traffic implicitly. */
1053                 flow->drop = missing;
1054         }
1055         if (flow->drop) {
1056                 if (flow->ibv_flow)
1057                         return 0;
1058                 mlx4_drop_get(priv);
1059                 if (!priv->drop) {
1060                         err = rte_errno;
1061                         msg = "resources for drop flow rule cannot be created";
1062                         goto error;
1063                 }
1064                 qp = priv->drop->qp;
1065         }
1066         assert(qp);
1067         if (flow->ibv_flow)
1068                 return 0;
1069         flow->ibv_flow = mlx4_glue->create_flow(qp, flow->ibv_attr);
1070         if (flow->ibv_flow)
1071                 return 0;
1072         if (flow->drop)
1073                 mlx4_drop_put(priv->drop);
1074         else if (flow->rss)
1075                 mlx4_rss_detach(flow->rss);
1076         err = errno;
1077         msg = "flow rule rejected by device";
1078 error:
1079         return rte_flow_error_set
1080                 (error, err, RTE_FLOW_ERROR_TYPE_HANDLE, flow, msg);
1081 }
1082
1083 /**
1084  * Create a flow.
1085  *
1086  * @see rte_flow_create()
1087  * @see rte_flow_ops
1088  */
1089 static struct rte_flow *
1090 mlx4_flow_create(struct rte_eth_dev *dev,
1091                  const struct rte_flow_attr *attr,
1092                  const struct rte_flow_item pattern[],
1093                  const struct rte_flow_action actions[],
1094                  struct rte_flow_error *error)
1095 {
1096         struct priv *priv = dev->data->dev_private;
1097         struct rte_flow *flow;
1098         int err;
1099
1100         err = mlx4_flow_prepare(priv, attr, pattern, actions, error, &flow);
1101         if (err)
1102                 return NULL;
1103         err = mlx4_flow_toggle(priv, flow, priv->started, error);
1104         if (!err) {
1105                 struct rte_flow *curr = LIST_FIRST(&priv->flows);
1106
1107                 /* New rules are inserted after internal ones. */
1108                 if (!curr || !curr->internal) {
1109                         LIST_INSERT_HEAD(&priv->flows, flow, next);
1110                 } else {
1111                         while (LIST_NEXT(curr, next) &&
1112                                LIST_NEXT(curr, next)->internal)
1113                                 curr = LIST_NEXT(curr, next);
1114                         LIST_INSERT_AFTER(curr, flow, next);
1115                 }
1116                 return flow;
1117         }
1118         if (flow->rss)
1119                 mlx4_rss_put(flow->rss);
1120         rte_flow_error_set(error, -err, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1121                            error->message);
1122         rte_free(flow);
1123         return NULL;
1124 }
1125
1126 /**
1127  * Configure isolated mode.
1128  *
1129  * @see rte_flow_isolate()
1130  * @see rte_flow_ops
1131  */
1132 static int
1133 mlx4_flow_isolate(struct rte_eth_dev *dev,
1134                   int enable,
1135                   struct rte_flow_error *error)
1136 {
1137         struct priv *priv = dev->data->dev_private;
1138
1139         if (!!enable == !!priv->isolated)
1140                 return 0;
1141         priv->isolated = !!enable;
1142         if (mlx4_flow_sync(priv, error)) {
1143                 priv->isolated = !enable;
1144                 return -rte_errno;
1145         }
1146         return 0;
1147 }
1148
1149 /**
1150  * Destroy a flow rule.
1151  *
1152  * @see rte_flow_destroy()
1153  * @see rte_flow_ops
1154  */
1155 static int
1156 mlx4_flow_destroy(struct rte_eth_dev *dev,
1157                   struct rte_flow *flow,
1158                   struct rte_flow_error *error)
1159 {
1160         struct priv *priv = dev->data->dev_private;
1161         int err = mlx4_flow_toggle(priv, flow, 0, error);
1162
1163         if (err)
1164                 return err;
1165         LIST_REMOVE(flow, next);
1166         if (flow->rss)
1167                 mlx4_rss_put(flow->rss);
1168         rte_free(flow);
1169         return 0;
1170 }
1171
1172 /**
1173  * Destroy user-configured flow rules.
1174  *
1175  * This function skips internal flow rules.
1176  *
1177  * @see rte_flow_flush()
1178  * @see rte_flow_ops
1179  */
1180 static int
1181 mlx4_flow_flush(struct rte_eth_dev *dev,
1182                 struct rte_flow_error *error)
1183 {
1184         struct priv *priv = dev->data->dev_private;
1185         struct rte_flow *flow = LIST_FIRST(&priv->flows);
1186
1187         while (flow) {
1188                 struct rte_flow *next = LIST_NEXT(flow, next);
1189
1190                 if (!flow->internal)
1191                         mlx4_flow_destroy(dev, flow, error);
1192                 flow = next;
1193         }
1194         return 0;
1195 }
1196
1197 /**
1198  * Helper function to determine the next configured VLAN filter.
1199  *
1200  * @param priv
1201  *   Pointer to private structure.
1202  * @param vlan
1203  *   VLAN ID to use as a starting point.
1204  *
1205  * @return
1206  *   Next configured VLAN ID or a high value (>= 4096) if there is none.
1207  */
1208 static uint16_t
1209 mlx4_flow_internal_next_vlan(struct priv *priv, uint16_t vlan)
1210 {
1211         while (vlan < 4096) {
1212                 if (priv->dev->data->vlan_filter_conf.ids[vlan / 64] &
1213                     (UINT64_C(1) << (vlan % 64)))
1214                         return vlan;
1215                 ++vlan;
1216         }
1217         return vlan;
1218 }
1219
1220 /**
1221  * Generate internal flow rules.
1222  *
1223  * Various flow rules are created depending on the mode the device is in:
1224  *
1225  * 1. Promiscuous:
1226  *       port MAC + broadcast + catch-all (VLAN filtering is ignored).
1227  * 2. All multicast:
1228  *       port MAC/VLAN + broadcast + catch-all multicast.
1229  * 3. Otherwise:
1230  *       port MAC/VLAN + broadcast MAC/VLAN.
1231  *
1232  * About MAC flow rules:
1233  *
1234  * - MAC flow rules are generated from @p dev->data->mac_addrs
1235  *   (@p priv->mac array).
1236  * - An additional flow rule for Ethernet broadcasts is also generated.
1237  * - All these are per-VLAN if @p DEV_RX_OFFLOAD_VLAN_FILTER
1238  *   is enabled and VLAN filters are configured.
1239  *
1240  * @param priv
1241  *   Pointer to private structure.
1242  * @param[out] error
1243  *   Perform verbose error reporting if not NULL.
1244  *
1245  * @return
1246  *   0 on success, a negative errno value otherwise and rte_errno is set.
1247  */
1248 static int
1249 mlx4_flow_internal(struct priv *priv, struct rte_flow_error *error)
1250 {
1251         struct rte_flow_attr attr = {
1252                 .priority = MLX4_FLOW_PRIORITY_LAST,
1253                 .ingress = 1,
1254         };
1255         struct rte_flow_item_eth eth_spec;
1256         const struct rte_flow_item_eth eth_mask = {
1257                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1258         };
1259         const struct rte_flow_item_eth eth_allmulti = {
1260                 .dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
1261         };
1262         struct rte_flow_item_vlan vlan_spec;
1263         const struct rte_flow_item_vlan vlan_mask = {
1264                 .tci = RTE_BE16(0x0fff),
1265         };
1266         struct rte_flow_item pattern[] = {
1267                 {
1268                         .type = MLX4_FLOW_ITEM_TYPE_INTERNAL,
1269                 },
1270                 {
1271                         .type = RTE_FLOW_ITEM_TYPE_ETH,
1272                         .spec = &eth_spec,
1273                         .mask = &eth_mask,
1274                 },
1275                 {
1276                         /* Replaced with VLAN if filtering is enabled. */
1277                         .type = RTE_FLOW_ITEM_TYPE_END,
1278                 },
1279                 {
1280                         .type = RTE_FLOW_ITEM_TYPE_END,
1281                 },
1282         };
1283         /*
1284          * Round number of queues down to their previous power of 2 to
1285          * comply with RSS context limitations. Extra queues silently do not
1286          * get RSS by default.
1287          */
1288         uint32_t queues =
1289                 rte_align32pow2(priv->dev->data->nb_rx_queues + 1) >> 1;
1290         uint16_t queue[queues];
1291         struct rte_flow_action_rss action_rss = {
1292                 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
1293                 .types = -1,
1294                 .key_len = MLX4_RSS_HASH_KEY_SIZE,
1295                 .queue_num = queues,
1296                 .key = mlx4_rss_hash_key_default,
1297                 .queue = queue,
1298         };
1299         struct rte_flow_action actions[] = {
1300                 {
1301                         .type = RTE_FLOW_ACTION_TYPE_RSS,
1302                         .conf = &action_rss,
1303                 },
1304                 {
1305                         .type = RTE_FLOW_ACTION_TYPE_END,
1306                 },
1307         };
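             /*
              * Per-VLAN rules are generated only when the VLAN filter
              * offload is enabled, the port is not promiscuous and at
              * least one VLAN ID is configured (checked further down).
              */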
1308         struct ether_addr *rule_mac = &eth_spec.dst;
1309         rte_be16_t *rule_vlan =
1310                 (priv->dev->data->dev_conf.rxmode.offloads &
1311                  DEV_RX_OFFLOAD_VLAN_FILTER) &&
1312                 !priv->dev->data->promiscuous ?
1313                 &vlan_spec.tci :
1314                 NULL;
1315         uint16_t vlan = 0;
1316         struct rte_flow *flow;
1317         unsigned int i;
1318         int err = 0;
1319
1320         /* Nothing to be done if there are no Rx queues. */
1321         if (!queues)
1322                 goto error;
1323         /* Prepare default RSS configuration. */
1324         for (i = 0; i != queues; ++i)
1325                 queue[i] = i;
1326         /*
1327          * Set up VLAN item if filtering is enabled and at least one VLAN
1328          * filter is configured.
1329          */
1330         if (rule_vlan) {
1331                 vlan = mlx4_flow_internal_next_vlan(priv, 0);
1332                 if (vlan < 4096) {
1333                         pattern[2] = (struct rte_flow_item){
1334                                 .type = RTE_FLOW_ITEM_TYPE_VLAN,
1335                                 .spec = &vlan_spec,
1336                                 .mask = &vlan_mask,
1337                         };
1338 next_vlan:
1339                         *rule_vlan = rte_cpu_to_be_16(vlan);
1340                 } else {
1341                         rule_vlan = NULL;
1342                 }
1343         }
1344         for (i = 0; i != RTE_DIM(priv->mac) + 1; ++i) {
1345                 const struct ether_addr *mac;
1346
1347                 /* Broadcasts are handled by an extra iteration. */
1348                 if (i < RTE_DIM(priv->mac))
1349                         mac = &priv->mac[i];
1350                 else
1351                         mac = &eth_mask.dst;
1352                 if (is_zero_ether_addr(mac))
1353                         continue;
1354                 /* Check if MAC flow rule is already present. */
1355                 for (flow = LIST_FIRST(&priv->flows);
1356                      flow && flow->internal;
1357                      flow = LIST_NEXT(flow, next)) {
1358                         const struct ibv_flow_spec_eth *eth =
1359                                 (const void *)((uintptr_t)flow->ibv_attr +
1360                                                sizeof(*flow->ibv_attr));
1361                         unsigned int j;
1362
1363                         if (!flow->mac)
1364                                 continue;
1365                         assert(flow->ibv_attr->type == IBV_FLOW_ATTR_NORMAL);
1366                         assert(flow->ibv_attr->num_of_specs == 1);
1367                         assert(eth->type == IBV_FLOW_SPEC_ETH);
1368                         assert(flow->rss);
1369                         if (rule_vlan &&
1370                             (eth->val.vlan_tag != *rule_vlan ||
1371                              eth->mask.vlan_tag != RTE_BE16(0x0fff)))
1372                                 continue;
1373                         if (!rule_vlan && eth->mask.vlan_tag)
1374                                 continue;
1375                         for (j = 0; j != sizeof(mac->addr_bytes); ++j)
1376                                 if (eth->val.dst_mac[j] != mac->addr_bytes[j] ||
1377                                     eth->mask.dst_mac[j] != UINT8_C(0xff) ||
1378                                     eth->val.src_mac[j] != UINT8_C(0x00) ||
1379                                     eth->mask.src_mac[j] != UINT8_C(0x00))
1380                                         break;
1381                         if (j != sizeof(mac->addr_bytes))
1382                                 continue;
1383                         if (flow->rss->queues != queues ||
1384                             memcmp(flow->rss->queue_id, action_rss.queue,
1385                                    queues * sizeof(flow->rss->queue_id[0])))
1386                                 continue;
1387                         break;
1388                 }
1389                 if (!flow || !flow->internal) {
1390                         /* Not found, create a new flow rule. */
1391                         memcpy(rule_mac, mac, sizeof(*mac));
1392                         flow = mlx4_flow_create(priv->dev, &attr, pattern,
1393                                                 actions, error);
1394                         if (!flow) {
1395                                 err = -rte_errno;
1396                                 goto error;
1397                         }
1398                 }
1399                 flow->select = 1;
1400                 flow->mac = 1;
1401         }
1402         if (rule_vlan) {
1403                 vlan = mlx4_flow_internal_next_vlan(priv, vlan + 1);
1404                 if (vlan < 4096)
1405                         goto next_vlan;
1406         }
1407         /* Take care of promiscuous and all multicast flow rules. */
1408         if (priv->dev->data->promiscuous || priv->dev->data->all_multicast) {
1409                 for (flow = LIST_FIRST(&priv->flows);
1410                      flow && flow->internal;
1411                      flow = LIST_NEXT(flow, next)) {
1412                         if (priv->dev->data->promiscuous) {
1413                                 if (flow->promisc)
1414                                         break;
1415                         } else {
1416                                 assert(priv->dev->data->all_multicast);
1417                                 if (flow->allmulti)
1418                                         break;
1419                         }
1420                 }
1421                 if (flow && flow->internal) {
1422                         assert(flow->rss);
1423                         if (flow->rss->queues != queues ||
1424                             memcmp(flow->rss->queue_id, action_rss.queue,
1425                                    queues * sizeof(flow->rss->queue_id[0])))
1426                                 flow = NULL;
1427                 }
1428                 if (!flow || !flow->internal) {
1429                         /* Not found, create a new flow rule. */
1430                         if (priv->dev->data->promiscuous) {
1431                                 pattern[1].spec = NULL;
1432                                 pattern[1].mask = NULL;
1433                         } else {
1434                                 assert(priv->dev->data->all_multicast);
1435                                 pattern[1].spec = &eth_allmulti;
1436                                 pattern[1].mask = &eth_allmulti;
1437                         }
1438                         pattern[2] = pattern[3];
1439                         flow = mlx4_flow_create(priv->dev, &attr, pattern,
1440                                                 actions, error);
1441                         if (!flow) {
1442                                 err = -rte_errno;
1443                                 goto error;
1444                         }
1445                 }
1446                 assert(flow->promisc || flow->allmulti);
1447                 flow->select = 1;
1448         }
1449 error:
1450         /* Clear selection and clean up stale internal flow rules. */
1451         flow = LIST_FIRST(&priv->flows);
1452         while (flow && flow->internal) {
1453                 struct rte_flow *next = LIST_NEXT(flow, next);
1454
1455                 if (!flow->select)
1456                         claim_zero(mlx4_flow_destroy(priv->dev, flow, error));
1457                 else
1458                         flow->select = 0;
1459                 flow = next;
1460         }
1461         return err;
1462 }
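
/*
 * In short, the internal rules maintained above are equivalent to (sketch
 * only, in terms of the local eth_spec/vlan_spec/action_rss objects filled
 * in by this function):
 *
 *   pattern: ETH dst=<configured or broadcast MAC>
 *            [ / VLAN tci=<filtered VLAN ID>, when VLAN filtering applies ]
 *            / END
 *   actions: RSS spreading traffic over all configured Rx queues / END
 *
 * plus a single catch-all (promiscuous) or multicast-only ETH rule with the
 * same RSS action when either of these modes is enabled.
 */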
1463
1464 /**
1465  * Synchronize flow rules.
1466  *
1467  * This function synchronizes flow rules with the state of the device by
1468  * taking into account isolated mode and whether target queues are
1469  * configured.
1470  *
1471  * @param priv
1472  *   Pointer to private structure.
1473  * @param[out] error
1474  *   Perform verbose error reporting if not NULL.
1475  *
1476  * @return
1477  *   0 on success, a negative errno value otherwise and rte_errno is set.
1478  */
1479 int
1480 mlx4_flow_sync(struct priv *priv, struct rte_flow_error *error)
1481 {
1482         struct rte_flow *flow;
1483         int ret;
1484
1485         /* Internal flow rules are guaranteed to come first in the list. */
1486         if (priv->isolated) {
1487                 /*
1488                  * Get rid of them in isolated mode, stop at the first
1489                  * non-internal rule found.
1490                  */
1491                 for (flow = LIST_FIRST(&priv->flows);
1492                      flow && flow->internal;
1493                      flow = LIST_FIRST(&priv->flows))
1494                         claim_zero(mlx4_flow_destroy(priv->dev, flow, error));
1495         } else {
1496                 /* Refresh internal rules. */
1497                 ret = mlx4_flow_internal(priv, error);
1498                 if (ret)
1499                         return ret;
1500         }
1501         /* Toggle the remaining flow rules. */
1502         LIST_FOREACH(flow, &priv->flows, next) {
1503                 ret = mlx4_flow_toggle(priv, flow, priv->started, error);
1504                 if (ret)
1505                         return ret;
1506         }
1507         if (!priv->started)
1508                 assert(!priv->drop);
1509         return 0;
1510 }
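
/*
 * Usage sketch (illustrative only, not part of the driver): control-path
 * handlers are expected to invoke mlx4_flow_sync() after any change that
 * affects the rules above, along the lines of:
 *
 *	struct rte_flow_error error;
 *	int ret = mlx4_flow_sync(priv, &error);
 *
 *	if (ret)
 *		ERROR("cannot synchronize flow rules: %s",
 *		      error.message ? error.message : strerror(rte_errno));
 */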
1511
1512 /**
1513  * Clean up all flow rules.
1514  *
1515  * Unlike mlx4_flow_flush(), this function takes care of all remaining flow
1516  * rules regardless of whether they are internal or user-configured.
1517  *
1518  * @param priv
1519  *   Pointer to private structure.
1520  */
1521 void
1522 mlx4_flow_clean(struct priv *priv)
1523 {
1524         struct rte_flow *flow;
1525
1526         while ((flow = LIST_FIRST(&priv->flows)))
1527                 mlx4_flow_destroy(priv->dev, flow, NULL);
1528         assert(LIST_EMPTY(&priv->rss));
1529 }
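
/*
 * Note: this is intended for final tear-down (presumably the device close
 * path); once it returns, no flow rule remains and therefore no RSS
 * context either, as asserted above.
 */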
1530
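/** Flow API operations, exported to ethdev through mlx4_filter_ctrl(). */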
1531 static const struct rte_flow_ops mlx4_flow_ops = {
1532         .validate = mlx4_flow_validate,
1533         .create = mlx4_flow_create,
1534         .destroy = mlx4_flow_destroy,
1535         .flush = mlx4_flow_flush,
1536         .isolate = mlx4_flow_isolate,
1537 };
1538
1539 /**
1540  * Manage filter operations.
1541  *
1542  * @param dev
1543  *   Pointer to Ethernet device structure.
1544  * @param filter_type
1545  *   Filter type.
1546  * @param filter_op
1547  *   Operation to perform.
1548  * @param arg
1549  *   Pointer to operation-specific structure.
1550  *
1551  * @return
1552  *   0 on success, a negative errno value otherwise and rte_errno is set.
1553  */
1554 int
1555 mlx4_filter_ctrl(struct rte_eth_dev *dev,
1556                  enum rte_filter_type filter_type,
1557                  enum rte_filter_op filter_op,
1558                  void *arg)
1559 {
1560         switch (filter_type) {
1561         case RTE_ETH_FILTER_GENERIC:
1562                 if (filter_op != RTE_ETH_FILTER_GET)
1563                         break;
1564                 *(const void **)arg = &mlx4_flow_ops;
1565                 return 0;
1566         default:
1567                 ERROR("%p: filter type (%d) not supported",
1568                       (void *)dev, filter_type);
1569                 break;
1570         }
1571         rte_errno = ENOTSUP;
1572         return -rte_errno;
1573 }
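
/*
 * Usage sketch (illustrative only): the generic rte_flow layer obtains the
 * mlx4_flow_ops table through this callback (RTE_ETH_FILTER_GENERIC with
 * RTE_ETH_FILTER_GET), so applications simply use the public API, with
 * attr/pattern/actions built on their side:
 *
 *	struct rte_flow_error error;
 *	struct rte_flow *flow;
 *
 *	flow = rte_flow_create(port_id, &attr, pattern, actions, &error);
 *	if (!flow)
 *		printf("flow rule rejected: %s\n",
 *		       error.message ? error.message : "(no message)");
 */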