net/mlx4: fix ignored RSS hash types
drivers/net/mlx4/mlx4_flow.c (dpdk.git)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2017 6WIND S.A.
3  * Copyright 2017 Mellanox Technologies, Ltd
4  */
5
6 /**
7  * @file
8  * Flow API operations for mlx4 driver.
9  */
10
11 #include <arpa/inet.h>
12 #include <assert.h>
13 #include <errno.h>
14 #include <stdalign.h>
15 #include <stddef.h>
16 #include <stdint.h>
17 #include <string.h>
18 #include <sys/queue.h>
19
20 /* Verbs headers do not support -pedantic. */
21 #ifdef PEDANTIC
22 #pragma GCC diagnostic ignored "-Wpedantic"
23 #endif
24 #include <infiniband/verbs.h>
25 #ifdef PEDANTIC
26 #pragma GCC diagnostic error "-Wpedantic"
27 #endif
28
29 #include <rte_byteorder.h>
30 #include <rte_errno.h>
31 #include <rte_eth_ctrl.h>
32 #include <rte_ethdev_driver.h>
33 #include <rte_ether.h>
34 #include <rte_flow.h>
35 #include <rte_flow_driver.h>
36 #include <rte_malloc.h>
37
38 /* PMD headers. */
39 #include "mlx4.h"
40 #include "mlx4_glue.h"
41 #include "mlx4_flow.h"
42 #include "mlx4_rxtx.h"
43 #include "mlx4_utils.h"
44
45 /** Static initializer for a list of subsequent item types. */
46 #define NEXT_ITEM(...) \
47         (const enum rte_flow_item_type []){ \
48                 __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
49         }
50
51 /** Processor structure associated with a flow item. */
52 struct mlx4_flow_proc_item {
53         /** Bit-mask for fields supported by this PMD. */
54         const void *mask_support;
55         /** Bit-mask to use when @p item->mask is not provided. */
56         const void *mask_default;
57         /** Size in bytes for @p mask_support and @p mask_default. */
58         const unsigned int mask_sz;
59         /** Merge a pattern item into a flow rule handle. */
60         int (*merge)(struct rte_flow *flow,
61                      const struct rte_flow_item *item,
62                      const struct mlx4_flow_proc_item *proc,
63                      struct rte_flow_error *error);
64         /** Size in bytes of the destination structure. */
65         const unsigned int dst_sz;
66         /** List of possible subsequent items. */
67         const enum rte_flow_item_type *const next_item;
68 };
69
70 /** Shared resources for drop flow rules. */
71 struct mlx4_drop {
72         struct ibv_qp *qp; /**< QP target. */
73         struct ibv_cq *cq; /**< CQ associated with above QP. */
74         struct priv *priv; /**< Back pointer to private data. */
75         uint32_t refcnt; /**< Reference count. */
76 };
77
78 /**
79  * Convert DPDK RSS hash fields to their Verbs equivalent.
80  *
81  * This function returns the supported (default) set when @p rss_hf has
82  * the special value (uint64_t)-1.
83  *
84  * @param priv
85  *   Pointer to private structure.
86  * @param rss_hf
87  *   Hash fields in DPDK format (see struct rte_eth_rss_conf).
88  *
89  * @return
90  *   A valid Verbs RSS hash fields mask for mlx4 on success, (uint64_t)-1
91  *   otherwise and rte_errno is set.
92  */
93 uint64_t
94 mlx4_conv_rss_hf(struct priv *priv, uint64_t rss_hf)
95 {
96         enum { IPV4, IPV6, TCP, UDP, };
97         const uint64_t in[] = {
98                 [IPV4] = (ETH_RSS_IPV4 |
99                           ETH_RSS_FRAG_IPV4 |
100                           ETH_RSS_NONFRAG_IPV4_TCP |
101                           ETH_RSS_NONFRAG_IPV4_UDP |
102                           ETH_RSS_NONFRAG_IPV4_OTHER),
103                 [IPV6] = (ETH_RSS_IPV6 |
104                           ETH_RSS_FRAG_IPV6 |
105                           ETH_RSS_NONFRAG_IPV6_TCP |
106                           ETH_RSS_NONFRAG_IPV6_UDP |
107                           ETH_RSS_NONFRAG_IPV6_OTHER |
108                           ETH_RSS_IPV6_EX |
109                           ETH_RSS_IPV6_TCP_EX |
110                           ETH_RSS_IPV6_UDP_EX),
111                 [TCP] = (ETH_RSS_NONFRAG_IPV4_TCP |
112                          ETH_RSS_NONFRAG_IPV6_TCP |
113                          ETH_RSS_IPV6_TCP_EX),
114                 [UDP] = (ETH_RSS_NONFRAG_IPV4_UDP |
115                          ETH_RSS_NONFRAG_IPV6_UDP |
116                          ETH_RSS_IPV6_UDP_EX),
117         };
118         const uint64_t out[RTE_DIM(in)] = {
119                 [IPV4] = IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4,
120                 [IPV6] = IBV_RX_HASH_SRC_IPV6 | IBV_RX_HASH_DST_IPV6,
121                 [TCP] = IBV_RX_HASH_SRC_PORT_TCP | IBV_RX_HASH_DST_PORT_TCP,
122                 [UDP] = IBV_RX_HASH_SRC_PORT_UDP | IBV_RX_HASH_DST_PORT_UDP,
123         };
124         uint64_t seen = 0;
125         uint64_t conv = 0;
126         unsigned int i;
127
128         for (i = 0; i != RTE_DIM(in); ++i)
129                 if (rss_hf & in[i]) {
130                         seen |= rss_hf & in[i];
131                         conv |= out[i];
132                 }
133         if ((conv & priv->hw_rss_sup) == conv) {
134                 if (rss_hf == (uint64_t)-1) {
135                         /* Include inner RSS by default if supported. */
136                         conv |= priv->hw_rss_sup & IBV_RX_HASH_INNER;
137                         return conv;
138                 }
139                 if (!(rss_hf & ~seen))
140                         return conv;
141         }
142         rte_errno = ENOTSUP;
143         return (uint64_t)-1;
144 }
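
/*
 * Illustration (not part of the driver), assuming a device whose hw_rss_sup
 * covers IPv4 and UDP hashing: requesting ETH_RSS_NONFRAG_IPV4_UDP matches
 * both the IPV4 and UDP rows of in[] above and therefore selects address
 * and port hashing at once:
 *
 *   uint64_t fields = mlx4_conv_rss_hf(priv, ETH_RSS_NONFRAG_IPV4_UDP);
 *   // fields == IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4 |
 *   //           IBV_RX_HASH_SRC_PORT_UDP | IBV_RX_HASH_DST_PORT_UDP
 *
 * Requesting a type absent from in[] (or one the device cannot hash on)
 * yields (uint64_t)-1 with rte_errno set to ENOTSUP, while passing
 * (uint64_t)-1 returns the full supported set, including IBV_RX_HASH_INNER
 * when inner RSS is available.
 */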
145
146 /**
147  * Merge Ethernet pattern item into flow rule handle.
148  *
149  * Additional mlx4-specific constraints on supported fields:
150  *
151  * - No support for partial masks, except in the specific case of matching
152  *   all multicast traffic (@p spec->dst and @p mask->dst equal to
153  *   01:00:00:00:00:00).
154  * - Not providing @p item->spec or providing an empty @p mask->dst is
155  *   *only* supported if the rule doesn't specify additional matching
156  *   criteria (i.e. rule is promiscuous-like).
157  *
158  * @param[in, out] flow
159  *   Flow rule handle to update.
160  * @param[in] item
161  *   Pattern item to merge.
162  * @param[in] proc
163  *   Associated item-processing object.
164  * @param[out] error
165  *   Perform verbose error reporting if not NULL.
166  *
167  * @return
168  *   0 on success, a negative errno value otherwise and rte_errno is set.
169  */
170 static int
171 mlx4_flow_merge_eth(struct rte_flow *flow,
172                     const struct rte_flow_item *item,
173                     const struct mlx4_flow_proc_item *proc,
174                     struct rte_flow_error *error)
175 {
176         const struct rte_flow_item_eth *spec = item->spec;
177         const struct rte_flow_item_eth *mask =
178                 spec ? (item->mask ? item->mask : proc->mask_default) : NULL;
179         struct ibv_flow_spec_eth *eth;
180         const char *msg;
181         unsigned int i;
182
183         if (!mask) {
184                 flow->promisc = 1;
185         } else {
186                 uint32_t sum_dst = 0;
187                 uint32_t sum_src = 0;
188
189                 for (i = 0; i != sizeof(mask->dst.addr_bytes); ++i) {
190                         sum_dst += mask->dst.addr_bytes[i];
191                         sum_src += mask->src.addr_bytes[i];
192                 }
193                 if (sum_src) {
194                         msg = "mlx4 does not support source MAC matching";
195                         goto error;
196                 } else if (!sum_dst) {
197                         flow->promisc = 1;
198                 } else if (sum_dst == 1 && mask->dst.addr_bytes[0] == 1) {
199                         if (!(spec->dst.addr_bytes[0] & 1)) {
200                                 msg = "mlx4 does not support the explicit"
201                                         " exclusion of all multicast traffic";
202                                 goto error;
203                         }
204                         flow->allmulti = 1;
205                 } else if (sum_dst != (UINT8_C(0xff) * ETHER_ADDR_LEN)) {
206                         msg = "mlx4 does not support matching partial"
207                                 " Ethernet fields";
208                         goto error;
209                 }
210         }
211         if (!flow->ibv_attr)
212                 return 0;
213         if (flow->promisc) {
214                 flow->ibv_attr->type = IBV_FLOW_ATTR_ALL_DEFAULT;
215                 return 0;
216         }
217         if (flow->allmulti) {
218                 flow->ibv_attr->type = IBV_FLOW_ATTR_MC_DEFAULT;
219                 return 0;
220         }
221         ++flow->ibv_attr->num_of_specs;
222         eth = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size);
223         *eth = (struct ibv_flow_spec_eth) {
224                 .type = IBV_FLOW_SPEC_ETH,
225                 .size = sizeof(*eth),
226         };
227         memcpy(eth->val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
228         memcpy(eth->mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
229         /* Remove unwanted bits from values. */
230         for (i = 0; i < ETHER_ADDR_LEN; ++i) {
231                 eth->val.dst_mac[i] &= eth->mask.dst_mac[i];
232         }
233         return 0;
234 error:
235         return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
236                                   item, msg);
237 }
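
/*
 * Illustration (not part of the driver) of what the constraints above
 * allow: matching one unicast destination requires a full destination
 * mask, while any source MAC mask is refused:
 *
 *   struct rte_flow_item_eth spec = {
 *           .dst.addr_bytes = "\x00\x0b\x0c\x0d\x0e\x0f",
 *   };
 *   struct rte_flow_item_eth mask = {
 *           .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *   };
 *
 * Omitting the spec altogether makes the rule promiscuous-like
 * (IBV_FLOW_ATTR_ALL_DEFAULT), and a destination mask of 01:00:00:00:00:00
 * together with a multicast spec selects all multicast traffic
 * (IBV_FLOW_ATTR_MC_DEFAULT).
 */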
238
239 /**
240  * Merge VLAN pattern item into flow rule handle.
241  *
242  * Additional mlx4-specific constraints on supported fields:
243  *
244  * - Matching *all* VLAN traffic by omitting @p item->spec or providing an
245  *   empty @p item->mask would also include non-VLAN traffic. Doing so is
246  *   therefore unsupported.
247  * - No support for partial masks.
248  *
249  * @param[in, out] flow
250  *   Flow rule handle to update.
251  * @param[in] item
252  *   Pattern item to merge.
253  * @param[in] proc
254  *   Associated item-processing object.
255  * @param[out] error
256  *   Perform verbose error reporting if not NULL.
257  *
258  * @return
259  *   0 on success, a negative errno value otherwise and rte_errno is set.
260  */
261 static int
262 mlx4_flow_merge_vlan(struct rte_flow *flow,
263                      const struct rte_flow_item *item,
264                      const struct mlx4_flow_proc_item *proc,
265                      struct rte_flow_error *error)
266 {
267         const struct rte_flow_item_vlan *spec = item->spec;
268         const struct rte_flow_item_vlan *mask =
269                 spec ? (item->mask ? item->mask : proc->mask_default) : NULL;
270         struct ibv_flow_spec_eth *eth;
271         const char *msg;
272
273         if (!mask || !mask->tci) {
274                 msg = "mlx4 cannot match all VLAN traffic while excluding"
275                         " non-VLAN traffic, TCI VID must be specified";
276                 goto error;
277         }
278         if (mask->tci != RTE_BE16(0x0fff)) {
279                 msg = "mlx4 does not support partial TCI VID matching";
280                 goto error;
281         }
282         if (!flow->ibv_attr)
283                 return 0;
284         eth = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size -
285                        sizeof(*eth));
286         eth->val.vlan_tag = spec->tci;
287         eth->mask.vlan_tag = mask->tci;
288         eth->val.vlan_tag &= eth->mask.vlan_tag;
289         return 0;
290 error:
291         return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
292                                   item, msg);
293 }
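
/*
 * Illustration (not part of the driver): the only VLAN match mlx4 accepts
 * is an exact TCI VID, e.g. VLAN 100:
 *
 *   struct rte_flow_item_vlan spec = { .tci = RTE_BE16(100) };
 *   struct rte_flow_item_vlan mask = { .tci = RTE_BE16(0x0fff) };
 *
 * An empty mask (i.e. "any VLAN") or a partial VID mask is rejected by the
 * checks above.
 */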
294
295 /**
296  * Merge IPv4 pattern item into flow rule handle.
297  *
298  * Additional mlx4-specific constraints on supported fields:
299  *
300  * - No support for partial masks.
301  *
302  * @param[in, out] flow
303  *   Flow rule handle to update.
304  * @param[in] item
305  *   Pattern item to merge.
306  * @param[in] proc
307  *   Associated item-processing object.
308  * @param[out] error
309  *   Perform verbose error reporting if not NULL.
310  *
311  * @return
312  *   0 on success, a negative errno value otherwise and rte_errno is set.
313  */
314 static int
315 mlx4_flow_merge_ipv4(struct rte_flow *flow,
316                      const struct rte_flow_item *item,
317                      const struct mlx4_flow_proc_item *proc,
318                      struct rte_flow_error *error)
319 {
320         const struct rte_flow_item_ipv4 *spec = item->spec;
321         const struct rte_flow_item_ipv4 *mask =
322                 spec ? (item->mask ? item->mask : proc->mask_default) : NULL;
323         struct ibv_flow_spec_ipv4 *ipv4;
324         const char *msg;
325
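        /*
         * The "(x + 1) > 1" idiom below relies on unsigned wraparound so
         * that only two mask values pass: all-zero (field ignored,
         * x + 1 == 1) and all-ones (exact match, x + 1 == 0); anything in
         * between is a partial mask and is rejected. The same trick is
         * used for UDP and TCP ports further down.
         */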
326         if (mask &&
327             ((uint32_t)(mask->hdr.src_addr + 1) > UINT32_C(1) ||
328              (uint32_t)(mask->hdr.dst_addr + 1) > UINT32_C(1))) {
329                 msg = "mlx4 does not support matching partial IPv4 fields";
330                 goto error;
331         }
332         if (!flow->ibv_attr)
333                 return 0;
334         ++flow->ibv_attr->num_of_specs;
335         ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size);
336         *ipv4 = (struct ibv_flow_spec_ipv4) {
337                 .type = IBV_FLOW_SPEC_IPV4,
338                 .size = sizeof(*ipv4),
339         };
340         if (!spec)
341                 return 0;
342         ipv4->val = (struct ibv_flow_ipv4_filter) {
343                 .src_ip = spec->hdr.src_addr,
344                 .dst_ip = spec->hdr.dst_addr,
345         };
346         ipv4->mask = (struct ibv_flow_ipv4_filter) {
347                 .src_ip = mask->hdr.src_addr,
348                 .dst_ip = mask->hdr.dst_addr,
349         };
350         /* Remove unwanted bits from values. */
351         ipv4->val.src_ip &= ipv4->mask.src_ip;
352         ipv4->val.dst_ip &= ipv4->mask.dst_ip;
353         return 0;
354 error:
355         return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
356                                   item, msg);
357 }
358
359 /**
360  * Merge UDP pattern item into flow rule handle.
361  *
362  * Additional mlx4-specific constraints on supported fields:
363  *
364  * - No support for partial masks.
365  *
366  * @param[in, out] flow
367  *   Flow rule handle to update.
368  * @param[in] item
369  *   Pattern item to merge.
370  * @param[in] proc
371  *   Associated item-processing object.
372  * @param[out] error
373  *   Perform verbose error reporting if not NULL.
374  *
375  * @return
376  *   0 on success, a negative errno value otherwise and rte_errno is set.
377  */
378 static int
379 mlx4_flow_merge_udp(struct rte_flow *flow,
380                     const struct rte_flow_item *item,
381                     const struct mlx4_flow_proc_item *proc,
382                     struct rte_flow_error *error)
383 {
384         const struct rte_flow_item_udp *spec = item->spec;
385         const struct rte_flow_item_udp *mask =
386                 spec ? (item->mask ? item->mask : proc->mask_default) : NULL;
387         struct ibv_flow_spec_tcp_udp *udp;
388         const char *msg;
389
390         if (mask &&
391             ((uint16_t)(mask->hdr.src_port + 1) > UINT16_C(1) ||
392              (uint16_t)(mask->hdr.dst_port + 1) > UINT16_C(1))) {
393                 msg = "mlx4 does not support matching partial UDP fields";
394                 goto error;
395         }
396         if (!flow->ibv_attr)
397                 return 0;
398         ++flow->ibv_attr->num_of_specs;
399         udp = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size);
400         *udp = (struct ibv_flow_spec_tcp_udp) {
401                 .type = IBV_FLOW_SPEC_UDP,
402                 .size = sizeof(*udp),
403         };
404         if (!spec)
405                 return 0;
406         udp->val.dst_port = spec->hdr.dst_port;
407         udp->val.src_port = spec->hdr.src_port;
408         udp->mask.dst_port = mask->hdr.dst_port;
409         udp->mask.src_port = mask->hdr.src_port;
410         /* Remove unwanted bits from values. */
411         udp->val.src_port &= udp->mask.src_port;
412         udp->val.dst_port &= udp->mask.dst_port;
413         return 0;
414 error:
415         return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
416                                   item, msg);
417 }
418
419 /**
420  * Merge TCP pattern item into flow rule handle.
421  *
422  * Additional mlx4-specific constraints on supported fields:
423  *
424  * - No support for partial masks.
425  *
426  * @param[in, out] flow
427  *   Flow rule handle to update.
428  * @param[in] item
429  *   Pattern item to merge.
430  * @param[in] proc
431  *   Associated item-processing object.
432  * @param[out] error
433  *   Perform verbose error reporting if not NULL.
434  *
435  * @return
436  *   0 on success, a negative errno value otherwise and rte_errno is set.
437  */
438 static int
439 mlx4_flow_merge_tcp(struct rte_flow *flow,
440                     const struct rte_flow_item *item,
441                     const struct mlx4_flow_proc_item *proc,
442                     struct rte_flow_error *error)
443 {
444         const struct rte_flow_item_tcp *spec = item->spec;
445         const struct rte_flow_item_tcp *mask =
446                 spec ? (item->mask ? item->mask : proc->mask_default) : NULL;
447         struct ibv_flow_spec_tcp_udp *tcp;
448         const char *msg;
449
450         if (mask &&
451             ((uint16_t)(mask->hdr.src_port + 1) > UINT16_C(1) ||
452              (uint16_t)(mask->hdr.dst_port + 1) > UINT16_C(1))) {
453                 msg = "mlx4 does not support matching partial TCP fields";
454                 goto error;
455         }
456         if (!flow->ibv_attr)
457                 return 0;
458         ++flow->ibv_attr->num_of_specs;
459         tcp = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size);
460         *tcp = (struct ibv_flow_spec_tcp_udp) {
461                 .type = IBV_FLOW_SPEC_TCP,
462                 .size = sizeof(*tcp),
463         };
464         if (!spec)
465                 return 0;
466         tcp->val.dst_port = spec->hdr.dst_port;
467         tcp->val.src_port = spec->hdr.src_port;
468         tcp->mask.dst_port = mask->hdr.dst_port;
469         tcp->mask.src_port = mask->hdr.src_port;
470         /* Remove unwanted bits from values. */
471         tcp->val.src_port &= tcp->mask.src_port;
472         tcp->val.dst_port &= tcp->mask.dst_port;
473         return 0;
474 error:
475         return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
476                                   item, msg);
477 }
478
479 /**
480  * Perform basic sanity checks on a pattern item.
481  *
482  * @param[in] item
483  *   Item specification.
484  * @param[in] proc
485  *   Associated item-processing object.
486  * @param[out] error
487  *   Perform verbose error reporting if not NULL.
488  *
489  * @return
490  *   0 on success, a negative errno value otherwise and rte_errno is set.
491  */
492 static int
493 mlx4_flow_item_check(const struct rte_flow_item *item,
494                      const struct mlx4_flow_proc_item *proc,
495                      struct rte_flow_error *error)
496 {
497         const uint8_t *mask;
498         unsigned int i;
499
500         /* item->last and item->mask cannot exist without item->spec. */
501         if (!item->spec && (item->mask || item->last))
502                 return rte_flow_error_set
503                         (error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, item,
504                          "\"mask\" or \"last\" field provided without a"
505                          " corresponding \"spec\"");
506         /* No spec, no mask, no problem. */
507         if (!item->spec)
508                 return 0;
509         mask = item->mask ?
510                 (const uint8_t *)item->mask :
511                 (const uint8_t *)proc->mask_default;
512         assert(mask);
513         /*
514          * Single-pass check to make sure that:
515          * - Mask is supported, no bits are set outside proc->mask_support.
516          * - Both item->spec and item->last are included in mask.
517          */
518         for (i = 0; i != proc->mask_sz; ++i) {
519                 if (!mask[i])
520                         continue;
521                 if ((mask[i] | ((const uint8_t *)proc->mask_support)[i]) !=
522                     ((const uint8_t *)proc->mask_support)[i])
523                         return rte_flow_error_set
524                                 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
525                                  item, "unsupported field found in \"mask\"");
526                 if (item->last &&
527                     (((const uint8_t *)item->spec)[i] & mask[i]) !=
528                     (((const uint8_t *)item->last)[i] & mask[i]))
529                         return rte_flow_error_set
530                                 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
531                                  item,
532                                  "range between \"spec\" and \"last\""
533                                  " is larger than \"mask\"");
534         }
535         return 0;
536 }
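
/*
 * Illustration of the spec/last/mask rule above: with a full dst_port
 * mask, a TCP item whose spec->hdr.dst_port differs from
 * last->hdr.dst_port is rejected; ranges are only tolerated on bits the
 * mask ignores.
 */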
537
538 /** Graph of supported items and associated actions. */
539 static const struct mlx4_flow_proc_item mlx4_flow_proc_item_list[] = {
540         [RTE_FLOW_ITEM_TYPE_END] = {
541                 .next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_ETH),
542         },
543         [RTE_FLOW_ITEM_TYPE_ETH] = {
544                 .next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_VLAN,
545                                        RTE_FLOW_ITEM_TYPE_IPV4),
546                 .mask_support = &(const struct rte_flow_item_eth){
547                         /* Only destination MAC can be matched. */
548                         .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
549                 },
550                 .mask_default = &rte_flow_item_eth_mask,
551                 .mask_sz = sizeof(struct rte_flow_item_eth),
552                 .merge = mlx4_flow_merge_eth,
553                 .dst_sz = sizeof(struct ibv_flow_spec_eth),
554         },
555         [RTE_FLOW_ITEM_TYPE_VLAN] = {
556                 .next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_IPV4),
557                 .mask_support = &(const struct rte_flow_item_vlan){
558                         /* Only TCI VID matching is supported. */
559                         .tci = RTE_BE16(0x0fff),
560                 },
561                 .mask_default = &rte_flow_item_vlan_mask,
562                 .mask_sz = sizeof(struct rte_flow_item_vlan),
563                 .merge = mlx4_flow_merge_vlan,
564                 .dst_sz = 0,
565         },
566         [RTE_FLOW_ITEM_TYPE_IPV4] = {
567                 .next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_UDP,
568                                        RTE_FLOW_ITEM_TYPE_TCP),
569                 .mask_support = &(const struct rte_flow_item_ipv4){
570                         .hdr = {
571                                 .src_addr = RTE_BE32(0xffffffff),
572                                 .dst_addr = RTE_BE32(0xffffffff),
573                         },
574                 },
575                 .mask_default = &rte_flow_item_ipv4_mask,
576                 .mask_sz = sizeof(struct rte_flow_item_ipv4),
577                 .merge = mlx4_flow_merge_ipv4,
578                 .dst_sz = sizeof(struct ibv_flow_spec_ipv4),
579         },
580         [RTE_FLOW_ITEM_TYPE_UDP] = {
581                 .mask_support = &(const struct rte_flow_item_udp){
582                         .hdr = {
583                                 .src_port = RTE_BE16(0xffff),
584                                 .dst_port = RTE_BE16(0xffff),
585                         },
586                 },
587                 .mask_default = &rte_flow_item_udp_mask,
588                 .mask_sz = sizeof(struct rte_flow_item_udp),
589                 .merge = mlx4_flow_merge_udp,
590                 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
591         },
592         [RTE_FLOW_ITEM_TYPE_TCP] = {
593                 .mask_support = &(const struct rte_flow_item_tcp){
594                         .hdr = {
595                                 .src_port = RTE_BE16(0xffff),
596                                 .dst_port = RTE_BE16(0xffff),
597                         },
598                 },
599                 .mask_default = &rte_flow_item_tcp_mask,
600                 .mask_sz = sizeof(struct rte_flow_item_tcp),
601                 .merge = mlx4_flow_merge_tcp,
602                 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
603         },
604 };
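
/*
 * The graph above only admits patterns of the form
 * ETH [VLAN] [IPV4 [UDP|TCP]] (plus VOID and the internal item, which are
 * handled separately). As a hedged illustration, a testpmd rule along the
 * lines of:
 *
 *   flow create 0 ingress pattern eth dst is 00:0b:0c:0d:0e:0f /
 *        ipv4 dst is 10.0.0.1 / udp dst is 4789 / end
 *        actions queue index 3 / end
 *
 * walks ETH -> IPV4 -> UDP, whereas a pattern starting directly with IPV4
 * is rejected because RTE_FLOW_ITEM_TYPE_END's next_item list only
 * contains ETH.
 */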
605
606 /**
607  * Make sure a flow rule is supported and initialize associated structure.
608  *
609  * @param priv
610  *   Pointer to private structure.
611  * @param[in] attr
612  *   Flow rule attributes.
613  * @param[in] pattern
614  *   Pattern specification (list terminated by the END pattern item).
615  * @param[in] actions
616  *   Associated actions (list terminated by the END action).
617  * @param[out] error
618  *   Perform verbose error reporting if not NULL.
619  * @param[in, out] addr
620  *   Buffer where the resulting flow rule handle pointer must be stored.
621  *   If NULL, stop processing after validation stage.
622  *
623  * @return
624  *   0 on success, a negative errno value otherwise and rte_errno is set.
625  */
626 static int
627 mlx4_flow_prepare(struct priv *priv,
628                   const struct rte_flow_attr *attr,
629                   const struct rte_flow_item pattern[],
630                   const struct rte_flow_action actions[],
631                   struct rte_flow_error *error,
632                   struct rte_flow **addr)
633 {
634         const struct rte_flow_item *item;
635         const struct rte_flow_action *action;
636         const struct mlx4_flow_proc_item *proc;
637         struct rte_flow temp = { .ibv_attr_size = sizeof(*temp.ibv_attr) };
638         struct rte_flow *flow = &temp;
639         const char *msg = NULL;
640
641         if (attr->group)
642                 return rte_flow_error_set
643                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
644                          NULL, "groups are not supported");
645         if (attr->priority > MLX4_FLOW_PRIORITY_LAST)
646                 return rte_flow_error_set
647                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
648                          NULL, "maximum priority level is "
649                          MLX4_STR_EXPAND(MLX4_FLOW_PRIORITY_LAST));
650         if (attr->egress)
651                 return rte_flow_error_set
652                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
653                          NULL, "egress is not supported");
654         if (!attr->ingress)
655                 return rte_flow_error_set
656                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
657                          NULL, "only ingress is supported");
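        /*
         * This function runs twice over the same lists. The first pass
         * uses the stack-allocated "temp" handle only to validate the
         * request and size the Verbs specification; once a properly sized
         * handle has been allocated further down, execution jumps back
         * here to fill it for real.
         */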
658 fill:
659         proc = mlx4_flow_proc_item_list;
660         /* Go over pattern. */
661         for (item = pattern; item->type; ++item) {
662                 const struct mlx4_flow_proc_item *next = NULL;
663                 unsigned int i;
664                 int err;
665
666                 if (item->type == RTE_FLOW_ITEM_TYPE_VOID)
667                         continue;
668                 if (item->type == MLX4_FLOW_ITEM_TYPE_INTERNAL) {
669                         flow->internal = 1;
670                         continue;
671                 }
672                 if (flow->promisc || flow->allmulti) {
673                         msg = "mlx4 does not support additional matching"
674                                 " criteria combined with indiscriminate"
675                                 " matching on Ethernet headers";
676                         goto exit_item_not_supported;
677                 }
678                 for (i = 0; proc->next_item && proc->next_item[i]; ++i) {
679                         if (proc->next_item[i] == item->type) {
680                                 next = &mlx4_flow_proc_item_list[item->type];
681                                 break;
682                         }
683                 }
684                 if (!next)
685                         goto exit_item_not_supported;
686                 proc = next;
687                 /*
688                  * Perform basic sanity checks only once, while handle is
689                  * not allocated.
690                  */
691                 if (flow == &temp) {
692                         err = mlx4_flow_item_check(item, proc, error);
693                         if (err)
694                                 return err;
695                 }
696                 if (proc->merge) {
697                         err = proc->merge(flow, item, proc, error);
698                         if (err)
699                                 return err;
700                 }
701                 flow->ibv_attr_size += proc->dst_sz;
702         }
703         /* Go over actions list. */
704         for (action = actions; action->type; ++action) {
705                 switch (action->type) {
706                         const struct rte_flow_action_queue *queue;
707                         const struct rte_flow_action_rss *rss;
708                         const struct rte_eth_rss_conf *rss_conf;
709                         uint64_t fields;
710                         unsigned int i;
711
712                 case RTE_FLOW_ACTION_TYPE_VOID:
713                         continue;
714                 case RTE_FLOW_ACTION_TYPE_DROP:
715                         flow->drop = 1;
716                         break;
717                 case RTE_FLOW_ACTION_TYPE_QUEUE:
718                         if (flow->rss)
719                                 break;
720                         queue = action->conf;
721                         if (queue->index >= priv->dev->data->nb_rx_queues) {
722                                 msg = "queue target index beyond number of"
723                                         " configured Rx queues";
724                                 goto exit_action_not_supported;
725                         }
726                         flow->rss = mlx4_rss_get
727                                 (priv, 0, mlx4_rss_hash_key_default, 1,
728                                  &queue->index);
729                         if (!flow->rss) {
730                                 msg = "not enough resources for additional"
731                                         " single-queue RSS context";
732                                 goto exit_action_not_supported;
733                         }
734                         break;
735                 case RTE_FLOW_ACTION_TYPE_RSS:
736                         if (flow->rss)
737                                 break;
738                         rss = action->conf;
739                         /* Default RSS configuration if none is provided. */
740                         rss_conf =
741                                 rss->rss_conf ?
742                                 rss->rss_conf :
743                                 &(struct rte_eth_rss_conf){
744                                         .rss_key = mlx4_rss_hash_key_default,
745                                         .rss_key_len = MLX4_RSS_HASH_KEY_SIZE,
746                                         .rss_hf = -1,
747                                 };
748                         /* Sanity checks. */
749                         for (i = 0; i < rss->num; ++i)
750                                 if (rss->queue[i] >=
751                                     priv->dev->data->nb_rx_queues)
752                                         break;
753                         if (i != rss->num) {
754                                 msg = "queue target index beyond number of"
755                                         " configured Rx queues";
756                                 goto exit_action_not_supported;
757                         }
758                         if (!rte_is_power_of_2(rss->num)) {
759                                 msg = "for RSS, mlx4 requires the number of"
760                                         " queues to be a power of two";
761                                 goto exit_action_not_supported;
762                         }
763                         if (rss_conf->rss_key_len !=
764                             sizeof(flow->rss->key)) {
765                                 msg = "mlx4 supports exactly one RSS hash key"
766                                         " length: "
767                                         MLX4_STR_EXPAND(MLX4_RSS_HASH_KEY_SIZE);
768                                 goto exit_action_not_supported;
769                         }
770                         for (i = 1; i < rss->num; ++i)
771                                 if (rss->queue[i] - rss->queue[i - 1] != 1)
772                                         break;
773                         if (i != rss->num) {
774                                 msg = "mlx4 requires RSS contexts to use"
775                                         " consecutive queue indices only";
776                                 goto exit_action_not_supported;
777                         }
778                         if (rss->queue[0] % rss->num) {
779                                 msg = "mlx4 requires the first queue of a RSS"
780                                         " context to be aligned on a multiple"
781                                         " of the context size";
782                                 goto exit_action_not_supported;
783                         }
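                        /*
                         * Taken together, the checks above only admit
                         * queue sets such as {0}, {2,3} or {4,5,6,7}; for
                         * instance {1,2} fails the alignment test and
                         * {0,1,2} is not a power-of-two-sized set.
                         */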
784                         rte_errno = 0;
785                         fields = mlx4_conv_rss_hf(priv, rss_conf->rss_hf);
786                         if (fields == (uint64_t)-1 && rte_errno) {
787                                 msg = "unsupported RSS hash type requested";
788                                 goto exit_action_not_supported;
789                         }
790                         flow->rss = mlx4_rss_get
791                                 (priv, fields, rss_conf->rss_key, rss->num,
792                                  rss->queue);
793                         if (!flow->rss) {
794                                 msg = "either invalid parameters or not enough"
795                                         " resources for additional multi-queue"
796                                         " RSS context";
797                                 goto exit_action_not_supported;
798                         }
799                         break;
800                 default:
801                         goto exit_action_not_supported;
802                 }
803         }
804         if (!flow->rss && !flow->drop)
805                 return rte_flow_error_set
806                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
807                          NULL, "no valid action");
808         /* Validation ends here. */
809         if (!addr) {
810                 if (flow->rss)
811                         mlx4_rss_put(flow->rss);
812                 return 0;
813         }
814         if (flow == &temp) {
815                 /* Allocate proper handle based on collected data. */
816                 const struct mlx4_malloc_vec vec[] = {
817                         {
818                                 .align = alignof(struct rte_flow),
819                                 .size = sizeof(*flow),
820                                 .addr = (void **)&flow,
821                         },
822                         {
823                                 .align = alignof(struct ibv_flow_attr),
824                                 .size = temp.ibv_attr_size,
825                                 .addr = (void **)&temp.ibv_attr,
826                         },
827                 };
828
829                 if (!mlx4_zmallocv(__func__, vec, RTE_DIM(vec))) {
830                         if (temp.rss)
831                                 mlx4_rss_put(temp.rss);
832                         return rte_flow_error_set
833                                 (error, -rte_errno,
834                                  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
835                                  "flow rule handle allocation failure");
836                 }
837                 /* Most fields will be updated by second pass. */
838                 *flow = (struct rte_flow){
839                         .ibv_attr = temp.ibv_attr,
840                         .ibv_attr_size = sizeof(*flow->ibv_attr),
841                         .rss = temp.rss,
842                 };
843                 *flow->ibv_attr = (struct ibv_flow_attr){
844                         .type = IBV_FLOW_ATTR_NORMAL,
845                         .size = sizeof(*flow->ibv_attr),
846                         .priority = attr->priority,
847                         .port = priv->port,
848                 };
849                 goto fill;
850         }
851         *addr = flow;
852         return 0;
853 exit_item_not_supported:
854         return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
855                                   item, msg ? msg : "item not supported");
856 exit_action_not_supported:
857         return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
858                                   action, msg ? msg : "action not supported");
859 }
860
861 /**
862  * Validate a flow supported by the NIC.
863  *
864  * @see rte_flow_validate()
865  * @see rte_flow_ops
866  */
867 static int
868 mlx4_flow_validate(struct rte_eth_dev *dev,
869                    const struct rte_flow_attr *attr,
870                    const struct rte_flow_item pattern[],
871                    const struct rte_flow_action actions[],
872                    struct rte_flow_error *error)
873 {
874         struct priv *priv = dev->data->dev_private;
875
876         return mlx4_flow_prepare(priv, attr, pattern, actions, error, NULL);
877 }
878
879 /**
880  * Get a drop flow rule resources instance.
881  *
882  * @param priv
883  *   Pointer to private structure.
884  *
885  * @return
886  *   Pointer to drop flow resources on success, NULL otherwise and rte_errno
887  *   is set.
888  */
889 static struct mlx4_drop *
890 mlx4_drop_get(struct priv *priv)
891 {
892         struct mlx4_drop *drop = priv->drop;
893
894         if (drop) {
895                 assert(drop->refcnt);
896                 assert(drop->priv == priv);
897                 ++drop->refcnt;
898                 return drop;
899         }
900         drop = rte_malloc(__func__, sizeof(*drop), 0);
901         if (!drop)
902                 goto error;
903         *drop = (struct mlx4_drop){
904                 .priv = priv,
905                 .refcnt = 1,
906         };
907         drop->cq = mlx4_glue->create_cq(priv->ctx, 1, NULL, NULL, 0);
908         if (!drop->cq)
909                 goto error;
910         drop->qp = mlx4_glue->create_qp
911                 (priv->pd,
912                  &(struct ibv_qp_init_attr){
913                         .send_cq = drop->cq,
914                         .recv_cq = drop->cq,
915                         .qp_type = IBV_QPT_RAW_PACKET,
916                  });
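        /*
         * No receive buffers are ever posted to this QP (it is left in its
         * initial state), so traffic steered to it by drop flow rules is
         * simply discarded.
         */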
917         if (!drop->qp)
918                 goto error;
919         priv->drop = drop;
920         return drop;
921 error:
922         if (drop && drop->qp)
923                 claim_zero(mlx4_glue->destroy_qp(drop->qp));
924         if (drop && drop->cq)
925                 claim_zero(mlx4_glue->destroy_cq(drop->cq));
926         if (drop)
927                 rte_free(drop);
928         rte_errno = ENOMEM;
929         return NULL;
930 }
931
932 /**
933  * Give back a drop flow rule resources instance.
934  *
935  * @param drop
936  *   Pointer to drop flow rule resources.
937  */
938 static void
939 mlx4_drop_put(struct mlx4_drop *drop)
940 {
941         assert(drop->refcnt);
942         if (--drop->refcnt)
943                 return;
944         drop->priv->drop = NULL;
945         claim_zero(mlx4_glue->destroy_qp(drop->qp));
946         claim_zero(mlx4_glue->destroy_cq(drop->cq));
947         rte_free(drop);
948 }
949
950 /**
951  * Toggle a configured flow rule.
952  *
953  * @param priv
954  *   Pointer to private structure.
955  * @param flow
956  *   Flow rule handle to toggle.
957  * @param enable
958  *   Whether associated Verbs flow must be created or removed.
959  * @param[out] error
960  *   Perform verbose error reporting if not NULL.
961  *
962  * @return
963  *   0 on success, a negative errno value otherwise and rte_errno is set.
964  */
965 static int
966 mlx4_flow_toggle(struct priv *priv,
967                  struct rte_flow *flow,
968                  int enable,
969                  struct rte_flow_error *error)
970 {
971         struct ibv_qp *qp = NULL;
972         const char *msg;
973         int err;
974
975         if (!enable) {
976                 if (!flow->ibv_flow)
977                         return 0;
978                 claim_zero(mlx4_glue->destroy_flow(flow->ibv_flow));
979                 flow->ibv_flow = NULL;
980                 if (flow->drop)
981                         mlx4_drop_put(priv->drop);
982                 else if (flow->rss)
983                         mlx4_rss_detach(flow->rss);
984                 return 0;
985         }
986         assert(flow->ibv_attr);
987         if (!flow->internal &&
988             !priv->isolated &&
989             flow->ibv_attr->priority == MLX4_FLOW_PRIORITY_LAST) {
990                 if (flow->ibv_flow) {
991                         claim_zero(mlx4_glue->destroy_flow(flow->ibv_flow));
992                         flow->ibv_flow = NULL;
993                         if (flow->drop)
994                                 mlx4_drop_put(priv->drop);
995                         else if (flow->rss)
996                                 mlx4_rss_detach(flow->rss);
997                 }
998                 err = EACCES;
999                 msg = ("priority level "
1000                        MLX4_STR_EXPAND(MLX4_FLOW_PRIORITY_LAST)
1001                        " is reserved when not in isolated mode");
1002                 goto error;
1003         }
1004         if (flow->rss) {
1005                 struct mlx4_rss *rss = flow->rss;
1006                 int missing = 0;
1007                 unsigned int i;
1008
1009                 /* Stop at the first nonexistent target queue. */
1010                 for (i = 0; i != rss->queues; ++i)
1011                         if (rss->queue_id[i] >=
1012                             priv->dev->data->nb_rx_queues ||
1013                             !priv->dev->data->rx_queues[rss->queue_id[i]]) {
1014                                 missing = 1;
1015                                 break;
1016                         }
1017                 if (flow->ibv_flow) {
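                        /*
                         * The XOR below means the existing Verbs flow
                         * already reflects the current state, i.e. it is a
                         * drop rule while a target queue is missing, or a
                         * regular RSS rule while all queues exist; nothing
                         * needs updating in either case.
                         */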
1018                         if (missing ^ !flow->drop)
1019                                 return 0;
1020                         /* Verbs flow needs updating. */
1021                         claim_zero(mlx4_glue->destroy_flow(flow->ibv_flow));
1022                         flow->ibv_flow = NULL;
1023                         if (flow->drop)
1024                                 mlx4_drop_put(priv->drop);
1025                         else
1026                                 mlx4_rss_detach(rss);
1027                 }
1028                 if (!missing) {
1029                         err = mlx4_rss_attach(rss);
1030                         if (err) {
1031                                 err = -err;
1032                                 msg = "cannot create indirection table or hash"
1033                                         " QP to associate flow rule with";
1034                                 goto error;
1035                         }
1036                         qp = rss->qp;
1037                 }
1038                 /* A missing target queue drops traffic implicitly. */
1039                 flow->drop = missing;
1040         }
1041         if (flow->drop) {
1042                 if (flow->ibv_flow)
1043                         return 0;
1044                 mlx4_drop_get(priv);
1045                 if (!priv->drop) {
1046                         err = rte_errno;
1047                         msg = "resources for drop flow rule cannot be created";
1048                         goto error;
1049                 }
1050                 qp = priv->drop->qp;
1051         }
1052         assert(qp);
1053         if (flow->ibv_flow)
1054                 return 0;
1055         flow->ibv_flow = mlx4_glue->create_flow(qp, flow->ibv_attr);
1056         if (flow->ibv_flow)
1057                 return 0;
1058         if (flow->drop)
1059                 mlx4_drop_put(priv->drop);
1060         else if (flow->rss)
1061                 mlx4_rss_detach(flow->rss);
1062         err = errno;
1063         msg = "flow rule rejected by device";
1064 error:
1065         return rte_flow_error_set
1066                 (error, err, RTE_FLOW_ERROR_TYPE_HANDLE, flow, msg);
1067 }
1068
1069 /**
1070  * Create a flow.
1071  *
1072  * @see rte_flow_create()
1073  * @see rte_flow_ops
1074  */
1075 static struct rte_flow *
1076 mlx4_flow_create(struct rte_eth_dev *dev,
1077                  const struct rte_flow_attr *attr,
1078                  const struct rte_flow_item pattern[],
1079                  const struct rte_flow_action actions[],
1080                  struct rte_flow_error *error)
1081 {
1082         struct priv *priv = dev->data->dev_private;
1083         struct rte_flow *flow;
1084         int err;
1085
1086         err = mlx4_flow_prepare(priv, attr, pattern, actions, error, &flow);
1087         if (err)
1088                 return NULL;
1089         err = mlx4_flow_toggle(priv, flow, priv->started, error);
1090         if (!err) {
1091                 struct rte_flow *curr = LIST_FIRST(&priv->flows);
1092
1093                 /* New rules are inserted after internal ones. */
1094                 if (!curr || !curr->internal) {
1095                         LIST_INSERT_HEAD(&priv->flows, flow, next);
1096                 } else {
1097                         while (LIST_NEXT(curr, next) &&
1098                                LIST_NEXT(curr, next)->internal)
1099                                 curr = LIST_NEXT(curr, next);
1100                         LIST_INSERT_AFTER(curr, flow, next);
1101                 }
1102                 return flow;
1103         }
1104         if (flow->rss)
1105                 mlx4_rss_put(flow->rss);
1106         rte_flow_error_set(error, -err, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
1107                            error->message);
1108         rte_free(flow);
1109         return NULL;
1110 }
1111
1112 /**
1113  * Configure isolated mode.
1114  *
1115  * @see rte_flow_isolate()
1116  * @see rte_flow_ops
1117  */
1118 static int
1119 mlx4_flow_isolate(struct rte_eth_dev *dev,
1120                   int enable,
1121                   struct rte_flow_error *error)
1122 {
1123         struct priv *priv = dev->data->dev_private;
1124
1125         if (!!enable == !!priv->isolated)
1126                 return 0;
1127         priv->isolated = !!enable;
1128         if (mlx4_flow_sync(priv, error)) {
1129                 priv->isolated = !enable;
1130                 return -rte_errno;
1131         }
1132         return 0;
1133 }
1134
1135 /**
1136  * Destroy a flow rule.
1137  *
1138  * @see rte_flow_destroy()
1139  * @see rte_flow_ops
1140  */
1141 static int
1142 mlx4_flow_destroy(struct rte_eth_dev *dev,
1143                   struct rte_flow *flow,
1144                   struct rte_flow_error *error)
1145 {
1146         struct priv *priv = dev->data->dev_private;
1147         int err = mlx4_flow_toggle(priv, flow, 0, error);
1148
1149         if (err)
1150                 return err;
1151         LIST_REMOVE(flow, next);
1152         if (flow->rss)
1153                 mlx4_rss_put(flow->rss);
1154         rte_free(flow);
1155         return 0;
1156 }
1157
1158 /**
1159  * Destroy user-configured flow rules.
1160  *
1161  * This function skips internal flow rules.
1162  *
1163  * @see rte_flow_flush()
1164  * @see rte_flow_ops
1165  */
1166 static int
1167 mlx4_flow_flush(struct rte_eth_dev *dev,
1168                 struct rte_flow_error *error)
1169 {
1170         struct priv *priv = dev->data->dev_private;
1171         struct rte_flow *flow = LIST_FIRST(&priv->flows);
1172
1173         while (flow) {
1174                 struct rte_flow *next = LIST_NEXT(flow, next);
1175
1176                 if (!flow->internal)
1177                         mlx4_flow_destroy(dev, flow, error);
1178                 flow = next;
1179         }
1180         return 0;
1181 }
1182
1183 /**
1184  * Helper function to determine the next configured VLAN filter.
1185  *
1186  * @param priv
1187  *   Pointer to private structure.
1188  * @param vlan
1189  *   VLAN ID to use as a starting point.
1190  *
1191  * @return
1192  *   Next configured VLAN ID or a high value (>= 4096) if there is none.
1193  */
1194 static uint16_t
1195 mlx4_flow_internal_next_vlan(struct priv *priv, uint16_t vlan)
1196 {
1197         while (vlan < 4096) {
1198                 if (priv->dev->data->vlan_filter_conf.ids[vlan / 64] &
1199                     (UINT64_C(1) << (vlan % 64)))
1200                         return vlan;
1201                 ++vlan;
1202         }
1203         return vlan;
1204 }
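
/*
 * Illustration: with only VLAN 100 configured, bit 100 % 64 (36) of
 * vlan_filter_conf.ids[100 / 64] (word 1) is set, so calling this function
 * with @p vlan set to 0 returns 100, and calling it again with 101 returns
 * 4096 to signal that no further filter exists.
 */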
1205
1206 /**
1207  * Generate internal flow rules.
1208  *
1209  * Various flow rules are created depending on the mode the device is in:
1210  *
1211  * 1. Promiscuous:
1212  *       port MAC + broadcast + catch-all (VLAN filtering is ignored).
1213  * 2. All multicast:
1214  *       port MAC/VLAN + broadcast + catch-all multicast.
1215  * 3. Otherwise:
1216  *       port MAC/VLAN + broadcast MAC/VLAN.
1217  *
1218  * About MAC flow rules:
1219  *
1220  * - MAC flow rules are generated from @p dev->data->mac_addrs
1221  *   (@p priv->mac array).
1222  * - An additional flow rule for Ethernet broadcasts is also generated.
1223  * - All these are per-VLAN if @p DEV_RX_OFFLOAD_VLAN_FILTER
1224  *   is enabled and VLAN filters are configured.
1225  *
1226  * @param priv
1227  *   Pointer to private structure.
1228  * @param[out] error
1229  *   Perform verbose error reporting if not NULL.
1230  *
1231  * @return
1232  *   0 on success, a negative errno value otherwise and rte_errno is set.
1233  */
1234 static int
1235 mlx4_flow_internal(struct priv *priv, struct rte_flow_error *error)
1236 {
1237         struct rte_flow_attr attr = {
1238                 .priority = MLX4_FLOW_PRIORITY_LAST,
1239                 .ingress = 1,
1240         };
1241         struct rte_flow_item_eth eth_spec;
1242         const struct rte_flow_item_eth eth_mask = {
1243                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1244         };
1245         const struct rte_flow_item_eth eth_allmulti = {
1246                 .dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
1247         };
1248         struct rte_flow_item_vlan vlan_spec;
1249         const struct rte_flow_item_vlan vlan_mask = {
1250                 .tci = RTE_BE16(0x0fff),
1251         };
1252         struct rte_flow_item pattern[] = {
1253                 {
1254                         .type = MLX4_FLOW_ITEM_TYPE_INTERNAL,
1255                 },
1256                 {
1257                         .type = RTE_FLOW_ITEM_TYPE_ETH,
1258                         .spec = &eth_spec,
1259                         .mask = &eth_mask,
1260                 },
1261                 {
1262                         /* Replaced with VLAN if filtering is enabled. */
1263                         .type = RTE_FLOW_ITEM_TYPE_END,
1264                 },
1265                 {
1266                         .type = RTE_FLOW_ITEM_TYPE_END,
1267                 },
1268         };
1269         /*
1270          * Round number of queues down to their previous power of 2 to
1271          * comply with RSS context limitations. Extra queues silently do not
1272          * get RSS by default.
1273          */
1274         uint32_t queues =
1275                 rte_align32pow2(priv->dev->data->nb_rx_queues + 1) >> 1;
1276         alignas(struct rte_flow_action_rss) uint8_t rss_conf_data
1277                 [offsetof(struct rte_flow_action_rss, queue) +
1278                  sizeof(((struct rte_flow_action_rss *)0)->queue[0]) * queues];
1279         struct rte_flow_action_rss *rss_conf = (void *)rss_conf_data;
1280         struct rte_flow_action actions[] = {
1281                 {
1282                         .type = RTE_FLOW_ACTION_TYPE_RSS,
1283                         .conf = rss_conf,
1284                 },
1285                 {
1286                         .type = RTE_FLOW_ACTION_TYPE_END,
1287                 },
1288         };
1289         struct ether_addr *rule_mac = &eth_spec.dst;
1290         rte_be16_t *rule_vlan =
1291                 (priv->dev->data->dev_conf.rxmode.offloads &
1292                  DEV_RX_OFFLOAD_VLAN_FILTER) &&
1293                 !priv->dev->data->promiscuous ?
1294                 &vlan_spec.tci :
1295                 NULL;
1296         uint16_t vlan = 0;
1297         struct rte_flow *flow;
1298         unsigned int i;
1299         int err = 0;
1300
1301         /* Nothing to be done if there are no Rx queues. */
1302         if (!queues)
1303                 goto error;
1304         /* Prepare default RSS configuration. */
1305         *rss_conf = (struct rte_flow_action_rss){
1306                 .rss_conf = NULL, /* Rely on default fallback settings. */
1307                 .num = queues,
1308         };
1309         for (i = 0; i != queues; ++i)
1310                 rss_conf->queue[i] = i;
1311         /*
1312          * Set up VLAN item if filtering is enabled and at least one VLAN
1313          * filter is configured.
1314          */
1315         if (rule_vlan) {
1316                 vlan = mlx4_flow_internal_next_vlan(priv, 0);
1317                 if (vlan < 4096) {
1318                         pattern[2] = (struct rte_flow_item){
1319                                 .type = RTE_FLOW_ITEM_TYPE_VLAN,
1320                                 .spec = &vlan_spec,
1321                                 .mask = &vlan_mask,
1322                         };
1323 next_vlan:
1324                         *rule_vlan = rte_cpu_to_be_16(vlan);
1325                 } else {
1326                         rule_vlan = NULL;
1327                 }
1328         }
1329         for (i = 0; i != RTE_DIM(priv->mac) + 1; ++i) {
1330                 const struct ether_addr *mac;
1331
1332                 /* Broadcasts are handled by an extra iteration. */
1333                 if (i < RTE_DIM(priv->mac))
1334                         mac = &priv->mac[i];
1335                 else
1336                         mac = &eth_mask.dst;
1337                 if (is_zero_ether_addr(mac))
1338                         continue;
1339                 /* Check if MAC flow rule is already present. */
1340                 for (flow = LIST_FIRST(&priv->flows);
1341                      flow && flow->internal;
1342                      flow = LIST_NEXT(flow, next)) {
1343                         const struct ibv_flow_spec_eth *eth =
1344                                 (const void *)((uintptr_t)flow->ibv_attr +
1345                                                sizeof(*flow->ibv_attr));
1346                         unsigned int j;
1347
1348                         if (!flow->mac)
1349                                 continue;
1350                         assert(flow->ibv_attr->type == IBV_FLOW_ATTR_NORMAL);
1351                         assert(flow->ibv_attr->num_of_specs == 1);
1352                         assert(eth->type == IBV_FLOW_SPEC_ETH);
1353                         assert(flow->rss);
1354                         if (rule_vlan &&
1355                             (eth->val.vlan_tag != *rule_vlan ||
1356                              eth->mask.vlan_tag != RTE_BE16(0x0fff)))
1357                                 continue;
1358                         if (!rule_vlan && eth->mask.vlan_tag)
1359                                 continue;
1360                         for (j = 0; j != sizeof(mac->addr_bytes); ++j)
1361                                 if (eth->val.dst_mac[j] != mac->addr_bytes[j] ||
1362                                     eth->mask.dst_mac[j] != UINT8_C(0xff) ||
1363                                     eth->val.src_mac[j] != UINT8_C(0x00) ||
1364                                     eth->mask.src_mac[j] != UINT8_C(0x00))
1365                                         break;
1366                         if (j != sizeof(mac->addr_bytes))
1367                                 continue;
1368                         if (flow->rss->queues != queues ||
1369                             memcmp(flow->rss->queue_id, rss_conf->queue,
1370                                    queues * sizeof(flow->rss->queue_id[0])))
1371                                 continue;
1372                         break;
1373                 }
1374                 if (!flow || !flow->internal) {
1375                         /* Not found, create a new flow rule. */
1376                         memcpy(rule_mac, mac, sizeof(*mac));
1377                         flow = mlx4_flow_create(priv->dev, &attr, pattern,
1378                                                 actions, error);
1379                         if (!flow) {
1380                                 err = -rte_errno;
1381                                 goto error;
1382                         }
1383                 }
1384                 flow->select = 1;
1385                 flow->mac = 1;
1386         }
1387         if (rule_vlan) {
1388                 vlan = mlx4_flow_internal_next_vlan(priv, vlan + 1);
1389                 if (vlan < 4096)
1390                         goto next_vlan;
1391         }
1392         /* Take care of promiscuous and all multicast flow rules. */
1393         if (priv->dev->data->promiscuous || priv->dev->data->all_multicast) {
1394                 for (flow = LIST_FIRST(&priv->flows);
1395                      flow && flow->internal;
1396                      flow = LIST_NEXT(flow, next)) {
1397                         if (priv->dev->data->promiscuous) {
1398                                 if (flow->promisc)
1399                                         break;
1400                         } else {
1401                                 assert(priv->dev->data->all_multicast);
1402                                 if (flow->allmulti)
1403                                         break;
1404                         }
1405                 }
1406                 if (flow && flow->internal) {
1407                         assert(flow->rss);
1408                         if (flow->rss->queues != queues ||
1409                             memcmp(flow->rss->queue_id, rss_conf->queue,
1410                                    queues * sizeof(flow->rss->queue_id[0])))
1411                                 flow = NULL;
1412                 }
1413                 if (!flow || !flow->internal) {
1414                         /* Not found, create a new flow rule. */
1415                         if (priv->dev->data->promiscuous) {
1416                                 pattern[1].spec = NULL;
1417                                 pattern[1].mask = NULL;
1418                         } else {
1419                                 assert(priv->dev->data->all_multicast);
1420                                 pattern[1].spec = &eth_allmulti;
1421                                 pattern[1].mask = &eth_allmulti;
1422                         }
1423                         pattern[2] = pattern[3];
1424                         flow = mlx4_flow_create(priv->dev, &attr, pattern,
1425                                                 actions, error);
1426                         if (!flow) {
1427                                 err = -rte_errno;
1428                                 goto error;
1429                         }
1430                 }
1431                 assert(flow->promisc || flow->allmulti);
1432                 flow->select = 1;
1433         }
1434 error:
1435         /* Clear selection and clean up stale internal flow rules. */
1436         flow = LIST_FIRST(&priv->flows);
1437         while (flow && flow->internal) {
1438                 struct rte_flow *next = LIST_NEXT(flow, next);
1439
1440                 if (!flow->select)
1441                         claim_zero(mlx4_flow_destroy(priv->dev, flow, error));
1442                 else
1443                         flow->select = 0;
1444                 flow = next;
1445         }
1446         return err;
1447 }
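
/*
 * Editorial note (not part of the original source): the refresh strategy
 * above is a mark-and-sweep. Internal rules that already match the current
 * MAC/VLAN/promiscuous configuration are marked through flow->select,
 * missing ones are created with mlx4_flow_create(), and any internal rule
 * left unselected is destroyed at the "error" label, which doubles as the
 * normal exit path.
 */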
1448
1449 /**
1450  * Synchronize flow rules.
1451  *
1452  * This function synchronizes flow rules with the state of the device by
1453  * taking into account isolated mode and whether target queues are
1454  * configured.
1455  *
1456  * @param priv
1457  *   Pointer to private structure.
1458  * @param[out] error
1459  *   Perform verbose error reporting if not NULL.
1460  *
1461  * @return
1462  *   0 on success, a negative errno value otherwise and rte_errno is set.
1463  */
1464 int
1465 mlx4_flow_sync(struct priv *priv, struct rte_flow_error *error)
1466 {
1467         struct rte_flow *flow;
1468         int ret;
1469
1470         /* Internal flow rules are guaranteed to come first in the list. */
1471         if (priv->isolated) {
1472                 /*
1473                  * Get rid of them in isolated mode; stop at the first
1474                  * non-internal rule found.
1475                  */
1476                 for (flow = LIST_FIRST(&priv->flows);
1477                      flow && flow->internal;
1478                      flow = LIST_FIRST(&priv->flows))
1479                         claim_zero(mlx4_flow_destroy(priv->dev, flow, error));
1480         } else {
1481                 /* Refresh internal rules. */
1482                 ret = mlx4_flow_internal(priv, error);
1483                 if (ret)
1484                         return ret;
1485         }
1486         /* Toggle the remaining flow rules. */
1487         LIST_FOREACH(flow, &priv->flows, next) {
1488                 ret = mlx4_flow_toggle(priv, flow, priv->started, error);
1489                 if (ret)
1490                         return ret;
1491         }
1492         if (!priv->started)
1493                 assert(!priv->drop);
1494         return 0;
1495 }
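
/*
 * Editorial sketch (not part of the driver): mlx4_flow_sync() is meant to
 * be called whenever device state affecting flow rules changes, typically
 * from the start/stop callbacks. The hypothetical caller below, with an
 * illustrative name, shows the intended pattern; error handling is trimmed.
 */
#if 0 /* illustrative only, not compiled into the PMD */
static int
example_dev_start(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	struct rte_flow_error error;
	int ret;

	priv->started = 1;
	/* Re-create internal rules and (re-)enable user-configured ones. */
	ret = mlx4_flow_sync(priv, &error);
	if (ret) {
		priv->started = 0;
		return ret;
	}
	return 0;
}
#endif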
1496
1497 /**
1498  * Clean up all flow rules.
1499  *
1500  * Unlike mlx4_flow_flush(), this function takes care of all remaining flow
1501  * rules regardless of whether they are internal or user-configured.
1502  *
1503  * @param priv
1504  *   Pointer to private structure.
1505  */
1506 void
1507 mlx4_flow_clean(struct priv *priv)
1508 {
1509         struct rte_flow *flow;
1510
1511         while ((flow = LIST_FIRST(&priv->flows)))
1512                 mlx4_flow_destroy(priv->dev, flow, NULL);
1513         assert(LIST_EMPTY(&priv->rss));
1514 }
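
/*
 * Editorial sketch (not part of the driver): unlike mlx4_flow_flush(),
 * which only removes user-configured rules, mlx4_flow_clean() is meant for
 * final teardown once the port is no longer in use, e.g. from the close
 * callback. Hypothetical caller, illustrative name:
 */
#if 0 /* illustrative only, not compiled into the PMD */
static void
example_dev_close(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;

	/* Drop every remaining rule, internal ones included. */
	mlx4_flow_clean(priv);
	/* Release queues and other resources afterwards. */
}
#endif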
1515
1516 static const struct rte_flow_ops mlx4_flow_ops = {
1517         .validate = mlx4_flow_validate,
1518         .create = mlx4_flow_create,
1519         .destroy = mlx4_flow_destroy,
1520         .flush = mlx4_flow_flush,
1521         .isolate = mlx4_flow_isolate,
1522 };
1523
1524 /**
1525  * Manage filter operations.
1526  *
1527  * @param dev
1528  *   Pointer to Ethernet device structure.
1529  * @param filter_type
1530  *   Filter type.
1531  * @param filter_op
1532  *   Operation to perform.
1533  * @param arg
1534  *   Pointer to operation-specific structure.
1535  *
1536  * @return
1537  *   0 on success, a negative errno value otherwise and rte_errno is set.
1538  */
1539 int
1540 mlx4_filter_ctrl(struct rte_eth_dev *dev,
1541                  enum rte_filter_type filter_type,
1542                  enum rte_filter_op filter_op,
1543                  void *arg)
1544 {
1545         switch (filter_type) {
1546         case RTE_ETH_FILTER_GENERIC:
1547                 if (filter_op != RTE_ETH_FILTER_GET)
1548                         break;
1549                 *(const void **)arg = &mlx4_flow_ops;
1550                 return 0;
1551         default:
1552                 ERROR("%p: filter type (%d) not supported",
1553                       (void *)dev, filter_type);
1554                 break;
1555         }
1556         rte_errno = ENOTSUP;
1557         return -rte_errno;
1558 }
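
/*
 * Editorial sketch (not part of the driver): the rte_flow layer obtains
 * &mlx4_flow_ops through this filter_ctrl hook by issuing
 * RTE_ETH_FILTER_GENERIC / RTE_ETH_FILTER_GET. The hypothetical helper
 * below performs the equivalent query through the ethdev filter API;
 * the function name is illustrative.
 */
#if 0 /* illustrative only, not compiled into the PMD */
static const struct rte_flow_ops *
example_get_flow_ops(uint16_t port_id)
{
	const struct rte_flow_ops *ops = NULL;

	if (rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_GENERIC,
				    RTE_ETH_FILTER_GET, &ops))
		return NULL; /* rte_errno is set by the PMD. */
	return ops;
}
#endif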