drivers/net/mlx5/mlx5_fdir.c
/*-
 *   BSD LICENSE
 *
 *   Copyright 2015 6WIND S.A.
 *   Copyright 2015 Mellanox.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of 6WIND S.A. nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <stddef.h>
#include <assert.h>
#include <stdint.h>
#include <string.h>
#include <errno.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_ether.h>
#include <rte_malloc.h>
#include <rte_ethdev.h>
#include <rte_common.h>
#include <rte_flow.h>
#include <rte_flow_driver.h>

#include "mlx5.h"
#include "mlx5_rxtx.h"

struct fdir_flow_desc {
        uint16_t dst_port;
        uint16_t src_port;
        uint32_t src_ip[4];
        uint32_t dst_ip[4];
        uint8_t mac[6];
        uint16_t vlan_tag;
        enum hash_rxq_type type;
};

struct mlx5_fdir_filter {
        LIST_ENTRY(mlx5_fdir_filter) next;
        uint16_t queue; /* Queue packets are assigned to on FDIR match. */
        enum rte_eth_fdir_behavior behavior;
        struct fdir_flow_desc desc;
        struct ibv_exp_flow *flow;
};

LIST_HEAD(fdir_filter_list, mlx5_fdir_filter);

/**
 * Convert struct rte_eth_fdir_filter to mlx5 filter descriptor.
 *
 * @param[in] fdir_filter
 *   DPDK filter structure to convert.
 * @param[out] desc
 *   Resulting mlx5 filter descriptor.
 * @param mode
 *   Flow director mode.
 */
static void
fdir_filter_to_flow_desc(const struct rte_eth_fdir_filter *fdir_filter,
                         struct fdir_flow_desc *desc, enum rte_fdir_mode mode)
{
        /* Initialize descriptor. */
        memset(desc, 0, sizeof(*desc));

        /* Set VLAN ID. */
        desc->vlan_tag = fdir_filter->input.flow_ext.vlan_tci;

        /* Set MAC address. */
        if (mode == RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
                rte_memcpy(desc->mac,
                           fdir_filter->input.flow.mac_vlan_flow.mac_addr.
                                addr_bytes,
                           sizeof(desc->mac));
                desc->type = HASH_RXQ_ETH;
                return;
        }

        /* Set hash RX queue type. */
        switch (fdir_filter->input.flow_type) {
        case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
                desc->type = HASH_RXQ_UDPV4;
                break;
        case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
                desc->type = HASH_RXQ_TCPV4;
                break;
        case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
                desc->type = HASH_RXQ_IPV4;
                break;
        case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
                desc->type = HASH_RXQ_UDPV6;
                break;
        case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
                desc->type = HASH_RXQ_TCPV6;
                break;
        case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
                desc->type = HASH_RXQ_IPV6;
                break;
        default:
                break;
        }

        /* Set flow values. */
        switch (fdir_filter->input.flow_type) {
        case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
        case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
                desc->src_port = fdir_filter->input.flow.udp4_flow.src_port;
                desc->dst_port = fdir_filter->input.flow.udp4_flow.dst_port;
                /* Fall through. */
        case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
                desc->src_ip[0] = fdir_filter->input.flow.ip4_flow.src_ip;
                desc->dst_ip[0] = fdir_filter->input.flow.ip4_flow.dst_ip;
                break;
        case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
        case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
                desc->src_port = fdir_filter->input.flow.udp6_flow.src_port;
                desc->dst_port = fdir_filter->input.flow.udp6_flow.dst_port;
                /* Fall through. */
        case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
                rte_memcpy(desc->src_ip,
                           fdir_filter->input.flow.ipv6_flow.src_ip,
                           sizeof(desc->src_ip));
                rte_memcpy(desc->dst_ip,
                           fdir_filter->input.flow.ipv6_flow.dst_ip,
                           sizeof(desc->dst_ip));
                break;
        default:
                break;
        }
}
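
/*
 * Illustrative sketch (not part of the original driver): how a perfect
 * mode UDPv4 filter maps onto the descriptor above. Variable names and
 * values are hypothetical; byte order is whatever the application
 * programmed into the filter. The ip4_flow/udp4_flow union views alias
 * the same flow, exactly as the accesses in the function above rely on:
 *
 *   struct rte_eth_fdir_filter f = { 0 };
 *   struct fdir_flow_desc desc;
 *
 *   f.input.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP;
 *   f.input.flow.ip4_flow.src_ip = src_ip;
 *   f.input.flow.ip4_flow.dst_ip = dst_ip;
 *   f.input.flow.udp4_flow.src_port = src_port;
 *   f.input.flow.udp4_flow.dst_port = dst_port;
 *   fdir_filter_to_flow_desc(&f, &desc, RTE_FDIR_MODE_PERFECT);
 *
 * After the call, desc.type is HASH_RXQ_UDPV4, the addresses land in
 * desc.src_ip[0]/desc.dst_ip[0] thanks to the fall through above, and
 * both L4 ports are copied verbatim.
 */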

/**
 * Check if two flow descriptors overlap according to configured mask.
 *
 * @param priv
 *   Private structure that provides flow director mask.
 * @param desc1
 *   First flow descriptor to compare.
 * @param desc2
 *   Second flow descriptor to compare.
 *
 * @return
 *   Nonzero if descriptors overlap.
 */
static int
priv_fdir_overlap(const struct priv *priv,
                  const struct fdir_flow_desc *desc1,
                  const struct fdir_flow_desc *desc2)
{
        const struct rte_eth_fdir_masks *mask =
                &priv->dev->data->dev_conf.fdir_conf.mask;
        unsigned int i;

        if (desc1->type != desc2->type)
                return 0;
        /* Ignore non-masked bits. */
        for (i = 0; i != RTE_DIM(desc1->mac); ++i)
                if ((desc1->mac[i] & mask->mac_addr_byte_mask) !=
                    (desc2->mac[i] & mask->mac_addr_byte_mask))
                        return 0;
        if (((desc1->src_port & mask->src_port_mask) !=
             (desc2->src_port & mask->src_port_mask)) ||
            ((desc1->dst_port & mask->dst_port_mask) !=
             (desc2->dst_port & mask->dst_port_mask)))
                return 0;
        switch (desc1->type) {
        case HASH_RXQ_IPV4:
        case HASH_RXQ_UDPV4:
        case HASH_RXQ_TCPV4:
                if (((desc1->src_ip[0] & mask->ipv4_mask.src_ip) !=
                     (desc2->src_ip[0] & mask->ipv4_mask.src_ip)) ||
                    ((desc1->dst_ip[0] & mask->ipv4_mask.dst_ip) !=
                     (desc2->dst_ip[0] & mask->ipv4_mask.dst_ip)))
                        return 0;
                break;
        case HASH_RXQ_IPV6:
        case HASH_RXQ_UDPV6:
        case HASH_RXQ_TCPV6:
                for (i = 0; i != RTE_DIM(desc1->src_ip); ++i)
                        if (((desc1->src_ip[i] & mask->ipv6_mask.src_ip[i]) !=
                             (desc2->src_ip[i] & mask->ipv6_mask.src_ip[i])) ||
                            ((desc1->dst_ip[i] & mask->ipv6_mask.dst_ip[i]) !=
                             (desc2->dst_ip[i] & mask->ipv6_mask.dst_ip[i])))
                                return 0;
                break;
        default:
                break;
        }
        return 1;
}
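
/*
 * Worked example (sketch, hypothetical mask): with
 * fdir_conf.mask.ipv4_mask.src_ip covering only a /24 prefix,
 * descriptors for 10.0.0.1 and 10.0.0.2 overlap because only the
 * masked bits are compared:
 *
 *   (0x0a000001 & 0xffffff00) == (0x0a000002 & 0xffffff00)
 *
 * priv_fdir_overlap() then returns 1, and priv_fdir_flow_add() below
 * rejects the second rule with EEXIST to avoid packet duplication.
 */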

/**
 * Create flow director steering rule for a specific filter.
 *
 * @param priv
 *   Private structure.
 * @param mlx5_fdir_filter
 *   Filter to create a steering rule for.
 * @param fdir_queue
 *   Flow director queue for matching packets.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_fdir_flow_add(struct priv *priv,
                   struct mlx5_fdir_filter *mlx5_fdir_filter,
                   struct fdir_queue *fdir_queue)
{
        struct ibv_exp_flow *flow;
        struct fdir_flow_desc *desc = &mlx5_fdir_filter->desc;
        enum rte_fdir_mode fdir_mode =
                priv->dev->data->dev_conf.fdir_conf.mode;
        struct rte_eth_fdir_masks *mask =
                &priv->dev->data->dev_conf.fdir_conf.mask;
        FLOW_ATTR_SPEC_ETH(data, priv_flow_attr(priv, NULL, 0, desc->type));
        struct ibv_exp_flow_attr *attr = &data->attr;
        uintptr_t spec_offset = (uintptr_t)&data->spec;
        struct ibv_exp_flow_spec_eth *spec_eth;
        struct ibv_exp_flow_spec_ipv4 *spec_ipv4;
        struct ibv_exp_flow_spec_ipv6 *spec_ipv6;
        struct ibv_exp_flow_spec_tcp_udp *spec_tcp_udp;
        struct mlx5_fdir_filter *iter_fdir_filter;
        unsigned int i;

        /* Abort if an existing flow overlaps this one to avoid packet
         * duplication, even if it targets another queue. */
        LIST_FOREACH(iter_fdir_filter, priv->fdir_filter_list, next)
                if ((iter_fdir_filter != mlx5_fdir_filter) &&
                    (iter_fdir_filter->flow != NULL) &&
                    (priv_fdir_overlap(priv,
                                       &mlx5_fdir_filter->desc,
                                       &iter_fdir_filter->desc)))
                        return EEXIST;

        /*
         * No padding must be inserted by the compiler between attr and spec.
         * This layout is expected by libibverbs.
         */
        assert(((uint8_t *)attr + sizeof(*attr)) == (uint8_t *)spec_offset);
        priv_flow_attr(priv, attr, sizeof(data), desc->type);

        /* Set Ethernet spec */
        spec_eth = (struct ibv_exp_flow_spec_eth *)spec_offset;

        /* The first specification must be Ethernet. */
        assert(spec_eth->type == IBV_EXP_FLOW_SPEC_ETH);
        assert(spec_eth->size == sizeof(*spec_eth));

        /* VLAN ID */
        spec_eth->val.vlan_tag = desc->vlan_tag & mask->vlan_tci_mask;
        spec_eth->mask.vlan_tag = mask->vlan_tci_mask;

        /* Update priority */
        attr->priority = 2;

        if (fdir_mode == RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
                /* MAC Address */
                for (i = 0; i != RTE_DIM(spec_eth->mask.dst_mac); ++i) {
                        spec_eth->val.dst_mac[i] =
                                desc->mac[i] & mask->mac_addr_byte_mask;
                        spec_eth->mask.dst_mac[i] = mask->mac_addr_byte_mask;
                }
                goto create_flow;
        }

        switch (desc->type) {
        case HASH_RXQ_IPV4:
        case HASH_RXQ_UDPV4:
        case HASH_RXQ_TCPV4:
                spec_offset += spec_eth->size;

                /* Set IP spec */
                spec_ipv4 = (struct ibv_exp_flow_spec_ipv4 *)spec_offset;

                /* The second specification must be IP. */
                assert(spec_ipv4->type == IBV_EXP_FLOW_SPEC_IPV4);
                assert(spec_ipv4->size == sizeof(*spec_ipv4));

                spec_ipv4->val.src_ip =
                        desc->src_ip[0] & mask->ipv4_mask.src_ip;
                spec_ipv4->val.dst_ip =
                        desc->dst_ip[0] & mask->ipv4_mask.dst_ip;
                spec_ipv4->mask.src_ip = mask->ipv4_mask.src_ip;
                spec_ipv4->mask.dst_ip = mask->ipv4_mask.dst_ip;

                /* Update priority */
                attr->priority = 1;

                if (desc->type == HASH_RXQ_IPV4)
                        goto create_flow;

                spec_offset += spec_ipv4->size;
                break;
        case HASH_RXQ_IPV6:
        case HASH_RXQ_UDPV6:
        case HASH_RXQ_TCPV6:
                spec_offset += spec_eth->size;

                /* Set IP spec */
                spec_ipv6 = (struct ibv_exp_flow_spec_ipv6 *)spec_offset;

                /* The second specification must be IP. */
                assert(spec_ipv6->type == IBV_EXP_FLOW_SPEC_IPV6);
                assert(spec_ipv6->size == sizeof(*spec_ipv6));

                for (i = 0; i != RTE_DIM(desc->src_ip); ++i) {
                        ((uint32_t *)spec_ipv6->val.src_ip)[i] =
                                desc->src_ip[i] & mask->ipv6_mask.src_ip[i];
                        ((uint32_t *)spec_ipv6->val.dst_ip)[i] =
                                desc->dst_ip[i] & mask->ipv6_mask.dst_ip[i];
                }
                rte_memcpy(spec_ipv6->mask.src_ip,
                           mask->ipv6_mask.src_ip,
                           sizeof(spec_ipv6->mask.src_ip));
                rte_memcpy(spec_ipv6->mask.dst_ip,
                           mask->ipv6_mask.dst_ip,
                           sizeof(spec_ipv6->mask.dst_ip));

                /* Update priority */
                attr->priority = 1;

                if (desc->type == HASH_RXQ_IPV6)
                        goto create_flow;

                spec_offset += spec_ipv6->size;
                break;
        default:
                ERROR("invalid flow attribute type");
                return EINVAL;
        }

        /* Set TCP/UDP flow specification. */
        spec_tcp_udp = (struct ibv_exp_flow_spec_tcp_udp *)spec_offset;

        /* The third specification must be TCP/UDP. */
        assert(spec_tcp_udp->type == IBV_EXP_FLOW_SPEC_TCP ||
               spec_tcp_udp->type == IBV_EXP_FLOW_SPEC_UDP);
        assert(spec_tcp_udp->size == sizeof(*spec_tcp_udp));

        spec_tcp_udp->val.src_port = desc->src_port & mask->src_port_mask;
        spec_tcp_udp->val.dst_port = desc->dst_port & mask->dst_port_mask;
        spec_tcp_udp->mask.src_port = mask->src_port_mask;
        spec_tcp_udp->mask.dst_port = mask->dst_port_mask;

        /* Update priority */
        attr->priority = 0;

create_flow:

        errno = 0;
        flow = ibv_exp_create_flow(fdir_queue->qp, attr);
        if (flow == NULL) {
                /* It's not clear whether errno is always set in this case. */
                ERROR("%p: flow director configuration failed, errno=%d: %s",
                      (void *)priv, errno,
                      (errno ? strerror(errno) : "Unknown error"));
                if (errno)
                        return errno;
                return EINVAL;
        }

        DEBUG("%p: added flow director rule (%p)", (void *)priv, (void *)flow);
        mlx5_fdir_filter->flow = flow;
        return 0;
}
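
/*
 * Layout note for the buffer built in priv_fdir_flow_add(): libibverbs
 * expects the attribute and specifications back to back with no
 * padding, e.g. for HASH_RXQ_TCPV4:
 *
 *   struct ibv_exp_flow_attr         attr          (priority 0)
 *   struct ibv_exp_flow_spec_eth     spec_eth
 *   struct ibv_exp_flow_spec_ipv4    spec_ipv4
 *   struct ibv_exp_flow_spec_tcp_udp spec_tcp_udp
 *
 * The chain stops after spec_eth for HASH_RXQ_ETH (priority 2) and
 * after the IP spec for HASH_RXQ_IPV4/IPV6 (priority 1), which is why
 * spec_offset is only advanced for the types that need an L4 spec.
 */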

/**
 * Destroy a flow director queue.
 *
 * @param priv
 *   Private structure.
 * @param fdir_queue
 *   Flow director queue to be destroyed.
 */
void
priv_fdir_queue_destroy(struct priv *priv, struct fdir_queue *fdir_queue)
{
        struct mlx5_fdir_filter *fdir_filter;

        /* Disable filter flows still applying to this queue. */
        LIST_FOREACH(fdir_filter, priv->fdir_filter_list, next) {
                unsigned int idx = fdir_filter->queue;
                struct rxq_ctrl *rxq_ctrl =
                        container_of((*priv->rxqs)[idx], struct rxq_ctrl, rxq);

                assert(idx < priv->rxqs_n);
                if (fdir_queue == rxq_ctrl->fdir_queue &&
                    fdir_filter->flow != NULL) {
                        claim_zero(ibv_exp_destroy_flow(fdir_filter->flow));
                        fdir_filter->flow = NULL;
                }
        }
        assert(fdir_queue->qp);
        claim_zero(ibv_destroy_qp(fdir_queue->qp));
        assert(fdir_queue->ind_table);
        claim_zero(ibv_exp_destroy_rwq_ind_table(fdir_queue->ind_table));
        if (fdir_queue->wq)
                claim_zero(ibv_exp_destroy_wq(fdir_queue->wq));
        if (fdir_queue->cq)
                claim_zero(ibv_destroy_cq(fdir_queue->cq));
#ifndef NDEBUG
        memset(fdir_queue, 0x2a, sizeof(*fdir_queue));
#endif
        rte_free(fdir_queue);
}

/**
 * Create a flow director queue.
 *
 * @param priv
 *   Private structure.
 * @param wq
 *   Work queue to route matched packets to, NULL if one needs to
 *   be created.
 * @param socket
 *   NUMA socket to allocate the queue on.
 *
 * @return
 *   Related flow director queue on success, NULL otherwise.
 */
static struct fdir_queue *
priv_fdir_queue_create(struct priv *priv, struct ibv_exp_wq *wq,
                       unsigned int socket)
{
        struct fdir_queue *fdir_queue;

        fdir_queue = rte_calloc_socket(__func__, 1, sizeof(*fdir_queue),
                                       0, socket);
        if (!fdir_queue) {
                ERROR("cannot allocate flow director queue");
                return NULL;
        }
        assert(priv->pd);
        assert(priv->ctx);
        if (!wq) {
                fdir_queue->cq = ibv_exp_create_cq(
                        priv->ctx, 1, NULL, NULL, 0,
                        &(struct ibv_exp_cq_init_attr){
                                .comp_mask = 0,
                        });
                if (!fdir_queue->cq) {
                        ERROR("cannot create flow director CQ");
                        goto error;
                }
                fdir_queue->wq = ibv_exp_create_wq(
                        priv->ctx,
                        &(struct ibv_exp_wq_init_attr){
                                .wq_type = IBV_EXP_WQT_RQ,
                                .max_recv_wr = 1,
                                .max_recv_sge = 1,
                                .pd = priv->pd,
                                .cq = fdir_queue->cq,
                        });
                if (!fdir_queue->wq) {
                        ERROR("cannot create flow director WQ");
                        goto error;
                }
                wq = fdir_queue->wq;
        }
        fdir_queue->ind_table = ibv_exp_create_rwq_ind_table(
                priv->ctx,
                &(struct ibv_exp_rwq_ind_table_init_attr){
                        .pd = priv->pd,
                        .log_ind_tbl_size = 0,
                        .ind_tbl = &wq,
                        .comp_mask = 0,
                });
        if (!fdir_queue->ind_table) {
                ERROR("cannot create flow director indirection table");
                goto error;
        }
        fdir_queue->qp = ibv_exp_create_qp(
                priv->ctx,
                &(struct ibv_exp_qp_init_attr){
                        .qp_type = IBV_QPT_RAW_PACKET,
                        .comp_mask =
                                IBV_EXP_QP_INIT_ATTR_PD |
                                IBV_EXP_QP_INIT_ATTR_PORT |
                                IBV_EXP_QP_INIT_ATTR_RX_HASH,
                        .pd = priv->pd,
                        .rx_hash_conf = &(struct ibv_exp_rx_hash_conf){
                                .rx_hash_function =
                                        IBV_EXP_RX_HASH_FUNC_TOEPLITZ,
                                .rx_hash_key_len = rss_hash_default_key_len,
                                .rx_hash_key = rss_hash_default_key,
                                .rx_hash_fields_mask = 0,
                                .rwq_ind_tbl = fdir_queue->ind_table,
                        },
                        .port_num = priv->port,
                });
        if (!fdir_queue->qp) {
                ERROR("cannot create flow director hash RX QP");
                goto error;
        }
        return fdir_queue;
error:
        assert(fdir_queue);
        assert(!fdir_queue->qp);
        if (fdir_queue->ind_table)
                claim_zero(ibv_exp_destroy_rwq_ind_table
                           (fdir_queue->ind_table));
        if (fdir_queue->wq)
                claim_zero(ibv_exp_destroy_wq(fdir_queue->wq));
        if (fdir_queue->cq)
                claim_zero(ibv_destroy_cq(fdir_queue->cq));
        rte_free(fdir_queue);
        return NULL;
}
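
/*
 * Usage sketch for priv_fdir_queue_create(): passing an existing RX
 * work queue attaches the steering QP to that queue, while passing a
 * NULL wq makes the function build a dummy CQ/WQ pair that never has
 * receive buffers posted, so matched packets are effectively dropped:
 *
 *   fq = priv_fdir_queue_create(priv, rxq_ctrl->wq, rxq_ctrl->socket);
 *   drop = priv_fdir_queue_create(priv, NULL, socket);
 *
 * Both call sites appear in the helpers below.
 */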

/**
 * Get flow director queue for a specific RX queue, create it in case
 * it does not exist.
 *
 * @param priv
 *   Private structure.
 * @param idx
 *   RX queue index.
 *
 * @return
 *   Related flow director queue on success, NULL otherwise.
 */
static struct fdir_queue *
priv_get_fdir_queue(struct priv *priv, uint16_t idx)
{
        struct rxq_ctrl *rxq_ctrl =
                container_of((*priv->rxqs)[idx], struct rxq_ctrl, rxq);
        struct fdir_queue *fdir_queue = rxq_ctrl->fdir_queue;

        assert(rxq_ctrl->wq);
        if (fdir_queue == NULL) {
                fdir_queue = priv_fdir_queue_create(priv, rxq_ctrl->wq,
                                                    rxq_ctrl->socket);
                rxq_ctrl->fdir_queue = fdir_queue;
        }
        return fdir_queue;
}

/**
 * Get the flow director drop queue. Create it if it does not exist.
 *
 * @param priv
 *   Private structure.
 *
 * @return
 *   Flow director drop queue on success, NULL otherwise.
 */
static struct fdir_queue *
priv_get_fdir_drop_queue(struct priv *priv)
{
        struct fdir_queue *fdir_queue = priv->fdir_drop_queue;

        if (fdir_queue == NULL) {
                unsigned int socket = SOCKET_ID_ANY;

                /* Select a known NUMA socket if possible. */
                if (priv->rxqs_n && (*priv->rxqs)[0])
                        socket = container_of((*priv->rxqs)[0],
                                              struct rxq_ctrl, rxq)->socket;
                fdir_queue = priv_fdir_queue_create(priv, NULL, socket);
                priv->fdir_drop_queue = fdir_queue;
        }
        return fdir_queue;
}

/**
 * Enable flow director filter and create its steering rule.
 *
 * @param priv
 *   Private structure.
 * @param mlx5_fdir_filter
 *   Filter to create a steering rule for.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_fdir_filter_enable(struct priv *priv,
                        struct mlx5_fdir_filter *mlx5_fdir_filter)
{
        struct fdir_queue *fdir_queue;

        /* Check if flow already exists. */
        if (mlx5_fdir_filter->flow != NULL)
                return 0;

        /* Get fdir_queue for specific queue. */
        if (mlx5_fdir_filter->behavior == RTE_ETH_FDIR_REJECT)
                fdir_queue = priv_get_fdir_drop_queue(priv);
        else
                fdir_queue = priv_get_fdir_queue(priv,
                                                 mlx5_fdir_filter->queue);

        if (fdir_queue == NULL) {
                ERROR("failed to create flow director rxq for queue %d",
                      mlx5_fdir_filter->queue);
                return EINVAL;
        }

        /* Create flow. */
        return priv_fdir_flow_add(priv, mlx5_fdir_filter, fdir_queue);
}

/**
 * Initialize flow director filters list.
 *
 * @param priv
 *   Private structure.
 *
 * @return
 *   0 on success, errno value on failure.
 */
int
fdir_init_filters_list(struct priv *priv)
{
        /* Filter list initialization should be done only once. */
        if (priv->fdir_filter_list)
                return 0;

        /* Create filters list. */
        priv->fdir_filter_list =
                rte_calloc(__func__, 1, sizeof(*priv->fdir_filter_list), 0);

        if (priv->fdir_filter_list == NULL) {
                int err = ENOMEM;

                ERROR("cannot allocate flow director filter list: %s",
                      strerror(err));
                return err;
        }

        LIST_INIT(priv->fdir_filter_list);

        return 0;
}

/**
 * Flush all filters.
 *
 * @param priv
 *   Private structure.
 */
static void
priv_fdir_filter_flush(struct priv *priv)
{
        struct mlx5_fdir_filter *mlx5_fdir_filter;

        while ((mlx5_fdir_filter = LIST_FIRST(priv->fdir_filter_list))) {
                struct ibv_exp_flow *flow = mlx5_fdir_filter->flow;

                DEBUG("%p: flushing flow director filter %p",
                      (void *)priv, (void *)mlx5_fdir_filter);
                LIST_REMOVE(mlx5_fdir_filter, next);
                if (flow != NULL)
                        claim_zero(ibv_exp_destroy_flow(flow));
                rte_free(mlx5_fdir_filter);
        }
}

/**
 * Remove all flow director filters and delete list.
 *
 * @param priv
 *   Private structure.
 */
void
priv_fdir_delete_filters_list(struct priv *priv)
{
        priv_fdir_filter_flush(priv);
        rte_free(priv->fdir_filter_list);
        priv->fdir_filter_list = NULL;
}

/**
 * Disable flow director, remove all steering rules.
 *
 * @param priv
 *   Private structure.
 */
void
priv_fdir_disable(struct priv *priv)
{
        unsigned int i;
        struct mlx5_fdir_filter *mlx5_fdir_filter;

        /* Run on every flow director filter and destroy flow handle. */
        LIST_FOREACH(mlx5_fdir_filter, priv->fdir_filter_list, next) {
                struct ibv_exp_flow *flow;

                /* Only valid elements should be in the list. */
                assert(mlx5_fdir_filter != NULL);
                flow = mlx5_fdir_filter->flow;

                /* Destroy flow handle. */
                if (flow != NULL) {
                        claim_zero(ibv_exp_destroy_flow(flow));
                        mlx5_fdir_filter->flow = NULL;
                }
        }

        /* Destroy flow director context in each RX queue. */
        for (i = 0; (i != priv->rxqs_n); i++) {
                struct rxq_ctrl *rxq_ctrl;

                if (!(*priv->rxqs)[i])
                        continue;
                rxq_ctrl = container_of((*priv->rxqs)[i], struct rxq_ctrl, rxq);
                if (!rxq_ctrl->fdir_queue)
                        continue;
                priv_fdir_queue_destroy(priv, rxq_ctrl->fdir_queue);
                rxq_ctrl->fdir_queue = NULL;
        }
        if (priv->fdir_drop_queue) {
                priv_fdir_queue_destroy(priv, priv->fdir_drop_queue);
                priv->fdir_drop_queue = NULL;
        }
}

/**
 * Enable flow director, create steering rules.
 *
 * @param priv
 *   Private structure.
 */
void
priv_fdir_enable(struct priv *priv)
{
        struct mlx5_fdir_filter *mlx5_fdir_filter;

        /* Run on every fdir filter and create flow handle. */
        LIST_FOREACH(mlx5_fdir_filter, priv->fdir_filter_list, next) {
                /* Only valid elements should be in the list. */
                assert(mlx5_fdir_filter != NULL);

                priv_fdir_filter_enable(priv, mlx5_fdir_filter);
        }
}

/**
 * Find specific filter in list.
 *
 * @param priv
 *   Private structure.
 * @param fdir_filter
 *   Flow director filter to find.
 *
 * @return
 *   Filter element if found, otherwise NULL.
 */
static struct mlx5_fdir_filter *
priv_find_filter_in_list(struct priv *priv,
                         const struct rte_eth_fdir_filter *fdir_filter)
{
        struct fdir_flow_desc desc;
        struct mlx5_fdir_filter *mlx5_fdir_filter;
        enum rte_fdir_mode fdir_mode = priv->dev->data->dev_conf.fdir_conf.mode;

        /* Get flow director filter to look for. */
        fdir_filter_to_flow_desc(fdir_filter, &desc, fdir_mode);

        /* Look for the requested element. */
        LIST_FOREACH(mlx5_fdir_filter, priv->fdir_filter_list, next) {
                /* Only valid elements should be in the list. */
                assert(mlx5_fdir_filter != NULL);

                /* Return matching filter. */
                if (!memcmp(&desc, &mlx5_fdir_filter->desc, sizeof(desc)))
                        return mlx5_fdir_filter;
        }

        /* Filter not found. */
        return NULL;
}

/**
 * Add new flow director filter and store it in list.
 *
 * @param priv
 *   Private structure.
 * @param fdir_filter
 *   Flow director filter to add.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_fdir_filter_add(struct priv *priv,
                     const struct rte_eth_fdir_filter *fdir_filter)
{
        struct mlx5_fdir_filter *mlx5_fdir_filter;
        enum rte_fdir_mode fdir_mode = priv->dev->data->dev_conf.fdir_conf.mode;
        int err = 0;

        /* Validate queue number. */
        if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
                ERROR("invalid queue number %d", fdir_filter->action.rx_queue);
                return EINVAL;
        }

        /* Duplicate filters are currently unsupported. */
        mlx5_fdir_filter = priv_find_filter_in_list(priv, fdir_filter);
        if (mlx5_fdir_filter != NULL) {
                ERROR("filter already exists");
                return EINVAL;
        }

        /* Create new flow director filter. */
        mlx5_fdir_filter =
                rte_calloc(__func__, 1, sizeof(*mlx5_fdir_filter), 0);
        if (mlx5_fdir_filter == NULL) {
                err = ENOMEM;
                ERROR("cannot allocate flow director filter: %s",
                      strerror(err));
                return err;
        }

        /* Set action parameters. */
        mlx5_fdir_filter->queue = fdir_filter->action.rx_queue;
        mlx5_fdir_filter->behavior = fdir_filter->action.behavior;

        /* Convert to mlx5 filter descriptor. */
        fdir_filter_to_flow_desc(fdir_filter,
                                 &mlx5_fdir_filter->desc, fdir_mode);

        /* Insert new filter into list. */
        LIST_INSERT_HEAD(priv->fdir_filter_list, mlx5_fdir_filter, next);

        DEBUG("%p: flow director filter %p added",
              (void *)priv, (void *)mlx5_fdir_filter);

        /* Enable filter immediately if device is started. */
        if (priv->started)
                err = priv_fdir_filter_enable(priv, mlx5_fdir_filter);

        return err;
}

/**
 * Update queue for specific filter.
 *
 * @param priv
 *   Private structure.
 * @param fdir_filter
 *   Filter to be updated.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_fdir_filter_update(struct priv *priv,
                        const struct rte_eth_fdir_filter *fdir_filter)
{
        struct mlx5_fdir_filter *mlx5_fdir_filter;

        /* Validate queue number. */
        if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
                ERROR("invalid queue number %d", fdir_filter->action.rx_queue);
                return EINVAL;
        }

        mlx5_fdir_filter = priv_find_filter_in_list(priv, fdir_filter);
        if (mlx5_fdir_filter != NULL) {
                struct ibv_exp_flow *flow = mlx5_fdir_filter->flow;
                int err = 0;

                /* Update queue number. */
                mlx5_fdir_filter->queue = fdir_filter->action.rx_queue;

                /* Destroy flow handle. */
                if (flow != NULL) {
                        claim_zero(ibv_exp_destroy_flow(flow));
                        mlx5_fdir_filter->flow = NULL;
                }
                DEBUG("%p: flow director filter %p updated",
                      (void *)priv, (void *)mlx5_fdir_filter);

                /* Enable filter if device is started. */
                if (priv->started)
                        err = priv_fdir_filter_enable(priv, mlx5_fdir_filter);

                return err;
        }

        /* Filter not found, create it. */
        DEBUG("%p: filter not found for update, creating new filter",
              (void *)priv);
        return priv_fdir_filter_add(priv, fdir_filter);
}

/**
 * Delete specific filter.
 *
 * @param priv
 *   Private structure.
 * @param fdir_filter
 *   Filter to be deleted.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_fdir_filter_delete(struct priv *priv,
                        const struct rte_eth_fdir_filter *fdir_filter)
{
        struct mlx5_fdir_filter *mlx5_fdir_filter;

        mlx5_fdir_filter = priv_find_filter_in_list(priv, fdir_filter);
        if (mlx5_fdir_filter != NULL) {
                struct ibv_exp_flow *flow = mlx5_fdir_filter->flow;

                /* Remove element from list. */
                LIST_REMOVE(mlx5_fdir_filter, next);

                /* Destroy flow handle. */
                if (flow != NULL) {
                        claim_zero(ibv_exp_destroy_flow(flow));
                        mlx5_fdir_filter->flow = NULL;
                }

                DEBUG("%p: flow director filter %p deleted",
                      (void *)priv, (void *)mlx5_fdir_filter);

                /* Delete filter. */
                rte_free(mlx5_fdir_filter);

                return 0;
        }

        ERROR("%p: flow director delete failed, cannot find filter",
              (void *)priv);
        return EINVAL;
}

/**
 * Get flow director information.
 *
 * @param priv
 *   Private structure.
 * @param[out] fdir_info
 *   Resulting flow director information.
 */
static void
priv_fdir_info_get(struct priv *priv, struct rte_eth_fdir_info *fdir_info)
{
        struct rte_eth_fdir_masks *mask =
                &priv->dev->data->dev_conf.fdir_conf.mask;

        fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
        fdir_info->guarant_spc = 0;

        rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));

        fdir_info->max_flexpayload = 0;
        fdir_info->flow_types_mask[0] = 0;

        fdir_info->flex_payload_unit = 0;
        fdir_info->max_flex_payload_segment_num = 0;
        fdir_info->flex_payload_limit = 0;
        memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
}

/**
 * Deal with flow director operations.
 *
 * @param priv
 *   Pointer to private structure.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_fdir_ctrl_func(struct priv *priv, enum rte_filter_op filter_op, void *arg)
{
        enum rte_fdir_mode fdir_mode =
                priv->dev->data->dev_conf.fdir_conf.mode;
        int ret = 0;

        if (filter_op == RTE_ETH_FILTER_NOP)
                return 0;

        if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
            fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
                ERROR("%p: flow director mode %d not supported",
                      (void *)priv, fdir_mode);
                return EINVAL;
        }

        switch (filter_op) {
        case RTE_ETH_FILTER_ADD:
                ret = priv_fdir_filter_add(priv, arg);
                break;
        case RTE_ETH_FILTER_UPDATE:
                ret = priv_fdir_filter_update(priv, arg);
                break;
        case RTE_ETH_FILTER_DELETE:
                ret = priv_fdir_filter_delete(priv, arg);
                break;
        case RTE_ETH_FILTER_FLUSH:
                priv_fdir_filter_flush(priv);
                break;
        case RTE_ETH_FILTER_INFO:
                priv_fdir_info_get(priv, arg);
                break;
        default:
                DEBUG("%p: unknown operation %u", (void *)priv, filter_op);
                ret = EINVAL;
                break;
        }
        return ret;
}

static const struct rte_flow_ops mlx5_flow_ops = {
        .validate = mlx5_flow_validate,
        .create = mlx5_flow_create,
        .destroy = mlx5_flow_destroy,
        .flush = mlx5_flow_flush,
        .query = NULL,
        .isolate = mlx5_flow_isolate,
};
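
/*
 * Note: applications never call the table above directly. The rte_flow
 * layer retrieves it through RTE_ETH_FILTER_GENERIC / RTE_ETH_FILTER_GET
 * handled in mlx5_dev_filter_ctrl() below, roughly as in this sketch:
 *
 *   const struct rte_flow_ops *ops;
 *
 *   rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_GENERIC,
 *                           RTE_ETH_FILTER_GET, &ops);
 */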

/**
 * Manage filter operations.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param filter_type
 *   Filter type.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
int
mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
                     enum rte_filter_type filter_type,
                     enum rte_filter_op filter_op,
                     void *arg)
{
        int ret = EINVAL;
        struct priv *priv = dev->data->dev_private;

        switch (filter_type) {
        case RTE_ETH_FILTER_GENERIC:
                if (filter_op != RTE_ETH_FILTER_GET)
                        return -EINVAL;
                *(const void **)arg = &mlx5_flow_ops;
                return 0;
        case RTE_ETH_FILTER_FDIR:
                priv_lock(priv);
                ret = priv_fdir_ctrl_func(priv, filter_op, arg);
                priv_unlock(priv);
                break;
        default:
                ERROR("%p: filter type (%d) not supported",
                      (void *)dev, filter_type);
                break;
        }

        return -ret;
}
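
/*
 * End-to-end sketch (hypothetical port and queue numbers): flow director
 * filters reach this driver through the legacy filter control API:
 *
 *   struct rte_eth_fdir_filter f = { 0 };
 *   int ret;
 *
 *   f.input.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP;
 *   f.action.rx_queue = 3;
 *   f.action.behavior = RTE_ETH_FDIR_ACCEPT;
 *   ret = rte_eth_dev_filter_ctrl(0, RTE_ETH_FILTER_FDIR,
 *                                 RTE_ETH_FILTER_ADD, &f);
 *
 * ret is 0 on success or a negative errno value; note the sign flip at
 * the end of mlx5_dev_filter_ctrl() above.
 */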