4 * Copyright 2017 6WIND S.A.
5 * Copyright 2017 Mellanox
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of 6WIND S.A. nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 * Flow API operations for mlx4 driver.
39 #include <arpa/inet.h>
46 #include <sys/queue.h>
48 /* Verbs headers do not support -pedantic. */
50 #pragma GCC diagnostic ignored "-Wpedantic"
52 #include <infiniband/verbs.h>
54 #pragma GCC diagnostic error "-Wpedantic"
57 #include <rte_byteorder.h>
58 #include <rte_errno.h>
59 #include <rte_eth_ctrl.h>
60 #include <rte_ethdev.h>
61 #include <rte_ether.h>
63 #include <rte_flow_driver.h>
64 #include <rte_malloc.h>
68 #include "mlx4_flow.h"
69 #include "mlx4_rxtx.h"
70 #include "mlx4_utils.h"
72 /** Static initializer for a list of subsequent item types. */
73 #define NEXT_ITEM(...) \
74 (const enum rte_flow_item_type []){ \
75 __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
78 /** Processor structure associated with a flow item. */
79 struct mlx4_flow_proc_item {
80 /** Bit-mask for fields supported by this PMD. */
81 const void *mask_support;
82 /** Bit-mask to use when @p item->mask is not provided. */
83 const void *mask_default;
84 /** Size in bytes for @p mask_support and @p mask_default. */
85 const unsigned int mask_sz;
86 /** Merge a pattern item into a flow rule handle. */
87 int (*merge)(struct rte_flow *flow,
88 const struct rte_flow_item *item,
89 const struct mlx4_flow_proc_item *proc,
90 struct rte_flow_error *error);
91 /** Size in bytes of the destination structure. */
92 const unsigned int dst_sz;
93 /** List of possible subsequent items. */
94 const enum rte_flow_item_type *const next_item;
97 /** Shared resources for drop flow rules. */
99 struct ibv_qp *qp; /**< QP target. */
100 struct ibv_cq *cq; /**< CQ associated with above QP. */
101 struct priv *priv; /**< Back pointer to private data. */
102 uint32_t refcnt; /**< Reference count. */
106 * Merge Ethernet pattern item into flow rule handle.
108 * Additional mlx4-specific constraints on supported fields:
110 * - No support for partial masks, except in the specific case of matching
111 * all multicast traffic (@p spec->dst and @p mask->dst equal to
112 * 01:00:00:00:00:00).
113 * - Not providing @p item->spec or providing an empty @p mask->dst is
114 * *only* supported if the rule doesn't specify additional matching
115 * criteria (i.e. rule is promiscuous-like).
117 * @param[in, out] flow
118 * Flow rule handle to update.
120 * Pattern item to merge.
122 * Associated item-processing object.
124 * Perform verbose error reporting if not NULL.
127 * 0 on success, a negative errno value otherwise and rte_errno is set.
130 mlx4_flow_merge_eth(struct rte_flow *flow,
131 const struct rte_flow_item *item,
132 const struct mlx4_flow_proc_item *proc,
133 struct rte_flow_error *error)
135 const struct rte_flow_item_eth *spec = item->spec;
136 const struct rte_flow_item_eth *mask =
137 spec ? (item->mask ? item->mask : proc->mask_default) : NULL;
138 struct ibv_flow_spec_eth *eth;
145 uint32_t sum_dst = 0;
146 uint32_t sum_src = 0;
148 for (i = 0; i != sizeof(mask->dst.addr_bytes); ++i) {
149 sum_dst += mask->dst.addr_bytes[i];
150 sum_src += mask->src.addr_bytes[i];
153 msg = "mlx4 does not support source MAC matching";
155 } else if (!sum_dst) {
157 } else if (sum_dst == 1 && mask->dst.addr_bytes[0] == 1) {
158 if (!(spec->dst.addr_bytes[0] & 1)) {
159 msg = "mlx4 does not support the explicit"
160 " exclusion of all multicast traffic";
164 } else if (sum_dst != (UINT8_C(0xff) * ETHER_ADDR_LEN)) {
165 msg = "mlx4 does not support matching partial"
173 flow->ibv_attr->type = IBV_FLOW_ATTR_ALL_DEFAULT;
176 if (flow->allmulti) {
177 flow->ibv_attr->type = IBV_FLOW_ATTR_MC_DEFAULT;
180 ++flow->ibv_attr->num_of_specs;
181 eth = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size);
182 *eth = (struct ibv_flow_spec_eth) {
183 .type = IBV_FLOW_SPEC_ETH,
184 .size = sizeof(*eth),
186 memcpy(eth->val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
187 memcpy(eth->mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
188 /* Remove unwanted bits from values. */
189 for (i = 0; i < ETHER_ADDR_LEN; ++i) {
190 eth->val.dst_mac[i] &= eth->mask.dst_mac[i];
194 return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
199 * Merge VLAN pattern item into flow rule handle.
201 * Additional mlx4-specific constraints on supported fields:
203 * - Matching *all* VLAN traffic by omitting @p item->spec or providing an
204 * empty @p item->mask would also include non-VLAN traffic. Doing so is
205 * therefore unsupported.
206 * - No support for partial masks.
208 * @param[in, out] flow
209 * Flow rule handle to update.
211 * Pattern item to merge.
213 * Associated item-processing object.
215 * Perform verbose error reporting if not NULL.
218 * 0 on success, a negative errno value otherwise and rte_errno is set.
221 mlx4_flow_merge_vlan(struct rte_flow *flow,
222 const struct rte_flow_item *item,
223 const struct mlx4_flow_proc_item *proc,
224 struct rte_flow_error *error)
226 const struct rte_flow_item_vlan *spec = item->spec;
227 const struct rte_flow_item_vlan *mask =
228 spec ? (item->mask ? item->mask : proc->mask_default) : NULL;
229 struct ibv_flow_spec_eth *eth;
232 if (!mask || !mask->tci) {
233 msg = "mlx4 cannot match all VLAN traffic while excluding"
234 " non-VLAN traffic, TCI VID must be specified";
237 if (mask->tci != RTE_BE16(0x0fff)) {
238 msg = "mlx4 does not support partial TCI VID matching";
243 eth = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size -
245 eth->val.vlan_tag = spec->tci;
246 eth->mask.vlan_tag = mask->tci;
247 eth->val.vlan_tag &= eth->mask.vlan_tag;
250 return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
255 * Merge IPv4 pattern item into flow rule handle.
257 * Additional mlx4-specific constraints on supported fields:
259 * - No support for partial masks.
261 * @param[in, out] flow
262 * Flow rule handle to update.
264 * Pattern item to merge.
266 * Associated item-processing object.
268 * Perform verbose error reporting if not NULL.
271 * 0 on success, a negative errno value otherwise and rte_errno is set.
274 mlx4_flow_merge_ipv4(struct rte_flow *flow,
275 const struct rte_flow_item *item,
276 const struct mlx4_flow_proc_item *proc,
277 struct rte_flow_error *error)
279 const struct rte_flow_item_ipv4 *spec = item->spec;
280 const struct rte_flow_item_ipv4 *mask =
281 spec ? (item->mask ? item->mask : proc->mask_default) : NULL;
282 struct ibv_flow_spec_ipv4 *ipv4;
286 ((uint32_t)(mask->hdr.src_addr + 1) > UINT32_C(1) ||
287 (uint32_t)(mask->hdr.dst_addr + 1) > UINT32_C(1))) {
288 msg = "mlx4 does not support matching partial IPv4 fields";
293 ++flow->ibv_attr->num_of_specs;
294 ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size);
295 *ipv4 = (struct ibv_flow_spec_ipv4) {
296 .type = IBV_FLOW_SPEC_IPV4,
297 .size = sizeof(*ipv4),
301 ipv4->val = (struct ibv_flow_ipv4_filter) {
302 .src_ip = spec->hdr.src_addr,
303 .dst_ip = spec->hdr.dst_addr,
305 ipv4->mask = (struct ibv_flow_ipv4_filter) {
306 .src_ip = mask->hdr.src_addr,
307 .dst_ip = mask->hdr.dst_addr,
309 /* Remove unwanted bits from values. */
310 ipv4->val.src_ip &= ipv4->mask.src_ip;
311 ipv4->val.dst_ip &= ipv4->mask.dst_ip;
314 return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
319 * Merge UDP pattern item into flow rule handle.
321 * Additional mlx4-specific constraints on supported fields:
323 * - No support for partial masks.
325 * @param[in, out] flow
326 * Flow rule handle to update.
328 * Pattern item to merge.
330 * Associated item-processing object.
332 * Perform verbose error reporting if not NULL.
335 * 0 on success, a negative errno value otherwise and rte_errno is set.
338 mlx4_flow_merge_udp(struct rte_flow *flow,
339 const struct rte_flow_item *item,
340 const struct mlx4_flow_proc_item *proc,
341 struct rte_flow_error *error)
343 const struct rte_flow_item_udp *spec = item->spec;
344 const struct rte_flow_item_udp *mask =
345 spec ? (item->mask ? item->mask : proc->mask_default) : NULL;
346 struct ibv_flow_spec_tcp_udp *udp;
350 ((uint16_t)(mask->hdr.src_port + 1) > UINT16_C(1) ||
351 (uint16_t)(mask->hdr.dst_port + 1) > UINT16_C(1))) {
352 msg = "mlx4 does not support matching partial UDP fields";
357 ++flow->ibv_attr->num_of_specs;
358 udp = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size);
359 *udp = (struct ibv_flow_spec_tcp_udp) {
360 .type = IBV_FLOW_SPEC_UDP,
361 .size = sizeof(*udp),
365 udp->val.dst_port = spec->hdr.dst_port;
366 udp->val.src_port = spec->hdr.src_port;
367 udp->mask.dst_port = mask->hdr.dst_port;
368 udp->mask.src_port = mask->hdr.src_port;
369 /* Remove unwanted bits from values. */
370 udp->val.src_port &= udp->mask.src_port;
371 udp->val.dst_port &= udp->mask.dst_port;
374 return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
379 * Merge TCP pattern item into flow rule handle.
381 * Additional mlx4-specific constraints on supported fields:
383 * - No support for partial masks.
385 * @param[in, out] flow
386 * Flow rule handle to update.
388 * Pattern item to merge.
390 * Associated item-processing object.
392 * Perform verbose error reporting if not NULL.
395 * 0 on success, a negative errno value otherwise and rte_errno is set.
398 mlx4_flow_merge_tcp(struct rte_flow *flow,
399 const struct rte_flow_item *item,
400 const struct mlx4_flow_proc_item *proc,
401 struct rte_flow_error *error)
403 const struct rte_flow_item_tcp *spec = item->spec;
404 const struct rte_flow_item_tcp *mask =
405 spec ? (item->mask ? item->mask : proc->mask_default) : NULL;
406 struct ibv_flow_spec_tcp_udp *tcp;
410 ((uint16_t)(mask->hdr.src_port + 1) > UINT16_C(1) ||
411 (uint16_t)(mask->hdr.dst_port + 1) > UINT16_C(1))) {
412 msg = "mlx4 does not support matching partial TCP fields";
417 ++flow->ibv_attr->num_of_specs;
418 tcp = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size);
419 *tcp = (struct ibv_flow_spec_tcp_udp) {
420 .type = IBV_FLOW_SPEC_TCP,
421 .size = sizeof(*tcp),
425 tcp->val.dst_port = spec->hdr.dst_port;
426 tcp->val.src_port = spec->hdr.src_port;
427 tcp->mask.dst_port = mask->hdr.dst_port;
428 tcp->mask.src_port = mask->hdr.src_port;
429 /* Remove unwanted bits from values. */
430 tcp->val.src_port &= tcp->mask.src_port;
431 tcp->val.dst_port &= tcp->mask.dst_port;
434 return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
439 * Perform basic sanity checks on a pattern item.
442 * Item specification.
444 * Associated item-processing object.
446 * Perform verbose error reporting if not NULL.
449 * 0 on success, a negative errno value otherwise and rte_errno is set.
452 mlx4_flow_item_check(const struct rte_flow_item *item,
453 const struct mlx4_flow_proc_item *proc,
454 struct rte_flow_error *error)
459 /* item->last and item->mask cannot exist without item->spec. */
460 if (!item->spec && (item->mask || item->last))
461 return rte_flow_error_set
462 (error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, item,
463 "\"mask\" or \"last\" field provided without a"
464 " corresponding \"spec\"");
465 /* No spec, no mask, no problem. */
469 (const uint8_t *)item->mask :
470 (const uint8_t *)proc->mask_default;
473 * Single-pass check to make sure that:
474 * - Mask is supported, no bits are set outside proc->mask_support.
475 * - Both item->spec and item->last are included in mask.
477 for (i = 0; i != proc->mask_sz; ++i) {
480 if ((mask[i] | ((const uint8_t *)proc->mask_support)[i]) !=
481 ((const uint8_t *)proc->mask_support)[i])
482 return rte_flow_error_set
483 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
484 item, "unsupported field found in \"mask\"");
486 (((const uint8_t *)item->spec)[i] & mask[i]) !=
487 (((const uint8_t *)item->last)[i] & mask[i]))
488 return rte_flow_error_set
489 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
491 "range between \"spec\" and \"last\""
492 " is larger than \"mask\"");
497 /** Graph of supported items and associated actions. */
498 static const struct mlx4_flow_proc_item mlx4_flow_proc_item_list[] = {
499 [RTE_FLOW_ITEM_TYPE_END] = {
500 .next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_ETH),
502 [RTE_FLOW_ITEM_TYPE_ETH] = {
503 .next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_VLAN,
504 RTE_FLOW_ITEM_TYPE_IPV4),
505 .mask_support = &(const struct rte_flow_item_eth){
506 /* Only destination MAC can be matched. */
507 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
509 .mask_default = &rte_flow_item_eth_mask,
510 .mask_sz = sizeof(struct rte_flow_item_eth),
511 .merge = mlx4_flow_merge_eth,
512 .dst_sz = sizeof(struct ibv_flow_spec_eth),
514 [RTE_FLOW_ITEM_TYPE_VLAN] = {
515 .next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_IPV4),
516 .mask_support = &(const struct rte_flow_item_vlan){
517 /* Only TCI VID matching is supported. */
518 .tci = RTE_BE16(0x0fff),
520 .mask_default = &rte_flow_item_vlan_mask,
521 .mask_sz = sizeof(struct rte_flow_item_vlan),
522 .merge = mlx4_flow_merge_vlan,
525 [RTE_FLOW_ITEM_TYPE_IPV4] = {
526 .next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_UDP,
527 RTE_FLOW_ITEM_TYPE_TCP),
528 .mask_support = &(const struct rte_flow_item_ipv4){
530 .src_addr = RTE_BE32(0xffffffff),
531 .dst_addr = RTE_BE32(0xffffffff),
534 .mask_default = &rte_flow_item_ipv4_mask,
535 .mask_sz = sizeof(struct rte_flow_item_ipv4),
536 .merge = mlx4_flow_merge_ipv4,
537 .dst_sz = sizeof(struct ibv_flow_spec_ipv4),
539 [RTE_FLOW_ITEM_TYPE_UDP] = {
540 .mask_support = &(const struct rte_flow_item_udp){
542 .src_port = RTE_BE16(0xffff),
543 .dst_port = RTE_BE16(0xffff),
546 .mask_default = &rte_flow_item_udp_mask,
547 .mask_sz = sizeof(struct rte_flow_item_udp),
548 .merge = mlx4_flow_merge_udp,
549 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
551 [RTE_FLOW_ITEM_TYPE_TCP] = {
552 .mask_support = &(const struct rte_flow_item_tcp){
554 .src_port = RTE_BE16(0xffff),
555 .dst_port = RTE_BE16(0xffff),
558 .mask_default = &rte_flow_item_tcp_mask,
559 .mask_sz = sizeof(struct rte_flow_item_tcp),
560 .merge = mlx4_flow_merge_tcp,
561 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
566 * Make sure a flow rule is supported and initialize associated structure.
569 * Pointer to private structure.
571 * Flow rule attributes.
573 * Pattern specification (list terminated by the END pattern item).
575 * Associated actions (list terminated by the END action).
577 * Perform verbose error reporting if not NULL.
578 * @param[in, out] addr
579 * Buffer where the resulting flow rule handle pointer must be stored.
580 * If NULL, stop processing after validation stage.
583 * 0 on success, a negative errno value otherwise and rte_errno is set.
586 mlx4_flow_prepare(struct priv *priv,
587 const struct rte_flow_attr *attr,
588 const struct rte_flow_item pattern[],
589 const struct rte_flow_action actions[],
590 struct rte_flow_error *error,
591 struct rte_flow **addr)
593 const struct rte_flow_item *item;
594 const struct rte_flow_action *action;
595 const struct mlx4_flow_proc_item *proc;
596 struct rte_flow temp = { .ibv_attr_size = sizeof(*temp.ibv_attr) };
597 struct rte_flow *flow = &temp;
598 const char *msg = NULL;
601 return rte_flow_error_set
602 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
603 NULL, "groups are not supported");
604 if (attr->priority > MLX4_FLOW_PRIORITY_LAST)
605 return rte_flow_error_set
606 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
607 NULL, "maximum priority level is "
608 MLX4_STR_EXPAND(MLX4_FLOW_PRIORITY_LAST));
610 return rte_flow_error_set
611 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
612 NULL, "egress is not supported");
614 return rte_flow_error_set
615 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
616 NULL, "only ingress is supported");
618 proc = mlx4_flow_proc_item_list;
619 /* Go over pattern. */
620 for (item = pattern; item->type; ++item) {
621 const struct mlx4_flow_proc_item *next = NULL;
625 if (item->type == RTE_FLOW_ITEM_TYPE_VOID)
627 if (item->type == MLX4_FLOW_ITEM_TYPE_INTERNAL) {
631 if (flow->promisc || flow->allmulti) {
632 msg = "mlx4 does not support additional matching"
633 " criteria combined with indiscriminate"
634 " matching on Ethernet headers";
635 goto exit_item_not_supported;
637 for (i = 0; proc->next_item && proc->next_item[i]; ++i) {
638 if (proc->next_item[i] == item->type) {
639 next = &mlx4_flow_proc_item_list[item->type];
644 goto exit_item_not_supported;
647 * Perform basic sanity checks only once, while handle is
651 err = mlx4_flow_item_check(item, proc, error);
656 err = proc->merge(flow, item, proc, error);
660 flow->ibv_attr_size += proc->dst_sz;
662 /* Go over actions list. */
663 for (action = actions; action->type; ++action) {
664 switch (action->type) {
665 const struct rte_flow_action_queue *queue;
667 case RTE_FLOW_ACTION_TYPE_VOID:
669 case RTE_FLOW_ACTION_TYPE_DROP:
672 case RTE_FLOW_ACTION_TYPE_QUEUE:
673 queue = action->conf;
674 if (queue->index >= priv->dev->data->nb_rx_queues)
675 goto exit_action_not_supported;
677 flow->queue_id = queue->index;
680 goto exit_action_not_supported;
683 if (!flow->queue && !flow->drop)
684 return rte_flow_error_set
685 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
686 NULL, "no valid action");
687 /* Validation ends here. */
691 /* Allocate proper handle based on collected data. */
692 const struct mlx4_malloc_vec vec[] = {
694 .align = alignof(struct rte_flow),
695 .size = sizeof(*flow),
696 .addr = (void **)&flow,
699 .align = alignof(struct ibv_flow_attr),
700 .size = temp.ibv_attr_size,
701 .addr = (void **)&temp.ibv_attr,
705 if (!mlx4_zmallocv(__func__, vec, RTE_DIM(vec)))
706 return rte_flow_error_set
708 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
709 "flow rule handle allocation failure");
710 /* Most fields will be updated by second pass. */
711 *flow = (struct rte_flow){
712 .ibv_attr = temp.ibv_attr,
713 .ibv_attr_size = sizeof(*flow->ibv_attr),
715 *flow->ibv_attr = (struct ibv_flow_attr){
716 .type = IBV_FLOW_ATTR_NORMAL,
717 .size = sizeof(*flow->ibv_attr),
718 .priority = attr->priority,
725 exit_item_not_supported:
726 return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
727 item, msg ? msg : "item not supported");
728 exit_action_not_supported:
729 return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
730 action, "action not supported");
734 * Validate a flow supported by the NIC.
736 * @see rte_flow_validate()
740 mlx4_flow_validate(struct rte_eth_dev *dev,
741 const struct rte_flow_attr *attr,
742 const struct rte_flow_item pattern[],
743 const struct rte_flow_action actions[],
744 struct rte_flow_error *error)
746 struct priv *priv = dev->data->dev_private;
748 return mlx4_flow_prepare(priv, attr, pattern, actions, error, NULL);
752 * Get a drop flow rule resources instance.
755 * Pointer to private structure.
758 * Pointer to drop flow resources on success, NULL otherwise and rte_errno
761 static struct mlx4_drop *
762 mlx4_drop_get(struct priv *priv)
764 struct mlx4_drop *drop = priv->drop;
767 assert(drop->refcnt);
768 assert(drop->priv == priv);
772 drop = rte_malloc(__func__, sizeof(*drop), 0);
775 *drop = (struct mlx4_drop){
779 drop->cq = ibv_create_cq(priv->ctx, 1, NULL, NULL, 0);
782 drop->qp = ibv_create_qp(priv->pd,
783 &(struct ibv_qp_init_attr){
786 .qp_type = IBV_QPT_RAW_PACKET,
794 claim_zero(ibv_destroy_qp(drop->qp));
796 claim_zero(ibv_destroy_cq(drop->cq));
804 * Give back a drop flow rule resources instance.
807 * Pointer to drop flow rule resources.
810 mlx4_drop_put(struct mlx4_drop *drop)
812 assert(drop->refcnt);
815 drop->priv->drop = NULL;
816 claim_zero(ibv_destroy_qp(drop->qp));
817 claim_zero(ibv_destroy_cq(drop->cq));
822 * Toggle a configured flow rule.
825 * Pointer to private structure.
827 * Flow rule handle to toggle.
829 * Whether associated Verbs flow must be created or removed.
831 * Perform verbose error reporting if not NULL.
834 * 0 on success, a negative errno value otherwise and rte_errno is set.
837 mlx4_flow_toggle(struct priv *priv,
838 struct rte_flow *flow,
840 struct rte_flow_error *error)
842 struct ibv_qp *qp = NULL;
849 claim_zero(ibv_destroy_flow(flow->ibv_flow));
850 flow->ibv_flow = NULL;
852 mlx4_drop_put(priv->drop);
855 assert(flow->ibv_attr);
856 if (!flow->internal &&
858 flow->ibv_attr->priority == MLX4_FLOW_PRIORITY_LAST) {
859 if (flow->ibv_flow) {
860 claim_zero(ibv_destroy_flow(flow->ibv_flow));
861 flow->ibv_flow = NULL;
863 mlx4_drop_put(priv->drop);
866 msg = ("priority level "
867 MLX4_STR_EXPAND(MLX4_FLOW_PRIORITY_LAST)
868 " is reserved when not in isolated mode");
872 struct rxq *rxq = NULL;
874 if (flow->queue_id < priv->dev->data->nb_rx_queues)
875 rxq = priv->dev->data->rx_queues[flow->queue_id];
876 if (flow->ibv_flow) {
877 if (!rxq ^ !flow->drop)
879 /* Verbs flow needs updating. */
880 claim_zero(ibv_destroy_flow(flow->ibv_flow));
881 flow->ibv_flow = NULL;
883 mlx4_drop_put(priv->drop);
887 /* A missing target queue drops traffic implicitly. */
894 msg = "resources for drop flow rule cannot be created";
902 flow->ibv_flow = ibv_create_flow(qp, flow->ibv_attr);
906 mlx4_drop_put(priv->drop);
908 msg = "flow rule rejected by device";
910 return rte_flow_error_set
911 (error, err, RTE_FLOW_ERROR_TYPE_HANDLE, flow, msg);
917 * @see rte_flow_create()
920 static struct rte_flow *
921 mlx4_flow_create(struct rte_eth_dev *dev,
922 const struct rte_flow_attr *attr,
923 const struct rte_flow_item pattern[],
924 const struct rte_flow_action actions[],
925 struct rte_flow_error *error)
927 struct priv *priv = dev->data->dev_private;
928 struct rte_flow *flow;
931 err = mlx4_flow_prepare(priv, attr, pattern, actions, error, &flow);
934 err = mlx4_flow_toggle(priv, flow, priv->started, error);
936 struct rte_flow *curr = LIST_FIRST(&priv->flows);
938 /* New rules are inserted after internal ones. */
939 if (!curr || !curr->internal) {
940 LIST_INSERT_HEAD(&priv->flows, flow, next);
942 while (LIST_NEXT(curr, next) &&
943 LIST_NEXT(curr, next)->internal)
944 curr = LIST_NEXT(curr, next);
945 LIST_INSERT_AFTER(curr, flow, next);
949 rte_flow_error_set(error, -err, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
956 * Configure isolated mode.
958 * @see rte_flow_isolate()
962 mlx4_flow_isolate(struct rte_eth_dev *dev,
964 struct rte_flow_error *error)
966 struct priv *priv = dev->data->dev_private;
968 if (!!enable == !!priv->isolated)
970 priv->isolated = !!enable;
971 if (mlx4_flow_sync(priv, error)) {
972 priv->isolated = !enable;
979 * Destroy a flow rule.
981 * @see rte_flow_destroy()
985 mlx4_flow_destroy(struct rte_eth_dev *dev,
986 struct rte_flow *flow,
987 struct rte_flow_error *error)
989 struct priv *priv = dev->data->dev_private;
990 int err = mlx4_flow_toggle(priv, flow, 0, error);
994 LIST_REMOVE(flow, next);
1000 * Destroy user-configured flow rules.
1002 * This function skips internal flows rules.
1004 * @see rte_flow_flush()
1008 mlx4_flow_flush(struct rte_eth_dev *dev,
1009 struct rte_flow_error *error)
1011 struct priv *priv = dev->data->dev_private;
1012 struct rte_flow *flow = LIST_FIRST(&priv->flows);
1015 struct rte_flow *next = LIST_NEXT(flow, next);
1017 if (!flow->internal)
1018 mlx4_flow_destroy(dev, flow, error);
1025 * Helper function to determine the next configured VLAN filter.
1028 * Pointer to private structure.
1030 * VLAN ID to use as a starting point.
1033 * Next configured VLAN ID or a high value (>= 4096) if there is none.
1036 mlx4_flow_internal_next_vlan(struct priv *priv, uint16_t vlan)
1038 while (vlan < 4096) {
1039 if (priv->dev->data->vlan_filter_conf.ids[vlan / 64] &
1040 (UINT64_C(1) << (vlan % 64)))
1048 * Generate internal flow rules.
1050 * - MAC flow rules are generated from @p dev->data->mac_addrs
1051 * (@p priv->mac array).
1052 * - An additional flow rule for Ethernet broadcasts is also generated.
1053 * - All these are per-VLAN if @p dev->data->dev_conf.rxmode.hw_vlan_filter
1054 * is enabled and VLAN filters are configured.
1057 * Pointer to private structure.
1059 * Perform verbose error reporting if not NULL.
1062 * 0 on success, a negative errno value otherwise and rte_errno is set.
1065 mlx4_flow_internal(struct priv *priv, struct rte_flow_error *error)
1067 struct rte_flow_attr attr = {
1068 .priority = MLX4_FLOW_PRIORITY_LAST,
1071 struct rte_flow_item_eth eth_spec;
1072 const struct rte_flow_item_eth eth_mask = {
1073 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1075 struct rte_flow_item_vlan vlan_spec;
1076 const struct rte_flow_item_vlan vlan_mask = {
1077 .tci = RTE_BE16(0x0fff),
1079 struct rte_flow_item pattern[] = {
1081 .type = MLX4_FLOW_ITEM_TYPE_INTERNAL,
1084 .type = RTE_FLOW_ITEM_TYPE_ETH,
1089 /* Replaced with VLAN if filtering is enabled. */
1090 .type = RTE_FLOW_ITEM_TYPE_END,
1093 .type = RTE_FLOW_ITEM_TYPE_END,
1096 struct rte_flow_action actions[] = {
1098 .type = RTE_FLOW_ACTION_TYPE_QUEUE,
1099 .conf = &(struct rte_flow_action_queue){
1104 .type = RTE_FLOW_ACTION_TYPE_END,
1107 struct ether_addr *rule_mac = ð_spec.dst;
1108 rte_be16_t *rule_vlan =
1109 priv->dev->data->dev_conf.rxmode.hw_vlan_filter ?
1113 struct rte_flow *flow;
1118 * Set up VLAN item if filtering is enabled and at least one VLAN
1119 * filter is configured.
1122 vlan = mlx4_flow_internal_next_vlan(priv, 0);
1124 pattern[2] = (struct rte_flow_item){
1125 .type = RTE_FLOW_ITEM_TYPE_VLAN,
1130 *rule_vlan = rte_cpu_to_be_16(vlan);
1135 for (i = 0; i != RTE_DIM(priv->mac) + 1; ++i) {
1136 const struct ether_addr *mac;
1138 /* Broadcasts are handled by an extra iteration. */
1139 if (i < RTE_DIM(priv->mac))
1140 mac = &priv->mac[i];
1142 mac = ð_mask.dst;
1143 if (is_zero_ether_addr(mac))
1145 /* Check if MAC flow rule is already present. */
1146 for (flow = LIST_FIRST(&priv->flows);
1147 flow && flow->internal;
1148 flow = LIST_NEXT(flow, next)) {
1149 const struct ibv_flow_spec_eth *eth =
1150 (const void *)((uintptr_t)flow->ibv_attr +
1151 sizeof(*flow->ibv_attr));
1156 assert(flow->ibv_attr->type == IBV_FLOW_ATTR_NORMAL);
1157 assert(flow->ibv_attr->num_of_specs == 1);
1158 assert(eth->type == IBV_FLOW_SPEC_ETH);
1160 (eth->val.vlan_tag != *rule_vlan ||
1161 eth->mask.vlan_tag != RTE_BE16(0x0fff)))
1163 if (!rule_vlan && eth->mask.vlan_tag)
1165 for (j = 0; j != sizeof(mac->addr_bytes); ++j)
1166 if (eth->val.dst_mac[j] != mac->addr_bytes[j] ||
1167 eth->mask.dst_mac[j] != UINT8_C(0xff) ||
1168 eth->val.src_mac[j] != UINT8_C(0x00) ||
1169 eth->mask.src_mac[j] != UINT8_C(0x00))
1171 if (j == sizeof(mac->addr_bytes))
1174 if (!flow || !flow->internal) {
1175 /* Not found, create a new flow rule. */
1176 memcpy(rule_mac, mac, sizeof(*mac));
1177 flow = mlx4_flow_create(priv->dev, &attr, pattern,
1187 if (!err && rule_vlan) {
1188 vlan = mlx4_flow_internal_next_vlan(priv, vlan + 1);
1192 /* Clear selection and clean up stale MAC flow rules. */
1193 flow = LIST_FIRST(&priv->flows);
1194 while (flow && flow->internal) {
1195 struct rte_flow *next = LIST_NEXT(flow, next);
1197 if (flow->mac && !flow->select)
1198 claim_zero(mlx4_flow_destroy(priv->dev, flow, error));
1207 * Synchronize flow rules.
1209 * This function synchronizes flow rules with the state of the device by
1210 * taking into account isolated mode and whether target queues are
1214 * Pointer to private structure.
1216 * Perform verbose error reporting if not NULL.
1219 * 0 on success, a negative errno value otherwise and rte_errno is set.
1222 mlx4_flow_sync(struct priv *priv, struct rte_flow_error *error)
1224 struct rte_flow *flow;
1227 /* Internal flow rules are guaranteed to come first in the list. */
1228 if (priv->isolated) {
1230 * Get rid of them in isolated mode, stop at the first
1231 * non-internal rule found.
1233 for (flow = LIST_FIRST(&priv->flows);
1234 flow && flow->internal;
1235 flow = LIST_FIRST(&priv->flows))
1236 claim_zero(mlx4_flow_destroy(priv->dev, flow, error));
1238 /* Refresh internal rules. */
1239 ret = mlx4_flow_internal(priv, error);
1243 /* Toggle the remaining flow rules . */
1244 for (flow = LIST_FIRST(&priv->flows);
1246 flow = LIST_NEXT(flow, next)) {
1247 ret = mlx4_flow_toggle(priv, flow, priv->started, error);
1252 assert(!priv->drop);
1257 * Clean up all flow rules.
1259 * Unlike mlx4_flow_flush(), this function takes care of all remaining flow
1260 * rules regardless of whether they are internal or user-configured.
1263 * Pointer to private structure.
1266 mlx4_flow_clean(struct priv *priv)
1268 struct rte_flow *flow;
1270 while ((flow = LIST_FIRST(&priv->flows)))
1271 mlx4_flow_destroy(priv->dev, flow, NULL);
1274 static const struct rte_flow_ops mlx4_flow_ops = {
1275 .validate = mlx4_flow_validate,
1276 .create = mlx4_flow_create,
1277 .destroy = mlx4_flow_destroy,
1278 .flush = mlx4_flow_flush,
1279 .isolate = mlx4_flow_isolate,
1283 * Manage filter operations.
1286 * Pointer to Ethernet device structure.
1287 * @param filter_type
1290 * Operation to perform.
1292 * Pointer to operation-specific structure.
1295 * 0 on success, negative errno value otherwise and rte_errno is set.
1298 mlx4_filter_ctrl(struct rte_eth_dev *dev,
1299 enum rte_filter_type filter_type,
1300 enum rte_filter_op filter_op,
1303 switch (filter_type) {
1304 case RTE_ETH_FILTER_GENERIC:
1305 if (filter_op != RTE_ETH_FILTER_GET)
1307 *(const void **)arg = &mlx4_flow_ops;
1310 ERROR("%p: filter type (%d) not supported",
1311 (void *)dev, filter_type);
1314 rte_errno = ENOTSUP;