/*-
 *   BSD LICENSE
 *
 *   Copyright 2017 6WIND S.A.
 *   Copyright 2017 Mellanox
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of 6WIND S.A. nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/**
 * @file
 * Flow API operations for mlx4 driver.
 */

#include <arpa/inet.h>
#include <assert.h>
#include <errno.h>
#include <stdalign.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <sys/queue.h>

/* Verbs headers do not support -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_byteorder.h>
#include <rte_errno.h>
#include <rte_eth_ctrl.h>
#include <rte_ethdev.h>
#include <rte_ether.h>
#include <rte_flow.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>

/* PMD headers. */
#include "mlx4.h"
#include "mlx4_flow.h"
#include "mlx4_rxtx.h"
#include "mlx4_utils.h"

/** Static initializer for a list of subsequent item types. */
#define NEXT_ITEM(...) \
	(const enum rte_flow_item_type []){ \
		__VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
	}

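/*
 * For instance, NEXT_ITEM(RTE_FLOW_ITEM_TYPE_UDP, RTE_FLOW_ITEM_TYPE_TCP)
 * yields a compound literal equivalent to:
 *
 *  (const enum rte_flow_item_type []){
 *          RTE_FLOW_ITEM_TYPE_UDP,
 *          RTE_FLOW_ITEM_TYPE_TCP,
 *          RTE_FLOW_ITEM_TYPE_END,
 *  }
 *
 * Entries of mlx4_flow_proc_item_list[] below use it to describe which
 * pattern items may follow the current one.
 */
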
/** Processor structure associated with a flow item. */
struct mlx4_flow_proc_item {
	/** Bit-mask for fields supported by this PMD. */
	const void *mask_support;
	/** Bit-mask to use when @p item->mask is not provided. */
	const void *mask_default;
	/** Size in bytes for @p mask_support and @p mask_default. */
	const unsigned int mask_sz;
	/** Merge a pattern item into a flow rule handle. */
	int (*merge)(struct rte_flow *flow,
		     const struct rte_flow_item *item,
		     const struct mlx4_flow_proc_item *proc,
		     struct rte_flow_error *error);
	/** Size in bytes of the destination structure. */
	const unsigned int dst_sz;
	/** List of possible subsequent items. */
	const enum rte_flow_item_type *const next_item;
};

/** Shared resources for drop flow rules. */
struct mlx4_drop {
	struct ibv_qp *qp; /**< QP target. */
	struct ibv_cq *cq; /**< CQ associated with above QP. */
	struct priv *priv; /**< Back pointer to private data. */
	uint32_t refcnt; /**< Reference count. */
};

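/*
 * A single instance of these resources is shared by all drop flow rules on
 * a given port; mlx4_drop_get() and mlx4_drop_put() below manage it through
 * the reference count.
 */
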
/**
 * Merge Ethernet pattern item into flow rule handle.
 *
 * Additional mlx4-specific constraints on supported fields:
 *
 * - No support for partial masks.
 * - Not providing @p item->spec or providing an empty @p mask->dst is
 *   *only* supported if the rule doesn't specify additional matching
 *   criteria (i.e. rule is promiscuous-like).
 *
 * @param[in, out] flow
 *   Flow rule handle to update.
 * @param[in] item
 *   Pattern item to merge.
 * @param[in] proc
 *   Associated item-processing object.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx4_flow_merge_eth(struct rte_flow *flow,
		    const struct rte_flow_item *item,
		    const struct mlx4_flow_proc_item *proc,
		    struct rte_flow_error *error)
{
	const struct rte_flow_item_eth *spec = item->spec;
	const struct rte_flow_item_eth *mask =
		spec ? (item->mask ? item->mask : proc->mask_default) : NULL;
	struct ibv_flow_spec_eth *eth;
	const char *msg;
	unsigned int i;

	if (!mask) {
		flow->promisc = 1;
	} else {
		uint32_t sum_dst = 0;
		uint32_t sum_src = 0;
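
		/*
		 * Summing mask bytes is a cheap way to classify the mask:
		 * an all-zero mask sums to 0, a full mask sums to exactly
		 * 0xff * ETHER_ADDR_LEN, and anything in between is a
		 * partial mask.
		 */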
		for (i = 0; i != sizeof(mask->dst.addr_bytes); ++i) {
			sum_dst += mask->dst.addr_bytes[i];
			sum_src += mask->src.addr_bytes[i];
		}
		if (sum_src) {
			msg = "mlx4 does not support source MAC matching";
			goto error;
		} else if (!sum_dst) {
			flow->promisc = 1;
		} else if (sum_dst != (UINT8_C(0xff) * ETHER_ADDR_LEN)) {
			msg = "mlx4 does not support matching partial"
				" destination MAC addresses";
			goto error;
		}
	}
	if (!flow->ibv_attr)
		return 0;
	if (flow->promisc) {
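		/*
		 * Promiscuous-like rules have no specific Ethernet match;
		 * IBV_FLOW_ATTR_ALL_DEFAULT requests the device's default
		 * catch-all behavior instead of an additional spec entry.
		 */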
		flow->ibv_attr->type = IBV_FLOW_ATTR_ALL_DEFAULT;
		return 0;
	}
	++flow->ibv_attr->num_of_specs;
	eth = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size);
	*eth = (struct ibv_flow_spec_eth) {
		.type = IBV_FLOW_SPEC_ETH,
		.size = sizeof(*eth),
	};
	memcpy(eth->val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
	memcpy(eth->mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
	/* Remove unwanted bits from values. */
	for (i = 0; i < ETHER_ADDR_LEN; ++i) {
		eth->val.dst_mac[i] &= eth->mask.dst_mac[i];
	}
	return 0;
error:
	return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
				  item, msg);
}

/**
 * Merge VLAN pattern item into flow rule handle.
 *
 * Additional mlx4-specific constraints on supported fields:
 *
 * - Matching *all* VLAN traffic by omitting @p item->spec or providing an
 *   empty @p item->mask would also include non-VLAN traffic. Doing so is
 *   therefore unsupported.
 * - No support for partial masks.
 *
 * @param[in, out] flow
 *   Flow rule handle to update.
 * @param[in] item
 *   Pattern item to merge.
 * @param[in] proc
 *   Associated item-processing object.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx4_flow_merge_vlan(struct rte_flow *flow,
		     const struct rte_flow_item *item,
		     const struct mlx4_flow_proc_item *proc,
		     struct rte_flow_error *error)
{
	const struct rte_flow_item_vlan *spec = item->spec;
	const struct rte_flow_item_vlan *mask =
		spec ? (item->mask ? item->mask : proc->mask_default) : NULL;
	struct ibv_flow_spec_eth *eth;
	const char *msg;

	if (!mask || !mask->tci) {
		msg = "mlx4 cannot match all VLAN traffic while excluding"
			" non-VLAN traffic, TCI VID must be specified";
		goto error;
	}
	if (mask->tci != RTE_BE16(0x0fff)) {
		msg = "mlx4 does not support partial TCI VID matching";
		goto error;
	}
	if (!flow->ibv_attr)
		return 0;
	eth = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size -
		       sizeof(*eth));
	eth->val.vlan_tag = spec->tci;
	eth->mask.vlan_tag = mask->tci;
	eth->val.vlan_tag &= eth->mask.vlan_tag;
	return 0;
error:
	return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
				  item, msg);
}

/**
 * Merge IPv4 pattern item into flow rule handle.
 *
 * Additional mlx4-specific constraints on supported fields:
 *
 * - No support for partial masks.
 *
 * @param[in, out] flow
 *   Flow rule handle to update.
 * @param[in] item
 *   Pattern item to merge.
 * @param[in] proc
 *   Associated item-processing object.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx4_flow_merge_ipv4(struct rte_flow *flow,
		     const struct rte_flow_item *item,
		     const struct mlx4_flow_proc_item *proc,
		     struct rte_flow_error *error)
{
	const struct rte_flow_item_ipv4 *spec = item->spec;
	const struct rte_flow_item_ipv4 *mask =
		spec ? (item->mask ? item->mask : proc->mask_default) : NULL;
	struct ibv_flow_spec_ipv4 *ipv4;
	const char *msg;

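	/*
	 * The checks below rely on unsigned wraparound: mask + 1 overflows
	 * to 0 for a full mask (0xffffffff) and yields 1 for an empty one,
	 * so any result greater than 1 denotes a partial mask, which is
	 * not supported.
	 */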
	if (mask &&
	    ((uint32_t)(mask->hdr.src_addr + 1) > UINT32_C(1) ||
	     (uint32_t)(mask->hdr.dst_addr + 1) > UINT32_C(1))) {
		msg = "mlx4 does not support matching partial IPv4 fields";
		goto error;
	}
	if (!flow->ibv_attr)
		return 0;
	++flow->ibv_attr->num_of_specs;
	ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size);
	*ipv4 = (struct ibv_flow_spec_ipv4) {
		.type = IBV_FLOW_SPEC_IPV4,
		.size = sizeof(*ipv4),
	};
	if (!spec)
		return 0;
	ipv4->val = (struct ibv_flow_ipv4_filter) {
		.src_ip = spec->hdr.src_addr,
		.dst_ip = spec->hdr.dst_addr,
	};
	ipv4->mask = (struct ibv_flow_ipv4_filter) {
		.src_ip = mask->hdr.src_addr,
		.dst_ip = mask->hdr.dst_addr,
	};
	/* Remove unwanted bits from values. */
	ipv4->val.src_ip &= ipv4->mask.src_ip;
	ipv4->val.dst_ip &= ipv4->mask.dst_ip;
	return 0;
error:
	return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
				  item, msg);
}

/**
 * Merge UDP pattern item into flow rule handle.
 *
 * Additional mlx4-specific constraints on supported fields:
 *
 * - No support for partial masks.
 *
 * @param[in, out] flow
 *   Flow rule handle to update.
 * @param[in] item
 *   Pattern item to merge.
 * @param[in] proc
 *   Associated item-processing object.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx4_flow_merge_udp(struct rte_flow *flow,
		    const struct rte_flow_item *item,
		    const struct mlx4_flow_proc_item *proc,
		    struct rte_flow_error *error)
{
	const struct rte_flow_item_udp *spec = item->spec;
	const struct rte_flow_item_udp *mask =
		spec ? (item->mask ? item->mask : proc->mask_default) : NULL;
	struct ibv_flow_spec_tcp_udp *udp;
	const char *msg;

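	/*
	 * Same wraparound trick as in mlx4_flow_merge_ipv4(): only empty
	 * (0x0000) and full (0xffff) port masks are allowed through.
	 */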
	if (mask &&
	    ((uint16_t)(mask->hdr.src_port + 1) > UINT16_C(1) ||
	     (uint16_t)(mask->hdr.dst_port + 1) > UINT16_C(1))) {
		msg = "mlx4 does not support matching partial UDP fields";
		goto error;
	}
	if (!flow->ibv_attr)
		return 0;
	++flow->ibv_attr->num_of_specs;
	udp = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size);
	*udp = (struct ibv_flow_spec_tcp_udp) {
		.type = IBV_FLOW_SPEC_UDP,
		.size = sizeof(*udp),
	};
	if (!spec)
		return 0;
	udp->val.dst_port = spec->hdr.dst_port;
	udp->val.src_port = spec->hdr.src_port;
	udp->mask.dst_port = mask->hdr.dst_port;
	udp->mask.src_port = mask->hdr.src_port;
	/* Remove unwanted bits from values. */
	udp->val.src_port &= udp->mask.src_port;
	udp->val.dst_port &= udp->mask.dst_port;
	return 0;
error:
	return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
				  item, msg);
}

/**
 * Merge TCP pattern item into flow rule handle.
 *
 * Additional mlx4-specific constraints on supported fields:
 *
 * - No support for partial masks.
 *
 * @param[in, out] flow
 *   Flow rule handle to update.
 * @param[in] item
 *   Pattern item to merge.
 * @param[in] proc
 *   Associated item-processing object.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx4_flow_merge_tcp(struct rte_flow *flow,
		    const struct rte_flow_item *item,
		    const struct mlx4_flow_proc_item *proc,
		    struct rte_flow_error *error)
{
	const struct rte_flow_item_tcp *spec = item->spec;
	const struct rte_flow_item_tcp *mask =
		spec ? (item->mask ? item->mask : proc->mask_default) : NULL;
	struct ibv_flow_spec_tcp_udp *tcp;
	const char *msg;

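	/* Same port mask classification as in mlx4_flow_merge_udp(). */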
	if (mask &&
	    ((uint16_t)(mask->hdr.src_port + 1) > UINT16_C(1) ||
	     (uint16_t)(mask->hdr.dst_port + 1) > UINT16_C(1))) {
		msg = "mlx4 does not support matching partial TCP fields";
		goto error;
	}
	if (!flow->ibv_attr)
		return 0;
	++flow->ibv_attr->num_of_specs;
	tcp = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size);
	*tcp = (struct ibv_flow_spec_tcp_udp) {
		.type = IBV_FLOW_SPEC_TCP,
		.size = sizeof(*tcp),
	};
	if (!spec)
		return 0;
	tcp->val.dst_port = spec->hdr.dst_port;
	tcp->val.src_port = spec->hdr.src_port;
	tcp->mask.dst_port = mask->hdr.dst_port;
	tcp->mask.src_port = mask->hdr.src_port;
	/* Remove unwanted bits from values. */
	tcp->val.src_port &= tcp->mask.src_port;
	tcp->val.dst_port &= tcp->mask.dst_port;
	return 0;
error:
	return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
				  item, msg);
}

/**
 * Perform basic sanity checks on a pattern item.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] proc
 *   Associated item-processing object.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx4_flow_item_check(const struct rte_flow_item *item,
		     const struct mlx4_flow_proc_item *proc,
		     struct rte_flow_error *error)
{
	const uint8_t *mask;
	unsigned int i;

	/* item->last and item->mask cannot exist without item->spec. */
	if (!item->spec && (item->mask || item->last))
		return rte_flow_error_set
			(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, item,
			 "\"mask\" or \"last\" field provided without a"
			 " corresponding \"spec\"");
	/* No spec, no mask, no problem. */
	if (!item->spec)
		return 0;
	mask = item->mask ?
		(const uint8_t *)item->mask :
		(const uint8_t *)proc->mask_default;
	assert(mask);
	/*
	 * Single-pass check to make sure that:
	 * - Mask is supported, no bits are set outside proc->mask_support.
	 * - Both item->spec and item->last are included in mask.
	 */
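	/*
	 * For instance, with mask 0xf0, spec 0x10 and last 0x1f pass the
	 * check below (both yield 0x10 once masked), while last 0x2f would
	 * be rejected since the range it defines spills outside the mask.
	 */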
	for (i = 0; i != proc->mask_sz; ++i) {
		if (!mask[i])
			continue;
		if ((mask[i] | ((const uint8_t *)proc->mask_support)[i]) !=
		    ((const uint8_t *)proc->mask_support)[i])
			return rte_flow_error_set
				(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
				 item, "unsupported field found in \"mask\"");
		if (item->last &&
		    (((const uint8_t *)item->spec)[i] & mask[i]) !=
		    (((const uint8_t *)item->last)[i] & mask[i]))
			return rte_flow_error_set
				(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
				 item,
				 "range between \"spec\" and \"last\""
				 " is larger than \"mask\"");
	}
	return 0;
}

/** Graph of supported items and associated actions. */
static const struct mlx4_flow_proc_item mlx4_flow_proc_item_list[] = {
	[RTE_FLOW_ITEM_TYPE_END] = {
		.next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_ETH),
	},
	[RTE_FLOW_ITEM_TYPE_ETH] = {
		.next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_VLAN,
				       RTE_FLOW_ITEM_TYPE_IPV4),
		.mask_support = &(const struct rte_flow_item_eth){
			/* Only destination MAC can be matched. */
			.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
		},
		.mask_default = &rte_flow_item_eth_mask,
		.mask_sz = sizeof(struct rte_flow_item_eth),
		.merge = mlx4_flow_merge_eth,
		.dst_sz = sizeof(struct ibv_flow_spec_eth),
	},
	[RTE_FLOW_ITEM_TYPE_VLAN] = {
		.next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_IPV4),
		.mask_support = &(const struct rte_flow_item_vlan){
			/* Only TCI VID matching is supported. */
			.tci = RTE_BE16(0x0fff),
		},
		.mask_default = &rte_flow_item_vlan_mask,
		.mask_sz = sizeof(struct rte_flow_item_vlan),
		.merge = mlx4_flow_merge_vlan,
		.dst_sz = 0,
	},
	[RTE_FLOW_ITEM_TYPE_IPV4] = {
		.next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_UDP,
				       RTE_FLOW_ITEM_TYPE_TCP),
		.mask_support = &(const struct rte_flow_item_ipv4){
			.hdr = {
				.src_addr = RTE_BE32(0xffffffff),
				.dst_addr = RTE_BE32(0xffffffff),
			},
		},
		.mask_default = &rte_flow_item_ipv4_mask,
		.mask_sz = sizeof(struct rte_flow_item_ipv4),
		.merge = mlx4_flow_merge_ipv4,
		.dst_sz = sizeof(struct ibv_flow_spec_ipv4),
	},
	[RTE_FLOW_ITEM_TYPE_UDP] = {
		.mask_support = &(const struct rte_flow_item_udp){
			.hdr = {
				.src_port = RTE_BE16(0xffff),
				.dst_port = RTE_BE16(0xffff),
			},
		},
		.mask_default = &rte_flow_item_udp_mask,
		.mask_sz = sizeof(struct rte_flow_item_udp),
		.merge = mlx4_flow_merge_udp,
		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
	},
	[RTE_FLOW_ITEM_TYPE_TCP] = {
		.mask_support = &(const struct rte_flow_item_tcp){
			.hdr = {
				.src_port = RTE_BE16(0xffff),
				.dst_port = RTE_BE16(0xffff),
			},
		},
		.mask_default = &rte_flow_item_tcp_mask,
		.mask_sz = sizeof(struct rte_flow_item_tcp),
		.merge = mlx4_flow_merge_tcp,
		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
	},
};

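/*
 * The graph above implies that the only pattern sequences accepted by this
 * PMD are ETH, optionally followed by VLAN, optionally followed by IPV4,
 * itself optionally followed by either UDP or TCP.
 */
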
/**
 * Make sure a flow rule is supported and initialize associated structure.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] addr
 *   Buffer where the resulting flow rule handle pointer must be stored.
 *   If NULL, stop processing after validation stage.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx4_flow_prepare(struct priv *priv,
		  const struct rte_flow_attr *attr,
		  const struct rte_flow_item pattern[],
		  const struct rte_flow_action actions[],
		  struct rte_flow_error *error,
		  struct rte_flow **addr)
{
	const struct rte_flow_item *item;
	const struct rte_flow_action *action;
	const struct mlx4_flow_proc_item *proc;
	struct rte_flow temp = { .ibv_attr_size = sizeof(*temp.ibv_attr) };
	struct rte_flow *flow = &temp;
	const char *msg = NULL;

	if (attr->group)
		return rte_flow_error_set
			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
			 NULL, "groups are not supported");
	if (attr->priority > MLX4_FLOW_PRIORITY_LAST)
		return rte_flow_error_set
			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
			 NULL, "maximum priority level is "
			 MLX4_STR_EXPAND(MLX4_FLOW_PRIORITY_LAST));
	if (attr->egress)
		return rte_flow_error_set
			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
			 NULL, "egress is not supported");
	if (!attr->ingress)
		return rte_flow_error_set
			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
			 NULL, "only ingress is supported");
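	/*
	 * The pattern and actions loops below run twice: a first pass on a
	 * temporary rule allocated on the stack validates input and
	 * determines the amount of memory needed, a second pass starting
	 * at the "fill" label fills the allocated handle for real.
	 */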
fill:
	proc = mlx4_flow_proc_item_list;
	/* Go over pattern. */
	for (item = pattern; item->type; ++item) {
		const struct mlx4_flow_proc_item *next = NULL;
		unsigned int i;
		int err;

		if (item->type == RTE_FLOW_ITEM_TYPE_VOID)
			continue;
		if (item->type == MLX4_FLOW_ITEM_TYPE_INTERNAL) {
			flow->internal = 1;
			continue;
		}
		if (flow->promisc) {
			msg = "mlx4 does not support additional matching"
				" criteria combined with indiscriminate"
				" matching on Ethernet headers";
			goto exit_item_not_supported;
		}
		for (i = 0; proc->next_item && proc->next_item[i]; ++i) {
			if (proc->next_item[i] == item->type) {
				next = &mlx4_flow_proc_item_list[item->type];
				break;
			}
		}
		if (!next)
			goto exit_item_not_supported;
		proc = next;
		/*
		 * Perform basic sanity checks only once, while handle is
		 * not allocated.
		 */
		if (flow == &temp) {
			err = mlx4_flow_item_check(item, proc, error);
			if (err)
				return err;
		}
		if (proc->merge) {
			err = proc->merge(flow, item, proc, error);
			if (err)
				return err;
		}
		flow->ibv_attr_size += proc->dst_sz;
	}
	/* Go over actions list. */
	for (action = actions; action->type; ++action) {
		switch (action->type) {
			const struct rte_flow_action_queue *queue;

		case RTE_FLOW_ACTION_TYPE_VOID:
			continue;
		case RTE_FLOW_ACTION_TYPE_DROP:
			flow->drop = 1;
			break;
		case RTE_FLOW_ACTION_TYPE_QUEUE:
			queue = action->conf;
			if (queue->index >= priv->dev->data->nb_rx_queues)
				goto exit_action_not_supported;
			flow->queue = 1;
			flow->queue_id = queue->index;
			break;
		default:
			goto exit_action_not_supported;
		}
	}
	if (!flow->queue && !flow->drop)
		return rte_flow_error_set
			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
			 NULL, "no valid action");
	/* Validation ends here. */
	if (!addr)
		return 0;
	if (flow == &temp) {
		/* Allocate proper handle based on collected data. */
		const struct mlx4_malloc_vec vec[] = {
			{
				.align = alignof(struct rte_flow),
				.size = sizeof(*flow),
				.addr = (void **)&flow,
			},
			{
				.align = alignof(struct ibv_flow_attr),
				.size = temp.ibv_attr_size,
				.addr = (void **)&temp.ibv_attr,
			},
		};

		if (!mlx4_zmallocv(__func__, vec, RTE_DIM(vec)))
			return rte_flow_error_set
				(error, rte_errno,
				 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
				 "flow rule handle allocation failure");
		/* Most fields will be updated by second pass. */
		*flow = (struct rte_flow){
			.ibv_attr = temp.ibv_attr,
			.ibv_attr_size = sizeof(*flow->ibv_attr),
		};
		*flow->ibv_attr = (struct ibv_flow_attr){
			.type = IBV_FLOW_ATTR_NORMAL,
			.size = sizeof(*flow->ibv_attr),
			.priority = attr->priority,
			.port = priv->port,
		};
		goto fill;
	}
	*addr = flow;
	return 0;
exit_item_not_supported:
	return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
				  item, msg ? msg : "item not supported");
exit_action_not_supported:
	return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
				  action, "action not supported");
}

/**
 * Validate a flow supported by the NIC.
 *
 * @see rte_flow_validate()
 * @see rte_flow_ops
 */
static int
mlx4_flow_validate(struct rte_eth_dev *dev,
		   const struct rte_flow_attr *attr,
		   const struct rte_flow_item pattern[],
		   const struct rte_flow_action actions[],
		   struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	return mlx4_flow_prepare(priv, attr, pattern, actions, error, NULL);
}

/**
 * Get a drop flow rule resources instance.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   Pointer to drop flow resources on success, NULL otherwise and rte_errno
 *   is set.
 */
static struct mlx4_drop *
mlx4_drop_get(struct priv *priv)
{
	struct mlx4_drop *drop = priv->drop;

	if (drop) {
		assert(drop->refcnt);
		assert(drop->priv == priv);
		++drop->refcnt;
		return drop;
	}
	drop = rte_malloc(__func__, sizeof(*drop), 0);
	if (!drop)
		goto error;
	*drop = (struct mlx4_drop){
		.priv = priv,
		.refcnt = 1,
	};
	drop->cq = ibv_create_cq(priv->ctx, 1, NULL, NULL, 0);
	if (!drop->cq)
		goto error;
	drop->qp = ibv_create_qp(priv->pd,
				 &(struct ibv_qp_init_attr){
					.send_cq = drop->cq,
					.recv_cq = drop->cq,
					.qp_type = IBV_QPT_RAW_PACKET,
				 });
	if (!drop->qp)
		goto error;
	priv->drop = drop;
	return drop;
error:
	/* Only dereference "drop" when it was actually allocated. */
	if (drop) {
		if (drop->qp)
			claim_zero(ibv_destroy_qp(drop->qp));
		if (drop->cq)
			claim_zero(ibv_destroy_cq(drop->cq));
		rte_free(drop);
	}
	rte_errno = ENOMEM;
	return NULL;
}

/**
 * Give back a drop flow rule resources instance.
 *
 * @param drop
 *   Pointer to drop flow rule resources.
 */
static void
mlx4_drop_put(struct mlx4_drop *drop)
{
	assert(drop->refcnt);
	if (--drop->refcnt)
		return;
	drop->priv->drop = NULL;
	claim_zero(ibv_destroy_qp(drop->qp));
	claim_zero(ibv_destroy_cq(drop->cq));
	rte_free(drop);
}

/**
 * Toggle a configured flow rule.
 *
 * @param priv
 *   Pointer to private structure.
 * @param flow
 *   Flow rule handle to toggle.
 * @param enable
 *   Whether associated Verbs flow must be created or removed.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx4_flow_toggle(struct priv *priv,
		 struct rte_flow *flow,
		 int enable,
		 struct rte_flow_error *error)
{
	struct ibv_qp *qp = NULL;
	const char *msg;
	int err;

	if (!enable) {
		if (!flow->ibv_flow)
			return 0;
		claim_zero(ibv_destroy_flow(flow->ibv_flow));
		flow->ibv_flow = NULL;
		if (flow->drop)
			mlx4_drop_put(priv->drop);
		return 0;
	}
	assert(flow->ibv_attr);
	if (!flow->internal &&
	    !priv->isolated &&
	    flow->ibv_attr->priority == MLX4_FLOW_PRIORITY_LAST) {
		if (flow->ibv_flow) {
			claim_zero(ibv_destroy_flow(flow->ibv_flow));
			flow->ibv_flow = NULL;
			if (flow->drop)
				mlx4_drop_put(priv->drop);
		}
		err = EACCES;
		msg = ("priority level "
		       MLX4_STR_EXPAND(MLX4_FLOW_PRIORITY_LAST)
		       " is reserved when not in isolated mode");
		goto error;
	}
	if (flow->queue) {
		struct rxq *rxq = NULL;

		if (flow->queue_id < priv->dev->data->nb_rx_queues)
			rxq = priv->dev->data->rx_queues[flow->queue_id];
		if (flow->ibv_flow) {
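			/*
			 * The XOR below is true when the existing Verbs flow
			 * is consistent with the current state, i.e. a queue
			 * rule whose queue still exists, or a rule demoted to
			 * drop because its queue is still missing; in both
			 * cases there is nothing to update.
			 */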
			if (!rxq ^ !flow->drop)
				return 0;
			/* Verbs flow needs updating. */
			claim_zero(ibv_destroy_flow(flow->ibv_flow));
			flow->ibv_flow = NULL;
			if (flow->drop)
				mlx4_drop_put(priv->drop);
		}
		if (rxq)
			qp = rxq->qp;
		/* A missing target queue drops traffic implicitly. */
		flow->drop = !rxq;
	}
	if (flow->drop) {
		mlx4_drop_get(priv);
		if (!priv->drop) {
			err = rte_errno;
			msg = "resources for drop flow rule cannot be created";
			goto error;
		}
		qp = priv->drop->qp;
	}
	assert(qp);
	if (flow->ibv_flow)
		return 0;
	flow->ibv_flow = ibv_create_flow(qp, flow->ibv_attr);
	if (flow->ibv_flow)
		return 0;
	if (flow->drop)
		mlx4_drop_put(priv->drop);
	err = errno;
	msg = "flow rule rejected by device";
error:
	return rte_flow_error_set
		(error, err, RTE_FLOW_ERROR_TYPE_HANDLE, flow, msg);
}

/**
 * Create a flow.
 *
 * @see rte_flow_create()
 * @see rte_flow_ops
 */
static struct rte_flow *
mlx4_flow_create(struct rte_eth_dev *dev,
		 const struct rte_flow_attr *attr,
		 const struct rte_flow_item pattern[],
		 const struct rte_flow_action actions[],
		 struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;
	struct rte_flow *flow;
	int err;

	err = mlx4_flow_prepare(priv, attr, pattern, actions, error, &flow);
	if (err)
		return NULL;
	err = mlx4_flow_toggle(priv, flow, priv->started, error);
	if (!err) {
		struct rte_flow *curr = LIST_FIRST(&priv->flows);

		/* New rules are inserted after internal ones. */
		if (!curr || !curr->internal) {
			LIST_INSERT_HEAD(&priv->flows, flow, next);
		} else {
			while (LIST_NEXT(curr, next) &&
			       LIST_NEXT(curr, next)->internal)
				curr = LIST_NEXT(curr, next);
			LIST_INSERT_AFTER(curr, flow, next);
		}
		return flow;
	}
	rte_flow_error_set(error, -err, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
			   error->message);
	rte_free(flow);
	return NULL;
}

/**
 * Configure isolated mode.
 *
 * @see rte_flow_isolate()
 * @see rte_flow_ops
 */
static int
mlx4_flow_isolate(struct rte_eth_dev *dev,
		  int enable,
		  struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	if (!!enable == !!priv->isolated)
		return 0;
	priv->isolated = !!enable;
	if (mlx4_flow_sync(priv, error)) {
		priv->isolated = !enable;
		return -rte_errno;
	}
	return 0;
}

/**
 * Destroy a flow rule.
 *
 * @see rte_flow_destroy()
 * @see rte_flow_ops
 */
static int
mlx4_flow_destroy(struct rte_eth_dev *dev,
		  struct rte_flow *flow,
		  struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;
	int err = mlx4_flow_toggle(priv, flow, 0, error);

	if (err)
		return err;
	LIST_REMOVE(flow, next);
	rte_free(flow);
	return 0;
}

/**
 * Destroy user-configured flow rules.
 *
 * This function skips internal flow rules.
 *
 * @see rte_flow_flush()
 * @see rte_flow_ops
 */
static int
mlx4_flow_flush(struct rte_eth_dev *dev,
		struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;
	struct rte_flow *flow = LIST_FIRST(&priv->flows);

	while (flow) {
		struct rte_flow *next = LIST_NEXT(flow, next);

		if (!flow->internal)
			mlx4_flow_destroy(dev, flow, error);
		flow = next;
	}
	return 0;
}

/**
 * Helper function to determine the next configured VLAN filter.
 *
 * @param priv
 *   Pointer to private structure.
 * @param vlan
 *   VLAN ID to use as a starting point.
 *
 * @return
 *   Next configured VLAN ID or a high value (>= 4096) if there is none.
 */
static uint16_t
mlx4_flow_internal_next_vlan(struct priv *priv, uint16_t vlan)
{
	while (vlan < 4096) {
		if (priv->dev->data->vlan_filter_conf.ids[vlan / 64] &
		    (UINT64_C(1) << (vlan % 64)))
			return vlan;
		++vlan;
	}
	return vlan;
}

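/*
 * Walking all configured VLAN filters thus boils down to:
 *
 *  for (vlan = mlx4_flow_internal_next_vlan(priv, 0);
 *       vlan < 4096;
 *       vlan = mlx4_flow_internal_next_vlan(priv, vlan + 1))
 *          ...
 *
 * which is effectively what mlx4_flow_internal() below does through its
 * "next_vlan" label.
 */
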
/**
 * Generate internal flow rules.
 *
 * - MAC flow rules are generated from @p dev->data->mac_addrs
 *   (@p priv->mac array).
 * - An additional flow rule for Ethernet broadcasts is also generated.
 * - All these are per-VLAN if @p dev->data->dev_conf.rxmode.hw_vlan_filter
 *   is enabled and VLAN filters are configured.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx4_flow_internal(struct priv *priv, struct rte_flow_error *error)
{
	struct rte_flow_attr attr = {
		.priority = MLX4_FLOW_PRIORITY_LAST,
		.ingress = 1,
	};
	struct rte_flow_item_eth eth_spec;
	const struct rte_flow_item_eth eth_mask = {
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
	};
	struct rte_flow_item_vlan vlan_spec;
	const struct rte_flow_item_vlan vlan_mask = {
		.tci = RTE_BE16(0x0fff),
	};
	struct rte_flow_item pattern[] = {
		{
			.type = MLX4_FLOW_ITEM_TYPE_INTERNAL,
		},
		{
			.type = RTE_FLOW_ITEM_TYPE_ETH,
			.spec = &eth_spec,
			.mask = &eth_mask,
		},
		{
			/* Replaced with VLAN if filtering is enabled. */
			.type = RTE_FLOW_ITEM_TYPE_END,
		},
		{
			.type = RTE_FLOW_ITEM_TYPE_END,
		},
	};
	struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &(struct rte_flow_action_queue){
				.index = 0,
			},
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		},
	};
	struct ether_addr *rule_mac = &eth_spec.dst;
	rte_be16_t *rule_vlan =
		priv->dev->data->dev_conf.rxmode.hw_vlan_filter ?
		&vlan_spec.tci :
		NULL;
	uint16_t vlan = 0;
	struct rte_flow *flow;
	unsigned int i;
	int err = 0;

	/*
	 * Set up VLAN item if filtering is enabled and at least one VLAN
	 * filter is configured.
	 */
	if (rule_vlan) {
		vlan = mlx4_flow_internal_next_vlan(priv, 0);
		if (vlan < 4096) {
			pattern[2] = (struct rte_flow_item){
				.type = RTE_FLOW_ITEM_TYPE_VLAN,
				.spec = &vlan_spec,
				.mask = &vlan_mask,
			};
next_vlan:
			*rule_vlan = rte_cpu_to_be_16(vlan);
		} else {
			rule_vlan = NULL;
		}
	}
	for (i = 0; i != RTE_DIM(priv->mac) + 1; ++i) {
		const struct ether_addr *mac;

		/* Broadcasts are handled by an extra iteration. */
		if (i < RTE_DIM(priv->mac))
			mac = &priv->mac[i];
		else
			mac = &eth_mask.dst;
		if (is_zero_ether_addr(mac))
			continue;
		/* Check if MAC flow rule is already present. */
		for (flow = LIST_FIRST(&priv->flows);
		     flow && flow->internal;
		     flow = LIST_NEXT(flow, next)) {
			const struct ibv_flow_spec_eth *eth =
				(const void *)((uintptr_t)flow->ibv_attr +
					       sizeof(*flow->ibv_attr));
			unsigned int j;

			if (!flow->mac)
				continue;
			assert(flow->ibv_attr->type == IBV_FLOW_ATTR_NORMAL);
			assert(flow->ibv_attr->num_of_specs == 1);
			assert(eth->type == IBV_FLOW_SPEC_ETH);
			if (rule_vlan &&
			    (eth->val.vlan_tag != *rule_vlan ||
			     eth->mask.vlan_tag != RTE_BE16(0x0fff)))
				continue;
			if (!rule_vlan && eth->mask.vlan_tag)
				continue;
			for (j = 0; j != sizeof(mac->addr_bytes); ++j)
				if (eth->val.dst_mac[j] != mac->addr_bytes[j] ||
				    eth->mask.dst_mac[j] != UINT8_C(0xff) ||
				    eth->val.src_mac[j] != UINT8_C(0x00) ||
				    eth->mask.src_mac[j] != UINT8_C(0x00))
					break;
			if (j == sizeof(mac->addr_bytes))
				break;
		}
		if (!flow || !flow->internal) {
			/* Not found, create a new flow rule. */
			memcpy(rule_mac, mac, sizeof(*mac));
			flow = mlx4_flow_create(priv->dev, &attr, pattern,
						actions, error);
			if (!flow) {
				err = -rte_errno;
				break;
			}
		}
		flow->select = 1;
		flow->mac = 1;
	}
	if (!err && rule_vlan) {
		vlan = mlx4_flow_internal_next_vlan(priv, vlan + 1);
		if (vlan < 4096)
			goto next_vlan;
	}
	/* Clear selection and clean up stale MAC flow rules. */
	flow = LIST_FIRST(&priv->flows);
	while (flow && flow->internal) {
		struct rte_flow *next = LIST_NEXT(flow, next);

		if (flow->mac && !flow->select)
			claim_zero(mlx4_flow_destroy(priv->dev, flow, error));
		else
			flow->select = 0;
		flow = next;
	}
	return err;
}

/**
 * Synchronize flow rules.
 *
 * This function synchronizes flow rules with the state of the device by
 * taking into account isolated mode and whether target queues are
 * configured.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx4_flow_sync(struct priv *priv, struct rte_flow_error *error)
{
	struct rte_flow *flow;
	int ret;

	/* Internal flow rules are guaranteed to come first in the list. */
	if (priv->isolated) {
		/*
		 * Get rid of them in isolated mode, stop at the first
		 * non-internal rule found.
		 */
		for (flow = LIST_FIRST(&priv->flows);
		     flow && flow->internal;
		     flow = LIST_FIRST(&priv->flows))
			claim_zero(mlx4_flow_destroy(priv->dev, flow, error));
	} else {
		/* Refresh internal rules. */
		ret = mlx4_flow_internal(priv, error);
		if (ret)
			return ret;
	}
	/* Toggle the remaining flow rules. */
	for (flow = LIST_FIRST(&priv->flows);
	     flow;
	     flow = LIST_NEXT(flow, next)) {
		ret = mlx4_flow_toggle(priv, flow, priv->started, error);
		if (ret)
			return ret;
	}
	if (!priv->started)
		assert(!priv->drop);
	return 0;
}

/**
 * Clean up all flow rules.
 *
 * Unlike mlx4_flow_flush(), this function takes care of all remaining flow
 * rules regardless of whether they are internal or user-configured.
 *
 * @param priv
 *   Pointer to private structure.
 */
void
mlx4_flow_clean(struct priv *priv)
{
	struct rte_flow *flow;

	while ((flow = LIST_FIRST(&priv->flows)))
		mlx4_flow_destroy(priv->dev, flow, NULL);
}

static const struct rte_flow_ops mlx4_flow_ops = {
	.validate = mlx4_flow_validate,
	.create = mlx4_flow_create,
	.destroy = mlx4_flow_destroy,
	.flush = mlx4_flow_flush,
	.isolate = mlx4_flow_isolate,
};

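/*
 * Applications do not reach this structure directly; rte_flow_*() entry
 * points look it up through mlx4_filter_ctrl() below via the
 * RTE_ETH_FILTER_GENERIC filter type.
 */
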
/**
 * Manage filter operations.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param filter_type
 *   Filter type.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, negative errno value otherwise and rte_errno is set.
 */
int
mlx4_filter_ctrl(struct rte_eth_dev *dev,
		 enum rte_filter_type filter_type,
		 enum rte_filter_op filter_op,
		 void *arg)
{
	switch (filter_type) {
	case RTE_ETH_FILTER_GENERIC:
		if (filter_op != RTE_ETH_FILTER_GET)
			break;
		*(const void **)arg = &mlx4_flow_ops;
		return 0;
	default:
		ERROR("%p: filter type (%d) not supported",
		      (void *)dev, filter_type);
		break;
	}
	rte_errno = ENOTSUP;
	return -rte_errno;
}