/*-
 *   BSD LICENSE
 *
 * Copyright 2017 6WIND S.A.
 * Copyright 2017 Mellanox
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * * Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * * Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in
 *   the documentation and/or other materials provided with the
 *   distribution.
 * * Neither the name of 6WIND S.A. nor the names of its
 *   contributors may be used to endorse or promote products derived
 *   from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/**
 * @file
 * Flow API operations for mlx4 driver.
 */

#include <arpa/inet.h>
#include <assert.h>
#include <errno.h>
#include <stdalign.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <sys/queue.h>

/* Verbs headers do not support -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_byteorder.h>
#include <rte_errno.h>
#include <rte_eth_ctrl.h>
#include <rte_ethdev.h>
#include <rte_flow.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>

#include "mlx4.h"
#include "mlx4_flow.h"
#include "mlx4_rxtx.h"
#include "mlx4_utils.h"
/** Static initializer for a list of subsequent item types. */
#define NEXT_ITEM(...) \
	(const enum rte_flow_item_type []){ \
		__VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
	}
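
/*
 * Illustration (editor's note, not compiled):
 * NEXT_ITEM(RTE_FLOW_ITEM_TYPE_VLAN, RTE_FLOW_ITEM_TYPE_IPV4) expands to
 * the compound literal
 *
 *	(const enum rte_flow_item_type []){
 *		RTE_FLOW_ITEM_TYPE_VLAN,
 *		RTE_FLOW_ITEM_TYPE_IPV4,
 *		RTE_FLOW_ITEM_TYPE_END,
 *	}
 *
 * The trailing RTE_FLOW_ITEM_TYPE_END entry (value 0) lets traversal code
 * stop without a separate length field.
 */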
/** Processor structure associated with a flow item. */
struct mlx4_flow_proc_item {
	/** Bit-masks corresponding to the possibilities for the item. */
	const void *mask;
	/**
	 * Default bit-masks to use when item->mask is not provided. When
	 * \default_mask is also NULL, the full supported bit-mask (\mask) is
	 * used instead.
	 */
	const void *default_mask;
	/** Bit-mask size in bytes. */
	const unsigned int mask_sz;
	/**
	 * Check support for a given item.
	 *
	 * @param item
	 *   Item specification.
	 * @param mask
	 *   Bit-masks covering supported fields to compare with spec,
	 *   last and mask in \item.
	 * @param size
	 *   Bit-mask size in bytes.
	 *
	 * @return
	 *   0 on success, negative value otherwise.
	 */
	int (*validate)(const struct rte_flow_item *item,
			const uint8_t *mask, unsigned int size);
	/**
	 * Conversion function from rte_flow to NIC specific flow.
	 *
	 * @param item
	 *   rte_flow item to convert.
	 * @param default_mask
	 *   Default bit-masks to use when item->mask is not provided.
	 * @param flow
	 *   Flow rule handle to update.
	 *
	 * @return
	 *   0 on success, negative value otherwise.
	 */
	int (*convert)(const struct rte_flow_item *item,
		       const void *default_mask,
		       struct rte_flow *flow);
	/** Size in bytes of the destination structure. */
	const unsigned int dst_sz;
	/** List of possible subsequent items. */
	const enum rte_flow_item_type *const next_item;
};
/** Shared resources for drop flow rules. */
struct mlx4_drop {
	struct ibv_qp *qp; /**< QP target. */
	struct ibv_cq *cq; /**< CQ associated with above QP. */
	struct priv *priv; /**< Back pointer to private data. */
	uint32_t refcnt; /**< Reference count. */
};
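
/*
 * Note: a single mlx4_drop instance is shared by all drop flow rules of a
 * device through priv->drop; mlx4_drop_get() below creates it on first use
 * and mlx4_drop_put() destroys it once the last reference is gone.
 */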
/**
 * Convert Ethernet item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] flow
 *   Flow rule handle to update.
 */
static int
mlx4_flow_create_eth(const struct rte_flow_item *item,
		     const void *default_mask,
		     struct rte_flow *flow)
{
	const struct rte_flow_item_eth *spec = item->spec;
	const struct rte_flow_item_eth *mask = item->mask;
	struct ibv_flow_spec_eth *eth;
	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
	unsigned int i;

	++flow->ibv_attr->num_of_specs;
	flow->ibv_attr->priority = 2;
	eth = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size);
	*eth = (struct ibv_flow_spec_eth) {
		.type = IBV_FLOW_SPEC_ETH,
		.size = eth_size,
	};
	if (!spec) {
		flow->ibv_attr->type = IBV_FLOW_ATTR_ALL_DEFAULT;
		return 0;
	}
	if (!mask)
		mask = default_mask;
	memcpy(eth->val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
	memcpy(eth->val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
	memcpy(eth->mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
	memcpy(eth->mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
	/* Remove unwanted bits from values. */
	for (i = 0; i < ETHER_ADDR_LEN; ++i) {
		eth->val.dst_mac[i] &= eth->mask.dst_mac[i];
		eth->val.src_mac[i] &= eth->mask.src_mac[i];
	}
	return 0;
}
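
/*
 * Note: each conversion function overwrites flow->ibv_attr->priority
 * (2 for L2, 1 for L3, 0 for L4), so a rule ends up with the priority of
 * its most specific layer since items are processed in pattern order.
 */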
/**
 * Convert VLAN item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] flow
 *   Flow rule handle to update.
 */
static int
mlx4_flow_create_vlan(const struct rte_flow_item *item,
		      const void *default_mask,
		      struct rte_flow *flow)
{
	const struct rte_flow_item_vlan *spec = item->spec;
	const struct rte_flow_item_vlan *mask = item->mask;
	struct ibv_flow_spec_eth *eth;
	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);

	eth = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size -
		       eth_size);
	if (!spec)
		return 0;
	if (!mask)
		mask = default_mask;
	eth->val.vlan_tag = spec->tci;
	eth->mask.vlan_tag = mask->tci;
	eth->val.vlan_tag &= eth->mask.vlan_tag;
	return 0;
}
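
/*
 * Note: unlike the other conversion functions, the VLAN one above does not
 * append its own Verbs specification (its dst_sz is 0 in
 * mlx4_flow_proc_item_list); it patches vlan_tag into the Ethernet
 * specification written just before it, hence the eth pointer computed at
 * ibv_attr_size - eth_size.
 */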
/**
 * Convert IPv4 item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] flow
 *   Flow rule handle to update.
 */
static int
mlx4_flow_create_ipv4(const struct rte_flow_item *item,
		      const void *default_mask,
		      struct rte_flow *flow)
{
	const struct rte_flow_item_ipv4 *spec = item->spec;
	const struct rte_flow_item_ipv4 *mask = item->mask;
	struct ibv_flow_spec_ipv4 *ipv4;
	unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4);

	++flow->ibv_attr->num_of_specs;
	flow->ibv_attr->priority = 1;
	ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size);
	*ipv4 = (struct ibv_flow_spec_ipv4) {
		.type = IBV_FLOW_SPEC_IPV4,
		.size = ipv4_size,
	};
	if (!spec)
		return 0;
	ipv4->val = (struct ibv_flow_ipv4_filter) {
		.src_ip = spec->hdr.src_addr,
		.dst_ip = spec->hdr.dst_addr,
	};
	if (!mask)
		mask = default_mask;
	ipv4->mask = (struct ibv_flow_ipv4_filter) {
		.src_ip = mask->hdr.src_addr,
		.dst_ip = mask->hdr.dst_addr,
	};
	/* Remove unwanted bits from values. */
	ipv4->val.src_ip &= ipv4->mask.src_ip;
	ipv4->val.dst_ip &= ipv4->mask.dst_ip;
	return 0;
}
/**
 * Convert UDP item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] flow
 *   Flow rule handle to update.
 */
static int
mlx4_flow_create_udp(const struct rte_flow_item *item,
		     const void *default_mask,
		     struct rte_flow *flow)
{
	const struct rte_flow_item_udp *spec = item->spec;
	const struct rte_flow_item_udp *mask = item->mask;
	struct ibv_flow_spec_tcp_udp *udp;
	unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);

	++flow->ibv_attr->num_of_specs;
	flow->ibv_attr->priority = 0;
	udp = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size);
	*udp = (struct ibv_flow_spec_tcp_udp) {
		.type = IBV_FLOW_SPEC_UDP,
		.size = udp_size,
	};
	if (!spec)
		return 0;
	udp->val.dst_port = spec->hdr.dst_port;
	udp->val.src_port = spec->hdr.src_port;
	if (!mask)
		mask = default_mask;
	udp->mask.dst_port = mask->hdr.dst_port;
	udp->mask.src_port = mask->hdr.src_port;
	/* Remove unwanted bits from values. */
	udp->val.src_port &= udp->mask.src_port;
	udp->val.dst_port &= udp->mask.dst_port;
	return 0;
}
/**
 * Convert TCP item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] flow
 *   Flow rule handle to update.
 */
static int
mlx4_flow_create_tcp(const struct rte_flow_item *item,
		     const void *default_mask,
		     struct rte_flow *flow)
{
	const struct rte_flow_item_tcp *spec = item->spec;
	const struct rte_flow_item_tcp *mask = item->mask;
	struct ibv_flow_spec_tcp_udp *tcp;
	unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);

	++flow->ibv_attr->num_of_specs;
	flow->ibv_attr->priority = 0;
	tcp = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size);
	*tcp = (struct ibv_flow_spec_tcp_udp) {
		.type = IBV_FLOW_SPEC_TCP,
		.size = tcp_size,
	};
	if (!spec)
		return 0;
	tcp->val.dst_port = spec->hdr.dst_port;
	tcp->val.src_port = spec->hdr.src_port;
	if (!mask)
		mask = default_mask;
	tcp->mask.dst_port = mask->hdr.dst_port;
	tcp->mask.src_port = mask->hdr.src_port;
	/* Remove unwanted bits from values. */
	tcp->val.src_port &= tcp->mask.src_port;
	tcp->val.dst_port &= tcp->mask.dst_port;
	return 0;
}
/**
 * Check support for a given item.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] mask
 *   Bit-masks covering supported fields to compare with spec, last and
 *   mask in \item.
 * @param size
 *   Bit-mask size in bytes.
 *
 * @return
 *   0 on success, negative value otherwise.
 */
static int
mlx4_flow_item_validate(const struct rte_flow_item *item,
			const uint8_t *mask, unsigned int size)
{
	int ret = 0;

	if (!item->spec && (item->mask || item->last))
		return -1;
	if (item->spec && !item->mask) {
		unsigned int i;
		const uint8_t *spec = item->spec;

		for (i = 0; i < size; ++i)
			if ((spec[i] | mask[i]) != mask[i])
				return -1;
	}
	if (item->last && !item->mask) {
		unsigned int i;
		const uint8_t *spec = item->last;

		for (i = 0; i < size; ++i)
			if ((spec[i] | mask[i]) != mask[i])
				return -1;
	}
	if (item->spec && item->last) {
		uint8_t spec[size];
		uint8_t last[size];
		const uint8_t *apply = mask;
		unsigned int i;

		if (item->mask)
			apply = item->mask;
		for (i = 0; i < size; ++i) {
			spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
			last[i] = ((const uint8_t *)item->last)[i] & apply[i];
		}
		ret = memcmp(spec, last, size);
	}
	return ret;
}
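
/*
 * Behavior summary (editor's examples for a 16-bit field whose supported
 * mask is 0xffff):
 *
 * - spec == 0x1234, mask == NULL: accepted, checked against the supported
 *   mask only.
 * - spec == NULL with mask or last set: rejected outright.
 * - spec == 0x1234, last == 0x1234: accepted, the range collapses to a
 *   single value.
 * - spec == 0x1234, last == 0x1235: rejected, ranges cannot be expressed
 *   by the underlying API.
 */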
static int
mlx4_flow_validate_eth(const struct rte_flow_item *item,
		       const uint8_t *mask, unsigned int size)
{
	if (item->mask) {
		const struct rte_flow_item_eth *mask = item->mask;

		if (mask->dst.addr_bytes[0] != 0xff ||
		    mask->dst.addr_bytes[1] != 0xff ||
		    mask->dst.addr_bytes[2] != 0xff ||
		    mask->dst.addr_bytes[3] != 0xff ||
		    mask->dst.addr_bytes[4] != 0xff ||
		    mask->dst.addr_bytes[5] != 0xff)
			return -1;
	}
	return mlx4_flow_item_validate(item, mask, size);
}
static int
mlx4_flow_validate_vlan(const struct rte_flow_item *item,
			const uint8_t *mask, unsigned int size)
{
	if (item->mask) {
		const struct rte_flow_item_vlan *mask = item->mask;

		if (mask->tci != 0 &&
		    ntohs(mask->tci) != 0x0fff)
			return -1;
	}
	return mlx4_flow_item_validate(item, mask, size);
}
static int
mlx4_flow_validate_ipv4(const struct rte_flow_item *item,
			const uint8_t *mask, unsigned int size)
{
	if (item->mask) {
		const struct rte_flow_item_ipv4 *mask = item->mask;

		if (mask->hdr.src_addr != 0 &&
		    mask->hdr.src_addr != 0xffffffff)
			return -1;
		if (mask->hdr.dst_addr != 0 &&
		    mask->hdr.dst_addr != 0xffffffff)
			return -1;
	}
	return mlx4_flow_item_validate(item, mask, size);
}
static int
mlx4_flow_validate_udp(const struct rte_flow_item *item,
		       const uint8_t *mask, unsigned int size)
{
	if (item->mask) {
		const struct rte_flow_item_udp *mask = item->mask;

		if (mask->hdr.src_port != 0 &&
		    mask->hdr.src_port != 0xffff)
			return -1;
		if (mask->hdr.dst_port != 0 &&
		    mask->hdr.dst_port != 0xffff)
			return -1;
	}
	return mlx4_flow_item_validate(item, mask, size);
}
static int
mlx4_flow_validate_tcp(const struct rte_flow_item *item,
		       const uint8_t *mask, unsigned int size)
{
	if (item->mask) {
		const struct rte_flow_item_tcp *mask = item->mask;

		if (mask->hdr.src_port != 0 &&
		    mask->hdr.src_port != 0xffff)
			return -1;
		if (mask->hdr.dst_port != 0 &&
		    mask->hdr.dst_port != 0xffff)
			return -1;
	}
	return mlx4_flow_item_validate(item, mask, size);
}
/** Graph of supported items and associated actions. */
static const struct mlx4_flow_proc_item mlx4_flow_proc_item_list[] = {
	[RTE_FLOW_ITEM_TYPE_END] = {
		.next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_ETH),
	},
	[RTE_FLOW_ITEM_TYPE_ETH] = {
		.next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_VLAN,
				       RTE_FLOW_ITEM_TYPE_IPV4),
		.mask = &(const struct rte_flow_item_eth){
			.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
			.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
		},
		.default_mask = &rte_flow_item_eth_mask,
		.mask_sz = sizeof(struct rte_flow_item_eth),
		.validate = mlx4_flow_validate_eth,
		.convert = mlx4_flow_create_eth,
		.dst_sz = sizeof(struct ibv_flow_spec_eth),
	},
	[RTE_FLOW_ITEM_TYPE_VLAN] = {
		.next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_IPV4),
		.mask = &(const struct rte_flow_item_vlan){
			/* Only TCI VID matching is supported. */
			.tci = RTE_BE16(0x0fff),
		},
		.mask_sz = sizeof(struct rte_flow_item_vlan),
		.validate = mlx4_flow_validate_vlan,
		.convert = mlx4_flow_create_vlan,
		.dst_sz = 0,
	},
	[RTE_FLOW_ITEM_TYPE_IPV4] = {
		.next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_UDP,
				       RTE_FLOW_ITEM_TYPE_TCP),
		.mask = &(const struct rte_flow_item_ipv4){
			.hdr = {
				.src_addr = RTE_BE32(0xffffffff),
				.dst_addr = RTE_BE32(0xffffffff),
			},
		},
		.default_mask = &rte_flow_item_ipv4_mask,
		.mask_sz = sizeof(struct rte_flow_item_ipv4),
		.validate = mlx4_flow_validate_ipv4,
		.convert = mlx4_flow_create_ipv4,
		.dst_sz = sizeof(struct ibv_flow_spec_ipv4),
	},
	[RTE_FLOW_ITEM_TYPE_UDP] = {
		.mask = &(const struct rte_flow_item_udp){
			.hdr = {
				.src_port = RTE_BE16(0xffff),
				.dst_port = RTE_BE16(0xffff),
			},
		},
		.default_mask = &rte_flow_item_udp_mask,
		.mask_sz = sizeof(struct rte_flow_item_udp),
		.validate = mlx4_flow_validate_udp,
		.convert = mlx4_flow_create_udp,
		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
	},
	[RTE_FLOW_ITEM_TYPE_TCP] = {
		.mask = &(const struct rte_flow_item_tcp){
			.hdr = {
				.src_port = RTE_BE16(0xffff),
				.dst_port = RTE_BE16(0xffff),
			},
		},
		.default_mask = &rte_flow_item_tcp_mask,
		.mask_sz = sizeof(struct rte_flow_item_tcp),
		.validate = mlx4_flow_validate_tcp,
		.convert = mlx4_flow_create_tcp,
		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
	},
};
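
/*
 * The graph above encodes all valid orderings. For instance the pattern
 * ETH -> IPV4 -> UDP -> END is accepted because each item is listed in its
 * predecessor's next_item array (traversal starts from the
 * RTE_FLOW_ITEM_TYPE_END entry), whereas a pattern starting with IPV4 is
 * rejected by mlx4_flow_prepare() with "item not supported".
 */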
/**
 * Make sure a flow rule is supported and initialize associated structure.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] addr
 *   Buffer where the resulting flow rule handle pointer must be stored.
 *   If NULL, stop processing after validation stage.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx4_flow_prepare(struct priv *priv,
		  const struct rte_flow_attr *attr,
		  const struct rte_flow_item pattern[],
		  const struct rte_flow_action actions[],
		  struct rte_flow_error *error,
		  struct rte_flow **addr)
{
	const struct rte_flow_item *item;
	const struct rte_flow_action *action;
	const struct mlx4_flow_proc_item *proc;
	struct rte_flow temp = { .ibv_attr_size = sizeof(*temp.ibv_attr) };
	struct rte_flow *flow = &temp;
	uint32_t priority_override = 0;

	if (attr->group)
		return rte_flow_error_set
			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
			 NULL, "groups are not supported");
	if (priv->isolated)
		priority_override = attr->priority;
	else if (attr->priority)
		return rte_flow_error_set
			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
			 NULL,
			 "priorities are not supported outside isolated mode");
	if (attr->priority > MLX4_FLOW_PRIORITY_LAST)
		return rte_flow_error_set
			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
			 NULL, "maximum priority level is "
			 MLX4_STR_EXPAND(MLX4_FLOW_PRIORITY_LAST));
	if (attr->egress)
		return rte_flow_error_set
			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
			 NULL, "egress is not supported");
	if (!attr->ingress)
		return rte_flow_error_set
			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
			 NULL, "only ingress is supported");
fill:
	proc = mlx4_flow_proc_item_list;
	/* Go over pattern. */
	for (item = pattern; item->type; ++item) {
		const struct mlx4_flow_proc_item *next = NULL;
		unsigned int i;
		int err;

		if (item->type == RTE_FLOW_ITEM_TYPE_VOID)
			continue;
		if (item->type == MLX4_FLOW_ITEM_TYPE_INTERNAL) {
			flow->internal = 1;
			continue;
		}
		/*
		 * The NIC can support patterns with NULL eth spec only
		 * if eth is a single item in a rule.
		 */
		if (!item->spec && item->type == RTE_FLOW_ITEM_TYPE_ETH) {
			const struct rte_flow_item *next = item + 1;

			if (next->type)
				return rte_flow_error_set
					(error, ENOTSUP,
					 RTE_FLOW_ERROR_TYPE_ITEM, item,
					 "the rule requires an Ethernet spec");
		}
		for (i = 0; proc->next_item && proc->next_item[i]; ++i) {
			if (proc->next_item[i] == item->type) {
				next = &mlx4_flow_proc_item_list[item->type];
				break;
			}
		}
		if (!next)
			goto exit_item_not_supported;
		proc = next;
		/* Perform validation once, while handle is not allocated. */
		if (flow == &temp) {
			err = proc->validate(item, proc->mask, proc->mask_sz);
			if (err)
				goto exit_item_not_supported;
		} else if (proc->convert) {
			err = proc->convert(item,
					    (proc->default_mask ?
					     proc->default_mask :
					     proc->mask),
					    flow);
			if (err)
				goto exit_item_not_supported;
		}
		flow->ibv_attr_size += proc->dst_sz;
	}
	/* Use specified priority level when in isolated mode. */
	if (priv->isolated && flow != &temp)
		flow->ibv_attr->priority = priority_override;
	/* Go over actions list. */
	for (action = actions; action->type; ++action) {
		switch (action->type) {
			const struct rte_flow_action_queue *queue;

		case RTE_FLOW_ACTION_TYPE_VOID:
			continue;
		case RTE_FLOW_ACTION_TYPE_DROP:
			flow->drop = 1;
			break;
		case RTE_FLOW_ACTION_TYPE_QUEUE:
			queue = action->conf;
			if (queue->index >= priv->dev->data->nb_rx_queues)
				goto exit_action_not_supported;
			flow->queue = 1;
			flow->queue_id = queue->index;
			break;
		default:
			goto exit_action_not_supported;
		}
	}
	if (!flow->queue && !flow->drop)
		return rte_flow_error_set
			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
			 NULL, "no valid action");
	/* Validation ends here. */
	if (!addr)
		return 0;
	if (flow == &temp) {
		/* Allocate proper handle based on collected data. */
		const struct mlx4_malloc_vec vec[] = {
			{
				.align = alignof(struct rte_flow),
				.size = sizeof(*flow),
				.addr = (void **)&flow,
			},
			{
				.align = alignof(struct ibv_flow_attr),
				.size = temp.ibv_attr_size,
				.addr = (void **)&temp.ibv_attr,
			},
		};

		if (!mlx4_zmallocv(__func__, vec, RTE_DIM(vec)))
			return rte_flow_error_set
				(error, rte_errno,
				 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
				 "flow rule handle allocation failure");
		/* Most fields will be updated by second pass. */
		*flow = (struct rte_flow){
			.ibv_attr = temp.ibv_attr,
			.ibv_attr_size = sizeof(*flow->ibv_attr),
		};
		*flow->ibv_attr = (struct ibv_flow_attr){
			.type = IBV_FLOW_ATTR_NORMAL,
			.size = sizeof(*flow->ibv_attr),
			.priority = attr->priority,
			.port = priv->port,
		};
		goto fill;
	}
	*addr = flow;
	return 0;
exit_item_not_supported:
	return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
				  item, "item not supported");
exit_action_not_supported:
	return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
				  action, "action not supported");
}
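
/*
 * Callers choose between the two behaviors through the addr parameter (see
 * mlx4_flow_validate() and mlx4_flow_create() below):
 *
 *	mlx4_flow_prepare(priv, attr, pattern, actions, error, NULL);
 *		validation only, no allocation;
 *	mlx4_flow_prepare(priv, attr, pattern, actions, error, &flow);
 *		validation, then allocation and conversion into *flow.
 */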
/**
 * Validate a flow supported by the NIC.
 *
 * @see rte_flow_validate()
 * @see rte_flow_ops
 */
static int
mlx4_flow_validate(struct rte_eth_dev *dev,
		   const struct rte_flow_attr *attr,
		   const struct rte_flow_item pattern[],
		   const struct rte_flow_action actions[],
		   struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	return mlx4_flow_prepare(priv, attr, pattern, actions, error, NULL);
}
/**
 * Get a drop flow rule resources instance.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   Pointer to drop flow resources on success, NULL otherwise and rte_errno
 *   is set.
 */
static struct mlx4_drop *
mlx4_drop_get(struct priv *priv)
{
	struct mlx4_drop *drop = priv->drop;

	if (drop) {
		assert(drop->refcnt);
		assert(drop->priv == priv);
		++drop->refcnt;
		return drop;
	}
	drop = rte_malloc(__func__, sizeof(*drop), 0);
	if (!drop)
		goto error;
	*drop = (struct mlx4_drop){
		.priv = priv,
		.refcnt = 1,
	};
	drop->cq = ibv_create_cq(priv->ctx, 1, NULL, NULL, 0);
	if (!drop->cq)
		goto error;
	drop->qp = ibv_create_qp(priv->pd,
				 &(struct ibv_qp_init_attr){
					.send_cq = drop->cq,
					.recv_cq = drop->cq,
					.qp_type = IBV_QPT_RAW_PACKET,
				 });
	if (!drop->qp)
		goto error;
	priv->drop = drop;
	return drop;
error:
	/* drop may be NULL when the initial allocation fails. */
	if (drop) {
		if (drop->qp)
			claim_zero(ibv_destroy_qp(drop->qp));
		if (drop->cq)
			claim_zero(ibv_destroy_cq(drop->cq));
		rte_free(drop);
	}
	rte_errno = ENOMEM;
	return NULL;
}
/**
 * Give back a drop flow rule resources instance.
 *
 * @param drop
 *   Pointer to drop flow rule resources.
 */
static void
mlx4_drop_put(struct mlx4_drop *drop)
{
	assert(drop->refcnt);
	if (--drop->refcnt)
		return;
	drop->priv->drop = NULL;
	claim_zero(ibv_destroy_qp(drop->qp));
	claim_zero(ibv_destroy_cq(drop->cq));
	rte_free(drop);
}
/**
 * Toggle a configured flow rule.
 *
 * @param priv
 *   Pointer to private structure.
 * @param flow
 *   Flow rule handle to toggle.
 * @param enable
 *   Whether associated Verbs flow must be created or removed.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx4_flow_toggle(struct priv *priv,
		 struct rte_flow *flow,
		 int enable,
		 struct rte_flow_error *error)
{
	struct ibv_qp *qp = NULL;
	const char *msg;
	int err;

	if (!enable) {
		if (!flow->ibv_flow)
			return 0;
		claim_zero(ibv_destroy_flow(flow->ibv_flow));
		flow->ibv_flow = NULL;
		if (flow->drop)
			mlx4_drop_put(priv->drop);
		return 0;
	}
	if (flow->queue) {
		struct rxq *rxq = NULL;

		if (flow->queue_id < priv->dev->data->nb_rx_queues)
			rxq = priv->dev->data->rx_queues[flow->queue_id];
		if (flow->ibv_flow) {
			if (!rxq ^ !flow->drop)
				return 0;
			/* Verbs flow needs updating. */
			claim_zero(ibv_destroy_flow(flow->ibv_flow));
			flow->ibv_flow = NULL;
			if (flow->drop)
				mlx4_drop_put(priv->drop);
		}
		if (rxq)
			qp = rxq->qp;
		/* A missing target queue drops traffic implicitly. */
		flow->drop = !rxq;
	}
	if (flow->drop) {
		mlx4_drop_get(priv);
		if (!priv->drop) {
			err = rte_errno;
			msg = "resources for drop flow rule cannot be created";
			goto error;
		}
		qp = priv->drop->qp;
	}
	assert(qp);
	assert(flow->ibv_attr);
	if (flow->ibv_flow)
		return 0;
	flow->ibv_flow = ibv_create_flow(qp, flow->ibv_attr);
	if (flow->ibv_flow)
		return 0;
	if (flow->drop)
		mlx4_drop_put(priv->drop);
	err = errno;
	msg = "flow rule rejected by device";
error:
	return rte_flow_error_set
		(error, err, RTE_FLOW_ERROR_TYPE_HANDLE, flow, msg);
}
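
/*
 * Note: re-running mlx4_flow_toggle() over the rule list is expected to be
 * safe; disabling an already disabled rule returns 0 immediately, and an
 * enabled queue rule whose target is unchanged is left alone.
 * mlx4_flow_sync() relies on this when called repeatedly.
 */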
/**
 * Create a flow.
 *
 * @see rte_flow_create()
 * @see rte_flow_ops
 */
static struct rte_flow *
mlx4_flow_create(struct rte_eth_dev *dev,
		 const struct rte_flow_attr *attr,
		 const struct rte_flow_item pattern[],
		 const struct rte_flow_action actions[],
		 struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;
	struct rte_flow *flow;
	int err;

	err = mlx4_flow_prepare(priv, attr, pattern, actions, error, &flow);
	if (err)
		return NULL;
	err = mlx4_flow_toggle(priv, flow, priv->started, error);
	if (!err) {
		struct rte_flow *curr = LIST_FIRST(&priv->flows);

		/* New rules are inserted after internal ones. */
		if (!curr || !curr->internal) {
			LIST_INSERT_HEAD(&priv->flows, flow, next);
		} else {
			while (LIST_NEXT(curr, next) &&
			       LIST_NEXT(curr, next)->internal)
				curr = LIST_NEXT(curr, next);
			LIST_INSERT_AFTER(curr, flow, next);
		}
		return flow;
	}
	rte_flow_error_set(error, -err, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
			   error->message);
	rte_free(flow);
	return NULL;
}
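
/*
 * The insertion logic above maintains an invariant relied upon by
 * mlx4_flow_sync(): internal flow rules always come first in priv->flows,
 * user-configured rules follow them.
 */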
/**
 * Configure isolated mode.
 *
 * @see rte_flow_isolate()
 * @see rte_flow_ops
 */
static int
mlx4_flow_isolate(struct rte_eth_dev *dev,
		  int enable,
		  struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	if (!!enable == !!priv->isolated)
		return 0;
	priv->isolated = !!enable;
	if (mlx4_flow_sync(priv)) {
		priv->isolated = !enable;
		return rte_flow_error_set(error, rte_errno,
					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
					  NULL,
					  enable ?
					  "cannot enter isolated mode" :
					  "cannot leave isolated mode");
	}
	return 0;
}
/**
 * Destroy a flow rule.
 *
 * @see rte_flow_destroy()
 * @see rte_flow_ops
 */
static int
mlx4_flow_destroy(struct rte_eth_dev *dev,
		  struct rte_flow *flow,
		  struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;
	int err = mlx4_flow_toggle(priv, flow, 0, error);

	if (err)
		return err;
	LIST_REMOVE(flow, next);
	rte_free(flow);
	return 0;
}
/**
 * Destroy user-configured flow rules.
 *
 * This function skips internal flow rules.
 *
 * @see rte_flow_flush()
 * @see rte_flow_ops
 */
static int
mlx4_flow_flush(struct rte_eth_dev *dev,
		struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;
	struct rte_flow *flow = LIST_FIRST(&priv->flows);

	while (flow) {
		struct rte_flow *next = LIST_NEXT(flow, next);

		if (!flow->internal)
			mlx4_flow_destroy(dev, flow, error);
		flow = next;
	}
	return 0;
}
/**
 * Generate internal flow rules.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx4_flow_internal(struct priv *priv, struct rte_flow_error *error)
{
	struct rte_flow_attr attr = {
		.ingress = 1,
	};
	struct rte_flow_item pattern[] = {
		{
			.type = MLX4_FLOW_ITEM_TYPE_INTERNAL,
		},
		{
			.type = RTE_FLOW_ITEM_TYPE_ETH,
			.spec = &(struct rte_flow_item_eth){
				.dst = priv->mac,
			},
			.mask = &(struct rte_flow_item_eth){
				.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
			},
		},
		{
			.type = RTE_FLOW_ITEM_TYPE_END,
		},
	};
	struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &(struct rte_flow_action_queue){
				.index = 0,
			},
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		},
	};

	if (!mlx4_flow_create(priv->dev, &attr, pattern, actions, error))
		return -rte_errno;
	return 0;
}
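
/*
 * The pattern above matches the device MAC address on queue 0 and is
 * flagged through MLX4_FLOW_ITEM_TYPE_INTERNAL so that mlx4_flow_flush()
 * leaves it alone while mlx4_flow_clean() and isolated mode remove it.
 */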
/**
 * Synchronize flow rules.
 *
 * This function synchronizes flow rules with the state of the device by
 * taking into account isolated mode and whether target queues are
 * configured.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx4_flow_sync(struct priv *priv)
{
	struct rte_flow *flow;
	int ret;

	/* Internal flow rules are guaranteed to come first in the list. */
	if (priv->isolated) {
		/*
		 * Get rid of them in isolated mode, stop at the first
		 * non-internal rule found.
		 */
		for (flow = LIST_FIRST(&priv->flows);
		     flow && flow->internal;
		     flow = LIST_FIRST(&priv->flows))
			claim_zero(mlx4_flow_destroy(priv->dev, flow, NULL));
	} else if (!LIST_FIRST(&priv->flows) ||
		   !LIST_FIRST(&priv->flows)->internal) {
		/*
		 * If the first rule is not internal outside isolated mode,
		 * internal rules must be added back.
		 */
		ret = mlx4_flow_internal(priv, NULL);
		if (ret)
			return ret;
	}
	if (priv->started)
		return mlx4_flow_start(priv);
	mlx4_flow_stop(priv);
	return 0;
}
/**
 * Clean up all flow rules.
 *
 * Unlike mlx4_flow_flush(), this function takes care of all remaining flow
 * rules regardless of whether they are internal or user-configured.
 *
 * @param priv
 *   Pointer to private structure.
 */
void
mlx4_flow_clean(struct priv *priv)
{
	struct rte_flow *flow;

	while ((flow = LIST_FIRST(&priv->flows)))
		mlx4_flow_destroy(priv->dev, flow, NULL);
}
/**
 * Disable flow rules.
 *
 * @param priv
 *   Pointer to private structure.
 */
void
mlx4_flow_stop(struct priv *priv)
{
	struct rte_flow *flow;

	for (flow = LIST_FIRST(&priv->flows);
	     flow;
	     flow = LIST_NEXT(flow, next)) {
		claim_zero(mlx4_flow_toggle(priv, flow, 0, NULL));
	}
	assert(!priv->drop);
}
/**
 * Enable flow rules.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx4_flow_start(struct priv *priv)
{
	int ret;
	struct rte_flow *flow;

	for (flow = LIST_FIRST(&priv->flows);
	     flow;
	     flow = LIST_NEXT(flow, next)) {
		ret = mlx4_flow_toggle(priv, flow, 1, NULL);
		if (unlikely(ret)) {
			mlx4_flow_stop(priv);
			return ret;
		}
	}
	return 0;
}
static const struct rte_flow_ops mlx4_flow_ops = {
	.validate = mlx4_flow_validate,
	.create = mlx4_flow_create,
	.destroy = mlx4_flow_destroy,
	.flush = mlx4_flow_flush,
	.isolate = mlx4_flow_isolate,
};
/**
 * Manage filter operations.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param filter_type
 *   Filter type.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, negative errno value otherwise and rte_errno is set.
 */
int
mlx4_filter_ctrl(struct rte_eth_dev *dev,
		 enum rte_filter_type filter_type,
		 enum rte_filter_op filter_op,
		 void *arg)
{
	switch (filter_type) {
	case RTE_ETH_FILTER_GENERIC:
		if (filter_op != RTE_ETH_FILTER_GET)
			break;
		*(const void **)arg = &mlx4_flow_ops;
		return 0;
	default:
		ERROR("%p: filter type (%d) not supported",
		      (void *)dev, filter_type);
		break;
	}
	rte_errno = ENOTSUP;
	return -rte_errno;
}
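
/*
 * Usage note (editor's sketch): applications do not call mlx4_filter_ctrl()
 * directly; the generic rte_flow layer retrieves the operation table
 * through it, roughly as follows:
 *
 *	const struct rte_flow_ops *ops;
 *
 *	mlx4_filter_ctrl(dev, RTE_ETH_FILTER_GENERIC, RTE_ETH_FILTER_GET,
 *			 &ops);
 *	ops->create(dev, &attr, pattern, actions, &error);
 */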