4 * Copyright 2016 6WIND S.A.
5 * Copyright 2016 Mellanox.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of 6WIND S.A. nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 #include <sys/queue.h>
38 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
40 #pragma GCC diagnostic ignored "-Wpedantic"
42 #include <infiniband/verbs.h>
44 #pragma GCC diagnostic error "-Wpedantic"
47 #include <rte_ethdev.h>
49 #include <rte_flow_driver.h>
50 #include <rte_malloc.h>
56 mlx5_flow_create_eth(const struct rte_flow_item *item,
57 const void *default_mask,
61 mlx5_flow_create_vlan(const struct rte_flow_item *item,
62 const void *default_mask,
66 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
67 const void *default_mask,
71 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
72 const void *default_mask,
76 mlx5_flow_create_udp(const struct rte_flow_item *item,
77 const void *default_mask,
81 mlx5_flow_create_tcp(const struct rte_flow_item *item,
82 const void *default_mask,
86 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
87 const void *default_mask,
/* Flow rule handle returned to the application (the struct opener is
 * elided from this excerpt).  Holds every Verbs object created for the
 * rule plus the target Rx queue, so destroy/flush can release them. */
91 LIST_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
92 struct ibv_exp_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
93 struct ibv_exp_rwq_ind_table *ind_table; /**< Indirection table. */
94 struct ibv_qp *qp; /**< Verbs queue pair. */
95 struct ibv_exp_flow *ibv_flow; /**< Verbs flow. */
96 struct ibv_exp_wq *wq; /**< Verbs work queue. */
97 struct ibv_cq *cq; /**< Verbs completion queue. */
/* rxq is NULL for the drop path; cleanup code uses this to decide
 * whether wq/cq are owned by this flow or borrowed from the queue. */
98 struct rxq *rxq; /**< Pointer to the queue, NULL if drop queue. */
99 uint32_t mark:1; /**< Set if the flow is marked. */
102 /** Static initializer for items. */
104 (const enum rte_flow_item_type []){ \
105 __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
108 /** Structure to generate a simple graph of layers supported by the NIC. */
/* One node of the item graph below: which rte_flow items may follow,
 * which actions are legal, the supported/default masks, and how to
 * convert the item into a Verbs spec. */
109 struct mlx5_flow_items {
110 /** List of possible actions for these items. */
111 const enum rte_flow_action_type *const actions;
112 /** Bit-masks corresponding to the possibilities for the item. */
115 * Default bit-masks to use when item->mask is not provided. When
116 * \default_mask is also NULL, the full supported bit-mask (\mask) is
119 const void *default_mask;
120 /** Bit-masks size in bytes. */
121 const unsigned int mask_sz;
123 * Conversion function from rte_flow to NIC specific flow.
126 * rte_flow item to convert.
127 * @param default_mask
128 * Default bit-masks to use when item->mask is not provided.
130 * Internal structure to store the conversion.
133 * 0 on success, negative value otherwise.
/* NOTE(review): the third parameter line (the opaque data pointer,
 * a struct mlx5_flow *) is elided in this excerpt. */
135 int (*convert)(const struct rte_flow_item *item,
136 const void *default_mask,
138 /** Size in bytes of the destination structure. */
139 const unsigned int dst_sz;
140 /** List of possible following items. */
141 const enum rte_flow_item_type *const items;
144 /** Valid actions for this PMD (shared by every item graph node). */
145 static const enum rte_flow_action_type valid_actions[] = {
146 RTE_FLOW_ACTION_TYPE_DROP,
147 RTE_FLOW_ACTION_TYPE_QUEUE,
148 RTE_FLOW_ACTION_TYPE_MARK,
149 RTE_FLOW_ACTION_TYPE_END,
152 /** Graph of supported items and associated actions. */
/* Indexed by rte_flow_item_type; the END entry acts as the graph root
 * (the first item of a pattern must appear in its .items list). */
153 static const struct mlx5_flow_items mlx5_flow_items[] = {
154 [RTE_FLOW_ITEM_TYPE_END] = {
155 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
156 RTE_FLOW_ITEM_TYPE_VXLAN),
158 [RTE_FLOW_ITEM_TYPE_ETH] = {
159 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
160 RTE_FLOW_ITEM_TYPE_IPV4,
161 RTE_FLOW_ITEM_TYPE_IPV6),
162 .actions = valid_actions,
163 .mask = &(const struct rte_flow_item_eth){
164 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
165 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
168 .default_mask = &rte_flow_item_eth_mask,
169 .mask_sz = sizeof(struct rte_flow_item_eth),
170 .convert = mlx5_flow_create_eth,
171 .dst_sz = sizeof(struct ibv_exp_flow_spec_eth),
/* VLAN: no .dst_sz visible here — presumably 0 because the convert
 * callback folds the TCI into the previous Ethernet spec; confirm
 * against mlx5_flow_create_vlan(). */
173 [RTE_FLOW_ITEM_TYPE_VLAN] = {
174 .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
175 RTE_FLOW_ITEM_TYPE_IPV6),
176 .actions = valid_actions,
177 .mask = &(const struct rte_flow_item_vlan){
180 .default_mask = &rte_flow_item_vlan_mask,
181 .mask_sz = sizeof(struct rte_flow_item_vlan),
182 .convert = mlx5_flow_create_vlan,
185 [RTE_FLOW_ITEM_TYPE_IPV4] = {
186 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
187 RTE_FLOW_ITEM_TYPE_TCP),
188 .actions = valid_actions,
189 .mask = &(const struct rte_flow_item_ipv4){
193 .type_of_service = -1,
197 .default_mask = &rte_flow_item_ipv4_mask,
198 .mask_sz = sizeof(struct rte_flow_item_ipv4),
199 .convert = mlx5_flow_create_ipv4,
200 .dst_sz = sizeof(struct ibv_exp_flow_spec_ipv4_ext),
202 [RTE_FLOW_ITEM_TYPE_IPV6] = {
203 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
204 RTE_FLOW_ITEM_TYPE_TCP),
205 .actions = valid_actions,
206 .mask = &(const struct rte_flow_item_ipv6){
209 0xff, 0xff, 0xff, 0xff,
210 0xff, 0xff, 0xff, 0xff,
211 0xff, 0xff, 0xff, 0xff,
212 0xff, 0xff, 0xff, 0xff,
215 0xff, 0xff, 0xff, 0xff,
216 0xff, 0xff, 0xff, 0xff,
217 0xff, 0xff, 0xff, 0xff,
218 0xff, 0xff, 0xff, 0xff,
225 .default_mask = &rte_flow_item_ipv6_mask,
226 .mask_sz = sizeof(struct rte_flow_item_ipv6),
227 .convert = mlx5_flow_create_ipv6,
228 .dst_sz = sizeof(struct ibv_exp_flow_spec_ipv6_ext),
230 [RTE_FLOW_ITEM_TYPE_UDP] = {
231 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
232 .actions = valid_actions,
233 .mask = &(const struct rte_flow_item_udp){
239 .default_mask = &rte_flow_item_udp_mask,
240 .mask_sz = sizeof(struct rte_flow_item_udp),
241 .convert = mlx5_flow_create_udp,
242 .dst_sz = sizeof(struct ibv_exp_flow_spec_tcp_udp),
/* TCP is a terminal node: no .items list, nothing may follow it. */
244 [RTE_FLOW_ITEM_TYPE_TCP] = {
245 .actions = valid_actions,
246 .mask = &(const struct rte_flow_item_tcp){
252 .default_mask = &rte_flow_item_tcp_mask,
253 .mask_sz = sizeof(struct rte_flow_item_tcp),
254 .convert = mlx5_flow_create_tcp,
255 .dst_sz = sizeof(struct ibv_exp_flow_spec_tcp_udp),
/* VXLAN loops back to ETH, allowing one level of inner headers. */
257 [RTE_FLOW_ITEM_TYPE_VXLAN] = {
258 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
259 .actions = valid_actions,
260 .mask = &(const struct rte_flow_item_vxlan){
261 .vni = "\xff\xff\xff",
263 .default_mask = &rte_flow_item_vxlan_mask,
264 .mask_sz = sizeof(struct rte_flow_item_vxlan),
265 .convert = mlx5_flow_create_vxlan,
266 .dst_sz = sizeof(struct ibv_exp_flow_spec_tunnel),
270 /** Structure to pass to the conversion function. */
/* Cursor state shared by the convert callbacks: specs are appended at
 * ibv_attr + offset, and offset advances by each item's dst_sz. */
272 struct ibv_exp_flow_attr *ibv_attr; /**< Verbs attribute. */
273 unsigned int offset; /**< Offset in bytes in the ibv_attr buffer. */
274 uint32_t inner; /**< Set once VXLAN is encountered. */
/* Summary of the action list for a rule, filled while parsing actions
 * and consumed when the Verbs objects are created. */
277 struct mlx5_flow_action {
278 uint32_t queue:1; /**< Target is a receive queue. */
279 uint32_t drop:1; /**< Target is a drop queue. */
280 uint32_t mark:1; /**< Mark is present in the flow. */
281 uint32_t queue_id; /**< Identifier of the queue. */
282 uint32_t mark_id; /**< Mark identifier. */
286 * Check support for a given item.
289 * Item specification.
291 * Bit-masks covering supported fields to compare with spec, last and mask in
294 * Bit-Mask size in bytes.
/*
 * Byte-wise validation of an item against the supported bit-mask:
 * every spec/last/mask byte must be a subset of "mask"
 * ((b | mask) == mask).  Several lines (returns, declarations,
 * closing braces) are elided from this excerpt.
 */
300 mlx5_flow_item_validate(const struct rte_flow_item *item,
301 		const uint8_t *mask, unsigned int size)
/* A mask or last without a spec is meaningless. */
305 	if (!item->spec && (item->mask || item->last))
/* spec without mask: check spec bytes against the supported mask. */
307 	if (item->spec && !item->mask) {
309 		const uint8_t *spec = item->spec;
311 		for (i = 0; i < size; ++i)
312 			if ((spec[i] | mask[i]) != mask[i])
/* last without mask: same subset check on the range upper bound. */
315 	if (item->last && !item->mask) {
317 		const uint8_t *spec = item->last;
319 		for (i = 0; i < size; ++i)
320 			if ((spec[i] | mask[i]) != mask[i])
/* user-provided mask must itself be a subset of the supported mask. */
325 		const uint8_t *spec = item->mask;
327 		for (i = 0; i < size; ++i)
328 			if ((spec[i] | mask[i]) != mask[i])
/* spec + last: after applying the mask both bounds must be equal,
 * i.e. ranges are not supported beyond a degenerate one. */
331 	if (item->spec && item->last) {
334 		const uint8_t *apply = mask;
339 		for (i = 0; i < size; ++i) {
340 			spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
341 			last[i] = ((const uint8_t *)item->last)[i] & apply[i];
343 		ret = memcmp(spec, last, size);
349 * Validate a flow supported by the NIC.
352 * Pointer to private structure.
354 * Flow rule attributes.
356 * Pattern specification (list terminated by the END pattern item).
358 * Associated actions (list terminated by the END action).
360 * Perform verbose error reporting if not NULL.
361 * @param[in, out] flow
362 * Flow structure to update.
365 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Validate attributes, pattern and actions against the PMD's
 * capabilities.  Walks the mlx5_flow_items graph item by item; when
 * flow->ibv_attr is set it also converts each item, otherwise it only
 * accumulates the required buffer size in flow->offset.  Interior
 * lines (returns, loop headers, braces) are elided in this excerpt.
 */
368 priv_flow_validate(struct priv *priv,
369 		const struct rte_flow_attr *attr,
370 		const struct rte_flow_item items[],
371 		const struct rte_flow_action actions[],
372 		struct rte_flow_error *error,
373 		struct mlx5_flow *flow)
375 	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
376 	struct mlx5_flow_action action = {
/* Only the default group, priority 0, ingress-only rules are valid. */
384 		rte_flow_error_set(error, ENOTSUP,
385 			RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
387 			"groups are not supported");
390 	if (attr->priority) {
391 		rte_flow_error_set(error, ENOTSUP,
392 			RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
394 			"priorities are not supported");
398 		rte_flow_error_set(error, ENOTSUP,
399 			RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
401 			"egress is not supported");
404 	if (!attr->ingress) {
405 		rte_flow_error_set(error, ENOTSUP,
406 			RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
408 			"only ingress is supported");
/* Pattern walk: each item must be a legal successor of the previous
 * one according to cur_item->items. */
411 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
412 		const struct mlx5_flow_items *token = NULL;
416 		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
420 			cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
422 			if (cur_item->items[i] == items->type) {
423 				token = &mlx5_flow_items[items->type];
428 			goto exit_item_not_supported;
430 		err = mlx5_flow_item_validate(items,
431 			(const uint8_t *)cur_item->mask,
434 			goto exit_item_not_supported;
/* Second pass only: ibv_attr is non-NULL, so actually convert. */
435 		if (flow->ibv_attr && cur_item->convert) {
436 			err = cur_item->convert(items,
437 				(cur_item->default_mask ?
438 				cur_item->default_mask :
442 				goto exit_item_not_supported;
444 		flow->offset += cur_item->dst_sz;
446 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
447 		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
449 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
451 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
452 			const struct rte_flow_action_queue *queue =
453 				(const struct rte_flow_action_queue *)
/* NOTE(review): if priv->rxqs_n is 0 the unsigned subtraction wraps,
 * letting any index through — "queue->index >= priv->rxqs_n" would be
 * safer; confirm rxqs_n's type and whether 0 can reach this point. */
456 			if (!queue || (queue->index > (priv->rxqs_n - 1)))
457 				goto exit_action_not_supported;
459 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
460 			const struct rte_flow_action_mark *mark =
461 				(const struct rte_flow_action_mark *)
465 				rte_flow_error_set(error, EINVAL,
466 					RTE_FLOW_ERROR_TYPE_ACTION,
468 					"mark must be defined");
470 			} else if (mark->id >= MLX5_FLOW_MARK_MAX) {
471 				rte_flow_error_set(error, ENOTSUP,
472 					RTE_FLOW_ERROR_TYPE_ACTION,
474 					"mark must be between 0"
480 			goto exit_action_not_supported;
/* Reserve room for the tag spec during the sizing pass only. */
483 	if (action.mark && !flow->ibv_attr && !action.drop)
484 		flow->offset += sizeof(struct ibv_exp_flow_spec_action_tag);
485 	if (!action.queue && !action.drop) {
486 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
487 			NULL, "no valid action");
491 exit_item_not_supported:
492 	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
493 		items, "item not supported");
495 exit_action_not_supported:
496 	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
497 		actions, "action not supported");
502 * Validate a flow supported by the NIC.
504 * @see rte_flow_validate()
/*
 * rte_flow validate() entry point: delegates to priv_flow_validate()
 * with a fresh sizing-only mlx5_flow (ibv_attr left NULL).
 */
508 mlx5_flow_validate(struct rte_eth_dev *dev,
509 		const struct rte_flow_attr *attr,
510 		const struct rte_flow_item items[],
511 		const struct rte_flow_action actions[],
512 		struct rte_flow_error *error)
514 	struct priv *priv = dev->data->dev_private;
516 	struct mlx5_flow flow = { .offset = sizeof(struct ibv_exp_flow_attr) };
519 	ret = priv_flow_validate(priv, attr, items, actions, error, &flow);
525 * Convert Ethernet item to Verbs specification.
528 * Item specification.
529 * @param default_mask[in]
530 * Default bit-masks to use when item->mask is not provided.
531 * @param data[in, out]
/*
 * Append an Ethernet spec at flow->offset inside the Verbs attribute
 * buffer.  When the item carries no spec, only the typed header is
 * written (lines selecting default_mask when mask is NULL appear to be
 * elided from this excerpt).
 */
535 mlx5_flow_create_eth(const struct rte_flow_item *item,
536 		const void *default_mask,
539 	const struct rte_flow_item_eth *spec = item->spec;
540 	const struct rte_flow_item_eth *mask = item->mask;
541 	struct mlx5_flow *flow = (struct mlx5_flow *)data;
542 	struct ibv_exp_flow_spec_eth *eth;
543 	const unsigned int eth_size = sizeof(struct ibv_exp_flow_spec_eth);
/* Priority 2: L2-only matches rank below L3/L4 matches. */
546 	++flow->ibv_attr->num_of_specs;
547 	flow->ibv_attr->priority = 2;
548 	eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
549 	*eth = (struct ibv_exp_flow_spec_eth) {
550 		.type = flow->inner | IBV_EXP_FLOW_SPEC_ETH,
557 	memcpy(eth->val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
558 	memcpy(eth->val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
559 	eth->val.ether_type = spec->type;
560 	memcpy(eth->mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
561 	memcpy(eth->mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
562 	eth->mask.ether_type = mask->type;
563 	/* Remove unwanted bits from values. */
564 	for (i = 0; i < ETHER_ADDR_LEN; ++i) {
565 		eth->val.dst_mac[i] &= eth->mask.dst_mac[i];
566 		eth->val.src_mac[i] &= eth->mask.src_mac[i];
568 	eth->val.ether_type &= eth->mask.ether_type;
573 * Convert VLAN item to Verbs specification.
576 * Item specification.
577 * @param default_mask[in]
578 * Default bit-masks to use when item->mask is not provided.
579 * @param data[in, out]
/*
 * Fold the VLAN TCI into the Ethernet spec written by the previous
 * item (located at offset - eth_size); no new spec is appended, which
 * is consistent with the VLAN graph entry having no dst_sz.
 */
583 mlx5_flow_create_vlan(const struct rte_flow_item *item,
584 		const void *default_mask,
587 	const struct rte_flow_item_vlan *spec = item->spec;
588 	const struct rte_flow_item_vlan *mask = item->mask;
589 	struct mlx5_flow *flow = (struct mlx5_flow *)data;
590 	struct ibv_exp_flow_spec_eth *eth;
591 	const unsigned int eth_size = sizeof(struct ibv_exp_flow_spec_eth);
/* Step back onto the eth spec emitted by mlx5_flow_create_eth(). */
593 	eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset - eth_size);
598 	eth->val.vlan_tag = spec->tci;
599 	eth->mask.vlan_tag = mask->tci;
600 	eth->val.vlan_tag &= eth->mask.vlan_tag;
605 * Convert IPv4 item to Verbs specification.
608 * Item specification.
609 * @param default_mask[in]
610 * Default bit-masks to use when item->mask is not provided.
611 * @param data[in, out]
/*
 * Append an IPv4 (extended) spec: src/dst address, protocol and TOS,
 * each value ANDed with its mask so no stray bits reach the NIC.
 */
615 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
616 		const void *default_mask,
619 	const struct rte_flow_item_ipv4 *spec = item->spec;
620 	const struct rte_flow_item_ipv4 *mask = item->mask;
621 	struct mlx5_flow *flow = (struct mlx5_flow *)data;
622 	struct ibv_exp_flow_spec_ipv4_ext *ipv4;
623 	unsigned int ipv4_size = sizeof(struct ibv_exp_flow_spec_ipv4_ext);
/* Priority 1: L3 matches rank between L2 (2) and L4 (0). */
625 	++flow->ibv_attr->num_of_specs;
626 	flow->ibv_attr->priority = 1;
627 	ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
628 	*ipv4 = (struct ibv_exp_flow_spec_ipv4_ext) {
629 		.type = flow->inner | IBV_EXP_FLOW_SPEC_IPV4_EXT,
636 	ipv4->val = (struct ibv_exp_flow_ipv4_ext_filter){
637 		.src_ip = spec->hdr.src_addr,
638 		.dst_ip = spec->hdr.dst_addr,
639 		.proto = spec->hdr.next_proto_id,
640 		.tos = spec->hdr.type_of_service,
642 	ipv4->mask = (struct ibv_exp_flow_ipv4_ext_filter){
643 		.src_ip = mask->hdr.src_addr,
644 		.dst_ip = mask->hdr.dst_addr,
645 		.proto = mask->hdr.next_proto_id,
646 		.tos = mask->hdr.type_of_service,
648 	/* Remove unwanted bits from values. */
649 	ipv4->val.src_ip &= ipv4->mask.src_ip;
650 	ipv4->val.dst_ip &= ipv4->mask.dst_ip;
651 	ipv4->val.proto &= ipv4->mask.proto;
652 	ipv4->val.tos &= ipv4->mask.tos;
657 * Convert IPv6 item to Verbs specification.
660 * Item specification.
661 * @param default_mask[in]
662 * Default bit-masks to use when item->mask is not provided.
663 * @param data[in, out]
/*
 * Append an IPv6 (extended) spec: 128-bit addresses are copied via
 * memcpy, flow label / next header / hop limit are masked like the
 * scalar IPv4 fields.
 */
667 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
668 		const void *default_mask,
671 	const struct rte_flow_item_ipv6 *spec = item->spec;
672 	const struct rte_flow_item_ipv6 *mask = item->mask;
673 	struct mlx5_flow *flow = (struct mlx5_flow *)data;
674 	struct ibv_exp_flow_spec_ipv6_ext *ipv6;
675 	unsigned int ipv6_size = sizeof(struct ibv_exp_flow_spec_ipv6_ext);
677 	++flow->ibv_attr->num_of_specs;
678 	flow->ibv_attr->priority = 1;
679 	ipv6 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
680 	*ipv6 = (struct ibv_exp_flow_spec_ipv6_ext) {
681 		.type = flow->inner | IBV_EXP_FLOW_SPEC_IPV6_EXT,
688 	memcpy(ipv6->val.src_ip, spec->hdr.src_addr,
689 		RTE_DIM(ipv6->val.src_ip));
690 	memcpy(ipv6->val.dst_ip, spec->hdr.dst_addr,
691 		RTE_DIM(ipv6->val.dst_ip));
692 	memcpy(ipv6->mask.src_ip, mask->hdr.src_addr,
693 		RTE_DIM(ipv6->mask.src_ip));
694 	memcpy(ipv6->mask.dst_ip, mask->hdr.dst_addr,
695 		RTE_DIM(ipv6->mask.dst_ip));
696 	ipv6->mask.flow_label = mask->hdr.vtc_flow;
697 	ipv6->mask.next_hdr = mask->hdr.proto;
698 	ipv6->mask.hop_limit = mask->hdr.hop_limits;
/* NOTE(review): address bytes are not re-masked here the way the
 * scalar fields are — confirm whether masked addresses are applied
 * elsewhere or whether this relies on the NIC honoring the mask. */
699 	ipv6->val.flow_label &= ipv6->mask.flow_label;
700 	ipv6->val.next_hdr &= ipv6->mask.next_hdr;
701 	ipv6->val.hop_limit &= ipv6->mask.hop_limit;
706 * Convert UDP item to Verbs specification.
709 * Item specification.
710 * @param default_mask[in]
711 * Default bit-masks to use when item->mask is not provided.
712 * @param data[in, out]
/*
 * Append a UDP spec (shared tcp_udp layout) matching on source and
 * destination ports, values masked before use.
 */
716 mlx5_flow_create_udp(const struct rte_flow_item *item,
717 		const void *default_mask,
720 	const struct rte_flow_item_udp *spec = item->spec;
721 	const struct rte_flow_item_udp *mask = item->mask;
722 	struct mlx5_flow *flow = (struct mlx5_flow *)data;
723 	struct ibv_exp_flow_spec_tcp_udp *udp;
724 	unsigned int udp_size = sizeof(struct ibv_exp_flow_spec_tcp_udp);
/* Priority 0: L4 matches are the most specific. */
726 	++flow->ibv_attr->num_of_specs;
727 	flow->ibv_attr->priority = 0;
728 	udp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
729 	*udp = (struct ibv_exp_flow_spec_tcp_udp) {
730 		.type = flow->inner | IBV_EXP_FLOW_SPEC_UDP,
737 	udp->val.dst_port = spec->hdr.dst_port;
738 	udp->val.src_port = spec->hdr.src_port;
739 	udp->mask.dst_port = mask->hdr.dst_port;
740 	udp->mask.src_port = mask->hdr.src_port;
741 	/* Remove unwanted bits from values. */
742 	udp->val.src_port &= udp->mask.src_port;
743 	udp->val.dst_port &= udp->mask.dst_port;
748 * Convert TCP item to Verbs specification.
751 * Item specification.
752 * @param default_mask[in]
753 * Default bit-masks to use when item->mask is not provided.
754 * @param data[in, out]
/*
 * Append a TCP spec; identical structure to the UDP converter apart
 * from the IBV_EXP_FLOW_SPEC_TCP type.
 */
758 mlx5_flow_create_tcp(const struct rte_flow_item *item,
759 		const void *default_mask,
762 	const struct rte_flow_item_tcp *spec = item->spec;
763 	const struct rte_flow_item_tcp *mask = item->mask;
764 	struct mlx5_flow *flow = (struct mlx5_flow *)data;
765 	struct ibv_exp_flow_spec_tcp_udp *tcp;
766 	unsigned int tcp_size = sizeof(struct ibv_exp_flow_spec_tcp_udp);
768 	++flow->ibv_attr->num_of_specs;
769 	flow->ibv_attr->priority = 0;
770 	tcp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
771 	*tcp = (struct ibv_exp_flow_spec_tcp_udp) {
772 		.type = flow->inner | IBV_EXP_FLOW_SPEC_TCP,
779 	tcp->val.dst_port = spec->hdr.dst_port;
780 	tcp->val.src_port = spec->hdr.src_port;
781 	tcp->mask.dst_port = mask->hdr.dst_port;
782 	tcp->mask.src_port = mask->hdr.src_port;
783 	/* Remove unwanted bits from values. */
784 	tcp->val.src_port &= tcp->mask.src_port;
785 	tcp->val.dst_port &= tcp->mask.dst_port;
790 * Convert VXLAN item to Verbs specification.
793 * Item specification.
794 * @param default_mask[in]
795 * Default bit-masks to use when item->mask is not provided.
796 * @param data[in, out]
/*
 * Append a VXLAN tunnel spec and flip flow->inner so every following
 * item is tagged as an inner-header match.  The 24-bit VNI is packed
 * into bytes 1..3 of a tunnel-id union (declaration elided here).
 */
800 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
801 		const void *default_mask,
804 	const struct rte_flow_item_vxlan *spec = item->spec;
805 	const struct rte_flow_item_vxlan *mask = item->mask;
806 	struct mlx5_flow *flow = (struct mlx5_flow *)data;
807 	struct ibv_exp_flow_spec_tunnel *vxlan;
808 	unsigned int size = sizeof(struct ibv_exp_flow_spec_tunnel);
814 	++flow->ibv_attr->num_of_specs;
815 	flow->ibv_attr->priority = 0;
817 	vxlan = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
818 	*vxlan = (struct ibv_exp_flow_spec_tunnel) {
819 		.type = flow->inner | IBV_EXP_FLOW_SPEC_VXLAN_TUNNEL,
/* All subsequent items now describe the encapsulated packet. */
822 	flow->inner = IBV_EXP_FLOW_SPEC_INNER;
827 	memcpy(&id.vni[1], spec->vni, 3);
828 	vxlan->val.tunnel_id = id.vlan_id;
829 	memcpy(&id.vni[1], mask->vni, 3);
830 	vxlan->mask.tunnel_id = id.vlan_id;
831 	/* Remove unwanted bits from values. */
832 	vxlan->val.tunnel_id &= vxlan->mask.tunnel_id;
837 * Convert mark/flag action to Verbs specification.
840 * Pointer to MLX5 flow structure.
/*
 * Append an action-tag spec carrying the (encoded) mark id so matched
 * packets are tagged in their completion entries.
 */
845 mlx5_flow_create_flag_mark(struct mlx5_flow *flow, uint32_t mark_id)
847 	struct ibv_exp_flow_spec_action_tag *tag;
848 	unsigned int size = sizeof(struct ibv_exp_flow_spec_action_tag);
850 	tag = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
851 	*tag = (struct ibv_exp_flow_spec_action_tag){
852 		.type = IBV_EXP_FLOW_SPEC_ACTION_TAG,
/* mlx5_flow_mark_set() re-encodes the id for the hardware. */
854 		.tag_id = mlx5_flow_mark_set(mark_id),
856 	++flow->ibv_attr->num_of_specs;
861 * Complete flow rule creation.
864 * Pointer to private structure.
866 * Verbs flow attributes.
868 * Target action structure.
870 * Perform verbose error reporting if not NULL.
873 * A flow if the rule could be created.
/*
 * Instantiate the Verbs objects for a validated rule.  Two paths:
 * drop rules create a private CQ + WQ; queue rules borrow the WQ of
 * the target Rx queue.  Either way a single-entry indirection table
 * and a RAW_PACKET QP wrap the WQ, then the flow is attached to the
 * QP.  The error path tears down only what this flow owns.  Interior
 * lines (gotos, branch headers) are elided from this excerpt.
 */
875 static struct rte_flow *
876 priv_flow_create_action_queue(struct priv *priv,
877 		struct ibv_exp_flow_attr *ibv_attr,
878 		struct mlx5_flow_action *action,
879 		struct rte_flow_error *error)
881 	struct rxq_ctrl *rxq;
882 	struct rte_flow *rte_flow;
886 	rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow), 0);
888 		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
889 			NULL, "cannot allocate flow memory");
/* Drop path: a minimal private CQ/WQ pair that receives nothing. */
894 		ibv_exp_create_cq(priv->ctx, 1, NULL, NULL, 0,
895 			&(struct ibv_exp_cq_init_attr){
899 		rte_flow_error_set(error, ENOMEM,
900 			RTE_FLOW_ERROR_TYPE_HANDLE,
901 			NULL, "cannot allocate CQ");
904 	rte_flow->wq = ibv_exp_create_wq(priv->ctx,
905 		&(struct ibv_exp_wq_init_attr){
906 			.wq_type = IBV_EXP_WQT_RQ,
913 		rte_flow_error_set(error, ENOMEM,
914 			RTE_FLOW_ERROR_TYPE_HANDLE,
915 			NULL, "cannot allocate WQ");
/* Queue path: reuse the target Rx queue's WQ and propagate mark. */
919 	rxq = container_of((*priv->rxqs)[action->queue_id],
920 		struct rxq_ctrl, rxq);
921 	rte_flow->rxq = &rxq->rxq;
922 	rxq->rxq.mark |= action->mark;
923 	rte_flow->wq = rxq->wq;
925 	rte_flow->mark = action->mark;
926 	rte_flow->ibv_attr = ibv_attr;
927 	rte_flow->ind_table = ibv_exp_create_rwq_ind_table(
929 		&(struct ibv_exp_rwq_ind_table_init_attr){
931 			.log_ind_tbl_size = 0,
932 			.ind_tbl = &rte_flow->wq,
935 	if (!rte_flow->ind_table) {
936 		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
937 			NULL, "cannot allocate indirection table");
/* RSS is effectively disabled: hash fields mask is 0, single WQ. */
940 	rte_flow->qp = ibv_exp_create_qp(
942 		&(struct ibv_exp_qp_init_attr){
943 			.qp_type = IBV_QPT_RAW_PACKET,
945 				IBV_EXP_QP_INIT_ATTR_PD |
946 				IBV_EXP_QP_INIT_ATTR_PORT |
947 				IBV_EXP_QP_INIT_ATTR_RX_HASH,
949 			.rx_hash_conf = &(struct ibv_exp_rx_hash_conf){
951 					IBV_EXP_RX_HASH_FUNC_TOEPLITZ,
952 				.rx_hash_key_len = rss_hash_default_key_len,
953 				.rx_hash_key = rss_hash_default_key,
954 				.rx_hash_fields_mask = 0,
955 				.rwq_ind_tbl = rte_flow->ind_table,
957 			.port_num = priv->port,
960 		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
961 			NULL, "cannot allocate QP");
966 	rte_flow->ibv_flow = ibv_exp_create_flow(rte_flow->qp,
968 	if (!rte_flow->ibv_flow) {
969 		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
970 			NULL, "flow rule creation failure");
/* Error cleanup: WQ/CQ are destroyed only when privately owned
 * (rxq == NULL identifies the drop path). */
977 		ibv_destroy_qp(rte_flow->qp);
978 	if (rte_flow->ind_table)
979 		ibv_exp_destroy_rwq_ind_table(rte_flow->ind_table);
980 	if (!rte_flow->rxq && rte_flow->wq)
981 		ibv_exp_destroy_wq(rte_flow->wq);
982 	if (!rte_flow->rxq && rte_flow->cq)
983 		ibv_destroy_cq(rte_flow->cq);
992 * Pointer to private structure.
994 * Flow rule attributes.
996 * Pattern specification (list terminated by the END pattern item).
998 * Associated actions (list terminated by the END action).
1000 * Perform verbose error reporting if not NULL.
1003 * A flow on success, NULL otherwise.
/*
 * Build a flow in two validate passes: pass 1 (ibv_attr == NULL) only
 * sizes the Verbs attribute buffer via flow.offset; after allocating
 * the buffer, pass 2 converts every item into it.  Then the action
 * list is summarized and the Verbs objects are created.  Interior
 * lines (returns, gotos, braces) are elided from this excerpt.
 */
1005 static struct rte_flow *
1006 priv_flow_create(struct priv *priv,
1007 		const struct rte_flow_attr *attr,
1008 		const struct rte_flow_item items[],
1009 		const struct rte_flow_action actions[],
1010 		struct rte_flow_error *error)
1012 	struct rte_flow *rte_flow;
1013 	struct mlx5_flow_action action;
1014 	struct mlx5_flow flow = { .offset = sizeof(struct ibv_exp_flow_attr), };
/* Pass 1: validation + size accumulation. */
1017 	err = priv_flow_validate(priv, attr, items, actions, error, &flow);
1020 	flow.ibv_attr = rte_malloc(__func__, flow.offset, 0);
/* Reset the cursor so conversion starts right after the header. */
1021 	flow.offset = sizeof(struct ibv_exp_flow_attr);
1022 	if (!flow.ibv_attr) {
1023 		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1024 			NULL, "cannot allocate ibv_attr memory");
1027 	*flow.ibv_attr = (struct ibv_exp_flow_attr){
1028 		.type = IBV_EXP_FLOW_ATTR_NORMAL,
1029 		.size = sizeof(struct ibv_exp_flow_attr),
1030 		.priority = attr->priority,
/* Pass 2 cannot fail (already validated), hence claim_zero(). */
1037 	claim_zero(priv_flow_validate(priv, attr, items, actions,
1039 	action = (struct mlx5_flow_action){
1043 		.mark_id = MLX5_FLOW_MARK_DEFAULT,
1045 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
1046 		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
1048 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
1051 				((const struct rte_flow_action_queue *)
1052 				actions->conf)->index;
1053 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
1056 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
1057 			const struct rte_flow_action_mark *mark =
1058 				(const struct rte_flow_action_mark *)
1062 				action.mark_id = mark->id;
/* Mark is meaningless on a drop rule, so it is suppressed there. */
1063 			action.mark = !action.drop;
1065 			rte_flow_error_set(error, ENOTSUP,
1066 				RTE_FLOW_ERROR_TYPE_ACTION,
1067 				actions, "unsupported action");
1072 		mlx5_flow_create_flag_mark(&flow, action.mark_id);
1073 		flow.offset += sizeof(struct ibv_exp_flow_spec_action_tag);
1075 	rte_flow = priv_flow_create_action_queue(priv, flow.ibv_attr,
/* On failure the attribute buffer is freed here; on success its
 * ownership moved into the rte_flow. */
1081 	rte_free(flow.ibv_attr);
1088 * @see rte_flow_create()
/*
 * rte_flow create() entry point: delegate to priv_flow_create() and,
 * on success, register the new flow on the device's flow list.
 */
1092 mlx5_flow_create(struct rte_eth_dev *dev,
1093 		const struct rte_flow_attr *attr,
1094 		const struct rte_flow_item items[],
1095 		const struct rte_flow_action actions[],
1096 		struct rte_flow_error *error)
1098 	struct priv *priv = dev->data->dev_private;
1099 	struct rte_flow *flow;
1102 	flow = priv_flow_create(priv, attr, items, actions, error);
1104 		LIST_INSERT_HEAD(&priv->flows, flow, next);
1105 	DEBUG("Flow created %p", (void *)flow);
1115 * Pointer to private structure.
/*
 * Unlink and destroy one flow: detach the Verbs flow, release the
 * objects the flow owns (queue-path WQ/CQ are skipped, they belong to
 * the Rx queue), then recompute the queue's mark flag from the flows
 * that remain.  Interior lines are elided from this excerpt.
 */
1120 priv_flow_destroy(struct priv *priv,
1121 		struct rte_flow *flow)
1124 	LIST_REMOVE(flow, next);
1126 	claim_zero(ibv_exp_destroy_flow(flow->ibv_flow));
1128 	claim_zero(ibv_destroy_qp(flow->qp));
1129 	if (flow->ind_table)
1130 		claim_zero(ibv_exp_destroy_rwq_ind_table(flow->ind_table));
1131 	if (!flow->rxq && flow->wq)
1132 		claim_zero(ibv_exp_destroy_wq(flow->wq));
1133 	if (!flow->rxq && flow->cq)
1134 		claim_zero(ibv_destroy_cq(flow->cq));
/* Keep rxq->mark set only while some remaining flow still marks
 * packets on the same queue. */
1136 	struct rte_flow *tmp;
1137 	uint32_t mark_n = 0;
1139 	for (tmp = LIST_FIRST(&priv->flows);
1141 		tmp = LIST_NEXT(tmp, next)) {
1142 		if ((flow->rxq == tmp->rxq) && tmp->mark)
1145 	flow->rxq->mark = !!mark_n;
1147 	rte_free(flow->ibv_attr);
1148 	DEBUG("Flow destroyed %p", (void *)flow);
1155 * @see rte_flow_destroy()
/*
 * rte_flow destroy() entry point: thin wrapper around
 * priv_flow_destroy() (error parameter unused in the visible lines).
 */
1159 mlx5_flow_destroy(struct rte_eth_dev *dev,
1160 		struct rte_flow *flow,
1161 		struct rte_flow_error *error)
1163 	struct priv *priv = dev->data->dev_private;
1167 	priv_flow_destroy(priv, flow);
1173 * Destroy all flows.
1176 * Pointer to private structure.
/*
 * Destroy every flow on the device, always taking the current list
 * head since priv_flow_destroy() unlinks it.
 */
1179 priv_flow_flush(struct priv *priv)
1181 	while (!LIST_EMPTY(&priv->flows)) {
1182 		struct rte_flow *flow;
1184 		flow = LIST_FIRST(&priv->flows);
1185 		priv_flow_destroy(priv, flow);
1190 * Destroy all flows.
1192 * @see rte_flow_flush()
/*
 * rte_flow flush() entry point: thin wrapper around priv_flow_flush().
 */
1196 mlx5_flow_flush(struct rte_eth_dev *dev,
1197 		struct rte_flow_error *error)
1199 	struct priv *priv = dev->data->dev_private;
1203 	priv_flow_flush(priv);
1211 * Called by dev_stop() to remove all flows.
1214 * Pointer to private structure.
/*
 * Detach every flow from hardware without freeing the software state,
 * so priv_flow_start() can re-attach them later.  Clears ibv_flow as
 * the "currently detached" marker and drops the queue mark flag.
 */
1217 priv_flow_stop(struct priv *priv)
1219 	struct rte_flow *flow;
1221 	for (flow = LIST_FIRST(&priv->flows);
1223 		flow = LIST_NEXT(flow, next)) {
1224 		claim_zero(ibv_exp_destroy_flow(flow->ibv_flow));
1225 		flow->ibv_flow = NULL;
1227 			flow->rxq->mark = 0;
1228 		DEBUG("Flow %p removed", (void *)flow);
1236 * Pointer to private structure.
1239 * 0 on success, a errno value otherwise and rte_errno is set.
1242 priv_flow_start(struct priv *priv)
1244 struct rte_flow *flow;
1246 for (flow = LIST_FIRST(&priv->flows);
1248 flow = LIST_NEXT(flow, next)) {
1249 flow->ibv_flow = ibv_exp_create_flow(flow->qp,
1251 if (!flow->ibv_flow) {
1252 DEBUG("Flow %p cannot be applied", (void *)flow);
1256 DEBUG("Flow %p applied", (void *)flow);
1258 flow->rxq->mark |= flow->mark;