4 * Copyright 2016 6WIND S.A.
5 * Copyright 2016 Mellanox.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of 6WIND S.A. nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 #include <sys/queue.h>
38 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
40 #pragma GCC diagnostic ignored "-Wpedantic"
42 #include <infiniband/verbs.h>
44 #pragma GCC diagnostic error "-Wpedantic"
47 #include <rte_ethdev.h>
49 #include <rte_flow_driver.h>
50 #include <rte_malloc.h>
56 mlx5_flow_create_eth(const struct rte_flow_item *item,
57 const void *default_mask,
61 mlx5_flow_create_vlan(const struct rte_flow_item *item,
62 const void *default_mask,
66 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
67 const void *default_mask,
71 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
72 const void *default_mask,
76 mlx5_flow_create_udp(const struct rte_flow_item *item,
77 const void *default_mask,
81 mlx5_flow_create_tcp(const struct rte_flow_item *item,
82 const void *default_mask,
86 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
87 const void *default_mask,
91 LIST_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
92 struct ibv_exp_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
93 struct ibv_exp_rwq_ind_table *ind_table; /**< Indirection table. */
94 struct ibv_qp *qp; /**< Verbs queue pair. */
95 struct ibv_exp_flow *ibv_flow; /**< Verbs flow. */
96 struct ibv_exp_wq *wq; /**< Verbs work queue. */
97 struct ibv_cq *cq; /**< Verbs completion queue. */
98 struct rxq *rxq; /**< Pointer to the queue, NULL if drop queue. */
99 uint32_t mark:1; /**< Set if the flow is marked. */
102 /** Static initializer for items. */
104 (const enum rte_flow_item_type []){ \
105 __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
108 /** Structure to generate a simple graph of layers supported by the NIC. */
109 struct mlx5_flow_items {
110 /** List of possible actions for these items. */
111 const enum rte_flow_action_type *const actions;
112 /** Bit-masks corresponding to the possibilities for the item. */
115 * Default bit-masks to use when item->mask is not provided. When
116 * \default_mask is also NULL, the full supported bit-mask (\mask) is
119 const void *default_mask;
120 /** Bit-masks size in bytes. */
121 const unsigned int mask_sz;
123 * Conversion function from rte_flow to NIC specific flow.
126 * rte_flow item to convert.
127 * @param default_mask
128 * Default bit-masks to use when item->mask is not provided.
130 * Internal structure to store the conversion.
133 * 0 on success, negative value otherwise.
135 int (*convert)(const struct rte_flow_item *item,
136 const void *default_mask,
138 /** Size in bytes of the destination structure. */
139 const unsigned int dst_sz;
140 /** List of possible following items. */
141 const enum rte_flow_item_type *const items;
144 /** Valid action for this PMD. */
145 static const enum rte_flow_action_type valid_actions[] = {
146 RTE_FLOW_ACTION_TYPE_DROP,
147 RTE_FLOW_ACTION_TYPE_QUEUE,
148 RTE_FLOW_ACTION_TYPE_MARK,
149 RTE_FLOW_ACTION_TYPE_END,
152 /** Graph of supported items and associated actions. */
153 static const struct mlx5_flow_items mlx5_flow_items[] = {
154 [RTE_FLOW_ITEM_TYPE_END] = {
155 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
156 RTE_FLOW_ITEM_TYPE_VXLAN),
158 [RTE_FLOW_ITEM_TYPE_ETH] = {
159 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
160 RTE_FLOW_ITEM_TYPE_IPV4,
161 RTE_FLOW_ITEM_TYPE_IPV6),
162 .actions = valid_actions,
163 .mask = &(const struct rte_flow_item_eth){
164 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
165 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
167 .mask_sz = sizeof(struct rte_flow_item_eth),
168 .convert = mlx5_flow_create_eth,
169 .dst_sz = sizeof(struct ibv_exp_flow_spec_eth),
171 [RTE_FLOW_ITEM_TYPE_VLAN] = {
172 .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
173 RTE_FLOW_ITEM_TYPE_IPV6),
174 .actions = valid_actions,
175 .mask = &(const struct rte_flow_item_vlan){
178 .mask_sz = sizeof(struct rte_flow_item_vlan),
179 .convert = mlx5_flow_create_vlan,
182 [RTE_FLOW_ITEM_TYPE_IPV4] = {
183 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
184 RTE_FLOW_ITEM_TYPE_TCP),
185 .actions = valid_actions,
186 .mask = &(const struct rte_flow_item_ipv4){
190 .type_of_service = -1,
194 .default_mask = &(const struct rte_flow_item_ipv4){
200 .mask_sz = sizeof(struct rte_flow_item_ipv4),
201 .convert = mlx5_flow_create_ipv4,
202 .dst_sz = sizeof(struct ibv_exp_flow_spec_ipv4_ext),
204 [RTE_FLOW_ITEM_TYPE_IPV6] = {
205 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
206 RTE_FLOW_ITEM_TYPE_TCP),
207 .actions = valid_actions,
208 .mask = &(const struct rte_flow_item_ipv6){
211 0xff, 0xff, 0xff, 0xff,
212 0xff, 0xff, 0xff, 0xff,
213 0xff, 0xff, 0xff, 0xff,
214 0xff, 0xff, 0xff, 0xff,
217 0xff, 0xff, 0xff, 0xff,
218 0xff, 0xff, 0xff, 0xff,
219 0xff, 0xff, 0xff, 0xff,
220 0xff, 0xff, 0xff, 0xff,
224 .mask_sz = sizeof(struct rte_flow_item_ipv6),
225 .convert = mlx5_flow_create_ipv6,
226 .dst_sz = sizeof(struct ibv_exp_flow_spec_ipv6),
228 [RTE_FLOW_ITEM_TYPE_UDP] = {
229 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
230 .actions = valid_actions,
231 .mask = &(const struct rte_flow_item_udp){
237 .mask_sz = sizeof(struct rte_flow_item_udp),
238 .convert = mlx5_flow_create_udp,
239 .dst_sz = sizeof(struct ibv_exp_flow_spec_tcp_udp),
241 [RTE_FLOW_ITEM_TYPE_TCP] = {
242 .actions = valid_actions,
243 .mask = &(const struct rte_flow_item_tcp){
249 .mask_sz = sizeof(struct rte_flow_item_tcp),
250 .convert = mlx5_flow_create_tcp,
251 .dst_sz = sizeof(struct ibv_exp_flow_spec_tcp_udp),
253 [RTE_FLOW_ITEM_TYPE_VXLAN] = {
254 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
255 .actions = valid_actions,
256 .mask = &(const struct rte_flow_item_vxlan){
257 .vni = "\xff\xff\xff",
259 .mask_sz = sizeof(struct rte_flow_item_vxlan),
260 .convert = mlx5_flow_create_vxlan,
261 .dst_sz = sizeof(struct ibv_exp_flow_spec_tunnel),
265 /** Structure to pass to the conversion function. */
267 struct ibv_exp_flow_attr *ibv_attr; /**< Verbs attribute. */
268 unsigned int offset; /**< Offset in bytes in the ibv_attr buffer. */
269 uint32_t inner; /**< Set once VXLAN is encountered. */
272 struct mlx5_flow_action {
273 uint32_t queue:1; /**< Target is a receive queue. */
274 uint32_t drop:1; /**< Target is a drop queue. */
275 uint32_t mark:1; /**< Mark is present in the flow. */
276 uint32_t queue_id; /**< Identifier of the queue. */
277 uint32_t mark_id; /**< Mark identifier. */
281 * Check support for a given item.
284 * Item specification.
286 * Bit-masks covering supported fields to compare with spec, last and mask in
289 * Bit-Mask size in bytes.
295 mlx5_flow_item_validate(const struct rte_flow_item *item,
296 const uint8_t *mask, unsigned int size)
/* An item carrying a mask or a range end without a spec is invalid. */
300 if (!item->spec && (item->mask || item->last))
/* No user mask: the spec must fit entirely within the supported mask. */
302 if (item->spec && !item->mask) {
304 const uint8_t *spec = item->spec;
306 for (i = 0; i < size; ++i)
307 if ((spec[i] | mask[i]) != mask[i])
/* Same containment check for the range end (last). */
310 if (item->last && !item->mask) {
312 const uint8_t *spec = item->last;
314 for (i = 0; i < size; ++i)
315 if ((spec[i] | mask[i]) != mask[i])
/* A user-provided mask may not request bits outside the supported mask. */
320 const uint8_t *spec = item->mask;
322 for (i = 0; i < size; ++i)
323 if ((spec[i] | mask[i]) != mask[i])
/* With both spec and last present, compare them after masking;
 * a nonzero memcmp presumably rejects the rule (true ranges appear
 * unsupported) — some original lines are not visible here to confirm. */
326 if (item->spec && item->last) {
329 const uint8_t *apply = mask;
334 for (i = 0; i < size; ++i) {
335 spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
336 last[i] = ((const uint8_t *)item->last)[i] & apply[i];
338 ret = memcmp(spec, last, size);
344 * Validate a flow supported by the NIC.
347 * Pointer to private structure.
349 * Flow rule attributes.
351 * Pattern specification (list terminated by the END pattern item).
353 * Associated actions (list terminated by the END action).
355 * Perform verbose error reporting if not NULL.
356 * @param[in, out] flow
357 * Flow structure to update.
360 * 0 on success, a negative errno value otherwise and rte_errno is set.
363 priv_flow_validate(struct priv *priv,
364 const struct rte_flow_attr *attr,
365 const struct rte_flow_item items[],
366 const struct rte_flow_action actions[],
367 struct rte_flow_error *error,
368 struct mlx5_flow *flow)
370 const struct mlx5_flow_items *cur_item = mlx5_flow_items;
371 struct mlx5_flow_action action = {
379 rte_flow_error_set(error, ENOTSUP,
380 RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
382 "groups are not supported");
385 if (attr->priority) {
386 rte_flow_error_set(error, ENOTSUP,
387 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
389 "priorities are not supported");
393 rte_flow_error_set(error, ENOTSUP,
394 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
396 "egress is not supported");
399 if (!attr->ingress) {
400 rte_flow_error_set(error, ENOTSUP,
401 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
403 "only ingress is supported");
406 for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
407 const struct mlx5_flow_items *token = NULL;
411 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
413 /* Handle special situation for VLAN. */
414 if (items->type == RTE_FLOW_ITEM_TYPE_VLAN) {
415 if (((const struct rte_flow_item_vlan *)items)->tci >
417 rte_flow_error_set(error, ENOTSUP,
418 RTE_FLOW_ERROR_TYPE_ITEM,
420 "wrong VLAN id value");
426 cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
428 if (cur_item->items[i] == items->type) {
429 token = &mlx5_flow_items[items->type];
434 goto exit_item_not_supported;
436 err = mlx5_flow_item_validate(items,
437 (const uint8_t *)cur_item->mask,
438 sizeof(cur_item->mask_sz));
440 goto exit_item_not_supported;
441 if (flow->ibv_attr && cur_item->convert) {
442 err = cur_item->convert(items,
443 (cur_item->default_mask ?
444 cur_item->default_mask :
448 goto exit_item_not_supported;
450 flow->offset += cur_item->dst_sz;
452 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
453 if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
455 } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
457 } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
458 const struct rte_flow_action_queue *queue =
459 (const struct rte_flow_action_queue *)
462 if (!queue || (queue->index > (priv->rxqs_n - 1)))
463 goto exit_action_not_supported;
465 } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
466 const struct rte_flow_action_mark *mark =
467 (const struct rte_flow_action_mark *)
470 if (mark && (mark->id >= MLX5_FLOW_MARK_MAX)) {
471 rte_flow_error_set(error, ENOTSUP,
472 RTE_FLOW_ERROR_TYPE_ACTION,
474 "mark must be between 0"
480 goto exit_action_not_supported;
483 if (action.mark && !flow->ibv_attr && !action.drop)
484 flow->offset += sizeof(struct ibv_exp_flow_spec_action_tag);
485 if (!action.queue && !action.drop) {
486 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
487 NULL, "no valid action");
491 exit_item_not_supported:
492 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
493 items, "item not supported");
495 exit_action_not_supported:
496 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
497 actions, "action not supported");
502 * Validate a flow supported by the NIC.
504 * @see rte_flow_validate()
508 mlx5_flow_validate(struct rte_eth_dev *dev,
509 const struct rte_flow_attr *attr,
510 const struct rte_flow_item items[],
511 const struct rte_flow_action actions[],
512 struct rte_flow_error *error)
514 struct priv *priv = dev->data->dev_private;
/* Dry run: no ibv_attr buffer is allocated, so priv_flow_validate()
 * only checks the rule and accumulates the required size in offset. */
516 struct mlx5_flow flow = { .offset = sizeof(struct ibv_exp_flow_attr) };
519 ret = priv_flow_validate(priv, attr, items, actions, error, &flow);
525 * Convert Ethernet item to Verbs specification.
528 * Item specification.
529 * @param default_mask[in]
530 * Default bit-masks to use when item->mask is not provided.
531 * @param data[in, out]
535 mlx5_flow_create_eth(const struct rte_flow_item *item,
536 const void *default_mask,
539 const struct rte_flow_item_eth *spec = item->spec;
540 const struct rte_flow_item_eth *mask = item->mask;
541 struct mlx5_flow *flow = (struct mlx5_flow *)data;
542 struct ibv_exp_flow_spec_eth *eth;
543 const unsigned int eth_size = sizeof(struct ibv_exp_flow_spec_eth);
546 ++flow->ibv_attr->num_of_specs;
/* L2 layer: numerically highest (least specific) priority used here. */
547 flow->ibv_attr->priority = 2;
/* Append the Verbs spec at the current write offset inside ibv_attr. */
548 eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
549 *eth = (struct ibv_exp_flow_spec_eth) {
/* flow->inner carries IBV_EXP_FLOW_SPEC_INNER once VXLAN was seen. */
550 .type = flow->inner | IBV_EXP_FLOW_SPEC_ETH,
557 memcpy(eth->val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
558 memcpy(eth->val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
559 memcpy(eth->mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
560 memcpy(eth->mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
561 /* Remove unwanted bits from values. */
562 for (i = 0; i < ETHER_ADDR_LEN; ++i) {
563 eth->val.dst_mac[i] &= eth->mask.dst_mac[i];
564 eth->val.src_mac[i] &= eth->mask.src_mac[i];
570 * Convert VLAN item to Verbs specification.
573 * Item specification.
574 * @param default_mask[in]
575 * Default bit-masks to use when item->mask is not provided.
576 * @param data[in, out]
580 mlx5_flow_create_vlan(const struct rte_flow_item *item,
581 const void *default_mask,
584 const struct rte_flow_item_vlan *spec = item->spec;
585 const struct rte_flow_item_vlan *mask = item->mask;
586 struct mlx5_flow *flow = (struct mlx5_flow *)data;
587 struct ibv_exp_flow_spec_eth *eth;
588 const unsigned int eth_size = sizeof(struct ibv_exp_flow_spec_eth);
/* VLAN has no Verbs spec of its own: step back by eth_size and fold
 * the TCI into the Ethernet spec written by the previous item. */
590 eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset - eth_size);
595 eth->val.vlan_tag = spec->tci;
596 eth->mask.vlan_tag = mask->tci;
/* Remove unwanted bits from the value. */
597 eth->val.vlan_tag &= eth->mask.vlan_tag;
602 * Convert IPv4 item to Verbs specification.
605 * Item specification.
606 * @param default_mask[in]
607 * Default bit-masks to use when item->mask is not provided.
608 * @param data[in, out]
612 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
613 const void *default_mask,
616 const struct rte_flow_item_ipv4 *spec = item->spec;
617 const struct rte_flow_item_ipv4 *mask = item->mask;
618 struct mlx5_flow *flow = (struct mlx5_flow *)data;
619 struct ibv_exp_flow_spec_ipv4_ext *ipv4;
620 unsigned int ipv4_size = sizeof(struct ibv_exp_flow_spec_ipv4_ext);
622 ++flow->ibv_attr->num_of_specs;
/* L3 layer: more specific than L2 (2), less than L4 (0). */
623 flow->ibv_attr->priority = 1;
/* Append the Verbs spec at the current write offset. */
624 ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
625 *ipv4 = (struct ibv_exp_flow_spec_ipv4_ext) {
626 .type = flow->inner | IBV_EXP_FLOW_SPEC_IPV4_EXT,
/* Header fields are copied verbatim — presumably already in network
 * byte order; no byte swapping is performed here. */
633 ipv4->val = (struct ibv_exp_flow_ipv4_ext_filter){
634 .src_ip = spec->hdr.src_addr,
635 .dst_ip = spec->hdr.dst_addr,
636 .proto = spec->hdr.next_proto_id,
637 .tos = spec->hdr.type_of_service,
639 ipv4->mask = (struct ibv_exp_flow_ipv4_ext_filter){
640 .src_ip = mask->hdr.src_addr,
641 .dst_ip = mask->hdr.dst_addr,
642 .proto = mask->hdr.next_proto_id,
643 .tos = mask->hdr.type_of_service,
645 /* Remove unwanted bits from values. */
646 ipv4->val.src_ip &= ipv4->mask.src_ip;
647 ipv4->val.dst_ip &= ipv4->mask.dst_ip;
648 ipv4->val.proto &= ipv4->mask.proto;
649 ipv4->val.tos &= ipv4->mask.tos;
654 * Convert IPv6 item to Verbs specification.
657 * Item specification.
658 * @param default_mask[in]
659 * Default bit-masks to use when item->mask is not provided.
660 * @param data[in, out]
664 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
665 const void *default_mask,
668 const struct rte_flow_item_ipv6 *spec = item->spec;
669 const struct rte_flow_item_ipv6 *mask = item->mask;
670 struct mlx5_flow *flow = (struct mlx5_flow *)data;
671 struct ibv_exp_flow_spec_ipv6 *ipv6;
672 unsigned int ipv6_size = sizeof(struct ibv_exp_flow_spec_ipv6);
675 ++flow->ibv_attr->num_of_specs;
/* L3 layer priority, same as IPv4. */
676 flow->ibv_attr->priority = 1;
677 ipv6 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
678 *ipv6 = (struct ibv_exp_flow_spec_ipv6) {
679 .type = flow->inner | IBV_EXP_FLOW_SPEC_IPV6,
/* NOTE(review): RTE_DIM yields an element count; this equals the
 * 16-byte length only if val.src_ip is a byte array — confirm
 * against the verbs headers. */
686 memcpy(ipv6->val.src_ip, spec->hdr.src_addr,
687 RTE_DIM(ipv6->val.src_ip));
688 memcpy(ipv6->val.dst_ip, spec->hdr.dst_addr,
689 RTE_DIM(ipv6->val.dst_ip));
690 memcpy(ipv6->mask.src_ip, mask->hdr.src_addr,
691 RTE_DIM(ipv6->mask.src_ip));
692 memcpy(ipv6->mask.dst_ip, mask->hdr.dst_addr,
693 RTE_DIM(ipv6->mask.dst_ip));
694 /* Remove unwanted bits from values. */
695 for (i = 0; i < RTE_DIM(ipv6->val.src_ip); ++i) {
696 ipv6->val.src_ip[i] &= ipv6->mask.src_ip[i];
697 ipv6->val.dst_ip[i] &= ipv6->mask.dst_ip[i];
703 * Convert UDP item to Verbs specification.
706 * Item specification.
707 * @param default_mask[in]
708 * Default bit-masks to use when item->mask is not provided.
709 * @param data[in, out]
713 mlx5_flow_create_udp(const struct rte_flow_item *item,
714 const void *default_mask,
717 const struct rte_flow_item_udp *spec = item->spec;
718 const struct rte_flow_item_udp *mask = item->mask;
719 struct mlx5_flow *flow = (struct mlx5_flow *)data;
720 struct ibv_exp_flow_spec_tcp_udp *udp;
721 unsigned int udp_size = sizeof(struct ibv_exp_flow_spec_tcp_udp);
722 
723 ++flow->ibv_attr->num_of_specs;
/* L4 layer: most specific priority (0). */
724 flow->ibv_attr->priority = 0;
725 udp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
726 *udp = (struct ibv_exp_flow_spec_tcp_udp) {
727 .type = flow->inner | IBV_EXP_FLOW_SPEC_UDP,
/* Ports are copied verbatim (no byte swap performed here). */
734 udp->val.dst_port = spec->hdr.dst_port;
735 udp->val.src_port = spec->hdr.src_port;
736 udp->mask.dst_port = mask->hdr.dst_port;
737 udp->mask.src_port = mask->hdr.src_port;
738 /* Remove unwanted bits from values. */
739 udp->val.src_port &= udp->mask.src_port;
740 udp->val.dst_port &= udp->mask.dst_port;
745 * Convert TCP item to Verbs specification.
748 * Item specification.
749 * @param default_mask[in]
750 * Default bit-masks to use when item->mask is not provided.
751 * @param data[in, out]
755 mlx5_flow_create_tcp(const struct rte_flow_item *item,
756 const void *default_mask,
759 const struct rte_flow_item_tcp *spec = item->spec;
760 const struct rte_flow_item_tcp *mask = item->mask;
761 struct mlx5_flow *flow = (struct mlx5_flow *)data;
762 struct ibv_exp_flow_spec_tcp_udp *tcp;
763 unsigned int tcp_size = sizeof(struct ibv_exp_flow_spec_tcp_udp);
765 ++flow->ibv_attr->num_of_specs;
/* L4 layer: most specific priority (0). */
766 flow->ibv_attr->priority = 0;
767 tcp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
768 *tcp = (struct ibv_exp_flow_spec_tcp_udp) {
769 .type = flow->inner | IBV_EXP_FLOW_SPEC_TCP,
/* Ports are copied verbatim (no byte swap performed here). */
776 tcp->val.dst_port = spec->hdr.dst_port;
777 tcp->val.src_port = spec->hdr.src_port;
778 tcp->mask.dst_port = mask->hdr.dst_port;
779 tcp->mask.src_port = mask->hdr.src_port;
780 /* Remove unwanted bits from values. */
781 tcp->val.src_port &= tcp->mask.src_port;
782 tcp->val.dst_port &= tcp->mask.dst_port;
787 * Convert VXLAN item to Verbs specification.
790 * Item specification.
791 * @param default_mask[in]
792 * Default bit-masks to use when item->mask is not provided.
793 * @param data[in, out]
797 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
798 const void *default_mask,
801 const struct rte_flow_item_vxlan *spec = item->spec;
802 const struct rte_flow_item_vxlan *mask = item->mask;
803 struct mlx5_flow *flow = (struct mlx5_flow *)data;
804 struct ibv_exp_flow_spec_tunnel *vxlan;
805 unsigned int size = sizeof(struct ibv_exp_flow_spec_tunnel);
811 ++flow->ibv_attr->num_of_specs;
812 flow->ibv_attr->priority = 0;
814 vxlan = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
815 *vxlan = (struct ibv_exp_flow_spec_tunnel) {
816 .type = flow->inner | IBV_EXP_FLOW_SPEC_VXLAN_TUNNEL,
/* Everything after the VXLAN item matches the inner packet. */
819 flow->inner = IBV_EXP_FLOW_SPEC_INNER;
/* The 24-bit VNI lands in bytes 1-3 of the 32-bit tunnel id; `id`
 * is presumably a vni[]/uint32 union declared just above (its
 * declaration is not visible in this excerpt). */
824 memcpy(&id.vni[1], spec->vni, 3);
825 vxlan->val.tunnel_id = id.vlan_id;
826 memcpy(&id.vni[1], mask->vni, 3);
827 vxlan->mask.tunnel_id = id.vlan_id;
828 /* Remove unwanted bits from values. */
829 vxlan->val.tunnel_id &= vxlan->mask.tunnel_id;
834 * Convert mark/flag action to Verbs specification.
837 * Pointer to MLX5 flow structure.
842 mlx5_flow_create_flag_mark(struct mlx5_flow *flow, uint32_t mark_id)
844 struct ibv_exp_flow_spec_action_tag *tag;
845 unsigned int size = sizeof(struct ibv_exp_flow_spec_action_tag);
/* Append the tag spec at the current write offset (after all items). */
847 tag = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
848 *tag = (struct ibv_exp_flow_spec_action_tag){
849 .type = IBV_EXP_FLOW_SPEC_ACTION_TAG,
/* Encode the user mark id into the device tag representation. */
851 .tag_id = mlx5_flow_mark_set(mark_id),
853 ++flow->ibv_attr->num_of_specs;
858 * Complete flow rule creation.
861 * Pointer to private structure.
863 * Verbs flow attributes.
865 * Target action structure.
867 * Perform verbose error reporting if not NULL.
870 * A flow if the rule could be created.
872 static struct rte_flow *
873 priv_flow_create_action_queue(struct priv *priv,
874 struct ibv_exp_flow_attr *ibv_attr,
875 struct mlx5_flow_action *action,
876 struct rte_flow_error *error)
878 struct rxq_ctrl *rxq;
879 struct rte_flow *rte_flow;
883 rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow), 0);
885 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
886 NULL, "cannot allocate flow memory");
/* Drop path: build a private CQ/WQ pair owned by this flow —
 * presumably a stub queue so matched packets are discarded. */
891 ibv_exp_create_cq(priv->ctx, 1, NULL, NULL, 0,
892 &(struct ibv_exp_cq_init_attr){
896 rte_flow_error_set(error, ENOMEM,
897 RTE_FLOW_ERROR_TYPE_HANDLE,
898 NULL, "cannot allocate CQ");
901 rte_flow->wq = ibv_exp_create_wq(priv->ctx,
902 &(struct ibv_exp_wq_init_attr){
903 .wq_type = IBV_EXP_WQT_RQ,
/* Queue path: reuse the WQ of the target Rx queue. */
910 rxq = container_of((*priv->rxqs)[action->queue_id],
911 struct rxq_ctrl, rxq);
912 rte_flow->rxq = &rxq->rxq;
/* Tell the Rx path it must retrieve the flow tag for this queue. */
913 rxq->rxq.mark |= action->mark;
914 rte_flow->wq = rxq->wq;
916 rte_flow->mark = action->mark;
917 rte_flow->ibv_attr = ibv_attr;
/* Single-entry indirection table pointing at the chosen WQ. */
918 rte_flow->ind_table = ibv_exp_create_rwq_ind_table(
920 &(struct ibv_exp_rwq_ind_table_init_attr){
922 .log_ind_tbl_size = 0,
923 .ind_tbl = &rte_flow->wq,
926 if (!rte_flow->ind_table) {
927 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
928 NULL, "cannot allocate indirection table");
931 rte_flow->qp = ibv_exp_create_qp(
933 &(struct ibv_exp_qp_init_attr){
934 .qp_type = IBV_QPT_RAW_PACKET,
936 IBV_EXP_QP_INIT_ATTR_PD |
937 IBV_EXP_QP_INIT_ATTR_PORT |
938 IBV_EXP_QP_INIT_ATTR_RX_HASH,
940 .rx_hash_conf = &(struct ibv_exp_rx_hash_conf){
942 IBV_EXP_RX_HASH_FUNC_TOEPLITZ,
943 .rx_hash_key_len = rss_hash_default_key_len,
944 .rx_hash_key = rss_hash_default_key,
/* Zero hash fields mask: no RSS spreading, single target. */
945 .rx_hash_fields_mask = 0,
946 .rwq_ind_tbl = rte_flow->ind_table,
948 .port_num = priv->port,
951 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
952 NULL, "cannot allocate QP");
955 rte_flow->ibv_flow = ibv_exp_create_flow(rte_flow->qp,
957 if (!rte_flow->ibv_flow) {
958 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
959 NULL, "flow rule creation failure");
/* Error path: release in reverse creation order; CQ/WQ are destroyed
 * only when owned by the flow (drop case, rte_flow->rxq == NULL). */
966 ibv_destroy_qp(rte_flow->qp);
967 if (rte_flow->ind_table)
968 ibv_exp_destroy_rwq_ind_table(rte_flow->ind_table);
969 if (!rte_flow->rxq && rte_flow->wq)
970 ibv_exp_destroy_wq(rte_flow->wq);
971 if (!rte_flow->rxq && rte_flow->cq)
972 ibv_destroy_cq(rte_flow->cq);
973 rte_free(rte_flow->ibv_attr);
982 * Pointer to private structure.
984 * Flow rule attributes.
986 * Pattern specification (list terminated by the END pattern item).
988 * Associated actions (list terminated by the END action).
990 * Perform verbose error reporting if not NULL.
993 * A flow on success, NULL otherwise.
995 static struct rte_flow *
996 priv_flow_create(struct priv *priv,
997 const struct rte_flow_attr *attr,
998 const struct rte_flow_item items[],
999 const struct rte_flow_action actions[],
1000 struct rte_flow_error *error)
1002 struct rte_flow *rte_flow;
1003 struct mlx5_flow_action action;
1004 struct mlx5_flow flow = { .offset = sizeof(struct ibv_exp_flow_attr), };
/* First pass: validate only; flow.offset accumulates the buffer size
 * required for the Verbs attribute and all specs. */
1007 err = priv_flow_validate(priv, attr, items, actions, error, &flow);
1010 flow.ibv_attr = rte_malloc(__func__, flow.offset, 0);
/* Rewind offset: the second pass appends specs right after the header. */
1011 flow.offset = sizeof(struct ibv_exp_flow_attr);
1012 if (!flow.ibv_attr) {
1013 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1014 NULL, "cannot allocate ibv_attr memory");
1017 *flow.ibv_attr = (struct ibv_exp_flow_attr){
1018 .type = IBV_EXP_FLOW_ATTR_NORMAL,
1019 .size = sizeof(struct ibv_exp_flow_attr),
1020 .priority = attr->priority,
/* Second pass cannot fail since the first one succeeded. */
1027 claim_zero(priv_flow_validate(priv, attr, items, actions,
1029 action = (struct mlx5_flow_action){
1033 .mark_id = MLX5_FLOW_MARK_DEFAULT,
1035 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
1036 if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
1038 } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
1041 ((const struct rte_flow_action_queue *)
1042 actions->conf)->index;
1043 } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
1046 } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
1047 const struct rte_flow_action_mark *mark =
1048 (const struct rte_flow_action_mark *)
/* Marking a dropped packet is pointless: only mark when not dropping. */
1052 action.mark_id = mark->id;
1053 action.mark = !action.drop;
1055 rte_flow_error_set(error, ENOTSUP,
1056 RTE_FLOW_ERROR_TYPE_ACTION,
1057 actions, "unsupported action");
/* Append the tag spec last, after all converted item specs. */
1062 mlx5_flow_create_flag_mark(&flow, action.mark_id);
1063 flow.offset += sizeof(struct ibv_exp_flow_spec_action_tag);
1065 rte_flow = priv_flow_create_action_queue(priv, flow.ibv_attr,
1069 rte_free(flow.ibv_attr);
1076 * @see rte_flow_create()
1080 mlx5_flow_create(struct rte_eth_dev *dev,
1081 const struct rte_flow_attr *attr,
1082 const struct rte_flow_item items[],
1083 const struct rte_flow_action actions[],
1084 struct rte_flow_error *error)
1086 struct priv *priv = dev->data->dev_private;
1087 struct rte_flow *flow;
1090 flow = priv_flow_create(priv, attr, items, actions, error);
/* Track the rule so destroy/flush/stop/start can walk it later. */
1092 LIST_INSERT_HEAD(&priv->flows, flow, next);
1093 DEBUG("Flow created %p", (void *)flow);
1103 * Pointer to private structure.
1108 priv_flow_destroy(struct priv *priv,
1109 struct rte_flow *flow)
1112 LIST_REMOVE(flow, next);
1114 claim_zero(ibv_exp_destroy_flow(flow->ibv_flow));
1116 claim_zero(ibv_destroy_qp(flow->qp));
1117 if (flow->ind_table)
1118 claim_zero(ibv_exp_destroy_rwq_ind_table(flow->ind_table));
/* WQ/CQ belong to the flow only for drop rules (rxq == NULL);
 * queue rules borrow them from the Rx queue. */
1119 if (!flow->rxq && flow->wq)
1120 claim_zero(ibv_exp_destroy_wq(flow->wq));
1121 if (!flow->rxq && flow->cq)
1122 claim_zero(ibv_destroy_cq(flow->cq));
/* Recount marked rules sharing this Rx queue so rxq->mark is cleared
 * only when no remaining rule still needs the flow tag. */
1124 struct rte_flow *tmp;
1125 uint32_t mark_n = 0;
1127 for (tmp = LIST_FIRST(&priv->flows);
1129 tmp = LIST_NEXT(tmp, next)) {
1130 if ((flow->rxq == tmp->rxq) && tmp->mark)
1133 flow->rxq->mark = !!mark_n;
1135 rte_free(flow->ibv_attr);
1136 DEBUG("Flow destroyed %p", (void *)flow);
1143 * @see rte_flow_destroy()
1147 mlx5_flow_destroy(struct rte_eth_dev *dev,
1148 struct rte_flow *flow,
1149 struct rte_flow_error *error)
/* Thin rte_flow_ops wrapper delegating to priv_flow_destroy(). */
1151 struct priv *priv = dev->data->dev_private;
1155 priv_flow_destroy(priv, flow);
1161 * Destroy all flows.
1164 * Pointer to private structure.
1167 priv_flow_flush(struct priv *priv)
/* priv_flow_destroy() unlinks each flow from the list, so looping
 * until the list is empty terminates. */
1169 while (!LIST_EMPTY(&priv->flows)) {
1170 struct rte_flow *flow;
1172 flow = LIST_FIRST(&priv->flows);
1173 priv_flow_destroy(priv, flow);
1178 * Destroy all flows.
1180 * @see rte_flow_flush()
1184 mlx5_flow_flush(struct rte_eth_dev *dev,
1185 struct rte_flow_error *error)
/* Thin rte_flow_ops wrapper delegating to priv_flow_flush(). */
1187 struct priv *priv = dev->data->dev_private;
1191 priv_flow_flush(priv);
1199 * Called by dev_stop() to remove all flows.
1202 * Pointer to private structure.
1205 priv_flow_stop(struct priv *priv)
1207 struct rte_flow *flow;
/* Remove the rules from the NIC but keep them in priv->flows so
 * priv_flow_start() can re-apply them after a restart. */
1209 for (flow = LIST_FIRST(&priv->flows);
1211 flow = LIST_NEXT(flow, next)) {
1212 claim_zero(ibv_exp_destroy_flow(flow->ibv_flow));
1213 flow->ibv_flow = NULL;
1215 flow->rxq->mark = 0;
1216 DEBUG("Flow %p removed", (void *)flow);
1224 * Pointer to private structure.
1227 * 0 on success, a errno value otherwise and rte_errno is set.
1230 priv_flow_start(struct priv *priv)
1232 struct rte_flow *flow;
1234 for (flow = LIST_FIRST(&priv->flows);
1236 flow = LIST_NEXT(flow, next)) {
1237 flow->ibv_flow = ibv_exp_create_flow(flow->qp,
1239 if (!flow->ibv_flow) {
1240 DEBUG("Flow %p cannot be applied", (void *)flow);
1244 DEBUG("Flow %p applied", (void *)flow);
1246 flow->rxq->mark |= flow->mark;