4 * Copyright 2016 6WIND S.A.
5 * Copyright 2016 Mellanox.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of 6WIND S.A. nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 #include <sys/queue.h>
38 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
40 #pragma GCC diagnostic ignored "-Wpedantic"
42 #include <infiniband/verbs.h>
44 #pragma GCC diagnostic error "-Wpedantic"
47 #include <rte_ethdev.h>
49 #include <rte_flow_driver.h>
50 #include <rte_malloc.h>
/*
 * Forward declarations of the per-item conversion callbacks.  Each callback
 * translates one rte_flow pattern item into the corresponding Verbs
 * (ibv_exp) flow specification appended to the ibv_attr buffer.
 */
mlx5_flow_create_eth(const struct rte_flow_item *item,
		     const void *default_mask,
mlx5_flow_create_vlan(const struct rte_flow_item *item,
		      const void *default_mask,
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
		      const void *default_mask,
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
		      const void *default_mask,
mlx5_flow_create_udp(const struct rte_flow_item *item,
		     const void *default_mask,
mlx5_flow_create_tcp(const struct rte_flow_item *item,
		     const void *default_mask,
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
		       const void *default_mask,
	/*
	 * Flow rule handle.  The CQ/WQ below are privately owned only for
	 * drop rules; queue rules borrow the WQ from the target Rx queue
	 * (rxq != NULL) — see priv_flow_create_action_queue().
	 */
	LIST_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
	struct ibv_exp_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
	struct ibv_exp_rwq_ind_table *ind_table; /**< Indirection table. */
	struct ibv_qp *qp; /**< Verbs queue pair. */
	struct ibv_exp_flow *ibv_flow; /**< Verbs flow. */
	struct ibv_exp_wq *wq; /**< Verbs work queue. */
	struct ibv_cq *cq; /**< Verbs completion queue. */
	struct rxq *rxq; /**< Pointer to the queue, NULL if drop queue. */
	uint32_t mark:1; /**< Set if the flow is marked. */
/**
 * Static initializer for items: builds an anonymous array of item types
 * terminated by RTE_FLOW_ITEM_TYPE_END.
 */
	(const enum rte_flow_item_type []){ \
		__VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
/** Structure to generate a simple graph of layers supported by the NIC. */
struct mlx5_flow_items {
	/** List of possible actions for these items. */
	const enum rte_flow_action_type *const actions;
	/** Bit-masks corresponding to the possibilities for the item. */
	/*
	 * Default bit-masks to use when item->mask is not provided. When
	 * \default_mask is also NULL, the full supported bit-mask (\mask) is
	 * used instead.
	 */
	const void *default_mask;
	/** Bit-masks size in bytes. */
	const unsigned int mask_sz;
	/*
	 * Conversion function from rte_flow to NIC specific flow.
	 *
	 * @param item
	 *   rte_flow item to convert.
	 * @param default_mask
	 *   Default bit-masks to use when item->mask is not provided.
	 * @param data
	 *   Internal structure to store the conversion.
	 * @return
	 *   0 on success, negative value otherwise.
	 */
	int (*convert)(const struct rte_flow_item *item,
		       const void *default_mask,
	/** Size in bytes of the destination structure. */
	const unsigned int dst_sz;
	/** List of possible following items. */
	const enum rte_flow_item_type *const items;
/** Valid actions for this PMD; shared by every item in the graph below. */
static const enum rte_flow_action_type valid_actions[] = {
	RTE_FLOW_ACTION_TYPE_DROP,
	RTE_FLOW_ACTION_TYPE_QUEUE,
	RTE_FLOW_ACTION_TYPE_MARK,
	RTE_FLOW_ACTION_TYPE_FLAG,
	RTE_FLOW_ACTION_TYPE_END,
/** Graph of supported items and associated actions. */
static const struct mlx5_flow_items mlx5_flow_items[] = {
	/* END doubles as the graph entry point: its .items lists which
	 * item types may start a pattern. */
	[RTE_FLOW_ITEM_TYPE_END] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
			       RTE_FLOW_ITEM_TYPE_VXLAN),
	[RTE_FLOW_ITEM_TYPE_ETH] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
			       RTE_FLOW_ITEM_TYPE_IPV4,
			       RTE_FLOW_ITEM_TYPE_IPV6),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_eth){
			.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
			.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
		.default_mask = &rte_flow_item_eth_mask,
		.mask_sz = sizeof(struct rte_flow_item_eth),
		.convert = mlx5_flow_create_eth,
		.dst_sz = sizeof(struct ibv_exp_flow_spec_eth),
	[RTE_FLOW_ITEM_TYPE_VLAN] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
			       RTE_FLOW_ITEM_TYPE_IPV6),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_vlan){
		.default_mask = &rte_flow_item_vlan_mask,
		.mask_sz = sizeof(struct rte_flow_item_vlan),
		.convert = mlx5_flow_create_vlan,
	[RTE_FLOW_ITEM_TYPE_IPV4] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
			       RTE_FLOW_ITEM_TYPE_TCP),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_ipv4){
			.type_of_service = -1,
		.default_mask = &rte_flow_item_ipv4_mask,
		.mask_sz = sizeof(struct rte_flow_item_ipv4),
		.convert = mlx5_flow_create_ipv4,
		.dst_sz = sizeof(struct ibv_exp_flow_spec_ipv4_ext),
	[RTE_FLOW_ITEM_TYPE_IPV6] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
			       RTE_FLOW_ITEM_TYPE_TCP),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_ipv6){
				0xff, 0xff, 0xff, 0xff,
				0xff, 0xff, 0xff, 0xff,
				0xff, 0xff, 0xff, 0xff,
				0xff, 0xff, 0xff, 0xff,
				0xff, 0xff, 0xff, 0xff,
				0xff, 0xff, 0xff, 0xff,
				0xff, 0xff, 0xff, 0xff,
				0xff, 0xff, 0xff, 0xff,
		.default_mask = &rte_flow_item_ipv6_mask,
		.mask_sz = sizeof(struct rte_flow_item_ipv6),
		.convert = mlx5_flow_create_ipv6,
		.dst_sz = sizeof(struct ibv_exp_flow_spec_ipv6_ext),
	[RTE_FLOW_ITEM_TYPE_UDP] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_udp){
		.default_mask = &rte_flow_item_udp_mask,
		.mask_sz = sizeof(struct rte_flow_item_udp),
		.convert = mlx5_flow_create_udp,
		.dst_sz = sizeof(struct ibv_exp_flow_spec_tcp_udp),
	/* TCP has no .items: nothing may follow it in a pattern. */
	[RTE_FLOW_ITEM_TYPE_TCP] = {
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_tcp){
		.default_mask = &rte_flow_item_tcp_mask,
		.mask_sz = sizeof(struct rte_flow_item_tcp),
		.convert = mlx5_flow_create_tcp,
		.dst_sz = sizeof(struct ibv_exp_flow_spec_tcp_udp),
	[RTE_FLOW_ITEM_TYPE_VXLAN] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_vxlan){
			.vni = "\xff\xff\xff",
		.default_mask = &rte_flow_item_vxlan_mask,
		.mask_sz = sizeof(struct rte_flow_item_vxlan),
		.convert = mlx5_flow_create_vxlan,
		.dst_sz = sizeof(struct ibv_exp_flow_spec_tunnel),
/** Structure to pass to the conversion function. */
	struct ibv_exp_flow_attr *ibv_attr; /**< Verbs attribute. */
	unsigned int offset; /**< Offset in bytes in the ibv_attr buffer. */
	uint32_t inner; /**< Set once VXLAN is encountered. */
/** Summary of the actions requested by a flow rule. */
struct mlx5_flow_action {
	uint32_t queue:1; /**< Target is a receive queue. */
	uint32_t drop:1; /**< Target is a drop queue. */
	uint32_t mark:1; /**< Mark is present in the flow. */
	uint32_t queue_id; /**< Identifier of the queue. */
	uint32_t mark_id; /**< Mark identifier. */
 * Check support for a given item.
 *
 * @param item[in]
 *   Item specification.
 * @param mask[in]
 *   Bit-masks covering supported fields to compare with spec, last and mask in
 *   addition to the supported bit-mask.
 * @param size
 *   Bit-Mask size in bytes.
static int
mlx5_flow_item_validate(const struct rte_flow_item *item,
			const uint8_t *mask, unsigned int size)
	/* last/mask without spec makes no sense. */
	if (!item->spec && (item->mask || item->last))
	if (item->spec && !item->mask) {
		const uint8_t *spec = item->spec;
		/* spec must be a subset of the supported mask. */
		for (i = 0; i < size; ++i)
			if ((spec[i] | mask[i]) != mask[i])
	if (item->last && !item->mask) {
		const uint8_t *spec = item->last;
		/* last must be a subset of the supported mask. */
		for (i = 0; i < size; ++i)
			if ((spec[i] | mask[i]) != mask[i])
		const uint8_t *spec = item->mask;
		/* item->mask must be a subset of the supported mask. */
		for (i = 0; i < size; ++i)
			if ((spec[i] | mask[i]) != mask[i])
	if (item->spec && item->last) {
		const uint8_t *apply = mask;
		/* Once masked, spec and last must be identical: true ranges
		 * are not supported by this PMD. */
		for (i = 0; i < size; ++i) {
			spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
			last[i] = ((const uint8_t *)item->last)[i] & apply[i];
		ret = memcmp(spec, last, size);
350 * Validate a flow supported by the NIC.
353 * Pointer to private structure.
355 * Flow rule attributes.
357 * Pattern specification (list terminated by the END pattern item).
359 * Associated actions (list terminated by the END action).
361 * Perform verbose error reporting if not NULL.
362 * @param[in, out] flow
363 * Flow structure to update.
366 * 0 on success, a negative errno value otherwise and rte_errno is set.
369 priv_flow_validate(struct priv *priv,
370 const struct rte_flow_attr *attr,
371 const struct rte_flow_item items[],
372 const struct rte_flow_action actions[],
373 struct rte_flow_error *error,
374 struct mlx5_flow *flow)
376 const struct mlx5_flow_items *cur_item = mlx5_flow_items;
377 struct mlx5_flow_action action = {
385 rte_flow_error_set(error, ENOTSUP,
386 RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
388 "groups are not supported");
391 if (attr->priority) {
392 rte_flow_error_set(error, ENOTSUP,
393 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
395 "priorities are not supported");
399 rte_flow_error_set(error, ENOTSUP,
400 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
402 "egress is not supported");
405 if (!attr->ingress) {
406 rte_flow_error_set(error, ENOTSUP,
407 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
409 "only ingress is supported");
412 for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
413 const struct mlx5_flow_items *token = NULL;
417 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
421 cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
423 if (cur_item->items[i] == items->type) {
424 token = &mlx5_flow_items[items->type];
429 goto exit_item_not_supported;
431 err = mlx5_flow_item_validate(items,
432 (const uint8_t *)cur_item->mask,
435 goto exit_item_not_supported;
436 if (flow->ibv_attr && cur_item->convert) {
437 err = cur_item->convert(items,
438 (cur_item->default_mask ?
439 cur_item->default_mask :
443 goto exit_item_not_supported;
445 flow->offset += cur_item->dst_sz;
447 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
448 if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
450 } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
452 } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
453 const struct rte_flow_action_queue *queue =
454 (const struct rte_flow_action_queue *)
457 if (!queue || (queue->index > (priv->rxqs_n - 1)))
458 goto exit_action_not_supported;
460 } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
461 const struct rte_flow_action_mark *mark =
462 (const struct rte_flow_action_mark *)
466 rte_flow_error_set(error, EINVAL,
467 RTE_FLOW_ERROR_TYPE_ACTION,
469 "mark must be defined");
471 } else if (mark->id >= MLX5_FLOW_MARK_MAX) {
472 rte_flow_error_set(error, ENOTSUP,
473 RTE_FLOW_ERROR_TYPE_ACTION,
475 "mark must be between 0"
480 } else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
483 goto exit_action_not_supported;
486 if (action.mark && !flow->ibv_attr && !action.drop)
487 flow->offset += sizeof(struct ibv_exp_flow_spec_action_tag);
488 if (!action.queue && !action.drop) {
489 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
490 NULL, "no valid action");
494 exit_item_not_supported:
495 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
496 items, "item not supported");
498 exit_action_not_supported:
499 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
500 actions, "action not supported");
 * Validate a flow supported by the NIC.
 *
 * @see rte_flow_validate()
int
mlx5_flow_validate(struct rte_eth_dev *dev,
		   const struct rte_flow_attr *attr,
		   const struct rte_flow_item items[],
		   const struct rte_flow_action actions[],
		   struct rte_flow_error *error)
	struct priv *priv = dev->data->dev_private;
	/* ibv_attr is left NULL: validation only, no conversion. */
	struct mlx5_flow flow = { .offset = sizeof(struct ibv_exp_flow_attr) };
	ret = priv_flow_validate(priv, attr, items, actions, error, &flow);
 * Convert Ethernet item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   Internal structure to store the conversion.
mlx5_flow_create_eth(const struct rte_flow_item *item,
		     const void *default_mask,
	const struct rte_flow_item_eth *spec = item->spec;
	const struct rte_flow_item_eth *mask = item->mask;
	struct mlx5_flow *flow = (struct mlx5_flow *)data;
	struct ibv_exp_flow_spec_eth *eth;
	const unsigned int eth_size = sizeof(struct ibv_exp_flow_spec_eth);
	++flow->ibv_attr->num_of_specs;
	flow->ibv_attr->priority = 2;
	/* Write the spec at the current end of the ibv_attr buffer. */
	eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
	*eth = (struct ibv_exp_flow_spec_eth) {
		.type = flow->inner | IBV_EXP_FLOW_SPEC_ETH,
	memcpy(eth->val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
	memcpy(eth->val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
	eth->val.ether_type = spec->type;
	memcpy(eth->mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
	memcpy(eth->mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
	eth->mask.ether_type = mask->type;
	/* Remove unwanted bits from values. */
	for (i = 0; i < ETHER_ADDR_LEN; ++i) {
		eth->val.dst_mac[i] &= eth->mask.dst_mac[i];
		eth->val.src_mac[i] &= eth->mask.src_mac[i];
	eth->val.ether_type &= eth->mask.ether_type;
 * Convert VLAN item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   Internal structure to store the conversion.
mlx5_flow_create_vlan(const struct rte_flow_item *item,
		      const void *default_mask,
	const struct rte_flow_item_vlan *spec = item->spec;
	const struct rte_flow_item_vlan *mask = item->mask;
	struct mlx5_flow *flow = (struct mlx5_flow *)data;
	struct ibv_exp_flow_spec_eth *eth;
	const unsigned int eth_size = sizeof(struct ibv_exp_flow_spec_eth);
	/* VLAN is folded into the Ethernet spec written just before it. */
	eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset - eth_size);
	eth->val.vlan_tag = spec->tci;
	eth->mask.vlan_tag = mask->tci;
	eth->val.vlan_tag &= eth->mask.vlan_tag;
 * Convert IPv4 item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   Internal structure to store the conversion.
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
		      const void *default_mask,
	const struct rte_flow_item_ipv4 *spec = item->spec;
	const struct rte_flow_item_ipv4 *mask = item->mask;
	struct mlx5_flow *flow = (struct mlx5_flow *)data;
	struct ibv_exp_flow_spec_ipv4_ext *ipv4;
	unsigned int ipv4_size = sizeof(struct ibv_exp_flow_spec_ipv4_ext);
	++flow->ibv_attr->num_of_specs;
	flow->ibv_attr->priority = 1;
	/* Write the spec at the current end of the ibv_attr buffer. */
	ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
	*ipv4 = (struct ibv_exp_flow_spec_ipv4_ext) {
		.type = flow->inner | IBV_EXP_FLOW_SPEC_IPV4_EXT,
	ipv4->val = (struct ibv_exp_flow_ipv4_ext_filter){
		.src_ip = spec->hdr.src_addr,
		.dst_ip = spec->hdr.dst_addr,
		.proto = spec->hdr.next_proto_id,
		.tos = spec->hdr.type_of_service,
	ipv4->mask = (struct ibv_exp_flow_ipv4_ext_filter){
		.src_ip = mask->hdr.src_addr,
		.dst_ip = mask->hdr.dst_addr,
		.proto = mask->hdr.next_proto_id,
		.tos = mask->hdr.type_of_service,
	/* Remove unwanted bits from values. */
	ipv4->val.src_ip &= ipv4->mask.src_ip;
	ipv4->val.dst_ip &= ipv4->mask.dst_ip;
	ipv4->val.proto &= ipv4->mask.proto;
	ipv4->val.tos &= ipv4->mask.tos;
 * Convert IPv6 item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   Internal structure to store the conversion.
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
		      const void *default_mask,
	const struct rte_flow_item_ipv6 *spec = item->spec;
	const struct rte_flow_item_ipv6 *mask = item->mask;
	struct mlx5_flow *flow = (struct mlx5_flow *)data;
	struct ibv_exp_flow_spec_ipv6_ext *ipv6;
	unsigned int ipv6_size = sizeof(struct ibv_exp_flow_spec_ipv6_ext);
	++flow->ibv_attr->num_of_specs;
	flow->ibv_attr->priority = 1;
	/* Write the spec at the current end of the ibv_attr buffer. */
	ipv6 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
	*ipv6 = (struct ibv_exp_flow_spec_ipv6_ext) {
		.type = flow->inner | IBV_EXP_FLOW_SPEC_IPV6_EXT,
	memcpy(ipv6->val.src_ip, spec->hdr.src_addr,
	       RTE_DIM(ipv6->val.src_ip));
	memcpy(ipv6->val.dst_ip, spec->hdr.dst_addr,
	       RTE_DIM(ipv6->val.dst_ip));
	memcpy(ipv6->mask.src_ip, mask->hdr.src_addr,
	       RTE_DIM(ipv6->mask.src_ip));
	memcpy(ipv6->mask.dst_ip, mask->hdr.dst_addr,
	       RTE_DIM(ipv6->mask.dst_ip));
	ipv6->mask.flow_label = mask->hdr.vtc_flow;
	ipv6->mask.next_hdr = mask->hdr.proto;
	ipv6->mask.hop_limit = mask->hdr.hop_limits;
	/* Remove unwanted bits from values. */
	ipv6->val.flow_label &= ipv6->mask.flow_label;
	ipv6->val.next_hdr &= ipv6->mask.next_hdr;
	ipv6->val.hop_limit &= ipv6->mask.hop_limit;
 * Convert UDP item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   Internal structure to store the conversion.
mlx5_flow_create_udp(const struct rte_flow_item *item,
		     const void *default_mask,
	const struct rte_flow_item_udp *spec = item->spec;
	const struct rte_flow_item_udp *mask = item->mask;
	struct mlx5_flow *flow = (struct mlx5_flow *)data;
	struct ibv_exp_flow_spec_tcp_udp *udp;
	unsigned int udp_size = sizeof(struct ibv_exp_flow_spec_tcp_udp);
	++flow->ibv_attr->num_of_specs;
	flow->ibv_attr->priority = 0;
	/* Write the spec at the current end of the ibv_attr buffer. */
	udp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
	*udp = (struct ibv_exp_flow_spec_tcp_udp) {
		.type = flow->inner | IBV_EXP_FLOW_SPEC_UDP,
	udp->val.dst_port = spec->hdr.dst_port;
	udp->val.src_port = spec->hdr.src_port;
	udp->mask.dst_port = mask->hdr.dst_port;
	udp->mask.src_port = mask->hdr.src_port;
	/* Remove unwanted bits from values. */
	udp->val.src_port &= udp->mask.src_port;
	udp->val.dst_port &= udp->mask.dst_port;
 * Convert TCP item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   Internal structure to store the conversion.
mlx5_flow_create_tcp(const struct rte_flow_item *item,
		     const void *default_mask,
	const struct rte_flow_item_tcp *spec = item->spec;
	const struct rte_flow_item_tcp *mask = item->mask;
	struct mlx5_flow *flow = (struct mlx5_flow *)data;
	struct ibv_exp_flow_spec_tcp_udp *tcp;
	unsigned int tcp_size = sizeof(struct ibv_exp_flow_spec_tcp_udp);
	++flow->ibv_attr->num_of_specs;
	flow->ibv_attr->priority = 0;
	/* Write the spec at the current end of the ibv_attr buffer. */
	tcp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
	*tcp = (struct ibv_exp_flow_spec_tcp_udp) {
		.type = flow->inner | IBV_EXP_FLOW_SPEC_TCP,
	tcp->val.dst_port = spec->hdr.dst_port;
	tcp->val.src_port = spec->hdr.src_port;
	tcp->mask.dst_port = mask->hdr.dst_port;
	tcp->mask.src_port = mask->hdr.src_port;
	/* Remove unwanted bits from values. */
	tcp->val.src_port &= tcp->mask.src_port;
	tcp->val.dst_port &= tcp->mask.dst_port;
 * Convert VXLAN item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   Internal structure to store the conversion.
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
		       const void *default_mask,
	const struct rte_flow_item_vxlan *spec = item->spec;
	const struct rte_flow_item_vxlan *mask = item->mask;
	struct mlx5_flow *flow = (struct mlx5_flow *)data;
	struct ibv_exp_flow_spec_tunnel *vxlan;
	unsigned int size = sizeof(struct ibv_exp_flow_spec_tunnel);
	++flow->ibv_attr->num_of_specs;
	flow->ibv_attr->priority = 0;
	/* Write the spec at the current end of the ibv_attr buffer. */
	vxlan = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
	*vxlan = (struct ibv_exp_flow_spec_tunnel) {
		.type = flow->inner | IBV_EXP_FLOW_SPEC_VXLAN_TUNNEL,
	/* Subsequent items now match the inner (encapsulated) headers. */
	flow->inner = IBV_EXP_FLOW_SPEC_INNER;
	/* 24-bit VNI copied into the upper bytes of the tunnel id. */
	memcpy(&id.vni[1], spec->vni, 3);
	vxlan->val.tunnel_id = id.vlan_id;
	memcpy(&id.vni[1], mask->vni, 3);
	vxlan->mask.tunnel_id = id.vlan_id;
	/* Remove unwanted bits from values. */
	vxlan->val.tunnel_id &= vxlan->mask.tunnel_id;
 * Convert mark/flag action to Verbs specification.
 *
 * @param flow
 *   Pointer to MLX5 flow structure.
 * @param mark_id
 *   Mark identifier.
mlx5_flow_create_flag_mark(struct mlx5_flow *flow, uint32_t mark_id)
	struct ibv_exp_flow_spec_action_tag *tag;
	unsigned int size = sizeof(struct ibv_exp_flow_spec_action_tag);
	/* Append an action-tag spec carrying the encoded mark value. */
	tag = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
	*tag = (struct ibv_exp_flow_spec_action_tag){
		.type = IBV_EXP_FLOW_SPEC_ACTION_TAG,
		.tag_id = mlx5_flow_mark_set(mark_id),
	++flow->ibv_attr->num_of_specs;
 * Complete flow rule creation.
 *
 * @param priv
 *   Pointer to private structure.
 * @param ibv_attr
 *   Verbs flow attributes.
 * @param action
 *   Target action structure.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A flow if the rule could be created.
static struct rte_flow *
priv_flow_create_action_queue(struct priv *priv,
			      struct ibv_exp_flow_attr *ibv_attr,
			      struct mlx5_flow_action *action,
			      struct rte_flow_error *error)
	struct rxq_ctrl *rxq;
	struct rte_flow *rte_flow;
	rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow), 0);
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "cannot allocate flow memory");
	/* Drop rules get a private dummy CQ/WQ pair of their own. */
		ibv_exp_create_cq(priv->ctx, 1, NULL, NULL, 0,
				  &(struct ibv_exp_cq_init_attr){
		rte_flow_error_set(error, ENOMEM,
				   RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "cannot allocate CQ");
	rte_flow->wq = ibv_exp_create_wq(priv->ctx,
					 &(struct ibv_exp_wq_init_attr){
					 .wq_type = IBV_EXP_WQT_RQ,
		rte_flow_error_set(error, ENOMEM,
				   RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "cannot allocate WQ");
	/* Queue rules borrow the WQ of the target Rx queue instead. */
	rxq = container_of((*priv->rxqs)[action->queue_id],
			   struct rxq_ctrl, rxq);
	rte_flow->rxq = &rxq->rxq;
	rxq->rxq.mark |= action->mark;
	rte_flow->wq = rxq->wq;
	rte_flow->mark = action->mark;
	rte_flow->ibv_attr = ibv_attr;
	/* Single-entry indirection table pointing at the chosen WQ. */
	rte_flow->ind_table = ibv_exp_create_rwq_ind_table(
		&(struct ibv_exp_rwq_ind_table_init_attr){
			.log_ind_tbl_size = 0,
			.ind_tbl = &rte_flow->wq,
	if (!rte_flow->ind_table) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "cannot allocate indirection table");
	rte_flow->qp = ibv_exp_create_qp(
		&(struct ibv_exp_qp_init_attr){
			.qp_type = IBV_QPT_RAW_PACKET,
				IBV_EXP_QP_INIT_ATTR_PD |
				IBV_EXP_QP_INIT_ATTR_PORT |
				IBV_EXP_QP_INIT_ATTR_RX_HASH,
			.rx_hash_conf = &(struct ibv_exp_rx_hash_conf){
					IBV_EXP_RX_HASH_FUNC_TOEPLITZ,
				.rx_hash_key_len = rss_hash_default_key_len,
				.rx_hash_key = rss_hash_default_key,
				.rx_hash_fields_mask = 0,
				.rwq_ind_tbl = rte_flow->ind_table,
			.port_num = priv->port,
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "cannot allocate QP");
	rte_flow->ibv_flow = ibv_exp_create_flow(rte_flow->qp,
	if (!rte_flow->ibv_flow) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "flow rule creation failure");
	/* Error path: release privately-owned resources only. */
	ibv_destroy_qp(rte_flow->qp);
	if (rte_flow->ind_table)
		ibv_exp_destroy_rwq_ind_table(rte_flow->ind_table);
	if (!rte_flow->rxq && rte_flow->wq)
		ibv_exp_destroy_wq(rte_flow->wq);
	if (!rte_flow->rxq && rte_flow->cq)
		ibv_destroy_cq(rte_flow->cq);
 * Pointer to private structure.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] items
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A flow on success, NULL otherwise.
static struct rte_flow *
priv_flow_create(struct priv *priv,
		 const struct rte_flow_attr *attr,
		 const struct rte_flow_item items[],
		 const struct rte_flow_action actions[],
		 struct rte_flow_error *error)
	struct rte_flow *rte_flow;
	struct mlx5_flow_action action;
	struct mlx5_flow flow = { .offset = sizeof(struct ibv_exp_flow_attr), };
	/* First pass: validate and compute the required buffer size. */
	err = priv_flow_validate(priv, attr, items, actions, error, &flow);
	flow.ibv_attr = rte_malloc(__func__, flow.offset, 0);
	flow.offset = sizeof(struct ibv_exp_flow_attr);
	if (!flow.ibv_attr) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "cannot allocate ibv_attr memory");
	*flow.ibv_attr = (struct ibv_exp_flow_attr){
		.type = IBV_EXP_FLOW_ATTR_NORMAL,
		.size = sizeof(struct ibv_exp_flow_attr),
		.priority = attr->priority,
	/* Second pass: cannot fail, fills the ibv_attr buffer in place. */
	claim_zero(priv_flow_validate(priv, attr, items, actions,
	action = (struct mlx5_flow_action){
		.mark_id = MLX5_FLOW_MARK_DEFAULT,
	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
				((const struct rte_flow_action_queue *)
				 actions->conf)->index;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
			const struct rte_flow_action_mark *mark =
				(const struct rte_flow_action_mark *)
			action.mark_id = mark->id;
			/* Mark is meaningless on a drop rule. */
			action.mark = !action.drop;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
			rte_flow_error_set(error, ENOTSUP,
					   RTE_FLOW_ERROR_TYPE_ACTION,
					   actions, "unsupported action");
		mlx5_flow_create_flag_mark(&flow, action.mark_id);
		flow.offset += sizeof(struct ibv_exp_flow_spec_action_tag);
	rte_flow = priv_flow_create_action_queue(priv, flow.ibv_attr,
	rte_free(flow.ibv_attr);
 * Create a flow.
 *
 * @see rte_flow_create()
struct rte_flow *
mlx5_flow_create(struct rte_eth_dev *dev,
		 const struct rte_flow_attr *attr,
		 const struct rte_flow_item items[],
		 const struct rte_flow_action actions[],
		 struct rte_flow_error *error)
	struct priv *priv = dev->data->dev_private;
	struct rte_flow *flow;
	flow = priv_flow_create(priv, attr, items, actions, error);
	/* Track the new rule so flush/stop/start can iterate over it. */
	LIST_INSERT_HEAD(&priv->flows, flow, next);
	DEBUG("Flow created %p", (void *)flow);
 * Destroy a flow.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] flow
 *   Flow to destroy.
static void
priv_flow_destroy(struct priv *priv,
		  struct rte_flow *flow)
	LIST_REMOVE(flow, next);
	claim_zero(ibv_exp_destroy_flow(flow->ibv_flow));
	claim_zero(ibv_destroy_qp(flow->qp));
	if (flow->ind_table)
		claim_zero(ibv_exp_destroy_rwq_ind_table(flow->ind_table));
	/* WQ/CQ belong to this rule only when it targets a drop queue. */
	if (!flow->rxq && flow->wq)
		claim_zero(ibv_exp_destroy_wq(flow->wq));
	if (!flow->rxq && flow->cq)
		claim_zero(ibv_destroy_cq(flow->cq));
	struct rte_flow *tmp;
	uint32_t mark_n = 0;
	/* Keep rxq->mark set only while some remaining rule still marks. */
	for (tmp = LIST_FIRST(&priv->flows);
	     tmp = LIST_NEXT(tmp, next)) {
		if ((flow->rxq == tmp->rxq) && tmp->mark)
	flow->rxq->mark = !!mark_n;
	rte_free(flow->ibv_attr);
	DEBUG("Flow destroyed %p", (void *)flow);
 * Destroy a flow.
 *
 * @see rte_flow_destroy()
int
mlx5_flow_destroy(struct rte_eth_dev *dev,
		  struct rte_flow *flow,
		  struct rte_flow_error *error)
	struct priv *priv = dev->data->dev_private;
	priv_flow_destroy(priv, flow);
 * Destroy all flows.
 *
 * @param priv
 *   Pointer to private structure.
static void
priv_flow_flush(struct priv *priv)
	/* Drain the list head-first; destroy unlinks each entry. */
	while (!LIST_EMPTY(&priv->flows)) {
		struct rte_flow *flow;
		flow = LIST_FIRST(&priv->flows);
		priv_flow_destroy(priv, flow);
 * Destroy all flows.
 *
 * @see rte_flow_flush()
int
mlx5_flow_flush(struct rte_eth_dev *dev,
		struct rte_flow_error *error)
	struct priv *priv = dev->data->dev_private;
	priv_flow_flush(priv);
 * Remove all flows from hardware.
 *
 * Called by dev_stop() to remove all flows.  The rules stay on the list so
 * priv_flow_start() can re-apply them; only the Verbs flow handle is
 * released.
 *
 * @param priv
 *   Pointer to private structure.
static void
priv_flow_stop(struct priv *priv)
	struct rte_flow *flow;
	for (flow = LIST_FIRST(&priv->flows);
	     flow = LIST_NEXT(flow, next)) {
		claim_zero(ibv_exp_destroy_flow(flow->ibv_flow));
		flow->ibv_flow = NULL;
		flow->rxq->mark = 0;
		DEBUG("Flow %p removed", (void *)flow);
1241 * Pointer to private structure.
1244 * 0 on success, a errno value otherwise and rte_errno is set.
1247 priv_flow_start(struct priv *priv)
1249 struct rte_flow *flow;
1251 for (flow = LIST_FIRST(&priv->flows);
1253 flow = LIST_NEXT(flow, next)) {
1254 flow->ibv_flow = ibv_exp_create_flow(flow->qp,
1256 if (!flow->ibv_flow) {
1257 DEBUG("Flow %p cannot be applied", (void *)flow);
1261 DEBUG("Flow %p applied", (void *)flow);
1263 flow->rxq->mark |= flow->mark;