/*-
 *   BSD LICENSE
 *
 *   Copyright 2016 6WIND S.A.
 *   Copyright 2016 Mellanox.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of 6WIND S.A. nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <sys/queue.h>
#include <string.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_ethdev.h>
#include <rte_flow.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>

#include "mlx5.h"
#include "mlx5_prm.h"
static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data);

static int
mlx5_flow_create_vlan(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data);

static int
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data);

static int
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data);

static int
mlx5_flow_create_udp(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data);

static int
mlx5_flow_create_tcp(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data);

static int
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
		       const void *default_mask,
		       void *data);
struct rte_flow {
	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
	struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
	struct ibv_qp *qp; /**< Verbs queue pair. */
	struct ibv_flow *ibv_flow; /**< Verbs flow. */
	struct ibv_wq *wq; /**< Verbs work queue. */
	struct ibv_cq *cq; /**< Verbs completion queue. */
	uint16_t rxqs_n; /**< Number of queues in this flow, 0 if drop queue. */
	uint32_t mark:1; /**< Set if the flow is marked. */
	uint32_t drop:1; /**< Drop queue. */
	uint64_t hash_fields; /**< Fields that participate in the hash. */
	struct rxq *rxqs[]; /**< Pointer to the queues array. */
};
/** Static initializer for items. */
#define ITEMS(...) \
	(const enum rte_flow_item_type []){ \
		__VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
	}
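/*
 * Note: the mlx5_flow_items[] array below forms a graph keyed by item
 * type. Starting from the RTE_FLOW_ITEM_TYPE_END entry, an item is
 * accepted only when its type appears in the "items" list of the
 * previously matched entry; this is what enforces layer ordering such as
 * ETH before IPV4 during validation.
 */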
/** Structure to generate a simple graph of layers supported by the NIC. */
struct mlx5_flow_items {
	/** List of possible actions for these items. */
	const enum rte_flow_action_type *const actions;
	/** Bit-masks corresponding to the possibilities for the item. */
	const void *mask;
	/**
	 * Default bit-masks to use when item->mask is not provided. When
	 * \default_mask is also NULL, the full supported bit-mask (\mask) is
	 * used instead.
	 */
	const void *default_mask;
	/** Bit-masks size in bytes. */
	const unsigned int mask_sz;
	/**
	 * Conversion function from rte_flow to NIC specific flow.
	 *
	 * @param item
	 *   rte_flow item to convert.
	 * @param default_mask
	 *   Default bit-masks to use when item->mask is not provided.
	 * @param data
	 *   Internal structure to store the conversion.
	 *
	 * @return
	 *   0 on success, negative value otherwise.
	 */
	int (*convert)(const struct rte_flow_item *item,
		       const void *default_mask,
		       void *data);
	/** Size in bytes of the destination structure. */
	const unsigned int dst_sz;
	/** List of possible following items. */
	const enum rte_flow_item_type *const items;
};
/** Valid actions for this PMD. */
static const enum rte_flow_action_type valid_actions[] = {
	RTE_FLOW_ACTION_TYPE_DROP,
	RTE_FLOW_ACTION_TYPE_QUEUE,
	RTE_FLOW_ACTION_TYPE_MARK,
	RTE_FLOW_ACTION_TYPE_FLAG,
	RTE_FLOW_ACTION_TYPE_END,
};
/** Graph of supported items and associated actions. */
static const struct mlx5_flow_items mlx5_flow_items[] = {
	[RTE_FLOW_ITEM_TYPE_END] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
			       RTE_FLOW_ITEM_TYPE_VXLAN),
	},
	[RTE_FLOW_ITEM_TYPE_ETH] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
			       RTE_FLOW_ITEM_TYPE_IPV4,
			       RTE_FLOW_ITEM_TYPE_IPV6),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_eth){
			.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
			.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
			.type = -1,
		},
		.default_mask = &rte_flow_item_eth_mask,
		.mask_sz = sizeof(struct rte_flow_item_eth),
		.convert = mlx5_flow_create_eth,
		.dst_sz = sizeof(struct ibv_flow_spec_eth),
	},
	[RTE_FLOW_ITEM_TYPE_VLAN] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
			       RTE_FLOW_ITEM_TYPE_IPV6),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_vlan){
			.tci = -1,
		},
		.default_mask = &rte_flow_item_vlan_mask,
		.mask_sz = sizeof(struct rte_flow_item_vlan),
		.convert = mlx5_flow_create_vlan,
		.dst_sz = 0,
	},
	[RTE_FLOW_ITEM_TYPE_IPV4] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
			       RTE_FLOW_ITEM_TYPE_TCP),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_ipv4){
			.hdr = {
				.src_addr = -1,
				.dst_addr = -1,
				.type_of_service = -1,
				.next_proto_id = -1,
			},
		},
		.default_mask = &rte_flow_item_ipv4_mask,
		.mask_sz = sizeof(struct rte_flow_item_ipv4),
		.convert = mlx5_flow_create_ipv4,
		.dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
	},
	[RTE_FLOW_ITEM_TYPE_IPV6] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
			       RTE_FLOW_ITEM_TYPE_TCP),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_ipv6){
			.hdr = {
				.src_addr = {
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
				},
				.dst_addr = {
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
				},
				.vtc_flow = -1,
				.proto = -1,
				.hop_limits = -1,
			},
		},
		.default_mask = &rte_flow_item_ipv6_mask,
		.mask_sz = sizeof(struct rte_flow_item_ipv6),
		.convert = mlx5_flow_create_ipv6,
		.dst_sz = sizeof(struct ibv_flow_spec_ipv6),
	},
	[RTE_FLOW_ITEM_TYPE_UDP] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_udp){
			.hdr = {
				.src_port = -1,
				.dst_port = -1,
			},
		},
		.default_mask = &rte_flow_item_udp_mask,
		.mask_sz = sizeof(struct rte_flow_item_udp),
		.convert = mlx5_flow_create_udp,
		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
	},
	[RTE_FLOW_ITEM_TYPE_TCP] = {
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_tcp){
			.hdr = {
				.src_port = -1,
				.dst_port = -1,
			},
		},
		.default_mask = &rte_flow_item_tcp_mask,
		.mask_sz = sizeof(struct rte_flow_item_tcp),
		.convert = mlx5_flow_create_tcp,
		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
	},
	[RTE_FLOW_ITEM_TYPE_VXLAN] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_vxlan){
			.vni = "\xff\xff\xff",
		},
		.default_mask = &rte_flow_item_vxlan_mask,
		.mask_sz = sizeof(struct rte_flow_item_vxlan),
		.convert = mlx5_flow_create_vxlan,
		.dst_sz = sizeof(struct ibv_flow_spec_tunnel),
	},
};
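/*
 * Illustrative example (not part of the driver): a pattern/action pair
 * accepted by the graph above, matching UDP destination port 4789 and
 * steering it to queue 3. Hypothetical application-side code, shown only
 * to document what this parser expects:
 *
 *	struct rte_flow_item_udp udp_spec = {
 *		.hdr = { .dst_port = RTE_BE16(4789) },
 *	};
 *	struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_UDP, .spec = &udp_spec },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 *	struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE,
 *		  .conf = &(struct rte_flow_action_queue){ .index = 3 } },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 */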
/* Structure to parse actions. */
struct mlx5_flow_action {
	uint32_t queue:1; /**< Target is a receive queue. */
	uint32_t drop:1; /**< Target is a drop queue. */
	uint32_t mark:1; /**< Mark is present in the flow. */
	uint32_t mark_id; /**< Mark identifier. */
	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indexes to use. */
	uint16_t queues_n; /**< Number of entries in queues[]. */
};
/** Structure to pass to the conversion function. */
struct mlx5_flow_parse {
	struct ibv_flow_attr *ibv_attr; /**< Verbs attribute. */
	unsigned int offset; /**< Offset in bytes in the ibv_attr buffer. */
	uint32_t inner; /**< Set once VXLAN is encountered. */
	uint64_t hash_fields; /**< Fields that participate in the hash. */
	struct mlx5_flow_action actions; /**< Parsed action result. */
};
/** Structure for Drop queue. */
struct rte_flow_drop {
	struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
	struct ibv_qp *qp; /**< Verbs queue pair. */
	struct ibv_wq *wq; /**< Verbs work queue. */
	struct ibv_cq *cq; /**< Verbs completion queue. */
};
/**
 * Check support for a given item.
 *
 * @param item[in]
 *   Item specification.
 * @param mask[in]
 *   Bit-masks covering supported fields to compare with spec, last and mask in
 *   \item.
 * @param size
 *   Bit-mask size in bytes.
 *
 * @return
 *   0 on success.
 */
static int
mlx5_flow_item_validate(const struct rte_flow_item *item,
			const uint8_t *mask, unsigned int size)
{
	int ret = 0;

	if (!item->spec && (item->mask || item->last))
		return -1;
	if (item->spec && !item->mask) {
		unsigned int i;
		const uint8_t *spec = item->spec;

		for (i = 0; i < size; ++i)
			if ((spec[i] | mask[i]) != mask[i])
				return -1;
	}
	if (item->last && !item->mask) {
		unsigned int i;
		const uint8_t *spec = item->last;

		for (i = 0; i < size; ++i)
			if ((spec[i] | mask[i]) != mask[i])
				return -1;
	}
	if (item->mask) {
		unsigned int i;
		const uint8_t *spec = item->mask;

		for (i = 0; i < size; ++i)
			if ((spec[i] | mask[i]) != mask[i])
				return -1;
	}
	if (item->spec && item->last) {
		uint8_t spec[size];
		uint8_t last[size];
		const uint8_t *apply = mask;
		unsigned int i;

		if (item->mask)
			apply = item->mask;
		for (i = 0; i < size; ++i) {
			spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
			last[i] = ((const uint8_t *)item->last)[i] & apply[i];
		}
		ret = memcmp(spec, last, size);
	}
	return ret;
}
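/*
 * Note: in the spec/last check above, both bounds are masked and compared
 * with memcmp(), so an item providing "last" is only accepted when the
 * range collapses to a single value; genuine ranges are rejected.
 */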
/**
 * Validate a flow supported by the NIC.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] items
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] flow
 *   Flow structure to update.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_validate(struct priv *priv,
		   const struct rte_flow_attr *attr,
		   const struct rte_flow_item items[],
		   const struct rte_flow_action actions[],
		   struct rte_flow_error *error,
		   struct mlx5_flow_parse *flow)
{
	const struct mlx5_flow_items *cur_item = mlx5_flow_items;

	if (attr->group) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
				   NULL,
				   "groups are not supported");
		return -rte_errno;
	}
	if (attr->priority) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
				   NULL,
				   "priorities are not supported");
		return -rte_errno;
	}
	if (attr->egress) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
				   NULL,
				   "egress is not supported");
		return -rte_errno;
	}
	if (!attr->ingress) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
				   NULL,
				   "only ingress is supported");
		return -rte_errno;
	}
	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
		const struct mlx5_flow_items *token = NULL;
		unsigned int i;
		int err;

		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
			continue;
		for (i = 0;
		     cur_item->items &&
		     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
		     ++i) {
			if (cur_item->items[i] == items->type) {
				token = &mlx5_flow_items[items->type];
				break;
			}
		}
		if (!token)
			goto exit_item_not_supported;
		cur_item = token;
		err = mlx5_flow_item_validate(items,
					      (const uint8_t *)cur_item->mask,
					      cur_item->mask_sz);
		if (err)
			goto exit_item_not_supported;
		if (flow->ibv_attr && cur_item->convert) {
			err = cur_item->convert(items,
						(cur_item->default_mask ?
						 cur_item->default_mask :
						 cur_item->mask),
						flow);
			if (err)
				goto exit_item_not_supported;
		} else if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
			if (flow->inner) {
				rte_flow_error_set(error, ENOTSUP,
						   RTE_FLOW_ERROR_TYPE_ITEM,
						   items,
						   "cannot recognize multiple"
						   " VXLAN encapsulations");
				return -rte_errno;
			}
			flow->inner = 1;
		}
		flow->offset += cur_item->dst_sz;
	}
	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
			continue;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
			flow->actions.drop = 1;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
			const struct rte_flow_action_queue *queue =
				(const struct rte_flow_action_queue *)
				actions->conf;
			uint16_t n;
			uint16_t found = 0;

			if (!queue || (queue->index > (priv->rxqs_n - 1)))
				goto exit_action_not_supported;
			for (n = 0; n < flow->actions.queues_n; ++n) {
				if (flow->actions.queues[n] == queue->index) {
					found = 1;
					break;
				}
			}
			if (flow->actions.queues_n > 1 && !found) {
				rte_flow_error_set(error, ENOTSUP,
					   RTE_FLOW_ERROR_TYPE_ACTION,
					   actions,
					   "queue action not in RSS queues");
				return -rte_errno;
			}
			if (!found) {
				flow->actions.queue = 1;
				flow->actions.queues_n = 1;
				flow->actions.queues[0] = queue->index;
			}
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
			const struct rte_flow_action_rss *rss =
				(const struct rte_flow_action_rss *)
				actions->conf;
			uint16_t n;

			if (!rss || !rss->num) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "no valid queues");
				return -rte_errno;
			}
			if (flow->actions.queues_n == 1) {
				uint16_t found = 0;

				assert(flow->actions.queues_n);
				for (n = 0; n < rss->num; ++n) {
					if (flow->actions.queues[0] ==
					    rss->queue[n]) {
						found = 1;
						break;
					}
				}
				if (!found) {
					rte_flow_error_set(error, ENOTSUP,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "queue action not in RSS"
						   " queues");
					return -rte_errno;
				}
			}
			for (n = 0; n < rss->num; ++n) {
				if (rss->queue[n] >= priv->rxqs_n) {
					rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "queue id > number of"
						   " queues");
					return -rte_errno;
				}
			}
			flow->actions.queue = 1;
			for (n = 0; n < rss->num; ++n)
				flow->actions.queues[n] = rss->queue[n];
			flow->actions.queues_n = rss->num;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
			const struct rte_flow_action_mark *mark =
				(const struct rte_flow_action_mark *)
				actions->conf;

			if (!mark) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "mark must be defined");
				return -rte_errno;
			} else if (mark->id >= MLX5_FLOW_MARK_MAX) {
				rte_flow_error_set(error, ENOTSUP,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "mark must be between 0"
						   " and 16777199");
				return -rte_errno;
			}
			flow->actions.mark = 1;
			flow->actions.mark_id = mark->id;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
			flow->actions.mark = 1;
		} else {
			goto exit_action_not_supported;
		}
	}
	if (flow->actions.mark && !flow->ibv_attr && !flow->actions.drop)
		flow->offset += sizeof(struct ibv_flow_spec_action_tag);
	if (!flow->ibv_attr && flow->actions.drop)
		flow->offset += sizeof(struct ibv_flow_spec_action_drop);
	if (!flow->actions.queue && !flow->actions.drop) {
		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "no valid action");
		return -rte_errno;
	}
	return 0;
exit_item_not_supported:
	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
			   items, "item not supported");
	return -rte_errno;
exit_action_not_supported:
	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
			   actions, "action not supported");
	return -rte_errno;
}
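/*
 * Note: priv_flow_validate() is used in two passes. With flow->ibv_attr
 * set to NULL it only validates the rule and accumulates in flow->offset
 * the buffer size needed for the Verbs specifications; once the buffer is
 * allocated, a second pass converts each item in place.
 */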
/**
 * Validate a flow supported by the NIC.
 *
 * @see rte_flow_validate()
 * @see rte_flow_ops
 */
int
mlx5_flow_validate(struct rte_eth_dev *dev,
		   const struct rte_flow_attr *attr,
		   const struct rte_flow_item items[],
		   const struct rte_flow_action actions[],
		   struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;
	int ret;
	struct mlx5_flow_parse flow = {
		.offset = sizeof(struct ibv_flow_attr),
		.actions = {
			.mark_id = MLX5_FLOW_MARK_DEFAULT,
			.queues_n = 0,
		},
	};

	priv_lock(priv);
	ret = priv_flow_validate(priv, attr, items, actions, error, &flow);
	priv_unlock(priv);
	return ret;
}
/**
 * Convert Ethernet item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data)
{
	const struct rte_flow_item_eth *spec = item->spec;
	const struct rte_flow_item_eth *mask = item->mask;
	struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
	struct ibv_flow_spec_eth *eth;
	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
	unsigned int i;

	++flow->ibv_attr->num_of_specs;
	flow->ibv_attr->priority = 2;
	flow->hash_fields = 0;
	eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
	*eth = (struct ibv_flow_spec_eth) {
		.type = flow->inner | IBV_FLOW_SPEC_ETH,
		.size = eth_size,
	};
	if (!spec)
		return 0;
	if (!mask)
		mask = default_mask;
	memcpy(eth->val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
	memcpy(eth->val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
	eth->val.ether_type = spec->type;
	memcpy(eth->mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
	memcpy(eth->mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
	eth->mask.ether_type = mask->type;
	/* Remove unwanted bits from values. */
	for (i = 0; i < ETHER_ADDR_LEN; ++i) {
		eth->val.dst_mac[i] &= eth->mask.dst_mac[i];
		eth->val.src_mac[i] &= eth->mask.src_mac[i];
	}
	eth->val.ether_type &= eth->mask.ether_type;
	return 0;
}
/**
 * Convert VLAN item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_vlan(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data)
{
	const struct rte_flow_item_vlan *spec = item->spec;
	const struct rte_flow_item_vlan *mask = item->mask;
	struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
	struct ibv_flow_spec_eth *eth;
	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);

	eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset - eth_size);
	if (!spec)
		return 0;
	if (!mask)
		mask = default_mask;
	eth->val.vlan_tag = spec->tci;
	eth->mask.vlan_tag = mask->tci;
	eth->val.vlan_tag &= eth->mask.vlan_tag;
	return 0;
}
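/*
 * Note: a VLAN item does not produce a Verbs specification of its own
 * (its .dst_sz in the graph is 0); the TCI is instead folded into the
 * Ethernet specification written just before it, hence the pointer above
 * computed at flow->offset minus the Ethernet spec size.
 */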
/**
 * Convert IPv4 item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data)
{
	const struct rte_flow_item_ipv4 *spec = item->spec;
	const struct rte_flow_item_ipv4 *mask = item->mask;
	struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
	struct ibv_flow_spec_ipv4_ext *ipv4;
	unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);

	++flow->ibv_attr->num_of_specs;
	flow->ibv_attr->priority = 1;
	flow->hash_fields = (IBV_RX_HASH_SRC_IPV4 |
			     IBV_RX_HASH_DST_IPV4);
	ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
	*ipv4 = (struct ibv_flow_spec_ipv4_ext) {
		.type = flow->inner | IBV_FLOW_SPEC_IPV4_EXT,
		.size = ipv4_size,
	};
	if (!spec)
		return 0;
	if (!mask)
		mask = default_mask;
	ipv4->val = (struct ibv_flow_ipv4_ext_filter){
		.src_ip = spec->hdr.src_addr,
		.dst_ip = spec->hdr.dst_addr,
		.proto = spec->hdr.next_proto_id,
		.tos = spec->hdr.type_of_service,
	};
	ipv4->mask = (struct ibv_flow_ipv4_ext_filter){
		.src_ip = mask->hdr.src_addr,
		.dst_ip = mask->hdr.dst_addr,
		.proto = mask->hdr.next_proto_id,
		.tos = mask->hdr.type_of_service,
	};
	/* Remove unwanted bits from values. */
	ipv4->val.src_ip &= ipv4->mask.src_ip;
	ipv4->val.dst_ip &= ipv4->mask.dst_ip;
	ipv4->val.proto &= ipv4->mask.proto;
	ipv4->val.tos &= ipv4->mask.tos;
	return 0;
}
/**
 * Convert IPv6 item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data)
{
	const struct rte_flow_item_ipv6 *spec = item->spec;
	const struct rte_flow_item_ipv6 *mask = item->mask;
	struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
	struct ibv_flow_spec_ipv6 *ipv6;
	unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
	unsigned int i;

	++flow->ibv_attr->num_of_specs;
	flow->ibv_attr->priority = 1;
	flow->hash_fields = (IBV_RX_HASH_SRC_IPV6 |
			     IBV_RX_HASH_DST_IPV6);
	ipv6 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
	*ipv6 = (struct ibv_flow_spec_ipv6) {
		.type = flow->inner | IBV_FLOW_SPEC_IPV6,
		.size = ipv6_size,
	};
	if (!spec)
		return 0;
	if (!mask)
		mask = default_mask;
	memcpy(ipv6->val.src_ip, spec->hdr.src_addr,
	       RTE_DIM(ipv6->val.src_ip));
	memcpy(ipv6->val.dst_ip, spec->hdr.dst_addr,
	       RTE_DIM(ipv6->val.dst_ip));
	/* Bug fix: the value fields must be filled from the spec, otherwise
	 * masking below operates on zeroes only. */
	ipv6->val.flow_label = spec->hdr.vtc_flow;
	ipv6->val.next_hdr = spec->hdr.proto;
	ipv6->val.hop_limit = spec->hdr.hop_limits;
	memcpy(ipv6->mask.src_ip, mask->hdr.src_addr,
	       RTE_DIM(ipv6->mask.src_ip));
	memcpy(ipv6->mask.dst_ip, mask->hdr.dst_addr,
	       RTE_DIM(ipv6->mask.dst_ip));
	ipv6->mask.flow_label = mask->hdr.vtc_flow;
	ipv6->mask.next_hdr = mask->hdr.proto;
	ipv6->mask.hop_limit = mask->hdr.hop_limits;
	/* Remove unwanted bits from values. */
	for (i = 0; i < RTE_DIM(ipv6->val.src_ip); ++i) {
		ipv6->val.src_ip[i] &= ipv6->mask.src_ip[i];
		ipv6->val.dst_ip[i] &= ipv6->mask.dst_ip[i];
	}
	ipv6->val.flow_label &= ipv6->mask.flow_label;
	ipv6->val.next_hdr &= ipv6->mask.next_hdr;
	ipv6->val.hop_limit &= ipv6->mask.hop_limit;
	return 0;
}
/**
 * Convert UDP item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_udp(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data)
{
	const struct rte_flow_item_udp *spec = item->spec;
	const struct rte_flow_item_udp *mask = item->mask;
	struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
	struct ibv_flow_spec_tcp_udp *udp;
	unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);

	++flow->ibv_attr->num_of_specs;
	flow->ibv_attr->priority = 0;
	flow->hash_fields |= (IBV_RX_HASH_SRC_PORT_UDP |
			      IBV_RX_HASH_DST_PORT_UDP);
	udp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
	*udp = (struct ibv_flow_spec_tcp_udp) {
		.type = flow->inner | IBV_FLOW_SPEC_UDP,
		.size = udp_size,
	};
	if (!spec)
		return 0;
	if (!mask)
		mask = default_mask;
	udp->val.dst_port = spec->hdr.dst_port;
	udp->val.src_port = spec->hdr.src_port;
	udp->mask.dst_port = mask->hdr.dst_port;
	udp->mask.src_port = mask->hdr.src_port;
	/* Remove unwanted bits from values. */
	udp->val.src_port &= udp->mask.src_port;
	udp->val.dst_port &= udp->mask.dst_port;
	return 0;
}
/**
 * Convert TCP item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_tcp(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data)
{
	const struct rte_flow_item_tcp *spec = item->spec;
	const struct rte_flow_item_tcp *mask = item->mask;
	struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
	struct ibv_flow_spec_tcp_udp *tcp;
	unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);

	++flow->ibv_attr->num_of_specs;
	flow->ibv_attr->priority = 0;
	flow->hash_fields |= (IBV_RX_HASH_SRC_PORT_TCP |
			      IBV_RX_HASH_DST_PORT_TCP);
	tcp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
	*tcp = (struct ibv_flow_spec_tcp_udp) {
		.type = flow->inner | IBV_FLOW_SPEC_TCP,
		.size = tcp_size,
	};
	if (!spec)
		return 0;
	if (!mask)
		mask = default_mask;
	tcp->val.dst_port = spec->hdr.dst_port;
	tcp->val.src_port = spec->hdr.src_port;
	tcp->mask.dst_port = mask->hdr.dst_port;
	tcp->mask.src_port = mask->hdr.src_port;
	/* Remove unwanted bits from values. */
	tcp->val.src_port &= tcp->mask.src_port;
	tcp->val.dst_port &= tcp->mask.dst_port;
	return 0;
}
/**
 * Convert VXLAN item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
		       const void *default_mask,
		       void *data)
{
	const struct rte_flow_item_vxlan *spec = item->spec;
	const struct rte_flow_item_vxlan *mask = item->mask;
	struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
	struct ibv_flow_spec_tunnel *vxlan;
	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
	union vni {
		uint32_t vlan_id;
		uint8_t vni[4];
	} id;

	++flow->ibv_attr->num_of_specs;
	flow->ibv_attr->priority = 0;
	id.vni[0] = 0;
	vxlan = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
	*vxlan = (struct ibv_flow_spec_tunnel) {
		.type = flow->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
		.size = size,
	};
	flow->inner = IBV_FLOW_SPEC_INNER;
	if (!spec)
		return 0;
	if (!mask)
		mask = default_mask;
	memcpy(&id.vni[1], spec->vni, 3);
	vxlan->val.tunnel_id = id.vlan_id;
	memcpy(&id.vni[1], mask->vni, 3);
	vxlan->mask.tunnel_id = id.vlan_id;
	/* Remove unwanted bits from values. */
	vxlan->val.tunnel_id &= vxlan->mask.tunnel_id;
	return 0;
}
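/*
 * Note: the union above is a byte-layout trick. A VXLAN VNI is 24 bits
 * wide; copying it into bytes 1-3 of a zero-initialized 32-bit field
 * yields the tunnel_id value in the layout the Verbs tunnel specification
 * expects.
 */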
/**
 * Convert mark/flag action to Verbs specification.
 *
 * @param flow
 *   Pointer to MLX5 flow structure.
 * @param mark_id
 *   Mark identifier.
 */
static int
mlx5_flow_create_flag_mark(struct mlx5_flow_parse *flow, uint32_t mark_id)
{
	struct ibv_flow_spec_action_tag *tag;
	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);

	tag = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
	*tag = (struct ibv_flow_spec_action_tag){
		.type = IBV_FLOW_SPEC_ACTION_TAG,
		.size = size,
		.tag_id = mlx5_flow_mark_set(mark_id),
	};
	++flow->ibv_attr->num_of_specs;
	return 0;
}
/**
 * Complete flow rule creation with a drop queue.
 *
 * @param priv
 *   Pointer to private structure.
 * @param flow
 *   MLX5 flow attributes (filled by mlx5_flow_validate()).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A flow if the rule could be created.
 */
static struct rte_flow *
priv_flow_create_action_queue_drop(struct priv *priv,
				   struct mlx5_flow_parse *flow,
				   struct rte_flow_error *error)
{
	struct rte_flow *rte_flow;
	struct ibv_flow_spec_action_drop *drop;
	unsigned int size = sizeof(struct ibv_flow_spec_action_drop);

	assert(priv->pd);
	assert(priv->ctx);
	rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow), 0);
	if (!rte_flow) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "cannot allocate flow memory");
		return NULL;
	}
	rte_flow->drop = 1;
	drop = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
	*drop = (struct ibv_flow_spec_action_drop){
		.type = IBV_FLOW_SPEC_ACTION_DROP,
		.size = size,
	};
	++flow->ibv_attr->num_of_specs;
	flow->offset += sizeof(struct ibv_flow_spec_action_drop);
	rte_flow->ibv_attr = flow->ibv_attr;
	if (!priv->dev->data->dev_started)
		return rte_flow;
	rte_flow->qp = priv->flow_drop_queue->qp;
	rte_flow->ibv_flow = ibv_create_flow(rte_flow->qp,
					     rte_flow->ibv_attr);
	if (!rte_flow->ibv_flow) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "flow rule creation failure");
		goto error;
	}
	return rte_flow;
error:
	assert(rte_flow);
	rte_free(rte_flow);
	return NULL;
}
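/*
 * Note: even though the specification carries IBV_FLOW_SPEC_ACTION_DROP,
 * Verbs still needs a QP to attach the flow to; the shared QP created by
 * priv_flow_create_drop_queue() serves as that attach point.
 */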
/**
 * Complete flow rule creation.
 *
 * @param priv
 *   Pointer to private structure.
 * @param flow
 *   MLX5 flow attributes (filled by mlx5_flow_validate()).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A flow if the rule could be created.
 */
static struct rte_flow *
priv_flow_create_action_queue(struct priv *priv,
			      struct mlx5_flow_parse *flow,
			      struct rte_flow_error *error)
{
	struct rte_flow *rte_flow;
	unsigned int i;
	unsigned int j;
	const unsigned int wqs_n = 1 << log2above(flow->actions.queues_n);
	struct ibv_wq *wqs[wqs_n];

	assert(priv->pd);
	assert(priv->ctx);
	assert(!flow->actions.drop);
	rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow) +
			      sizeof(*rte_flow->rxqs) * flow->actions.queues_n,
			      0);
	if (!rte_flow) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "cannot allocate flow memory");
		return NULL;
	}
	for (i = 0; i < flow->actions.queues_n; ++i) {
		struct rxq_ctrl *rxq;

		rxq = container_of((*priv->rxqs)[flow->actions.queues[i]],
				   struct rxq_ctrl, rxq);
		wqs[i] = rxq->wq;
		rte_flow->rxqs[i] = &rxq->rxq;
		++rte_flow->rxqs_n;
		rxq->rxq.mark |= flow->actions.mark;
	}
	/* finalise indirection table. */
	for (j = 0; i < wqs_n; ++i, ++j) {
		wqs[i] = wqs[j];
		if (j == flow->actions.queues_n)
			j = 0;
	}
	rte_flow->mark = flow->actions.mark;
	rte_flow->ibv_attr = flow->ibv_attr;
	rte_flow->hash_fields = flow->hash_fields;
	rte_flow->ind_table = ibv_create_rwq_ind_table(
		priv->ctx,
		&(struct ibv_rwq_ind_table_init_attr){
			.log_ind_tbl_size = log2above(flow->actions.queues_n),
			.ind_tbl = wqs,
			.comp_mask = 0,
		});
	if (!rte_flow->ind_table) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "cannot allocate indirection table");
		goto error;
	}
	rte_flow->qp = ibv_create_qp_ex(
		priv->ctx,
		&(struct ibv_qp_init_attr_ex){
			.qp_type = IBV_QPT_RAW_PACKET,
			.comp_mask =
				IBV_QP_INIT_ATTR_PD |
				IBV_QP_INIT_ATTR_IND_TABLE |
				IBV_QP_INIT_ATTR_RX_HASH,
			.rx_hash_conf = (struct ibv_rx_hash_conf){
				.rx_hash_function =
					IBV_RX_HASH_FUNC_TOEPLITZ,
				.rx_hash_key_len = rss_hash_default_key_len,
				.rx_hash_key = rss_hash_default_key,
				.rx_hash_fields_mask = rte_flow->hash_fields,
			},
			.rwq_ind_tbl = rte_flow->ind_table,
			.pd = priv->pd,
		});
	if (!rte_flow->qp) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "cannot allocate QP");
		goto error;
	}
	if (!priv->dev->data->dev_started)
		return rte_flow;
	rte_flow->ibv_flow = ibv_create_flow(rte_flow->qp,
					     rte_flow->ibv_attr);
	if (!rte_flow->ibv_flow) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "flow rule creation failure");
		goto error;
	}
	return rte_flow;
error:
	if (rte_flow->qp)
		ibv_destroy_qp(rte_flow->qp);
	if (rte_flow->ind_table)
		ibv_destroy_rwq_ind_table(rte_flow->ind_table);
	rte_free(rte_flow);
	return NULL;
}
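/*
 * Note: Verbs indirection tables must contain a power-of-two number of
 * work queues, hence the log2above() rounding above; the configured
 * queues are repeated round-robin to pad the table up to wqs_n entries.
 */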
/**
 * Convert a flow.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] items
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A flow on success, NULL otherwise.
 */
static struct rte_flow *
priv_flow_create(struct priv *priv,
		 const struct rte_flow_attr *attr,
		 const struct rte_flow_item items[],
		 const struct rte_flow_action actions[],
		 struct rte_flow_error *error)
{
	struct rte_flow *rte_flow;
	struct mlx5_flow_parse flow = {
		.offset = sizeof(struct ibv_flow_attr),
		.actions = {
			.mark_id = MLX5_FLOW_MARK_DEFAULT,
			.queues = { 0 },
			.queues_n = 0,
		},
	};
	int err;

	err = priv_flow_validate(priv, attr, items, actions, error, &flow);
	if (err)
		goto exit;
	flow.ibv_attr = rte_malloc(__func__, flow.offset, 0);
	flow.offset = sizeof(struct ibv_flow_attr);
	if (!flow.ibv_attr) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "cannot allocate ibv_attr memory");
		goto exit;
	}
	*flow.ibv_attr = (struct ibv_flow_attr){
		.type = IBV_FLOW_ATTR_NORMAL,
		.size = sizeof(struct ibv_flow_attr),
		.priority = attr->priority,
		.num_of_specs = 0,
		.port = 0,
		.flags = 0,
	};
	flow.inner = 0;
	flow.hash_fields = 0;
	claim_zero(priv_flow_validate(priv, attr, items, actions,
				      error, &flow));
	if (flow.actions.mark && !flow.actions.drop) {
		mlx5_flow_create_flag_mark(&flow, flow.actions.mark_id);
		flow.offset += sizeof(struct ibv_flow_spec_action_tag);
	}
	if (flow.actions.drop)
		rte_flow =
			priv_flow_create_action_queue_drop(priv, &flow, error);
	else
		rte_flow = priv_flow_create_action_queue(priv, &flow, error);
	if (!rte_flow)
		goto exit;
	return rte_flow;
exit:
	rte_free(flow.ibv_attr);
	return NULL;
}
/**
 * Create a flow.
 *
 * @see rte_flow_create()
 * @see rte_flow_ops
 */
struct rte_flow *
mlx5_flow_create(struct rte_eth_dev *dev,
		 const struct rte_flow_attr *attr,
		 const struct rte_flow_item items[],
		 const struct rte_flow_action actions[],
		 struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;
	struct rte_flow *flow;

	priv_lock(priv);
	flow = priv_flow_create(priv, attr, items, actions, error);
	if (flow) {
		TAILQ_INSERT_TAIL(&priv->flows, flow, next);
		DEBUG("Flow created %p", (void *)flow);
	}
	priv_unlock(priv);
	return flow;
}
/**
 * Destroy a flow.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] flow
 *   Flow to destroy.
 */
static void
priv_flow_destroy(struct priv *priv,
		  struct rte_flow *flow)
{
	TAILQ_REMOVE(&priv->flows, flow, next);
	if (flow->ibv_flow)
		claim_zero(ibv_destroy_flow(flow->ibv_flow));
	if (flow->drop)
		goto free;
	if (flow->qp)
		claim_zero(ibv_destroy_qp(flow->qp));
	if (flow->ind_table)
		claim_zero(ibv_destroy_rwq_ind_table(flow->ind_table));
	if (flow->mark) {
		struct rte_flow *tmp;
		struct rxq *rxq;
		uint32_t mark_n = 0;
		uint32_t queue_n;

		/*
		 * To remove the mark from the queue, the queue must not be
		 * present in any other marked flow (RSS or not).
		 */
		for (queue_n = 0; queue_n < flow->rxqs_n; ++queue_n) {
			mark_n = 0; /* Reset the count for each queue. */
			rxq = flow->rxqs[queue_n];
			for (tmp = TAILQ_FIRST(&priv->flows);
			     tmp;
			     tmp = TAILQ_NEXT(tmp, next)) {
				uint32_t tqueue_n;

				if (tmp->drop)
					continue;
				for (tqueue_n = 0;
				     tqueue_n < tmp->rxqs_n;
				     ++tqueue_n) {
					struct rxq *trxq;

					trxq = tmp->rxqs[tqueue_n];
					if (rxq == trxq)
						++mark_n;
				}
			}
			rxq->mark = !!mark_n;
		}
	}
free:
	rte_free(flow->ibv_attr);
	DEBUG("Flow destroyed %p", (void *)flow);
	rte_free(flow);
}
/**
 * Destroy a flow.
 *
 * @see rte_flow_destroy()
 * @see rte_flow_ops
 */
int
mlx5_flow_destroy(struct rte_eth_dev *dev,
		  struct rte_flow *flow,
		  struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	(void)error;
	priv_lock(priv);
	priv_flow_destroy(priv, flow);
	priv_unlock(priv);
	return 0;
}
/**
 * Destroy all flows.
 *
 * @param priv
 *   Pointer to private structure.
 */
static void
priv_flow_flush(struct priv *priv)
{
	while (!TAILQ_EMPTY(&priv->flows)) {
		struct rte_flow *flow;

		flow = TAILQ_FIRST(&priv->flows);
		priv_flow_destroy(priv, flow);
	}
}
/**
 * Destroy all flows.
 *
 * @see rte_flow_flush()
 * @see rte_flow_ops
 */
int
mlx5_flow_flush(struct rte_eth_dev *dev,
		struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	(void)error;
	priv_lock(priv);
	priv_flow_flush(priv);
	priv_unlock(priv);
	return 0;
}
/**
 * Create drop queue.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   0 on success.
 */
static int
priv_flow_create_drop_queue(struct priv *priv)
{
	struct rte_flow_drop *fdq = NULL;

	assert(priv->pd);
	assert(priv->ctx);
	fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
	if (!fdq) {
		WARN("cannot allocate memory for drop queue");
		/* Bug fix: bail out here instead of dereferencing a NULL
		 * fdq in the error path below. */
		return -1;
	}
	fdq->cq = ibv_create_cq(priv->ctx, 1, NULL, NULL, 0);
	if (!fdq->cq) {
		WARN("cannot allocate CQ for drop queue");
		goto error;
	}
	fdq->wq = ibv_create_wq(priv->ctx,
			&(struct ibv_wq_init_attr){
			.wq_type = IBV_WQT_RQ,
			.max_wr = 1,
			.max_sge = 1,
			.pd = priv->pd,
			.cq = fdq->cq,
			});
	if (!fdq->wq) {
		WARN("cannot allocate WQ for drop queue");
		goto error;
	}
	fdq->ind_table = ibv_create_rwq_ind_table(priv->ctx,
			&(struct ibv_rwq_ind_table_init_attr){
			.log_ind_tbl_size = 0,
			.ind_tbl = &fdq->wq,
			.comp_mask = 0,
			});
	if (!fdq->ind_table) {
		WARN("cannot allocate indirection table for drop queue");
		goto error;
	}
	fdq->qp = ibv_create_qp_ex(priv->ctx,
		&(struct ibv_qp_init_attr_ex){
			.qp_type = IBV_QPT_RAW_PACKET,
			.comp_mask =
				IBV_QP_INIT_ATTR_PD |
				IBV_QP_INIT_ATTR_IND_TABLE |
				IBV_QP_INIT_ATTR_RX_HASH,
			.rx_hash_conf = (struct ibv_rx_hash_conf){
				.rx_hash_function =
					IBV_RX_HASH_FUNC_TOEPLITZ,
				.rx_hash_key_len = rss_hash_default_key_len,
				.rx_hash_key = rss_hash_default_key,
				.rx_hash_fields_mask = 0,
			},
			.rwq_ind_tbl = fdq->ind_table,
			.pd = priv->pd,
		});
	if (!fdq->qp) {
		WARN("cannot allocate QP for drop queue");
		goto error;
	}
	priv->flow_drop_queue = fdq;
	return 0;
error:
	if (fdq->qp)
		claim_zero(ibv_destroy_qp(fdq->qp));
	if (fdq->ind_table)
		claim_zero(ibv_destroy_rwq_ind_table(fdq->ind_table));
	if (fdq->wq)
		claim_zero(ibv_destroy_wq(fdq->wq));
	if (fdq->cq)
		claim_zero(ibv_destroy_cq(fdq->cq));
	rte_free(fdq);
	priv->flow_drop_queue = NULL;
	return -1;
}
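/*
 * Note: the drop queue resources are created in Verbs dependency order
 * (CQ, then WQ, then indirection table, then QP) and torn down in the
 * reverse order; the WQ is never given receive buffers, so traffic
 * steered to this QP is effectively discarded.
 */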
/**
 * Delete drop queue.
 *
 * @param priv
 *   Pointer to private structure.
 */
static void
priv_flow_delete_drop_queue(struct priv *priv)
{
	struct rte_flow_drop *fdq = priv->flow_drop_queue;

	if (!fdq)
		return;
	if (fdq->qp)
		claim_zero(ibv_destroy_qp(fdq->qp));
	if (fdq->ind_table)
		claim_zero(ibv_destroy_rwq_ind_table(fdq->ind_table));
	if (fdq->wq)
		claim_zero(ibv_destroy_wq(fdq->wq));
	if (fdq->cq)
		claim_zero(ibv_destroy_cq(fdq->cq));
	rte_free(fdq);
	priv->flow_drop_queue = NULL;
}
/**
 * Remove all flows.
 *
 * Called by dev_stop() to remove all flows.
 *
 * @param priv
 *   Pointer to private structure.
 */
void
priv_flow_stop(struct priv *priv)
{
	struct rte_flow *flow;

	TAILQ_FOREACH_REVERSE(flow, &priv->flows, mlx5_flows, next) {
		claim_zero(ibv_destroy_flow(flow->ibv_flow));
		flow->ibv_flow = NULL;
		if (flow->mark) {
			unsigned int n;

			for (n = 0; n < flow->rxqs_n; ++n)
				flow->rxqs[n]->mark = 0;
		}
		DEBUG("Flow %p removed", (void *)flow);
	}
	priv_flow_delete_drop_queue(priv);
}
/**
 * Add all flows.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   0 on success, an errno value otherwise and rte_errno is set.
 */
int
priv_flow_start(struct priv *priv)
{
	int ret;
	struct rte_flow *flow;

	ret = priv_flow_create_drop_queue(priv);
	if (ret)
		return -1;
	TAILQ_FOREACH(flow, &priv->flows, next) {
		struct ibv_qp *qp;

		if (flow->drop)
			qp = priv->flow_drop_queue->qp;
		else
			qp = flow->qp;
		flow->ibv_flow = ibv_create_flow(qp, flow->ibv_attr);
		if (!flow->ibv_flow) {
			DEBUG("Flow %p cannot be applied", (void *)flow);
			rte_errno = EINVAL;
			return rte_errno;
		}
		DEBUG("Flow %p applied", (void *)flow);
		if (flow->mark) {
			unsigned int n;

			for (n = 0; n < flow->rxqs_n; ++n)
				flow->rxqs[n]->mark = 1;
		}
	}
	return 0;
}
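/*
 * Note: flow rules persist in priv->flows across a stop/start cycle.
 * priv_flow_stop() only releases the hardware state; priv_flow_start()
 * then re-applies every remembered rule once the drop queue has been
 * recreated.
 */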
/**
 * Verify if the Rx queue is used in a flow.
 *
 * @param priv
 *   Pointer to private structure.
 * @param rxq
 *   Pointer to the queue to search.
 *
 * @return
 *   Nonzero if the queue is used by a flow.
 */
int
priv_flow_rxq_in_use(struct priv *priv, struct rxq *rxq)
{
	struct rte_flow *flow;

	for (flow = TAILQ_FIRST(&priv->flows);
	     flow;
	     flow = TAILQ_NEXT(flow, next)) {
		unsigned int n;

		if (flow->drop)
			continue;
		for (n = 0; n < flow->rxqs_n; ++n) {
			if (flow->rxqs[n] == rxq)
				return 1;
		}
	}
	return 0;
}
/**
 * Isolated mode.
 *
 * @see rte_flow_isolate()
 * @see rte_flow_ops
 */
int
mlx5_flow_isolate(struct rte_eth_dev *dev,
		  int enable,
		  struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	priv_lock(priv);
	if (priv->started) {
		rte_flow_error_set(error, EBUSY,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "port must be stopped first");
		priv_unlock(priv);
		return -rte_errno;
	}
	priv->isolated = !!enable;
	priv_unlock(priv);
	return 0;
}
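/*
 * Note: as documented for rte_flow_isolate(), isolated mode is meant to
 * guarantee that ingress traffic only reaches queues through explicit
 * flow rules; it must be toggled while the port is stopped, which is what
 * the check above enforces.
 */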