 * Copyright 2016 6WIND S.A.
 * Copyright 2016 Mellanox.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *     distribution.
 *   * Neither the name of 6WIND S.A. nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <sys/queue.h>

/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#pragma GCC diagnostic ignored "-Wpedantic"
#include <infiniband/verbs.h>
#pragma GCC diagnostic error "-Wpedantic"

#include <rte_ethdev.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>
mlx5_flow_create_eth(const struct rte_flow_item *item,
		     const void *default_mask,

mlx5_flow_create_vlan(const struct rte_flow_item *item,
		      const void *default_mask,

mlx5_flow_create_ipv4(const struct rte_flow_item *item,
		      const void *default_mask,

mlx5_flow_create_ipv6(const struct rte_flow_item *item,
		      const void *default_mask,

mlx5_flow_create_udp(const struct rte_flow_item *item,
		     const void *default_mask,

mlx5_flow_create_tcp(const struct rte_flow_item *item,
		     const void *default_mask,

mlx5_flow_create_vxlan(const struct rte_flow_item *item,
		       const void *default_mask,
	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
	struct ibv_exp_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
	struct ibv_exp_rwq_ind_table *ind_table; /**< Indirection table. */
	struct ibv_qp *qp; /**< Verbs queue pair. */
	struct ibv_exp_flow *ibv_flow; /**< Verbs flow. */
	struct ibv_exp_wq *wq; /**< Verbs work queue. */
	struct ibv_cq *cq; /**< Verbs completion queue. */
	uint16_t rxqs_n; /**< Number of queues in this flow, 0 if drop queue. */
	uint32_t mark:1; /**< Set if the flow is marked. */
	uint32_t drop:1; /**< Drop queue. */
	uint64_t hash_fields; /**< Fields that participate in the hash. */
	struct rxq *rxqs[]; /**< Pointer to the queues array. */
/** Static initializer for items. */
	(const enum rte_flow_item_type []){ \
		__VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
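/*
 * Illustration (added for clarity, not part of the original sources): with
 * the initializer above, an entry such as
 * ITEMS(RTE_FLOW_ITEM_TYPE_IPV4, RTE_FLOW_ITEM_TYPE_IPV6) is expected to
 * expand to a compound literal terminated by RTE_FLOW_ITEM_TYPE_END:
 *
 *   (const enum rte_flow_item_type []){
 *           RTE_FLOW_ITEM_TYPE_IPV4,
 *           RTE_FLOW_ITEM_TYPE_IPV6,
 *           RTE_FLOW_ITEM_TYPE_END,
 *   }
 *
 * so every .items list below can be walked until END without storing its
 * length.
 */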
/** Structure to generate a simple graph of layers supported by the NIC. */
struct mlx5_flow_items {
	/** List of possible actions for these items. */
	const enum rte_flow_action_type *const actions;
	/** Bit-masks corresponding to the possibilities for the item. */
	 * Default bit-masks to use when item->mask is not provided. When
	 * \default_mask is also NULL, the full supported bit-mask (\mask) is
	const void *default_mask;
	/** Bit-mask size in bytes. */
	const unsigned int mask_sz;
	 * Conversion function from rte_flow to NIC specific flow.
	 *   rte_flow item to convert.
	 * @param default_mask
	 *   Default bit-masks to use when item->mask is not provided.
	 *   Internal structure to store the conversion.
	 *   0 on success, negative value otherwise.
	int (*convert)(const struct rte_flow_item *item,
		       const void *default_mask,
	/** Size in bytes of the destination structure. */
	const unsigned int dst_sz;
	/** List of possible following items. */
	const enum rte_flow_item_type *const items;
/** Valid actions for this PMD. */
static const enum rte_flow_action_type valid_actions[] = {
	RTE_FLOW_ACTION_TYPE_DROP,
	RTE_FLOW_ACTION_TYPE_QUEUE,
	RTE_FLOW_ACTION_TYPE_MARK,
	RTE_FLOW_ACTION_TYPE_FLAG,
	RTE_FLOW_ACTION_TYPE_END,
/** Graph of supported items and associated actions. */
static const struct mlx5_flow_items mlx5_flow_items[] = {
	[RTE_FLOW_ITEM_TYPE_END] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
			       RTE_FLOW_ITEM_TYPE_VXLAN),
	[RTE_FLOW_ITEM_TYPE_ETH] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
			       RTE_FLOW_ITEM_TYPE_IPV4,
			       RTE_FLOW_ITEM_TYPE_IPV6),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_eth){
			.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
			.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
		.default_mask = &rte_flow_item_eth_mask,
		.mask_sz = sizeof(struct rte_flow_item_eth),
		.convert = mlx5_flow_create_eth,
		.dst_sz = sizeof(struct ibv_exp_flow_spec_eth),
	[RTE_FLOW_ITEM_TYPE_VLAN] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
			       RTE_FLOW_ITEM_TYPE_IPV6),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_vlan){
		.default_mask = &rte_flow_item_vlan_mask,
		.mask_sz = sizeof(struct rte_flow_item_vlan),
		.convert = mlx5_flow_create_vlan,
	[RTE_FLOW_ITEM_TYPE_IPV4] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
			       RTE_FLOW_ITEM_TYPE_TCP),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_ipv4){
				.type_of_service = -1,
		.default_mask = &rte_flow_item_ipv4_mask,
		.mask_sz = sizeof(struct rte_flow_item_ipv4),
		.convert = mlx5_flow_create_ipv4,
		.dst_sz = sizeof(struct ibv_exp_flow_spec_ipv4_ext),
	[RTE_FLOW_ITEM_TYPE_IPV6] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
			       RTE_FLOW_ITEM_TYPE_TCP),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_ipv6){
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
		.default_mask = &rte_flow_item_ipv6_mask,
		.mask_sz = sizeof(struct rte_flow_item_ipv6),
		.convert = mlx5_flow_create_ipv6,
		.dst_sz = sizeof(struct ibv_exp_flow_spec_ipv6_ext),
	[RTE_FLOW_ITEM_TYPE_UDP] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_udp){
		.default_mask = &rte_flow_item_udp_mask,
		.mask_sz = sizeof(struct rte_flow_item_udp),
		.convert = mlx5_flow_create_udp,
		.dst_sz = sizeof(struct ibv_exp_flow_spec_tcp_udp),
	[RTE_FLOW_ITEM_TYPE_TCP] = {
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_tcp){
		.default_mask = &rte_flow_item_tcp_mask,
		.mask_sz = sizeof(struct rte_flow_item_tcp),
		.convert = mlx5_flow_create_tcp,
		.dst_sz = sizeof(struct ibv_exp_flow_spec_tcp_udp),
	[RTE_FLOW_ITEM_TYPE_VXLAN] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_vxlan){
			.vni = "\xff\xff\xff",
		.default_mask = &rte_flow_item_vxlan_mask,
		.mask_sz = sizeof(struct rte_flow_item_vxlan),
		.convert = mlx5_flow_create_vxlan,
		.dst_sz = sizeof(struct ibv_exp_flow_spec_tunnel),
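/*
 * Illustrative note (added for clarity, not part of the original sources):
 * priv_flow_validate() below walks this graph while parsing a pattern. A
 * pattern such as
 *
 *   eth / ipv4 / udp / end
 *
 * is accepted because RTE_FLOW_ITEM_TYPE_ETH is listed in .items of the END
 * entry, IPV4 in .items of the ETH entry and UDP in .items of the IPV4
 * entry; an item that is not a listed follower of the previous one is
 * rejected with "item not supported".
 */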
/** Structure to pass to the conversion function. */
	struct ibv_exp_flow_attr *ibv_attr; /**< Verbs attribute. */
	unsigned int offset; /**< Offset in bytes in the ibv_attr buffer. */
	uint32_t inner; /**< Set once VXLAN is encountered. */
	uint64_t hash_fields; /**< Fields that participate in the hash. */
/** Structure for the drop queue. */
struct rte_flow_drop {
	struct ibv_exp_rwq_ind_table *ind_table; /**< Indirection table. */
	struct ibv_qp *qp; /**< Verbs queue pair. */
	struct ibv_exp_wq *wq; /**< Verbs work queue. */
	struct ibv_cq *cq; /**< Verbs completion queue. */
struct mlx5_flow_action {
	uint32_t queue:1; /**< Target is a receive queue. */
	uint32_t drop:1; /**< Target is a drop queue. */
	uint32_t mark:1; /**< Mark is present in the flow. */
	uint32_t mark_id; /**< Mark identifier. */
	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indexes to use. */
	uint16_t queues_n; /**< Number of entries in queues[]. */
 * Check support for a given item.
 *
 *   Item specification.
 *   Bit-masks covering supported fields to compare with spec, last and mask in
 *   Bit-mask size in bytes.
mlx5_flow_item_validate(const struct rte_flow_item *item,
			const uint8_t *mask, unsigned int size)
	if (!item->spec && (item->mask || item->last))
	if (item->spec && !item->mask) {
		const uint8_t *spec = item->spec;

		for (i = 0; i < size; ++i)
			if ((spec[i] | mask[i]) != mask[i])
	if (item->last && !item->mask) {
		const uint8_t *spec = item->last;

		for (i = 0; i < size; ++i)
			if ((spec[i] | mask[i]) != mask[i])
		const uint8_t *spec = item->mask;

		for (i = 0; i < size; ++i)
			if ((spec[i] | mask[i]) != mask[i])
	if (item->spec && item->last) {
		const uint8_t *apply = mask;

		for (i = 0; i < size; ++i) {
			spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
			last[i] = ((const uint8_t *)item->last)[i] & apply[i];
		ret = memcmp(spec, last, size);
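/*
 * Worked example (added for clarity, not part of the original sources):
 * with a supported mask byte of 0xf0, a spec byte of 0x30 passes the check
 * above since (0x30 | 0xf0) == 0xf0, while a spec byte of 0x38 fails since
 * (0x38 | 0xf0) == 0xf8 != 0xf0, i.e. it asks to match bits the device
 * cannot match on.
 */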
 * Validate a flow supported by the NIC.
 *
 *   Pointer to private structure.
 *   Flow rule attributes.
 *   Pattern specification (list terminated by the END pattern item).
 *   Associated actions (list terminated by the END action).
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] flow
 *   Flow structure to update.
 * @param[in, out] action
 *   Action structure to update.
 *   0 on success, a negative errno value otherwise and rte_errno is set.
priv_flow_validate(struct priv *priv,
		   const struct rte_flow_attr *attr,
		   const struct rte_flow_item items[],
		   const struct rte_flow_action actions[],
		   struct rte_flow_error *error,
		   struct mlx5_flow *flow,
		   struct mlx5_flow_action *action)
	const struct mlx5_flow_items *cur_item = mlx5_flow_items;

		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
				   "groups are not supported");
	if (attr->priority) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
				   "priorities are not supported");
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
				   "egress is not supported");
	if (!attr->ingress) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
				   "only ingress is supported");
	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
		const struct mlx5_flow_items *token = NULL;

		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
		     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
			if (cur_item->items[i] == items->type) {
				token = &mlx5_flow_items[items->type];
			goto exit_item_not_supported;
		err = mlx5_flow_item_validate(items,
					      (const uint8_t *)cur_item->mask,
			goto exit_item_not_supported;
		if (flow->ibv_attr && cur_item->convert) {
			err = cur_item->convert(items,
						(cur_item->default_mask ?
						 cur_item->default_mask :
				goto exit_item_not_supported;
		} else if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
				rte_flow_error_set(error, ENOTSUP,
						   RTE_FLOW_ERROR_TYPE_ITEM,
						   "cannot recognize multiple"
						   " VXLAN encapsulations");
		flow->offset += cur_item->dst_sz;
	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
			const struct rte_flow_action_queue *queue =
				(const struct rte_flow_action_queue *)
			if (!queue || (queue->index > (priv->rxqs_n - 1)))
				goto exit_action_not_supported;
			for (n = 0; n < action->queues_n; ++n) {
				if (action->queues[n] == queue->index) {
			if (action->queues_n > 1 && !found) {
				rte_flow_error_set(error, ENOTSUP,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   "queue action not in RSS queues");
			action->queues_n = 1;
			action->queues[0] = queue->index;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
			const struct rte_flow_action_rss *rss =
				(const struct rte_flow_action_rss *)
			if (!rss || !rss->num) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
			if (action->queues_n == 1) {
				assert(action->queues_n);
				for (n = 0; n < rss->num; ++n) {
					if (action->queues[0] ==
					rte_flow_error_set(error, ENOTSUP,
							   RTE_FLOW_ERROR_TYPE_ACTION,
							   "queue action not in RSS"
			for (n = 0; n < rss->num; ++n) {
				if (rss->queue[n] >= priv->rxqs_n) {
					rte_flow_error_set(error, EINVAL,
							   RTE_FLOW_ERROR_TYPE_ACTION,
							   "queue id > number of"
			for (n = 0; n < rss->num; ++n)
				action->queues[n] = rss->queue[n];
			action->queues_n = rss->num;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
			const struct rte_flow_action_mark *mark =
				(const struct rte_flow_action_mark *)
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   "mark must be defined");
			} else if (mark->id >= MLX5_FLOW_MARK_MAX) {
				rte_flow_error_set(error, ENOTSUP,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   "mark must be between 0"
			action->mark_id = mark->id;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
			goto exit_action_not_supported;
	if (action->mark && !flow->ibv_attr && !action->drop)
		flow->offset += sizeof(struct ibv_exp_flow_spec_action_tag);
	if (!flow->ibv_attr && action->drop)
		flow->offset += sizeof(struct ibv_exp_flow_spec_action_drop);
	if (!action->queue && !action->drop) {
		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "no valid action");
exit_item_not_supported:
	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
			   items, "item not supported");
exit_action_not_supported:
	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
			   actions, "action not supported");
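/*
 * Note (added for clarity, not part of the original sources): this function
 * is used in two passes. On the first pass flow->ibv_attr is NULL, so only
 * flow->offset is accumulated (each item's dst_sz plus the optional tag or
 * drop spec) in order to size the Verbs attribute buffer. priv_flow_create()
 * then allocates flow.ibv_attr and runs the same validation again, this time
 * letting each cur_item->convert() write its specification into that buffer.
 */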
 * Validate a flow supported by the NIC.
 *
 * @see rte_flow_validate()
mlx5_flow_validate(struct rte_eth_dev *dev,
		   const struct rte_flow_attr *attr,
		   const struct rte_flow_item items[],
		   const struct rte_flow_action actions[],
		   struct rte_flow_error *error)
	struct priv *priv = dev->data->dev_private;
	struct mlx5_flow flow = { .offset = sizeof(struct ibv_exp_flow_attr) };
	struct mlx5_flow_action action = {
		.mark_id = MLX5_FLOW_MARK_DEFAULT,
	ret = priv_flow_validate(priv, attr, items, actions, error, &flow,
 * Convert Ethernet item to Verbs specification.
 *
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
mlx5_flow_create_eth(const struct rte_flow_item *item,
		     const void *default_mask,
	const struct rte_flow_item_eth *spec = item->spec;
	const struct rte_flow_item_eth *mask = item->mask;
	struct mlx5_flow *flow = (struct mlx5_flow *)data;
	struct ibv_exp_flow_spec_eth *eth;
	const unsigned int eth_size = sizeof(struct ibv_exp_flow_spec_eth);

	++flow->ibv_attr->num_of_specs;
	flow->ibv_attr->priority = 2;
	flow->hash_fields = 0;
	eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
	*eth = (struct ibv_exp_flow_spec_eth) {
		.type = flow->inner | IBV_EXP_FLOW_SPEC_ETH,
	memcpy(eth->val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
	memcpy(eth->val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
	eth->val.ether_type = spec->type;
	memcpy(eth->mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
	memcpy(eth->mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
	eth->mask.ether_type = mask->type;
	/* Remove unwanted bits from values. */
	for (i = 0; i < ETHER_ADDR_LEN; ++i) {
		eth->val.dst_mac[i] &= eth->mask.dst_mac[i];
		eth->val.src_mac[i] &= eth->mask.src_mac[i];
	eth->val.ether_type &= eth->mask.ether_type;
 * Convert VLAN item to Verbs specification.
 *
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
mlx5_flow_create_vlan(const struct rte_flow_item *item,
		      const void *default_mask,
	const struct rte_flow_item_vlan *spec = item->spec;
	const struct rte_flow_item_vlan *mask = item->mask;
	struct mlx5_flow *flow = (struct mlx5_flow *)data;
	struct ibv_exp_flow_spec_eth *eth;
	const unsigned int eth_size = sizeof(struct ibv_exp_flow_spec_eth);

	eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset - eth_size);
	eth->val.vlan_tag = spec->tci;
	eth->mask.vlan_tag = mask->tci;
	eth->val.vlan_tag &= eth->mask.vlan_tag;
 * Convert IPv4 item to Verbs specification.
 *
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
		      const void *default_mask,
	const struct rte_flow_item_ipv4 *spec = item->spec;
	const struct rte_flow_item_ipv4 *mask = item->mask;
	struct mlx5_flow *flow = (struct mlx5_flow *)data;
	struct ibv_exp_flow_spec_ipv4_ext *ipv4;
	unsigned int ipv4_size = sizeof(struct ibv_exp_flow_spec_ipv4_ext);

	++flow->ibv_attr->num_of_specs;
	flow->ibv_attr->priority = 1;
	flow->hash_fields = (IBV_EXP_RX_HASH_SRC_IPV4 |
			     IBV_EXP_RX_HASH_DST_IPV4);
	ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
	*ipv4 = (struct ibv_exp_flow_spec_ipv4_ext) {
		.type = flow->inner | IBV_EXP_FLOW_SPEC_IPV4_EXT,
	ipv4->val = (struct ibv_exp_flow_ipv4_ext_filter){
		.src_ip = spec->hdr.src_addr,
		.dst_ip = spec->hdr.dst_addr,
		.proto = spec->hdr.next_proto_id,
		.tos = spec->hdr.type_of_service,
	ipv4->mask = (struct ibv_exp_flow_ipv4_ext_filter){
		.src_ip = mask->hdr.src_addr,
		.dst_ip = mask->hdr.dst_addr,
		.proto = mask->hdr.next_proto_id,
		.tos = mask->hdr.type_of_service,
	/* Remove unwanted bits from values. */
	ipv4->val.src_ip &= ipv4->mask.src_ip;
	ipv4->val.dst_ip &= ipv4->mask.dst_ip;
	ipv4->val.proto &= ipv4->mask.proto;
	ipv4->val.tos &= ipv4->mask.tos;
 * Convert IPv6 item to Verbs specification.
 *
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
		      const void *default_mask,
	const struct rte_flow_item_ipv6 *spec = item->spec;
	const struct rte_flow_item_ipv6 *mask = item->mask;
	struct mlx5_flow *flow = (struct mlx5_flow *)data;
	struct ibv_exp_flow_spec_ipv6_ext *ipv6;
	unsigned int ipv6_size = sizeof(struct ibv_exp_flow_spec_ipv6_ext);

	++flow->ibv_attr->num_of_specs;
	flow->ibv_attr->priority = 1;
	flow->hash_fields = (IBV_EXP_RX_HASH_SRC_IPV6 |
			     IBV_EXP_RX_HASH_DST_IPV6);
	ipv6 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
	*ipv6 = (struct ibv_exp_flow_spec_ipv6_ext) {
		.type = flow->inner | IBV_EXP_FLOW_SPEC_IPV6_EXT,
	memcpy(ipv6->val.src_ip, spec->hdr.src_addr,
	       RTE_DIM(ipv6->val.src_ip));
	memcpy(ipv6->val.dst_ip, spec->hdr.dst_addr,
	       RTE_DIM(ipv6->val.dst_ip));
	memcpy(ipv6->mask.src_ip, mask->hdr.src_addr,
	       RTE_DIM(ipv6->mask.src_ip));
	memcpy(ipv6->mask.dst_ip, mask->hdr.dst_addr,
	       RTE_DIM(ipv6->mask.dst_ip));
	ipv6->mask.flow_label = mask->hdr.vtc_flow;
	ipv6->mask.next_hdr = mask->hdr.proto;
	ipv6->mask.hop_limit = mask->hdr.hop_limits;
	/* Remove unwanted bits from values. */
	for (i = 0; i < RTE_DIM(ipv6->val.src_ip); ++i) {
		ipv6->val.src_ip[i] &= ipv6->mask.src_ip[i];
		ipv6->val.dst_ip[i] &= ipv6->mask.dst_ip[i];
	ipv6->val.flow_label &= ipv6->mask.flow_label;
	ipv6->val.next_hdr &= ipv6->mask.next_hdr;
	ipv6->val.hop_limit &= ipv6->mask.hop_limit;
 * Convert UDP item to Verbs specification.
 *
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
mlx5_flow_create_udp(const struct rte_flow_item *item,
		     const void *default_mask,
	const struct rte_flow_item_udp *spec = item->spec;
	const struct rte_flow_item_udp *mask = item->mask;
	struct mlx5_flow *flow = (struct mlx5_flow *)data;
	struct ibv_exp_flow_spec_tcp_udp *udp;
	unsigned int udp_size = sizeof(struct ibv_exp_flow_spec_tcp_udp);

	++flow->ibv_attr->num_of_specs;
	flow->ibv_attr->priority = 0;
	flow->hash_fields |= (IBV_EXP_RX_HASH_SRC_PORT_UDP |
			      IBV_EXP_RX_HASH_DST_PORT_UDP);
	udp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
	*udp = (struct ibv_exp_flow_spec_tcp_udp) {
		.type = flow->inner | IBV_EXP_FLOW_SPEC_UDP,
	udp->val.dst_port = spec->hdr.dst_port;
	udp->val.src_port = spec->hdr.src_port;
	udp->mask.dst_port = mask->hdr.dst_port;
	udp->mask.src_port = mask->hdr.src_port;
	/* Remove unwanted bits from values. */
	udp->val.src_port &= udp->mask.src_port;
	udp->val.dst_port &= udp->mask.dst_port;
 * Convert TCP item to Verbs specification.
 *
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
mlx5_flow_create_tcp(const struct rte_flow_item *item,
		     const void *default_mask,
	const struct rte_flow_item_tcp *spec = item->spec;
	const struct rte_flow_item_tcp *mask = item->mask;
	struct mlx5_flow *flow = (struct mlx5_flow *)data;
	struct ibv_exp_flow_spec_tcp_udp *tcp;
	unsigned int tcp_size = sizeof(struct ibv_exp_flow_spec_tcp_udp);

	++flow->ibv_attr->num_of_specs;
	flow->ibv_attr->priority = 0;
	flow->hash_fields |= (IBV_EXP_RX_HASH_SRC_PORT_TCP |
			      IBV_EXP_RX_HASH_DST_PORT_TCP);
	tcp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
	*tcp = (struct ibv_exp_flow_spec_tcp_udp) {
		.type = flow->inner | IBV_EXP_FLOW_SPEC_TCP,
	tcp->val.dst_port = spec->hdr.dst_port;
	tcp->val.src_port = spec->hdr.src_port;
	tcp->mask.dst_port = mask->hdr.dst_port;
	tcp->mask.src_port = mask->hdr.src_port;
	/* Remove unwanted bits from values. */
	tcp->val.src_port &= tcp->mask.src_port;
	tcp->val.dst_port &= tcp->mask.dst_port;
 * Convert VXLAN item to Verbs specification.
 *
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
		       const void *default_mask,
	const struct rte_flow_item_vxlan *spec = item->spec;
	const struct rte_flow_item_vxlan *mask = item->mask;
	struct mlx5_flow *flow = (struct mlx5_flow *)data;
	struct ibv_exp_flow_spec_tunnel *vxlan;
	unsigned int size = sizeof(struct ibv_exp_flow_spec_tunnel);

	++flow->ibv_attr->num_of_specs;
	flow->ibv_attr->priority = 0;
	vxlan = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
	*vxlan = (struct ibv_exp_flow_spec_tunnel) {
		.type = flow->inner | IBV_EXP_FLOW_SPEC_VXLAN_TUNNEL,
	flow->inner = IBV_EXP_FLOW_SPEC_INNER;
	memcpy(&id.vni[1], spec->vni, 3);
	vxlan->val.tunnel_id = id.vlan_id;
	memcpy(&id.vni[1], mask->vni, 3);
	vxlan->mask.tunnel_id = id.vlan_id;
	/* Remove unwanted bits from values. */
	vxlan->val.tunnel_id &= vxlan->mask.tunnel_id;
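/*
 * Note (added for clarity, not part of the original sources): the VXLAN VNI
 * is a 24-bit field, hence only three bytes are copied above. Placing them
 * in bytes 1..3 of the 4-byte union leaves byte 0 zero, so the resulting
 * 32-bit tunnel_id appears to hold the VNI in the layout expected by the
 * Verbs tunnel specification.
 */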
 * Convert mark/flag action to Verbs specification.
 *
 *   Pointer to MLX5 flow structure.
mlx5_flow_create_flag_mark(struct mlx5_flow *flow, uint32_t mark_id)
	struct ibv_exp_flow_spec_action_tag *tag;
	unsigned int size = sizeof(struct ibv_exp_flow_spec_action_tag);

	tag = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
	*tag = (struct ibv_exp_flow_spec_action_tag){
		.type = IBV_EXP_FLOW_SPEC_ACTION_TAG,
		.tag_id = mlx5_flow_mark_set(mark_id),
	++flow->ibv_attr->num_of_specs;
 * Complete flow rule creation with a drop queue.
 *
 *   Pointer to private structure.
 *   MLX5 flow attributes (filled by mlx5_flow_validate()).
 *   Perform verbose error reporting if not NULL.
 *   A flow if the rule could be created.
static struct rte_flow *
priv_flow_create_action_queue_drop(struct priv *priv,
				   struct mlx5_flow *flow,
				   struct rte_flow_error *error)
	struct rte_flow *rte_flow;
	struct ibv_exp_flow_spec_action_drop *drop;
	unsigned int size = sizeof(struct ibv_exp_flow_spec_action_drop);

	rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow), 0);
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "cannot allocate flow memory");
	drop = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
	*drop = (struct ibv_exp_flow_spec_action_drop){
		.type = IBV_EXP_FLOW_SPEC_ACTION_DROP,
	++flow->ibv_attr->num_of_specs;
	flow->offset += sizeof(struct ibv_exp_flow_spec_action_drop);
	rte_flow->ibv_attr = flow->ibv_attr;
	rte_flow->qp = priv->flow_drop_queue->qp;
	rte_flow->ibv_flow = ibv_exp_create_flow(rte_flow->qp,
						 rte_flow->ibv_attr);
	if (!rte_flow->ibv_flow) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "flow rule creation failure");
 * Complete flow rule creation.
 *
 *   Pointer to private structure.
 *   MLX5 flow attributes (filled by mlx5_flow_validate()).
 *   Target action structure.
 *   Perform verbose error reporting if not NULL.
 *   A flow if the rule could be created.
static struct rte_flow *
priv_flow_create_action_queue(struct priv *priv,
			      struct mlx5_flow *flow,
			      struct mlx5_flow_action *action,
			      struct rte_flow_error *error)
	struct rte_flow *rte_flow;
	const unsigned int wqs_n = 1 << log2above(action->queues_n);
	struct ibv_exp_wq *wqs[wqs_n];

	assert(!action->drop);
	rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow) +
			      sizeof(*rte_flow->rxqs) * action->queues_n, 0);
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "cannot allocate flow memory");
	for (i = 0; i < action->queues_n; ++i) {
		struct rxq_ctrl *rxq;

		rxq = container_of((*priv->rxqs)[action->queues[i]],
				   struct rxq_ctrl, rxq);
		rte_flow->rxqs[i] = &rxq->rxq;
		rxq->rxq.mark |= action->mark;
	/* finalise indirection table. */
	for (j = 0; i < wqs_n; ++i, ++j) {
		if (j == action->queues_n)
	rte_flow->mark = action->mark;
	rte_flow->ibv_attr = flow->ibv_attr;
	rte_flow->hash_fields = flow->hash_fields;
	rte_flow->ind_table = ibv_exp_create_rwq_ind_table(
		&(struct ibv_exp_rwq_ind_table_init_attr){
			.log_ind_tbl_size = log2above(action->queues_n),
	if (!rte_flow->ind_table) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "cannot allocate indirection table");
	rte_flow->qp = ibv_exp_create_qp(
		&(struct ibv_exp_qp_init_attr){
			.qp_type = IBV_QPT_RAW_PACKET,
				IBV_EXP_QP_INIT_ATTR_PD |
				IBV_EXP_QP_INIT_ATTR_PORT |
				IBV_EXP_QP_INIT_ATTR_RX_HASH,
			.rx_hash_conf = &(struct ibv_exp_rx_hash_conf){
					IBV_EXP_RX_HASH_FUNC_TOEPLITZ,
				.rx_hash_key_len = rss_hash_default_key_len,
				.rx_hash_key = rss_hash_default_key,
				.rx_hash_fields_mask = rte_flow->hash_fields,
				.rwq_ind_tbl = rte_flow->ind_table,
			.port_num = priv->port,
	if (!rte_flow->qp) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "cannot allocate QP");
	rte_flow->ibv_flow = ibv_exp_create_flow(rte_flow->qp,
						 rte_flow->ibv_attr);
	if (!rte_flow->ibv_flow) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "flow rule creation failure");
	ibv_destroy_qp(rte_flow->qp);
	if (rte_flow->ind_table)
		ibv_exp_destroy_rwq_ind_table(rte_flow->ind_table);
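/*
 * Note (added for clarity, not part of the original sources): Verbs
 * indirection tables have a power-of-two size, hence
 * wqs_n = 1 << log2above(queues_n). With three queues, for example, the
 * table holds four entries and the "finalise" loop above pads it by
 * repeating earlier work queues (q0, q1, q2, q0), so queues listed first may
 * receive a slightly larger share of RSS traffic when the queue count is not
 * a power of two.
 */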
 *   Pointer to private structure.
 *   Flow rule attributes.
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 *   Perform verbose error reporting if not NULL.
 *   A flow on success, NULL otherwise.
static struct rte_flow *
priv_flow_create(struct priv *priv,
		 const struct rte_flow_attr *attr,
		 const struct rte_flow_item items[],
		 const struct rte_flow_action actions[],
		 struct rte_flow_error *error)
	struct rte_flow *rte_flow;
	struct mlx5_flow flow = { .offset = sizeof(struct ibv_exp_flow_attr), };
	struct mlx5_flow_action action = {
		.mark_id = MLX5_FLOW_MARK_DEFAULT,

	err = priv_flow_validate(priv, attr, items, actions, error, &flow,
	flow.ibv_attr = rte_malloc(__func__, flow.offset, 0);
	flow.offset = sizeof(struct ibv_exp_flow_attr);
	if (!flow.ibv_attr) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "cannot allocate ibv_attr memory");
	*flow.ibv_attr = (struct ibv_exp_flow_attr){
		.type = IBV_EXP_FLOW_ATTR_NORMAL,
		.size = sizeof(struct ibv_exp_flow_attr),
		.priority = attr->priority,
	flow.hash_fields = 0;
	claim_zero(priv_flow_validate(priv, attr, items, actions,
				      error, &flow, &action));
	if (action.mark && !action.drop) {
		mlx5_flow_create_flag_mark(&flow, action.mark_id);
		flow.offset += sizeof(struct ibv_exp_flow_spec_action_tag);
			priv_flow_create_action_queue_drop(priv, &flow, error);
		rte_flow = priv_flow_create_action_queue(priv, &flow, &action,
	rte_free(flow.ibv_attr);
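/*
 * Usage sketch (illustrative only, not part of the driver): applications
 * reach the creation path above through the generic rte_flow API, e.g. to
 * steer UDP traffic for one IPv4 destination to Rx queue 3. port_id and the
 * queue index are assumptions made up for this example.
 *
 *   struct rte_flow_attr attr = { .ingress = 1 };
 *   struct rte_flow_item_ipv4 ip_spec = {
 *           .hdr = { .dst_addr = rte_cpu_to_be_32(IPv4(192, 168, 0, 1)) },
 *   };
 *   struct rte_flow_item pattern[] = {
 *           { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *           { .type = RTE_FLOW_ITEM_TYPE_IPV4, .spec = &ip_spec },
 *           { .type = RTE_FLOW_ITEM_TYPE_UDP },
 *           { .type = RTE_FLOW_ITEM_TYPE_END },
 *   };
 *   struct rte_flow_action_queue queue = { .index = 3 };
 *   struct rte_flow_action actions[] = {
 *           { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *           { .type = RTE_FLOW_ACTION_TYPE_END },
 *   };
 *   struct rte_flow_error err;
 *   struct rte_flow *f = rte_flow_create(port_id, &attr, pattern,
 *                                        actions, &err);
 *
 * Items given without an explicit mask fall back to the default_mask
 * entries registered in mlx5_flow_items[] above.
 */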
 * @see rte_flow_create()
mlx5_flow_create(struct rte_eth_dev *dev,
		 const struct rte_flow_attr *attr,
		 const struct rte_flow_item items[],
		 const struct rte_flow_action actions[],
		 struct rte_flow_error *error)
	struct priv *priv = dev->data->dev_private;
	struct rte_flow *flow;

	flow = priv_flow_create(priv, attr, items, actions, error);
		TAILQ_INSERT_TAIL(&priv->flows, flow, next);
		DEBUG("Flow created %p", (void *)flow);
 *   Pointer to private structure.
priv_flow_destroy(struct priv *priv,
		  struct rte_flow *flow)
	TAILQ_REMOVE(&priv->flows, flow, next);
		claim_zero(ibv_exp_destroy_flow(flow->ibv_flow));
		claim_zero(ibv_destroy_qp(flow->qp));
	if (flow->ind_table)
		claim_zero(ibv_exp_destroy_rwq_ind_table(flow->ind_table));
		struct rte_flow *tmp;
		uint32_t mark_n = 0;

		 * To remove the mark from the queue, the queue must not be
		 * present in any other marked flow (RSS or not).
		for (queue_n = 0; queue_n < flow->rxqs_n; ++queue_n) {
			rxq = flow->rxqs[queue_n];
			for (tmp = TAILQ_FIRST(&priv->flows);
			     tmp = TAILQ_NEXT(tmp, next)) {
				     tqueue_n < tmp->rxqs_n;
					trxq = tmp->rxqs[tqueue_n];
			rxq->mark = !!mark_n;
	rte_free(flow->ibv_attr);
	DEBUG("Flow destroyed %p", (void *)flow);
 * @see rte_flow_destroy()
mlx5_flow_destroy(struct rte_eth_dev *dev,
		  struct rte_flow *flow,
		  struct rte_flow_error *error)
	struct priv *priv = dev->data->dev_private;

	priv_flow_destroy(priv, flow);
 * Destroy all flows.
 *
 *   Pointer to private structure.
priv_flow_flush(struct priv *priv)
	while (!TAILQ_EMPTY(&priv->flows)) {
		struct rte_flow *flow;

		flow = TAILQ_FIRST(&priv->flows);
		priv_flow_destroy(priv, flow);
 * Destroy all flows.
 *
 * @see rte_flow_flush()
mlx5_flow_flush(struct rte_eth_dev *dev,
		struct rte_flow_error *error)
	struct priv *priv = dev->data->dev_private;

	priv_flow_flush(priv);
 * Create drop queue.
 *
 *   Pointer to private structure.
priv_flow_create_drop_queue(struct priv *priv)
	struct rte_flow_drop *fdq = NULL;

	fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
		WARN("cannot allocate memory for drop queue");
	fdq->cq = ibv_exp_create_cq(priv->ctx, 1, NULL, NULL, 0,
				    &(struct ibv_exp_cq_init_attr){
		WARN("cannot allocate CQ for drop queue");
	fdq->wq = ibv_exp_create_wq(priv->ctx,
				    &(struct ibv_exp_wq_init_attr){
					.wq_type = IBV_EXP_WQT_RQ,
		WARN("cannot allocate WQ for drop queue");
	fdq->ind_table = ibv_exp_create_rwq_ind_table(priv->ctx,
			&(struct ibv_exp_rwq_ind_table_init_attr){
				.log_ind_tbl_size = 0,
				.ind_tbl = &fdq->wq,
	if (!fdq->ind_table) {
		WARN("cannot allocate indirection table for drop queue");
	fdq->qp = ibv_exp_create_qp(priv->ctx,
			&(struct ibv_exp_qp_init_attr){
				.qp_type = IBV_QPT_RAW_PACKET,
					IBV_EXP_QP_INIT_ATTR_PD |
					IBV_EXP_QP_INIT_ATTR_PORT |
					IBV_EXP_QP_INIT_ATTR_RX_HASH,
				.rx_hash_conf = &(struct ibv_exp_rx_hash_conf){
						IBV_EXP_RX_HASH_FUNC_TOEPLITZ,
					.rx_hash_key_len = rss_hash_default_key_len,
					.rx_hash_key = rss_hash_default_key,
					.rx_hash_fields_mask = 0,
					.rwq_ind_tbl = fdq->ind_table,
				.port_num = priv->port,
		WARN("cannot allocate QP for drop queue");
	priv->flow_drop_queue = fdq;
		claim_zero(ibv_destroy_qp(fdq->qp));
		claim_zero(ibv_exp_destroy_rwq_ind_table(fdq->ind_table));
		claim_zero(ibv_exp_destroy_wq(fdq->wq));
		claim_zero(ibv_destroy_cq(fdq->cq));
	priv->flow_drop_queue = NULL;
 * Delete drop queue.
 *
 *   Pointer to private structure.
priv_flow_delete_drop_queue(struct priv *priv)
	struct rte_flow_drop *fdq = priv->flow_drop_queue;

		claim_zero(ibv_destroy_qp(fdq->qp));
		claim_zero(ibv_exp_destroy_rwq_ind_table(fdq->ind_table));
		claim_zero(ibv_exp_destroy_wq(fdq->wq));
		claim_zero(ibv_destroy_cq(fdq->cq));
	priv->flow_drop_queue = NULL;
 * Called by dev_stop() to remove all flows.
 *
 *   Pointer to private structure.
priv_flow_stop(struct priv *priv)
	struct rte_flow *flow;

	TAILQ_FOREACH_REVERSE(flow, &priv->flows, mlx5_flows, next) {
		claim_zero(ibv_exp_destroy_flow(flow->ibv_flow));
		flow->ibv_flow = NULL;
			for (n = 0; n < flow->rxqs_n; ++n)
				flow->rxqs[n]->mark = 0;
		DEBUG("Flow %p removed", (void *)flow);
	priv_flow_delete_drop_queue(priv);
 *   Pointer to private structure.
 *   0 on success, an errno value otherwise and rte_errno is set.
priv_flow_start(struct priv *priv)
	struct rte_flow *flow;

	ret = priv_flow_create_drop_queue(priv);
	TAILQ_FOREACH(flow, &priv->flows, next) {
			qp = priv->flow_drop_queue->qp;
		flow->ibv_flow = ibv_exp_create_flow(qp, flow->ibv_attr);
		if (!flow->ibv_flow) {
			DEBUG("Flow %p cannot be applied", (void *)flow);
		DEBUG("Flow %p applied", (void *)flow);
			for (n = 0; n < flow->rxqs_n; ++n)
				flow->rxqs[n]->mark = 1;
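/*
 * Lifecycle note (added for clarity, not part of the original sources): flow
 * rules persist as software objects across a port restart. A typical
 * sequence, assuming a configured port_id:
 *
 *   rte_eth_dev_stop(port_id);   -- priv_flow_stop(): Verbs flows destroyed,
 *                                   mark flags cleared, drop queue deleted.
 *   rte_eth_dev_start(port_id);  -- priv_flow_start(): drop queue recreated,
 *                                   each remembered rule re-applied from its
 *                                   saved ibv_attr.
 */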
 * Verify if the Rx queue is used in a flow.
 *
 *   Pointer to private structure.
 *   Pointer to the queue to search.
 *   Nonzero if the queue is used by a flow.
priv_flow_rxq_in_use(struct priv *priv, struct rxq *rxq)
	struct rte_flow *flow;

	for (flow = TAILQ_FIRST(&priv->flows);
	     flow = TAILQ_NEXT(flow, next)) {
		for (n = 0; n < flow->rxqs_n; ++n) {
			if (flow->rxqs[n] == rxq)
 * @see rte_flow_isolate()
mlx5_flow_isolate(struct rte_eth_dev *dev,
		  struct rte_flow_error *error)
	struct priv *priv = dev->data->dev_private;

	if (priv->started) {
		rte_flow_error_set(error, EBUSY,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   "port must be stopped first");
	priv->isolated = !!enable;