/*-
 *   BSD LICENSE
 *
 *   Copyright 2016 6WIND S.A.
 *   Copyright 2016 Mellanox.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of 6WIND S.A. nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <assert.h>
#include <stdint.h>
#include <string.h>
#include <sys/queue.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_errno.h>
#include <rte_ethdev.h>
#include <rte_flow.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>

#include "mlx5.h"
#include "mlx5_defs.h"
#include "mlx5_prm.h"
#include "mlx5_rxtx.h"
#include "mlx5_utils.h"
static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data);

static int
mlx5_flow_create_vlan(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data);

static int
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data);

static int
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data);

static int
mlx5_flow_create_udp(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data);

static int
mlx5_flow_create_tcp(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data);

static int
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
		       const void *default_mask,
		       void *data);
struct rte_flow {
	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
	struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
	struct ibv_qp *qp; /**< Verbs queue pair. */
	struct ibv_flow *ibv_flow; /**< Verbs flow. */
	struct ibv_wq *wq; /**< Verbs work queue. */
	struct ibv_cq *cq; /**< Verbs completion queue. */
	uint32_t mark:1; /**< Set if the flow is marked. */
	uint32_t drop:1; /**< Set if the flow targets the drop queue. */
	uint64_t hash_fields; /**< Fields that participate in the hash. */
	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< List of queues. */
	uint16_t queues_n; /**< Number of queues in the list. */
};
/** Static initializer for items. */
#define ITEMS(...) \
	(const enum rte_flow_item_type []){ \
		__VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
	}
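
/*
 * Illustrative note (not from the original sources): with the macro above,
 * ITEMS(RTE_FLOW_ITEM_TYPE_UDP) expands to the compound literal
 *
 *	(const enum rte_flow_item_type []){
 *		RTE_FLOW_ITEM_TYPE_UDP, RTE_FLOW_ITEM_TYPE_END,
 *	}
 *
 * so every .items list below is implicitly terminated by
 * RTE_FLOW_ITEM_TYPE_END.
 */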
/** Structure to generate a simple graph of layers supported by the NIC. */
struct mlx5_flow_items {
	/** List of possible actions for these items. */
	const enum rte_flow_action_type *const actions;
	/** Bit-masks corresponding to the possibilities for the item. */
	const void *mask;
	/**
	 * Default bit-masks to use when item->mask is not provided. When
	 * \default_mask is also NULL, the full supported bit-mask (\mask) is
	 * used instead.
	 */
	const void *default_mask;
	/** Bit-mask size in bytes. */
	const unsigned int mask_sz;
	/**
	 * Conversion function from rte_flow to NIC specific flow.
	 *
	 * @param item
	 *   rte_flow item to convert.
	 * @param default_mask
	 *   Default bit-masks to use when item->mask is not provided.
	 * @param data
	 *   Internal structure to store the conversion.
	 *
	 * @return
	 *   0 on success, negative value otherwise.
	 */
	int (*convert)(const struct rte_flow_item *item,
		       const void *default_mask,
		       void *data);
	/** Size in bytes of the destination structure. */
	const unsigned int dst_sz;
	/** List of possible following items. */
	const enum rte_flow_item_type *const items;
};
/** Valid actions for this PMD. */
static const enum rte_flow_action_type valid_actions[] = {
	RTE_FLOW_ACTION_TYPE_DROP,
	RTE_FLOW_ACTION_TYPE_QUEUE,
	RTE_FLOW_ACTION_TYPE_MARK,
	RTE_FLOW_ACTION_TYPE_FLAG,
	RTE_FLOW_ACTION_TYPE_END,
};
/** Graph of supported items and associated actions. */
static const struct mlx5_flow_items mlx5_flow_items[] = {
	[RTE_FLOW_ITEM_TYPE_END] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
			       RTE_FLOW_ITEM_TYPE_VXLAN),
	},
	[RTE_FLOW_ITEM_TYPE_ETH] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
			       RTE_FLOW_ITEM_TYPE_IPV4,
			       RTE_FLOW_ITEM_TYPE_IPV6),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_eth){
			.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
			.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
			.type = -1,
		},
		.default_mask = &rte_flow_item_eth_mask,
		.mask_sz = sizeof(struct rte_flow_item_eth),
		.convert = mlx5_flow_create_eth,
		.dst_sz = sizeof(struct ibv_flow_spec_eth),
	},
	[RTE_FLOW_ITEM_TYPE_VLAN] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
			       RTE_FLOW_ITEM_TYPE_IPV6),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_vlan){
			.tci = -1,
		},
		.default_mask = &rte_flow_item_vlan_mask,
		.mask_sz = sizeof(struct rte_flow_item_vlan),
		.convert = mlx5_flow_create_vlan,
		/* The TCI is folded into the Ethernet specification. */
		.dst_sz = 0,
	},
	[RTE_FLOW_ITEM_TYPE_IPV4] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
			       RTE_FLOW_ITEM_TYPE_TCP),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_ipv4){
			.hdr = {
				.src_addr = -1,
				.dst_addr = -1,
				.type_of_service = -1,
				.next_proto_id = -1,
			},
		},
		.default_mask = &rte_flow_item_ipv4_mask,
		.mask_sz = sizeof(struct rte_flow_item_ipv4),
		.convert = mlx5_flow_create_ipv4,
		.dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
	},
	[RTE_FLOW_ITEM_TYPE_IPV6] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
			       RTE_FLOW_ITEM_TYPE_TCP),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_ipv6){
			.hdr = {
				.src_addr = {
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
				},
				.dst_addr = {
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
				},
				.vtc_flow = -1,
				.proto = -1,
				.hop_limits = -1,
			},
		},
		.default_mask = &rte_flow_item_ipv6_mask,
		.mask_sz = sizeof(struct rte_flow_item_ipv6),
		.convert = mlx5_flow_create_ipv6,
		.dst_sz = sizeof(struct ibv_flow_spec_ipv6),
	},
	[RTE_FLOW_ITEM_TYPE_UDP] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_udp){
			.hdr = {
				.src_port = -1,
				.dst_port = -1,
			},
		},
		.default_mask = &rte_flow_item_udp_mask,
		.mask_sz = sizeof(struct rte_flow_item_udp),
		.convert = mlx5_flow_create_udp,
		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
	},
	[RTE_FLOW_ITEM_TYPE_TCP] = {
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_tcp){
			.hdr = {
				.src_port = -1,
				.dst_port = -1,
			},
		},
		.default_mask = &rte_flow_item_tcp_mask,
		.mask_sz = sizeof(struct rte_flow_item_tcp),
		.convert = mlx5_flow_create_tcp,
		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
	},
	[RTE_FLOW_ITEM_TYPE_VXLAN] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_vxlan){
			.vni = "\xff\xff\xff",
		},
		.default_mask = &rte_flow_item_vxlan_mask,
		.mask_sz = sizeof(struct rte_flow_item_vxlan),
		.convert = mlx5_flow_create_vxlan,
		.dst_sz = sizeof(struct ibv_flow_spec_tunnel),
	},
};
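
/*
 * Illustrative note: the graph above encodes the item chains this PMD
 * accepts. Starting from RTE_FLOW_ITEM_TYPE_END (the implicit root), a
 * valid pattern is e.g. ETH -> VLAN -> IPV4 -> UDP -> VXLAN -> ETH ...,
 * where the trailing ETH restarts the walk for the inner (encapsulated)
 * headers.
 */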
/** Structure to parse actions. */
struct mlx5_flow_action {
	uint32_t queue:1; /**< Target is a receive queue. */
	uint32_t drop:1; /**< Target is a drop queue. */
	uint32_t mark:1; /**< Mark is present in the flow. */
	uint32_t mark_id; /**< Mark identifier. */
	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indexes to use. */
	uint16_t queues_n; /**< Number of entries in queues[]. */
};
/** Structure to pass to the conversion function. */
struct mlx5_flow_parse {
	struct ibv_flow_attr *ibv_attr; /**< Verbs attribute. */
	unsigned int offset; /**< Offset in bytes in the ibv_attr buffer. */
	uint32_t inner; /**< Set once VXLAN is encountered. */
	uint64_t hash_fields; /**< Fields that participate in the hash. */
	struct mlx5_flow_action actions; /**< Parsed action result. */
};
/** Structure for the drop queue. */
struct rte_flow_drop {
	struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
	struct ibv_qp *qp; /**< Verbs queue pair. */
	struct ibv_wq *wq; /**< Verbs work queue. */
	struct ibv_cq *cq; /**< Verbs completion queue. */
};
static const struct rte_flow_ops mlx5_flow_ops = {
	.validate = mlx5_flow_validate,
	.create = mlx5_flow_create,
	.destroy = mlx5_flow_destroy,
	.flush = mlx5_flow_flush,
	.query = NULL,
	.isolate = mlx5_flow_isolate,
};
/**
 * Manage filter operations.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param filter_type
 *   Filter type.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
int
mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
		     enum rte_filter_type filter_type,
		     enum rte_filter_op filter_op,
		     void *arg)
{
	int ret = EINVAL;

	if (filter_type == RTE_ETH_FILTER_GENERIC) {
		if (filter_op != RTE_ETH_FILTER_GET)
			return -EINVAL;
		*(const void **)arg = &mlx5_flow_ops;
		return 0;
	}
	ERROR("%p: filter type (%d) not supported",
	      (void *)dev, filter_type);
	return -ret;
}
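
/*
 * Illustrative sketch (not part of this file): the generic rte_flow layer
 * reaches the handler above through the filter-control API, roughly as:
 *
 *	const struct rte_flow_ops *ops;
 *
 *	rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_GENERIC,
 *				RTE_ETH_FILTER_GET, &ops);
 *
 * after which ops->validate/create/destroy/flush dispatch to the
 * mlx5_flow_* callbacks registered in mlx5_flow_ops.
 */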
/**
 * Check support for a given item.
 *
 * @param item[in]
 *   Item specification.
 * @param mask[in]
 *   Bit-masks covering supported fields to compare with spec, last and mask in
 *   \item.
 * @param size
 *   Bit-mask size in bytes.
 *
 * @return
 *   0 on success.
 */
static int
mlx5_flow_item_validate(const struct rte_flow_item *item,
			const uint8_t *mask, unsigned int size)
{
	int ret = 0;

	if (!item->spec && (item->mask || item->last))
		return -1;
	if (item->spec && !item->mask) {
		unsigned int i;
		const uint8_t *spec = item->spec;

		for (i = 0; i < size; ++i)
			if ((spec[i] | mask[i]) != mask[i])
				return -1;
	}
	if (item->last && !item->mask) {
		unsigned int i;
		const uint8_t *spec = item->last;

		for (i = 0; i < size; ++i)
			if ((spec[i] | mask[i]) != mask[i])
				return -1;
	}
	if (item->mask) {
		unsigned int i;
		const uint8_t *spec = item->mask;

		for (i = 0; i < size; ++i)
			if ((spec[i] | mask[i]) != mask[i])
				return -1;
	}
	if (item->spec && item->last) {
		uint8_t spec[size];
		uint8_t last[size];
		const uint8_t *apply = mask;
		unsigned int i;

		if (item->mask)
			apply = item->mask;
		for (i = 0; i < size; ++i) {
			spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
			last[i] = ((const uint8_t *)item->last)[i] & apply[i];
		}
		ret = memcmp(spec, last, size);
	}
	return ret;
}
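
/*
 * Illustrative example (an assumption, not from the original sources): for
 * a TCP item describing the destination port range 80 to 90,
 *
 *	struct rte_flow_item_tcp spec = { .hdr.dst_port = RTE_BE16(80) };
 *	struct rte_flow_item_tcp last = { .hdr.dst_port = RTE_BE16(90) };
 *
 * the checks above first verify that every field used by spec, last and
 * the user mask is covered by the supported mask, then reject the item
 * because the masked spec and last differ: ranges cannot be expressed in
 * a Verbs specification.
 */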
/**
 * Validate a flow supported by the NIC.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] items
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] flow
 *   Flow structure to update.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_validate(struct priv *priv,
		   const struct rte_flow_attr *attr,
		   const struct rte_flow_item items[],
		   const struct rte_flow_action actions[],
		   struct rte_flow_error *error,
		   struct mlx5_flow_parse *flow)
{
	const struct mlx5_flow_items *cur_item = mlx5_flow_items;

	if (attr->group) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
				   NULL,
				   "groups are not supported");
		return -rte_errno;
	}
	if (attr->priority) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
				   NULL,
				   "priorities are not supported");
		return -rte_errno;
	}
	if (attr->egress) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
				   NULL,
				   "egress is not supported");
		return -rte_errno;
	}
	if (!attr->ingress) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
				   NULL,
				   "only ingress is supported");
		return -rte_errno;
	}
	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
		const struct mlx5_flow_items *token = NULL;
		unsigned int i;
		int err;

		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
			continue;
		for (i = 0;
		     cur_item->items &&
		     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
		     ++i) {
			if (cur_item->items[i] == items->type) {
				token = &mlx5_flow_items[items->type];
				break;
			}
		}
		if (!token)
			goto exit_item_not_supported;
		cur_item = token;
		err = mlx5_flow_item_validate(items,
					      (const uint8_t *)cur_item->mask,
					      cur_item->mask_sz);
		if (err)
			goto exit_item_not_supported;
		if (flow->ibv_attr && cur_item->convert) {
			err = cur_item->convert(items,
						(cur_item->default_mask ?
						 cur_item->default_mask :
						 cur_item->mask),
						flow);
			if (err)
				goto exit_item_not_supported;
		} else if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
			if (flow->inner) {
				rte_flow_error_set(error, ENOTSUP,
						   RTE_FLOW_ERROR_TYPE_ITEM,
						   items,
						   "cannot recognize multiple"
						   " VXLAN encapsulations");
				return -rte_errno;
			}
			flow->inner = 1;
		}
		flow->offset += cur_item->dst_sz;
	}
	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
			continue;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
			flow->actions.drop = 1;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
			const struct rte_flow_action_queue *queue =
				(const struct rte_flow_action_queue *)
				actions->conf;
			uint16_t n;
			uint16_t found = 0;

			if (!queue || (queue->index > (priv->rxqs_n - 1)))
				goto exit_action_not_supported;
			for (n = 0; n < flow->actions.queues_n; ++n) {
				if (flow->actions.queues[n] == queue->index) {
					found = 1;
					break;
				}
			}
			if (flow->actions.queues_n > 1 && !found) {
				rte_flow_error_set(error, ENOTSUP,
					   RTE_FLOW_ERROR_TYPE_ACTION,
					   actions,
					   "queue action not in RSS queues");
				return -rte_errno;
			}
			if (!found) {
				flow->actions.queue = 1;
				flow->actions.queues_n = 1;
				flow->actions.queues[0] = queue->index;
			}
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
			const struct rte_flow_action_rss *rss =
				(const struct rte_flow_action_rss *)
				actions->conf;
			uint16_t n;

			if (!rss || !rss->num) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "no valid queues");
				return -rte_errno;
			}
			if (flow->actions.queues_n == 1) {
				uint16_t found = 0;

				assert(flow->actions.queues_n);
				for (n = 0; n < rss->num; ++n) {
					if (flow->actions.queues[0] ==
					    rss->queue[n]) {
						found = 1;
						break;
					}
				}
				if (!found) {
					rte_flow_error_set(error, ENOTSUP,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "queue action not in RSS"
						   " queues");
					return -rte_errno;
				}
			}
			for (n = 0; n < rss->num; ++n) {
				if (rss->queue[n] >= priv->rxqs_n) {
					rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "queue id > number of"
						   " queues");
					return -rte_errno;
				}
			}
			flow->actions.queue = 1;
			for (n = 0; n < rss->num; ++n)
				flow->actions.queues[n] = rss->queue[n];
			flow->actions.queues_n = rss->num;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
			const struct rte_flow_action_mark *mark =
				(const struct rte_flow_action_mark *)
				actions->conf;

			if (!mark) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "mark must be defined");
				return -rte_errno;
			} else if (mark->id >= MLX5_FLOW_MARK_MAX) {
				rte_flow_error_set(error, ENOTSUP,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "mark must be between 0"
						   " and 16777199");
				return -rte_errno;
			}
			flow->actions.mark = 1;
			flow->actions.mark_id = mark->id;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
			flow->actions.mark = 1;
		} else {
			goto exit_action_not_supported;
		}
	}
	if (flow->actions.mark && !flow->ibv_attr && !flow->actions.drop)
		flow->offset += sizeof(struct ibv_flow_spec_action_tag);
	if (!flow->ibv_attr && flow->actions.drop)
		flow->offset += sizeof(struct ibv_flow_spec_action_drop);
	if (!flow->actions.queue && !flow->actions.drop) {
		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "no valid action");
		return -rte_errno;
	}
	return 0;
exit_item_not_supported:
	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
			   items, "item not supported");
	return -rte_errno;
exit_action_not_supported:
	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
			   actions, "action not supported");
	return -rte_errno;
}
/**
 * Validate a flow supported by the NIC.
 *
 * @see rte_flow_validate()
 * @see rte_flow_ops
 */
int
mlx5_flow_validate(struct rte_eth_dev *dev,
		   const struct rte_flow_attr *attr,
		   const struct rte_flow_item items[],
		   const struct rte_flow_action actions[],
		   struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;
	int ret;
	struct mlx5_flow_parse flow = {
		.offset = sizeof(struct ibv_flow_attr),
		.actions = {
			.mark_id = MLX5_FLOW_MARK_DEFAULT,
			.queues_n = 0,
		},
	};

	priv_lock(priv);
	ret = priv_flow_validate(priv, attr, items, actions, error, &flow);
	priv_unlock(priv);
	return ret;
}
/**
 * Convert Ethernet item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data)
{
	const struct rte_flow_item_eth *spec = item->spec;
	const struct rte_flow_item_eth *mask = item->mask;
	struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
	struct ibv_flow_spec_eth *eth;
	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
	unsigned int i;

	++flow->ibv_attr->num_of_specs;
	flow->ibv_attr->priority = 2;
	flow->hash_fields = 0;
	eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
	*eth = (struct ibv_flow_spec_eth) {
		.type = flow->inner | IBV_FLOW_SPEC_ETH,
		.size = eth_size,
	};
	if (!spec)
		return 0;
	if (!mask)
		mask = default_mask;
	memcpy(eth->val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
	memcpy(eth->val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
	eth->val.ether_type = spec->type;
	memcpy(eth->mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
	memcpy(eth->mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
	eth->mask.ether_type = mask->type;
	/* Remove unwanted bits from values. */
	for (i = 0; i < ETHER_ADDR_LEN; ++i) {
		eth->val.dst_mac[i] &= eth->mask.dst_mac[i];
		eth->val.src_mac[i] &= eth->mask.src_mac[i];
	}
	eth->val.ether_type &= eth->mask.ether_type;
	return 0;
}
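
/*
 * Illustrative note: each conversion callback writes its specification at
 * flow->offset inside the single buffer sized by the first validation
 * pass, and the caller advances flow->offset by the item's dst_sz. For an
 * eth/ipv4/udp pattern the buffer therefore ends up laid out as:
 *
 *	struct ibv_flow_attr          (offset 0)
 *	struct ibv_flow_spec_eth      (offset sizeof(struct ibv_flow_attr))
 *	struct ibv_flow_spec_ipv4_ext
 *	struct ibv_flow_spec_tcp_udp
 */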
/**
 * Convert VLAN item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_vlan(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data)
{
	const struct rte_flow_item_vlan *spec = item->spec;
	const struct rte_flow_item_vlan *mask = item->mask;
	struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
	struct ibv_flow_spec_eth *eth;
	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);

	/* The TCI is stored in the Ethernet spec written just before. */
	eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset - eth_size);
	if (!spec)
		return 0;
	if (!mask)
		mask = default_mask;
	eth->val.vlan_tag = spec->tci;
	eth->mask.vlan_tag = mask->tci;
	eth->val.vlan_tag &= eth->mask.vlan_tag;
	return 0;
}
/**
 * Convert IPv4 item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data)
{
	const struct rte_flow_item_ipv4 *spec = item->spec;
	const struct rte_flow_item_ipv4 *mask = item->mask;
	struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
	struct ibv_flow_spec_ipv4_ext *ipv4;
	unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);

	++flow->ibv_attr->num_of_specs;
	flow->ibv_attr->priority = 1;
	flow->hash_fields = (IBV_RX_HASH_SRC_IPV4 |
			     IBV_RX_HASH_DST_IPV4);
	ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
	*ipv4 = (struct ibv_flow_spec_ipv4_ext) {
		.type = flow->inner | IBV_FLOW_SPEC_IPV4_EXT,
		.size = ipv4_size,
	};
	if (!spec)
		return 0;
	if (!mask)
		mask = default_mask;
	ipv4->val = (struct ibv_flow_ipv4_ext_filter){
		.src_ip = spec->hdr.src_addr,
		.dst_ip = spec->hdr.dst_addr,
		.proto = spec->hdr.next_proto_id,
		.tos = spec->hdr.type_of_service,
	};
	ipv4->mask = (struct ibv_flow_ipv4_ext_filter){
		.src_ip = mask->hdr.src_addr,
		.dst_ip = mask->hdr.dst_addr,
		.proto = mask->hdr.next_proto_id,
		.tos = mask->hdr.type_of_service,
	};
	/* Remove unwanted bits from values. */
	ipv4->val.src_ip &= ipv4->mask.src_ip;
	ipv4->val.dst_ip &= ipv4->mask.dst_ip;
	ipv4->val.proto &= ipv4->mask.proto;
	ipv4->val.tos &= ipv4->mask.tos;
	return 0;
}
/**
 * Convert IPv6 item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data)
{
	const struct rte_flow_item_ipv6 *spec = item->spec;
	const struct rte_flow_item_ipv6 *mask = item->mask;
	struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
	struct ibv_flow_spec_ipv6 *ipv6;
	unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
	unsigned int i;

	++flow->ibv_attr->num_of_specs;
	flow->ibv_attr->priority = 1;
	flow->hash_fields = (IBV_RX_HASH_SRC_IPV6 |
			     IBV_RX_HASH_DST_IPV6);
	ipv6 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
	*ipv6 = (struct ibv_flow_spec_ipv6) {
		.type = flow->inner | IBV_FLOW_SPEC_IPV6,
		.size = ipv6_size,
	};
	if (!spec)
		return 0;
	if (!mask)
		mask = default_mask;
	memcpy(ipv6->val.src_ip, spec->hdr.src_addr,
	       RTE_DIM(ipv6->val.src_ip));
	memcpy(ipv6->val.dst_ip, spec->hdr.dst_addr,
	       RTE_DIM(ipv6->val.dst_ip));
	memcpy(ipv6->mask.src_ip, mask->hdr.src_addr,
	       RTE_DIM(ipv6->mask.src_ip));
	memcpy(ipv6->mask.dst_ip, mask->hdr.dst_addr,
	       RTE_DIM(ipv6->mask.dst_ip));
	ipv6->val.flow_label = spec->hdr.vtc_flow;
	ipv6->val.next_hdr = spec->hdr.proto;
	ipv6->val.hop_limit = spec->hdr.hop_limits;
	ipv6->mask.flow_label = mask->hdr.vtc_flow;
	ipv6->mask.next_hdr = mask->hdr.proto;
	ipv6->mask.hop_limit = mask->hdr.hop_limits;
	/* Remove unwanted bits from values. */
	for (i = 0; i < RTE_DIM(ipv6->val.src_ip); ++i) {
		ipv6->val.src_ip[i] &= ipv6->mask.src_ip[i];
		ipv6->val.dst_ip[i] &= ipv6->mask.dst_ip[i];
	}
	ipv6->val.flow_label &= ipv6->mask.flow_label;
	ipv6->val.next_hdr &= ipv6->mask.next_hdr;
	ipv6->val.hop_limit &= ipv6->mask.hop_limit;
	return 0;
}
/**
 * Convert UDP item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_udp(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data)
{
	const struct rte_flow_item_udp *spec = item->spec;
	const struct rte_flow_item_udp *mask = item->mask;
	struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
	struct ibv_flow_spec_tcp_udp *udp;
	unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);

	++flow->ibv_attr->num_of_specs;
	flow->ibv_attr->priority = 0;
	flow->hash_fields |= (IBV_RX_HASH_SRC_PORT_UDP |
			      IBV_RX_HASH_DST_PORT_UDP);
	udp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
	*udp = (struct ibv_flow_spec_tcp_udp) {
		.type = flow->inner | IBV_FLOW_SPEC_UDP,
		.size = udp_size,
	};
	if (!spec)
		return 0;
	if (!mask)
		mask = default_mask;
	udp->val.dst_port = spec->hdr.dst_port;
	udp->val.src_port = spec->hdr.src_port;
	udp->mask.dst_port = mask->hdr.dst_port;
	udp->mask.src_port = mask->hdr.src_port;
	/* Remove unwanted bits from values. */
	udp->val.src_port &= udp->mask.src_port;
	udp->val.dst_port &= udp->mask.dst_port;
	return 0;
}
/**
 * Convert TCP item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_tcp(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data)
{
	const struct rte_flow_item_tcp *spec = item->spec;
	const struct rte_flow_item_tcp *mask = item->mask;
	struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
	struct ibv_flow_spec_tcp_udp *tcp;
	unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);

	++flow->ibv_attr->num_of_specs;
	flow->ibv_attr->priority = 0;
	flow->hash_fields |= (IBV_RX_HASH_SRC_PORT_TCP |
			      IBV_RX_HASH_DST_PORT_TCP);
	tcp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
	*tcp = (struct ibv_flow_spec_tcp_udp) {
		.type = flow->inner | IBV_FLOW_SPEC_TCP,
		.size = tcp_size,
	};
	if (!spec)
		return 0;
	if (!mask)
		mask = default_mask;
	tcp->val.dst_port = spec->hdr.dst_port;
	tcp->val.src_port = spec->hdr.src_port;
	tcp->mask.dst_port = mask->hdr.dst_port;
	tcp->mask.src_port = mask->hdr.src_port;
	/* Remove unwanted bits from values. */
	tcp->val.src_port &= tcp->mask.src_port;
	tcp->val.dst_port &= tcp->mask.dst_port;
	return 0;
}
/**
 * Convert VXLAN item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
		       const void *default_mask,
		       void *data)
{
	const struct rte_flow_item_vxlan *spec = item->spec;
	const struct rte_flow_item_vxlan *mask = item->mask;
	struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
	struct ibv_flow_spec_tunnel *vxlan;
	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
	union vni {
		uint32_t vlan_id;
		uint8_t vni[4];
	} id;

	++flow->ibv_attr->num_of_specs;
	flow->ibv_attr->priority = 0;
	id.vni[0] = 0;
	vxlan = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
	*vxlan = (struct ibv_flow_spec_tunnel) {
		.type = flow->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
		.size = size,
	};
	flow->inner = IBV_FLOW_SPEC_INNER;
	if (!spec)
		return 0;
	if (!mask)
		mask = default_mask;
	memcpy(&id.vni[1], spec->vni, 3);
	vxlan->val.tunnel_id = id.vlan_id;
	memcpy(&id.vni[1], mask->vni, 3);
	vxlan->mask.tunnel_id = id.vlan_id;
	/* Remove unwanted bits from values. */
	vxlan->val.tunnel_id &= vxlan->mask.tunnel_id;
	return 0;
}
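
/*
 * Illustrative note: the union above places the 24-bit VNI in the three
 * least significant byte positions of a 32-bit tunnel_id while zeroing
 * the first byte. For example, a VNI of 0x123456 (network order) yields
 * the bytes { 0x00, 0x12, 0x34, 0x56 } in id.vni[], i.e. 0x00123456 when
 * the word is read back as a big-endian 32-bit value.
 */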
/**
 * Convert mark/flag action to Verbs specification.
 *
 * @param flow
 *   Pointer to MLX5 flow structure.
 * @param mark_id
 *   Mark identifier.
 */
static int
mlx5_flow_create_flag_mark(struct mlx5_flow_parse *flow, uint32_t mark_id)
{
	struct ibv_flow_spec_action_tag *tag;
	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);

	tag = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
	*tag = (struct ibv_flow_spec_action_tag){
		.type = IBV_FLOW_SPEC_ACTION_TAG,
		.size = size,
		.tag_id = mlx5_flow_mark_set(mark_id),
	};
	++flow->ibv_attr->num_of_specs;
	return 0;
}
/**
 * Complete flow rule creation with a drop queue.
 *
 * @param priv
 *   Pointer to private structure.
 * @param flow
 *   MLX5 flow attributes (filled by mlx5_flow_validate()).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A flow if the rule could be created.
 */
static struct rte_flow *
priv_flow_create_action_queue_drop(struct priv *priv,
				   struct mlx5_flow_parse *flow,
				   struct rte_flow_error *error)
{
	struct rte_flow *rte_flow;
	struct ibv_flow_spec_action_drop *drop;
	unsigned int size = sizeof(struct ibv_flow_spec_action_drop);

	assert(priv->pd);
	assert(priv->ctx);
	rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow), 0);
	if (!rte_flow) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "cannot allocate flow memory");
		return NULL;
	}
	rte_flow->drop = 1;
	drop = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
	*drop = (struct ibv_flow_spec_action_drop){
		.type = IBV_FLOW_SPEC_ACTION_DROP,
		.size = size,
	};
	++flow->ibv_attr->num_of_specs;
	flow->offset += sizeof(struct ibv_flow_spec_action_drop);
	rte_flow->ibv_attr = flow->ibv_attr;
	if (!priv->dev->data->dev_started)
		return rte_flow;
	rte_flow->qp = priv->flow_drop_queue->qp;
	rte_flow->ibv_flow = ibv_create_flow(rte_flow->qp,
					     rte_flow->ibv_attr);
	if (!rte_flow->ibv_flow) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "flow rule creation failure");
		goto error;
	}
	return rte_flow;
error:
	assert(rte_flow);
	rte_free(rte_flow);
	return NULL;
}
/**
 * Complete flow rule creation.
 *
 * @param priv
 *   Pointer to private structure.
 * @param flow
 *   MLX5 flow attributes (filled by mlx5_flow_validate()).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A flow if the rule could be created.
 */
static struct rte_flow *
priv_flow_create_action_queue(struct priv *priv,
			      struct mlx5_flow_parse *flow,
			      struct rte_flow_error *error)
{
	struct rte_flow *rte_flow;
	unsigned int i;
	unsigned int j;
	const unsigned int wqs_n = 1 << log2above(flow->actions.queues_n);
	struct ibv_wq *wqs[wqs_n];

	assert(priv->pd);
	assert(priv->ctx);
	assert(!flow->actions.drop);
	rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow), 0);
	if (!rte_flow) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "cannot allocate flow memory");
		return NULL;
	}
	for (i = 0; i < flow->actions.queues_n; ++i) {
		struct mlx5_rxq_ibv *rxq_ibv =
			mlx5_priv_rxq_ibv_get(priv, flow->actions.queues[i]);

		wqs[i] = rxq_ibv->wq;
		rte_flow->queues[i] = flow->actions.queues[i];
		++rte_flow->queues_n;
		(*priv->rxqs)[flow->actions.queues[i]]->mark |=
			flow->actions.mark;
	}
	/* finalise indirection table. */
	for (j = 0; i < wqs_n; ++i, ++j) {
		wqs[i] = wqs[j];
		if (j == flow->actions.queues_n)
			j = 0;
	}
	rte_flow->mark = flow->actions.mark;
	rte_flow->ibv_attr = flow->ibv_attr;
	rte_flow->hash_fields = flow->hash_fields;
	rte_flow->ind_table = ibv_create_rwq_ind_table(
		priv->ctx,
		&(struct ibv_rwq_ind_table_init_attr){
			.log_ind_tbl_size = log2above(flow->actions.queues_n),
			.ind_tbl = wqs,
			.comp_mask = 0,
		});
	if (!rte_flow->ind_table) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "cannot allocate indirection table");
		goto error;
	}
	rte_flow->qp = ibv_create_qp_ex(
		priv->ctx,
		&(struct ibv_qp_init_attr_ex){
			.qp_type = IBV_QPT_RAW_PACKET,
			.comp_mask =
				IBV_QP_INIT_ATTR_PD |
				IBV_QP_INIT_ATTR_IND_TABLE |
				IBV_QP_INIT_ATTR_RX_HASH,
			.rx_hash_conf = (struct ibv_rx_hash_conf){
				.rx_hash_function =
					IBV_RX_HASH_FUNC_TOEPLITZ,
				.rx_hash_key_len = rss_hash_default_key_len,
				.rx_hash_key = rss_hash_default_key,
				.rx_hash_fields_mask = rte_flow->hash_fields,
			},
			.rwq_ind_tbl = rte_flow->ind_table,
			.pd = priv->pd,
		});
	if (!rte_flow->qp) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "cannot allocate QP");
		goto error;
	}
	if (!priv->dev->data->dev_started)
		return rte_flow;
	rte_flow->ibv_flow = ibv_create_flow(rte_flow->qp,
					     rte_flow->ibv_attr);
	if (!rte_flow->ibv_flow) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "flow rule creation failure");
		goto error;
	}
	return rte_flow;
error:
	assert(rte_flow);
	if (rte_flow->qp)
		ibv_destroy_qp(rte_flow->qp);
	if (rte_flow->ind_table)
		ibv_destroy_rwq_ind_table(rte_flow->ind_table);
	rte_free(rte_flow);
	return NULL;
}
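
/*
 * Illustrative note: Verbs indirection tables have power-of-two sizes, so
 * the queue list is padded by cycling over the configured queues. With
 * three queues { q0, q1, q2 }, wqs_n is 4 and the table above becomes
 * { q0, q1, q2, q0 }.
 */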
/**
 * Convert a flow.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] items
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A flow on success, NULL otherwise.
 */
static struct rte_flow *
priv_flow_create(struct priv *priv,
		 const struct rte_flow_attr *attr,
		 const struct rte_flow_item items[],
		 const struct rte_flow_action actions[],
		 struct rte_flow_error *error)
{
	struct rte_flow *rte_flow;
	struct mlx5_flow_parse flow = {
		.offset = sizeof(struct ibv_flow_attr),
		.actions = {
			.mark_id = MLX5_FLOW_MARK_DEFAULT,
			.queues_n = 0,
		},
	};
	int err;

	err = priv_flow_validate(priv, attr, items, actions, error, &flow);
	if (err)
		goto exit;
	flow.ibv_attr = rte_malloc(__func__, flow.offset, 0);
	flow.offset = sizeof(struct ibv_flow_attr);
	if (!flow.ibv_attr) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "cannot allocate ibv_attr memory");
		goto exit;
	}
	*flow.ibv_attr = (struct ibv_flow_attr){
		.type = IBV_FLOW_ATTR_NORMAL,
		.size = sizeof(struct ibv_flow_attr),
		.priority = attr->priority,
		.num_of_specs = 0,
		.port = 0,
		.flags = 0,
	};
	flow.inner = 0;
	flow.hash_fields = 0;
	claim_zero(priv_flow_validate(priv, attr, items, actions,
				      error, &flow));
	if (flow.actions.mark && !flow.actions.drop) {
		mlx5_flow_create_flag_mark(&flow, flow.actions.mark_id);
		flow.offset += sizeof(struct ibv_flow_spec_action_tag);
	}
	if (flow.actions.drop)
		rte_flow =
			priv_flow_create_action_queue_drop(priv, &flow, error);
	else
		rte_flow = priv_flow_create_action_queue(priv, &flow, error);
	if (!rte_flow)
		goto exit;
	return rte_flow;
exit:
	rte_free(flow.ibv_attr);
	return NULL;
}
/**
 * Create a flow.
 *
 * @see rte_flow_create()
 * @see rte_flow_ops
 */
struct rte_flow *
mlx5_flow_create(struct rte_eth_dev *dev,
		 const struct rte_flow_attr *attr,
		 const struct rte_flow_item items[],
		 const struct rte_flow_action actions[],
		 struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;
	struct rte_flow *flow;

	priv_lock(priv);
	flow = priv_flow_create(priv, attr, items, actions, error);
	if (flow) {
		TAILQ_INSERT_TAIL(&priv->flows, flow, next);
		DEBUG("Flow created %p", (void *)flow);
	}
	priv_unlock(priv);
	return flow;
}
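
/*
 * Illustrative usage sketch (not part of the driver): an application
 * reaching the entry point above through the generic API would typically
 * do something like the following; the port and queue index are
 * assumptions.
 *
 *	struct rte_flow_attr attr = { .ingress = 1 };
 *	struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_UDP },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 *	struct rte_flow_action_queue queue = { .index = 0 };
 *	struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 *	struct rte_flow_error err;
 *	struct rte_flow *f = rte_flow_create(port_id, &attr, pattern,
 *					     actions, &err);
 */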
/**
 * Destroy a flow.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] flow
 *   Flow to destroy.
 */
static void
priv_flow_destroy(struct priv *priv,
		  struct rte_flow *flow)
{
	unsigned int i;

	TAILQ_REMOVE(&priv->flows, flow, next);
	if (flow->ibv_flow)
		claim_zero(ibv_destroy_flow(flow->ibv_flow));
	/* The drop queue QP is shared, do not destroy it here. */
	if (flow->drop)
		goto free;
	if (flow->qp)
		claim_zero(ibv_destroy_qp(flow->qp));
	if (flow->ind_table)
		claim_zero(ibv_destroy_rwq_ind_table(flow->ind_table));
	for (i = 0; i != flow->queues_n; ++i) {
		struct rte_flow *tmp;
		struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[flow->queues[i]];
		struct mlx5_rxq_ctrl *rxq_ctrl =
			container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);

		/*
		 * To remove the mark from the queue, the queue must not be
		 * present in any other marked flow (RSS or not).
		 */
		if (flow->mark) {
			int mark = 0;

			TAILQ_FOREACH(tmp, &priv->flows, next) {
				unsigned int j;

				if (!tmp->mark)
					continue;
				for (j = 0; (j != tmp->queues_n) && !mark; j++)
					if (tmp->queues[j] == flow->queues[i])
						mark = 1;
			}
			rxq_data->mark = mark;
		}
		mlx5_priv_rxq_ibv_release(priv, rxq_ctrl->ibv);
	}
free:
	rte_free(flow->ibv_attr);
	DEBUG("Flow destroyed %p", (void *)flow);
	rte_free(flow);
}
/**
 * Destroy a flow.
 *
 * @see rte_flow_destroy()
 * @see rte_flow_ops
 */
int
mlx5_flow_destroy(struct rte_eth_dev *dev,
		  struct rte_flow *flow,
		  struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	(void)error;
	priv_lock(priv);
	priv_flow_destroy(priv, flow);
	priv_unlock(priv);
	return 0;
}
/**
 * Destroy all flows.
 *
 * @param priv
 *   Pointer to private structure.
 */
static void
priv_flow_flush(struct priv *priv)
{
	while (!TAILQ_EMPTY(&priv->flows)) {
		struct rte_flow *flow;

		flow = TAILQ_FIRST(&priv->flows);
		priv_flow_destroy(priv, flow);
	}
}
/**
 * Destroy all flows.
 *
 * @see rte_flow_flush()
 * @see rte_flow_ops
 */
int
mlx5_flow_flush(struct rte_eth_dev *dev,
		struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	(void)error;
	priv_lock(priv);
	priv_flow_flush(priv);
	priv_unlock(priv);
	return 0;
}
/**
 * Create drop queue.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   0 on success.
 */
static int
priv_flow_create_drop_queue(struct priv *priv)
{
	struct rte_flow_drop *fdq = NULL;

	assert(priv->pd);
	assert(priv->ctx);
	fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
	if (!fdq) {
		WARN("cannot allocate memory for drop queue");
		goto error;
	}
	fdq->cq = ibv_create_cq(priv->ctx, 1, NULL, NULL, 0);
	if (!fdq->cq) {
		WARN("cannot allocate CQ for drop queue");
		goto error;
	}
	fdq->wq = ibv_create_wq(priv->ctx,
			&(struct ibv_wq_init_attr){
			.wq_type = IBV_WQT_RQ,
			.max_wr = 1,
			.max_sge = 1,
			.pd = priv->pd,
			.cq = fdq->cq,
			});
	if (!fdq->wq) {
		WARN("cannot allocate WQ for drop queue");
		goto error;
	}
	fdq->ind_table = ibv_create_rwq_ind_table(priv->ctx,
			&(struct ibv_rwq_ind_table_init_attr){
			.log_ind_tbl_size = 0,
			.ind_tbl = &fdq->wq,
			.comp_mask = 0,
			});
	if (!fdq->ind_table) {
		WARN("cannot allocate indirection table for drop queue");
		goto error;
	}
	fdq->qp = ibv_create_qp_ex(priv->ctx,
		&(struct ibv_qp_init_attr_ex){
			.qp_type = IBV_QPT_RAW_PACKET,
			.comp_mask =
				IBV_QP_INIT_ATTR_PD |
				IBV_QP_INIT_ATTR_IND_TABLE |
				IBV_QP_INIT_ATTR_RX_HASH,
			.rx_hash_conf = (struct ibv_rx_hash_conf){
				.rx_hash_function =
					IBV_RX_HASH_FUNC_TOEPLITZ,
				.rx_hash_key_len = rss_hash_default_key_len,
				.rx_hash_key = rss_hash_default_key,
				.rx_hash_fields_mask = 0,
			},
			.rwq_ind_tbl = fdq->ind_table,
			.pd = priv->pd,
		});
	if (!fdq->qp) {
		WARN("cannot allocate QP for drop queue");
		goto error;
	}
	priv->flow_drop_queue = fdq;
	return 0;
error:
	if (fdq) {
		if (fdq->qp)
			claim_zero(ibv_destroy_qp(fdq->qp));
		if (fdq->ind_table)
			claim_zero(ibv_destroy_rwq_ind_table(fdq->ind_table));
		if (fdq->wq)
			claim_zero(ibv_destroy_wq(fdq->wq));
		if (fdq->cq)
			claim_zero(ibv_destroy_cq(fdq->cq));
		rte_free(fdq);
	}
	priv->flow_drop_queue = NULL;
	return -1;
}
/**
 * Delete drop queue.
 *
 * @param priv
 *   Pointer to private structure.
 */
static void
priv_flow_delete_drop_queue(struct priv *priv)
{
	struct rte_flow_drop *fdq = priv->flow_drop_queue;

	if (!fdq)
		return;
	if (fdq->qp)
		claim_zero(ibv_destroy_qp(fdq->qp));
	if (fdq->ind_table)
		claim_zero(ibv_destroy_rwq_ind_table(fdq->ind_table));
	if (fdq->wq)
		claim_zero(ibv_destroy_wq(fdq->wq));
	if (fdq->cq)
		claim_zero(ibv_destroy_cq(fdq->cq));
	rte_free(fdq);
	priv->flow_drop_queue = NULL;
}
/**
 * Remove all flows.
 *
 * Called by dev_stop() to remove all flows.
 *
 * @param priv
 *   Pointer to private structure.
 */
void
priv_flow_stop(struct priv *priv)
{
	struct rte_flow *flow;

	TAILQ_FOREACH_REVERSE(flow, &priv->flows, mlx5_flows, next) {
		claim_zero(ibv_destroy_flow(flow->ibv_flow));
		flow->ibv_flow = NULL;
		if (flow->mark) {
			unsigned int n;

			for (n = 0; n < flow->queues_n; ++n)
				(*priv->rxqs)[flow->queues[n]]->mark = 0;
		}
		DEBUG("Flow %p removed", (void *)flow);
	}
	priv_flow_delete_drop_queue(priv);
}
/**
 * Add all flows.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   0 on success, an errno value otherwise and rte_errno is set.
 */
int
priv_flow_start(struct priv *priv)
{
	int ret;
	struct rte_flow *flow;

	ret = priv_flow_create_drop_queue(priv);
	if (ret)
		return -1;
	TAILQ_FOREACH(flow, &priv->flows, next) {
		struct ibv_qp *qp;

		if (flow->drop)
			qp = priv->flow_drop_queue->qp;
		else
			qp = flow->qp;
		flow->ibv_flow = ibv_create_flow(qp, flow->ibv_attr);
		if (!flow->ibv_flow) {
			DEBUG("Flow %p cannot be applied", (void *)flow);
			rte_errno = EINVAL;
			return rte_errno;
		}
		DEBUG("Flow %p applied", (void *)flow);
		if (flow->mark) {
			unsigned int n;

			for (n = 0; n < flow->queues_n; ++n)
				(*priv->rxqs)[flow->queues[n]]->mark = 1;
		}
	}
	return 0;
}
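
/*
 * Illustrative note: priv_flow_stop() and priv_flow_start() above are the
 * two halves of the dev_stop()/dev_start() cycle. Stop removes every
 * Verbs flow (and the drop queue) while keeping the rte_flow objects
 * cached in priv->flows; start re-creates the drop queue and re-applies
 * each cached flow against either its own QP or the drop queue QP.
 */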
/**
 * Isolated mode.
 *
 * @see rte_flow_isolate()
 * @see rte_flow_ops
 */
int
mlx5_flow_isolate(struct rte_eth_dev *dev,
		  int enable,
		  struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	priv_lock(priv);
	if (dev->data->dev_started) {
		rte_flow_error_set(error, EBUSY,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "port must be stopped first");
		priv_unlock(priv);
		return -rte_errno;
	}
	priv->isolated = !!enable;
	priv_unlock(priv);
	return 0;
}
/**
 * Verify the flow list is empty.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   The number of flows not released.
 */
int
priv_flow_verify(struct priv *priv)
{
	struct rte_flow *flow;
	int ret = 0;

	TAILQ_FOREACH(flow, &priv->flows, next) {
		DEBUG("%p: flow %p still referenced", (void *)priv,
		      (void *)flow);
		++ret;
	}
	return ret;
}