4 * Copyright 2016 6WIND S.A.
5 * Copyright 2016 Mellanox.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of 6WIND S.A. nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 #include <sys/queue.h>
38 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
40 #pragma GCC diagnostic ignored "-Wpedantic"
42 #include <infiniband/verbs.h>
44 #pragma GCC diagnostic error "-Wpedantic"
47 #include <rte_ethdev.h>
49 #include <rte_flow_driver.h>
50 #include <rte_malloc.h>
/*
 * NOTE(review): forward declarations of the per-item conversion handlers
 * (one per rte_flow item type handled by this PMD). This file is a partial,
 * line-numbered dump; the return-type lines and trailing parameter lines of
 * each prototype are missing — confirm against the original mlx5_flow.c.
 */
56 mlx5_flow_create_eth(const struct rte_flow_item *item,
57 const void *default_mask,
61 mlx5_flow_create_vlan(const struct rte_flow_item *item,
62 const void *default_mask,
66 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
67 const void *default_mask,
71 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
72 const void *default_mask,
76 mlx5_flow_create_udp(const struct rte_flow_item *item,
77 const void *default_mask,
81 mlx5_flow_create_tcp(const struct rte_flow_item *item,
82 const void *default_mask,
86 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
87 const void *default_mask,
/*
 * NOTE(review): these are the members of the PMD-private "struct rte_flow"
 * (flow rule handle); the struct header and closing brace are not visible
 * in this partial dump — TODO confirm. The Verbs objects below (ind_table,
 * qp, wq, cq) presumably back the drop-queue path when rxqs_n == 0.
 */
91 LIST_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
92 struct ibv_exp_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
93 struct ibv_exp_rwq_ind_table *ind_table; /**< Indirection table. */
94 struct ibv_qp *qp; /**< Verbs queue pair. */
95 struct ibv_exp_flow *ibv_flow; /**< Verbs flow. */
96 struct ibv_exp_wq *wq; /**< Verbs work queue. */
97 struct ibv_cq *cq; /**< Verbs completion queue. */
98 struct rxq *(*rxqs)[]; /**< Pointer to the queues array. */
99 uint16_t rxqs_n; /**< Number of queues in this flow, 0 if drop queue. */
100 uint32_t mark:1; /**< Set if the flow is marked. */
101 uint32_t drop:1; /**< Drop queue. */
/*
 * NOTE(review): ITEMS(...) macro — builds a compound-literal array of item
 * types terminated by RTE_FLOW_ITEM_TYPE_END. The "#define ITEMS(...)" line
 * itself is missing from this dump.
 */
104 /** Static initializer for items. */
106 (const enum rte_flow_item_type []){ \
107 __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
110 /** Structure to generate a simple graph of layers supported by the NIC. */
111 struct mlx5_flow_items {
112 /** List of possible actions for these items. */
113 const enum rte_flow_action_type *const actions;
114 /** Bit-masks corresponding to the possibilities for the item. */
/* NOTE(review): the "const void *mask;" member declared here in the original
 * file is one of the lines missing from this dump. */
117 * Default bit-masks to use when item->mask is not provided. When
118 * \default_mask is also NULL, the full supported bit-mask (\mask) is
121 const void *default_mask;
122 /** Bit-masks size in bytes. */
123 const unsigned int mask_sz;
125 * Conversion function from rte_flow to NIC specific flow.
128 * rte_flow item to convert.
129 * @param default_mask
130 * Default bit-masks to use when item->mask is not provided.
132 * Internal structure to store the conversion.
135 * 0 on success, negative value otherwise.
137 int (*convert)(const struct rte_flow_item *item,
138 const void *default_mask,
140 /** Size in bytes of the destination structure. */
141 const unsigned int dst_sz;
142 /** List of possible following items. */
143 const enum rte_flow_item_type *const items;
/* Actions accepted by this PMD: drop, queue, mark, flag (END terminates). */
146 /** Valid action for this PMD. */
147 static const enum rte_flow_action_type valid_actions[] = {
148 RTE_FLOW_ACTION_TYPE_DROP,
149 RTE_FLOW_ACTION_TYPE_QUEUE,
150 RTE_FLOW_ACTION_TYPE_MARK,
151 RTE_FLOW_ACTION_TYPE_FLAG,
152 RTE_FLOW_ACTION_TYPE_END,
/*
 * NOTE(review): item graph indexed by rte_flow item type. Each entry lists
 * the items that may follow it, its supported bit-mask, the default mask,
 * the converter callback and the size of the resulting Verbs spec. Several
 * initializer lines (closing braces, some mask fields) are missing from this
 * partial dump — the entries below are incomplete as shown.
 */
155 /** Graph of supported items and associated actions. */
156 static const struct mlx5_flow_items mlx5_flow_items[] = {
157 [RTE_FLOW_ITEM_TYPE_END] = {
158 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
159 RTE_FLOW_ITEM_TYPE_VXLAN),
161 [RTE_FLOW_ITEM_TYPE_ETH] = {
162 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
163 RTE_FLOW_ITEM_TYPE_IPV4,
164 RTE_FLOW_ITEM_TYPE_IPV6),
165 .actions = valid_actions,
166 .mask = &(const struct rte_flow_item_eth){
167 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
168 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
171 .default_mask = &rte_flow_item_eth_mask,
172 .mask_sz = sizeof(struct rte_flow_item_eth),
173 .convert = mlx5_flow_create_eth,
174 .dst_sz = sizeof(struct ibv_exp_flow_spec_eth),
176 [RTE_FLOW_ITEM_TYPE_VLAN] = {
177 .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
178 RTE_FLOW_ITEM_TYPE_IPV6),
179 .actions = valid_actions,
180 .mask = &(const struct rte_flow_item_vlan){
183 .default_mask = &rte_flow_item_vlan_mask,
184 .mask_sz = sizeof(struct rte_flow_item_vlan),
185 .convert = mlx5_flow_create_vlan,
/* NOTE(review): VLAN has .dst_sz = 0 in the original — the VLAN tag is
 * folded into the preceding Ethernet spec; that line is missing here. */
188 [RTE_FLOW_ITEM_TYPE_IPV4] = {
189 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
190 RTE_FLOW_ITEM_TYPE_TCP),
191 .actions = valid_actions,
192 .mask = &(const struct rte_flow_item_ipv4){
196 .type_of_service = -1,
200 .default_mask = &rte_flow_item_ipv4_mask,
201 .mask_sz = sizeof(struct rte_flow_item_ipv4),
202 .convert = mlx5_flow_create_ipv4,
203 .dst_sz = sizeof(struct ibv_exp_flow_spec_ipv4_ext),
205 [RTE_FLOW_ITEM_TYPE_IPV6] = {
206 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
207 RTE_FLOW_ITEM_TYPE_TCP),
208 .actions = valid_actions,
209 .mask = &(const struct rte_flow_item_ipv6){
212 0xff, 0xff, 0xff, 0xff,
213 0xff, 0xff, 0xff, 0xff,
214 0xff, 0xff, 0xff, 0xff,
215 0xff, 0xff, 0xff, 0xff,
218 0xff, 0xff, 0xff, 0xff,
219 0xff, 0xff, 0xff, 0xff,
220 0xff, 0xff, 0xff, 0xff,
221 0xff, 0xff, 0xff, 0xff,
228 .default_mask = &rte_flow_item_ipv6_mask,
229 .mask_sz = sizeof(struct rte_flow_item_ipv6),
230 .convert = mlx5_flow_create_ipv6,
231 .dst_sz = sizeof(struct ibv_exp_flow_spec_ipv6_ext),
233 [RTE_FLOW_ITEM_TYPE_UDP] = {
234 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
235 .actions = valid_actions,
236 .mask = &(const struct rte_flow_item_udp){
242 .default_mask = &rte_flow_item_udp_mask,
243 .mask_sz = sizeof(struct rte_flow_item_udp),
244 .convert = mlx5_flow_create_udp,
245 .dst_sz = sizeof(struct ibv_exp_flow_spec_tcp_udp),
247 [RTE_FLOW_ITEM_TYPE_TCP] = {
248 .actions = valid_actions,
249 .mask = &(const struct rte_flow_item_tcp){
255 .default_mask = &rte_flow_item_tcp_mask,
256 .mask_sz = sizeof(struct rte_flow_item_tcp),
257 .convert = mlx5_flow_create_tcp,
258 .dst_sz = sizeof(struct ibv_exp_flow_spec_tcp_udp),
260 [RTE_FLOW_ITEM_TYPE_VXLAN] = {
261 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
262 .actions = valid_actions,
263 .mask = &(const struct rte_flow_item_vxlan){
264 .vni = "\xff\xff\xff",
266 .default_mask = &rte_flow_item_vxlan_mask,
267 .mask_sz = sizeof(struct rte_flow_item_vxlan),
268 .convert = mlx5_flow_create_vxlan,
269 .dst_sz = sizeof(struct ibv_exp_flow_spec_tunnel),
/*
 * NOTE(review): "struct mlx5_flow" (conversion context: Verbs attribute
 * buffer, running write offset, inner/outer flag) followed by
 * "struct mlx5_flow_action" (parsed action summary). The "struct mlx5_flow {"
 * header line and closing braces are missing from this dump.
 */
273 /** Structure to pass to the conversion function. */
275 struct ibv_exp_flow_attr *ibv_attr; /**< Verbs attribute. */
276 unsigned int offset; /**< Offset in bytes in the ibv_attr buffer. */
277 uint32_t inner; /**< Set once VXLAN is encountered. */
280 struct mlx5_flow_action {
281 uint32_t queue:1; /**< Target is a receive queue. */
282 uint32_t drop:1; /**< Target is a drop queue. */
283 uint32_t mark:1; /**< Mark is present in the flow. */
284 uint32_t mark_id; /**< Mark identifier. */
285 uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use. */
286 uint16_t queues_n; /**< Number of entries in queue[]. */
/*
 * NOTE(review): validates an item's spec/last/mask against the supported
 * bit-mask: every bit set in spec/last/mask must also be set in the supported
 * mask, and when both spec and last are given they must describe a single
 * value (memcmp of masked spec vs masked last). Error-return lines, variable
 * declarations and closing braces are missing from this partial dump.
 */
290 * Check support for a given item.
293 * Item specification.
295 * Bit-masks covering supported fields to compare with spec, last and mask in
298 * Bit-Mask size in bytes.
304 mlx5_flow_item_validate(const struct rte_flow_item *item,
305 const uint8_t *mask, unsigned int size)
309 if (!item->spec && (item->mask || item->last))
311 if (item->spec && !item->mask) {
313 const uint8_t *spec = item->spec;
315 for (i = 0; i < size; ++i)
316 if ((spec[i] | mask[i]) != mask[i])
319 if (item->last && !item->mask) {
321 const uint8_t *spec = item->last;
323 for (i = 0; i < size; ++i)
324 if ((spec[i] | mask[i]) != mask[i])
329 const uint8_t *spec = item->mask;
331 for (i = 0; i < size; ++i)
332 if ((spec[i] | mask[i]) != mask[i])
335 if (item->spec && item->last) {
338 const uint8_t *apply = mask;
343 for (i = 0; i < size; ++i) {
344 spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
345 last[i] = ((const uint8_t *)item->last)[i] & apply[i];
347 ret = memcmp(spec, last, size);
/*
 * NOTE(review): core validation walk. Rejects unsupported attributes
 * (group, priority, egress, non-ingress), walks the pattern through the
 * mlx5_flow_items graph (converting each item when flow->ibv_attr is set and
 * accumulating flow->offset), then checks the action list (VOID/DROP/QUEUE/
 * MARK/FLAG). Many lines — "return -rte_errno" statements, loop headers,
 * braces — are missing from this partial dump.
 */
353 * Validate a flow supported by the NIC.
356 * Pointer to private structure.
358 * Flow rule attributes.
360 * Pattern specification (list terminated by the END pattern item).
362 * Associated actions (list terminated by the END action).
364 * Perform verbose error reporting if not NULL.
365 * @param[in, out] flow
366 * Flow structure to update.
369 * 0 on success, a negative errno value otherwise and rte_errno is set.
372 priv_flow_validate(struct priv *priv,
373 const struct rte_flow_attr *attr,
374 const struct rte_flow_item items[],
375 const struct rte_flow_action actions[],
376 struct rte_flow_error *error,
377 struct mlx5_flow *flow)
379 const struct mlx5_flow_items *cur_item = mlx5_flow_items;
380 struct mlx5_flow_action action = {
/* Attribute checks: only default group, priority 0, ingress supported. */
388 rte_flow_error_set(error, ENOTSUP,
389 RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
391 "groups are not supported");
394 if (attr->priority) {
395 rte_flow_error_set(error, ENOTSUP,
396 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
398 "priorities are not supported");
402 rte_flow_error_set(error, ENOTSUP,
403 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
405 "egress is not supported");
408 if (!attr->ingress) {
409 rte_flow_error_set(error, ENOTSUP,
410 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
412 "only ingress is supported");
/* Pattern walk: each item must be a legal successor of the current one. */
415 for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
416 const struct mlx5_flow_items *token = NULL;
420 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
424 cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
426 if (cur_item->items[i] == items->type) {
427 token = &mlx5_flow_items[items->type];
432 goto exit_item_not_supported;
434 err = mlx5_flow_item_validate(items,
435 (const uint8_t *)cur_item->mask,
438 goto exit_item_not_supported;
439 if (flow->ibv_attr && cur_item->convert) {
440 err = cur_item->convert(items,
441 (cur_item->default_mask ?
442 cur_item->default_mask :
446 goto exit_item_not_supported;
448 flow->offset += cur_item->dst_sz;
/* Action walk: collect queue/drop/mark state, reject the rest. */
450 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
451 if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
453 } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
455 } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
456 const struct rte_flow_action_queue *queue =
457 (const struct rte_flow_action_queue *)
460 if (!queue || (queue->index > (priv->rxqs_n - 1)))
461 goto exit_action_not_supported;
463 } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
464 const struct rte_flow_action_mark *mark =
465 (const struct rte_flow_action_mark *)
469 rte_flow_error_set(error, EINVAL,
470 RTE_FLOW_ERROR_TYPE_ACTION,
472 "mark must be defined");
474 } else if (mark->id >= MLX5_FLOW_MARK_MAX) {
475 rte_flow_error_set(error, ENOTSUP,
476 RTE_FLOW_ERROR_TYPE_ACTION,
478 "mark must be between 0"
483 } else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
486 goto exit_action_not_supported;
/* Reserve room for the tag spec when a mark is requested. */
489 if (action.mark && !flow->ibv_attr && !action.drop)
490 flow->offset += sizeof(struct ibv_exp_flow_spec_action_tag);
491 if (!action.queue && !action.drop) {
492 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
493 NULL, "no valid action");
497 exit_item_not_supported:
498 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
499 items, "item not supported");
501 exit_action_not_supported:
502 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
503 actions, "action not supported");
/*
 * NOTE(review): public rte_flow_ops validate entry point — thin wrapper that
 * sets up a dry-run mlx5_flow (ibv_attr NULL, offset pre-sized for the
 * attribute header) and delegates to priv_flow_validate(). Locking and the
 * return statement are not visible in this partial dump.
 */
508 * Validate a flow supported by the NIC.
510 * @see rte_flow_validate()
514 mlx5_flow_validate(struct rte_eth_dev *dev,
515 const struct rte_flow_attr *attr,
516 const struct rte_flow_item items[],
517 const struct rte_flow_action actions[],
518 struct rte_flow_error *error)
520 struct priv *priv = dev->data->dev_private;
522 struct mlx5_flow flow = { .offset = sizeof(struct ibv_exp_flow_attr) };
525 ret = priv_flow_validate(priv, attr, items, actions, error, &flow);
/*
 * NOTE(review): writes an ibv_exp_flow_spec_eth at flow->offset, copying
 * MAC addresses and ether_type from spec, masks from mask, then ANDs values
 * with masks so only meaningful bits remain. The spec/mask-NULL early return
 * and the default_mask fallback lines are missing from this partial dump.
 */
531 * Convert Ethernet item to Verbs specification.
534 * Item specification.
535 * @param default_mask[in]
536 * Default bit-masks to use when item->mask is not provided.
537 * @param data[in, out]
541 mlx5_flow_create_eth(const struct rte_flow_item *item,
542 const void *default_mask,
545 const struct rte_flow_item_eth *spec = item->spec;
546 const struct rte_flow_item_eth *mask = item->mask;
547 struct mlx5_flow *flow = (struct mlx5_flow *)data;
548 struct ibv_exp_flow_spec_eth *eth;
549 const unsigned int eth_size = sizeof(struct ibv_exp_flow_spec_eth);
552 ++flow->ibv_attr->num_of_specs;
553 flow->ibv_attr->priority = 2;
554 eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
555 *eth = (struct ibv_exp_flow_spec_eth) {
556 .type = flow->inner | IBV_EXP_FLOW_SPEC_ETH,
563 memcpy(eth->val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
564 memcpy(eth->val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
565 eth->val.ether_type = spec->type;
566 memcpy(eth->mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
567 memcpy(eth->mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
568 eth->mask.ether_type = mask->type;
569 /* Remove unwanted bits from values. */
570 for (i = 0; i < ETHER_ADDR_LEN; ++i) {
571 eth->val.dst_mac[i] &= eth->mask.dst_mac[i];
572 eth->val.src_mac[i] &= eth->mask.src_mac[i];
574 eth->val.ether_type &= eth->mask.ether_type;
/*
 * NOTE(review): VLAN does not get its own spec — it patches vlan_tag into
 * the PREVIOUS Ethernet spec (note the "- eth_size" offset arithmetic).
 * This relies on an Ethernet item always preceding VLAN, which the item
 * graph enforces.
 */
579 * Convert VLAN item to Verbs specification.
582 * Item specification.
583 * @param default_mask[in]
584 * Default bit-masks to use when item->mask is not provided.
585 * @param data[in, out]
589 mlx5_flow_create_vlan(const struct rte_flow_item *item,
590 const void *default_mask,
593 const struct rte_flow_item_vlan *spec = item->spec;
594 const struct rte_flow_item_vlan *mask = item->mask;
595 struct mlx5_flow *flow = (struct mlx5_flow *)data;
596 struct ibv_exp_flow_spec_eth *eth;
597 const unsigned int eth_size = sizeof(struct ibv_exp_flow_spec_eth);
599 eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset - eth_size);
604 eth->val.vlan_tag = spec->tci;
605 eth->mask.vlan_tag = mask->tci;
606 eth->val.vlan_tag &= eth->mask.vlan_tag;
/*
 * NOTE(review): emits an ibv_exp_flow_spec_ipv4_ext (src/dst address,
 * protocol, TOS) and masks values. Early-return and default-mask fallback
 * lines are missing from this partial dump.
 */
611 * Convert IPv4 item to Verbs specification.
614 * Item specification.
615 * @param default_mask[in]
616 * Default bit-masks to use when item->mask is not provided.
617 * @param data[in, out]
621 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
622 const void *default_mask,
625 const struct rte_flow_item_ipv4 *spec = item->spec;
626 const struct rte_flow_item_ipv4 *mask = item->mask;
627 struct mlx5_flow *flow = (struct mlx5_flow *)data;
628 struct ibv_exp_flow_spec_ipv4_ext *ipv4;
629 unsigned int ipv4_size = sizeof(struct ibv_exp_flow_spec_ipv4_ext);
631 ++flow->ibv_attr->num_of_specs;
632 flow->ibv_attr->priority = 1;
633 ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
634 *ipv4 = (struct ibv_exp_flow_spec_ipv4_ext) {
635 .type = flow->inner | IBV_EXP_FLOW_SPEC_IPV4_EXT,
642 ipv4->val = (struct ibv_exp_flow_ipv4_ext_filter){
643 .src_ip = spec->hdr.src_addr,
644 .dst_ip = spec->hdr.dst_addr,
645 .proto = spec->hdr.next_proto_id,
646 .tos = spec->hdr.type_of_service,
648 ipv4->mask = (struct ibv_exp_flow_ipv4_ext_filter){
649 .src_ip = mask->hdr.src_addr,
650 .dst_ip = mask->hdr.dst_addr,
651 .proto = mask->hdr.next_proto_id,
652 .tos = mask->hdr.type_of_service,
654 /* Remove unwanted bits from values. */
655 ipv4->val.src_ip &= ipv4->mask.src_ip;
656 ipv4->val.dst_ip &= ipv4->mask.dst_ip;
657 ipv4->val.proto &= ipv4->mask.proto;
658 ipv4->val.tos &= ipv4->mask.tos;
/*
 * NOTE(review): emits an ibv_exp_flow_spec_ipv6_ext. The lines copying
 * spec->hdr values for flow_label/next_hdr/hop_limit are missing from this
 * dump, though their mask counterparts and the masking step are visible.
 */
663 * Convert IPv6 item to Verbs specification.
666 * Item specification.
667 * @param default_mask[in]
668 * Default bit-masks to use when item->mask is not provided.
669 * @param data[in, out]
673 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
674 const void *default_mask,
677 const struct rte_flow_item_ipv6 *spec = item->spec;
678 const struct rte_flow_item_ipv6 *mask = item->mask;
679 struct mlx5_flow *flow = (struct mlx5_flow *)data;
680 struct ibv_exp_flow_spec_ipv6_ext *ipv6;
681 unsigned int ipv6_size = sizeof(struct ibv_exp_flow_spec_ipv6_ext);
683 ++flow->ibv_attr->num_of_specs;
684 flow->ibv_attr->priority = 1;
685 ipv6 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
686 *ipv6 = (struct ibv_exp_flow_spec_ipv6_ext) {
687 .type = flow->inner | IBV_EXP_FLOW_SPEC_IPV6_EXT,
694 memcpy(ipv6->val.src_ip, spec->hdr.src_addr,
695 RTE_DIM(ipv6->val.src_ip));
696 memcpy(ipv6->val.dst_ip, spec->hdr.dst_addr,
697 RTE_DIM(ipv6->val.dst_ip));
698 memcpy(ipv6->mask.src_ip, mask->hdr.src_addr,
699 RTE_DIM(ipv6->mask.src_ip));
700 memcpy(ipv6->mask.dst_ip, mask->hdr.dst_addr,
701 RTE_DIM(ipv6->mask.dst_ip));
702 ipv6->mask.flow_label = mask->hdr.vtc_flow;
703 ipv6->mask.next_hdr = mask->hdr.proto;
704 ipv6->mask.hop_limit = mask->hdr.hop_limits;
705 ipv6->val.flow_label &= ipv6->mask.flow_label;
706 ipv6->val.next_hdr &= ipv6->mask.next_hdr;
707 ipv6->val.hop_limit &= ipv6->mask.hop_limit;
/*
 * NOTE(review): emits an ibv_exp_flow_spec_tcp_udp of type UDP with
 * src/dst ports, then masks the values. Mirrors mlx5_flow_create_tcp.
 */
712 * Convert UDP item to Verbs specification.
715 * Item specification.
716 * @param default_mask[in]
717 * Default bit-masks to use when item->mask is not provided.
718 * @param data[in, out]
722 mlx5_flow_create_udp(const struct rte_flow_item *item,
723 const void *default_mask,
726 const struct rte_flow_item_udp *spec = item->spec;
727 const struct rte_flow_item_udp *mask = item->mask;
728 struct mlx5_flow *flow = (struct mlx5_flow *)data;
729 struct ibv_exp_flow_spec_tcp_udp *udp;
730 unsigned int udp_size = sizeof(struct ibv_exp_flow_spec_tcp_udp);
732 ++flow->ibv_attr->num_of_specs;
733 flow->ibv_attr->priority = 0;
734 udp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
735 *udp = (struct ibv_exp_flow_spec_tcp_udp) {
736 .type = flow->inner | IBV_EXP_FLOW_SPEC_UDP,
743 udp->val.dst_port = spec->hdr.dst_port;
744 udp->val.src_port = spec->hdr.src_port;
745 udp->mask.dst_port = mask->hdr.dst_port;
746 udp->mask.src_port = mask->hdr.src_port;
747 /* Remove unwanted bits from values. */
748 udp->val.src_port &= udp->mask.src_port;
749 udp->val.dst_port &= udp->mask.dst_port;
/*
 * NOTE(review): TCP twin of mlx5_flow_create_udp — same tcp_udp spec layout,
 * type IBV_EXP_FLOW_SPEC_TCP.
 */
754 * Convert TCP item to Verbs specification.
757 * Item specification.
758 * @param default_mask[in]
759 * Default bit-masks to use when item->mask is not provided.
760 * @param data[in, out]
764 mlx5_flow_create_tcp(const struct rte_flow_item *item,
765 const void *default_mask,
768 const struct rte_flow_item_tcp *spec = item->spec;
769 const struct rte_flow_item_tcp *mask = item->mask;
770 struct mlx5_flow *flow = (struct mlx5_flow *)data;
771 struct ibv_exp_flow_spec_tcp_udp *tcp;
772 unsigned int tcp_size = sizeof(struct ibv_exp_flow_spec_tcp_udp);
774 ++flow->ibv_attr->num_of_specs;
775 flow->ibv_attr->priority = 0;
776 tcp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
777 *tcp = (struct ibv_exp_flow_spec_tcp_udp) {
778 .type = flow->inner | IBV_EXP_FLOW_SPEC_TCP,
785 tcp->val.dst_port = spec->hdr.dst_port;
786 tcp->val.src_port = spec->hdr.src_port;
787 tcp->mask.dst_port = mask->hdr.dst_port;
788 tcp->mask.src_port = mask->hdr.src_port;
789 /* Remove unwanted bits from values. */
790 tcp->val.src_port &= tcp->mask.src_port;
791 tcp->val.dst_port &= tcp->mask.dst_port;
/*
 * NOTE(review): emits a tunnel spec and flips flow->inner so subsequent
 * items are tagged as inner headers. The 24-bit VNI is copied into bytes
 * 1..3 of a union (declared on missing lines) and read back as the 32-bit
 * tunnel_id — presumably a big-endian placement; confirm against the
 * original union declaration.
 */
796 * Convert VXLAN item to Verbs specification.
799 * Item specification.
800 * @param default_mask[in]
801 * Default bit-masks to use when item->mask is not provided.
802 * @param data[in, out]
806 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
807 const void *default_mask,
810 const struct rte_flow_item_vxlan *spec = item->spec;
811 const struct rte_flow_item_vxlan *mask = item->mask;
812 struct mlx5_flow *flow = (struct mlx5_flow *)data;
813 struct ibv_exp_flow_spec_tunnel *vxlan;
814 unsigned int size = sizeof(struct ibv_exp_flow_spec_tunnel);
820 ++flow->ibv_attr->num_of_specs;
821 flow->ibv_attr->priority = 0;
823 vxlan = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
824 *vxlan = (struct ibv_exp_flow_spec_tunnel) {
825 .type = flow->inner | IBV_EXP_FLOW_SPEC_VXLAN_TUNNEL,
828 flow->inner = IBV_EXP_FLOW_SPEC_INNER;
833 memcpy(&id.vni[1], spec->vni, 3);
834 vxlan->val.tunnel_id = id.vlan_id;
835 memcpy(&id.vni[1], mask->vni, 3);
836 vxlan->mask.tunnel_id = id.vlan_id;
837 /* Remove unwanted bits from values. */
838 vxlan->val.tunnel_id &= vxlan->mask.tunnel_id;
/*
 * NOTE(review): appends an ACTION_TAG spec carrying the (translated) mark id
 * at the current offset and bumps num_of_specs. mlx5_flow_mark_set() is
 * defined elsewhere — presumably maps the rte_flow mark id to the device
 * tag encoding; confirm in mlx5 headers.
 */
843 * Convert mark/flag action to Verbs specification.
846 * Pointer to MLX5 flow structure.
851 mlx5_flow_create_flag_mark(struct mlx5_flow *flow, uint32_t mark_id)
853 struct ibv_exp_flow_spec_action_tag *tag;
854 unsigned int size = sizeof(struct ibv_exp_flow_spec_action_tag);
856 tag = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
857 *tag = (struct ibv_exp_flow_spec_action_tag){
858 .type = IBV_EXP_FLOW_SPEC_ACTION_TAG,
860 .tag_id = mlx5_flow_mark_set(mark_id),
862 ++flow->ibv_attr->num_of_specs;
/*
 * NOTE(review): builds a dedicated drop target: CQ -> WQ -> indirection
 * table -> hash QP with a zero rx_hash_fields_mask, then attaches the flow.
 * On any failure it unwinds via the error path at the bottom (destroy QP/
 * ind_table/WQ/CQ in reverse order). Several goto/error/return lines are
 * missing from this partial dump, so the cleanup labels are not fully
 * visible.
 */
867 * Complete flow rule creation with a drop queue.
870 * Pointer to private structure.
872 * MLX5 flow attributes (filled by mlx5_flow_validate()).
874 * Perform verbose error reporting if not NULL.
877 * A flow if the rule could be created.
879 static struct rte_flow *
880 priv_flow_create_action_queue_drop(struct priv *priv,
881 struct mlx5_flow *flow,
882 struct rte_flow_error *error)
884 struct rte_flow *rte_flow;
888 rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow), 0);
890 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
891 NULL, "cannot allocate flow memory");
895 ibv_exp_create_cq(priv->ctx, 1, NULL, NULL, 0,
896 &(struct ibv_exp_cq_init_attr){
900 rte_flow_error_set(error, ENOMEM,
901 RTE_FLOW_ERROR_TYPE_HANDLE,
902 NULL, "cannot allocate CQ");
905 rte_flow->wq = ibv_exp_create_wq(priv->ctx,
906 &(struct ibv_exp_wq_init_attr){
907 .wq_type = IBV_EXP_WQT_RQ,
914 rte_flow_error_set(error, ENOMEM,
915 RTE_FLOW_ERROR_TYPE_HANDLE,
916 NULL, "cannot allocate WQ");
920 rte_flow->ibv_attr = flow->ibv_attr;
921 rte_flow->ind_table = ibv_exp_create_rwq_ind_table(
923 &(struct ibv_exp_rwq_ind_table_init_attr){
925 .log_ind_tbl_size = 0,
926 .ind_tbl = &rte_flow->wq,
929 if (!rte_flow->ind_table) {
930 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
931 NULL, "cannot allocate indirection table");
934 rte_flow->qp = ibv_exp_create_qp(
936 &(struct ibv_exp_qp_init_attr){
937 .qp_type = IBV_QPT_RAW_PACKET,
939 IBV_EXP_QP_INIT_ATTR_PD |
940 IBV_EXP_QP_INIT_ATTR_PORT |
941 IBV_EXP_QP_INIT_ATTR_RX_HASH,
943 .rx_hash_conf = &(struct ibv_exp_rx_hash_conf){
945 IBV_EXP_RX_HASH_FUNC_TOEPLITZ,
946 .rx_hash_key_len = rss_hash_default_key_len,
947 .rx_hash_key = rss_hash_default_key,
948 .rx_hash_fields_mask = 0,
949 .rwq_ind_tbl = rte_flow->ind_table,
951 .port_num = priv->port,
954 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
955 NULL, "cannot allocate QP");
960 rte_flow->ibv_flow = ibv_exp_create_flow(rte_flow->qp,
962 if (!rte_flow->ibv_flow) {
963 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
964 NULL, "flow rule creation failure");
/* Error path: tear down whatever was created, newest first. */
971 ibv_destroy_qp(rte_flow->qp);
972 if (rte_flow->ind_table)
973 ibv_exp_destroy_rwq_ind_table(rte_flow->ind_table);
975 ibv_exp_destroy_wq(rte_flow->wq);
977 ibv_destroy_cq(rte_flow->cq);
/*
 * NOTE(review): queue-target counterpart of the drop path. Allocates the
 * rte_flow plus a trailing rxq pointer array, resolves each target rxq via
 * container_of, ORs the mark flag into each rxq, then creates indirection
 * table -> hash QP -> flow. Error handling mirrors the drop variant. The
 * wq[] gathering loop, success return and parts of the cleanup path are
 * missing from this partial dump.
 */
983 * Complete flow rule creation.
986 * Pointer to private structure.
988 * MLX5 flow attributes (filled by mlx5_flow_validate()).
990 * Target action structure.
992 * Perform verbose error reporting if not NULL.
995 * A flow if the rule could be created.
997 static struct rte_flow *
998 priv_flow_create_action_queue(struct priv *priv,
999 struct mlx5_flow *flow,
1000 struct mlx5_flow_action *action,
1001 struct rte_flow_error *error)
1003 struct rte_flow *rte_flow;
1005 struct ibv_exp_wq *wq[action->queues_n];
1009 assert(!action->drop);
1010 rte_flow = rte_calloc(__func__, 1,
1011 sizeof(*rte_flow) + sizeof(struct rxq *) *
1012 action->queues_n, 0);
1014 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1015 NULL, "cannot allocate flow memory");
1018 rte_flow->rxqs = (struct rxq *(*)[])((uintptr_t)rte_flow +
1019 sizeof(struct rxq *) *
1021 for (i = 0; i < action->queues_n; ++i) {
1022 struct rxq_ctrl *rxq;
1024 rxq = container_of((*priv->rxqs)[action->queues[i]],
1025 struct rxq_ctrl, rxq);
1027 (*rte_flow->rxqs)[i] = &rxq->rxq;
1029 rxq->rxq.mark |= action->mark;
1031 rte_flow->mark = action->mark;
1032 rte_flow->ibv_attr = flow->ibv_attr;
1033 rte_flow->ind_table = ibv_exp_create_rwq_ind_table(
1035 &(struct ibv_exp_rwq_ind_table_init_attr){
1037 .log_ind_tbl_size = 0,
1041 if (!rte_flow->ind_table) {
1042 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1043 NULL, "cannot allocate indirection table");
1046 rte_flow->qp = ibv_exp_create_qp(
1048 &(struct ibv_exp_qp_init_attr){
1049 .qp_type = IBV_QPT_RAW_PACKET,
1051 IBV_EXP_QP_INIT_ATTR_PD |
1052 IBV_EXP_QP_INIT_ATTR_PORT |
1053 IBV_EXP_QP_INIT_ATTR_RX_HASH,
1055 .rx_hash_conf = &(struct ibv_exp_rx_hash_conf){
1057 IBV_EXP_RX_HASH_FUNC_TOEPLITZ,
1058 .rx_hash_key_len = rss_hash_default_key_len,
1059 .rx_hash_key = rss_hash_default_key,
1060 .rx_hash_fields_mask = 0,
1061 .rwq_ind_tbl = rte_flow->ind_table,
1063 .port_num = priv->port,
1065 if (!rte_flow->qp) {
1066 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1067 NULL, "cannot allocate QP");
1072 rte_flow->ibv_flow = ibv_exp_create_flow(rte_flow->qp,
1073 rte_flow->ibv_attr);
1074 if (!rte_flow->ibv_flow) {
1075 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1076 NULL, "flow rule creation failure");
/* Error path: destroy QP then indirection table. */
1083 ibv_destroy_qp(rte_flow->qp);
1084 if (rte_flow->ind_table)
1085 ibv_exp_destroy_rwq_ind_table(rte_flow->ind_table);
/*
 * NOTE(review): two-pass create. First pass validates and sizes the Verbs
 * attribute buffer (flow.offset); then the buffer is allocated and a second
 * validate pass (claim_zero'd) fills the specs. The action list is re-walked
 * to collect queue indexes, drop and mark state; a tag spec is appended for
 * mark/flag; finally the drop or queue completion helper is invoked. The
 * function header comment, some branches and the return/cleanup tail are
 * missing from this partial dump.
 */
1094 * Pointer to private structure.
1096 * Flow rule attributes.
1097 * @param[in] pattern
1098 * Pattern specification (list terminated by the END pattern item).
1099 * @param[in] actions
1100 * Associated actions (list terminated by the END action).
1102 * Perform verbose error reporting if not NULL.
1105 * A flow on success, NULL otherwise.
1107 static struct rte_flow *
1108 priv_flow_create(struct priv *priv,
1109 const struct rte_flow_attr *attr,
1110 const struct rte_flow_item items[],
1111 const struct rte_flow_action actions[],
1112 struct rte_flow_error *error)
1114 struct rte_flow *rte_flow;
1115 struct mlx5_flow_action action;
1116 struct mlx5_flow flow = { .offset = sizeof(struct ibv_exp_flow_attr), };
1119 err = priv_flow_validate(priv, attr, items, actions, error, &flow);
1122 flow.ibv_attr = rte_malloc(__func__, flow.offset, 0);
1123 flow.offset = sizeof(struct ibv_exp_flow_attr);
1124 if (!flow.ibv_attr) {
1125 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1126 NULL, "cannot allocate ibv_attr memory");
1129 *flow.ibv_attr = (struct ibv_exp_flow_attr){
1130 .type = IBV_EXP_FLOW_ATTR_NORMAL,
1131 .size = sizeof(struct ibv_exp_flow_attr),
1132 .priority = attr->priority,
1139 claim_zero(priv_flow_validate(priv, attr, items, actions,
1141 action = (struct mlx5_flow_action){
1145 .mark_id = MLX5_FLOW_MARK_DEFAULT,
1147 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
1148 if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
1150 } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
1152 action.queues[action.queues_n++] =
1153 ((const struct rte_flow_action_queue *)
1154 actions->conf)->index;
1155 } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
1158 } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
1159 const struct rte_flow_action_mark *mark =
1160 (const struct rte_flow_action_mark *)
1164 action.mark_id = mark->id;
1165 action.mark = !action.drop;
1166 } else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
1169 rte_flow_error_set(error, ENOTSUP,
1170 RTE_FLOW_ERROR_TYPE_ACTION,
1171 actions, "unsupported action");
1176 mlx5_flow_create_flag_mark(&flow, action.mark_id);
1177 flow.offset += sizeof(struct ibv_exp_flow_spec_action_tag);
1181 priv_flow_create_action_queue_drop(priv, &flow, error);
1183 rte_flow = priv_flow_create_action_queue(priv, &flow, &action,
/* Error path frees the attribute buffer. */
1189 rte_free(flow.ibv_attr);
/*
 * NOTE(review): public rte_flow_ops create entry — delegates to
 * priv_flow_create() and, on success, links the new flow into priv->flows.
 * Locking and the return statement are not visible in this partial dump.
 */
1196 * @see rte_flow_create()
1200 mlx5_flow_create(struct rte_eth_dev *dev,
1201 const struct rte_flow_attr *attr,
1202 const struct rte_flow_item items[],
1203 const struct rte_flow_action actions[],
1204 struct rte_flow_error *error)
1206 struct priv *priv = dev->data->dev_private;
1207 struct rte_flow *flow;
1210 flow = priv_flow_create(priv, attr, items, actions, error);
1212 LIST_INSERT_HEAD(&priv->flows, flow, next);
1213 DEBUG("Flow created %p", (void *)flow);
/*
 * NOTE(review): unlinks and destroys a flow. Drop-path Verbs objects (WQ,
 * CQ) are released only when flow->drop is set. The second half recomputes
 * each rxq's mark flag: a queue keeps mark set only while some other marked
 * flow still references it. Loop headers and several conditions are missing
 * from this partial dump, so the exact scan structure is not fully visible.
 */
1223 * Pointer to private structure.
1228 priv_flow_destroy(struct priv *priv,
1229 struct rte_flow *flow)
1232 LIST_REMOVE(flow, next);
1234 claim_zero(ibv_exp_destroy_flow(flow->ibv_flow));
1236 claim_zero(ibv_destroy_qp(flow->qp));
1237 if (flow->ind_table)
1238 claim_zero(ibv_exp_destroy_rwq_ind_table(flow->ind_table));
1239 if (flow->drop && flow->wq)
1240 claim_zero(ibv_exp_destroy_wq(flow->wq));
1241 if (flow->drop && flow->cq)
1242 claim_zero(ibv_destroy_cq(flow->cq));
1244 struct rte_flow *tmp;
1246 uint32_t mark_n = 0;
1250 * To remove the mark from the queue, the queue must not be
1251 * present in any other marked flow (RSS or not).
1253 for (queue_n = 0; queue_n < flow->rxqs_n; ++queue_n) {
1254 rxq = (*flow->rxqs)[queue_n];
1255 for (tmp = LIST_FIRST(&priv->flows);
1257 tmp = LIST_NEXT(tmp, next)) {
1263 tqueue_n < tmp->rxqs_n;
1267 trxq = (*tmp->rxqs)[tqueue_n];
1272 rxq->mark = !!mark_n;
1275 rte_free(flow->ibv_attr);
1276 DEBUG("Flow destroyed %p", (void *)flow);
/*
 * NOTE(review): public rte_flow_ops destroy entry — wraps
 * priv_flow_destroy(); locking and return not visible in this dump.
 */
1283 * @see rte_flow_destroy()
1287 mlx5_flow_destroy(struct rte_eth_dev *dev,
1288 struct rte_flow *flow,
1289 struct rte_flow_error *error)
1291 struct priv *priv = dev->data->dev_private;
1295 priv_flow_destroy(priv, flow);
/* Destroy every flow on the list by repeatedly removing the head. */
1301 * Destroy all flows.
1304 * Pointer to private structure.
1307 priv_flow_flush(struct priv *priv)
1309 while (!LIST_EMPTY(&priv->flows)) {
1310 struct rte_flow *flow;
1312 flow = LIST_FIRST(&priv->flows);
1313 priv_flow_destroy(priv, flow);
/*
 * NOTE(review): public rte_flow_ops flush entry — wraps priv_flow_flush();
 * locking and return not visible in this dump.
 */
1318 * Destroy all flows.
1320 * @see rte_flow_flush()
1324 mlx5_flow_flush(struct rte_eth_dev *dev,
1325 struct rte_flow_error *error)
1327 struct priv *priv = dev->data->dev_private;
1331 priv_flow_flush(priv);
/*
 * NOTE(review): dev_stop() helper — detaches each flow from hardware
 * (ibv_exp_destroy_flow, then NULL the handle so priv_flow_start() can
 * re-create it) and clears the mark flag on the flow's rxqs. The rte_flow
 * structures themselves are kept on the list. Some loop/condition lines are
 * missing from this partial dump.
 */
1339 * Called by dev_stop() to remove all flows.
1342 * Pointer to private structure.
1345 priv_flow_stop(struct priv *priv)
1347 struct rte_flow *flow;
1349 for (flow = LIST_FIRST(&priv->flows);
1351 flow = LIST_NEXT(flow, next)) {
1352 claim_zero(ibv_exp_destroy_flow(flow->ibv_flow));
1353 flow->ibv_flow = NULL;
1357 for (n = 0; n < flow->rxqs_n; ++n)
1358 (*flow->rxqs)[n]->mark = 0;
1360 DEBUG("Flow %p removed", (void *)flow);
1368 * Pointer to private structure.
1371 * 0 on success, a errno value otherwise and rte_errno is set.
1374 priv_flow_start(struct priv *priv)
1376 struct rte_flow *flow;
1378 for (flow = LIST_FIRST(&priv->flows);
1380 flow = LIST_NEXT(flow, next)) {
1381 flow->ibv_flow = ibv_exp_create_flow(flow->qp,
1383 if (!flow->ibv_flow) {
1384 DEBUG("Flow %p cannot be applied", (void *)flow);
1388 DEBUG("Flow %p applied", (void *)flow);
1392 for (n = 0; n < flow->rxqs_n; ++n)
1393 (*flow->rxqs)[n]->mark = 1;