/*-
 * Copyright 2016 6WIND S.A.
 * Copyright 2016 Mellanox.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * * Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * * Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in
 *   the documentation and/or other materials provided with the
 *   distribution.
 * * Neither the name of 6WIND S.A. nor the names of its
 *   contributors may be used to endorse or promote products derived
 *   from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/queue.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_ethdev.h>
#include <rte_flow.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>

#include "mlx5.h"
#include "mlx5_prm.h"
static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data);

static int
mlx5_flow_create_vlan(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data);

static int
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data);

static int
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data);

static int
mlx5_flow_create_udp(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data);

static int
mlx5_flow_create_tcp(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data);

static int
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
                       const void *default_mask,
                       void *data);

/** Flow structure. */
struct rte_flow {
        TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
        struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
        struct mlx5_ind_table_ibv *ind_table; /**< Indirection table. */
        struct ibv_qp *qp; /**< Verbs queue pair. */
        struct ibv_flow *ibv_flow; /**< Verbs flow. */
        struct ibv_wq *wq; /**< Verbs work queue. */
        struct ibv_cq *cq; /**< Verbs completion queue. */
        uint32_t mark:1; /**< Set if the flow is marked. */
        uint32_t drop:1; /**< Set if the flow targets the drop queue. */
        uint64_t hash_fields; /**< Fields that participate in the hash. */
};

/** Static initializer for items. */
#define ITEMS(...) \
        (const enum rte_flow_item_type []){ \
                __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
        }
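
/*
 * For illustration: ITEMS(RTE_FLOW_ITEM_TYPE_ETH) expands to an anonymous
 * array terminated by RTE_FLOW_ITEM_TYPE_END:
 *
 *   (const enum rte_flow_item_type []){
 *           RTE_FLOW_ITEM_TYPE_ETH, RTE_FLOW_ITEM_TYPE_END,
 *   }
 *
 * which lets each node of the graph below list its valid successors inline.
 */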

/** Structure to generate a simple graph of layers supported by the NIC. */
struct mlx5_flow_items {
        /** List of possible actions for these items. */
        const enum rte_flow_action_type *const actions;
        /** Bit-masks corresponding to the possibilities for the item. */
        const void *mask;
        /**
         * Default bit-masks to use when item->mask is not provided. When
         * \default_mask is also NULL, the full supported bit-mask (\mask) is
         * used instead.
         */
        const void *default_mask;
        /** Bit-masks size in bytes. */
        const unsigned int mask_sz;
        /**
         * Conversion function from rte_flow to NIC specific flow.
         *
         * @param item
         *   rte_flow item to convert.
         * @param default_mask
         *   Default bit-masks to use when item->mask is not provided.
         * @param data
         *   Internal structure to store the conversion.
         *
         * @return
         *   0 on success, negative value otherwise.
         */
        int (*convert)(const struct rte_flow_item *item,
                       const void *default_mask,
                       void *data);
        /** Size in bytes of the destination structure. */
        const unsigned int dst_sz;
        /** List of possible following items. */
        const enum rte_flow_item_type *const items;
};

/** Valid actions for this PMD. */
static const enum rte_flow_action_type valid_actions[] = {
        RTE_FLOW_ACTION_TYPE_DROP,
        RTE_FLOW_ACTION_TYPE_QUEUE,
        RTE_FLOW_ACTION_TYPE_MARK,
        RTE_FLOW_ACTION_TYPE_FLAG,
        RTE_FLOW_ACTION_TYPE_END,
};

/** Graph of supported items and associated actions. */
static const struct mlx5_flow_items mlx5_flow_items[] = {
        [RTE_FLOW_ITEM_TYPE_END] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
                               RTE_FLOW_ITEM_TYPE_VXLAN),
        },
        [RTE_FLOW_ITEM_TYPE_ETH] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
                               RTE_FLOW_ITEM_TYPE_IPV4,
                               RTE_FLOW_ITEM_TYPE_IPV6),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_eth){
                        .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
                        .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
                        .type = -1,
                },
                .default_mask = &rte_flow_item_eth_mask,
                .mask_sz = sizeof(struct rte_flow_item_eth),
                .convert = mlx5_flow_create_eth,
                .dst_sz = sizeof(struct ibv_flow_spec_eth),
        },
        [RTE_FLOW_ITEM_TYPE_VLAN] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
                               RTE_FLOW_ITEM_TYPE_IPV6),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_vlan){
                        .tci = -1,
                },
                .default_mask = &rte_flow_item_vlan_mask,
                .mask_sz = sizeof(struct rte_flow_item_vlan),
                .convert = mlx5_flow_create_vlan,
                .dst_sz = 0,
        },
        [RTE_FLOW_ITEM_TYPE_IPV4] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
                               RTE_FLOW_ITEM_TYPE_TCP),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_ipv4){
                        .hdr = {
                                .src_addr = -1,
                                .dst_addr = -1,
                                .type_of_service = -1,
                                .next_proto_id = -1,
                        },
                },
                .default_mask = &rte_flow_item_ipv4_mask,
                .mask_sz = sizeof(struct rte_flow_item_ipv4),
                .convert = mlx5_flow_create_ipv4,
                .dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
        },
        [RTE_FLOW_ITEM_TYPE_IPV6] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
                               RTE_FLOW_ITEM_TYPE_TCP),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_ipv6){
                        .hdr = {
                                .src_addr = {
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                },
                                .dst_addr = {
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                },
                                .vtc_flow = -1,
                                .proto = -1,
                                .hop_limits = -1,
                        },
                },
                .default_mask = &rte_flow_item_ipv6_mask,
                .mask_sz = sizeof(struct rte_flow_item_ipv6),
                .convert = mlx5_flow_create_ipv6,
                .dst_sz = sizeof(struct ibv_flow_spec_ipv6),
        },
        [RTE_FLOW_ITEM_TYPE_UDP] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_udp){
                        .hdr = {
                                .src_port = -1,
                                .dst_port = -1,
                        },
                },
                .default_mask = &rte_flow_item_udp_mask,
                .mask_sz = sizeof(struct rte_flow_item_udp),
                .convert = mlx5_flow_create_udp,
                .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
        },
        [RTE_FLOW_ITEM_TYPE_TCP] = {
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_tcp){
                        .hdr = {
                                .src_port = -1,
                                .dst_port = -1,
                        },
                },
                .default_mask = &rte_flow_item_tcp_mask,
                .mask_sz = sizeof(struct rte_flow_item_tcp),
                .convert = mlx5_flow_create_tcp,
                .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
        },
        [RTE_FLOW_ITEM_TYPE_VXLAN] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_vxlan){
                        .vni = "\xff\xff\xff",
                },
                .default_mask = &rte_flow_item_vxlan_mask,
                .mask_sz = sizeof(struct rte_flow_item_vxlan),
                .convert = mlx5_flow_create_vxlan,
                .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
        },
};
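
/*
 * Illustration: a pattern such as eth / ipv4 / udp is accepted by walking
 * this graph from [RTE_FLOW_ITEM_TYPE_END], the implicit root: END lists
 * ETH as a successor, ETH lists IPV4 and IPV4 lists UDP. An item that is
 * not listed by its predecessor makes priv_flow_validate() fail with
 * "item not supported".
 */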

/** Structure to parse actions. */
struct mlx5_flow_action {
        uint32_t queue:1; /**< Target is a receive queue. */
        uint32_t drop:1; /**< Target is a drop queue. */
        uint32_t mark:1; /**< Mark is present in the flow. */
        uint32_t mark_id; /**< Mark identifier. */
        uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indexes to use. */
        uint16_t queues_n; /**< Number of entries in queues[]. */
};

/** Structure to pass to the conversion function. */
struct mlx5_flow_parse {
        struct ibv_flow_attr *ibv_attr; /**< Verbs attribute. */
        unsigned int offset; /**< Offset in bytes in the ibv_attr buffer. */
        uint32_t inner; /**< Set once VXLAN is encountered. */
        uint64_t hash_fields; /**< Fields that participate in the hash. */
        struct mlx5_flow_action actions; /**< Parsed action result. */
};

/** Structure for the drop queue. */
struct rte_flow_drop {
        struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
        struct ibv_qp *qp; /**< Verbs queue pair. */
        struct ibv_wq *wq; /**< Verbs work queue. */
        struct ibv_cq *cq; /**< Verbs completion queue. */
};

static const struct rte_flow_ops mlx5_flow_ops = {
        .validate = mlx5_flow_validate,
        .create = mlx5_flow_create,
        .destroy = mlx5_flow_destroy,
        .flush = mlx5_flow_flush,
        .query = NULL,
        .isolate = mlx5_flow_isolate,
};

/**
 * Manage filter operations.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param filter_type
 *   Filter type.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
int
mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
                     enum rte_filter_type filter_type,
                     enum rte_filter_op filter_op,
                     void *arg)
{
        int ret = EINVAL;

        if (filter_type == RTE_ETH_FILTER_GENERIC) {
                if (filter_op != RTE_ETH_FILTER_GET)
                        return -EINVAL;
                *(const void **)arg = &mlx5_flow_ops;
                return 0;
        }
        ERROR("%p: filter type (%d) not supported",
              (void *)dev, filter_type);
        return -ret;
}
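
/*
 * Note: rte_flow reaches this PMD through the generic filter API; a
 * RTE_ETH_FILTER_GET operation on RTE_ETH_FILTER_GENERIC simply hands back
 * the mlx5_flow_ops table above, whose callbacks rte_flow then invokes
 * directly.
 */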

/**
 * Check support for a given item.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] mask
 *   Bit-masks covering supported fields to compare with spec, last and mask in
 *   \item.
 * @param size
 *   Bit-mask size in bytes.
 *
 * @return
 *   0 on success.
 */
static int
mlx5_flow_item_validate(const struct rte_flow_item *item,
                        const uint8_t *mask, unsigned int size)
{
        int ret = 0;

        if (!item->spec && (item->mask || item->last))
                return -1;
        if (item->spec && !item->mask) {
                unsigned int i;
                const uint8_t *spec = item->spec;

                for (i = 0; i < size; ++i)
                        if ((spec[i] | mask[i]) != mask[i])
                                return -1;
        }
        if (item->last && !item->mask) {
                unsigned int i;
                const uint8_t *spec = item->last;

                for (i = 0; i < size; ++i)
                        if ((spec[i] | mask[i]) != mask[i])
                                return -1;
        }
        if (item->mask) {
                unsigned int i;
                const uint8_t *spec = item->mask;

                for (i = 0; i < size; ++i)
                        if ((spec[i] | mask[i]) != mask[i])
                                return -1;
        }
        if (item->spec && item->last) {
                uint8_t spec[size];
                uint8_t last[size];
                const uint8_t *apply = mask;
                unsigned int i;

                if (item->mask)
                        apply = item->mask;
                for (i = 0; i < size; ++i) {
                        spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
                        last[i] = ((const uint8_t *)item->last)[i] & apply[i];
                }
                ret = memcmp(spec, last, size);
        }
        return ret;
}
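
/*
 * Example of the checks above: if the supported mask for a byte is 0xff,
 * any user-provided mask, spec or last value is accepted; if it is 0x0f,
 * any value with a bit set outside 0x0f is rejected because
 * (spec[i] | mask[i]) != mask[i] flags bits the NIC cannot match on.
 */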

/**
 * Validate a flow supported by the NIC.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] items
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] flow
 *   Flow structure to update.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_validate(struct priv *priv,
                   const struct rte_flow_attr *attr,
                   const struct rte_flow_item items[],
                   const struct rte_flow_action actions[],
                   struct rte_flow_error *error,
                   struct mlx5_flow_parse *flow)
{
        const struct mlx5_flow_items *cur_item = mlx5_flow_items;

        if (attr->group) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
                                   NULL,
                                   "groups are not supported");
                return -rte_errno;
        }
        if (attr->priority) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
                                   NULL,
                                   "priorities are not supported");
                return -rte_errno;
        }
        if (attr->egress) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
                                   NULL,
                                   "egress is not supported");
                return -rte_errno;
        }
        if (!attr->ingress) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
                                   NULL,
                                   "only ingress is supported");
                return -rte_errno;
        }
        for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
                const struct mlx5_flow_items *token = NULL;
                unsigned int i;
                int err;

                if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
                        continue;
                for (i = 0;
                     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
                     ++i) {
                        if (cur_item->items[i] == items->type) {
                                token = &mlx5_flow_items[items->type];
                                break;
                        }
                }
                if (!token)
                        goto exit_item_not_supported;
                cur_item = token;
                err = mlx5_flow_item_validate(items,
                                              (const uint8_t *)cur_item->mask,
                                              cur_item->mask_sz);
                if (err)
                        goto exit_item_not_supported;
                if (flow->ibv_attr && cur_item->convert) {
                        err = cur_item->convert(items,
                                                (cur_item->default_mask ?
                                                 cur_item->default_mask :
                                                 cur_item->mask),
                                                flow);
                        if (err)
                                goto exit_item_not_supported;
                } else if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
                        if (flow->inner) {
                                rte_flow_error_set(error, ENOTSUP,
                                                   RTE_FLOW_ERROR_TYPE_ITEM,
                                                   items,
                                                   "cannot recognize multiple"
                                                   " VXLAN encapsulations");
                                return -rte_errno;
                        }
                        flow->inner = IBV_FLOW_SPEC_INNER;
                }
                flow->offset += cur_item->dst_sz;
        }
        for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
                if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
                        continue;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
                        flow->actions.drop = 1;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
                        const struct rte_flow_action_queue *queue =
                                (const struct rte_flow_action_queue *)
                                actions->conf;
                        uint16_t n;
                        uint16_t found = 0;

                        if (!queue || (queue->index > (priv->rxqs_n - 1)))
                                goto exit_action_not_supported;
                        for (n = 0; n < flow->actions.queues_n; ++n) {
                                if (flow->actions.queues[n] == queue->index) {
                                        found = 1;
                                        break;
                                }
                        }
                        if (flow->actions.queues_n > 1 && !found) {
                                rte_flow_error_set(error, ENOTSUP,
                                           RTE_FLOW_ERROR_TYPE_ACTION,
                                           actions,
                                           "queue action not in RSS queues");
                                return -rte_errno;
                        }
                        if (!found) {
                                flow->actions.queue = 1;
                                flow->actions.queues_n = 1;
                                flow->actions.queues[0] = queue->index;
                        }
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
                        const struct rte_flow_action_rss *rss =
                                (const struct rte_flow_action_rss *)
                                actions->conf;
                        uint16_t n;

                        if (!rss || !rss->num) {
                                rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "no valid queues");
                                return -rte_errno;
                        }
                        if (flow->actions.queues_n == 1) {
                                uint16_t found = 0;

                                assert(flow->actions.queues_n);
                                for (n = 0; n < rss->num; ++n) {
                                        if (flow->actions.queues[0] ==
                                            rss->queue[n]) {
                                                found = 1;
                                                break;
                                        }
                                }
                                if (!found) {
                                        rte_flow_error_set(error, ENOTSUP,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "queue action not in RSS"
                                                   " queues");
                                        return -rte_errno;
                                }
                        }
                        for (n = 0; n < rss->num; ++n) {
                                if (rss->queue[n] >= priv->rxqs_n) {
                                        rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "queue id > number of"
                                                   " queues");
                                        return -rte_errno;
                                }
                        }
                        flow->actions.queue = 1;
                        for (n = 0; n < rss->num; ++n)
                                flow->actions.queues[n] = rss->queue[n];
                        flow->actions.queues_n = rss->num;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
                        const struct rte_flow_action_mark *mark =
                                (const struct rte_flow_action_mark *)
                                actions->conf;

                        if (!mark) {
                                rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "mark must be defined");
                                return -rte_errno;
                        } else if (mark->id >= MLX5_FLOW_MARK_MAX) {
                                rte_flow_error_set(error, ENOTSUP,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "mark must be between 0"
                                                   " and 16777199");
                                return -rte_errno;
                        }
                        flow->actions.mark = 1;
                        flow->actions.mark_id = mark->id;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
                        flow->actions.mark = 1;
                } else {
                        goto exit_action_not_supported;
                }
        }
        if (flow->actions.mark && !flow->ibv_attr && !flow->actions.drop)
                flow->offset += sizeof(struct ibv_flow_spec_action_tag);
        if (!flow->ibv_attr && flow->actions.drop)
                flow->offset += sizeof(struct ibv_flow_spec_action_drop);
        if (!flow->actions.queue && !flow->actions.drop) {
                rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "no valid action");
                return -rte_errno;
        }
        return 0;
exit_item_not_supported:
        rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
                           items, "item not supported");
        return -rte_errno;
exit_action_not_supported:
        rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
                           actions, "action not supported");
        return -rte_errno;
}
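
/*
 * Note: priv_flow_validate() serves two passes. With flow->ibv_attr == NULL
 * it only checks the rule and accumulates in flow->offset the number of
 * bytes of Verbs specifications to allocate; once flow->ibv_attr is set
 * (see priv_flow_create()), the convert() callbacks actually write into the
 * buffer.
 */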

/**
 * Validate a flow supported by the NIC.
 *
 * @see rte_flow_validate()
 * @see rte_flow_ops
 */
int
mlx5_flow_validate(struct rte_eth_dev *dev,
                   const struct rte_flow_attr *attr,
                   const struct rte_flow_item items[],
                   const struct rte_flow_action actions[],
                   struct rte_flow_error *error)
{
        struct priv *priv = dev->data->dev_private;
        int ret;
        struct mlx5_flow_parse flow = {
                .offset = sizeof(struct ibv_flow_attr),
                .actions = {
                        .mark_id = MLX5_FLOW_MARK_DEFAULT,
                        .queues_n = 0,
                },
        };

        priv_lock(priv);
        ret = priv_flow_validate(priv, attr, items, actions, error, &flow);
        priv_unlock(priv);
        return ret;
}

/**
 * Convert Ethernet item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 */
static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data)
{
        const struct rte_flow_item_eth *spec = item->spec;
        const struct rte_flow_item_eth *mask = item->mask;
        struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
        struct ibv_flow_spec_eth *eth;
        const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
        unsigned int i;

        ++flow->ibv_attr->num_of_specs;
        flow->ibv_attr->priority = 2;
        flow->hash_fields = 0;
        eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
        *eth = (struct ibv_flow_spec_eth) {
                .type = flow->inner | IBV_FLOW_SPEC_ETH,
                .size = eth_size,
        };
        if (!spec)
                return 0;
        if (!mask)
                mask = default_mask;
        memcpy(eth->val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
        memcpy(eth->val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
        eth->val.ether_type = spec->type;
        memcpy(eth->mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
        memcpy(eth->mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
        eth->mask.ether_type = mask->type;
        /* Remove unwanted bits from values. */
        for (i = 0; i < ETHER_ADDR_LEN; ++i) {
                eth->val.dst_mac[i] &= eth->mask.dst_mac[i];
                eth->val.src_mac[i] &= eth->mask.src_mac[i];
        }
        eth->val.ether_type &= eth->mask.ether_type;
        return 0;
}

/**
 * Convert VLAN item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 */
static int
mlx5_flow_create_vlan(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data)
{
        const struct rte_flow_item_vlan *spec = item->spec;
        const struct rte_flow_item_vlan *mask = item->mask;
        struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
        struct ibv_flow_spec_eth *eth;
        const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);

        eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset - eth_size);
        if (!spec)
                return 0;
        if (!mask)
                mask = default_mask;
        eth->val.vlan_tag = spec->tci;
        eth->mask.vlan_tag = mask->tci;
        eth->val.vlan_tag &= eth->mask.vlan_tag;
        return 0;
}
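
/*
 * Note: unlike the other conversions, the VLAN item emits no specification
 * of its own; it patches the Ethernet specification previously written at
 * flow->offset - eth_size, which is why its entry in mlx5_flow_items
 * contributes no dst_sz.
 */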

/**
 * Convert IPv4 item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 */
static int
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data)
{
        const struct rte_flow_item_ipv4 *spec = item->spec;
        const struct rte_flow_item_ipv4 *mask = item->mask;
        struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
        struct ibv_flow_spec_ipv4_ext *ipv4;
        unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);

        ++flow->ibv_attr->num_of_specs;
        flow->ibv_attr->priority = 1;
        flow->hash_fields = (IBV_RX_HASH_SRC_IPV4 |
                             IBV_RX_HASH_DST_IPV4);
        ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
        *ipv4 = (struct ibv_flow_spec_ipv4_ext) {
                .type = flow->inner | IBV_FLOW_SPEC_IPV4_EXT,
                .size = ipv4_size,
        };
        if (!spec)
                return 0;
        if (!mask)
                mask = default_mask;
        ipv4->val = (struct ibv_flow_ipv4_ext_filter){
                .src_ip = spec->hdr.src_addr,
                .dst_ip = spec->hdr.dst_addr,
                .proto = spec->hdr.next_proto_id,
                .tos = spec->hdr.type_of_service,
        };
        ipv4->mask = (struct ibv_flow_ipv4_ext_filter){
                .src_ip = mask->hdr.src_addr,
                .dst_ip = mask->hdr.dst_addr,
                .proto = mask->hdr.next_proto_id,
                .tos = mask->hdr.type_of_service,
        };
        /* Remove unwanted bits from values. */
        ipv4->val.src_ip &= ipv4->mask.src_ip;
        ipv4->val.dst_ip &= ipv4->mask.dst_ip;
        ipv4->val.proto &= ipv4->mask.proto;
        ipv4->val.tos &= ipv4->mask.tos;
        return 0;
}

/**
 * Convert IPv6 item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 */
static int
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data)
{
        const struct rte_flow_item_ipv6 *spec = item->spec;
        const struct rte_flow_item_ipv6 *mask = item->mask;
        struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
        struct ibv_flow_spec_ipv6 *ipv6;
        unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
        unsigned int i;

        ++flow->ibv_attr->num_of_specs;
        flow->ibv_attr->priority = 1;
        flow->hash_fields = (IBV_RX_HASH_SRC_IPV6 |
                             IBV_RX_HASH_DST_IPV6);
        ipv6 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
        *ipv6 = (struct ibv_flow_spec_ipv6) {
                .type = flow->inner | IBV_FLOW_SPEC_IPV6,
                .size = ipv6_size,
        };
        if (!spec)
                return 0;
        if (!mask)
                mask = default_mask;
        memcpy(ipv6->val.src_ip, spec->hdr.src_addr,
               RTE_DIM(ipv6->val.src_ip));
        memcpy(ipv6->val.dst_ip, spec->hdr.dst_addr,
               RTE_DIM(ipv6->val.dst_ip));
        memcpy(ipv6->mask.src_ip, mask->hdr.src_addr,
               RTE_DIM(ipv6->mask.src_ip));
        memcpy(ipv6->mask.dst_ip, mask->hdr.dst_addr,
               RTE_DIM(ipv6->mask.dst_ip));
        ipv6->mask.flow_label = mask->hdr.vtc_flow;
        ipv6->mask.next_hdr = mask->hdr.proto;
        ipv6->mask.hop_limit = mask->hdr.hop_limits;
        /* Remove unwanted bits from values. */
        for (i = 0; i < RTE_DIM(ipv6->val.src_ip); ++i) {
                ipv6->val.src_ip[i] &= ipv6->mask.src_ip[i];
                ipv6->val.dst_ip[i] &= ipv6->mask.dst_ip[i];
        }
        ipv6->val.flow_label &= ipv6->mask.flow_label;
        ipv6->val.next_hdr &= ipv6->mask.next_hdr;
        ipv6->val.hop_limit &= ipv6->mask.hop_limit;
        return 0;
}

/**
 * Convert UDP item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 */
static int
mlx5_flow_create_udp(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data)
{
        const struct rte_flow_item_udp *spec = item->spec;
        const struct rte_flow_item_udp *mask = item->mask;
        struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
        struct ibv_flow_spec_tcp_udp *udp;
        unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);

        ++flow->ibv_attr->num_of_specs;
        flow->ibv_attr->priority = 0;
        flow->hash_fields |= (IBV_RX_HASH_SRC_PORT_UDP |
                              IBV_RX_HASH_DST_PORT_UDP);
        udp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
        *udp = (struct ibv_flow_spec_tcp_udp) {
                .type = flow->inner | IBV_FLOW_SPEC_UDP,
                .size = udp_size,
        };
        if (!spec)
                return 0;
        if (!mask)
                mask = default_mask;
        udp->val.dst_port = spec->hdr.dst_port;
        udp->val.src_port = spec->hdr.src_port;
        udp->mask.dst_port = mask->hdr.dst_port;
        udp->mask.src_port = mask->hdr.src_port;
        /* Remove unwanted bits from values. */
        udp->val.src_port &= udp->mask.src_port;
        udp->val.dst_port &= udp->mask.dst_port;
        return 0;
}

/**
 * Convert TCP item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 */
static int
mlx5_flow_create_tcp(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data)
{
        const struct rte_flow_item_tcp *spec = item->spec;
        const struct rte_flow_item_tcp *mask = item->mask;
        struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
        struct ibv_flow_spec_tcp_udp *tcp;
        unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);

        ++flow->ibv_attr->num_of_specs;
        flow->ibv_attr->priority = 0;
        flow->hash_fields |= (IBV_RX_HASH_SRC_PORT_TCP |
                              IBV_RX_HASH_DST_PORT_TCP);
        tcp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
        *tcp = (struct ibv_flow_spec_tcp_udp) {
                .type = flow->inner | IBV_FLOW_SPEC_TCP,
                .size = tcp_size,
        };
        if (!spec)
                return 0;
        if (!mask)
                mask = default_mask;
        tcp->val.dst_port = spec->hdr.dst_port;
        tcp->val.src_port = spec->hdr.src_port;
        tcp->mask.dst_port = mask->hdr.dst_port;
        tcp->mask.src_port = mask->hdr.src_port;
        /* Remove unwanted bits from values. */
        tcp->val.src_port &= tcp->mask.src_port;
        tcp->val.dst_port &= tcp->mask.dst_port;
        return 0;
}

/**
 * Convert VXLAN item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 */
static int
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
                       const void *default_mask,
                       void *data)
{
        const struct rte_flow_item_vxlan *spec = item->spec;
        const struct rte_flow_item_vxlan *mask = item->mask;
        struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
        struct ibv_flow_spec_tunnel *vxlan;
        unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
        union vni {
                uint32_t vlan_id;
                uint8_t vni[4];
        } id;

        ++flow->ibv_attr->num_of_specs;
        flow->ibv_attr->priority = 0;
        id.vni[0] = 0;
        vxlan = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
        *vxlan = (struct ibv_flow_spec_tunnel) {
                .type = flow->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
                .size = size,
        };
        flow->inner = IBV_FLOW_SPEC_INNER;
        if (!spec)
                return 0;
        if (!mask)
                mask = default_mask;
        memcpy(&id.vni[1], spec->vni, 3);
        vxlan->val.tunnel_id = id.vlan_id;
        memcpy(&id.vni[1], mask->vni, 3);
        vxlan->mask.tunnel_id = id.vlan_id;
        /* Remove unwanted bits from values. */
        vxlan->val.tunnel_id &= vxlan->mask.tunnel_id;
        return 0;
}
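
/*
 * Illustration of the VNI packing above: the VXLAN network identifier is
 * 24 bits wide and is copied into vni[1..3] of the union while vni[0]
 * stays zero, so a VNI of 0x123456 leaves id.vni[] = { 0x00, 0x12, 0x34,
 * 0x56 } and id.vlan_id carries those bytes unchanged (network order) into
 * vxlan->val.tunnel_id.
 */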

/**
 * Convert mark/flag action to Verbs specification.
 *
 * @param flow
 *   Pointer to MLX5 flow structure.
 * @param mark_id
 *   Mark identifier.
 */
static int
mlx5_flow_create_flag_mark(struct mlx5_flow_parse *flow, uint32_t mark_id)
{
        struct ibv_flow_spec_action_tag *tag;
        unsigned int size = sizeof(struct ibv_flow_spec_action_tag);

        tag = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
        *tag = (struct ibv_flow_spec_action_tag){
                .type = IBV_FLOW_SPEC_ACTION_TAG,
                .size = size,
                .tag_id = mlx5_flow_mark_set(mark_id),
        };
        ++flow->ibv_attr->num_of_specs;
        return 0;
}
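
/*
 * Note: mlx5_flow_mark_set() converts the user-visible mark identifier into
 * the tag value programmed into the NIC; the assumption here is that the Rx
 * path applies the inverse conversion before exposing the mark in the mbuf,
 * so identifiers survive the round trip.
 */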

/**
 * Complete flow rule creation with a drop queue.
 *
 * @param priv
 *   Pointer to private structure.
 * @param flow
 *   MLX5 flow attributes (filled by mlx5_flow_validate()).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A flow if the rule could be created.
 */
static struct rte_flow *
priv_flow_create_action_queue_drop(struct priv *priv,
                                   struct mlx5_flow_parse *flow,
                                   struct rte_flow_error *error)
{
        struct rte_flow *rte_flow;
        struct ibv_flow_spec_action_drop *drop;
        unsigned int size = sizeof(struct ibv_flow_spec_action_drop);

        rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow), 0);
        if (!rte_flow) {
                rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "cannot allocate flow memory");
                return NULL;
        }
        rte_flow->drop = 1;
        drop = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
        *drop = (struct ibv_flow_spec_action_drop){
                .type = IBV_FLOW_SPEC_ACTION_DROP,
                .size = size,
        };
        ++flow->ibv_attr->num_of_specs;
        flow->offset += sizeof(struct ibv_flow_spec_action_drop);
        rte_flow->ibv_attr = flow->ibv_attr;
        if (!priv->dev->data->dev_started)
                return rte_flow;
        rte_flow->qp = priv->flow_drop_queue->qp;
        rte_flow->ibv_flow = ibv_create_flow(rte_flow->qp,
                                             rte_flow->ibv_attr);
        if (!rte_flow->ibv_flow) {
                rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "flow rule creation failure");
                goto error;
        }
        return rte_flow;
error:
        assert(rte_flow);
        rte_free(rte_flow);
        return NULL;
}

/**
 * Complete flow rule creation.
 *
 * @param priv
 *   Pointer to private structure.
 * @param flow
 *   MLX5 flow attributes (filled by mlx5_flow_validate()).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A flow if the rule could be created.
 */
static struct rte_flow *
priv_flow_create_action_queue(struct priv *priv,
                              struct mlx5_flow_parse *flow,
                              struct rte_flow_error *error)
{
        struct rte_flow *rte_flow;
        unsigned int i;

        assert(priv->pd);
        assert(priv->ctx);
        assert(!flow->actions.drop);
        rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow), 0);
        if (!rte_flow) {
                rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "cannot allocate flow memory");
                return NULL;
        }
        for (i = 0; i != flow->actions.queues_n; ++i) {
                struct mlx5_rxq_data *q =
                        (*priv->rxqs)[flow->actions.queues[i]];

                q->mark |= flow->actions.mark;
        }
        rte_flow->mark = flow->actions.mark;
        rte_flow->ibv_attr = flow->ibv_attr;
        rte_flow->hash_fields = flow->hash_fields;
        rte_flow->ind_table =
                mlx5_priv_ind_table_ibv_get(priv, flow->actions.queues,
                                            flow->actions.queues_n);
        if (!rte_flow->ind_table) {
                rte_flow->ind_table =
                        mlx5_priv_ind_table_ibv_new(priv, flow->actions.queues,
                                                    flow->actions.queues_n);
                if (!rte_flow->ind_table) {
                        rte_flow_error_set(error, ENOMEM,
                                           RTE_FLOW_ERROR_TYPE_HANDLE,
                                           NULL,
                                           "cannot allocate indirection table");
                        goto error;
                }
        }
        rte_flow->qp = ibv_create_qp_ex(
                priv->ctx,
                &(struct ibv_qp_init_attr_ex){
                        .qp_type = IBV_QPT_RAW_PACKET,
                        .comp_mask =
                                IBV_QP_INIT_ATTR_PD |
                                IBV_QP_INIT_ATTR_IND_TABLE |
                                IBV_QP_INIT_ATTR_RX_HASH,
                        .rx_hash_conf = (struct ibv_rx_hash_conf){
                                .rx_hash_function =
                                        IBV_RX_HASH_FUNC_TOEPLITZ,
                                .rx_hash_key_len = rss_hash_default_key_len,
                                .rx_hash_key = rss_hash_default_key,
                                .rx_hash_fields_mask = rte_flow->hash_fields,
                        },
                        .rwq_ind_tbl = rte_flow->ind_table->ind_table,
                        .pd = priv->pd,
                });
        if (!rte_flow->qp) {
                rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "cannot allocate QP");
                goto error;
        }
        if (!priv->dev->data->dev_started)
                return rte_flow;
        rte_flow->ibv_flow = ibv_create_flow(rte_flow->qp,
                                             rte_flow->ibv_attr);
        if (!rte_flow->ibv_flow) {
                rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "flow rule creation failure");
                goto error;
        }
        return rte_flow;
error:
        assert(rte_flow);
        if (rte_flow->qp)
                ibv_destroy_qp(rte_flow->qp);
        if (rte_flow->ind_table)
                mlx5_priv_ind_table_ibv_release(priv, rte_flow->ind_table);
        rte_free(rte_flow);
        return NULL;
}

/**
 * Convert a flow.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A flow on success, NULL otherwise.
 */
static struct rte_flow *
priv_flow_create(struct priv *priv,
                 const struct rte_flow_attr *attr,
                 const struct rte_flow_item items[],
                 const struct rte_flow_action actions[],
                 struct rte_flow_error *error)
{
        struct rte_flow *rte_flow;
        struct mlx5_flow_parse flow = {
                .offset = sizeof(struct ibv_flow_attr),
                .actions = {
                        .mark_id = MLX5_FLOW_MARK_DEFAULT,
                        .queues_n = 0,
                },
        };
        int err;

        err = priv_flow_validate(priv, attr, items, actions, error, &flow);
        if (err)
                goto exit;
        flow.ibv_attr = rte_malloc(__func__, flow.offset, 0);
        flow.offset = sizeof(struct ibv_flow_attr);
        if (!flow.ibv_attr) {
                rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "cannot allocate ibv_attr memory");
                goto exit;
        }
        *flow.ibv_attr = (struct ibv_flow_attr){
                .type = IBV_FLOW_ATTR_NORMAL,
                .size = sizeof(struct ibv_flow_attr),
                .priority = attr->priority,
                .num_of_specs = 0,
                .port = 0,
                .flags = 0,
        };
        flow.inner = 0;
        flow.hash_fields = 0;
        claim_zero(priv_flow_validate(priv, attr, items, actions,
                                      error, &flow));
        if (flow.actions.mark && !flow.actions.drop) {
                mlx5_flow_create_flag_mark(&flow, flow.actions.mark_id);
                flow.offset += sizeof(struct ibv_flow_spec_action_tag);
        }
        if (flow.actions.drop)
                rte_flow =
                        priv_flow_create_action_queue_drop(priv, &flow, error);
        else
                rte_flow = priv_flow_create_action_queue(priv, &flow, error);
        if (!rte_flow)
                goto exit;
        return rte_flow;
exit:
        rte_free(flow.ibv_attr);
        return NULL;
}
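
/*
 * Note the two-step use of flow.offset above: the first validation pass
 * sizes the ibv_attr buffer (attribute header plus every converted
 * specification plus the optional tag spec), rte_malloc() allocates it,
 * and offset is then rewound to sizeof(struct ibv_flow_attr) so the second
 * pass appends specifications right after the header.
 */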

/**
 * Create a flow.
 *
 * @see rte_flow_create()
 * @see rte_flow_ops
 */
struct rte_flow *
mlx5_flow_create(struct rte_eth_dev *dev,
                 const struct rte_flow_attr *attr,
                 const struct rte_flow_item items[],
                 const struct rte_flow_action actions[],
                 struct rte_flow_error *error)
{
        struct priv *priv = dev->data->dev_private;
        struct rte_flow *flow;

        priv_lock(priv);
        flow = priv_flow_create(priv, attr, items, actions, error);
        if (flow) {
                TAILQ_INSERT_TAIL(&priv->flows, flow, next);
                DEBUG("Flow created %p", (void *)flow);
        }
        priv_unlock(priv);
        return flow;
}

/**
 * Destroy a flow.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] flow
 *   Flow to destroy.
 */
static void
priv_flow_destroy(struct priv *priv,
                  struct rte_flow *flow)
{
        unsigned int i;

        TAILQ_REMOVE(&priv->flows, flow, next);
        if (flow->ibv_flow)
                claim_zero(ibv_destroy_flow(flow->ibv_flow));
        if (flow->drop)
                goto free;
        if (flow->qp)
                claim_zero(ibv_destroy_qp(flow->qp));
        for (i = 0; i != flow->ind_table->queues_n; ++i) {
                struct rte_flow *tmp;
                struct mlx5_rxq_data *rxq_data =
                        (*priv->rxqs)[flow->ind_table->queues[i]];

                /*
                 * To remove the mark from the queue, the queue must not be
                 * present in any other marked flow (RSS or not).
                 */
                if (flow->mark) {
                        int mark = 0;

                        TAILQ_FOREACH(tmp, &priv->flows, next) {
                                unsigned int j;

                                if (!tmp->mark)
                                        continue;
                                for (j = 0;
                                     (j != tmp->ind_table->queues_n) && !mark;
                                     j++)
                                        if (tmp->ind_table->queues[j] ==
                                            flow->ind_table->queues[i])
                                                mark = 1;
                        }
                        rxq_data->mark = mark;
                }
        }
        mlx5_priv_ind_table_ibv_release(priv, flow->ind_table);
free:
        rte_free(flow->ibv_attr);
        DEBUG("Flow destroyed %p", (void *)flow);
        rte_free(flow);
}
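
/*
 * Example of the rescan above: if flows A and B both mark packets and share
 * Rx queue 3, destroying A must leave rxq 3's mark flag set because B still
 * uses it; the TAILQ_FOREACH() pass therefore clears the flag only when no
 * remaining marked flow references the queue.
 */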

/**
 * Destroy a flow.
 *
 * @see rte_flow_destroy()
 * @see rte_flow_ops
 */
int
mlx5_flow_destroy(struct rte_eth_dev *dev,
                  struct rte_flow *flow,
                  struct rte_flow_error *error)
{
        struct priv *priv = dev->data->dev_private;

        (void)error;
        priv_lock(priv);
        priv_flow_destroy(priv, flow);
        priv_unlock(priv);
        return 0;
}

/**
 * Destroy all flows.
 *
 * @param priv
 *   Pointer to private structure.
 */
static void
priv_flow_flush(struct priv *priv)
{
        while (!TAILQ_EMPTY(&priv->flows)) {
                struct rte_flow *flow;

                flow = TAILQ_FIRST(&priv->flows);
                priv_flow_destroy(priv, flow);
        }
}

/**
 * Destroy all flows.
 *
 * @see rte_flow_flush()
 * @see rte_flow_ops
 */
int
mlx5_flow_flush(struct rte_eth_dev *dev,
                struct rte_flow_error *error)
{
        struct priv *priv = dev->data->dev_private;

        (void)error;
        priv_lock(priv);
        priv_flow_flush(priv);
        priv_unlock(priv);
        return 0;
}

/**
 * Create drop queue.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   0 on success.
 */
static int
priv_flow_create_drop_queue(struct priv *priv)
{
        struct rte_flow_drop *fdq = NULL;

        assert(priv->pd);
        assert(priv->ctx);
        fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
        if (!fdq) {
                WARN("cannot allocate memory for drop queue");
                return -1;
        }
        fdq->cq = ibv_create_cq(priv->ctx, 1, NULL, NULL, 0);
        if (!fdq->cq) {
                WARN("cannot allocate CQ for drop queue");
                goto error;
        }
        fdq->wq = ibv_create_wq(priv->ctx,
                        &(struct ibv_wq_init_attr){
                        .wq_type = IBV_WQT_RQ,
                        .max_wr = 1,
                        .max_sge = 1,
                        .pd = priv->pd,
                        .cq = fdq->cq,
                        });
        if (!fdq->wq) {
                WARN("cannot allocate WQ for drop queue");
                goto error;
        }
        fdq->ind_table = ibv_create_rwq_ind_table(priv->ctx,
                        &(struct ibv_rwq_ind_table_init_attr){
                        .log_ind_tbl_size = 0,
                        .ind_tbl = &fdq->wq,
                        .comp_mask = 0,
                        });
        if (!fdq->ind_table) {
                WARN("cannot allocate indirection table for drop queue");
                goto error;
        }
        fdq->qp = ibv_create_qp_ex(priv->ctx,
                &(struct ibv_qp_init_attr_ex){
                        .qp_type = IBV_QPT_RAW_PACKET,
                        .comp_mask =
                                IBV_QP_INIT_ATTR_PD |
                                IBV_QP_INIT_ATTR_IND_TABLE |
                                IBV_QP_INIT_ATTR_RX_HASH,
                        .rx_hash_conf = (struct ibv_rx_hash_conf){
                                .rx_hash_function =
                                        IBV_RX_HASH_FUNC_TOEPLITZ,
                                .rx_hash_key_len = rss_hash_default_key_len,
                                .rx_hash_key = rss_hash_default_key,
                                .rx_hash_fields_mask = 0,
                        },
                        .rwq_ind_tbl = fdq->ind_table,
                        .pd = priv->pd,
                });
        if (!fdq->qp) {
                WARN("cannot allocate QP for drop queue");
                goto error;
        }
        priv->flow_drop_queue = fdq;
        return 0;
error:
        if (fdq->qp)
                claim_zero(ibv_destroy_qp(fdq->qp));
        if (fdq->ind_table)
                claim_zero(ibv_destroy_rwq_ind_table(fdq->ind_table));
        if (fdq->wq)
                claim_zero(ibv_destroy_wq(fdq->wq));
        if (fdq->cq)
                claim_zero(ibv_destroy_cq(fdq->cq));
        rte_free(fdq);
        priv->flow_drop_queue = NULL;
        return -1;
}

/**
 * Delete drop queue.
 *
 * @param priv
 *   Pointer to private structure.
 */
static void
priv_flow_delete_drop_queue(struct priv *priv)
{
        struct rte_flow_drop *fdq = priv->flow_drop_queue;

        if (!fdq)
                return;
        if (fdq->qp)
                claim_zero(ibv_destroy_qp(fdq->qp));
        if (fdq->ind_table)
                claim_zero(ibv_destroy_rwq_ind_table(fdq->ind_table));
        if (fdq->wq)
                claim_zero(ibv_destroy_wq(fdq->wq));
        if (fdq->cq)
                claim_zero(ibv_destroy_cq(fdq->cq));
        rte_free(fdq);
        priv->flow_drop_queue = NULL;
}

/**
 * Remove all flows.
 *
 * Called by dev_stop() to remove all flows.
 *
 * @param priv
 *   Pointer to private structure.
 */
void
priv_flow_stop(struct priv *priv)
{
        struct rte_flow *flow;

        TAILQ_FOREACH_REVERSE(flow, &priv->flows, mlx5_flows, next) {
                claim_zero(ibv_destroy_flow(flow->ibv_flow));
                flow->ibv_flow = NULL;
                if (flow->mark) {
                        unsigned int n;
                        struct mlx5_ind_table_ibv *ind_tbl = flow->ind_table;

                        for (n = 0; n < ind_tbl->queues_n; ++n)
                                (*priv->rxqs)[ind_tbl->queues[n]]->mark = 0;
                }
                DEBUG("Flow %p removed", (void *)flow);
        }
        priv_flow_delete_drop_queue(priv);
}

/**
 * Add all flows.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   0 on success, an errno value otherwise and rte_errno is set.
 */
int
priv_flow_start(struct priv *priv)
{
        int ret;
        struct rte_flow *flow;

        ret = priv_flow_create_drop_queue(priv);
        if (ret)
                return -1;
        TAILQ_FOREACH(flow, &priv->flows, next) {
                struct ibv_qp *qp;

                if (flow->drop)
                        qp = priv->flow_drop_queue->qp;
                else
                        qp = flow->qp;
                flow->ibv_flow = ibv_create_flow(qp, flow->ibv_attr);
                if (!flow->ibv_flow) {
                        DEBUG("Flow %p cannot be applied", (void *)flow);
                        rte_errno = EINVAL;
                        return rte_errno;
                }
                DEBUG("Flow %p applied", (void *)flow);
                if (flow->mark) {
                        unsigned int n;

                        for (n = 0; n < flow->ind_table->queues_n; ++n) {
                                uint16_t idx = flow->ind_table->queues[n];
                                (*priv->rxqs)[idx]->mark = 1;
                        }
                }
        }
        return 0;
}
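
/*
 * Note: priv_flow_start() recreates the drop queue first so that replayed
 * drop rules have a valid target QP, then re-applies every flow in list
 * order and restores the per-queue mark flags that priv_flow_stop()
 * cleared.
 */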

/**
 * Isolated mode.
 *
 * @see rte_flow_isolate()
 * @see rte_flow_ops
 */
int
mlx5_flow_isolate(struct rte_eth_dev *dev,
                  int enable,
                  struct rte_flow_error *error)
{
        struct priv *priv = dev->data->dev_private;

        priv_lock(priv);
        if (dev->data->dev_started) {
                rte_flow_error_set(error, EBUSY,
                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
                                   NULL,
                                   "port must be stopped first");
                priv_unlock(priv);
                return -rte_errno;
        }
        priv->isolated = !!enable;
        priv_unlock(priv);
        return 0;
}

/**
 * Verify the flow list is empty.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   The number of flows not released.
 */
int
priv_flow_verify(struct priv *priv)
{
        struct rte_flow *flow;
        int ret = 0;

        TAILQ_FOREACH(flow, &priv->flows, next) {
                DEBUG("%p: flow %p still referenced", (void *)priv,
                      (void *)flow);
                ++ret;
        }
        return ret;
}