4 * Copyright 2016 6WIND S.A.
5 * Copyright 2016 Mellanox.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of 6WIND S.A. nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 #include <sys/queue.h>
38 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
40 #pragma GCC diagnostic ignored "-Wpedantic"
42 #include <infiniband/verbs.h>
44 #pragma GCC diagnostic error "-Wpedantic"
47 #include <rte_ethdev.h>
49 #include <rte_flow_driver.h>
50 #include <rte_malloc.h>
/*
 * Forward declarations of the per-item conversion callbacks that translate
 * an rte_flow pattern item into a Verbs flow specification.  They are
 * referenced by the mlx5_flow_items[] graph below and defined further down.
 * NOTE(review): this extract is missing interleaved lines (the "static int"
 * return-type lines and the trailing "void *data" parameters); the full
 * signatures appear at the definitions below.
 */
56 mlx5_flow_create_eth(const struct rte_flow_item *item,
57 const void *default_mask,
61 mlx5_flow_create_vlan(const struct rte_flow_item *item,
62 const void *default_mask,
66 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
67 const void *default_mask,
71 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
72 const void *default_mask,
76 mlx5_flow_create_udp(const struct rte_flow_item *item,
77 const void *default_mask,
81 mlx5_flow_create_tcp(const struct rte_flow_item *item,
82 const void *default_mask,
86 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
87 const void *default_mask,
90 /** Structure for Drop queue. */
/*
 * Verbs objects backing the drop queue; created together in
 * priv_flow_create_drop_queue() (CQ, WQ, indirection table, then QP).
 */
91 struct mlx5_hrxq_drop {
92 struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
93 struct ibv_qp *qp; /**< Verbs queue pair. */
94 struct ibv_wq *wq; /**< Verbs work queue. */
95 struct ibv_cq *cq; /**< Verbs completion queue. */
98 /* Flows structures. */
/* NOTE(review): the "struct mlx5_flow {" opener is not visible in this
 * extract; the two fields below presumably belong to it — confirm against
 * the full file. */
100 uint64_t hash_fields; /**< Fields that participate in the hash. */
101 struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
104 /* Drop flows structures. */
105 struct mlx5_flow_drop {
106 struct mlx5_hrxq_drop hrxq; /**< Drop hash Rx queue. */
/* NOTE(review): closing brace of mlx5_flow_drop and the "struct rte_flow {"
 * opener are missing from this extract.  The fields below belong to
 * struct rte_flow (the TAILQ_ENTRY names it): one instance per created
 * flow rule, linked into priv->flows. */
110 TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
111 uint32_t mark:1; /**< Set if the flow is marked. */
112 uint32_t drop:1; /**< Drop queue. */
113 struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
114 struct ibv_flow *ibv_flow; /**< Verbs flow. */
115 uint16_t queues_n; /**< Number of entries in queue[]. */
116 uint16_t (*queues)[]; /**< Queues indexes to use. */
118 struct mlx5_flow frxq; /**< Flow with Rx queue. */
119 struct mlx5_flow_drop drxq; /**< Flow with drop Rx queue. */
123 /** Static initializer for items. */
/* ITEMS(...) expands to a compound-literal array of item types terminated
 * by RTE_FLOW_ITEM_TYPE_END, used for the .items graph edges below. */
125 (const enum rte_flow_item_type []){ \
126 __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
129 /** Structure to generate a simple graph of layers supported by the NIC. */
130 struct mlx5_flow_items {
131 /** List of possible actions for these items. */
132 const enum rte_flow_action_type *const actions;
133 /** Bit-masks corresponding to the possibilities for the item. */
136 * Default bit-masks to use when item->mask is not provided. When
137 * \default_mask is also NULL, the full supported bit-mask (\mask) is
140 const void *default_mask;
141 /** Bit-masks size in bytes. */
142 const unsigned int mask_sz;
144 * Conversion function from rte_flow to NIC specific flow.
147 * rte_flow item to convert.
148 * @param default_mask
149 * Default bit-masks to use when item->mask is not provided.
151 * Internal structure to store the conversion.
154 * 0 on success, negative value otherwise.
156 int (*convert)(const struct rte_flow_item *item,
157 const void *default_mask,
159 /** Size in bytes of the destination structure. */
160 const unsigned int dst_sz;
161 /** List of possible following items. */
162 const enum rte_flow_item_type *const items;
165 /** Valid action for this PMD. */
/* Shared END-terminated action list referenced by every graph node below. */
166 static const enum rte_flow_action_type valid_actions[] = {
167 RTE_FLOW_ACTION_TYPE_DROP,
168 RTE_FLOW_ACTION_TYPE_QUEUE,
169 RTE_FLOW_ACTION_TYPE_MARK,
170 RTE_FLOW_ACTION_TYPE_FLAG,
171 RTE_FLOW_ACTION_TYPE_END,
174 /** Graph of supported items and associated actions. */
/*
 * Array indexed by RTE_FLOW_ITEM_TYPE_*: each entry lists which items may
 * follow (.items), the widest supported mask (.mask), the mask applied when
 * the user supplies none (.default_mask), and the converter plus the size of
 * the Verbs spec it emits (.dst_sz), used to pre-compute the ibv_attr buffer
 * size during validation.
 * NOTE(review): many closing braces/commas of the designated initializers
 * are missing from this extract.
 */
175 static const struct mlx5_flow_items mlx5_flow_items[] = {
176 [RTE_FLOW_ITEM_TYPE_END] = {
177 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
178 RTE_FLOW_ITEM_TYPE_VXLAN),
180 [RTE_FLOW_ITEM_TYPE_ETH] = {
181 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
182 RTE_FLOW_ITEM_TYPE_IPV4,
183 RTE_FLOW_ITEM_TYPE_IPV6),
184 .actions = valid_actions,
185 .mask = &(const struct rte_flow_item_eth){
186 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
187 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
190 .default_mask = &rte_flow_item_eth_mask,
191 .mask_sz = sizeof(struct rte_flow_item_eth),
192 .convert = mlx5_flow_create_eth,
193 .dst_sz = sizeof(struct ibv_flow_spec_eth),
195 [RTE_FLOW_ITEM_TYPE_VLAN] = {
196 .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
197 RTE_FLOW_ITEM_TYPE_IPV6),
198 .actions = valid_actions,
199 .mask = &(const struct rte_flow_item_vlan){
202 .default_mask = &rte_flow_item_vlan_mask,
203 .mask_sz = sizeof(struct rte_flow_item_vlan),
204 .convert = mlx5_flow_create_vlan,
/* VLAN has no .dst_sz of its own: mlx5_flow_create_vlan() patches the TCI
 * into the ibv_flow_spec_eth already emitted by the preceding ETH item. */
207 [RTE_FLOW_ITEM_TYPE_IPV4] = {
208 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
209 RTE_FLOW_ITEM_TYPE_TCP),
210 .actions = valid_actions,
211 .mask = &(const struct rte_flow_item_ipv4){
215 .type_of_service = -1,
219 .default_mask = &rte_flow_item_ipv4_mask,
220 .mask_sz = sizeof(struct rte_flow_item_ipv4),
221 .convert = mlx5_flow_create_ipv4,
222 .dst_sz = sizeof(struct ibv_flow_spec_ipv4),
224 [RTE_FLOW_ITEM_TYPE_IPV6] = {
225 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
226 RTE_FLOW_ITEM_TYPE_TCP),
227 .actions = valid_actions,
228 .mask = &(const struct rte_flow_item_ipv6){
231 0xff, 0xff, 0xff, 0xff,
232 0xff, 0xff, 0xff, 0xff,
233 0xff, 0xff, 0xff, 0xff,
234 0xff, 0xff, 0xff, 0xff,
237 0xff, 0xff, 0xff, 0xff,
238 0xff, 0xff, 0xff, 0xff,
239 0xff, 0xff, 0xff, 0xff,
240 0xff, 0xff, 0xff, 0xff,
247 .default_mask = &rte_flow_item_ipv6_mask,
248 .mask_sz = sizeof(struct rte_flow_item_ipv6),
249 .convert = mlx5_flow_create_ipv6,
250 .dst_sz = sizeof(struct ibv_flow_spec_ipv6),
252 [RTE_FLOW_ITEM_TYPE_UDP] = {
253 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
254 .actions = valid_actions,
255 .mask = &(const struct rte_flow_item_udp){
261 .default_mask = &rte_flow_item_udp_mask,
262 .mask_sz = sizeof(struct rte_flow_item_udp),
263 .convert = mlx5_flow_create_udp,
264 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
266 [RTE_FLOW_ITEM_TYPE_TCP] = {
267 .actions = valid_actions,
268 .mask = &(const struct rte_flow_item_tcp){
274 .default_mask = &rte_flow_item_tcp_mask,
275 .mask_sz = sizeof(struct rte_flow_item_tcp),
276 .convert = mlx5_flow_create_tcp,
277 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
279 [RTE_FLOW_ITEM_TYPE_VXLAN] = {
/* VXLAN may be followed by an inner ETH layer (tunnelled pattern). */
280 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
281 .actions = valid_actions,
282 .mask = &(const struct rte_flow_item_vxlan){
283 .vni = "\xff\xff\xff",
285 .default_mask = &rte_flow_item_vxlan_mask,
286 .mask_sz = sizeof(struct rte_flow_item_vxlan),
287 .convert = mlx5_flow_create_vxlan,
288 .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
292 /* Structure to parse actions. */
/* Accumulated result of the action-list walk in priv_flow_validate(). */
293 struct mlx5_flow_action {
294 uint32_t queue:1; /**< Target is a receive queue. */
295 uint32_t drop:1; /**< Target is a drop queue. */
296 uint32_t mark:1; /**< Mark is present in the flow. */
297 uint32_t mark_id; /**< Mark identifier. */
298 uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use. */
299 uint16_t queues_n; /**< Number of entries in queue[]. */
302 /** Structure to pass to the conversion function. */
/* Parse context: ibv_attr is NULL during the sizing pass (validation) and
 * points to the allocated Verbs attribute buffer during the fill pass. */
303 struct mlx5_flow_parse {
304 struct ibv_flow_attr *ibv_attr; /**< Verbs attribute. */
305 unsigned int offset; /**< Offset in bytes in the ibv_attr buffer. */
306 uint32_t inner; /**< Set once VXLAN is encountered. */
307 uint64_t hash_fields; /**< Fields that participate in the hash. */
308 struct mlx5_flow_action actions; /**< Parsed action result. */
/* rte_flow driver operations table returned by mlx5_dev_filter_ctrl(). */
311 static const struct rte_flow_ops mlx5_flow_ops = {
312 .validate = mlx5_flow_validate,
313 .create = mlx5_flow_create,
314 .destroy = mlx5_flow_destroy,
315 .flush = mlx5_flow_flush,
317 .isolate = mlx5_flow_isolate,
321 * Manage filter operations.
324 * Pointer to Ethernet device structure.
328 * Operation to perform.
330 * Pointer to operation-specific structure.
333 * 0 on success, negative errno value on failure.
336 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
337 enum rte_filter_type filter_type,
338 enum rte_filter_op filter_op,
/* Only the generic (rte_flow) filter type is handled: a GET request hands
 * the mlx5_flow_ops table back through arg; everything else is an error. */
343 if (filter_type == RTE_ETH_FILTER_GENERIC) {
344 if (filter_op != RTE_ETH_FILTER_GET)
346 *(const void **)arg = &mlx5_flow_ops;
349 ERROR("%p: filter type (%d) not supported",
350 (void *)dev, filter_type);
355 * Check support for a given item.
358 * Item specification.
360 * Bit-masks covering supported fields to compare with spec, last and mask in
363 * Bit-Mask size in bytes.
369 mlx5_flow_item_validate(const struct rte_flow_item *item,
370 const uint8_t *mask, unsigned int size)
/* A mask/last without a spec is meaningless. */
374 if (!item->spec && (item->mask || item->last))
/* Each of spec/last/mask must only set bits that the supported mask
 * covers: (value | mask) == mask <=> value has no bit outside mask. */
376 if (item->spec && !item->mask) {
378 const uint8_t *spec = item->spec;
380 for (i = 0; i < size; ++i)
381 if ((spec[i] | mask[i]) != mask[i])
384 if (item->last && !item->mask) {
386 const uint8_t *spec = item->last;
388 for (i = 0; i < size; ++i)
389 if ((spec[i] | mask[i]) != mask[i])
394 const uint8_t *spec = item->mask;
396 for (i = 0; i < size; ++i)
397 if ((spec[i] | mask[i]) != mask[i])
/* Ranges are not supported by the hardware: after masking, spec and
 * last must describe the same value. */
400 if (item->spec && item->last) {
403 const uint8_t *apply = mask;
408 for (i = 0; i < size; ++i) {
409 spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
410 last[i] = ((const uint8_t *)item->last)[i] & apply[i];
412 ret = memcmp(spec, last, size);
418 * Validate a flow supported by the NIC.
421 * Pointer to private structure.
423 * Flow rule attributes.
425 * Pattern specification (list terminated by the END pattern item).
427 * Associated actions (list terminated by the END action).
429 * Perform verbose error reporting if not NULL.
430 * @param[in, out] flow
431 * Flow structure to update.
434 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Dual-purpose walk: when flow->ibv_attr is NULL this only validates and
 * accumulates flow->offset (the size of the Verbs attribute buffer to
 * allocate); when flow->ibv_attr is set, the per-item convert() callbacks
 * additionally fill the buffer.  priv_flow_create() relies on calling this
 * twice with the same arguments.
 */
437 priv_flow_validate(struct priv *priv,
438 const struct rte_flow_attr *attr,
439 const struct rte_flow_item items[],
440 const struct rte_flow_action actions[],
441 struct rte_flow_error *error,
442 struct mlx5_flow_parse *flow)
444 const struct mlx5_flow_items *cur_item = mlx5_flow_items;
/* Attribute checks: only default group/priority, ingress-only. */
448 rte_flow_error_set(error, ENOTSUP,
449 RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
451 "groups are not supported");
454 if (attr->priority) {
455 rte_flow_error_set(error, ENOTSUP,
456 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
458 "priorities are not supported");
462 rte_flow_error_set(error, ENOTSUP,
463 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
465 "egress is not supported");
468 if (!attr->ingress) {
469 rte_flow_error_set(error, ENOTSUP,
470 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
472 "only ingress is supported");
/* Walk the pattern following the mlx5_flow_items[] graph edges: each
 * item must be listed among the previous node's possible successors. */
475 for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
476 const struct mlx5_flow_items *token = NULL;
480 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
484 cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
486 if (cur_item->items[i] == items->type) {
487 token = &mlx5_flow_items[items->type];
492 goto exit_item_not_supported;
494 err = mlx5_flow_item_validate(items,
495 (const uint8_t *)cur_item->mask,
498 goto exit_item_not_supported;
/* Fill pass: emit the Verbs spec for this item. */
499 if (flow->ibv_attr && cur_item->convert) {
500 err = cur_item->convert(items,
501 (cur_item->default_mask ?
502 cur_item->default_mask :
506 goto exit_item_not_supported;
507 } else if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
/* Nested tunnels are rejected during the sizing pass. */
509 rte_flow_error_set(error, ENOTSUP,
510 RTE_FLOW_ERROR_TYPE_ITEM,
512 "cannot recognize multiple"
513 " VXLAN encapsulations");
518 flow->offset += cur_item->dst_sz;
/* Action list: DROP, QUEUE, RSS, MARK, FLAG (VOID ignored). */
520 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
521 if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
523 } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
524 flow->actions.drop = 1;
525 } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
526 const struct rte_flow_action_queue *queue =
527 (const struct rte_flow_action_queue *)
532 if (!queue || (queue->index > (priv->rxqs_n - 1)))
533 goto exit_action_not_supported;
/* If an RSS action was seen first, QUEUE must target one of
 * its queues. */
534 for (n = 0; n < flow->actions.queues_n; ++n) {
535 if (flow->actions.queues[n] == queue->index) {
540 if (flow->actions.queues_n > 1 && !found) {
541 rte_flow_error_set(error, ENOTSUP,
542 RTE_FLOW_ERROR_TYPE_ACTION,
544 "queue action not in RSS queues");
548 flow->actions.queue = 1;
549 flow->actions.queues_n = 1;
550 flow->actions.queues[0] = queue->index;
552 } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
553 const struct rte_flow_action_rss *rss =
554 (const struct rte_flow_action_rss *)
558 if (!rss || !rss->num) {
559 rte_flow_error_set(error, EINVAL,
560 RTE_FLOW_ERROR_TYPE_ACTION,
/* Conversely, a previously-seen QUEUE action must be part of
 * the RSS queue set. */
565 if (flow->actions.queues_n == 1) {
568 assert(flow->actions.queues_n);
569 for (n = 0; n < rss->num; ++n) {
570 if (flow->actions.queues[0] ==
577 rte_flow_error_set(error, ENOTSUP,
578 RTE_FLOW_ERROR_TYPE_ACTION,
580 "queue action not in RSS"
585 for (n = 0; n < rss->num; ++n) {
586 if (rss->queue[n] >= priv->rxqs_n) {
587 rte_flow_error_set(error, EINVAL,
588 RTE_FLOW_ERROR_TYPE_ACTION,
590 "queue id > number of"
595 flow->actions.queue = 1;
596 for (n = 0; n < rss->num; ++n)
597 flow->actions.queues[n] = rss->queue[n];
598 flow->actions.queues_n = rss->num;
599 } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
600 const struct rte_flow_action_mark *mark =
601 (const struct rte_flow_action_mark *)
605 rte_flow_error_set(error, EINVAL,
606 RTE_FLOW_ERROR_TYPE_ACTION,
608 "mark must be defined");
610 } else if (mark->id >= MLX5_FLOW_MARK_MAX) {
611 rte_flow_error_set(error, ENOTSUP,
612 RTE_FLOW_ERROR_TYPE_ACTION,
614 "mark must be between 0"
618 flow->actions.mark = 1;
619 flow->actions.mark_id = mark->id;
620 } else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
/* FLAG behaves like MARK with the default mark id. */
621 flow->actions.mark = 1;
623 goto exit_action_not_supported;
/* Sizing pass only (!ibv_attr): account for the action specs that the
 * create step will append after the item specs. */
626 if (flow->actions.mark && !flow->ibv_attr && !flow->actions.drop)
627 flow->offset += sizeof(struct ibv_flow_spec_action_tag);
628 if (!flow->ibv_attr && flow->actions.drop)
629 flow->offset += sizeof(struct ibv_flow_spec_action_drop);
630 if (!flow->actions.queue && !flow->actions.drop) {
631 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
632 NULL, "no valid action");
636 exit_item_not_supported:
637 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
638 items, "item not supported");
640 exit_action_not_supported:
641 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
642 actions, "action not supported");
647 * Validate a flow supported by the NIC.
649 * @see rte_flow_validate()
/* Thin rte_flow_ops.validate wrapper: runs the sizing/validation pass only
 * (flow.ibv_attr left NULL), nothing is allocated or programmed. */
653 mlx5_flow_validate(struct rte_eth_dev *dev,
654 const struct rte_flow_attr *attr,
655 const struct rte_flow_item items[],
656 const struct rte_flow_action actions[],
657 struct rte_flow_error *error)
659 struct priv *priv = dev->data->dev_private;
661 struct mlx5_flow_parse flow = {
662 .offset = sizeof(struct ibv_flow_attr),
664 .mark_id = MLX5_FLOW_MARK_DEFAULT,
670 ret = priv_flow_validate(priv, attr, items, actions, error, &flow);
676 * Convert Ethernet item to Verbs specification.
679 * Item specification.
680 * @param default_mask[in]
681 * Default bit-masks to use when item->mask is not provided.
682 * @param data[in, out]
686 mlx5_flow_create_eth(const struct rte_flow_item *item,
687 const void *default_mask,
690 const struct rte_flow_item_eth *spec = item->spec;
691 const struct rte_flow_item_eth *mask = item->mask;
692 struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
693 struct ibv_flow_spec_eth *eth;
694 const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
697 ++flow->ibv_attr->num_of_specs;
/* Priority 2: L2-only match is the least specific rule level. */
698 flow->ibv_attr->priority = 2;
699 flow->hash_fields = 0;
/* Append the spec at the running offset inside the ibv_attr buffer. */
700 eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
701 *eth = (struct ibv_flow_spec_eth) {
702 .type = flow->inner | IBV_FLOW_SPEC_ETH,
709 memcpy(eth->val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
710 memcpy(eth->val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
711 eth->val.ether_type = spec->type;
712 memcpy(eth->mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
713 memcpy(eth->mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
714 eth->mask.ether_type = mask->type;
715 /* Remove unwanted bits from values. */
716 for (i = 0; i < ETHER_ADDR_LEN; ++i) {
717 eth->val.dst_mac[i] &= eth->mask.dst_mac[i];
718 eth->val.src_mac[i] &= eth->mask.src_mac[i];
720 eth->val.ether_type &= eth->mask.ether_type;
725 * Convert VLAN item to Verbs specification.
728 * Item specification.
729 * @param default_mask[in]
730 * Default bit-masks to use when item->mask is not provided.
731 * @param data[in, out]
735 mlx5_flow_create_vlan(const struct rte_flow_item *item,
736 const void *default_mask,
739 const struct rte_flow_item_vlan *spec = item->spec;
740 const struct rte_flow_item_vlan *mask = item->mask;
741 struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
742 struct ibv_flow_spec_eth *eth;
743 const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
/* No new spec is emitted: the TCI is patched into the ibv_flow_spec_eth
 * written by the preceding ETH item (hence offset - eth_size). */
745 eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset - eth_size);
750 eth->val.vlan_tag = spec->tci;
751 eth->mask.vlan_tag = mask->tci;
752 eth->val.vlan_tag &= eth->mask.vlan_tag;
757 * Convert IPv4 item to Verbs specification.
760 * Item specification.
761 * @param default_mask[in]
762 * Default bit-masks to use when item->mask is not provided.
763 * @param data[in, out]
767 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
768 const void *default_mask,
771 const struct rte_flow_item_ipv4 *spec = item->spec;
772 const struct rte_flow_item_ipv4 *mask = item->mask;
773 struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
774 struct ibv_flow_spec_ipv4_ext *ipv4;
775 unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
777 ++flow->ibv_attr->num_of_specs;
/* Priority 1: L3 match is more specific than L2 (2), less than L4 (0). */
778 flow->ibv_attr->priority = 1;
779 flow->hash_fields = (IBV_RX_HASH_SRC_IPV4 |
780 IBV_RX_HASH_DST_IPV4);
781 ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
782 *ipv4 = (struct ibv_flow_spec_ipv4_ext) {
783 .type = flow->inner | IBV_FLOW_SPEC_IPV4_EXT,
790 ipv4->val = (struct ibv_flow_ipv4_ext_filter){
791 .src_ip = spec->hdr.src_addr,
792 .dst_ip = spec->hdr.dst_addr,
793 .proto = spec->hdr.next_proto_id,
794 .tos = spec->hdr.type_of_service,
796 ipv4->mask = (struct ibv_flow_ipv4_ext_filter){
797 .src_ip = mask->hdr.src_addr,
798 .dst_ip = mask->hdr.dst_addr,
799 .proto = mask->hdr.next_proto_id,
800 .tos = mask->hdr.type_of_service,
802 /* Remove unwanted bits from values. */
803 ipv4->val.src_ip &= ipv4->mask.src_ip;
804 ipv4->val.dst_ip &= ipv4->mask.dst_ip;
805 ipv4->val.proto &= ipv4->mask.proto;
806 ipv4->val.tos &= ipv4->mask.tos;
811 * Convert IPv6 item to Verbs specification.
814 * Item specification.
815 * @param default_mask[in]
816 * Default bit-masks to use when item->mask is not provided.
817 * @param data[in, out]
821 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
822 const void *default_mask,
825 const struct rte_flow_item_ipv6 *spec = item->spec;
826 const struct rte_flow_item_ipv6 *mask = item->mask;
827 struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
828 struct ibv_flow_spec_ipv6 *ipv6;
829 unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
832 ++flow->ibv_attr->num_of_specs;
/* Priority 1: L3 match (see mlx5_flow_create_ipv4). */
833 flow->ibv_attr->priority = 1;
834 flow->hash_fields = (IBV_RX_HASH_SRC_IPV6 |
835 IBV_RX_HASH_DST_IPV6);
836 ipv6 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
837 *ipv6 = (struct ibv_flow_spec_ipv6) {
838 .type = flow->inner | IBV_FLOW_SPEC_IPV6,
845 memcpy(ipv6->val.src_ip, spec->hdr.src_addr,
846 RTE_DIM(ipv6->val.src_ip));
847 memcpy(ipv6->val.dst_ip, spec->hdr.dst_addr,
848 RTE_DIM(ipv6->val.dst_ip));
849 memcpy(ipv6->mask.src_ip, mask->hdr.src_addr,
850 RTE_DIM(ipv6->mask.src_ip));
851 memcpy(ipv6->mask.dst_ip, mask->hdr.dst_addr,
852 RTE_DIM(ipv6->mask.dst_ip));
853 ipv6->mask.flow_label = mask->hdr.vtc_flow;
854 ipv6->mask.next_hdr = mask->hdr.proto;
855 ipv6->mask.hop_limit = mask->hdr.hop_limits;
856 /* Remove unwanted bits from values. */
857 for (i = 0; i < RTE_DIM(ipv6->val.src_ip); ++i) {
858 ipv6->val.src_ip[i] &= ipv6->mask.src_ip[i];
859 ipv6->val.dst_ip[i] &= ipv6->mask.dst_ip[i];
861 ipv6->val.flow_label &= ipv6->mask.flow_label;
862 ipv6->val.next_hdr &= ipv6->mask.next_hdr;
863 ipv6->val.hop_limit &= ipv6->mask.hop_limit;
868 * Convert UDP item to Verbs specification.
871 * Item specification.
872 * @param default_mask[in]
873 * Default bit-masks to use when item->mask is not provided.
874 * @param data[in, out]
878 mlx5_flow_create_udp(const struct rte_flow_item *item,
879 const void *default_mask,
882 const struct rte_flow_item_udp *spec = item->spec;
883 const struct rte_flow_item_udp *mask = item->mask;
884 struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
885 struct ibv_flow_spec_tcp_udp *udp;
886 unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
888 ++flow->ibv_attr->num_of_specs;
/* Priority 0: L4 match is the most specific rule level.  Note the OR
 * into hash_fields: L4 hash bits extend the ones set by the L3 item. */
889 flow->ibv_attr->priority = 0;
890 flow->hash_fields |= (IBV_RX_HASH_SRC_PORT_UDP |
891 IBV_RX_HASH_DST_PORT_UDP);
892 udp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
893 *udp = (struct ibv_flow_spec_tcp_udp) {
894 .type = flow->inner | IBV_FLOW_SPEC_UDP,
901 udp->val.dst_port = spec->hdr.dst_port;
902 udp->val.src_port = spec->hdr.src_port;
903 udp->mask.dst_port = mask->hdr.dst_port;
904 udp->mask.src_port = mask->hdr.src_port;
905 /* Remove unwanted bits from values. */
906 udp->val.src_port &= udp->mask.src_port;
907 udp->val.dst_port &= udp->mask.dst_port;
912 * Convert TCP item to Verbs specification.
915 * Item specification.
916 * @param default_mask[in]
917 * Default bit-masks to use when item->mask is not provided.
918 * @param data[in, out]
922 mlx5_flow_create_tcp(const struct rte_flow_item *item,
923 const void *default_mask,
926 const struct rte_flow_item_tcp *spec = item->spec;
927 const struct rte_flow_item_tcp *mask = item->mask;
928 struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
929 struct ibv_flow_spec_tcp_udp *tcp;
930 unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
932 ++flow->ibv_attr->num_of_specs;
/* Priority 0: L4 match; mirrors mlx5_flow_create_udp for TCP. */
933 flow->ibv_attr->priority = 0;
934 flow->hash_fields |= (IBV_RX_HASH_SRC_PORT_TCP |
935 IBV_RX_HASH_DST_PORT_TCP);
936 tcp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
937 *tcp = (struct ibv_flow_spec_tcp_udp) {
938 .type = flow->inner | IBV_FLOW_SPEC_TCP,
945 tcp->val.dst_port = spec->hdr.dst_port;
946 tcp->val.src_port = spec->hdr.src_port;
947 tcp->mask.dst_port = mask->hdr.dst_port;
948 tcp->mask.src_port = mask->hdr.src_port;
949 /* Remove unwanted bits from values. */
950 tcp->val.src_port &= tcp->mask.src_port;
951 tcp->val.dst_port &= tcp->mask.dst_port;
956 * Convert VXLAN item to Verbs specification.
959 * Item specification.
960 * @param default_mask[in]
961 * Default bit-masks to use when item->mask is not provided.
962 * @param data[in, out]
966 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
967 const void *default_mask,
970 const struct rte_flow_item_vxlan *spec = item->spec;
971 const struct rte_flow_item_vxlan *mask = item->mask;
972 struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
973 struct ibv_flow_spec_tunnel *vxlan;
974 unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
980 ++flow->ibv_attr->num_of_specs;
981 flow->ibv_attr->priority = 0;
983 vxlan = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
984 *vxlan = (struct ibv_flow_spec_tunnel) {
985 .type = flow->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
/* Everything after this item matches the inner (encapsulated) headers. */
988 flow->inner = IBV_FLOW_SPEC_INNER;
/* The 24-bit VNI is packed into the upper bytes of the 32-bit tunnel id
 * (presumably via a local union `id` whose declaration is missing from
 * this extract — confirm against the full file). */
993 memcpy(&id.vni[1], spec->vni, 3);
994 vxlan->val.tunnel_id = id.vlan_id;
995 memcpy(&id.vni[1], mask->vni, 3);
996 vxlan->mask.tunnel_id = id.vlan_id;
997 /* Remove unwanted bits from values. */
998 vxlan->val.tunnel_id &= vxlan->mask.tunnel_id;
1003 * Convert mark/flag action to Verbs specification.
1006 * Pointer to MLX5 flow structure.
/* Appends an ibv_flow_spec_action_tag carrying the (converted) mark id at
 * the current offset of the ibv_attr buffer. */
1011 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *flow, uint32_t mark_id)
1013 struct ibv_flow_spec_action_tag *tag;
1014 unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1016 tag = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
1017 *tag = (struct ibv_flow_spec_action_tag){
1018 .type = IBV_FLOW_SPEC_ACTION_TAG,
1020 .tag_id = mlx5_flow_mark_set(mark_id),
1022 ++flow->ibv_attr->num_of_specs;
1027 * Complete flow rule creation with a drop queue.
1030 * Pointer to private structure.
1032 * MLX5 flow attributes (filled by mlx5_flow_validate()).
1034 * Perform verbose error reporting if not NULL.
1037 * A flow if the rule could be created.
1039 static struct rte_flow *
1040 priv_flow_create_action_queue_drop(struct priv *priv,
1041 struct mlx5_flow_parse *flow,
1042 struct rte_flow_error *error)
1044 struct rte_flow *rte_flow;
1045 struct ibv_flow_spec_action_drop *drop;
1046 unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
1050 rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow), 0);
1052 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1053 NULL, "cannot allocate flow memory");
/* Append the DROP action spec after the item specs. */
1057 drop = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
1058 *drop = (struct ibv_flow_spec_action_drop){
1059 .type = IBV_FLOW_SPEC_ACTION_DROP,
1062 ++flow->ibv_attr->num_of_specs;
1063 flow->offset += sizeof(struct ibv_flow_spec_action_drop);
/* The rte_flow takes ownership of the ibv_attr buffer from here on. */
1064 rte_flow->ibv_attr = flow->ibv_attr;
/* Device not started yet: keep the rule software-only; it is programmed
 * later when the port starts. */
1065 if (!priv->dev->data->dev_started)
1067 rte_flow->drxq.hrxq.qp = priv->flow_drop_queue->qp;
1068 rte_flow->ibv_flow = ibv_create_flow(rte_flow->drxq.hrxq.qp,
1069 rte_flow->ibv_attr);
1070 if (!rte_flow->ibv_flow) {
1071 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1072 NULL, "flow rule creation failure");
1083 * Complete flow rule creation.
1086 * Pointer to private structure.
1088 * MLX5 flow attributes (filled by mlx5_flow_validate()).
1090 * Perform verbose error reporting if not NULL.
1093 * A flow if the rule could be created.
1095 static struct rte_flow *
1096 priv_flow_create_action_queue(struct priv *priv,
1097 struct mlx5_flow_parse *flow,
1098 struct rte_flow_error *error)
1100 struct rte_flow *rte_flow;
1105 assert(!flow->actions.drop);
/* Single allocation: rte_flow followed by its queues[] array. */
1107 rte_calloc(__func__, 1,
1109 flow->actions.queues_n * sizeof(uint16_t),
1112 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1113 NULL, "cannot allocate flow memory");
1116 rte_flow->mark = flow->actions.mark;
1117 rte_flow->ibv_attr = flow->ibv_attr;
1118 rte_flow->queues = (uint16_t (*)[])(rte_flow + 1);
1119 memcpy(rte_flow->queues, flow->actions.queues,
1120 flow->actions.queues_n * sizeof(uint16_t));
1121 rte_flow->queues_n = flow->actions.queues_n;
1122 rte_flow->frxq.hash_fields = flow->hash_fields;
/* An existing hash Rx queue with the same parameters means an identical
 * flow already exists: reject as duplicate. */
1123 rte_flow->frxq.hrxq = mlx5_priv_hrxq_get(priv, rss_hash_default_key,
1124 rss_hash_default_key_len,
1126 (*rte_flow->queues),
1127 rte_flow->queues_n);
1128 if (rte_flow->frxq.hrxq) {
1129 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1130 NULL, "duplicated flow");
1133 rte_flow->frxq.hrxq = mlx5_priv_hrxq_new(priv, rss_hash_default_key,
1134 rss_hash_default_key_len,
1136 (*rte_flow->queues),
1137 rte_flow->queues_n);
1138 if (!rte_flow->frxq.hrxq) {
1139 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1140 NULL, "cannot create hash rxq");
/* Propagate the mark flag to every target Rx queue so the datapath
 * copies the flow tag into the mbuf. */
1143 for (i = 0; i != flow->actions.queues_n; ++i) {
1144 struct mlx5_rxq_data *q =
1145 (*priv->rxqs)[flow->actions.queues[i]];
1147 q->mark |= flow->actions.mark;
1149 if (!priv->dev->data->dev_started)
1151 rte_flow->ibv_flow = ibv_create_flow(rte_flow->frxq.hrxq->qp,
1152 rte_flow->ibv_attr);
1153 if (!rte_flow->ibv_flow) {
1154 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1155 NULL, "flow rule creation failure");
/* Error path: release the hash Rx queue reference if taken. */
1161 if (rte_flow->frxq.hrxq)
1162 mlx5_priv_hrxq_release(priv, rte_flow->frxq.hrxq)
1171 * Pointer to private structure.
1173 * Flow rule attributes.
1174 * @param[in] pattern
1175 * Pattern specification (list terminated by the END pattern item).
1176 * @param[in] actions
1177 * Associated actions (list terminated by the END action).
1179 * Perform verbose error reporting if not NULL.
1182 * A flow on success, NULL otherwise.
/*
 * Two-pass creation: first priv_flow_validate() with a NULL ibv_attr to
 * size the Verbs attribute buffer, then again with the allocated buffer so
 * the converters fill it, before dispatching to the drop- or queue-variant
 * completion helper.
 */
1184 static struct rte_flow *
1185 priv_flow_create(struct priv *priv,
1186 const struct rte_flow_attr *attr,
1187 const struct rte_flow_item items[],
1188 const struct rte_flow_action actions[],
1189 struct rte_flow_error *error)
1191 struct rte_flow *rte_flow;
1192 struct mlx5_flow_parse flow = {
1193 .offset = sizeof(struct ibv_flow_attr),
1195 .mark_id = MLX5_FLOW_MARK_DEFAULT,
/* Pass 1: validate and accumulate flow.offset. */
1202 err = priv_flow_validate(priv, attr, items, actions, error, &flow);
1205 flow.ibv_attr = rte_malloc(__func__, flow.offset, 0);
/* Reset offset so the fill pass appends right after the header. */
1206 flow.offset = sizeof(struct ibv_flow_attr);
1207 if (!flow.ibv_attr) {
1208 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1209 NULL, "cannot allocate ibv_attr memory");
1212 *flow.ibv_attr = (struct ibv_flow_attr){
1213 .type = IBV_FLOW_ATTR_NORMAL,
1214 .size = sizeof(struct ibv_flow_attr),
1215 .priority = attr->priority,
1221 flow.hash_fields = 0;
/* Pass 2: cannot fail — identical input already validated above. */
1222 claim_zero(priv_flow_validate(priv, attr, items, actions,
1224 if (flow.actions.mark && !flow.actions.drop) {
1225 mlx5_flow_create_flag_mark(&flow, flow.actions.mark_id);
1226 flow.offset += sizeof(struct ibv_flow_spec_action_tag);
1228 if (flow.actions.drop)
1230 priv_flow_create_action_queue_drop(priv, &flow, error);
1232 rte_flow = priv_flow_create_action_queue(priv, &flow, error);
1237 rte_free(flow.ibv_attr);
1244 * @see rte_flow_create()
/* rte_flow_ops.create entry point: build the flow and, on success, link it
 * into the per-port list of active flows. */
1248 mlx5_flow_create(struct rte_eth_dev *dev,
1249 const struct rte_flow_attr *attr,
1250 const struct rte_flow_item items[],
1251 const struct rte_flow_action actions[],
1252 struct rte_flow_error *error)
1254 struct priv *priv = dev->data->dev_private;
1255 struct rte_flow *flow;
1258 flow = priv_flow_create(priv, attr, items, actions, error);
1260 TAILQ_INSERT_TAIL(&priv->flows, flow, next);
1261 DEBUG("Flow created %p", (void *)flow);
1271 * Pointer to private structure.
/* Destroy one flow: clear Rx-queue mark flags it contributed, destroy the
 * Verbs flow, release the hash Rx queue, unlink and free. */
1276 priv_flow_destroy(struct priv *priv,
1277 struct rte_flow *flow)
/* Mark bookkeeping only applies to marked, non-drop flows. */
1283 if (flow->drop || !flow->mark)
1285 queues = flow->frxq.hrxq->ind_table->queues;
1286 queues_n = flow->frxq.hrxq->ind_table->queues_n;
1287 for (i = 0; i != queues_n; ++i) {
1288 struct rte_flow *tmp;
1289 struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[queues[i]];
1293 * To remove the mark from the queue, the queue must not be
1294 * present in any other marked flow (RSS or not).
1296 TAILQ_FOREACH(tmp, &priv->flows, next) {
1302 (j != tmp->frxq.hrxq->ind_table->queues_n) &&
1305 if (tmp->frxq.hrxq->ind_table->queues[j] ==
1309 rxq_data->mark = mark;
1313 claim_zero(ibv_destroy_flow(flow->ibv_flow));
1315 mlx5_priv_hrxq_release(priv, flow->frxq.hrxq);
1316 TAILQ_REMOVE(&priv->flows, flow, next);
1317 rte_free(flow->ibv_attr);
1318 DEBUG("Flow destroyed %p", (void *)flow);
1325 * @see rte_flow_destroy()
/* rte_flow_ops.destroy entry point: thin wrapper over priv_flow_destroy(). */
1329 mlx5_flow_destroy(struct rte_eth_dev *dev,
1330 struct rte_flow *flow,
1331 struct rte_flow_error *error)
1333 struct priv *priv = dev->data->dev_private;
1337 priv_flow_destroy(priv, flow);
1343 * Destroy all flows.
1346 * Pointer to private structure.
/*
 * Destroy every flow on the port list. priv_flow_destroy() unlinks the
 * head element on each iteration (TAILQ_REMOVE inside it), so the loop
 * terminates once the list is empty.
 */
1349 priv_flow_flush(struct priv *priv)
1351 while (!TAILQ_EMPTY(&priv->flows)) {
1352 struct rte_flow *flow;
1354 flow = TAILQ_FIRST(&priv->flows);
1355 priv_flow_destroy(priv, flow);
1360 * Destroy all flows.
1362 * @see rte_flow_flush()
/*
 * DPDK rte_flow "flush" callback (@see rte_flow_flush()).
 * Thin wrapper delegating to priv_flow_flush().
 * NOTE(review): elided excerpt — the return value and any locking are
 * not visible in this listing.
 */
1366 mlx5_flow_flush(struct rte_eth_dev *dev,
1367 struct rte_flow_error *error)
1369 struct priv *priv = dev->data->dev_private;
1373 priv_flow_flush(priv);
1379 * Create drop queue.
1382 * Pointer to private structure.
/*
 * Create the port's drop queue: CQ -> WQ -> single-entry indirection
 * table -> RSS QP whose rx_hash_fields_mask is 0, used as the target of
 * DROP flows. On any failure the already-created verbs objects are
 * released in reverse order (the error labels between these sites are
 * elided from this listing) and priv->flow_drop_queue stays NULL.
 */
1388 priv_flow_create_drop_queue(struct priv *priv)
1390 struct mlx5_hrxq_drop *fdq = NULL;
1394 fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
1396 WARN("cannot allocate memory for drop queue");
/* Minimal 1-entry CQ: packets reaching this queue are never polled. */
1399 fdq->cq = ibv_create_cq(priv->ctx, 1, NULL, NULL, 0);
1401 WARN("cannot allocate CQ for drop queue");
1404 fdq->wq = ibv_create_wq(priv->ctx,
1405 &(struct ibv_wq_init_attr){
1406 .wq_type = IBV_WQT_RQ,
1413 WARN("cannot allocate WQ for drop queue");
/* log size 0 => indirection table with exactly one entry. */
1416 fdq->ind_table = ibv_create_rwq_ind_table(priv->ctx,
1417 &(struct ibv_rwq_ind_table_init_attr){
1418 .log_ind_tbl_size = 0,
1419 .ind_tbl = &fdq->wq,
1422 if (!fdq->ind_table) {
1423 WARN("cannot allocate indirection table for drop queue");
1426 fdq->qp = ibv_create_qp_ex(priv->ctx,
1427 &(struct ibv_qp_init_attr_ex){
1428 .qp_type = IBV_QPT_RAW_PACKET,
1430 IBV_QP_INIT_ATTR_PD |
1431 IBV_QP_INIT_ATTR_IND_TABLE |
1432 IBV_QP_INIT_ATTR_RX_HASH,
1433 .rx_hash_conf = (struct ibv_rx_hash_conf){
1435 IBV_RX_HASH_FUNC_TOEPLITZ,
1436 .rx_hash_key_len = rss_hash_default_key_len,
1437 .rx_hash_key = rss_hash_default_key,
/* Hash over no fields: every packet lands on the lone (drop) WQ. */
1438 .rx_hash_fields_mask = 0,
1440 .rwq_ind_tbl = fdq->ind_table,
1444 WARN("cannot allocate QP for drop queue");
1447 priv->flow_drop_queue = fdq;
/* Error unwind: release in reverse creation order (labels elided). */
1451 claim_zero(ibv_destroy_qp(fdq->qp));
1453 claim_zero(ibv_destroy_rwq_ind_table(fdq->ind_table));
1455 claim_zero(ibv_destroy_wq(fdq->wq));
1457 claim_zero(ibv_destroy_cq(fdq->cq));
1460 priv->flow_drop_queue = NULL;
1465 * Delete drop queue.
1468 * Pointer to private structure.
/*
 * Release the drop queue's verbs objects in reverse creation order
 * (QP, indirection table, WQ, CQ) and clear priv->flow_drop_queue.
 * NOTE(review): presumably each destroy is guarded by a NULL check and
 * there is an early return when no drop queue exists — those lines are
 * elided from this listing; confirm against the full source.
 */
1471 priv_flow_delete_drop_queue(struct priv *priv)
1473 struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
1478 claim_zero(ibv_destroy_qp(fdq->qp));
1480 claim_zero(ibv_destroy_rwq_ind_table(fdq->ind_table));
1482 claim_zero(ibv_destroy_wq(fdq->wq));
1484 claim_zero(ibv_destroy_cq(fdq->cq));
1486 priv->flow_drop_queue = NULL;
1492 * Called by dev_stop() to remove all flows.
1495 * Pointer to private structure.
1498 priv_flow_stop(struct priv *priv)
1500 struct rte_flow *flow;
1502 TAILQ_FOREACH_REVERSE(flow, &priv->flows, mlx5_flows, next) {
1503 claim_zero(ibv_destroy_flow(flow->ibv_flow));
1504 flow->ibv_flow = NULL;
1505 mlx5_priv_hrxq_release(priv, flow->frxq.hrxq);
1506 flow->frxq.hrxq = NULL;
1509 struct mlx5_ind_table_ibv *ind_tbl =
1510 flow->frxq.hrxq->ind_table;
1512 for (n = 0; n < ind_tbl->queues_n; ++n)
1513 (*priv->rxqs)[ind_tbl->queues[n]]->mark = 0;
1515 DEBUG("Flow %p removed", (void *)flow);
1517 priv_flow_delete_drop_queue(priv);
1524 * Pointer to private structure.
1527 * 0 on success, a errno value otherwise and rte_errno is set.
/*
 * Re-apply all listed flows. Called by dev_start(): creates the drop
 * queue, then for each flow obtains a hash Rx queue matching its hash
 * fields (reusing an existing one when possible), re-creates the verbs
 * flow and sets the MARK flag on the flow's Rx queues.
 * Returns 0 on success, an errno value otherwise (per the header doc).
 * NOTE(review): elided excerpt — error returns between the hrxq lookup,
 * hrxq creation and ibv_create_flow() are not visible in this listing.
 */
1530 priv_flow_start(struct priv *priv)
1533 struct rte_flow *flow;
1535 ret = priv_flow_create_drop_queue(priv);
1538 TAILQ_FOREACH(flow, &priv->flows, next) {
/* Flow still has its hash Rx queue attached: already applied. */
1539 if (flow->frxq.hrxq)
/* First try to reuse an existing hash Rx queue... */
1542 mlx5_priv_hrxq_get(priv, rss_hash_default_key,
1543 rss_hash_default_key_len,
1544 flow->frxq.hash_fields,
1547 if (flow->frxq.hrxq)
/* ...otherwise create a new one. */
1550 mlx5_priv_hrxq_new(priv, rss_hash_default_key,
1551 rss_hash_default_key_len,
1552 flow->frxq.hash_fields,
1555 if (!flow->frxq.hrxq) {
1556 DEBUG("Flow %p cannot be applied",
1562 flow->ibv_flow = ibv_create_flow(flow->frxq.hrxq->qp,
1564 if (!flow->ibv_flow) {
1565 DEBUG("Flow %p cannot be applied", (void *)flow);
1569 DEBUG("Flow %p applied", (void *)flow);
/* Restore the MARK flag on every Rx queue used by a marked flow. */
1574 n < flow->frxq.hrxq->ind_table->queues_n;
1577 flow->frxq.hrxq->ind_table->queues[n];
1578 (*priv->rxqs)[idx]->mark = 1;
1588 * @see rte_flow_isolate()
/*
 * DPDK rte_flow "isolate" callback (@see rte_flow_isolate()).
 * Records the isolated mode in the private structure; refused with
 * EBUSY while the port is started.
 * NOTE(review): elided excerpt — the 'enable' parameter line, locking
 * and the return statements are not visible in this listing.
 */
1592 mlx5_flow_isolate(struct rte_eth_dev *dev,
1594 struct rte_flow_error *error)
1596 struct priv *priv = dev->data->dev_private;
1599 if (dev->data->dev_started) {
1600 rte_flow_error_set(error, EBUSY,
1601 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1603 "port must be stopped first");
/* Normalize any non-zero 'enable' to 1. */
1607 priv->isolated = !!enable;
1613 * Verify the flow list is empty
1616 * Pointer to private structure.
1618 * @return the number of flows not released.
1621 priv_flow_verify(struct priv *priv)
1623 struct rte_flow *flow;
1626 TAILQ_FOREACH(flow, &priv->flows, next) {
1627 DEBUG("%p: flow %p still referenced", (void *)priv,