/*-
 *   BSD LICENSE
 *
 *   Copyright 2016 6WIND S.A.
 *   Copyright 2016 Mellanox.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of 6WIND S.A. nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <sys/queue.h>
#include <string.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_ethdev.h>
#include <rte_flow.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>

#include "mlx5.h"
#include "mlx5_rxtx.h"
#include "mlx5_prm.h"
/* Number of Work Queues necessary for the DROP queue. */
#ifndef HAVE_VERBS_IBV_EXP_FLOW_SPEC_ACTION_DROP
#define MLX5_DROP_WQ_N 4
#else
#define MLX5_DROP_WQ_N 1
#endif
static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data);

static int
mlx5_flow_create_vlan(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data);

static int
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data);

static int
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data);

static int
mlx5_flow_create_udp(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data);

static int
mlx5_flow_create_tcp(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data);

static int
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
		       const void *default_mask,
		       void *data);
struct rte_flow {
	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
	struct ibv_exp_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
	struct ibv_exp_rwq_ind_table *ind_table; /**< Indirection table. */
	struct ibv_qp *qp; /**< Verbs queue pair. */
	struct ibv_exp_flow *ibv_flow; /**< Verbs flow. */
	struct ibv_exp_wq *wq; /**< Verbs work queue. */
	struct ibv_cq *cq; /**< Verbs completion queue. */
	uint16_t rxqs_n; /**< Number of queues in this flow, 0 if drop queue. */
	uint32_t mark:1; /**< Set if the flow is marked. */
	uint32_t drop:1; /**< Drop queue. */
	uint64_t hash_fields; /**< Fields that participate in the hash. */
	struct rxq *rxqs[]; /**< Pointer to the queues array. */
};
/** Static initializer for items. */
#define ITEMS(...) \
	(const enum rte_flow_item_type []){ \
		__VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
	}
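/*
 * Expansion sketch (illustrative, not part of the original file):
 * ITEMS(RTE_FLOW_ITEM_TYPE_UDP, RTE_FLOW_ITEM_TYPE_TCP) yields the
 * compound literal
 *
 *   (const enum rte_flow_item_type []){
 *           RTE_FLOW_ITEM_TYPE_UDP, RTE_FLOW_ITEM_TYPE_TCP,
 *           RTE_FLOW_ITEM_TYPE_END,
 *   }
 *
 * so walkers of the item graph can iterate until they reach
 * RTE_FLOW_ITEM_TYPE_END.
 */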
/** Structure to generate a simple graph of layers supported by the NIC. */
struct mlx5_flow_items {
	/** List of possible actions for these items. */
	const enum rte_flow_action_type *const actions;
	/** Bit-masks corresponding to the possibilities for the item. */
	const void *mask;
	/**
	 * Default bit-masks to use when item->mask is not provided. When
	 * \default_mask is also NULL, the full supported bit-mask (\mask) is
	 * used instead.
	 */
	const void *default_mask;
	/** Bit-masks size in bytes. */
	const unsigned int mask_sz;
	/**
	 * Conversion function from rte_flow to NIC specific flow.
	 *
	 * @param item
	 *   rte_flow item to convert.
	 * @param default_mask
	 *   Default bit-masks to use when item->mask is not provided.
	 * @param data
	 *   Internal structure to store the conversion.
	 *
	 * @return
	 *   0 on success, negative value otherwise.
	 */
	int (*convert)(const struct rte_flow_item *item,
		       const void *default_mask,
		       void *data);
	/** Size in bytes of the destination structure. */
	const unsigned int dst_sz;
	/** List of possible following items. */
	const enum rte_flow_item_type *const items;
};
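/*
 * Mask selection sketch (illustrative): during conversion, the effective
 * mask for an item is chosen as
 *
 *   item->mask ? item->mask :
 *   (cur_item->default_mask ? cur_item->default_mask : cur_item->mask);
 *
 * i.e. an application-provided mask wins, then the entry default, then
 * the full supported bit-mask.
 */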
/** Valid actions for this PMD. */
static const enum rte_flow_action_type valid_actions[] = {
	RTE_FLOW_ACTION_TYPE_DROP,
	RTE_FLOW_ACTION_TYPE_QUEUE,
	RTE_FLOW_ACTION_TYPE_MARK,
	RTE_FLOW_ACTION_TYPE_FLAG,
	RTE_FLOW_ACTION_TYPE_END,
};
/** Graph of supported items and associated actions. */
static const struct mlx5_flow_items mlx5_flow_items[] = {
	[RTE_FLOW_ITEM_TYPE_END] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
			       RTE_FLOW_ITEM_TYPE_VXLAN),
	},
	[RTE_FLOW_ITEM_TYPE_ETH] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
			       RTE_FLOW_ITEM_TYPE_IPV4,
			       RTE_FLOW_ITEM_TYPE_IPV6),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_eth){
			.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
			.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
			.type = -1,
		},
		.default_mask = &rte_flow_item_eth_mask,
		.mask_sz = sizeof(struct rte_flow_item_eth),
		.convert = mlx5_flow_create_eth,
		.dst_sz = sizeof(struct ibv_exp_flow_spec_eth),
	},
	[RTE_FLOW_ITEM_TYPE_VLAN] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
			       RTE_FLOW_ITEM_TYPE_IPV6),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_vlan){
			.tci = -1,
		},
		.default_mask = &rte_flow_item_vlan_mask,
		.mask_sz = sizeof(struct rte_flow_item_vlan),
		.convert = mlx5_flow_create_vlan,
		.dst_sz = 0,
	},
	[RTE_FLOW_ITEM_TYPE_IPV4] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
			       RTE_FLOW_ITEM_TYPE_TCP),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_ipv4){
			.hdr = {
				.src_addr = -1,
				.dst_addr = -1,
				.type_of_service = -1,
				.next_proto_id = -1,
			},
		},
		.default_mask = &rte_flow_item_ipv4_mask,
		.mask_sz = sizeof(struct rte_flow_item_ipv4),
		.convert = mlx5_flow_create_ipv4,
		.dst_sz = sizeof(struct ibv_exp_flow_spec_ipv4_ext),
	},
	[RTE_FLOW_ITEM_TYPE_IPV6] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
			       RTE_FLOW_ITEM_TYPE_TCP),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_ipv6){
			.hdr = {
				.src_addr = {
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
				},
				.dst_addr = {
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
				},
				.vtc_flow = -1,
				.proto = -1,
				.hop_limits = -1,
			},
		},
		.default_mask = &rte_flow_item_ipv6_mask,
		.mask_sz = sizeof(struct rte_flow_item_ipv6),
		.convert = mlx5_flow_create_ipv6,
		.dst_sz = sizeof(struct ibv_exp_flow_spec_ipv6_ext),
	},
	[RTE_FLOW_ITEM_TYPE_UDP] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_udp){
			.hdr = {
				.src_port = -1,
				.dst_port = -1,
			},
		},
		.default_mask = &rte_flow_item_udp_mask,
		.mask_sz = sizeof(struct rte_flow_item_udp),
		.convert = mlx5_flow_create_udp,
		.dst_sz = sizeof(struct ibv_exp_flow_spec_tcp_udp),
	},
	[RTE_FLOW_ITEM_TYPE_TCP] = {
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_tcp){
			.hdr = {
				.src_port = -1,
				.dst_port = -1,
			},
		},
		.default_mask = &rte_flow_item_tcp_mask,
		.mask_sz = sizeof(struct rte_flow_item_tcp),
		.convert = mlx5_flow_create_tcp,
		.dst_sz = sizeof(struct ibv_exp_flow_spec_tcp_udp),
	},
	[RTE_FLOW_ITEM_TYPE_VXLAN] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_vxlan){
			.vni = "\xff\xff\xff",
		},
		.default_mask = &rte_flow_item_vxlan_mask,
		.mask_sz = sizeof(struct rte_flow_item_vxlan),
		.convert = mlx5_flow_create_vxlan,
		.dst_sz = sizeof(struct ibv_exp_flow_spec_tunnel),
	},
};
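/*
 * Traversal sketch (illustrative): a pattern is accepted only if each
 * item is listed in the .items array of the previous one, starting from
 * the END entry. For instance ETH -> IPV4 -> UDP -> VXLAN -> ETH is a
 * valid chain above, while ETH -> UDP is rejected because UDP does not
 * appear in the ETH entry's .items list.
 */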
/** Structure to pass to the conversion function. */
struct mlx5_flow {
	struct ibv_exp_flow_attr *ibv_attr; /**< Verbs attribute. */
	unsigned int offset; /**< Offset in bytes in the ibv_attr buffer. */
	uint32_t inner; /**< Set once VXLAN is encountered. */
	uint64_t hash_fields; /**< Fields that participate in the hash. */
};
/** Structure for Drop queue. */
struct rte_flow_drop {
	struct ibv_exp_rwq_ind_table *ind_table; /**< Indirection table. */
	struct ibv_qp *qp; /**< Verbs queue pair. */
	struct ibv_exp_wq *wqs[MLX5_DROP_WQ_N]; /**< Verbs work queues. */
	struct ibv_cq *cq; /**< Verbs completion queue. */
};
struct mlx5_flow_action {
	uint32_t queue:1; /**< Target is a receive queue. */
	uint32_t drop:1; /**< Target is a drop queue. */
	uint32_t mark:1; /**< Mark is present in the flow. */
	uint32_t mark_id; /**< Mark identifier. */
	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indexes to use. */
	uint16_t queues_n; /**< Number of entries in queues[]. */
};
/**
 * Check support for a given item.
 *
 * @param item[in]
 *   Item specification.
 * @param mask[in]
 *   Bit-masks covering supported fields to compare with spec, last and
 *   mask in \item.
 * @param size
 *   Bit-Mask size in bytes.
 *
 * @return
 *   0 on success.
 */
static int
mlx5_flow_item_validate(const struct rte_flow_item *item,
			const uint8_t *mask, unsigned int size)
{
	int ret = 0;

	if (!item->spec && (item->mask || item->last))
		return -1;
	if (item->spec && !item->mask) {
		unsigned int i;
		const uint8_t *spec = item->spec;

		for (i = 0; i < size; ++i)
			if ((spec[i] | mask[i]) != mask[i])
				return -1;
	}
	if (item->last && !item->mask) {
		unsigned int i;
		const uint8_t *spec = item->last;

		for (i = 0; i < size; ++i)
			if ((spec[i] | mask[i]) != mask[i])
				return -1;
	}
	if (item->mask) {
		unsigned int i;
		const uint8_t *spec = item->mask;

		for (i = 0; i < size; ++i)
			if ((spec[i] | mask[i]) != mask[i])
				return -1;
	}
	if (item->spec && item->last) {
		uint8_t spec[size];
		uint8_t last[size];
		const uint8_t *apply = mask;
		unsigned int i;

		if (item->mask)
			apply = item->mask;
		for (i = 0; i < size; ++i) {
			spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
			last[i] = ((const uint8_t *)item->last)[i] & apply[i];
		}
		ret = memcmp(spec, last, size);
	}
	return ret;
}
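/*
 * Arithmetic sketch (illustrative): a spec byte is supported only when
 * it carries no bit outside the supported mask. With mask[i] == 0x0f,
 * spec[i] == 0x05 passes ((0x05 | 0x0f) == 0x0f) while spec[i] == 0x15
 * fails ((0x15 | 0x0f) == 0x1f != 0x0f).
 */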
/**
 * Validate a flow supported by the NIC.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] flow
 *   Flow structure to update.
 * @param[in, out] action
 *   Action structure to update.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_validate(struct priv *priv,
		   const struct rte_flow_attr *attr,
		   const struct rte_flow_item items[],
		   const struct rte_flow_action actions[],
		   struct rte_flow_error *error,
		   struct mlx5_flow *flow,
		   struct mlx5_flow_action *action)
{
	const struct mlx5_flow_items *cur_item = mlx5_flow_items;

	if (attr->group) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
				   NULL,
				   "groups are not supported");
		return -rte_errno;
	}
	if (attr->priority) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
				   NULL,
				   "priorities are not supported");
		return -rte_errno;
	}
	if (attr->egress) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
				   NULL,
				   "egress is not supported");
		return -rte_errno;
	}
	if (!attr->ingress) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
				   NULL,
				   "only ingress is supported");
		return -rte_errno;
	}
	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
		const struct mlx5_flow_items *token = NULL;
		unsigned int i;
		int err;

		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
			continue;
		for (i = 0;
		     cur_item->items &&
		     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
		     ++i) {
			if (cur_item->items[i] == items->type) {
				token = &mlx5_flow_items[items->type];
				break;
			}
		}
		if (!token)
			goto exit_item_not_supported;
		cur_item = token;
		err = mlx5_flow_item_validate(items,
					      (const uint8_t *)cur_item->mask,
					      cur_item->mask_sz);
		if (err)
			goto exit_item_not_supported;
		if (flow->ibv_attr && cur_item->convert) {
			err = cur_item->convert(items,
						(cur_item->default_mask ?
						 cur_item->default_mask :
						 cur_item->mask),
						flow);
			if (err)
				goto exit_item_not_supported;
		} else if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
			if (flow->inner) {
				rte_flow_error_set(error, ENOTSUP,
						   RTE_FLOW_ERROR_TYPE_ITEM,
						   items,
						   "cannot recognize multiple"
						   " VXLAN encapsulations");
				return -rte_errno;
			}
			flow->inner = 1;
		}
		flow->offset += cur_item->dst_sz;
	}
	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
			continue;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
			action->drop = 1;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
			const struct rte_flow_action_queue *queue =
				(const struct rte_flow_action_queue *)
				actions->conf;
			uint16_t n;
			uint16_t found = 0;

			if (!queue || (queue->index > (priv->rxqs_n - 1)))
				goto exit_action_not_supported;
			for (n = 0; n < action->queues_n; ++n) {
				if (action->queues[n] == queue->index) {
					found = 1;
					break;
				}
			}
			if (action->queues_n > 1 && !found) {
				rte_flow_error_set(error, ENOTSUP,
					   RTE_FLOW_ERROR_TYPE_ACTION,
					   actions,
					   "queue action not in RSS queues");
				return -rte_errno;
			}
			if (!found) {
				action->queue = 1;
				action->queues_n = 1;
				action->queues[0] = queue->index;
			}
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
			const struct rte_flow_action_rss *rss =
				(const struct rte_flow_action_rss *)
				actions->conf;
			uint16_t n;

			if (!rss || !rss->num) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "no valid queues");
				return -rte_errno;
			}
			if (action->queues_n == 1) {
				uint16_t found = 0;

				assert(action->queues_n);
				for (n = 0; n < rss->num; ++n) {
					if (action->queues[0] ==
					    rss->queue[n]) {
						found = 1;
						break;
					}
				}
				if (!found) {
					rte_flow_error_set(error, ENOTSUP,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "queue action not in RSS"
						   " queues");
					return -rte_errno;
				}
			}
			for (n = 0; n < rss->num; ++n) {
				if (rss->queue[n] >= priv->rxqs_n) {
					rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "queue id > number of"
						   " queues");
					return -rte_errno;
				}
			}
			action->queue = 1;
			for (n = 0; n < rss->num; ++n)
				action->queues[n] = rss->queue[n];
			action->queues_n = rss->num;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
			const struct rte_flow_action_mark *mark =
				(const struct rte_flow_action_mark *)
				actions->conf;

			if (!mark) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "mark must be defined");
				return -rte_errno;
			} else if (mark->id >= MLX5_FLOW_MARK_MAX) {
				rte_flow_error_set(error, ENOTSUP,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "mark must be between 0"
						   " and 16777199");
				return -rte_errno;
			}
			action->mark = 1;
			action->mark_id = mark->id;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
			action->mark = 1;
		} else {
			goto exit_action_not_supported;
		}
	}
	if (action->mark && !flow->ibv_attr && !action->drop)
		flow->offset += sizeof(struct ibv_exp_flow_spec_action_tag);
#ifdef HAVE_VERBS_IBV_EXP_FLOW_SPEC_ACTION_DROP
	if (!flow->ibv_attr && action->drop)
		flow->offset += sizeof(struct ibv_exp_flow_spec_action_drop);
#endif
	if (!action->queue && !action->drop) {
		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "no valid action");
		return -rte_errno;
	}
	return 0;
exit_item_not_supported:
	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
			   items, "item not supported");
	return -rte_errno;
exit_action_not_supported:
	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
			   actions, "action not supported");
	return -rte_errno;
}
/**
 * Validate a flow supported by the NIC.
 *
 * @see rte_flow_validate()
 * @see rte_flow_ops
 */
int
mlx5_flow_validate(struct rte_eth_dev *dev,
		   const struct rte_flow_attr *attr,
		   const struct rte_flow_item items[],
		   const struct rte_flow_action actions[],
		   struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;
	int ret;
	struct mlx5_flow flow = { .offset = sizeof(struct ibv_exp_flow_attr) };
	struct mlx5_flow_action action = {
		.queue = 0,
		.drop = 0,
		.mark = 0,
		.mark_id = MLX5_FLOW_MARK_DEFAULT,
		.queues_n = 0,
	};

	priv_lock(priv);
	ret = priv_flow_validate(priv, attr, items, actions, error, &flow,
				 &action);
	priv_unlock(priv);
	return ret;
}
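/*
 * Usage sketch (illustrative, application side, hypothetical values):
 *
 *   struct rte_flow_error err;
 *   struct rte_flow_attr attr = { .ingress = 1 };
 *   struct rte_flow_item pattern[] = {
 *           { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *           { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *           { .type = RTE_FLOW_ITEM_TYPE_END },
 *   };
 *   struct rte_flow_action_queue queue = { .index = 0 };
 *   struct rte_flow_action actions[] = {
 *           { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *           { .type = RTE_FLOW_ACTION_TYPE_END },
 *   };
 *   int ret = rte_flow_validate(port_id, &attr, pattern, actions, &err);
 *
 * rte_flow_validate() dispatches here through this PMD's rte_flow_ops.
 */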
/**
 * Convert Ethernet item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data)
{
	const struct rte_flow_item_eth *spec = item->spec;
	const struct rte_flow_item_eth *mask = item->mask;
	struct mlx5_flow *flow = (struct mlx5_flow *)data;
	struct ibv_exp_flow_spec_eth *eth;
	const unsigned int eth_size = sizeof(struct ibv_exp_flow_spec_eth);
	unsigned int i;

	++flow->ibv_attr->num_of_specs;
	flow->ibv_attr->priority = 2;
	flow->hash_fields = 0;
	eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
	*eth = (struct ibv_exp_flow_spec_eth) {
		.type = flow->inner | IBV_EXP_FLOW_SPEC_ETH,
		.size = eth_size,
	};
	if (!spec)
		return 0;
	if (!mask)
		mask = default_mask;
	memcpy(eth->val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
	memcpy(eth->val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
	eth->val.ether_type = spec->type;
	memcpy(eth->mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
	memcpy(eth->mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
	eth->mask.ether_type = mask->type;
	/* Remove unwanted bits from values. */
	for (i = 0; i < ETHER_ADDR_LEN; ++i) {
		eth->val.dst_mac[i] &= eth->mask.dst_mac[i];
		eth->val.src_mac[i] &= eth->mask.src_mac[i];
	}
	eth->val.ether_type &= eth->mask.ether_type;
	return 0;
}
/**
 * Convert VLAN item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_vlan(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data)
{
	const struct rte_flow_item_vlan *spec = item->spec;
	const struct rte_flow_item_vlan *mask = item->mask;
	struct mlx5_flow *flow = (struct mlx5_flow *)data;
	struct ibv_exp_flow_spec_eth *eth;
	const unsigned int eth_size = sizeof(struct ibv_exp_flow_spec_eth);

	eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset - eth_size);
	if (!spec)
		return 0;
	if (!mask)
		mask = default_mask;
	eth->val.vlan_tag = spec->tci;
	eth->mask.vlan_tag = mask->tci;
	eth->val.vlan_tag &= eth->mask.vlan_tag;
	return 0;
}
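/*
 * Note: unlike the other converters, the VLAN one does not append a new
 * specification; it patches the Ethernet spec written just before it
 * (hence the "- eth_size" offset and the zero dst_sz in its graph entry
 * above), since Verbs carries the VLAN TCI inside ibv_exp_flow_spec_eth.
 */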
/**
 * Convert IPv4 item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data)
{
	const struct rte_flow_item_ipv4 *spec = item->spec;
	const struct rte_flow_item_ipv4 *mask = item->mask;
	struct mlx5_flow *flow = (struct mlx5_flow *)data;
	struct ibv_exp_flow_spec_ipv4_ext *ipv4;
	unsigned int ipv4_size = sizeof(struct ibv_exp_flow_spec_ipv4_ext);

	++flow->ibv_attr->num_of_specs;
	flow->ibv_attr->priority = 1;
	flow->hash_fields = (IBV_EXP_RX_HASH_SRC_IPV4 |
			     IBV_EXP_RX_HASH_DST_IPV4);
	ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
	*ipv4 = (struct ibv_exp_flow_spec_ipv4_ext) {
		.type = flow->inner | IBV_EXP_FLOW_SPEC_IPV4_EXT,
		.size = ipv4_size,
	};
	if (!spec)
		return 0;
	if (!mask)
		mask = default_mask;
	ipv4->val = (struct ibv_exp_flow_ipv4_ext_filter){
		.src_ip = spec->hdr.src_addr,
		.dst_ip = spec->hdr.dst_addr,
		.proto = spec->hdr.next_proto_id,
		.tos = spec->hdr.type_of_service,
	};
	ipv4->mask = (struct ibv_exp_flow_ipv4_ext_filter){
		.src_ip = mask->hdr.src_addr,
		.dst_ip = mask->hdr.dst_addr,
		.proto = mask->hdr.next_proto_id,
		.tos = mask->hdr.type_of_service,
	};
	/* Remove unwanted bits from values. */
	ipv4->val.src_ip &= ipv4->mask.src_ip;
	ipv4->val.dst_ip &= ipv4->mask.dst_ip;
	ipv4->val.proto &= ipv4->mask.proto;
	ipv4->val.tos &= ipv4->mask.tos;
	return 0;
}
/**
 * Convert IPv6 item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data)
{
	const struct rte_flow_item_ipv6 *spec = item->spec;
	const struct rte_flow_item_ipv6 *mask = item->mask;
	struct mlx5_flow *flow = (struct mlx5_flow *)data;
	struct ibv_exp_flow_spec_ipv6_ext *ipv6;
	unsigned int ipv6_size = sizeof(struct ibv_exp_flow_spec_ipv6_ext);
	unsigned int i;

	++flow->ibv_attr->num_of_specs;
	flow->ibv_attr->priority = 1;
	flow->hash_fields = (IBV_EXP_RX_HASH_SRC_IPV6 |
			     IBV_EXP_RX_HASH_DST_IPV6);
	ipv6 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
	*ipv6 = (struct ibv_exp_flow_spec_ipv6_ext) {
		.type = flow->inner | IBV_EXP_FLOW_SPEC_IPV6_EXT,
		.size = ipv6_size,
	};
	if (!spec)
		return 0;
	if (!mask)
		mask = default_mask;
	memcpy(ipv6->val.src_ip, spec->hdr.src_addr,
	       RTE_DIM(ipv6->val.src_ip));
	memcpy(ipv6->val.dst_ip, spec->hdr.dst_addr,
	       RTE_DIM(ipv6->val.dst_ip));
	memcpy(ipv6->mask.src_ip, mask->hdr.src_addr,
	       RTE_DIM(ipv6->mask.src_ip));
	memcpy(ipv6->mask.dst_ip, mask->hdr.dst_addr,
	       RTE_DIM(ipv6->mask.dst_ip));
	ipv6->mask.flow_label = mask->hdr.vtc_flow;
	ipv6->mask.next_hdr = mask->hdr.proto;
	ipv6->mask.hop_limit = mask->hdr.hop_limits;
	/* Remove unwanted bits from values. */
	for (i = 0; i < RTE_DIM(ipv6->val.src_ip); ++i) {
		ipv6->val.src_ip[i] &= ipv6->mask.src_ip[i];
		ipv6->val.dst_ip[i] &= ipv6->mask.dst_ip[i];
	}
	ipv6->val.flow_label &= ipv6->mask.flow_label;
	ipv6->val.next_hdr &= ipv6->mask.next_hdr;
	ipv6->val.hop_limit &= ipv6->mask.hop_limit;
	return 0;
}
/**
 * Convert UDP item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_udp(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data)
{
	const struct rte_flow_item_udp *spec = item->spec;
	const struct rte_flow_item_udp *mask = item->mask;
	struct mlx5_flow *flow = (struct mlx5_flow *)data;
	struct ibv_exp_flow_spec_tcp_udp *udp;
	unsigned int udp_size = sizeof(struct ibv_exp_flow_spec_tcp_udp);

	++flow->ibv_attr->num_of_specs;
	flow->ibv_attr->priority = 0;
	flow->hash_fields |= (IBV_EXP_RX_HASH_SRC_PORT_UDP |
			      IBV_EXP_RX_HASH_DST_PORT_UDP);
	udp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
	*udp = (struct ibv_exp_flow_spec_tcp_udp) {
		.type = flow->inner | IBV_EXP_FLOW_SPEC_UDP,
		.size = udp_size,
	};
	if (!spec)
		return 0;
	if (!mask)
		mask = default_mask;
	udp->val.dst_port = spec->hdr.dst_port;
	udp->val.src_port = spec->hdr.src_port;
	udp->mask.dst_port = mask->hdr.dst_port;
	udp->mask.src_port = mask->hdr.src_port;
	/* Remove unwanted bits from values. */
	udp->val.src_port &= udp->mask.src_port;
	udp->val.dst_port &= udp->mask.dst_port;
	return 0;
}
/**
 * Convert TCP item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_tcp(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data)
{
	const struct rte_flow_item_tcp *spec = item->spec;
	const struct rte_flow_item_tcp *mask = item->mask;
	struct mlx5_flow *flow = (struct mlx5_flow *)data;
	struct ibv_exp_flow_spec_tcp_udp *tcp;
	unsigned int tcp_size = sizeof(struct ibv_exp_flow_spec_tcp_udp);

	++flow->ibv_attr->num_of_specs;
	flow->ibv_attr->priority = 0;
	flow->hash_fields |= (IBV_EXP_RX_HASH_SRC_PORT_TCP |
			      IBV_EXP_RX_HASH_DST_PORT_TCP);
	tcp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
	*tcp = (struct ibv_exp_flow_spec_tcp_udp) {
		.type = flow->inner | IBV_EXP_FLOW_SPEC_TCP,
		.size = tcp_size,
	};
	if (!spec)
		return 0;
	if (!mask)
		mask = default_mask;
	tcp->val.dst_port = spec->hdr.dst_port;
	tcp->val.src_port = spec->hdr.src_port;
	tcp->mask.dst_port = mask->hdr.dst_port;
	tcp->mask.src_port = mask->hdr.src_port;
	/* Remove unwanted bits from values. */
	tcp->val.src_port &= tcp->mask.src_port;
	tcp->val.dst_port &= tcp->mask.dst_port;
	return 0;
}
/**
 * Convert VXLAN item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
		       const void *default_mask,
		       void *data)
{
	const struct rte_flow_item_vxlan *spec = item->spec;
	const struct rte_flow_item_vxlan *mask = item->mask;
	struct mlx5_flow *flow = (struct mlx5_flow *)data;
	struct ibv_exp_flow_spec_tunnel *vxlan;
	unsigned int size = sizeof(struct ibv_exp_flow_spec_tunnel);
	union vni {
		uint32_t vlan_id;
		uint8_t vni[4];
	} id;

	++flow->ibv_attr->num_of_specs;
	flow->ibv_attr->priority = 0;
	id.vni[0] = 0;
	vxlan = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
	*vxlan = (struct ibv_exp_flow_spec_tunnel) {
		.type = flow->inner | IBV_EXP_FLOW_SPEC_VXLAN_TUNNEL,
		.size = size,
	};
	flow->inner = IBV_EXP_FLOW_SPEC_INNER;
	if (!spec)
		return 0;
	if (!mask)
		mask = default_mask;
	memcpy(&id.vni[1], spec->vni, 3);
	vxlan->val.tunnel_id = id.vlan_id;
	memcpy(&id.vni[1], mask->vni, 3);
	vxlan->mask.tunnel_id = id.vlan_id;
	/* Remove unwanted bits from values. */
	vxlan->val.tunnel_id &= vxlan->mask.tunnel_id;
	return 0;
}
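/*
 * Packing sketch (illustrative): the 24-bit VNI is copied into the three
 * upper bytes of the 4-byte id union, e.g. VNI 0x123456 yields bytes
 * { 0x00, 0x12, 0x34, 0x56 }; read through id.vlan_id those bytes form
 * the 32-bit tunnel_id handed to Verbs, preserving network byte order.
 */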
/**
 * Convert mark/flag action to Verbs specification.
 *
 * @param flow
 *   Pointer to MLX5 flow structure.
 * @param mark_id
 *   Mark identifier.
 */
static int
mlx5_flow_create_flag_mark(struct mlx5_flow *flow, uint32_t mark_id)
{
	struct ibv_exp_flow_spec_action_tag *tag;
	unsigned int size = sizeof(struct ibv_exp_flow_spec_action_tag);

	tag = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
	*tag = (struct ibv_exp_flow_spec_action_tag){
		.type = IBV_EXP_FLOW_SPEC_ACTION_TAG,
		.size = size,
		.tag_id = mlx5_flow_mark_set(mark_id),
	};
	++flow->ibv_attr->num_of_specs;
	return 0;
}
/**
 * Complete flow rule creation with a drop queue.
 *
 * @param priv
 *   Pointer to private structure.
 * @param flow
 *   MLX5 flow attributes (filled by mlx5_flow_validate()).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A flow if the rule could be created.
 */
static struct rte_flow *
priv_flow_create_action_queue_drop(struct priv *priv,
				   struct mlx5_flow *flow,
				   struct rte_flow_error *error)
{
	struct rte_flow *rte_flow;
#ifdef HAVE_VERBS_IBV_EXP_FLOW_SPEC_ACTION_DROP
	struct ibv_exp_flow_spec_action_drop *drop;
	unsigned int size = sizeof(struct ibv_exp_flow_spec_action_drop);
#endif

	assert(priv->pd);
	assert(priv->ctx);
	rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow), 0);
	if (!rte_flow) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "cannot allocate flow memory");
		return NULL;
	}
	rte_flow->drop = 1;
#ifdef HAVE_VERBS_IBV_EXP_FLOW_SPEC_ACTION_DROP
	drop = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
	*drop = (struct ibv_exp_flow_spec_action_drop){
		.type = IBV_EXP_FLOW_SPEC_ACTION_DROP,
		.size = size,
	};
	++flow->ibv_attr->num_of_specs;
	flow->offset += sizeof(struct ibv_exp_flow_spec_action_drop);
#endif
	rte_flow->ibv_attr = flow->ibv_attr;
	if (!priv->started)
		return rte_flow;
	rte_flow->qp = priv->flow_drop_queue->qp;
	rte_flow->ibv_flow = ibv_exp_create_flow(rte_flow->qp,
						 rte_flow->ibv_attr);
	if (!rte_flow->ibv_flow) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "flow rule creation failure");
		goto error;
	}
	return rte_flow;
error:
	assert(rte_flow);
	rte_free(rte_flow);
	return NULL;
}
/**
 * Complete flow rule creation.
 *
 * @param priv
 *   Pointer to private structure.
 * @param flow
 *   MLX5 flow attributes (filled by mlx5_flow_validate()).
 * @param action
 *   Target action structure.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A flow if the rule could be created.
 */
static struct rte_flow *
priv_flow_create_action_queue(struct priv *priv,
			      struct mlx5_flow *flow,
			      struct mlx5_flow_action *action,
			      struct rte_flow_error *error)
{
	struct rte_flow *rte_flow;
	unsigned int i;
	unsigned int j;
	const unsigned int wqs_n = 1 << log2above(action->queues_n);
	struct ibv_exp_wq *wqs[wqs_n];

	assert(priv->pd);
	assert(priv->ctx);
	assert(!action->drop);
	rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow) +
			      sizeof(*rte_flow->rxqs) * action->queues_n, 0);
	if (!rte_flow) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "cannot allocate flow memory");
		return NULL;
	}
	for (i = 0; i < action->queues_n; ++i) {
		struct rxq_ctrl *rxq;

		rxq = container_of((*priv->rxqs)[action->queues[i]],
				   struct rxq_ctrl, rxq);
		wqs[i] = rxq->wq;
		rte_flow->rxqs[i] = &rxq->rxq;
		++rte_flow->rxqs_n;
		rxq->rxq.mark |= action->mark;
	}
	/* Finalise indirection table. */
	for (j = 0; i < wqs_n; ++i, ++j) {
		wqs[i] = wqs[j];
		if (j == action->queues_n)
			j = 0;
	}
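	/*
	 * Wrap-around sketch (illustrative): the indirection table size must
	 * be a power of two, so with 3 queues wqs_n is 4 and the loop above
	 * replays earlier entries, e.g. wqs[3] = wqs[0]. Traffic hashed to a
	 * duplicated slot simply lands on that queue again.
	 */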
	rte_flow->mark = action->mark;
	rte_flow->ibv_attr = flow->ibv_attr;
	rte_flow->hash_fields = flow->hash_fields;
	rte_flow->ind_table = ibv_exp_create_rwq_ind_table(
		priv->ctx,
		&(struct ibv_exp_rwq_ind_table_init_attr){
			.pd = priv->pd,
			.log_ind_tbl_size = log2above(action->queues_n),
			.ind_tbl = wqs,
			.comp_mask = 0,
		});
	if (!rte_flow->ind_table) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "cannot allocate indirection table");
		goto error;
	}
	rte_flow->qp = ibv_exp_create_qp(
		priv->ctx,
		&(struct ibv_exp_qp_init_attr){
			.qp_type = IBV_QPT_RAW_PACKET,
			.comp_mask =
				IBV_EXP_QP_INIT_ATTR_PD |
				IBV_EXP_QP_INIT_ATTR_PORT |
				IBV_EXP_QP_INIT_ATTR_RX_HASH,
			.pd = priv->pd,
			.rx_hash_conf = &(struct ibv_exp_rx_hash_conf){
				.rx_hash_function =
					IBV_EXP_RX_HASH_FUNC_TOEPLITZ,
				.rx_hash_key_len = rss_hash_default_key_len,
				.rx_hash_key = rss_hash_default_key,
				.rx_hash_fields_mask = rte_flow->hash_fields,
				.rwq_ind_tbl = rte_flow->ind_table,
			},
			.port_num = priv->port,
		});
	if (!rte_flow->qp) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "cannot allocate QP");
		goto error;
	}
	if (!priv->started)
		return rte_flow;
	rte_flow->ibv_flow = ibv_exp_create_flow(rte_flow->qp,
						 rte_flow->ibv_attr);
	if (!rte_flow->ibv_flow) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "flow rule creation failure");
		goto error;
	}
	return rte_flow;
error:
	assert(rte_flow);
	if (rte_flow->qp)
		ibv_destroy_qp(rte_flow->qp);
	if (rte_flow->ind_table)
		ibv_exp_destroy_rwq_ind_table(rte_flow->ind_table);
	rte_free(rte_flow);
	return NULL;
}
/**
 * Convert a flow.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A flow on success, NULL otherwise.
 */
static struct rte_flow *
priv_flow_create(struct priv *priv,
		 const struct rte_flow_attr *attr,
		 const struct rte_flow_item items[],
		 const struct rte_flow_action actions[],
		 struct rte_flow_error *error)
{
	struct rte_flow *rte_flow;
	struct mlx5_flow flow = { .offset = sizeof(struct ibv_exp_flow_attr), };
	struct mlx5_flow_action action = {
		.queue = 0,
		.drop = 0,
		.mark = 0,
		.mark_id = MLX5_FLOW_MARK_DEFAULT,
		.queues_n = 0,
	};
	int err;

	err = priv_flow_validate(priv, attr, items, actions, error, &flow,
				 &action);
	if (err)
		goto exit;
	flow.ibv_attr = rte_malloc(__func__, flow.offset, 0);
	flow.offset = sizeof(struct ibv_exp_flow_attr);
	if (!flow.ibv_attr) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "cannot allocate ibv_attr memory");
		goto exit;
	}
	*flow.ibv_attr = (struct ibv_exp_flow_attr){
		.type = IBV_EXP_FLOW_ATTR_NORMAL,
		.size = sizeof(struct ibv_exp_flow_attr),
		.priority = attr->priority,
		.num_of_specs = 0,
		.port = 0,
		.flags = 0,
		.reserved = 0,
	};
	flow.inner = 0;
	flow.hash_fields = 0;
	claim_zero(priv_flow_validate(priv, attr, items, actions,
				      error, &flow, &action));
	if (action.mark && !action.drop) {
		mlx5_flow_create_flag_mark(&flow, action.mark_id);
		flow.offset += sizeof(struct ibv_exp_flow_spec_action_tag);
	}
	if (action.drop)
		rte_flow =
			priv_flow_create_action_queue_drop(priv, &flow, error);
	else
		rte_flow = priv_flow_create_action_queue(priv, &flow, &action,
							 error);
	if (!rte_flow)
		goto exit;
	return rte_flow;
exit:
	rte_free(flow.ibv_attr);
	return NULL;
}
/**
 * Create a flow.
 *
 * @see rte_flow_create()
 * @see rte_flow_ops
 */
struct rte_flow *
mlx5_flow_create(struct rte_eth_dev *dev,
		 const struct rte_flow_attr *attr,
		 const struct rte_flow_item items[],
		 const struct rte_flow_action actions[],
		 struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;
	struct rte_flow *flow;

	priv_lock(priv);
	flow = priv_flow_create(priv, attr, items, actions, error);
	if (flow) {
		TAILQ_INSERT_TAIL(&priv->flows, flow, next);
		DEBUG("Flow created %p", (void *)flow);
	}
	priv_unlock(priv);
	return flow;
}
/**
 * Destroy a flow.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] flow
 *   Flow to destroy.
 */
static void
priv_flow_destroy(struct priv *priv,
		  struct rte_flow *flow)
{
	TAILQ_REMOVE(&priv->flows, flow, next);
	if (flow->ibv_flow)
		claim_zero(ibv_exp_destroy_flow(flow->ibv_flow));
	if (flow->drop)
		goto free;
	if (flow->qp)
		claim_zero(ibv_destroy_qp(flow->qp));
	if (flow->ind_table)
		claim_zero(ibv_exp_destroy_rwq_ind_table(flow->ind_table));
	if (flow->drop && flow->wq)
		claim_zero(ibv_exp_destroy_wq(flow->wq));
	if (flow->drop && flow->cq)
		claim_zero(ibv_destroy_cq(flow->cq));
	if (flow->mark) {
		struct rte_flow *tmp;
		struct rxq *rxq;
		uint32_t mark_n = 0;
		uint32_t queue_n;

		/*
		 * To remove the mark from the queue, the queue must not be
		 * present in any other marked flow (RSS or not).
		 */
		for (queue_n = 0; queue_n < flow->rxqs_n; ++queue_n) {
			rxq = flow->rxqs[queue_n];
			for (tmp = TAILQ_FIRST(&priv->flows);
			     tmp;
			     tmp = TAILQ_NEXT(tmp, next)) {
				uint32_t tqueue_n;

				if (tmp->drop)
					continue;
				for (tqueue_n = 0;
				     tqueue_n < tmp->rxqs_n;
				     ++tqueue_n) {
					struct rxq *trxq;

					trxq = tmp->rxqs[tqueue_n];
					if (rxq == trxq)
						++mark_n;
				}
			}
			rxq->mark = !!mark_n;
			mark_n = 0;
		}
	}
free:
	rte_free(flow->ibv_attr);
	DEBUG("Flow destroyed %p", (void *)flow);
	rte_free(flow);
}
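/*
 * Example (illustrative): if flows A and B both mark queue 3, destroying
 * A leaves mark_n > 0 for queue 3 because B still references it, so
 * rxq->mark stays set; it is cleared only when the last marked flow
 * using the queue goes away.
 */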
/**
 * Destroy a flow.
 *
 * @see rte_flow_destroy()
 * @see rte_flow_ops
 */
int
mlx5_flow_destroy(struct rte_eth_dev *dev,
		  struct rte_flow *flow,
		  struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	(void)error;
	priv_lock(priv);
	priv_flow_destroy(priv, flow);
	priv_unlock(priv);
	return 0;
}
/**
 * Destroy all flows.
 *
 * @param priv
 *   Pointer to private structure.
 */
static void
priv_flow_flush(struct priv *priv)
{
	while (!TAILQ_EMPTY(&priv->flows)) {
		struct rte_flow *flow;

		flow = TAILQ_FIRST(&priv->flows);
		priv_flow_destroy(priv, flow);
	}
}
/**
 * Destroy all flows.
 *
 * @see rte_flow_flush()
 * @see rte_flow_ops
 */
int
mlx5_flow_flush(struct rte_eth_dev *dev,
		struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	(void)error;
	priv_lock(priv);
	priv_flow_flush(priv);
	priv_unlock(priv);
	return 0;
}
/**
 * Create drop queue.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   0 on success.
 */
static int
priv_flow_create_drop_queue(struct priv *priv)
{
	struct rte_flow_drop *fdq = NULL;
	unsigned int i;

	assert(priv->pd);
	assert(priv->ctx);
	fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
	if (!fdq) {
		WARN("cannot allocate memory for drop queue");
		return -1;
	}
	fdq->cq = ibv_exp_create_cq(priv->ctx, 1, NULL, NULL, 0,
				    &(struct ibv_exp_cq_init_attr){
					.comp_mask = 0,
				    });
	if (!fdq->cq) {
		WARN("cannot allocate CQ for drop queue");
		goto error;
	}
	for (i = 0; i != MLX5_DROP_WQ_N; ++i) {
		fdq->wqs[i] = ibv_exp_create_wq(priv->ctx,
				&(struct ibv_exp_wq_init_attr){
					.wq_type = IBV_EXP_WQT_RQ,
					.max_recv_wr = 1,
					.max_recv_sge = 1,
					.pd = priv->pd,
					.cq = fdq->cq,
				});
		if (!fdq->wqs[i]) {
			WARN("cannot allocate WQ for drop queue");
			goto error;
		}
	}
	fdq->ind_table = ibv_exp_create_rwq_ind_table(priv->ctx,
			&(struct ibv_exp_rwq_ind_table_init_attr){
				.pd = priv->pd,
				.log_ind_tbl_size = 0,
				.ind_tbl = fdq->wqs,
				.comp_mask = 0,
			});
	if (!fdq->ind_table) {
		WARN("cannot allocate indirection table for drop queue");
		goto error;
	}
	fdq->qp = ibv_exp_create_qp(priv->ctx,
		&(struct ibv_exp_qp_init_attr){
			.qp_type = IBV_QPT_RAW_PACKET,
			.comp_mask =
				IBV_EXP_QP_INIT_ATTR_PD |
				IBV_EXP_QP_INIT_ATTR_PORT |
				IBV_EXP_QP_INIT_ATTR_RX_HASH,
			.pd = priv->pd,
			.rx_hash_conf = &(struct ibv_exp_rx_hash_conf){
				.rx_hash_function =
					IBV_EXP_RX_HASH_FUNC_TOEPLITZ,
				.rx_hash_key_len = rss_hash_default_key_len,
				.rx_hash_key = rss_hash_default_key,
				.rx_hash_fields_mask = 0,
				.rwq_ind_tbl = fdq->ind_table,
			},
			.port_num = priv->port,
		});
	if (!fdq->qp) {
		WARN("cannot allocate QP for drop queue");
		goto error;
	}
	priv->flow_drop_queue = fdq;
	return 0;
error:
	if (fdq->qp)
		claim_zero(ibv_destroy_qp(fdq->qp));
	if (fdq->ind_table)
		claim_zero(ibv_exp_destroy_rwq_ind_table(fdq->ind_table));
	for (i = 0; i != MLX5_DROP_WQ_N; ++i) {
		if (fdq->wqs[i])
			claim_zero(ibv_exp_destroy_wq(fdq->wqs[i]));
	}
	if (fdq->cq)
		claim_zero(ibv_destroy_cq(fdq->cq));
	rte_free(fdq);
	priv->flow_drop_queue = NULL;
	return -1;
}
/**
 * Delete drop queue.
 *
 * @param priv
 *   Pointer to private structure.
 */
static void
priv_flow_delete_drop_queue(struct priv *priv)
{
	struct rte_flow_drop *fdq = priv->flow_drop_queue;
	unsigned int i;

	if (!fdq)
		return;
	if (fdq->qp)
		claim_zero(ibv_destroy_qp(fdq->qp));
	if (fdq->ind_table)
		claim_zero(ibv_exp_destroy_rwq_ind_table(fdq->ind_table));
	for (i = 0; i != MLX5_DROP_WQ_N; ++i) {
		if (fdq->wqs[i])
			claim_zero(ibv_exp_destroy_wq(fdq->wqs[i]));
	}
	if (fdq->cq)
		claim_zero(ibv_destroy_cq(fdq->cq));
	rte_free(fdq);
	priv->flow_drop_queue = NULL;
}
/**
 * Remove all flows.
 *
 * Called by dev_stop() to remove all flows.
 *
 * @param priv
 *   Pointer to private structure.
 */
void
priv_flow_stop(struct priv *priv)
{
	struct rte_flow *flow;

	TAILQ_FOREACH_REVERSE(flow, &priv->flows, mlx5_flows, next) {
		claim_zero(ibv_exp_destroy_flow(flow->ibv_flow));
		flow->ibv_flow = NULL;
		if (flow->mark) {
			unsigned int n;

			for (n = 0; n < flow->rxqs_n; ++n)
				flow->rxqs[n]->mark = 0;
		}
		DEBUG("Flow %p removed", (void *)flow);
	}
	priv_flow_delete_drop_queue(priv);
}
/**
 * Add all flows.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   0 on success, an errno value otherwise and rte_errno is set.
 */
int
priv_flow_start(struct priv *priv)
{
	int ret;
	struct rte_flow *flow;

	ret = priv_flow_create_drop_queue(priv);
	if (ret)
		return -1;
	TAILQ_FOREACH(flow, &priv->flows, next) {
		struct ibv_qp *qp;

		if (flow->drop)
			qp = priv->flow_drop_queue->qp;
		else
			qp = flow->qp;
		flow->ibv_flow = ibv_exp_create_flow(qp, flow->ibv_attr);
		if (!flow->ibv_flow) {
			DEBUG("Flow %p cannot be applied", (void *)flow);
			rte_errno = EINVAL;
			return rte_errno;
		}
		DEBUG("Flow %p applied", (void *)flow);
		if (flow->mark) {
			unsigned int n;

			for (n = 0; n < flow->rxqs_n; ++n)
				flow->rxqs[n]->mark = 1;
		}
	}
	return 0;
}
/**
 * Verify if the Rx queue is used in a flow.
 *
 * @param priv
 *   Pointer to private structure.
 * @param rxq
 *   Pointer to the queue to search.
 *
 * @return
 *   Nonzero if the queue is used by a flow.
 */
int
priv_flow_rxq_in_use(struct priv *priv, struct rxq *rxq)
{
	struct rte_flow *flow;

	for (flow = TAILQ_FIRST(&priv->flows);
	     flow;
	     flow = TAILQ_NEXT(flow, next)) {
		unsigned int n;

		if (flow->drop)
			continue;
		for (n = 0; n < flow->rxqs_n; ++n) {
			if (flow->rxqs[n] == rxq)
				return 1;
		}
	}
	return 0;
}
/**
 * Isolated mode.
 *
 * @see rte_flow_isolate()
 * @see rte_flow_ops
 */
int
mlx5_flow_isolate(struct rte_eth_dev *dev,
		  int enable,
		  struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	priv_lock(priv);
	if (priv->started) {
		rte_flow_error_set(error, EBUSY,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "port must be stopped first");
		priv_unlock(priv);
		return -rte_errno;
	}
	priv->isolated = !!enable;
	priv_unlock(priv);
	return 0;
}
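/*
 * Usage sketch (illustrative, application side): isolated mode is toggled
 * through the generic rte_flow API while the port is stopped, e.g.
 *
 *   struct rte_flow_error err;
 *   if (rte_flow_isolate(port_id, 1, &err))
 *           printf("isolate failed: %s\n",
 *                  err.message ? err.message : "(no message)");
 *
 * which reaches this handler through the PMD's rte_flow_ops.
 */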