4 * Copyright 2016 6WIND S.A.
5 * Copyright 2016 Mellanox.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of 6WIND S.A. nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 #include <sys/queue.h>
38 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
40 #pragma GCC diagnostic ignored "-Wpedantic"
42 #include <infiniband/verbs.h>
44 #pragma GCC diagnostic error "-Wpedantic"
47 #include <rte_ethdev.h>
49 #include <rte_flow_driver.h>
50 #include <rte_malloc.h>
/*
 * Forward declarations for the per-item conversion callbacks wired into the
 * mlx5_flow_items[] graph below. Each converts one rte_flow item into its
 * Verbs (ibv_exp_flow_spec_*) equivalent inside the attribute buffer.
 * NOTE(review): the "static int" return-type lines and the trailing
 * "struct mlx5_flow *data);" parameter lines appear to be missing from this
 * copy of the file — confirm against the original source.
 */
56 mlx5_flow_create_eth(const struct rte_flow_item *item,
57 const void *default_mask,
61 mlx5_flow_create_vlan(const struct rte_flow_item *item,
62 const void *default_mask,
66 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
67 const void *default_mask,
71 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
72 const void *default_mask,
76 mlx5_flow_create_udp(const struct rte_flow_item *item,
77 const void *default_mask,
81 mlx5_flow_create_tcp(const struct rte_flow_item *item,
82 const void *default_mask,
86 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
87 const void *default_mask,
/*
 * PMD-private flow handle returned to the application. Holds the Verbs
 * objects (flow attribute buffer, QP, WQ/CQ for drop flows, indirection
 * table) plus the list of Rx queues the flow targets.
 * NOTE(review): the "struct rte_flow {" opener appears to be missing from
 * this copy — confirm against the original source.
 */
91 LIST_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
92 struct ibv_exp_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
93 struct ibv_exp_rwq_ind_table *ind_table; /**< Indirection table. */
94 struct ibv_qp *qp; /**< Verbs queue pair. */
95 struct ibv_exp_flow *ibv_flow; /**< Verbs flow. */
96 struct ibv_exp_wq *wq; /**< Verbs work queue. */
97 struct ibv_cq *cq; /**< Verbs completion queue. */
98 struct rxq *(*rxqs)[]; /**< Pointer to the queues array. */
99 uint16_t rxqs_n; /**< Number of queues in this flow, 0 if drop queue. */
100 uint32_t mark:1; /**< Set if the flow is marked. */
101 uint32_t drop:1; /**< Drop queue. */
102 uint64_t hash_fields; /**< Fields that participate in the hash. */
/*
 * ITEMS(...) expands to an anonymous, END-terminated array of item types,
 * used to declare the legal successors of each item in mlx5_flow_items[].
 * NOTE(review): the "#define ITEMS(...)" line itself appears to be missing
 * from this copy — confirm against the original source.
 */
105 /** Static initializer for items. */
107 (const enum rte_flow_item_type []){ \
108 __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
111 /** Structure to generate a simple graph of layers supported by the NIC. */
112 struct mlx5_flow_items {
113 /** List of possible actions for these items. */
114 const enum rte_flow_action_type *const actions;
115 /** Bit-masks corresponding to the possibilities for the item. */
118 * Default bit-masks to use when item->mask is not provided. When
119 * \default_mask is also NULL, the full supported bit-mask (\mask) is
122 const void *default_mask;
123 /** Bit-masks size in bytes. */
124 const unsigned int mask_sz;
126 * Conversion function from rte_flow to NIC specific flow.
129 * rte_flow item to convert.
130 * @param default_mask
131 * Default bit-masks to use when item->mask is not provided.
133 * Internal structure to store the conversion.
136 * 0 on success, negative value otherwise.
/* Callback that emits the corresponding Verbs spec into the ibv_attr buffer. */
138 int (*convert)(const struct rte_flow_item *item,
139 const void *default_mask,
141 /** Size in bytes of the destination structure. */
142 const unsigned int dst_sz;
143 /** List of possible following items. */
144 const enum rte_flow_item_type *const items;
/* Actions accepted for every item of the graph: terminate in a drop queue,
 * a single Rx queue, or tag matched packets with MARK/FLAG metadata. */
147 /** Valid action for this PMD. */
148 static const enum rte_flow_action_type valid_actions[] = {
149 RTE_FLOW_ACTION_TYPE_DROP,
150 RTE_FLOW_ACTION_TYPE_QUEUE,
151 RTE_FLOW_ACTION_TYPE_MARK,
152 RTE_FLOW_ACTION_TYPE_FLAG,
153 RTE_FLOW_ACTION_TYPE_END,
/*
 * Item graph: indexed by rte_flow item type, each entry lists the allowed
 * follow-up items, the supported/default match masks, the conversion
 * callback, and the size of the resulting Verbs spec (used to pre-compute
 * the ibv_attr buffer size during validation).
 * NOTE(review): several closing braces and some mask initializer lines
 * appear to be missing from this copy — confirm against the original.
 */
156 /** Graph of supported items and associated actions. */
157 static const struct mlx5_flow_items mlx5_flow_items[] = {
/* Pattern entry point: a flow may start with ETH or (outer-less) VXLAN. */
158 [RTE_FLOW_ITEM_TYPE_END] = {
159 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
160 RTE_FLOW_ITEM_TYPE_VXLAN),
162 [RTE_FLOW_ITEM_TYPE_ETH] = {
163 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
164 RTE_FLOW_ITEM_TYPE_IPV4,
165 RTE_FLOW_ITEM_TYPE_IPV6),
166 .actions = valid_actions,
167 .mask = &(const struct rte_flow_item_eth){
168 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
169 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
172 .default_mask = &rte_flow_item_eth_mask,
173 .mask_sz = sizeof(struct rte_flow_item_eth),
174 .convert = mlx5_flow_create_eth,
175 .dst_sz = sizeof(struct ibv_exp_flow_spec_eth),
177 [RTE_FLOW_ITEM_TYPE_VLAN] = {
178 .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
179 RTE_FLOW_ITEM_TYPE_IPV6),
180 .actions = valid_actions,
181 .mask = &(const struct rte_flow_item_vlan){
184 .default_mask = &rte_flow_item_vlan_mask,
185 .mask_sz = sizeof(struct rte_flow_item_vlan),
186 .convert = mlx5_flow_create_vlan,
/* NOTE(review): VLAN has no .dst_sz here — it patches the preceding ETH
 * spec in place (see mlx5_flow_create_vlan), so no extra space is needed. */
189 [RTE_FLOW_ITEM_TYPE_IPV4] = {
190 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
191 RTE_FLOW_ITEM_TYPE_TCP),
192 .actions = valid_actions,
193 .mask = &(const struct rte_flow_item_ipv4){
197 .type_of_service = -1,
201 .default_mask = &rte_flow_item_ipv4_mask,
202 .mask_sz = sizeof(struct rte_flow_item_ipv4),
203 .convert = mlx5_flow_create_ipv4,
204 .dst_sz = sizeof(struct ibv_exp_flow_spec_ipv4_ext),
206 [RTE_FLOW_ITEM_TYPE_IPV6] = {
207 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
208 RTE_FLOW_ITEM_TYPE_TCP),
209 .actions = valid_actions,
210 .mask = &(const struct rte_flow_item_ipv6){
213 0xff, 0xff, 0xff, 0xff,
214 0xff, 0xff, 0xff, 0xff,
215 0xff, 0xff, 0xff, 0xff,
216 0xff, 0xff, 0xff, 0xff,
219 0xff, 0xff, 0xff, 0xff,
220 0xff, 0xff, 0xff, 0xff,
221 0xff, 0xff, 0xff, 0xff,
222 0xff, 0xff, 0xff, 0xff,
229 .default_mask = &rte_flow_item_ipv6_mask,
230 .mask_sz = sizeof(struct rte_flow_item_ipv6),
231 .convert = mlx5_flow_create_ipv6,
232 .dst_sz = sizeof(struct ibv_exp_flow_spec_ipv6_ext),
234 [RTE_FLOW_ITEM_TYPE_UDP] = {
235 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
236 .actions = valid_actions,
237 .mask = &(const struct rte_flow_item_udp){
243 .default_mask = &rte_flow_item_udp_mask,
244 .mask_sz = sizeof(struct rte_flow_item_udp),
245 .convert = mlx5_flow_create_udp,
246 .dst_sz = sizeof(struct ibv_exp_flow_spec_tcp_udp),
248 [RTE_FLOW_ITEM_TYPE_TCP] = {
249 .actions = valid_actions,
250 .mask = &(const struct rte_flow_item_tcp){
256 .default_mask = &rte_flow_item_tcp_mask,
257 .mask_sz = sizeof(struct rte_flow_item_tcp),
258 .convert = mlx5_flow_create_tcp,
259 .dst_sz = sizeof(struct ibv_exp_flow_spec_tcp_udp),
/* VXLAN restarts the graph at (inner) ETH. */
261 [RTE_FLOW_ITEM_TYPE_VXLAN] = {
262 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
263 .actions = valid_actions,
264 .mask = &(const struct rte_flow_item_vxlan){
265 .vni = "\xff\xff\xff",
267 .default_mask = &rte_flow_item_vxlan_mask,
268 .mask_sz = sizeof(struct rte_flow_item_vxlan),
269 .convert = mlx5_flow_create_vxlan,
270 .dst_sz = sizeof(struct ibv_exp_flow_spec_tunnel),
/* Scratch state threaded through validation and the convert callbacks:
 * tracks where the next Verbs spec goes (offset), whether we are past a
 * VXLAN item (inner), and the accumulated RSS hash fields.
 * NOTE(review): the "struct mlx5_flow {" opener appears to be missing from
 * this copy — confirm against the original source. */
274 /** Structure to pass to the conversion function. */
276 struct ibv_exp_flow_attr *ibv_attr; /**< Verbs attribute. */
277 unsigned int offset; /**< Offset in bytes in the ibv_attr buffer. */
278 uint32_t inner; /**< Set once VXLAN is encountered. */
279 uint64_t hash_fields; /**< Fields that participate in the hash. */
/* Summary of the action list gathered during validation/creation. */
282 struct mlx5_flow_action {
283 uint32_t queue:1; /**< Target is a receive queue. */
284 uint32_t drop:1; /**< Target is a drop queue. */
285 uint32_t mark:1; /**< Mark is present in the flow. */
286 uint32_t mark_id; /**< Mark identifier. */
287 uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use. */
288 uint16_t queues_n; /**< Number of entries in queue[]. */
/*
 * Verify that an item's spec/last/mask bytes only use bits covered by the
 * supported mask, and that a spec..last range is not inverted.
 * NOTE(review): several statement lines (returns, local declarations, the
 * closing brace) appear to be missing from this copy — confirm against the
 * original source.
 */
292 * Check support for a given item.
295 * Item specification.
297 * Bit-masks covering supported fields to compare with spec, last and mask in
300 * Bit-Mask size in bytes.
306 mlx5_flow_item_validate(const struct rte_flow_item *item,
307 const uint8_t *mask, unsigned int size)
/* mask/last without spec makes no sense. */
311 if (!item->spec && (item->mask || item->last))
313 if (item->spec && !item->mask) {
315 const uint8_t *spec = item->spec;
/* Reject spec bits outside the supported mask. */
317 for (i = 0; i < size; ++i)
318 if ((spec[i] | mask[i]) != mask[i])
321 if (item->last && !item->mask) {
323 const uint8_t *spec = item->last;
325 for (i = 0; i < size; ++i)
326 if ((spec[i] | mask[i]) != mask[i])
331 const uint8_t *spec = item->mask;
333 for (i = 0; i < size; ++i)
334 if ((spec[i] | mask[i]) != mask[i])
337 if (item->spec && item->last) {
340 const uint8_t *apply = mask;
/* Mask both ends of the range and require them equal (no real range
 * support — presumably the hardware cannot match ranges; TODO confirm). */
345 for (i = 0; i < size; ++i) {
346 spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
347 last[i] = ((const uint8_t *)item->last)[i] & apply[i];
349 ret = memcmp(spec, last, size);
/*
 * Core validation pass, also reused by priv_flow_create() as the conversion
 * pass: walks the item list through the mlx5_flow_items[] graph (calling
 * each convert callback when flow->ibv_attr is set) and checks the action
 * list for a consistent drop/queue/RSS/mark combination. flow->offset is
 * advanced by each item's dst_sz so the caller learns the required
 * ibv_attr buffer size.
 * NOTE(review): many statement lines (returns, loop headers, closing
 * braces) appear to be missing from this copy — confirm against the
 * original source.
 */
355 * Validate a flow supported by the NIC.
358 * Pointer to private structure.
360 * Flow rule attributes.
362 * Pattern specification (list terminated by the END pattern item).
364 * Associated actions (list terminated by the END action).
366 * Perform verbose error reporting if not NULL.
367 * @param[in, out] flow
368 * Flow structure to update.
371 * 0 on success, a negative errno value otherwise and rte_errno is set.
374 priv_flow_validate(struct priv *priv,
375 const struct rte_flow_attr *attr,
376 const struct rte_flow_item items[],
377 const struct rte_flow_action actions[],
378 struct rte_flow_error *error,
379 struct mlx5_flow *flow)
381 const struct mlx5_flow_items *cur_item = mlx5_flow_items;
382 struct mlx5_flow_action action = {
/* Only the default group, priority 0, ingress-only rules are supported. */
390 rte_flow_error_set(error, ENOTSUP,
391 RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
393 "groups are not supported");
396 if (attr->priority) {
397 rte_flow_error_set(error, ENOTSUP,
398 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
400 "priorities are not supported");
404 rte_flow_error_set(error, ENOTSUP,
405 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
407 "egress is not supported");
410 if (!attr->ingress) {
411 rte_flow_error_set(error, ENOTSUP,
412 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
414 "only ingress is supported");
/* Walk the pattern, advancing through the item graph. */
417 for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
418 const struct mlx5_flow_items *token = NULL;
422 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
/* Accept the item only if it is a legal successor of the previous one. */
426 cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
428 if (cur_item->items[i] == items->type) {
429 token = &mlx5_flow_items[items->type];
434 goto exit_item_not_supported;
436 err = mlx5_flow_item_validate(items,
437 (const uint8_t *)cur_item->mask,
440 goto exit_item_not_supported;
/* Second pass (ibv_attr allocated): emit the Verbs spec. */
441 if (flow->ibv_attr && cur_item->convert) {
442 err = cur_item->convert(items,
443 (cur_item->default_mask ?
444 cur_item->default_mask :
448 goto exit_item_not_supported;
450 flow->offset += cur_item->dst_sz;
452 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
453 if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
455 } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
457 } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
458 const struct rte_flow_action_queue *queue =
459 (const struct rte_flow_action_queue *)
464 if (!queue || (queue->index > (priv->rxqs_n - 1)))
465 goto exit_action_not_supported;
/* A QUEUE action must reference a queue already listed by RSS. */
466 for (n = 0; n < action.queues_n; ++n) {
467 if (action.queues[n] == queue->index) {
472 if (action.queues_n && !found) {
473 rte_flow_error_set(error, ENOTSUP,
474 RTE_FLOW_ERROR_TYPE_ACTION,
476 "queue action not in RSS queues");
481 action.queues[0] = queue->index;
482 } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
483 const struct rte_flow_action_rss *rss =
484 (const struct rte_flow_action_rss *)
/* Conversely, a prior single-queue action must be part of the RSS set. */
488 if (action.queues_n == 1) {
491 assert(action.queues_n);
492 for (n = 0; n < rss->num; ++n) {
493 if (action.queues[0] == rss->queue[n]) {
499 rte_flow_error_set(error, ENOTSUP,
500 RTE_FLOW_ERROR_TYPE_ACTION,
502 "queue action not in RSS"
508 for (n = 0; n < rss->num; ++n)
509 action.queues[n] = rss->queue[n];
510 action.queues_n = rss->num;
511 } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
512 const struct rte_flow_action_mark *mark =
513 (const struct rte_flow_action_mark *)
517 rte_flow_error_set(error, EINVAL,
518 RTE_FLOW_ERROR_TYPE_ACTION,
520 "mark must be defined");
522 } else if (mark->id >= MLX5_FLOW_MARK_MAX) {
523 rte_flow_error_set(error, ENOTSUP,
524 RTE_FLOW_ERROR_TYPE_ACTION,
526 "mark must be between 0"
531 } else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
534 goto exit_action_not_supported;
/* Reserve room for the action tag spec during the sizing pass only. */
537 if (action.mark && !flow->ibv_attr && !action.drop)
538 flow->offset += sizeof(struct ibv_exp_flow_spec_action_tag);
539 if (!action.queue && !action.drop) {
540 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
541 NULL, "no valid action");
545 exit_item_not_supported:
546 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
547 items, "item not supported");
549 exit_action_not_supported:
550 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
551 actions, "action not supported");
/* rte_flow_ops validate() entry point: thin wrapper around
 * priv_flow_validate() with a fresh sizing-only mlx5_flow context.
 * NOTE(review): locking and the return statement appear to be missing from
 * this copy — confirm against the original source. */
556 * Validate a flow supported by the NIC.
558 * @see rte_flow_validate()
562 mlx5_flow_validate(struct rte_eth_dev *dev,
563 const struct rte_flow_attr *attr,
564 const struct rte_flow_item items[],
565 const struct rte_flow_action actions[],
566 struct rte_flow_error *error)
568 struct priv *priv = dev->data->dev_private;
570 struct mlx5_flow flow = { .offset = sizeof(struct ibv_exp_flow_attr) };
573 ret = priv_flow_validate(priv, attr, items, actions, error, &flow);
/*
 * Emit an ETH Verbs spec at flow->offset from the item's spec/mask and
 * clear value bits not covered by the mask. Resets hash_fields (L2-only
 * match does not participate in RSS hashing).
 * NOTE(review): the "if (!spec) return 0;" / "if (!mask) mask =
 * default_mask;" lines and the closing brace appear to be missing from
 * this copy — confirm against the original source.
 */
579 * Convert Ethernet item to Verbs specification.
582 * Item specification.
583 * @param default_mask[in]
584 * Default bit-masks to use when item->mask is not provided.
585 * @param data[in, out]
589 mlx5_flow_create_eth(const struct rte_flow_item *item,
590 const void *default_mask,
593 const struct rte_flow_item_eth *spec = item->spec;
594 const struct rte_flow_item_eth *mask = item->mask;
595 struct mlx5_flow *flow = (struct mlx5_flow *)data;
596 struct ibv_exp_flow_spec_eth *eth;
597 const unsigned int eth_size = sizeof(struct ibv_exp_flow_spec_eth);
600 ++flow->ibv_attr->num_of_specs;
/* L2 rules get the lowest match priority (2). */
601 flow->ibv_attr->priority = 2;
602 flow->hash_fields = 0;
603 eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
604 *eth = (struct ibv_exp_flow_spec_eth) {
605 .type = flow->inner | IBV_EXP_FLOW_SPEC_ETH,
612 memcpy(eth->val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
613 memcpy(eth->val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
614 eth->val.ether_type = spec->type;
615 memcpy(eth->mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
616 memcpy(eth->mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
617 eth->mask.ether_type = mask->type;
618 /* Remove unwanted bits from values. */
619 for (i = 0; i < ETHER_ADDR_LEN; ++i) {
620 eth->val.dst_mac[i] &= eth->mask.dst_mac[i];
621 eth->val.src_mac[i] &= eth->mask.src_mac[i];
623 eth->val.ether_type &= eth->mask.ether_type;
/*
 * Patch the VLAN TCI into the ETH spec emitted just before this item
 * (flow->offset - eth_size); no new spec is appended, matching the missing
 * .dst_sz in the VLAN graph entry.
 * NOTE(review): spec/mask NULL handling and the closing brace appear to be
 * missing from this copy — confirm against the original source.
 */
628 * Convert VLAN item to Verbs specification.
631 * Item specification.
632 * @param default_mask[in]
633 * Default bit-masks to use when item->mask is not provided.
634 * @param data[in, out]
638 mlx5_flow_create_vlan(const struct rte_flow_item *item,
639 const void *default_mask,
642 const struct rte_flow_item_vlan *spec = item->spec;
643 const struct rte_flow_item_vlan *mask = item->mask;
644 struct mlx5_flow *flow = (struct mlx5_flow *)data;
645 struct ibv_exp_flow_spec_eth *eth;
646 const unsigned int eth_size = sizeof(struct ibv_exp_flow_spec_eth);
/* Step back onto the previously emitted ETH spec. */
648 eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset - eth_size);
653 eth->val.vlan_tag = spec->tci;
654 eth->mask.vlan_tag = mask->tci;
655 eth->val.vlan_tag &= eth->mask.vlan_tag;
/*
 * Emit an IPv4 (extended) Verbs spec: src/dst address, protocol and TOS,
 * masked to the supported bits. Sets the L3 RSS hash fields.
 * NOTE(review): spec/mask NULL handling and the closing brace appear to be
 * missing from this copy — confirm against the original source.
 */
660 * Convert IPv4 item to Verbs specification.
663 * Item specification.
664 * @param default_mask[in]
665 * Default bit-masks to use when item->mask is not provided.
666 * @param data[in, out]
670 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
671 const void *default_mask,
674 const struct rte_flow_item_ipv4 *spec = item->spec;
675 const struct rte_flow_item_ipv4 *mask = item->mask;
676 struct mlx5_flow *flow = (struct mlx5_flow *)data;
677 struct ibv_exp_flow_spec_ipv4_ext *ipv4;
678 unsigned int ipv4_size = sizeof(struct ibv_exp_flow_spec_ipv4_ext);
680 ++flow->ibv_attr->num_of_specs;
/* L3 rules rank above L2 (priority 1 < 2). */
681 flow->ibv_attr->priority = 1;
682 flow->hash_fields = (IBV_EXP_RX_HASH_SRC_IPV4 |
683 IBV_EXP_RX_HASH_DST_IPV4);
684 ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
685 *ipv4 = (struct ibv_exp_flow_spec_ipv4_ext) {
686 .type = flow->inner | IBV_EXP_FLOW_SPEC_IPV4_EXT,
693 ipv4->val = (struct ibv_exp_flow_ipv4_ext_filter){
694 .src_ip = spec->hdr.src_addr,
695 .dst_ip = spec->hdr.dst_addr,
696 .proto = spec->hdr.next_proto_id,
697 .tos = spec->hdr.type_of_service,
699 ipv4->mask = (struct ibv_exp_flow_ipv4_ext_filter){
700 .src_ip = mask->hdr.src_addr,
701 .dst_ip = mask->hdr.dst_addr,
702 .proto = mask->hdr.next_proto_id,
703 .tos = mask->hdr.type_of_service,
705 /* Remove unwanted bits from values. */
706 ipv4->val.src_ip &= ipv4->mask.src_ip;
707 ipv4->val.dst_ip &= ipv4->mask.dst_ip;
708 ipv4->val.proto &= ipv4->mask.proto;
709 ipv4->val.tos &= ipv4->mask.tos;
/*
 * Emit an IPv6 (extended) Verbs spec: 128-bit addresses plus flow label,
 * next header and hop limit, masked to the supported bits. Sets the IPv6
 * RSS hash fields.
 * NOTE(review): spec/mask NULL handling, the ipv6->val.flow_label/
 * next_hdr/hop_limit assignments from spec, and the closing brace appear
 * to be missing from this copy — confirm against the original source.
 */
714 * Convert IPv6 item to Verbs specification.
717 * Item specification.
718 * @param default_mask[in]
719 * Default bit-masks to use when item->mask is not provided.
720 * @param data[in, out]
724 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
725 const void *default_mask,
728 const struct rte_flow_item_ipv6 *spec = item->spec;
729 const struct rte_flow_item_ipv6 *mask = item->mask;
730 struct mlx5_flow *flow = (struct mlx5_flow *)data;
731 struct ibv_exp_flow_spec_ipv6_ext *ipv6;
732 unsigned int ipv6_size = sizeof(struct ibv_exp_flow_spec_ipv6_ext);
734 ++flow->ibv_attr->num_of_specs;
735 flow->ibv_attr->priority = 1;
736 flow->hash_fields = (IBV_EXP_RX_HASH_SRC_IPV6 |
737 IBV_EXP_RX_HASH_DST_IPV6);
738 ipv6 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
739 *ipv6 = (struct ibv_exp_flow_spec_ipv6_ext) {
740 .type = flow->inner | IBV_EXP_FLOW_SPEC_IPV6_EXT,
747 memcpy(ipv6->val.src_ip, spec->hdr.src_addr,
748 RTE_DIM(ipv6->val.src_ip));
749 memcpy(ipv6->val.dst_ip, spec->hdr.dst_addr,
750 RTE_DIM(ipv6->val.dst_ip));
751 memcpy(ipv6->mask.src_ip, mask->hdr.src_addr,
752 RTE_DIM(ipv6->mask.src_ip));
753 memcpy(ipv6->mask.dst_ip, mask->hdr.dst_addr,
754 RTE_DIM(ipv6->mask.dst_ip));
755 ipv6->mask.flow_label = mask->hdr.vtc_flow;
756 ipv6->mask.next_hdr = mask->hdr.proto;
757 ipv6->mask.hop_limit = mask->hdr.hop_limits;
/* Remove unwanted bits from values. */
758 ipv6->val.flow_label &= ipv6->mask.flow_label;
759 ipv6->val.next_hdr &= ipv6->mask.next_hdr;
760 ipv6->val.hop_limit &= ipv6->mask.hop_limit;
/*
 * Emit a UDP Verbs spec matching source/destination ports and OR the UDP
 * port fields into the RSS hash (on top of the L3 fields already set).
 * NOTE(review): spec/mask NULL handling and the closing brace appear to be
 * missing from this copy — confirm against the original source.
 */
765 * Convert UDP item to Verbs specification.
768 * Item specification.
769 * @param default_mask[in]
770 * Default bit-masks to use when item->mask is not provided.
771 * @param data[in, out]
775 mlx5_flow_create_udp(const struct rte_flow_item *item,
776 const void *default_mask,
779 const struct rte_flow_item_udp *spec = item->spec;
780 const struct rte_flow_item_udp *mask = item->mask;
781 struct mlx5_flow *flow = (struct mlx5_flow *)data;
782 struct ibv_exp_flow_spec_tcp_udp *udp;
783 unsigned int udp_size = sizeof(struct ibv_exp_flow_spec_tcp_udp);
785 ++flow->ibv_attr->num_of_specs;
/* L4 rules get the highest match priority (0). */
786 flow->ibv_attr->priority = 0;
787 flow->hash_fields |= (IBV_EXP_RX_HASH_SRC_PORT_UDP |
788 IBV_EXP_RX_HASH_DST_PORT_UDP);
789 udp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
790 *udp = (struct ibv_exp_flow_spec_tcp_udp) {
791 .type = flow->inner | IBV_EXP_FLOW_SPEC_UDP,
798 udp->val.dst_port = spec->hdr.dst_port;
799 udp->val.src_port = spec->hdr.src_port;
800 udp->mask.dst_port = mask->hdr.dst_port;
801 udp->mask.src_port = mask->hdr.src_port;
802 /* Remove unwanted bits from values. */
803 udp->val.src_port &= udp->mask.src_port;
804 udp->val.dst_port &= udp->mask.dst_port;
/*
 * Emit a TCP Verbs spec matching source/destination ports and OR the TCP
 * port fields into the RSS hash. Mirrors mlx5_flow_create_udp().
 * NOTE(review): spec/mask NULL handling and the closing brace appear to be
 * missing from this copy — confirm against the original source.
 */
809 * Convert TCP item to Verbs specification.
812 * Item specification.
813 * @param default_mask[in]
814 * Default bit-masks to use when item->mask is not provided.
815 * @param data[in, out]
819 mlx5_flow_create_tcp(const struct rte_flow_item *item,
820 const void *default_mask,
823 const struct rte_flow_item_tcp *spec = item->spec;
824 const struct rte_flow_item_tcp *mask = item->mask;
825 struct mlx5_flow *flow = (struct mlx5_flow *)data;
826 struct ibv_exp_flow_spec_tcp_udp *tcp;
827 unsigned int tcp_size = sizeof(struct ibv_exp_flow_spec_tcp_udp);
829 ++flow->ibv_attr->num_of_specs;
830 flow->ibv_attr->priority = 0;
831 flow->hash_fields |= (IBV_EXP_RX_HASH_SRC_PORT_TCP |
832 IBV_EXP_RX_HASH_DST_PORT_TCP);
833 tcp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
834 *tcp = (struct ibv_exp_flow_spec_tcp_udp) {
835 .type = flow->inner | IBV_EXP_FLOW_SPEC_TCP,
842 tcp->val.dst_port = spec->hdr.dst_port;
843 tcp->val.src_port = spec->hdr.src_port;
844 tcp->mask.dst_port = mask->hdr.dst_port;
845 tcp->mask.src_port = mask->hdr.src_port;
846 /* Remove unwanted bits from values. */
847 tcp->val.src_port &= tcp->mask.src_port;
848 tcp->val.dst_port &= tcp->mask.dst_port;
/*
 * Emit a VXLAN tunnel Verbs spec and flip flow->inner so every subsequent
 * item is flagged as inner-header match. The 24-bit VNI is copied into
 * byte 1..3 of a tunnel-id union (presumably declared on the missing
 * lines around L485-L486 — TODO confirm).
 * NOTE(review): spec/mask NULL handling and the closing brace appear to be
 * missing from this copy — confirm against the original source.
 */
853 * Convert VXLAN item to Verbs specification.
856 * Item specification.
857 * @param default_mask[in]
858 * Default bit-masks to use when item->mask is not provided.
859 * @param data[in, out]
863 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
864 const void *default_mask,
867 const struct rte_flow_item_vxlan *spec = item->spec;
868 const struct rte_flow_item_vxlan *mask = item->mask;
869 struct mlx5_flow *flow = (struct mlx5_flow *)data;
870 struct ibv_exp_flow_spec_tunnel *vxlan;
871 unsigned int size = sizeof(struct ibv_exp_flow_spec_tunnel);
877 ++flow->ibv_attr->num_of_specs;
878 flow->ibv_attr->priority = 0;
880 vxlan = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
881 *vxlan = (struct ibv_exp_flow_spec_tunnel) {
882 .type = flow->inner | IBV_EXP_FLOW_SPEC_VXLAN_TUNNEL,
/* All following items match the encapsulated (inner) headers. */
885 flow->inner = IBV_EXP_FLOW_SPEC_INNER;
890 memcpy(&id.vni[1], spec->vni, 3);
891 vxlan->val.tunnel_id = id.vlan_id;
892 memcpy(&id.vni[1], mask->vni, 3);
893 vxlan->mask.tunnel_id = id.vlan_id;
894 /* Remove unwanted bits from values. */
895 vxlan->val.tunnel_id &= vxlan->mask.tunnel_id;
/*
 * Append an ACTION_TAG Verbs spec carrying the (converted) mark id so that
 * matching packets are tagged for the MARK/FLAG rte_flow actions.
 * NOTE(review): the return statement and closing brace appear to be
 * missing from this copy — confirm against the original source.
 */
900 * Convert mark/flag action to Verbs specification.
903 * Pointer to MLX5 flow structure.
908 mlx5_flow_create_flag_mark(struct mlx5_flow *flow, uint32_t mark_id)
910 struct ibv_exp_flow_spec_action_tag *tag;
911 unsigned int size = sizeof(struct ibv_exp_flow_spec_action_tag);
913 tag = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
914 *tag = (struct ibv_exp_flow_spec_action_tag){
915 .type = IBV_EXP_FLOW_SPEC_ACTION_TAG,
/* mlx5_flow_mark_set() converts the user id to the on-wire tag value. */
917 .tag_id = mlx5_flow_mark_set(mark_id),
919 ++flow->ibv_attr->num_of_specs;
/*
 * Instantiate a DROP flow: build a throwaway CQ + WQ + single-entry
 * indirection table + hash QP (hash fields 0) that receives nothing, then
 * attach the Verbs flow rule to it. On any failure all partially created
 * Verbs objects are destroyed via the error path at the bottom.
 * NOTE(review): several lines (the "error:" label, NULL checks guarding
 * the destroy calls, return statements, closing brace) appear to be
 * missing from this copy — confirm against the original source.
 */
924 * Complete flow rule creation with a drop queue.
927 * Pointer to private structure.
929 * MLX5 flow attributes (filled by mlx5_flow_validate()).
931 * Perform verbose error reporting if not NULL.
934 * A flow if the rule could be created.
936 static struct rte_flow *
937 priv_flow_create_action_queue_drop(struct priv *priv,
938 struct mlx5_flow *flow,
939 struct rte_flow_error *error)
941 struct rte_flow *rte_flow;
945 rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow), 0);
947 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
948 NULL, "cannot allocate flow memory");
/* Minimal 1-entry CQ: the drop WQ never delivers completions to consume. */
952 ibv_exp_create_cq(priv->ctx, 1, NULL, NULL, 0,
953 &(struct ibv_exp_cq_init_attr){
957 rte_flow_error_set(error, ENOMEM,
958 RTE_FLOW_ERROR_TYPE_HANDLE,
959 NULL, "cannot allocate CQ");
962 rte_flow->wq = ibv_exp_create_wq(priv->ctx,
963 &(struct ibv_exp_wq_init_attr){
964 .wq_type = IBV_EXP_WQT_RQ,
971 rte_flow_error_set(error, ENOMEM,
972 RTE_FLOW_ERROR_TYPE_HANDLE,
973 NULL, "cannot allocate WQ");
977 rte_flow->ibv_attr = flow->ibv_attr;
978 rte_flow->ind_table = ibv_exp_create_rwq_ind_table(
980 &(struct ibv_exp_rwq_ind_table_init_attr){
982 .log_ind_tbl_size = 0,
983 .ind_tbl = &rte_flow->wq,
986 if (!rte_flow->ind_table) {
987 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
988 NULL, "cannot allocate indirection table");
/* Hash QP with a zero field mask: steers everything to the lone drop WQ. */
991 rte_flow->qp = ibv_exp_create_qp(
993 &(struct ibv_exp_qp_init_attr){
994 .qp_type = IBV_QPT_RAW_PACKET,
996 IBV_EXP_QP_INIT_ATTR_PD |
997 IBV_EXP_QP_INIT_ATTR_PORT |
998 IBV_EXP_QP_INIT_ATTR_RX_HASH,
1000 .rx_hash_conf = &(struct ibv_exp_rx_hash_conf){
1002 IBV_EXP_RX_HASH_FUNC_TOEPLITZ,
1003 .rx_hash_key_len = rss_hash_default_key_len,
1004 .rx_hash_key = rss_hash_default_key,
1005 .rx_hash_fields_mask = 0,
1006 .rwq_ind_tbl = rte_flow->ind_table,
1008 .port_num = priv->port,
1010 if (!rte_flow->qp) {
1011 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1012 NULL, "cannot allocate QP");
1017 rte_flow->ibv_flow = ibv_exp_create_flow(rte_flow->qp,
1018 rte_flow->ibv_attr);
1019 if (!rte_flow->ibv_flow) {
1020 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1021 NULL, "flow rule creation failure");
/* Error path: tear down whatever was created, in reverse order. */
1028 ibv_destroy_qp(rte_flow->qp);
1029 if (rte_flow->ind_table)
1030 ibv_exp_destroy_rwq_ind_table(rte_flow->ind_table);
1032 ibv_exp_destroy_wq(rte_flow->wq);
1034 ibv_destroy_cq(rte_flow->cq);
/*
 * Instantiate a QUEUE/RSS flow: record the target rxq pointers, pad the WQ
 * array up to the next power of two (indirection tables require 2^n
 * entries), create the indirection table and a Toeplitz hash QP over
 * flow->hash_fields, then attach the Verbs flow rule.
 * NOTE(review): several lines (wqs[] population, the "error:" label,
 * return statements, closing brace) appear to be missing from this copy —
 * confirm against the original source.
 */
1040 * Complete flow rule creation.
1043 * Pointer to private structure.
1045 * MLX5 flow attributes (filled by mlx5_flow_validate()).
1047 * Target action structure.
1049 * Perform verbose error reporting if not NULL.
1052 * A flow if the rule could be created.
1054 static struct rte_flow *
1055 priv_flow_create_action_queue(struct priv *priv,
1056 struct mlx5_flow *flow,
1057 struct mlx5_flow_action *action,
1058 struct rte_flow_error *error)
1060 struct rte_flow *rte_flow;
/* Indirection tables need a power-of-two entry count. */
1063 const unsigned int wqs_n = 1 << log2above(action->queues_n);
1064 struct ibv_exp_wq *wqs[wqs_n];
1068 assert(!action->drop);
/* Flow handle and its trailing rxq pointer array in one allocation. */
1069 rte_flow = rte_calloc(__func__, 1,
1070 sizeof(*rte_flow) + sizeof(struct rxq *) *
1071 action->queues_n, 0);
1073 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1074 NULL, "cannot allocate flow memory");
1077 rte_flow->rxqs = (struct rxq *(*)[])((uintptr_t)rte_flow +
1078 sizeof(struct rxq *) *
1080 for (i = 0; i < action->queues_n; ++i) {
1081 struct rxq_ctrl *rxq;
1083 rxq = container_of((*priv->rxqs)[action->queues[i]],
1084 struct rxq_ctrl, rxq);
1086 (*rte_flow->rxqs)[i] = &rxq->rxq;
/* Propagate the mark flag so the Rx burst reads the tag. */
1088 rxq->rxq.mark |= action->mark;
1090 /* finalise indirection table. */
1091 for (j = 0; i < wqs_n; ++i, ++j) {
1093 if (j == action->queues_n)
1096 rte_flow->mark = action->mark;
1097 rte_flow->ibv_attr = flow->ibv_attr;
1098 rte_flow->hash_fields = flow->hash_fields;
1099 rte_flow->ind_table = ibv_exp_create_rwq_ind_table(
1101 &(struct ibv_exp_rwq_ind_table_init_attr){
1103 .log_ind_tbl_size = log2above(action->queues_n),
1107 if (!rte_flow->ind_table) {
1108 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1109 NULL, "cannot allocate indirection table");
1112 rte_flow->qp = ibv_exp_create_qp(
1114 &(struct ibv_exp_qp_init_attr){
1115 .qp_type = IBV_QPT_RAW_PACKET,
1117 IBV_EXP_QP_INIT_ATTR_PD |
1118 IBV_EXP_QP_INIT_ATTR_PORT |
1119 IBV_EXP_QP_INIT_ATTR_RX_HASH,
1121 .rx_hash_conf = &(struct ibv_exp_rx_hash_conf){
1123 IBV_EXP_RX_HASH_FUNC_TOEPLITZ,
1124 .rx_hash_key_len = rss_hash_default_key_len,
1125 .rx_hash_key = rss_hash_default_key,
/* Unlike the drop path, spread over the fields gathered per-item. */
1126 .rx_hash_fields_mask = rte_flow->hash_fields,
1127 .rwq_ind_tbl = rte_flow->ind_table,
1129 .port_num = priv->port,
1131 if (!rte_flow->qp) {
1132 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1133 NULL, "cannot allocate QP");
1138 rte_flow->ibv_flow = ibv_exp_create_flow(rte_flow->qp,
1139 rte_flow->ibv_attr);
1140 if (!rte_flow->ibv_flow) {
1141 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1142 NULL, "flow rule creation failure");
/* Error path: destroy QP then indirection table. */
1149 ibv_destroy_qp(rte_flow->qp);
1150 if (rte_flow->ind_table)
1151 ibv_exp_destroy_rwq_ind_table(rte_flow->ind_table);
/*
 * Full flow creation: first run priv_flow_validate() in sizing mode to
 * learn the required ibv_attr size, allocate the buffer, re-run validation
 * in conversion mode (claim_zero — it already passed once), then re-walk
 * the actions to build the mlx5_flow_action summary and dispatch to the
 * drop or queue/RSS instantiation helper.
 * NOTE(review): several lines (the function doc header opener, goto
 * labels, return statements, closing brace) appear to be missing from this
 * copy — confirm against the original source.
 */
1160 * Pointer to private structure.
1162 * Flow rule attributes.
1163 * @param[in] pattern
1164 * Pattern specification (list terminated by the END pattern item).
1165 * @param[in] actions
1166 * Associated actions (list terminated by the END action).
1168 * Perform verbose error reporting if not NULL.
1171 * A flow on success, NULL otherwise.
1173 static struct rte_flow *
1174 priv_flow_create(struct priv *priv,
1175 const struct rte_flow_attr *attr,
1176 const struct rte_flow_item items[],
1177 const struct rte_flow_action actions[],
1178 struct rte_flow_error *error)
1180 struct rte_flow *rte_flow;
1181 struct mlx5_flow_action action;
1182 struct mlx5_flow flow = { .offset = sizeof(struct ibv_exp_flow_attr), };
/* Pass 1: validate and accumulate the buffer size in flow.offset. */
1185 err = priv_flow_validate(priv, attr, items, actions, error, &flow);
1188 flow.ibv_attr = rte_malloc(__func__, flow.offset, 0);
/* Reset the cursor to just past the attribute header for pass 2. */
1189 flow.offset = sizeof(struct ibv_exp_flow_attr);
1190 if (!flow.ibv_attr) {
1191 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1192 NULL, "cannot allocate ibv_attr memory");
1195 *flow.ibv_attr = (struct ibv_exp_flow_attr){
1196 .type = IBV_EXP_FLOW_ATTR_NORMAL,
1197 .size = sizeof(struct ibv_exp_flow_attr),
1198 .priority = attr->priority,
1205 flow.hash_fields = 0;
/* Pass 2: same walk with ibv_attr set, so convert callbacks fire. */
1206 claim_zero(priv_flow_validate(priv, attr, items, actions,
1208 action = (struct mlx5_flow_action){
1212 .mark_id = MLX5_FLOW_MARK_DEFAULT,
1214 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
1215 if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
1217 } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
1219 action.queues[action.queues_n++] =
1220 ((const struct rte_flow_action_queue *)
1221 actions->conf)->index;
1222 } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
1223 const struct rte_flow_action_rss *rss =
1224 (const struct rte_flow_action_rss *)
1229 action.queues_n = rss->num;
1230 for (n = 0; n < rss->num; ++n)
1231 action.queues[n] = rss->queue[n];
1232 } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
1235 } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
1236 const struct rte_flow_action_mark *mark =
1237 (const struct rte_flow_action_mark *)
1241 action.mark_id = mark->id;
/* Mark is pointless on a drop flow; drop wins. */
1242 action.mark = !action.drop;
1243 } else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
1246 rte_flow_error_set(error, ENOTSUP,
1247 RTE_FLOW_ERROR_TYPE_ACTION,
1248 actions, "unsupported action");
1253 mlx5_flow_create_flag_mark(&flow, action.mark_id);
1254 flow.offset += sizeof(struct ibv_exp_flow_spec_action_tag);
1258 priv_flow_create_action_queue_drop(priv, &flow, error);
1260 rte_flow = priv_flow_create_action_queue(priv, &flow, &action,
/* Error path: release the attribute buffer. */
1266 rte_free(flow.ibv_attr);
/* rte_flow_ops create() entry point: delegates to priv_flow_create() and,
 * on success, links the new flow into priv->flows.
 * NOTE(review): locking and the return statement appear to be missing from
 * this copy — confirm against the original source. */
1273 * @see rte_flow_create()
1277 mlx5_flow_create(struct rte_eth_dev *dev,
1278 const struct rte_flow_attr *attr,
1279 const struct rte_flow_item items[],
1280 const struct rte_flow_action actions[],
1281 struct rte_flow_error *error)
1283 struct priv *priv = dev->data->dev_private;
1284 struct rte_flow *flow;
1287 flow = priv_flow_create(priv, attr, items, actions, error);
1289 LIST_INSERT_HEAD(&priv->flows, flow, next);
1290 DEBUG("Flow created %p", (void *)flow);
/*
 * Unlink and destroy one flow: release its Verbs objects (flow rule, QP,
 * indirection table, plus WQ/CQ for drop flows), then, for marked flows,
 * clear rxq->mark on each target queue unless some other still-listed
 * marked flow also uses that queue. Finally free the attribute buffer and
 * the handle.
 * NOTE(review): several lines (inner-loop conditionals, rte_free of the
 * flow handle, closing braces) appear to be missing from this copy —
 * confirm against the original source.
 */
1300 * Pointer to private structure.
1305 priv_flow_destroy(struct priv *priv,
1306 struct rte_flow *flow)
1309 LIST_REMOVE(flow, next);
1311 claim_zero(ibv_exp_destroy_flow(flow->ibv_flow));
1313 claim_zero(ibv_destroy_qp(flow->qp));
1314 if (flow->ind_table)
1315 claim_zero(ibv_exp_destroy_rwq_ind_table(flow->ind_table));
/* WQ/CQ exist only on drop flows (see the drop creation helper). */
1316 if (flow->drop && flow->wq)
1317 claim_zero(ibv_exp_destroy_wq(flow->wq));
1318 if (flow->drop && flow->cq)
1319 claim_zero(ibv_destroy_cq(flow->cq));
1321 struct rte_flow *tmp;
1323 uint32_t mark_n = 0;
1327 * To remove the mark from the queue, the queue must not be
1328 * present in any other marked flow (RSS or not).
1330 for (queue_n = 0; queue_n < flow->rxqs_n; ++queue_n) {
1331 rxq = (*flow->rxqs)[queue_n];
1332 for (tmp = LIST_FIRST(&priv->flows);
1334 tmp = LIST_NEXT(tmp, next)) {
1340 tqueue_n < tmp->rxqs_n;
1344 trxq = (*tmp->rxqs)[tqueue_n];
/* Keep the mark iff another marked flow references this queue. */
1349 rxq->mark = !!mark_n;
1352 rte_free(flow->ibv_attr);
1353 DEBUG("Flow destroyed %p", (void *)flow);
/* rte_flow_ops destroy() entry point: thin wrapper over
 * priv_flow_destroy(). NOTE(review): locking and the return statement
 * appear to be missing from this copy — confirm against the original. */
1360 * @see rte_flow_destroy()
1364 mlx5_flow_destroy(struct rte_eth_dev *dev,
1365 struct rte_flow *flow,
1366 struct rte_flow_error *error)
1368 struct priv *priv = dev->data->dev_private;
1372 priv_flow_destroy(priv, flow);
/* Drain priv->flows, destroying each head entry until the list is empty
 * (priv_flow_destroy() unlinks the flow, so the loop terminates). */
1378 * Destroy all flows.
1381 * Pointer to private structure.
1384 priv_flow_flush(struct priv *priv)
1386 while (!LIST_EMPTY(&priv->flows)) {
1387 struct rte_flow *flow;
1389 flow = LIST_FIRST(&priv->flows);
1390 priv_flow_destroy(priv, flow);
/* rte_flow_ops flush() entry point: thin wrapper over priv_flow_flush().
 * NOTE(review): locking and the return statement appear to be missing from
 * this copy — confirm against the original source. */
1395 * Destroy all flows.
1397 * @see rte_flow_flush()
1401 mlx5_flow_flush(struct rte_eth_dev *dev,
1402 struct rte_flow_error *error)
1404 struct priv *priv = dev->data->dev_private;
1408 priv_flow_flush(priv);
/*
 * dev_stop() helper: detach every flow from hardware (destroy the Verbs
 * flow rule, keep the handle and ibv_attr for a later priv_flow_start())
 * and clear the mark flag on the flow's Rx queues.
 * NOTE(review): the loop's conditional guards and closing braces appear to
 * be missing from this copy — confirm against the original source.
 */
1416 * Called by dev_stop() to remove all flows.
1419 * Pointer to private structure.
1422 priv_flow_stop(struct priv *priv)
1424 struct rte_flow *flow;
1426 for (flow = LIST_FIRST(&priv->flows);
1428 flow = LIST_NEXT(flow, next)) {
1429 claim_zero(ibv_exp_destroy_flow(flow->ibv_flow));
/* NULL marks the flow as detached so priv_flow_start() re-creates it. */
1430 flow->ibv_flow = NULL;
1434 for (n = 0; n < flow->rxqs_n; ++n)
1435 (*flow->rxqs)[n]->mark = 0;
1437 DEBUG("Flow %p removed", (void *)flow);
1445 * Pointer to private structure.
1448 * 0 on success, a errno value otherwise and rte_errno is set.
1451 priv_flow_start(struct priv *priv)
1453 struct rte_flow *flow;
1455 for (flow = LIST_FIRST(&priv->flows);
1457 flow = LIST_NEXT(flow, next)) {
1458 flow->ibv_flow = ibv_exp_create_flow(flow->qp,
1460 if (!flow->ibv_flow) {
1461 DEBUG("Flow %p cannot be applied", (void *)flow);
1465 DEBUG("Flow %p applied", (void *)flow);
1469 for (n = 0; n < flow->rxqs_n; ++n)
1470 (*flow->rxqs)[n]->mark = 1;