4 * Copyright 2016 6WIND S.A.
5 * Copyright 2016 Mellanox.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of 6WIND S.A. nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 #include <sys/queue.h>
38 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
40 #pragma GCC diagnostic ignored "-Wpedantic"
42 #include <infiniband/verbs.h>
44 #pragma GCC diagnostic error "-Wpedantic"
47 #include <rte_ethdev.h>
49 #include <rte_flow_driver.h>
50 #include <rte_malloc.h>
55 /* Number of Work Queue necessary for the DROP queue. */
56 #define MLX5_DROP_WQ_N 4
/*
 * Forward declarations of the per-item conversion callbacks. Each one
 * translates a single rte_flow pattern item into the matching Verbs flow
 * specification appended to a struct mlx5_flow buffer.
 * NOTE(review): the "static int" lines and trailing "void *data);" lines
 * of these prototypes are not visible in this chunk.
 */
59 mlx5_flow_create_eth(const struct rte_flow_item *item,
60 const void *default_mask,
64 mlx5_flow_create_vlan(const struct rte_flow_item *item,
65 const void *default_mask,
69 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
70 const void *default_mask,
74 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
75 const void *default_mask,
79 mlx5_flow_create_udp(const struct rte_flow_item *item,
80 const void *default_mask,
84 mlx5_flow_create_tcp(const struct rte_flow_item *item,
85 const void *default_mask,
89 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
90 const void *default_mask,
/*
 * Fields of the PMD's flow handle. One instance holds all Verbs objects
 * backing a single rule (attributes, QP, flow, indirection table) plus the
 * Rx queues the rule steers to; rxqs[] is a flexible array sized by rxqs_n.
 * NOTE(review): the "struct rte_flow {" opening line is not visible in this
 * chunk.
 */
94 TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
95 struct ibv_exp_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
96 struct ibv_exp_rwq_ind_table *ind_table; /**< Indirection table. */
97 struct ibv_qp *qp; /**< Verbs queue pair. */
98 struct ibv_exp_flow *ibv_flow; /**< Verbs flow. */
99 struct ibv_exp_wq *wq; /**< Verbs work queue. */
100 struct ibv_cq *cq; /**< Verbs completion queue. */
101 uint16_t rxqs_n; /**< Number of queues in this flow, 0 if drop queue. */
102 uint32_t mark:1; /**< Set if the flow is marked. */
103 uint32_t drop:1; /**< Drop queue. */
104 uint64_t hash_fields; /**< Fields that participate in the hash. */
105 struct rxq *rxqs[]; /**< Pointer to the queues array. */
108 /** Static initializer for items. */
/* NOTE(review): the "#define ITEMS(...)" macro name line is not visible
 * in this chunk; the body builds a NULL-free, END-terminated item list. */
110 (const enum rte_flow_item_type []){ \
111 __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
114 /** Structure to generate a simple graph of layers supported by the NIC. */
115 struct mlx5_flow_items {
116 /** List of possible actions for these items. */
117 const enum rte_flow_action_type *const actions;
118 /** Bit-masks corresponding to the possibilities for the item. */
121 * Default bit-masks to use when item->mask is not provided. When
122 * \default_mask is also NULL, the full supported bit-mask (\mask) is
125 const void *default_mask;
126 /** Bit-masks size in bytes. */
127 const unsigned int mask_sz;
129 * Conversion function from rte_flow to NIC specific flow.
132 * rte_flow item to convert.
133 * @param default_mask
134 * Default bit-masks to use when item->mask is not provided.
136 * Internal structure to store the conversion.
139 * 0 on success, negative value otherwise.
141 int (*convert)(const struct rte_flow_item *item,
142 const void *default_mask,
144 /** Size in bytes of the destination structure. */
145 const unsigned int dst_sz;
146 /** List of possible following items. */
147 const enum rte_flow_item_type *const items;
150 /** Valid action for this PMD. */
/* Shared by every entry of mlx5_flow_items[]: all supported items accept
 * the same action set (drop, queue, mark, flag). */
151 static const enum rte_flow_action_type valid_actions[] = {
152 RTE_FLOW_ACTION_TYPE_DROP,
153 RTE_FLOW_ACTION_TYPE_QUEUE,
154 RTE_FLOW_ACTION_TYPE_MARK,
155 RTE_FLOW_ACTION_TYPE_FLAG,
156 RTE_FLOW_ACTION_TYPE_END,
159 /** Graph of supported items and associated actions. */
/*
 * Indexed by rte_flow item type. Each entry lists the items that may
 * legally follow it (.items), the supported field mask (.mask), the
 * fallback mask (.default_mask), and the converter plus the size of the
 * Verbs spec it emits (.convert / .dst_sz). priv_flow_validate() walks
 * this graph starting from RTE_FLOW_ITEM_TYPE_END.
 * NOTE(review): several closing-brace and mask-field lines are not
 * visible in this chunk; entries left byte-identical.
 */
160 static const struct mlx5_flow_items mlx5_flow_items[] = {
161 [RTE_FLOW_ITEM_TYPE_END] = {
162 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
163 RTE_FLOW_ITEM_TYPE_VXLAN),
165 [RTE_FLOW_ITEM_TYPE_ETH] = {
166 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
167 RTE_FLOW_ITEM_TYPE_IPV4,
168 RTE_FLOW_ITEM_TYPE_IPV6),
169 .actions = valid_actions,
170 .mask = &(const struct rte_flow_item_eth){
171 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
172 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
175 .default_mask = &rte_flow_item_eth_mask,
176 .mask_sz = sizeof(struct rte_flow_item_eth),
177 .convert = mlx5_flow_create_eth,
178 .dst_sz = sizeof(struct ibv_exp_flow_spec_eth),
180 [RTE_FLOW_ITEM_TYPE_VLAN] = {
181 .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
182 RTE_FLOW_ITEM_TYPE_IPV6),
183 .actions = valid_actions,
184 .mask = &(const struct rte_flow_item_vlan){
187 .default_mask = &rte_flow_item_vlan_mask,
188 .mask_sz = sizeof(struct rte_flow_item_vlan),
189 .convert = mlx5_flow_create_vlan,
192 [RTE_FLOW_ITEM_TYPE_IPV4] = {
193 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
194 RTE_FLOW_ITEM_TYPE_TCP),
195 .actions = valid_actions,
196 .mask = &(const struct rte_flow_item_ipv4){
200 .type_of_service = -1,
204 .default_mask = &rte_flow_item_ipv4_mask,
205 .mask_sz = sizeof(struct rte_flow_item_ipv4),
206 .convert = mlx5_flow_create_ipv4,
207 .dst_sz = sizeof(struct ibv_exp_flow_spec_ipv4_ext),
209 [RTE_FLOW_ITEM_TYPE_IPV6] = {
210 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
211 RTE_FLOW_ITEM_TYPE_TCP),
212 .actions = valid_actions,
213 .mask = &(const struct rte_flow_item_ipv6){
216 0xff, 0xff, 0xff, 0xff,
217 0xff, 0xff, 0xff, 0xff,
218 0xff, 0xff, 0xff, 0xff,
219 0xff, 0xff, 0xff, 0xff,
222 0xff, 0xff, 0xff, 0xff,
223 0xff, 0xff, 0xff, 0xff,
224 0xff, 0xff, 0xff, 0xff,
225 0xff, 0xff, 0xff, 0xff,
232 .default_mask = &rte_flow_item_ipv6_mask,
233 .mask_sz = sizeof(struct rte_flow_item_ipv6),
234 .convert = mlx5_flow_create_ipv6,
235 .dst_sz = sizeof(struct ibv_exp_flow_spec_ipv6_ext),
237 [RTE_FLOW_ITEM_TYPE_UDP] = {
238 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
239 .actions = valid_actions,
240 .mask = &(const struct rte_flow_item_udp){
246 .default_mask = &rte_flow_item_udp_mask,
247 .mask_sz = sizeof(struct rte_flow_item_udp),
248 .convert = mlx5_flow_create_udp,
249 .dst_sz = sizeof(struct ibv_exp_flow_spec_tcp_udp),
251 [RTE_FLOW_ITEM_TYPE_TCP] = {
252 .actions = valid_actions,
253 .mask = &(const struct rte_flow_item_tcp){
259 .default_mask = &rte_flow_item_tcp_mask,
260 .mask_sz = sizeof(struct rte_flow_item_tcp),
261 .convert = mlx5_flow_create_tcp,
262 .dst_sz = sizeof(struct ibv_exp_flow_spec_tcp_udp),
264 [RTE_FLOW_ITEM_TYPE_VXLAN] = {
265 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
266 .actions = valid_actions,
267 .mask = &(const struct rte_flow_item_vxlan){
268 .vni = "\xff\xff\xff",
270 .default_mask = &rte_flow_item_vxlan_mask,
271 .mask_sz = sizeof(struct rte_flow_item_vxlan),
272 .convert = mlx5_flow_create_vxlan,
273 .dst_sz = sizeof(struct ibv_exp_flow_spec_tunnel),
277 /** Structure to pass to the conversion function. */
/* Conversion context: converters append their Verbs spec at ibv_attr +
 * offset, and set hash_fields / inner as they go.
 * NOTE(review): the "struct mlx5_flow {" opening line is not visible in
 * this chunk. */
279 struct ibv_exp_flow_attr *ibv_attr; /**< Verbs attribute. */
280 unsigned int offset; /**< Offset in bytes in the ibv_attr buffer. */
281 uint32_t inner; /**< Set once VXLAN is encountered. */
282 uint64_t hash_fields; /**< Fields that participate in the hash. */
285 /** Structure for Drop queue. */
/* Verbs objects backing the shared drop queue created by
 * priv_flow_create_drop_queue(): MLX5_DROP_WQ_N work queues feeding one
 * indirection table and QP. */
286 struct rte_flow_drop {
287 struct ibv_exp_rwq_ind_table *ind_table; /**< Indirection table. */
288 struct ibv_qp *qp; /**< Verbs queue pair. */
289 struct ibv_exp_wq *wqs[MLX5_DROP_WQ_N]; /**< Verbs work queue. */
290 struct ibv_cq *cq; /**< Verbs completion queue. */
/* Parsed action summary filled by priv_flow_validate() and consumed by
 * the flow-creation helpers. */
293 struct mlx5_flow_action {
294 uint32_t queue:1; /**< Target is a receive queue. */
295 uint32_t drop:1; /**< Target is a drop queue. */
296 uint32_t mark:1; /**< Mark is present in the flow. */
297 uint32_t mark_id; /**< Mark identifier. */
298 uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use. */
299 uint16_t queues_n; /**< Number of entries in queue[]. */
303 * Check support for a given item.
306 * Item specification.
308 * Bit-masks covering supported fields to compare with spec, last and mask in
311 * Bit-Mask size in bytes.
/*
 * Verifies that spec, last and item->mask only use bits present in the
 * supported bit-mask, and that a spec/last range is consistent once the
 * applicable mask is applied.
 * NOTE(review): several interior lines (returns, braces, declarations)
 * are missing from this chunk; code left byte-identical.
 */
317 mlx5_flow_item_validate(const struct rte_flow_item *item,
318 const uint8_t *mask, unsigned int size)
/* A mask or last without a spec to anchor it is invalid. */
322 if (!item->spec && (item->mask || item->last))
324 if (item->spec && !item->mask) {
326 const uint8_t *spec = item->spec;
/* Reject any spec bit outside the supported mask. */
328 for (i = 0; i < size; ++i)
329 if ((spec[i] | mask[i]) != mask[i])
332 if (item->last && !item->mask) {
334 const uint8_t *spec = item->last;
336 for (i = 0; i < size; ++i)
337 if ((spec[i] | mask[i]) != mask[i])
342 const uint8_t *spec = item->mask;
344 for (i = 0; i < size; ++i)
345 if ((spec[i] | mask[i]) != mask[i])
348 if (item->spec && item->last) {
351 const uint8_t *apply = mask;
/* Compare masked spec and last byte by byte; ranges are unsupported,
 * so the two must be equal after masking. */
356 for (i = 0; i < size; ++i) {
357 spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
358 last[i] = ((const uint8_t *)item->last)[i] & apply[i];
360 ret = memcmp(spec, last, size);
366 * Validate a flow supported by the NIC.
369 * Pointer to private structure.
371 * Flow rule attributes.
373 * Pattern specification (list terminated by the END pattern item).
375 * Associated actions (list terminated by the END action).
377 * Perform verbose error reporting if not NULL.
378 * @param[in, out] flow
379 * Flow structure to update.
380 * @param[in, out] action
381 * Action structure to update.
384 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Doubles as validator and converter: when flow->ibv_attr is NULL only
 * sizes are accumulated in flow->offset; when it is set, each item's
 * convert() callback writes its Verbs spec into the buffer.
 * NOTE(review): numerous interior lines (returns, braces, declarations)
 * are missing from this chunk; code left byte-identical.
 */
387 priv_flow_validate(struct priv *priv,
388 const struct rte_flow_attr *attr,
389 const struct rte_flow_item items[],
390 const struct rte_flow_action actions[],
391 struct rte_flow_error *error,
392 struct mlx5_flow *flow,
393 struct mlx5_flow_action *action)
395 const struct mlx5_flow_items *cur_item = mlx5_flow_items;
/* Only the default group, priority 0, ingress-only rules are accepted. */
399 rte_flow_error_set(error, ENOTSUP,
400 RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
402 "groups are not supported");
405 if (attr->priority) {
406 rte_flow_error_set(error, ENOTSUP,
407 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
409 "priorities are not supported");
413 rte_flow_error_set(error, ENOTSUP,
414 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
416 "egress is not supported");
419 if (!attr->ingress) {
420 rte_flow_error_set(error, ENOTSUP,
421 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
423 "only ingress is supported");
/* Walk the pattern, checking each item is a legal successor of the
 * previous one in the mlx5_flow_items graph. */
426 for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
427 const struct mlx5_flow_items *token = NULL;
431 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
435 cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
437 if (cur_item->items[i] == items->type) {
438 token = &mlx5_flow_items[items->type];
443 goto exit_item_not_supported;
445 err = mlx5_flow_item_validate(items,
446 (const uint8_t *)cur_item->mask,
449 goto exit_item_not_supported;
450 if (flow->ibv_attr && cur_item->convert) {
451 err = cur_item->convert(items,
452 (cur_item->default_mask ?
453 cur_item->default_mask :
457 goto exit_item_not_supported;
458 } else if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
/* Only a single level of VXLAN encapsulation is supported. */
460 rte_flow_error_set(error, ENOTSUP,
461 RTE_FLOW_ERROR_TYPE_ITEM,
463 "cannot recognize multiple"
464 " VXLAN encapsulations");
469 flow->offset += cur_item->dst_sz;
/* Parse the action list, filling the mlx5_flow_action summary. */
471 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
472 if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
474 } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
476 } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
477 const struct rte_flow_action_queue *queue =
478 (const struct rte_flow_action_queue *)
483 if (!queue || (queue->index > (priv->rxqs_n - 1)))
484 goto exit_action_not_supported;
/* A QUEUE action combined with RSS must target one of the
 * already-recorded RSS queues. */
485 for (n = 0; n < action->queues_n; ++n) {
486 if (action->queues[n] == queue->index) {
491 if (action->queues_n > 1 && !found) {
492 rte_flow_error_set(error, ENOTSUP,
493 RTE_FLOW_ERROR_TYPE_ACTION,
495 "queue action not in RSS queues");
500 action->queues_n = 1;
501 action->queues[0] = queue->index;
503 } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
504 const struct rte_flow_action_rss *rss =
505 (const struct rte_flow_action_rss *)
509 if (!rss || !rss->num) {
510 rte_flow_error_set(error, EINVAL,
511 RTE_FLOW_ERROR_TYPE_ACTION,
/* Conversely, a previously seen QUEUE action must appear in the
 * RSS queue list. */
516 if (action->queues_n == 1) {
519 assert(action->queues_n);
520 for (n = 0; n < rss->num; ++n) {
521 if (action->queues[0] ==
528 rte_flow_error_set(error, ENOTSUP,
529 RTE_FLOW_ERROR_TYPE_ACTION,
531 "queue action not in RSS"
/* Every RSS queue index must refer to a configured Rx queue. */
536 for (n = 0; n < rss->num; ++n) {
537 if (rss->queue[n] >= priv->rxqs_n) {
538 rte_flow_error_set(error, EINVAL,
539 RTE_FLOW_ERROR_TYPE_ACTION,
541 "queue id > number of"
547 for (n = 0; n < rss->num; ++n)
548 action->queues[n] = rss->queue[n];
549 action->queues_n = rss->num;
550 } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
551 const struct rte_flow_action_mark *mark =
552 (const struct rte_flow_action_mark *)
556 rte_flow_error_set(error, EINVAL,
557 RTE_FLOW_ERROR_TYPE_ACTION,
559 "mark must be defined");
561 } else if (mark->id >= MLX5_FLOW_MARK_MAX) {
562 rte_flow_error_set(error, ENOTSUP,
563 RTE_FLOW_ERROR_TYPE_ACTION,
565 "mark must be between 0"
570 action->mark_id = mark->id;
571 } else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
574 goto exit_action_not_supported;
/* Reserve room for the action-tag spec during the sizing pass only. */
577 if (action->mark && !flow->ibv_attr && !action->drop)
578 flow->offset += sizeof(struct ibv_exp_flow_spec_action_tag);
579 if (!action->queue && !action->drop) {
580 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
581 NULL, "no valid action");
585 exit_item_not_supported:
586 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
587 items, "item not supported");
589 exit_action_not_supported:
590 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
591 actions, "action not supported");
596 * Validate a flow supported by the NIC.
598 * @see rte_flow_validate()
/* Public rte_flow_ops entry point: runs priv_flow_validate() in sizing
 * mode (flow.ibv_attr left NULL) without creating anything. */
602 mlx5_flow_validate(struct rte_eth_dev *dev,
603 const struct rte_flow_attr *attr,
604 const struct rte_flow_item items[],
605 const struct rte_flow_action actions[],
606 struct rte_flow_error *error)
608 struct priv *priv = dev->data->dev_private;
610 struct mlx5_flow flow = { .offset = sizeof(struct ibv_exp_flow_attr) };
611 struct mlx5_flow_action action = {
615 .mark_id = MLX5_FLOW_MARK_DEFAULT,
620 ret = priv_flow_validate(priv, attr, items, actions, error, &flow,
627 * Convert Ethernet item to Verbs specification.
630 * Item specification.
631 * @param default_mask[in]
632 * Default bit-masks to use when item->mask is not provided.
633 * @param data[in, out]
/* Writes an ibv_exp_flow_spec_eth at flow->offset, copies MAC addresses
 * and ether_type from spec/mask, then clears value bits not covered by
 * the mask. Resets hash_fields: L2-only rules do not hash. */
637 mlx5_flow_create_eth(const struct rte_flow_item *item,
638 const void *default_mask,
641 const struct rte_flow_item_eth *spec = item->spec;
642 const struct rte_flow_item_eth *mask = item->mask;
643 struct mlx5_flow *flow = (struct mlx5_flow *)data;
644 struct ibv_exp_flow_spec_eth *eth;
645 const unsigned int eth_size = sizeof(struct ibv_exp_flow_spec_eth);
648 ++flow->ibv_attr->num_of_specs;
/* L2 matches get the lowest precedence (highest priority value). */
649 flow->ibv_attr->priority = 2;
650 flow->hash_fields = 0;
651 eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
652 *eth = (struct ibv_exp_flow_spec_eth) {
653 .type = flow->inner | IBV_EXP_FLOW_SPEC_ETH,
660 memcpy(eth->val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
661 memcpy(eth->val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
662 eth->val.ether_type = spec->type;
663 memcpy(eth->mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
664 memcpy(eth->mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
665 eth->mask.ether_type = mask->type;
666 /* Remove unwanted bits from values. */
667 for (i = 0; i < ETHER_ADDR_LEN; ++i) {
668 eth->val.dst_mac[i] &= eth->mask.dst_mac[i];
669 eth->val.src_mac[i] &= eth->mask.src_mac[i];
671 eth->val.ether_type &= eth->mask.ether_type;
676 * Convert VLAN item to Verbs specification.
679 * Item specification.
680 * @param default_mask[in]
681 * Default bit-masks to use when item->mask is not provided.
682 * @param data[in, out]
/* Patches the TCI into the ibv_exp_flow_spec_eth written by the
 * preceding ETH item (located eth_size bytes before flow->offset)
 * instead of emitting a new spec. */
686 mlx5_flow_create_vlan(const struct rte_flow_item *item,
687 const void *default_mask,
690 const struct rte_flow_item_vlan *spec = item->spec;
691 const struct rte_flow_item_vlan *mask = item->mask;
692 struct mlx5_flow *flow = (struct mlx5_flow *)data;
693 struct ibv_exp_flow_spec_eth *eth;
694 const unsigned int eth_size = sizeof(struct ibv_exp_flow_spec_eth);
696 eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset - eth_size);
701 eth->val.vlan_tag = spec->tci;
702 eth->mask.vlan_tag = mask->tci;
703 eth->val.vlan_tag &= eth->mask.vlan_tag;
708 * Convert IPv4 item to Verbs specification.
711 * Item specification.
712 * @param default_mask[in]
713 * Default bit-masks to use when item->mask is not provided.
714 * @param data[in, out]
/* Emits an ibv_exp_flow_spec_ipv4_ext and enables IPv4 src/dst RSS
 * hashing for this flow. */
718 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
719 const void *default_mask,
722 const struct rte_flow_item_ipv4 *spec = item->spec;
723 const struct rte_flow_item_ipv4 *mask = item->mask;
724 struct mlx5_flow *flow = (struct mlx5_flow *)data;
725 struct ibv_exp_flow_spec_ipv4_ext *ipv4;
726 unsigned int ipv4_size = sizeof(struct ibv_exp_flow_spec_ipv4_ext);
728 ++flow->ibv_attr->num_of_specs;
/* L3 matches take precedence over L2-only matches. */
729 flow->ibv_attr->priority = 1;
730 flow->hash_fields = (IBV_EXP_RX_HASH_SRC_IPV4 |
731 IBV_EXP_RX_HASH_DST_IPV4);
732 ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
733 *ipv4 = (struct ibv_exp_flow_spec_ipv4_ext) {
734 .type = flow->inner | IBV_EXP_FLOW_SPEC_IPV4_EXT,
741 ipv4->val = (struct ibv_exp_flow_ipv4_ext_filter){
742 .src_ip = spec->hdr.src_addr,
743 .dst_ip = spec->hdr.dst_addr,
744 .proto = spec->hdr.next_proto_id,
745 .tos = spec->hdr.type_of_service,
747 ipv4->mask = (struct ibv_exp_flow_ipv4_ext_filter){
748 .src_ip = mask->hdr.src_addr,
749 .dst_ip = mask->hdr.dst_addr,
750 .proto = mask->hdr.next_proto_id,
751 .tos = mask->hdr.type_of_service,
753 /* Remove unwanted bits from values. */
754 ipv4->val.src_ip &= ipv4->mask.src_ip;
755 ipv4->val.dst_ip &= ipv4->mask.dst_ip;
756 ipv4->val.proto &= ipv4->mask.proto;
757 ipv4->val.tos &= ipv4->mask.tos;
762 * Convert IPv6 item to Verbs specification.
765 * Item specification.
766 * @param default_mask[in]
767 * Default bit-masks to use when item->mask is not provided.
768 * @param data[in, out]
/* Emits an ibv_exp_flow_spec_ipv6_ext and enables IPv6 src/dst RSS
 * hashing for this flow. */
772 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
773 const void *default_mask,
776 const struct rte_flow_item_ipv6 *spec = item->spec;
777 const struct rte_flow_item_ipv6 *mask = item->mask;
778 struct mlx5_flow *flow = (struct mlx5_flow *)data;
779 struct ibv_exp_flow_spec_ipv6_ext *ipv6;
780 unsigned int ipv6_size = sizeof(struct ibv_exp_flow_spec_ipv6_ext);
783 ++flow->ibv_attr->num_of_specs;
784 flow->ibv_attr->priority = 1;
785 flow->hash_fields = (IBV_EXP_RX_HASH_SRC_IPV6 |
786 IBV_EXP_RX_HASH_DST_IPV6);
787 ipv6 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
788 *ipv6 = (struct ibv_exp_flow_spec_ipv6_ext) {
789 .type = flow->inner | IBV_EXP_FLOW_SPEC_IPV6_EXT,
796 memcpy(ipv6->val.src_ip, spec->hdr.src_addr,
797 RTE_DIM(ipv6->val.src_ip));
798 memcpy(ipv6->val.dst_ip, spec->hdr.dst_addr,
799 RTE_DIM(ipv6->val.dst_ip));
800 memcpy(ipv6->mask.src_ip, mask->hdr.src_addr,
801 RTE_DIM(ipv6->mask.src_ip));
802 memcpy(ipv6->mask.dst_ip, mask->hdr.dst_addr,
803 RTE_DIM(ipv6->mask.dst_ip));
804 ipv6->mask.flow_label = mask->hdr.vtc_flow;
805 ipv6->mask.next_hdr = mask->hdr.proto;
806 ipv6->mask.hop_limit = mask->hdr.hop_limits;
807 /* Remove unwanted bits from values. */
808 for (i = 0; i < RTE_DIM(ipv6->val.src_ip); ++i) {
809 ipv6->val.src_ip[i] &= ipv6->mask.src_ip[i];
810 ipv6->val.dst_ip[i] &= ipv6->mask.dst_ip[i];
812 ipv6->val.flow_label &= ipv6->mask.flow_label;
813 ipv6->val.next_hdr &= ipv6->mask.next_hdr;
814 ipv6->val.hop_limit &= ipv6->mask.hop_limit;
819 * Convert UDP item to Verbs specification.
822 * Item specification.
823 * @param default_mask[in]
824 * Default bit-masks to use when item->mask is not provided.
825 * @param data[in, out]
/* Emits an ibv_exp_flow_spec_tcp_udp (UDP flavor) and ORs UDP port
 * fields into the RSS hash, on top of the L3 hash fields. */
829 mlx5_flow_create_udp(const struct rte_flow_item *item,
830 const void *default_mask,
833 const struct rte_flow_item_udp *spec = item->spec;
834 const struct rte_flow_item_udp *mask = item->mask;
835 struct mlx5_flow *flow = (struct mlx5_flow *)data;
836 struct ibv_exp_flow_spec_tcp_udp *udp;
837 unsigned int udp_size = sizeof(struct ibv_exp_flow_spec_tcp_udp);
839 ++flow->ibv_attr->num_of_specs;
/* L4 matches get the highest precedence (priority 0). */
840 flow->ibv_attr->priority = 0;
841 flow->hash_fields |= (IBV_EXP_RX_HASH_SRC_PORT_UDP |
842 IBV_EXP_RX_HASH_DST_PORT_UDP);
843 udp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
844 *udp = (struct ibv_exp_flow_spec_tcp_udp) {
845 .type = flow->inner | IBV_EXP_FLOW_SPEC_UDP,
852 udp->val.dst_port = spec->hdr.dst_port;
853 udp->val.src_port = spec->hdr.src_port;
854 udp->mask.dst_port = mask->hdr.dst_port;
855 udp->mask.src_port = mask->hdr.src_port;
856 /* Remove unwanted bits from values. */
857 udp->val.src_port &= udp->mask.src_port;
858 udp->val.dst_port &= udp->mask.dst_port;
863 * Convert TCP item to Verbs specification.
866 * Item specification.
867 * @param default_mask[in]
868 * Default bit-masks to use when item->mask is not provided.
869 * @param data[in, out]
/* Mirror of mlx5_flow_create_udp() for TCP: same spec structure, TCP
 * spec type and TCP port hash fields. */
873 mlx5_flow_create_tcp(const struct rte_flow_item *item,
874 const void *default_mask,
877 const struct rte_flow_item_tcp *spec = item->spec;
878 const struct rte_flow_item_tcp *mask = item->mask;
879 struct mlx5_flow *flow = (struct mlx5_flow *)data;
880 struct ibv_exp_flow_spec_tcp_udp *tcp;
881 unsigned int tcp_size = sizeof(struct ibv_exp_flow_spec_tcp_udp);
883 ++flow->ibv_attr->num_of_specs;
884 flow->ibv_attr->priority = 0;
885 flow->hash_fields |= (IBV_EXP_RX_HASH_SRC_PORT_TCP |
886 IBV_EXP_RX_HASH_DST_PORT_TCP);
887 tcp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
888 *tcp = (struct ibv_exp_flow_spec_tcp_udp) {
889 .type = flow->inner | IBV_EXP_FLOW_SPEC_TCP,
896 tcp->val.dst_port = spec->hdr.dst_port;
897 tcp->val.src_port = spec->hdr.src_port;
898 tcp->mask.dst_port = mask->hdr.dst_port;
899 tcp->mask.src_port = mask->hdr.src_port;
900 /* Remove unwanted bits from values. */
901 tcp->val.src_port &= tcp->mask.src_port;
902 tcp->val.dst_port &= tcp->mask.dst_port;
907 * Convert VXLAN item to Verbs specification.
910 * Item specification.
911 * @param default_mask[in]
912 * Default bit-masks to use when item->mask is not provided.
913 * @param data[in, out]
/* Emits an ibv_exp_flow_spec_tunnel matching the 24-bit VNI, then sets
 * flow->inner so that all following items are tagged as inner headers. */
917 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
918 const void *default_mask,
921 const struct rte_flow_item_vxlan *spec = item->spec;
922 const struct rte_flow_item_vxlan *mask = item->mask;
923 struct mlx5_flow *flow = (struct mlx5_flow *)data;
924 struct ibv_exp_flow_spec_tunnel *vxlan;
925 unsigned int size = sizeof(struct ibv_exp_flow_spec_tunnel);
931 ++flow->ibv_attr->num_of_specs;
932 flow->ibv_attr->priority = 0;
934 vxlan = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
935 *vxlan = (struct ibv_exp_flow_spec_tunnel) {
936 .type = flow->inner | IBV_EXP_FLOW_SPEC_VXLAN_TUNNEL,
939 flow->inner = IBV_EXP_FLOW_SPEC_INNER;
/* Pack the 3-byte VNI into the tunnel-id word via the local union
 * (declaration not visible in this chunk). */
944 memcpy(&id.vni[1], spec->vni, 3);
945 vxlan->val.tunnel_id = id.vlan_id;
946 memcpy(&id.vni[1], mask->vni, 3);
947 vxlan->mask.tunnel_id = id.vlan_id;
948 /* Remove unwanted bits from values. */
949 vxlan->val.tunnel_id &= vxlan->mask.tunnel_id;
954 * Convert mark/flag action to Verbs specification.
957 * Pointer to MLX5 flow structure.
/* Appends an action-tag spec carrying the encoded mark identifier
 * (mlx5_flow_mark_set()) at the current offset. */
962 mlx5_flow_create_flag_mark(struct mlx5_flow *flow, uint32_t mark_id)
964 struct ibv_exp_flow_spec_action_tag *tag;
965 unsigned int size = sizeof(struct ibv_exp_flow_spec_action_tag);
967 tag = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
968 *tag = (struct ibv_exp_flow_spec_action_tag){
969 .type = IBV_EXP_FLOW_SPEC_ACTION_TAG,
971 .tag_id = mlx5_flow_mark_set(mark_id),
973 ++flow->ibv_attr->num_of_specs;
978 * Complete flow rule creation with a drop queue.
981 * Pointer to private structure.
983 * MLX5 flow attributes (filled by mlx5_flow_validate()).
985 * Perform verbose error reporting if not NULL.
988 * A flow if the rule could be created.
/* Attaches the prepared Verbs attributes to the shared drop-queue QP;
 * takes ownership of flow->ibv_attr on success.
 * NOTE(review): error-path lines are missing from this chunk. */
990 static struct rte_flow *
991 priv_flow_create_action_queue_drop(struct priv *priv,
992 struct mlx5_flow *flow,
993 struct rte_flow_error *error)
995 struct rte_flow *rte_flow;
999 rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow), 0);
1001 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1002 NULL, "cannot allocate flow memory");
1006 rte_flow->ibv_attr = flow->ibv_attr;
1007 rte_flow->qp = priv->flow_drop_queue->qp;
1010 rte_flow->ibv_flow = ibv_exp_create_flow(rte_flow->qp,
1011 rte_flow->ibv_attr);
1012 if (!rte_flow->ibv_flow) {
1013 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1014 NULL, "flow rule creation failure");
1025 * Complete flow rule creation.
1028 * Pointer to private structure.
1030 * MLX5 flow attributes (filled by mlx5_flow_validate()).
1032 * Target action structure.
1034 * Perform verbose error reporting if not NULL.
1037 * A flow if the rule could be created.
/*
 * Builds the full Verbs pipeline for a queue/RSS target: collects the
 * target WQs, pads the list to a power of two for the indirection table,
 * creates the RSS hash QP and finally attaches the flow.
 * NOTE(review): several interior lines (WQ padding body, returns, error
 * labels) are missing from this chunk; code left byte-identical.
 */
1039 static struct rte_flow *
1040 priv_flow_create_action_queue(struct priv *priv,
1041 struct mlx5_flow *flow,
1042 struct mlx5_flow_action *action,
1043 struct rte_flow_error *error)
1045 struct rte_flow *rte_flow;
/* Indirection table size must be a power of two. */
1048 const unsigned int wqs_n = 1 << log2above(action->queues_n);
1049 struct ibv_exp_wq *wqs[wqs_n];
1053 assert(!action->drop);
1054 rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow) +
1055 sizeof(*rte_flow->rxqs) * action->queues_n, 0);
1057 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1058 NULL, "cannot allocate flow memory");
1061 for (i = 0; i < action->queues_n; ++i) {
1062 struct rxq_ctrl *rxq;
1064 rxq = container_of((*priv->rxqs)[action->queues[i]],
1065 struct rxq_ctrl, rxq);
1067 rte_flow->rxqs[i] = &rxq->rxq;
/* Propagate the mark flag to every target Rx queue. */
1069 rxq->rxq.mark |= action->mark;
1071 /* finalise indirection table. */
1072 for (j = 0; i < wqs_n; ++i, ++j) {
1074 if (j == action->queues_n)
1077 rte_flow->mark = action->mark;
1078 rte_flow->ibv_attr = flow->ibv_attr;
1079 rte_flow->hash_fields = flow->hash_fields;
1080 rte_flow->ind_table = ibv_exp_create_rwq_ind_table(
1082 &(struct ibv_exp_rwq_ind_table_init_attr){
1084 .log_ind_tbl_size = log2above(action->queues_n),
1088 if (!rte_flow->ind_table) {
1089 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1090 NULL, "cannot allocate indirection table");
1093 rte_flow->qp = ibv_exp_create_qp(
1095 &(struct ibv_exp_qp_init_attr){
1096 .qp_type = IBV_QPT_RAW_PACKET,
1098 IBV_EXP_QP_INIT_ATTR_PD |
1099 IBV_EXP_QP_INIT_ATTR_PORT |
1100 IBV_EXP_QP_INIT_ATTR_RX_HASH,
1102 .rx_hash_conf = &(struct ibv_exp_rx_hash_conf){
1104 IBV_EXP_RX_HASH_FUNC_TOEPLITZ,
1105 .rx_hash_key_len = rss_hash_default_key_len,
1106 .rx_hash_key = rss_hash_default_key,
1107 .rx_hash_fields_mask = rte_flow->hash_fields,
1108 .rwq_ind_tbl = rte_flow->ind_table,
1110 .port_num = priv->port,
1112 if (!rte_flow->qp) {
1113 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1114 NULL, "cannot allocate QP");
1119 rte_flow->ibv_flow = ibv_exp_create_flow(rte_flow->qp,
1120 rte_flow->ibv_attr);
1121 if (!rte_flow->ibv_flow) {
1122 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1123 NULL, "flow rule creation failure");
/* Error path: tear down whatever was created, in reverse order. */
1130 ibv_destroy_qp(rte_flow->qp);
1131 if (rte_flow->ind_table)
1132 ibv_exp_destroy_rwq_ind_table(rte_flow->ind_table);
1141 * Pointer to private structure.
1143 * Flow rule attributes.
1144 * @param[in] pattern
1145 * Pattern specification (list terminated by the END pattern item).
1146 * @param[in] actions
1147 * Associated actions (list terminated by the END action).
1149 * Perform verbose error reporting if not NULL.
1152 * A flow on success, NULL otherwise.
/*
 * Two-pass flow creation: first priv_flow_validate() sizes the Verbs
 * attribute buffer (flow.ibv_attr == NULL), then the buffer is allocated
 * and a second validated pass fills it before handing off to the
 * drop-queue or queue/RSS completion helper.
 */
1154 static struct rte_flow *
1155 priv_flow_create(struct priv *priv,
1156 const struct rte_flow_attr *attr,
1157 const struct rte_flow_item items[],
1158 const struct rte_flow_action actions[],
1159 struct rte_flow_error *error)
1161 struct rte_flow *rte_flow;
1162 struct mlx5_flow flow = { .offset = sizeof(struct ibv_exp_flow_attr), };
1163 struct mlx5_flow_action action = {
1167 .mark_id = MLX5_FLOW_MARK_DEFAULT,
1172 err = priv_flow_validate(priv, attr, items, actions, error, &flow,
1176 flow.ibv_attr = rte_malloc(__func__, flow.offset, 0);
/* Reset offset: the fill pass writes specs right after the header. */
1177 flow.offset = sizeof(struct ibv_exp_flow_attr);
1178 if (!flow.ibv_attr) {
1179 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1180 NULL, "cannot allocate ibv_attr memory");
1183 *flow.ibv_attr = (struct ibv_exp_flow_attr){
1184 .type = IBV_EXP_FLOW_ATTR_NORMAL,
1185 .size = sizeof(struct ibv_exp_flow_attr),
1186 .priority = attr->priority,
1193 flow.hash_fields = 0;
/* Second pass cannot fail: the same inputs already validated above. */
1194 claim_zero(priv_flow_validate(priv, attr, items, actions,
1195 error, &flow, &action));
1196 if (action.mark && !action.drop) {
1197 mlx5_flow_create_flag_mark(&flow, action.mark_id);
1198 flow.offset += sizeof(struct ibv_exp_flow_spec_action_tag);
1202 priv_flow_create_action_queue_drop(priv, &flow, error);
1204 rte_flow = priv_flow_create_action_queue(priv, &flow, &action,
/* Error path: the attribute buffer was not handed off, free it. */
1210 rte_free(flow.ibv_attr);
1217 * @see rte_flow_create()
/* Public rte_flow_ops entry point: creates the flow and, on success,
 * links it into the per-port flow list. */
1221 mlx5_flow_create(struct rte_eth_dev *dev,
1222 const struct rte_flow_attr *attr,
1223 const struct rte_flow_item items[],
1224 const struct rte_flow_action actions[],
1225 struct rte_flow_error *error)
1227 struct priv *priv = dev->data->dev_private;
1228 struct rte_flow *flow;
1231 flow = priv_flow_create(priv, attr, items, actions, error);
1233 TAILQ_INSERT_TAIL(&priv->flows, flow, next);
1234 DEBUG("Flow created %p", (void *)flow);
1244 * Pointer to private structure.
/*
 * Unlinks and destroys one flow: tears down its Verbs objects, then
 * clears the mark flag on each of its Rx queues unless another marked
 * flow still references that queue.
 * NOTE(review): several interior lines (mark_n accounting, frees) are
 * missing from this chunk; code left byte-identical.
 */
1249 priv_flow_destroy(struct priv *priv,
1250 struct rte_flow *flow)
1252 TAILQ_REMOVE(&priv->flows, flow, next);
1254 claim_zero(ibv_exp_destroy_flow(flow->ibv_flow));
1258 claim_zero(ibv_destroy_qp(flow->qp));
1259 if (flow->ind_table)
1260 claim_zero(ibv_exp_destroy_rwq_ind_table(flow->ind_table));
/* Drop flows own their own WQ/CQ (regular flows share rxq resources). */
1261 if (flow->drop && flow->wq)
1262 claim_zero(ibv_exp_destroy_wq(flow->wq));
1263 if (flow->drop && flow->cq)
1264 claim_zero(ibv_destroy_cq(flow->cq));
1266 struct rte_flow *tmp;
1268 uint32_t mark_n = 0;
1272 * To remove the mark from the queue, the queue must not be
1273 * present in any other marked flow (RSS or not).
1275 for (queue_n = 0; queue_n < flow->rxqs_n; ++queue_n) {
1276 rxq = flow->rxqs[queue_n];
1277 for (tmp = TAILQ_FIRST(&priv->flows);
1279 tmp = TAILQ_NEXT(tmp, next)) {
1285 tqueue_n < tmp->rxqs_n;
1289 trxq = tmp->rxqs[tqueue_n];
1294 rxq->mark = !!mark_n;
1298 rte_free(flow->ibv_attr);
1299 DEBUG("Flow destroyed %p", (void *)flow);
1306 * @see rte_flow_destroy()
/* Public rte_flow_ops entry point: thin wrapper over
 * priv_flow_destroy(). */
1310 mlx5_flow_destroy(struct rte_eth_dev *dev,
1311 struct rte_flow *flow,
1312 struct rte_flow_error *error)
1314 struct priv *priv = dev->data->dev_private;
1318 priv_flow_destroy(priv, flow);
1324 * Destroy all flows.
1327 * Pointer to private structure.
/* Drains the per-port flow list; priv_flow_destroy() unlinks each entry,
 * so the loop terminates when the list becomes empty. */
1330 priv_flow_flush(struct priv *priv)
1332 while (!TAILQ_EMPTY(&priv->flows)) {
1333 struct rte_flow *flow;
1335 flow = TAILQ_FIRST(&priv->flows);
1336 priv_flow_destroy(priv, flow);
1341 * Destroy all flows.
1343 * @see rte_flow_flush()
/* Public rte_flow_ops entry point: thin wrapper over priv_flow_flush(). */
1347 mlx5_flow_flush(struct rte_eth_dev *dev,
1348 struct rte_flow_error *error)
1350 struct priv *priv = dev->data->dev_private;
1354 priv_flow_flush(priv);
1360 * Create drop queue.
1363 * Pointer to private structure.
/*
 * Allocates the shared drop-queue resources (CQ, MLX5_DROP_WQ_N WQs,
 * indirection table, QP) and stores them in priv->flow_drop_queue.
 * Packets steered to this QP are never polled, effectively dropping them.
 * NOTE(review): some interior lines (returns, error gotos, struct fields)
 * are missing from this chunk; code left byte-identical.
 */
1369 priv_flow_create_drop_queue(struct priv *priv)
1371 struct rte_flow_drop *fdq = NULL;
1376 fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
1378 WARN("cannot allocate memory for drop queue");
1381 fdq->cq = ibv_exp_create_cq(priv->ctx, 1, NULL, NULL, 0,
1382 &(struct ibv_exp_cq_init_attr){
1386 WARN("cannot allocate CQ for drop queue");
1389 for (i = 0; i != MLX5_DROP_WQ_N; ++i) {
1390 fdq->wqs[i] = ibv_exp_create_wq(priv->ctx,
1391 &(struct ibv_exp_wq_init_attr){
1392 .wq_type = IBV_EXP_WQT_RQ,
1399 WARN("cannot allocate WQ for drop queue");
1403 fdq->ind_table = ibv_exp_create_rwq_ind_table(priv->ctx,
1404 &(struct ibv_exp_rwq_ind_table_init_attr){
1406 .log_ind_tbl_size = 0,
1407 .ind_tbl = fdq->wqs,
1410 if (!fdq->ind_table) {
1411 WARN("cannot allocate indirection table for drop queue");
1414 fdq->qp = ibv_exp_create_qp(priv->ctx,
1415 &(struct ibv_exp_qp_init_attr){
1416 .qp_type = IBV_QPT_RAW_PACKET,
1418 IBV_EXP_QP_INIT_ATTR_PD |
1419 IBV_EXP_QP_INIT_ATTR_PORT |
1420 IBV_EXP_QP_INIT_ATTR_RX_HASH,
1422 .rx_hash_conf = &(struct ibv_exp_rx_hash_conf){
1424 IBV_EXP_RX_HASH_FUNC_TOEPLITZ,
1425 .rx_hash_key_len = rss_hash_default_key_len,
1426 .rx_hash_key = rss_hash_default_key,
/* No hashing needed: everything lands on the single drop WQ. */
1427 .rx_hash_fields_mask = 0,
1428 .rwq_ind_tbl = fdq->ind_table,
1430 .port_num = priv->port,
1433 WARN("cannot allocate QP for drop queue");
1436 priv->flow_drop_queue = fdq;
/* Error path: tear down partially created resources, newest first. */
1440 claim_zero(ibv_destroy_qp(fdq->qp));
1442 claim_zero(ibv_exp_destroy_rwq_ind_table(fdq->ind_table));
1443 for (i = 0; i != MLX5_DROP_WQ_N; ++i) {
1445 claim_zero(ibv_exp_destroy_wq(fdq->wqs[i]));
1448 claim_zero(ibv_destroy_cq(fdq->cq));
1451 priv->flow_drop_queue = NULL;
1456 * Delete drop queue.
1459 * Pointer to private structure.
/* Releases the shared drop-queue resources in reverse creation order
 * (QP, indirection table, WQs, CQ) and clears priv->flow_drop_queue. */
1462 priv_flow_delete_drop_queue(struct priv *priv)
1464 struct rte_flow_drop *fdq = priv->flow_drop_queue;
1470 claim_zero(ibv_destroy_qp(fdq->qp));
1472 claim_zero(ibv_exp_destroy_rwq_ind_table(fdq->ind_table));
1473 for (i = 0; i != MLX5_DROP_WQ_N; ++i) {
1475 claim_zero(ibv_exp_destroy_wq(fdq->wqs[i]));
1478 claim_zero(ibv_destroy_cq(fdq->cq))
1486 * Called by dev_stop() to remove all flows.
1489 * Pointer to private structure.
/* Detaches every flow from hardware (keeping the software state so
 * priv_flow_start() can re-apply them), clears queue mark flags, then
 * deletes the drop queue. Reverse iteration mirrors creation order. */
1492 priv_flow_stop(struct priv *priv)
1494 struct rte_flow *flow;
1496 TAILQ_FOREACH_REVERSE(flow, &priv->flows, mlx5_flows, next) {
1497 claim_zero(ibv_exp_destroy_flow(flow->ibv_flow));
1498 flow->ibv_flow = NULL;
1502 for (n = 0; n < flow->rxqs_n; ++n)
1503 flow->rxqs[n]->mark = 0;
1505 DEBUG("Flow %p removed", (void *)flow);
1507 priv_flow_delete_drop_queue(priv);
1514 * Pointer to private structure.
1517 * 0 on success, a errno value otherwise and rte_errno is set.
/* Called on dev_start(): recreates the drop queue, then re-applies every
 * remembered flow to hardware and restores queue mark flags.
 * NOTE(review): some interior lines (QP selection branch, returns) are
 * missing from this chunk; code left byte-identical. */
1520 priv_flow_start(struct priv *priv)
1523 struct rte_flow *flow;
1525 ret = priv_flow_create_drop_queue(priv);
1528 TAILQ_FOREACH(flow, &priv->flows, next) {
1532 qp = priv->flow_drop_queue->qp;
1535 flow->ibv_flow = ibv_exp_create_flow(qp, flow->ibv_attr);
1536 if (!flow->ibv_flow) {
1537 DEBUG("Flow %p cannot be applied", (void *)flow);
1541 DEBUG("Flow %p applied", (void *)flow);
1545 for (n = 0; n < flow->rxqs_n; ++n)
1546 flow->rxqs[n]->mark = 1;
1553 * Verify if the Rx queue is used in a flow.
1556 * Pointer to private structure.
1558 * Pointer to the queue to search.
1561 * Nonzero if the queue is used by a flow.
1564 priv_flow_rxq_in_use(struct priv *priv, struct rxq *rxq)
1566 struct rte_flow *flow;
1568 for (flow = TAILQ_FIRST(&priv->flows);
1570 flow = TAILQ_NEXT(flow, next)) {
1575 for (n = 0; n < flow->rxqs_n; ++n) {
1576 if (flow->rxqs[n] == rxq)