4 * Copyright 2016 6WIND S.A.
5 * Copyright 2016 Mellanox.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of 6WIND S.A. nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 #include <sys/queue.h>
38 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
40 #pragma GCC diagnostic ignored "-Wpedantic"
42 #include <infiniband/verbs.h>
44 #pragma GCC diagnostic error "-Wpedantic"
47 #include <rte_ethdev.h>
49 #include <rte_flow_driver.h>
50 #include <rte_malloc.h>
55 /* Number of Work Queues necessary for the DROP queue. */
56 #define MLX5_DROP_WQ_N 4
59 mlx5_flow_create_eth(const struct rte_flow_item *item,
60 const void *default_mask,
64 mlx5_flow_create_vlan(const struct rte_flow_item *item,
65 const void *default_mask,
69 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
70 const void *default_mask,
74 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
75 const void *default_mask,
79 mlx5_flow_create_udp(const struct rte_flow_item *item,
80 const void *default_mask,
84 mlx5_flow_create_tcp(const struct rte_flow_item *item,
85 const void *default_mask,
89 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
90 const void *default_mask,
94 LIST_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
95 struct ibv_exp_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
96 struct ibv_exp_rwq_ind_table *ind_table; /**< Indirection table. */
97 struct ibv_qp *qp; /**< Verbs queue pair. */
98 struct ibv_exp_flow *ibv_flow; /**< Verbs flow. */
99 struct ibv_exp_wq *wq; /**< Verbs work queue. */
100 struct ibv_cq *cq; /**< Verbs completion queue. */
101 uint16_t rxqs_n; /**< Number of queues in this flow, 0 if drop queue. */
102 uint32_t mark:1; /**< Set if the flow is marked. */
103 uint32_t drop:1; /**< Drop queue. */
104 uint64_t hash_fields; /**< Fields that participate in the hash. */
105 struct rxq *rxqs[]; /**< Pointer to the queues array. */
108 /** Static initializer for items. */
110 (const enum rte_flow_item_type []){ \
111 __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
114 /** Structure to generate a simple graph of layers supported by the NIC. */
115 struct mlx5_flow_items {
116 /** List of possible actions for these items. */
117 const enum rte_flow_action_type *const actions;
118 /** Bit-masks corresponding to the possibilities for the item. */
121 * Default bit-masks to use when item->mask is not provided. When
122 * \default_mask is also NULL, the full supported bit-mask (\mask) is
125 const void *default_mask;
126 /** Bit-masks size in bytes. */
127 const unsigned int mask_sz;
129 * Conversion function from rte_flow to NIC specific flow.
132 * rte_flow item to convert.
133 * @param default_mask
134 * Default bit-masks to use when item->mask is not provided.
136 * Internal structure to store the conversion.
139 * 0 on success, negative value otherwise.
141 int (*convert)(const struct rte_flow_item *item,
142 const void *default_mask,
144 /** Size in bytes of the destination structure. */
145 const unsigned int dst_sz;
146 /** List of possible following items. */
147 const enum rte_flow_item_type *const items;
150 /** Valid actions for this PMD. */
151 static const enum rte_flow_action_type valid_actions[] = {
152 RTE_FLOW_ACTION_TYPE_DROP,
153 RTE_FLOW_ACTION_TYPE_QUEUE,
154 RTE_FLOW_ACTION_TYPE_MARK,
155 RTE_FLOW_ACTION_TYPE_FLAG,
156 RTE_FLOW_ACTION_TYPE_END,
159 /** Graph of supported items and associated actions. */
160 static const struct mlx5_flow_items mlx5_flow_items[] = {
161 [RTE_FLOW_ITEM_TYPE_END] = {
162 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
163 RTE_FLOW_ITEM_TYPE_VXLAN),
165 [RTE_FLOW_ITEM_TYPE_ETH] = {
166 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
167 RTE_FLOW_ITEM_TYPE_IPV4,
168 RTE_FLOW_ITEM_TYPE_IPV6),
169 .actions = valid_actions,
170 .mask = &(const struct rte_flow_item_eth){
171 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
172 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
175 .default_mask = &rte_flow_item_eth_mask,
176 .mask_sz = sizeof(struct rte_flow_item_eth),
177 .convert = mlx5_flow_create_eth,
178 .dst_sz = sizeof(struct ibv_exp_flow_spec_eth),
180 [RTE_FLOW_ITEM_TYPE_VLAN] = {
181 .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
182 RTE_FLOW_ITEM_TYPE_IPV6),
183 .actions = valid_actions,
184 .mask = &(const struct rte_flow_item_vlan){
187 .default_mask = &rte_flow_item_vlan_mask,
188 .mask_sz = sizeof(struct rte_flow_item_vlan),
189 .convert = mlx5_flow_create_vlan,
192 [RTE_FLOW_ITEM_TYPE_IPV4] = {
193 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
194 RTE_FLOW_ITEM_TYPE_TCP),
195 .actions = valid_actions,
196 .mask = &(const struct rte_flow_item_ipv4){
200 .type_of_service = -1,
204 .default_mask = &rte_flow_item_ipv4_mask,
205 .mask_sz = sizeof(struct rte_flow_item_ipv4),
206 .convert = mlx5_flow_create_ipv4,
207 .dst_sz = sizeof(struct ibv_exp_flow_spec_ipv4_ext),
209 [RTE_FLOW_ITEM_TYPE_IPV6] = {
210 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
211 RTE_FLOW_ITEM_TYPE_TCP),
212 .actions = valid_actions,
213 .mask = &(const struct rte_flow_item_ipv6){
216 0xff, 0xff, 0xff, 0xff,
217 0xff, 0xff, 0xff, 0xff,
218 0xff, 0xff, 0xff, 0xff,
219 0xff, 0xff, 0xff, 0xff,
222 0xff, 0xff, 0xff, 0xff,
223 0xff, 0xff, 0xff, 0xff,
224 0xff, 0xff, 0xff, 0xff,
225 0xff, 0xff, 0xff, 0xff,
232 .default_mask = &rte_flow_item_ipv6_mask,
233 .mask_sz = sizeof(struct rte_flow_item_ipv6),
234 .convert = mlx5_flow_create_ipv6,
235 .dst_sz = sizeof(struct ibv_exp_flow_spec_ipv6_ext),
237 [RTE_FLOW_ITEM_TYPE_UDP] = {
238 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
239 .actions = valid_actions,
240 .mask = &(const struct rte_flow_item_udp){
246 .default_mask = &rte_flow_item_udp_mask,
247 .mask_sz = sizeof(struct rte_flow_item_udp),
248 .convert = mlx5_flow_create_udp,
249 .dst_sz = sizeof(struct ibv_exp_flow_spec_tcp_udp),
251 [RTE_FLOW_ITEM_TYPE_TCP] = {
252 .actions = valid_actions,
253 .mask = &(const struct rte_flow_item_tcp){
259 .default_mask = &rte_flow_item_tcp_mask,
260 .mask_sz = sizeof(struct rte_flow_item_tcp),
261 .convert = mlx5_flow_create_tcp,
262 .dst_sz = sizeof(struct ibv_exp_flow_spec_tcp_udp),
264 [RTE_FLOW_ITEM_TYPE_VXLAN] = {
265 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
266 .actions = valid_actions,
267 .mask = &(const struct rte_flow_item_vxlan){
268 .vni = "\xff\xff\xff",
270 .default_mask = &rte_flow_item_vxlan_mask,
271 .mask_sz = sizeof(struct rte_flow_item_vxlan),
272 .convert = mlx5_flow_create_vxlan,
273 .dst_sz = sizeof(struct ibv_exp_flow_spec_tunnel),
277 /** Structure to pass to the conversion function. */
279 struct ibv_exp_flow_attr *ibv_attr; /**< Verbs attribute. */
280 unsigned int offset; /**< Offset in bytes in the ibv_attr buffer. */
281 uint32_t inner; /**< Set once VXLAN is encountered. */
282 uint64_t hash_fields; /**< Fields that participate in the hash. */
285 /** Structure for Drop queue. */
286 struct rte_flow_drop {
287 struct ibv_exp_rwq_ind_table *ind_table; /**< Indirection table. */
288 struct ibv_qp *qp; /**< Verbs queue pair. */
289 struct ibv_exp_wq *wqs[MLX5_DROP_WQ_N]; /**< Verbs work queue. */
290 struct ibv_cq *cq; /**< Verbs completion queue. */
293 struct mlx5_flow_action {
294 uint32_t queue:1; /**< Target is a receive queue. */
295 uint32_t drop:1; /**< Target is a drop queue. */
296 uint32_t mark:1; /**< Mark is present in the flow. */
297 uint32_t mark_id; /**< Mark identifier. */
298 uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use. */
299 uint16_t queues_n; /**< Number of entries in queues[]. */
303 * Check support for a given item.
306 * Item specification.
308 * Bit-masks covering supported fields to compare with spec, last and mask in
311 * Bit-Mask size in bytes.
317 mlx5_flow_item_validate(const struct rte_flow_item *item,
318 const uint8_t *mask, unsigned int size)
322 if (!item->spec && (item->mask || item->last))
324 if (item->spec && !item->mask) {
326 const uint8_t *spec = item->spec;
328 for (i = 0; i < size; ++i)
329 if ((spec[i] | mask[i]) != mask[i])
332 if (item->last && !item->mask) {
334 const uint8_t *spec = item->last;
336 for (i = 0; i < size; ++i)
337 if ((spec[i] | mask[i]) != mask[i])
342 const uint8_t *spec = item->mask;
344 for (i = 0; i < size; ++i)
345 if ((spec[i] | mask[i]) != mask[i])
348 if (item->spec && item->last) {
351 const uint8_t *apply = mask;
356 for (i = 0; i < size; ++i) {
357 spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
358 last[i] = ((const uint8_t *)item->last)[i] & apply[i];
360 ret = memcmp(spec, last, size);
366 * Validate a flow supported by the NIC.
369 * Pointer to private structure.
371 * Flow rule attributes.
373 * Pattern specification (list terminated by the END pattern item).
375 * Associated actions (list terminated by the END action).
377 * Perform verbose error reporting if not NULL.
378 * @param[in, out] flow
379 * Flow structure to update.
380 * @param[in, out] action
381 * Action structure to update.
384 * 0 on success, a negative errno value otherwise and rte_errno is set.
387 priv_flow_validate(struct priv *priv,
388 const struct rte_flow_attr *attr,
389 const struct rte_flow_item items[],
390 const struct rte_flow_action actions[],
391 struct rte_flow_error *error,
392 struct mlx5_flow *flow,
393 struct mlx5_flow_action *action)
395 const struct mlx5_flow_items *cur_item = mlx5_flow_items;
399 rte_flow_error_set(error, ENOTSUP,
400 RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
402 "groups are not supported");
405 if (attr->priority) {
406 rte_flow_error_set(error, ENOTSUP,
407 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
409 "priorities are not supported");
413 rte_flow_error_set(error, ENOTSUP,
414 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
416 "egress is not supported");
419 if (!attr->ingress) {
420 rte_flow_error_set(error, ENOTSUP,
421 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
423 "only ingress is supported");
426 for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
427 const struct mlx5_flow_items *token = NULL;
431 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
435 cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
437 if (cur_item->items[i] == items->type) {
438 token = &mlx5_flow_items[items->type];
443 goto exit_item_not_supported;
445 err = mlx5_flow_item_validate(items,
446 (const uint8_t *)cur_item->mask,
449 goto exit_item_not_supported;
450 if (flow->ibv_attr && cur_item->convert) {
451 err = cur_item->convert(items,
452 (cur_item->default_mask ?
453 cur_item->default_mask :
457 goto exit_item_not_supported;
458 } else if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
460 rte_flow_error_set(error, ENOTSUP,
461 RTE_FLOW_ERROR_TYPE_ITEM,
463 "cannot recognize multiple"
464 " VXLAN encapsulations");
469 flow->offset += cur_item->dst_sz;
471 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
472 if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
474 } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
476 } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
477 const struct rte_flow_action_queue *queue =
478 (const struct rte_flow_action_queue *)
483 if (!queue || (queue->index > (priv->rxqs_n - 1)))
484 goto exit_action_not_supported;
485 for (n = 0; n < action->queues_n; ++n) {
486 if (action->queues[n] == queue->index) {
491 if (action->queues_n > 1 && !found) {
492 rte_flow_error_set(error, ENOTSUP,
493 RTE_FLOW_ERROR_TYPE_ACTION,
495 "queue action not in RSS queues");
500 action->queues_n = 1;
501 action->queues[0] = queue->index;
503 } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
504 const struct rte_flow_action_rss *rss =
505 (const struct rte_flow_action_rss *)
509 if (action->queues_n == 1) {
512 assert(action->queues_n);
513 for (n = 0; n < rss->num; ++n) {
514 if (action->queues[0] ==
521 rte_flow_error_set(error, ENOTSUP,
522 RTE_FLOW_ERROR_TYPE_ACTION,
524 "queue action not in RSS"
529 for (n = 0; n < rss->num; ++n) {
530 if (rss->queue[n] >= priv->rxqs_n) {
531 rte_flow_error_set(error, EINVAL,
532 RTE_FLOW_ERROR_TYPE_ACTION,
534 "queue id > number of"
540 for (n = 0; n < rss->num; ++n)
541 action->queues[n] = rss->queue[n];
542 action->queues_n = rss->num;
543 } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
544 const struct rte_flow_action_mark *mark =
545 (const struct rte_flow_action_mark *)
549 rte_flow_error_set(error, EINVAL,
550 RTE_FLOW_ERROR_TYPE_ACTION,
552 "mark must be defined");
554 } else if (mark->id >= MLX5_FLOW_MARK_MAX) {
555 rte_flow_error_set(error, ENOTSUP,
556 RTE_FLOW_ERROR_TYPE_ACTION,
558 "mark must be between 0"
563 action->mark_id = mark->id;
564 } else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
567 goto exit_action_not_supported;
570 if (action->mark && !flow->ibv_attr && !action->drop)
571 flow->offset += sizeof(struct ibv_exp_flow_spec_action_tag);
572 if (!action->queue && !action->drop) {
573 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
574 NULL, "no valid action");
578 exit_item_not_supported:
579 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
580 items, "item not supported");
582 exit_action_not_supported:
583 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
584 actions, "action not supported");
589 * Validate a flow supported by the NIC.
591 * @see rte_flow_validate()
595 mlx5_flow_validate(struct rte_eth_dev *dev,
596 const struct rte_flow_attr *attr,
597 const struct rte_flow_item items[],
598 const struct rte_flow_action actions[],
599 struct rte_flow_error *error)
601 struct priv *priv = dev->data->dev_private;
603 struct mlx5_flow flow = { .offset = sizeof(struct ibv_exp_flow_attr) };
604 struct mlx5_flow_action action = {
608 .mark_id = MLX5_FLOW_MARK_DEFAULT,
613 ret = priv_flow_validate(priv, attr, items, actions, error, &flow,
620 * Convert Ethernet item to Verbs specification.
623 * Item specification.
624 * @param[in] default_mask
625 * Default bit-masks to use when item->mask is not provided.
626 * @param[in, out] data
630 mlx5_flow_create_eth(const struct rte_flow_item *item,
631 const void *default_mask,
634 const struct rte_flow_item_eth *spec = item->spec;
635 const struct rte_flow_item_eth *mask = item->mask;
636 struct mlx5_flow *flow = (struct mlx5_flow *)data;
637 struct ibv_exp_flow_spec_eth *eth;
638 const unsigned int eth_size = sizeof(struct ibv_exp_flow_spec_eth);
641 ++flow->ibv_attr->num_of_specs;
642 flow->ibv_attr->priority = 2;
643 flow->hash_fields = 0;
644 eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
645 *eth = (struct ibv_exp_flow_spec_eth) {
646 .type = flow->inner | IBV_EXP_FLOW_SPEC_ETH,
653 memcpy(eth->val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
654 memcpy(eth->val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
655 eth->val.ether_type = spec->type;
656 memcpy(eth->mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
657 memcpy(eth->mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
658 eth->mask.ether_type = mask->type;
659 /* Remove unwanted bits from values. */
660 for (i = 0; i < ETHER_ADDR_LEN; ++i) {
661 eth->val.dst_mac[i] &= eth->mask.dst_mac[i];
662 eth->val.src_mac[i] &= eth->mask.src_mac[i];
664 eth->val.ether_type &= eth->mask.ether_type;
669 * Convert VLAN item to Verbs specification.
672 * Item specification.
673 * @param[in] default_mask
674 * Default bit-masks to use when item->mask is not provided.
675 * @param[in, out] data
679 mlx5_flow_create_vlan(const struct rte_flow_item *item,
680 const void *default_mask,
683 const struct rte_flow_item_vlan *spec = item->spec;
684 const struct rte_flow_item_vlan *mask = item->mask;
685 struct mlx5_flow *flow = (struct mlx5_flow *)data;
686 struct ibv_exp_flow_spec_eth *eth;
687 const unsigned int eth_size = sizeof(struct ibv_exp_flow_spec_eth);
689 eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset - eth_size);
694 eth->val.vlan_tag = spec->tci;
695 eth->mask.vlan_tag = mask->tci;
696 eth->val.vlan_tag &= eth->mask.vlan_tag;
701 * Convert IPv4 item to Verbs specification.
704 * Item specification.
705 * @param[in] default_mask
706 * Default bit-masks to use when item->mask is not provided.
707 * @param[in, out] data
711 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
712 const void *default_mask,
715 const struct rte_flow_item_ipv4 *spec = item->spec;
716 const struct rte_flow_item_ipv4 *mask = item->mask;
717 struct mlx5_flow *flow = (struct mlx5_flow *)data;
718 struct ibv_exp_flow_spec_ipv4_ext *ipv4;
719 unsigned int ipv4_size = sizeof(struct ibv_exp_flow_spec_ipv4_ext);
721 ++flow->ibv_attr->num_of_specs;
722 flow->ibv_attr->priority = 1;
723 flow->hash_fields = (IBV_EXP_RX_HASH_SRC_IPV4 |
724 IBV_EXP_RX_HASH_DST_IPV4);
725 ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
726 *ipv4 = (struct ibv_exp_flow_spec_ipv4_ext) {
727 .type = flow->inner | IBV_EXP_FLOW_SPEC_IPV4_EXT,
734 ipv4->val = (struct ibv_exp_flow_ipv4_ext_filter){
735 .src_ip = spec->hdr.src_addr,
736 .dst_ip = spec->hdr.dst_addr,
737 .proto = spec->hdr.next_proto_id,
738 .tos = spec->hdr.type_of_service,
740 ipv4->mask = (struct ibv_exp_flow_ipv4_ext_filter){
741 .src_ip = mask->hdr.src_addr,
742 .dst_ip = mask->hdr.dst_addr,
743 .proto = mask->hdr.next_proto_id,
744 .tos = mask->hdr.type_of_service,
746 /* Remove unwanted bits from values. */
747 ipv4->val.src_ip &= ipv4->mask.src_ip;
748 ipv4->val.dst_ip &= ipv4->mask.dst_ip;
749 ipv4->val.proto &= ipv4->mask.proto;
750 ipv4->val.tos &= ipv4->mask.tos;
755 * Convert IPv6 item to Verbs specification.
758 * Item specification.
759 * @param[in] default_mask
760 * Default bit-masks to use when item->mask is not provided.
761 * @param[in, out] data
765 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
766 const void *default_mask,
769 const struct rte_flow_item_ipv6 *spec = item->spec;
770 const struct rte_flow_item_ipv6 *mask = item->mask;
771 struct mlx5_flow *flow = (struct mlx5_flow *)data;
772 struct ibv_exp_flow_spec_ipv6_ext *ipv6;
773 unsigned int ipv6_size = sizeof(struct ibv_exp_flow_spec_ipv6_ext);
775 ++flow->ibv_attr->num_of_specs;
776 flow->ibv_attr->priority = 1;
777 flow->hash_fields = (IBV_EXP_RX_HASH_SRC_IPV6 |
778 IBV_EXP_RX_HASH_DST_IPV6);
779 ipv6 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
780 *ipv6 = (struct ibv_exp_flow_spec_ipv6_ext) {
781 .type = flow->inner | IBV_EXP_FLOW_SPEC_IPV6_EXT,
788 memcpy(ipv6->val.src_ip, spec->hdr.src_addr,
789 RTE_DIM(ipv6->val.src_ip));
790 memcpy(ipv6->val.dst_ip, spec->hdr.dst_addr,
791 RTE_DIM(ipv6->val.dst_ip));
792 memcpy(ipv6->mask.src_ip, mask->hdr.src_addr,
793 RTE_DIM(ipv6->mask.src_ip));
794 memcpy(ipv6->mask.dst_ip, mask->hdr.dst_addr,
795 RTE_DIM(ipv6->mask.dst_ip));
796 ipv6->mask.flow_label = mask->hdr.vtc_flow;
797 ipv6->mask.next_hdr = mask->hdr.proto;
798 ipv6->mask.hop_limit = mask->hdr.hop_limits;
799 ipv6->val.flow_label &= ipv6->mask.flow_label;
800 ipv6->val.next_hdr &= ipv6->mask.next_hdr;
801 ipv6->val.hop_limit &= ipv6->mask.hop_limit;
806 * Convert UDP item to Verbs specification.
809 * Item specification.
810 * @param[in] default_mask
811 * Default bit-masks to use when item->mask is not provided.
812 * @param[in, out] data
816 mlx5_flow_create_udp(const struct rte_flow_item *item,
817 const void *default_mask,
820 const struct rte_flow_item_udp *spec = item->spec;
821 const struct rte_flow_item_udp *mask = item->mask;
822 struct mlx5_flow *flow = (struct mlx5_flow *)data;
823 struct ibv_exp_flow_spec_tcp_udp *udp;
824 unsigned int udp_size = sizeof(struct ibv_exp_flow_spec_tcp_udp);
826 ++flow->ibv_attr->num_of_specs;
827 flow->ibv_attr->priority = 0;
828 flow->hash_fields |= (IBV_EXP_RX_HASH_SRC_PORT_UDP |
829 IBV_EXP_RX_HASH_DST_PORT_UDP);
830 udp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
831 *udp = (struct ibv_exp_flow_spec_tcp_udp) {
832 .type = flow->inner | IBV_EXP_FLOW_SPEC_UDP,
839 udp->val.dst_port = spec->hdr.dst_port;
840 udp->val.src_port = spec->hdr.src_port;
841 udp->mask.dst_port = mask->hdr.dst_port;
842 udp->mask.src_port = mask->hdr.src_port;
843 /* Remove unwanted bits from values. */
844 udp->val.src_port &= udp->mask.src_port;
845 udp->val.dst_port &= udp->mask.dst_port;
850 * Convert TCP item to Verbs specification.
853 * Item specification.
854 * @param[in] default_mask
855 * Default bit-masks to use when item->mask is not provided.
856 * @param[in, out] data
860 mlx5_flow_create_tcp(const struct rte_flow_item *item,
861 const void *default_mask,
864 const struct rte_flow_item_tcp *spec = item->spec;
865 const struct rte_flow_item_tcp *mask = item->mask;
866 struct mlx5_flow *flow = (struct mlx5_flow *)data;
867 struct ibv_exp_flow_spec_tcp_udp *tcp;
868 unsigned int tcp_size = sizeof(struct ibv_exp_flow_spec_tcp_udp);
870 ++flow->ibv_attr->num_of_specs;
871 flow->ibv_attr->priority = 0;
872 flow->hash_fields |= (IBV_EXP_RX_HASH_SRC_PORT_TCP |
873 IBV_EXP_RX_HASH_DST_PORT_TCP);
874 tcp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
875 *tcp = (struct ibv_exp_flow_spec_tcp_udp) {
876 .type = flow->inner | IBV_EXP_FLOW_SPEC_TCP,
883 tcp->val.dst_port = spec->hdr.dst_port;
884 tcp->val.src_port = spec->hdr.src_port;
885 tcp->mask.dst_port = mask->hdr.dst_port;
886 tcp->mask.src_port = mask->hdr.src_port;
887 /* Remove unwanted bits from values. */
888 tcp->val.src_port &= tcp->mask.src_port;
889 tcp->val.dst_port &= tcp->mask.dst_port;
894 * Convert VXLAN item to Verbs specification.
897 * Item specification.
898 * @param[in] default_mask
899 * Default bit-masks to use when item->mask is not provided.
900 * @param[in, out] data
904 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
905 const void *default_mask,
908 const struct rte_flow_item_vxlan *spec = item->spec;
909 const struct rte_flow_item_vxlan *mask = item->mask;
910 struct mlx5_flow *flow = (struct mlx5_flow *)data;
911 struct ibv_exp_flow_spec_tunnel *vxlan;
912 unsigned int size = sizeof(struct ibv_exp_flow_spec_tunnel);
918 ++flow->ibv_attr->num_of_specs;
919 flow->ibv_attr->priority = 0;
921 vxlan = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
922 *vxlan = (struct ibv_exp_flow_spec_tunnel) {
923 .type = flow->inner | IBV_EXP_FLOW_SPEC_VXLAN_TUNNEL,
926 flow->inner = IBV_EXP_FLOW_SPEC_INNER;
931 memcpy(&id.vni[1], spec->vni, 3);
932 vxlan->val.tunnel_id = id.vlan_id;
933 memcpy(&id.vni[1], mask->vni, 3);
934 vxlan->mask.tunnel_id = id.vlan_id;
935 /* Remove unwanted bits from values. */
936 vxlan->val.tunnel_id &= vxlan->mask.tunnel_id;
941 * Convert mark/flag action to Verbs specification.
944 * Pointer to MLX5 flow structure.
949 mlx5_flow_create_flag_mark(struct mlx5_flow *flow, uint32_t mark_id)
951 struct ibv_exp_flow_spec_action_tag *tag;
952 unsigned int size = sizeof(struct ibv_exp_flow_spec_action_tag);
954 tag = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
955 *tag = (struct ibv_exp_flow_spec_action_tag){
956 .type = IBV_EXP_FLOW_SPEC_ACTION_TAG,
958 .tag_id = mlx5_flow_mark_set(mark_id),
960 ++flow->ibv_attr->num_of_specs;
965 * Complete flow rule creation with a drop queue.
968 * Pointer to private structure.
970 * MLX5 flow attributes (filled by mlx5_flow_validate()).
972 * Perform verbose error reporting if not NULL.
975 * A flow if the rule could be created.
977 static struct rte_flow *
978 priv_flow_create_action_queue_drop(struct priv *priv,
979 struct mlx5_flow *flow,
980 struct rte_flow_error *error)
982 struct rte_flow *rte_flow;
986 rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow), 0);
988 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
989 NULL, "cannot allocate flow memory");
993 rte_flow->ibv_attr = flow->ibv_attr;
994 rte_flow->qp = priv->flow_drop_queue->qp;
997 rte_flow->ibv_flow = ibv_exp_create_flow(rte_flow->qp,
999 if (!rte_flow->ibv_flow) {
1000 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1001 NULL, "flow rule creation failure");
1012 * Complete flow rule creation.
1015 * Pointer to private structure.
1017 * MLX5 flow attributes (filled by mlx5_flow_validate()).
1019 * Target action structure.
1021 * Perform verbose error reporting if not NULL.
1024 * A flow if the rule could be created.
1026 static struct rte_flow *
1027 priv_flow_create_action_queue(struct priv *priv,
1028 struct mlx5_flow *flow,
1029 struct mlx5_flow_action *action,
1030 struct rte_flow_error *error)
1032 struct rte_flow *rte_flow;
1035 const unsigned int wqs_n = 1 << log2above(action->queues_n);
1036 struct ibv_exp_wq *wqs[wqs_n];
1040 assert(!action->drop);
1041 rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow) +
1042 sizeof(*rte_flow->rxqs) * action->queues_n, 0);
1044 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1045 NULL, "cannot allocate flow memory");
1048 for (i = 0; i < action->queues_n; ++i) {
1049 struct rxq_ctrl *rxq;
1051 rxq = container_of((*priv->rxqs)[action->queues[i]],
1052 struct rxq_ctrl, rxq);
1054 rte_flow->rxqs[i] = &rxq->rxq;
1056 rxq->rxq.mark |= action->mark;
1058 /* finalise indirection table. */
1059 for (j = 0; i < wqs_n; ++i, ++j) {
1061 if (j == action->queues_n)
1064 rte_flow->mark = action->mark;
1065 rte_flow->ibv_attr = flow->ibv_attr;
1066 rte_flow->hash_fields = flow->hash_fields;
1067 rte_flow->ind_table = ibv_exp_create_rwq_ind_table(
1069 &(struct ibv_exp_rwq_ind_table_init_attr){
1071 .log_ind_tbl_size = log2above(action->queues_n),
1075 if (!rte_flow->ind_table) {
1076 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1077 NULL, "cannot allocate indirection table");
1080 rte_flow->qp = ibv_exp_create_qp(
1082 &(struct ibv_exp_qp_init_attr){
1083 .qp_type = IBV_QPT_RAW_PACKET,
1085 IBV_EXP_QP_INIT_ATTR_PD |
1086 IBV_EXP_QP_INIT_ATTR_PORT |
1087 IBV_EXP_QP_INIT_ATTR_RX_HASH,
1089 .rx_hash_conf = &(struct ibv_exp_rx_hash_conf){
1091 IBV_EXP_RX_HASH_FUNC_TOEPLITZ,
1092 .rx_hash_key_len = rss_hash_default_key_len,
1093 .rx_hash_key = rss_hash_default_key,
1094 .rx_hash_fields_mask = rte_flow->hash_fields,
1095 .rwq_ind_tbl = rte_flow->ind_table,
1097 .port_num = priv->port,
1099 if (!rte_flow->qp) {
1100 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1101 NULL, "cannot allocate QP");
1106 rte_flow->ibv_flow = ibv_exp_create_flow(rte_flow->qp,
1107 rte_flow->ibv_attr);
1108 if (!rte_flow->ibv_flow) {
1109 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1110 NULL, "flow rule creation failure");
1117 ibv_destroy_qp(rte_flow->qp);
1118 if (rte_flow->ind_table)
1119 ibv_exp_destroy_rwq_ind_table(rte_flow->ind_table);
1128 * Pointer to private structure.
1130 * Flow rule attributes.
1131 * @param[in] pattern
1132 * Pattern specification (list terminated by the END pattern item).
1133 * @param[in] actions
1134 * Associated actions (list terminated by the END action).
1136 * Perform verbose error reporting if not NULL.
1139 * A flow on success, NULL otherwise.
1141 static struct rte_flow *
1142 priv_flow_create(struct priv *priv,
1143 const struct rte_flow_attr *attr,
1144 const struct rte_flow_item items[],
1145 const struct rte_flow_action actions[],
1146 struct rte_flow_error *error)
1148 struct rte_flow *rte_flow;
1149 struct mlx5_flow flow = { .offset = sizeof(struct ibv_exp_flow_attr), };
1150 struct mlx5_flow_action action = {
1154 .mark_id = MLX5_FLOW_MARK_DEFAULT,
1159 err = priv_flow_validate(priv, attr, items, actions, error, &flow,
1163 flow.ibv_attr = rte_malloc(__func__, flow.offset, 0);
1164 flow.offset = sizeof(struct ibv_exp_flow_attr);
1165 if (!flow.ibv_attr) {
1166 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1167 NULL, "cannot allocate ibv_attr memory");
1170 *flow.ibv_attr = (struct ibv_exp_flow_attr){
1171 .type = IBV_EXP_FLOW_ATTR_NORMAL,
1172 .size = sizeof(struct ibv_exp_flow_attr),
1173 .priority = attr->priority,
1180 flow.hash_fields = 0;
1181 claim_zero(priv_flow_validate(priv, attr, items, actions,
1182 error, &flow, &action));
1183 if (action.mark && !action.drop) {
1184 mlx5_flow_create_flag_mark(&flow, action.mark_id);
1185 flow.offset += sizeof(struct ibv_exp_flow_spec_action_tag);
1189 priv_flow_create_action_queue_drop(priv, &flow, error);
1191 rte_flow = priv_flow_create_action_queue(priv, &flow, &action,
1197 rte_free(flow.ibv_attr);
1204 * @see rte_flow_create()
1208 mlx5_flow_create(struct rte_eth_dev *dev,
1209 const struct rte_flow_attr *attr,
1210 const struct rte_flow_item items[],
1211 const struct rte_flow_action actions[],
1212 struct rte_flow_error *error)
1214 struct priv *priv = dev->data->dev_private;
1215 struct rte_flow *flow;
1218 flow = priv_flow_create(priv, attr, items, actions, error);
1220 LIST_INSERT_HEAD(&priv->flows, flow, next);
1221 DEBUG("Flow created %p", (void *)flow);
1231 * Pointer to private structure.
1236 priv_flow_destroy(struct priv *priv,
1237 struct rte_flow *flow)
1240 LIST_REMOVE(flow, next);
1242 claim_zero(ibv_exp_destroy_flow(flow->ibv_flow));
1246 claim_zero(ibv_destroy_qp(flow->qp));
1247 if (flow->ind_table)
1248 claim_zero(ibv_exp_destroy_rwq_ind_table(flow->ind_table));
1249 if (flow->drop && flow->wq)
1250 claim_zero(ibv_exp_destroy_wq(flow->wq));
1251 if (flow->drop && flow->cq)
1252 claim_zero(ibv_destroy_cq(flow->cq));
1254 struct rte_flow *tmp;
1256 uint32_t mark_n = 0;
1260 * To remove the mark from the queue, the queue must not be
1261 * present in any other marked flow (RSS or not).
1263 for (queue_n = 0; queue_n < flow->rxqs_n; ++queue_n) {
1264 rxq = flow->rxqs[queue_n];
1265 for (tmp = LIST_FIRST(&priv->flows);
1267 tmp = LIST_NEXT(tmp, next)) {
1273 tqueue_n < tmp->rxqs_n;
1277 trxq = tmp->rxqs[tqueue_n];
1282 rxq->mark = !!mark_n;
1286 rte_free(flow->ibv_attr);
1287 DEBUG("Flow destroyed %p", (void *)flow);
1294 * @see rte_flow_destroy()
1298 mlx5_flow_destroy(struct rte_eth_dev *dev,
1299 struct rte_flow *flow,
1300 struct rte_flow_error *error)
1302 struct priv *priv = dev->data->dev_private;
1306 priv_flow_destroy(priv, flow);
1312 * Destroy all flows.
1315 * Pointer to private structure.
1318 priv_flow_flush(struct priv *priv)
1320 while (!LIST_EMPTY(&priv->flows)) {
1321 struct rte_flow *flow;
1323 flow = LIST_FIRST(&priv->flows);
1324 priv_flow_destroy(priv, flow);
1329 * Destroy all flows.
1331 * @see rte_flow_flush()
1335 mlx5_flow_flush(struct rte_eth_dev *dev,
1336 struct rte_flow_error *error)
1338 struct priv *priv = dev->data->dev_private;
1342 priv_flow_flush(priv);
1348 * Create drop queue.
1351 * Pointer to private structure.
1357 priv_flow_create_drop_queue(struct priv *priv)
1359 struct rte_flow_drop *fdq = NULL;
1364 fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
1366 WARN("cannot allocate memory for drop queue");
1369 fdq->cq = ibv_exp_create_cq(priv->ctx, 1, NULL, NULL, 0,
1370 &(struct ibv_exp_cq_init_attr){
1374 WARN("cannot allocate CQ for drop queue");
1377 for (i = 0; i != MLX5_DROP_WQ_N; ++i) {
1378 fdq->wqs[i] = ibv_exp_create_wq(priv->ctx,
1379 &(struct ibv_exp_wq_init_attr){
1380 .wq_type = IBV_EXP_WQT_RQ,
1387 WARN("cannot allocate WQ for drop queue");
1391 fdq->ind_table = ibv_exp_create_rwq_ind_table(priv->ctx,
1392 &(struct ibv_exp_rwq_ind_table_init_attr){
1394 .log_ind_tbl_size = 0,
1395 .ind_tbl = fdq->wqs,
1398 if (!fdq->ind_table) {
1399 WARN("cannot allocate indirection table for drop queue");
1402 fdq->qp = ibv_exp_create_qp(priv->ctx,
1403 &(struct ibv_exp_qp_init_attr){
1404 .qp_type = IBV_QPT_RAW_PACKET,
1406 IBV_EXP_QP_INIT_ATTR_PD |
1407 IBV_EXP_QP_INIT_ATTR_PORT |
1408 IBV_EXP_QP_INIT_ATTR_RX_HASH,
1410 .rx_hash_conf = &(struct ibv_exp_rx_hash_conf){
1412 IBV_EXP_RX_HASH_FUNC_TOEPLITZ,
1413 .rx_hash_key_len = rss_hash_default_key_len,
1414 .rx_hash_key = rss_hash_default_key,
1415 .rx_hash_fields_mask = 0,
1416 .rwq_ind_tbl = fdq->ind_table,
1418 .port_num = priv->port,
1421 WARN("cannot allocate QP for drop queue");
1424 priv->flow_drop_queue = fdq;
1428 claim_zero(ibv_destroy_qp(fdq->qp));
1430 claim_zero(ibv_exp_destroy_rwq_ind_table(fdq->ind_table));
1431 for (i = 0; i != MLX5_DROP_WQ_N; ++i) {
1433 claim_zero(ibv_exp_destroy_wq(fdq->wqs[i]));
1436 claim_zero(ibv_destroy_cq(fdq->cq));
1439 priv->flow_drop_queue = NULL;
1444 * Delete drop queue.
1447 * Pointer to private structure.
1450 priv_flow_delete_drop_queue(struct priv *priv)
1452 struct rte_flow_drop *fdq = priv->flow_drop_queue;
1455 claim_zero(ibv_destroy_qp(fdq->qp));
1456 claim_zero(ibv_exp_destroy_rwq_ind_table(fdq->ind_table));
1457 for (i = 0; i != MLX5_DROP_WQ_N; ++i) {
1458 assert(fdq->wqs[i]);
1459 claim_zero(ibv_exp_destroy_wq(fdq->wqs[i]));
1461 claim_zero(ibv_destroy_cq(fdq->cq));
1463 priv->flow_drop_queue = NULL;
1469 * Called by dev_stop() to remove all flows.
1472 * Pointer to private structure.
1475 priv_flow_stop(struct priv *priv)
1477 struct rte_flow *flow;
1479 for (flow = LIST_FIRST(&priv->flows);
1481 flow = LIST_NEXT(flow, next)) {
1482 claim_zero(ibv_exp_destroy_flow(flow->ibv_flow));
1483 flow->ibv_flow = NULL;
1487 for (n = 0; n < flow->rxqs_n; ++n)
1488 flow->rxqs[n]->mark = 0;
1490 DEBUG("Flow %p removed", (void *)flow);
1492 priv_flow_delete_drop_queue(priv);
1499 * Pointer to private structure.
1502 * 0 on success, an errno value otherwise and rte_errno is set.
1505 priv_flow_start(struct priv *priv)
1508 struct rte_flow *flow;
1510 ret = priv_flow_create_drop_queue(priv);
1513 for (flow = LIST_FIRST(&priv->flows);
1515 flow = LIST_NEXT(flow, next)) {
1519 qp = priv->flow_drop_queue->qp;
1522 flow->ibv_flow = ibv_exp_create_flow(qp, flow->ibv_attr);
1523 if (!flow->ibv_flow) {
1524 DEBUG("Flow %p cannot be applied", (void *)flow);
1528 DEBUG("Flow %p applied", (void *)flow);
1532 for (n = 0; n < flow->rxqs_n; ++n)
1533 flow->rxqs[n]->mark = 1;
1540 * Verify if the Rx queue is used in a flow.
1543 * Pointer to private structure.
1545 * Pointer to the queue to search.
1548 * Nonzero if the queue is used by a flow.
1551 priv_flow_rxq_in_use(struct priv *priv, struct rxq *rxq)
1553 struct rte_flow *flow;
1555 for (flow = LIST_FIRST(&priv->flows);
1557 flow = LIST_NEXT(flow, next)) {
1562 for (n = 0; n < flow->rxqs_n; ++n) {
1563 if (flow->rxqs[n] == rxq)