4 * Copyright 2016 6WIND S.A.
5 * Copyright 2016 Mellanox.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of 6WIND S.A. nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 #include <sys/queue.h>
38 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
40 #pragma GCC diagnostic ignored "-Wpedantic"
42 #include <infiniband/verbs.h>
44 #pragma GCC diagnostic error "-Wpedantic"
47 #include <rte_ethdev.h>
49 #include <rte_flow_driver.h>
50 #include <rte_malloc.h>
55 /* Number of Work Queues necessary for the DROP queue. */
56 #define MLX5_DROP_WQ_N 4
/*
 * Forward declarations of the per-item conversion callbacks. They are
 * installed as .convert handlers in the mlx5_flow_items[] table below and
 * defined later in this file. Only the first two parameters of each
 * prototype are visible in this view.
 */
59 mlx5_flow_create_eth(const struct rte_flow_item *item,
60 const void *default_mask,
64 mlx5_flow_create_vlan(const struct rte_flow_item *item,
65 const void *default_mask,
69 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
70 const void *default_mask,
74 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
75 const void *default_mask,
79 mlx5_flow_create_udp(const struct rte_flow_item *item,
80 const void *default_mask,
84 mlx5_flow_create_tcp(const struct rte_flow_item *item,
85 const void *default_mask,
89 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
90 const void *default_mask,
/*
 * Per-rule state kept for every flow created by this driver. Handles are
 * chained on priv->flows through the "next" entry. The Verbs objects stored
 * here are the ones released by priv_flow_destroy().
 * NOTE(review): the structure's opening line is not visible in this view;
 * the name is taken from the LIST_ENTRY(rte_flow) declaration.
 */
94 LIST_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
95 struct ibv_exp_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
96 struct ibv_exp_rwq_ind_table *ind_table; /**< Indirection table. */
97 struct ibv_qp *qp; /**< Verbs queue pair. */
98 struct ibv_exp_flow *ibv_flow; /**< Verbs flow. */
99 struct ibv_exp_wq *wq; /**< Verbs work queue. */
100 struct ibv_cq *cq; /**< Verbs completion queue. */
101 struct rxq *(*rxqs)[]; /**< Pointer to the queues array. */
102 uint16_t rxqs_n; /**< Number of queues in this flow, 0 if drop queue. */
103 uint32_t mark:1; /**< Set if the flow is marked. */
104 uint32_t drop:1; /**< Drop queue. */
105 uint64_t hash_fields; /**< Fields that participate in the hash. */
108 /**
 * Static initializer for items: expands to an anonymous const array holding
 * the given item types followed by RTE_FLOW_ITEM_TYPE_END as terminator.
 */
110 (const enum rte_flow_item_type []){ \
111 __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
114 /**
 * Structure to generate a simple graph of layers supported by the NIC.
 *
 * One entry describes a single rte_flow item type: which items may follow
 * it, the bit-masks used to validate it and the callback that converts it
 * into a Verbs specification.
 */
115 struct mlx5_flow_items {
116 /** List of possible actions for these items. */
117 const enum rte_flow_action_type *const actions;
118 /** Bit-masks corresponding to the possibilities for the item. */
121 * Default bit-masks to use when item->mask is not provided. When
122 * \default_mask is also NULL, the full supported bit-mask (\mask) is
125 const void *default_mask;
126 /** Bit-masks size in bytes. */
127 const unsigned int mask_sz;
129 * Conversion function from rte_flow to NIC specific flow.
132 * rte_flow item to convert.
133 * @param default_mask
134 * Default bit-masks to use when item->mask is not provided.
136 * Internal structure to store the conversion.
139 * 0 on success, negative value otherwise.
141 int (*convert)(const struct rte_flow_item *item,
142 const void *default_mask,
144 /** Size in bytes of the destination structure (added to the flow offset for each item). */
145 const unsigned int dst_sz;
146 /** List of possible following items, terminated by RTE_FLOW_ITEM_TYPE_END. */
147 const enum rte_flow_item_type *const items;
150 /**
 * Valid actions for this PMD, terminated by RTE_FLOW_ACTION_TYPE_END.
 *
 * NOTE(review): priv_flow_validate() also handles RTE_FLOW_ACTION_TYPE_RSS,
 * which is not listed here. No visible line reads the .actions field that
 * points at this array, so confirm whether the list is authoritative.
 */
151 static const enum rte_flow_action_type valid_actions[] = {
152 RTE_FLOW_ACTION_TYPE_DROP,
153 RTE_FLOW_ACTION_TYPE_QUEUE,
154 RTE_FLOW_ACTION_TYPE_MARK,
155 RTE_FLOW_ACTION_TYPE_FLAG,
156 RTE_FLOW_ACTION_TYPE_END,
159 /**
 * Graph of supported items and associated actions.
 *
 * The table is indexed by rte_flow item type. The RTE_FLOW_ITEM_TYPE_END
 * entry is the root of the graph: priv_flow_validate() starts there, so a
 * pattern may begin with ETH or VXLAN. Each .items list names the item types
 * allowed to follow the current one.
 *
 * NOTE(review): no .items list is visible for TCP and no .dst_sz is visible
 * for VLAN in this view. mlx5_flow_create_vlan() writes into the preceding
 * ETH spec instead of a new one, which suggests VLAN consumes no extra
 * space; confirm against the full file.
 */
160 static const struct mlx5_flow_items mlx5_flow_items[] = {
161 [RTE_FLOW_ITEM_TYPE_END] = {
162 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
163 RTE_FLOW_ITEM_TYPE_VXLAN),
165 [RTE_FLOW_ITEM_TYPE_ETH] = {
166 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
167 RTE_FLOW_ITEM_TYPE_IPV4,
168 RTE_FLOW_ITEM_TYPE_IPV6),
169 .actions = valid_actions,
170 .mask = &(const struct rte_flow_item_eth){
171 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
172 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
175 .default_mask = &rte_flow_item_eth_mask,
176 .mask_sz = sizeof(struct rte_flow_item_eth),
177 .convert = mlx5_flow_create_eth,
178 .dst_sz = sizeof(struct ibv_exp_flow_spec_eth),
180 [RTE_FLOW_ITEM_TYPE_VLAN] = {
181 .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
182 RTE_FLOW_ITEM_TYPE_IPV6),
183 .actions = valid_actions,
184 .mask = &(const struct rte_flow_item_vlan){
187 .default_mask = &rte_flow_item_vlan_mask,
188 .mask_sz = sizeof(struct rte_flow_item_vlan),
189 .convert = mlx5_flow_create_vlan,
192 [RTE_FLOW_ITEM_TYPE_IPV4] = {
193 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
194 RTE_FLOW_ITEM_TYPE_TCP),
195 .actions = valid_actions,
196 .mask = &(const struct rte_flow_item_ipv4){
200 .type_of_service = -1,
204 .default_mask = &rte_flow_item_ipv4_mask,
205 .mask_sz = sizeof(struct rte_flow_item_ipv4),
206 .convert = mlx5_flow_create_ipv4,
207 .dst_sz = sizeof(struct ibv_exp_flow_spec_ipv4_ext),
209 [RTE_FLOW_ITEM_TYPE_IPV6] = {
210 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
211 RTE_FLOW_ITEM_TYPE_TCP),
212 .actions = valid_actions,
213 .mask = &(const struct rte_flow_item_ipv6){
216 0xff, 0xff, 0xff, 0xff,
217 0xff, 0xff, 0xff, 0xff,
218 0xff, 0xff, 0xff, 0xff,
219 0xff, 0xff, 0xff, 0xff,
222 0xff, 0xff, 0xff, 0xff,
223 0xff, 0xff, 0xff, 0xff,
224 0xff, 0xff, 0xff, 0xff,
225 0xff, 0xff, 0xff, 0xff,
232 .default_mask = &rte_flow_item_ipv6_mask,
233 .mask_sz = sizeof(struct rte_flow_item_ipv6),
234 .convert = mlx5_flow_create_ipv6,
235 .dst_sz = sizeof(struct ibv_exp_flow_spec_ipv6_ext),
237 [RTE_FLOW_ITEM_TYPE_UDP] = {
238 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
239 .actions = valid_actions,
240 .mask = &(const struct rte_flow_item_udp){
246 .default_mask = &rte_flow_item_udp_mask,
247 .mask_sz = sizeof(struct rte_flow_item_udp),
248 .convert = mlx5_flow_create_udp,
249 .dst_sz = sizeof(struct ibv_exp_flow_spec_tcp_udp),
251 [RTE_FLOW_ITEM_TYPE_TCP] = {
252 .actions = valid_actions,
253 .mask = &(const struct rte_flow_item_tcp){
259 .default_mask = &rte_flow_item_tcp_mask,
260 .mask_sz = sizeof(struct rte_flow_item_tcp),
261 .convert = mlx5_flow_create_tcp,
262 .dst_sz = sizeof(struct ibv_exp_flow_spec_tcp_udp),
264 [RTE_FLOW_ITEM_TYPE_VXLAN] = {
265 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
266 .actions = valid_actions,
267 .mask = &(const struct rte_flow_item_vxlan){
268 .vni = "\xff\xff\xff",
270 .default_mask = &rte_flow_item_vxlan_mask,
271 .mask_sz = sizeof(struct rte_flow_item_vxlan),
272 .convert = mlx5_flow_create_vxlan,
273 .dst_sz = sizeof(struct ibv_exp_flow_spec_tunnel),
277 /**
 * Structure to pass to the conversion function.
 *
 * The converters receive it as an opaque pointer and cast it back to
 * struct mlx5_flow. priv_flow_validate() adds each item's dst_sz to "offset"
 * on every pass. While ibv_attr is still NULL nothing is converted, so the
 * accumulated offset is what priv_flow_create() uses to size the buffer.
 */
279 struct ibv_exp_flow_attr *ibv_attr; /**< Verbs attribute. */
280 unsigned int offset; /**< Offset in bytes in the ibv_attr buffer. */
281 uint32_t inner; /**< Set once VXLAN is encountered. */
282 uint64_t hash_fields; /**< Fields that participate in the hash. */
285 /**
 * Structure for Drop queue.
 *
 * Holds the Verbs objects created by priv_flow_create_drop_queue() and
 * published through priv->flow_drop_queue. Drop flows attach to its QP.
 */
286 struct rte_flow_drop {
287 struct ibv_exp_rwq_ind_table *ind_table; /**< Indirection table. */
288 struct ibv_qp *qp; /**< Verbs queue pair. */
289 struct ibv_exp_wq *wqs[MLX5_DROP_WQ_N]; /**< Verbs work queue. */
290 struct ibv_cq *cq; /**< Verbs completion queue. */
/*
 * Summary of an rte_flow action list, filled while walking the actions in
 * priv_flow_validate() and priv_flow_create().
 */
293 struct mlx5_flow_action {
294 uint32_t queue:1; /**< Target is a receive queue. */
295 uint32_t drop:1; /**< Target is a drop queue. */
296 uint32_t mark:1; /**< Mark is present in the flow. */
297 uint32_t mark_id; /**< Mark identifier. */
298 uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use. */
299 uint16_t queues_n; /**< Number of entries in queues[]. */
303 * Check support for a given item.
306 * Item specification.
308 * Bit-masks covering supported fields to compare with spec, last and mask in
311 * Bit-Mask size in bytes.
/*
 * Checks one pattern item against the supported bit-mask "mask", which is
 * "size" bytes long. The test (x[i] | mask[i]) != mask[i] is true when byte i
 * of x has a bit set outside the supported mask.
 * The statements executed when a check fails, and the final return, are on
 * lines not visible in this view.
 */
317 mlx5_flow_item_validate(const struct rte_flow_item *item,
318 const uint8_t *mask, unsigned int size)
/* Detects a mask or last value supplied without a spec. */
322 if (!item->spec && (item->mask || item->last))
/* Without a user mask, spec itself must fit in the supported mask. */
324 if (item->spec && !item->mask) {
326 const uint8_t *spec = item->spec;
328 for (i = 0; i < size; ++i)
329 if ((spec[i] | mask[i]) != mask[i])
/* Same check for the upper bound of a range. */
332 if (item->last && !item->mask) {
334 const uint8_t *spec = item->last;
336 for (i = 0; i < size; ++i)
337 if ((spec[i] | mask[i]) != mask[i])
342 const uint8_t *spec = item->mask;
344 for (i = 0; i < size; ++i)
345 if ((spec[i] | mask[i]) != mask[i])
/*
 * When both spec and last are given, they are masked byte by byte and the
 * results compared with memcmp(). "apply" starts as the supported mask;
 * any line that overrides it is not visible here.
 */
348 if (item->spec && item->last) {
351 const uint8_t *apply = mask;
356 for (i = 0; i < size; ++i) {
357 spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
358 last[i] = ((const uint8_t *)item->last)[i] & apply[i];
360 ret = memcmp(spec, last, size);
366 * Validate a flow supported by the NIC.
369 * Pointer to private structure.
371 * Flow rule attributes.
373 * Pattern specification (list terminated by the END pattern item).
375 * Associated actions (list terminated by the END action).
377 * Perform verbose error reporting if not NULL.
378 * @param[in, out] flow
379 * Flow structure to update.
382 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Validates attributes, pattern and actions, and doubles as the conversion
 * driver. While flow->ibv_attr is NULL only flow->offset is advanced, which
 * measures the required buffer size. Once flow->ibv_attr is set, each
 * item's .convert callback is also invoked to write its Verbs spec.
 * Errors are reported through rte_flow_error_set().
 */
385 priv_flow_validate(struct priv *priv,
386 const struct rte_flow_attr *attr,
387 const struct rte_flow_item items[],
388 const struct rte_flow_action actions[],
389 struct rte_flow_error *error,
390 struct mlx5_flow *flow)
/* Start at the root of the item graph (the RTE_FLOW_ITEM_TYPE_END entry). */
392 const struct mlx5_flow_items *cur_item = mlx5_flow_items;
393 struct mlx5_flow_action action = {
401 rte_flow_error_set(error, ENOTSUP,
402 RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
404 "groups are not supported");
407 if (attr->priority) {
408 rte_flow_error_set(error, ENOTSUP,
409 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
411 "priorities are not supported");
415 rte_flow_error_set(error, ENOTSUP,
416 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
418 "egress is not supported");
421 if (!attr->ingress) {
422 rte_flow_error_set(error, ENOTSUP,
423 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
425 "only ingress is supported");
/*
 * Walk the pattern. Each item type is looked up in the current node's
 * .items list and "token" is set on a match.
 * NOTE(review): the line advancing cur_item to token is not visible here.
 */
428 for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
429 const struct mlx5_flow_items *token = NULL;
433 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
437 cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
439 if (cur_item->items[i] == items->type) {
440 token = &mlx5_flow_items[items->type];
445 goto exit_item_not_supported;
447 err = mlx5_flow_item_validate(items,
448 (const uint8_t *)cur_item->mask,
451 goto exit_item_not_supported;
/* Convert only once the caller has provided the ibv_attr buffer. */
452 if (flow->ibv_attr && cur_item->convert) {
453 err = cur_item->convert(items,
454 (cur_item->default_mask ?
455 cur_item->default_mask :
459 goto exit_item_not_supported;
461 flow->offset += cur_item->dst_sz;
463 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
464 if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
466 } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
468 } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
469 const struct rte_flow_action_queue *queue =
470 (const struct rte_flow_action_queue *)
/*
 * NOTE(review): if priv->rxqs_n is unsigned and zero, rxqs_n - 1 wraps and
 * the index test below can never fail; confirm rxqs_n's type and whether
 * zero is possible here.
 */
475 if (!queue || (queue->index > (priv->rxqs_n - 1)))
476 goto exit_action_not_supported;
477 for (n = 0; n < action.queues_n; ++n) {
478 if (action.queues[n] == queue->index) {
483 if (action.queues_n && !found) {
484 rte_flow_error_set(error, ENOTSUP,
485 RTE_FLOW_ERROR_TYPE_ACTION,
487 "queue action not in RSS queues");
492 action.queues[0] = queue->index;
493 } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
494 const struct rte_flow_action_rss *rss =
495 (const struct rte_flow_action_rss *)
499 if (action.queues_n == 1) {
502 assert(action.queues_n);
503 for (n = 0; n < rss->num; ++n) {
504 if (action.queues[0] == rss->queue[n]) {
510 rte_flow_error_set(error, ENOTSUP,
511 RTE_FLOW_ERROR_TYPE_ACTION,
513 "queue action not in RSS"
519 for (n = 0; n < rss->num; ++n)
520 action.queues[n] = rss->queue[n];
521 action.queues_n = rss->num;
522 } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
523 const struct rte_flow_action_mark *mark =
524 (const struct rte_flow_action_mark *)
528 rte_flow_error_set(error, EINVAL,
529 RTE_FLOW_ERROR_TYPE_ACTION,
531 "mark must be defined");
533 } else if (mark->id >= MLX5_FLOW_MARK_MAX) {
534 rte_flow_error_set(error, ENOTSUP,
535 RTE_FLOW_ERROR_TYPE_ACTION,
537 "mark must be between 0"
542 } else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
545 goto exit_action_not_supported;
/*
 * Size-only pass: reserve room for the tag spec that
 * mlx5_flow_create_flag_mark() writes later.
 */
548 if (action.mark && !flow->ibv_attr && !action.drop)
549 flow->offset += sizeof(struct ibv_exp_flow_spec_action_tag);
550 if (!action.queue && !action.drop) {
551 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
552 NULL, "no valid action");
556 exit_item_not_supported:
557 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
558 items, "item not supported");
560 exit_action_not_supported:
561 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
562 actions, "action not supported");
567 * Validate a flow supported by the NIC.
569 * @see rte_flow_validate()
/*
 * rte_flow validate entry point. Runs priv_flow_validate() on a stack-local
 * mlx5_flow whose ibv_attr is left unset, so the rule is checked and
 * measured but nothing is converted or allocated.
 */
573 mlx5_flow_validate(struct rte_eth_dev *dev,
574 const struct rte_flow_attr *attr,
575 const struct rte_flow_item items[],
576 const struct rte_flow_action actions[],
577 struct rte_flow_error *error)
579 struct priv *priv = dev->data->dev_private;
581 struct mlx5_flow flow = { .offset = sizeof(struct ibv_exp_flow_attr) };
584 ret = priv_flow_validate(priv, attr, items, actions, error, &flow);
590 * Convert Ethernet item to Verbs specification.
593 * Item specification.
594 * @param[in] default_mask
595 * Default bit-masks to use when item->mask is not provided.
596 * @param[in, out] data
/*
 * Writes an Ethernet Verbs spec at ibv_attr + offset, counts it in
 * num_of_specs, sets the rule priority to 2 and resets hash_fields to 0.
 * Value bytes are ANDed with the mask so only masked bits are matched.
 * NOTE(review): the NULL-spec handling and the fallback to default_mask
 * are presumably on lines not visible here.
 */
600 mlx5_flow_create_eth(const struct rte_flow_item *item,
601 const void *default_mask,
604 const struct rte_flow_item_eth *spec = item->spec;
605 const struct rte_flow_item_eth *mask = item->mask;
606 struct mlx5_flow *flow = (struct mlx5_flow *)data;
607 struct ibv_exp_flow_spec_eth *eth;
608 const unsigned int eth_size = sizeof(struct ibv_exp_flow_spec_eth);
611 ++flow->ibv_attr->num_of_specs;
612 flow->ibv_attr->priority = 2;
613 flow->hash_fields = 0;
614 eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
615 *eth = (struct ibv_exp_flow_spec_eth) {
616 .type = flow->inner | IBV_EXP_FLOW_SPEC_ETH,
623 memcpy(eth->val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
624 memcpy(eth->val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
625 eth->val.ether_type = spec->type;
626 memcpy(eth->mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
627 memcpy(eth->mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
628 eth->mask.ether_type = mask->type;
629 /* Remove unwanted bits from values. */
630 for (i = 0; i < ETHER_ADDR_LEN; ++i) {
631 eth->val.dst_mac[i] &= eth->mask.dst_mac[i];
632 eth->val.src_mac[i] &= eth->mask.src_mac[i];
634 eth->val.ether_type &= eth->mask.ether_type;
639 * Convert VLAN item to Verbs specification.
642 * Item specification.
643 * @param[in] default_mask
644 * Default bit-masks to use when item->mask is not provided.
645 * @param[in, out] data
/*
 * Adds the VLAN TCI to the Ethernet spec that immediately precedes the
 * current offset (offset - eth_size). In the visible lines no new spec is
 * appended and num_of_specs is not incremented.
 */
649 mlx5_flow_create_vlan(const struct rte_flow_item *item,
650 const void *default_mask,
653 const struct rte_flow_item_vlan *spec = item->spec;
654 const struct rte_flow_item_vlan *mask = item->mask;
655 struct mlx5_flow *flow = (struct mlx5_flow *)data;
656 struct ibv_exp_flow_spec_eth *eth;
657 const unsigned int eth_size = sizeof(struct ibv_exp_flow_spec_eth);
/* Step back one Ethernet spec: VLAN fields live in that structure. */
659 eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset - eth_size);
664 eth->val.vlan_tag = spec->tci;
665 eth->mask.vlan_tag = mask->tci;
666 eth->val.vlan_tag &= eth->mask.vlan_tag;
671 * Convert IPv4 item to Verbs specification.
674 * Item specification.
675 * @param[in] default_mask
676 * Default bit-masks to use when item->mask is not provided.
677 * @param[in, out] data
/*
 * Writes an IPv4 (extended) Verbs spec at ibv_attr + offset, counts it in
 * num_of_specs and sets the rule priority to 1. hash_fields is overwritten
 * (not ORed) with the IPv4 source and destination address bits.
 */
681 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
682 const void *default_mask,
685 const struct rte_flow_item_ipv4 *spec = item->spec;
686 const struct rte_flow_item_ipv4 *mask = item->mask;
687 struct mlx5_flow *flow = (struct mlx5_flow *)data;
688 struct ibv_exp_flow_spec_ipv4_ext *ipv4;
689 unsigned int ipv4_size = sizeof(struct ibv_exp_flow_spec_ipv4_ext);
691 ++flow->ibv_attr->num_of_specs;
692 flow->ibv_attr->priority = 1;
693 flow->hash_fields = (IBV_EXP_RX_HASH_SRC_IPV4 |
694 IBV_EXP_RX_HASH_DST_IPV4);
695 ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
696 *ipv4 = (struct ibv_exp_flow_spec_ipv4_ext) {
697 .type = flow->inner | IBV_EXP_FLOW_SPEC_IPV4_EXT,
704 ipv4->val = (struct ibv_exp_flow_ipv4_ext_filter){
705 .src_ip = spec->hdr.src_addr,
706 .dst_ip = spec->hdr.dst_addr,
707 .proto = spec->hdr.next_proto_id,
708 .tos = spec->hdr.type_of_service,
710 ipv4->mask = (struct ibv_exp_flow_ipv4_ext_filter){
711 .src_ip = mask->hdr.src_addr,
712 .dst_ip = mask->hdr.dst_addr,
713 .proto = mask->hdr.next_proto_id,
714 .tos = mask->hdr.type_of_service,
716 /* Remove unwanted bits from values. */
717 ipv4->val.src_ip &= ipv4->mask.src_ip;
718 ipv4->val.dst_ip &= ipv4->mask.dst_ip;
719 ipv4->val.proto &= ipv4->mask.proto;
720 ipv4->val.tos &= ipv4->mask.tos;
725 * Convert IPv6 item to Verbs specification.
728 * Item specification.
729 * @param[in] default_mask
730 * Default bit-masks to use when item->mask is not provided.
731 * @param[in, out] data
/*
 * Writes an IPv6 (extended) Verbs spec at ibv_attr + offset, counts it in
 * num_of_specs and sets the rule priority to 1. hash_fields is overwritten
 * with the IPv6 source and destination address bits.
 *
 * NOTE(review): in the visible lines the mask's flow_label, next_hdr and
 * hop_limit are loaded from the user mask, and the corresponding val fields
 * are then ANDed with them. No visible line loads those val fields from
 * spec, so they may stay at their initializer value and the rule would then
 * match zero for any of these fields that is masked in. Confirm against the
 * full file.
 * NOTE(review): the memcpy lengths use RTE_DIM(), an element count; this
 * equals the byte count only if the address arrays are byte arrays.
 */
735 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
736 const void *default_mask,
739 const struct rte_flow_item_ipv6 *spec = item->spec;
740 const struct rte_flow_item_ipv6 *mask = item->mask;
741 struct mlx5_flow *flow = (struct mlx5_flow *)data;
742 struct ibv_exp_flow_spec_ipv6_ext *ipv6;
743 unsigned int ipv6_size = sizeof(struct ibv_exp_flow_spec_ipv6_ext);
745 ++flow->ibv_attr->num_of_specs;
746 flow->ibv_attr->priority = 1;
747 flow->hash_fields = (IBV_EXP_RX_HASH_SRC_IPV6 |
748 IBV_EXP_RX_HASH_DST_IPV6);
749 ipv6 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
750 *ipv6 = (struct ibv_exp_flow_spec_ipv6_ext) {
751 .type = flow->inner | IBV_EXP_FLOW_SPEC_IPV6_EXT,
758 memcpy(ipv6->val.src_ip, spec->hdr.src_addr,
759 RTE_DIM(ipv6->val.src_ip));
760 memcpy(ipv6->val.dst_ip, spec->hdr.dst_addr,
761 RTE_DIM(ipv6->val.dst_ip));
762 memcpy(ipv6->mask.src_ip, mask->hdr.src_addr,
763 RTE_DIM(ipv6->mask.src_ip));
764 memcpy(ipv6->mask.dst_ip, mask->hdr.dst_addr,
765 RTE_DIM(ipv6->mask.dst_ip));
766 ipv6->mask.flow_label = mask->hdr.vtc_flow;
767 ipv6->mask.next_hdr = mask->hdr.proto;
768 ipv6->mask.hop_limit = mask->hdr.hop_limits;
769 ipv6->val.flow_label &= ipv6->mask.flow_label;
770 ipv6->val.next_hdr &= ipv6->mask.next_hdr;
771 ipv6->val.hop_limit &= ipv6->mask.hop_limit;
776 * Convert UDP item to Verbs specification.
779 * Item specification.
780 * @param[in] default_mask
781 * Default bit-masks to use when item->mask is not provided.
782 * @param[in, out] data
/*
 * Writes a UDP Verbs spec at ibv_attr + offset, counts it in num_of_specs
 * and sets the rule priority to 0. The UDP port bits are ORed into
 * hash_fields, so the bits set by the preceding L3 item are kept.
 */
786 mlx5_flow_create_udp(const struct rte_flow_item *item,
787 const void *default_mask,
790 const struct rte_flow_item_udp *spec = item->spec;
791 const struct rte_flow_item_udp *mask = item->mask;
792 struct mlx5_flow *flow = (struct mlx5_flow *)data;
793 struct ibv_exp_flow_spec_tcp_udp *udp;
794 unsigned int udp_size = sizeof(struct ibv_exp_flow_spec_tcp_udp);
796 ++flow->ibv_attr->num_of_specs;
797 flow->ibv_attr->priority = 0;
798 flow->hash_fields |= (IBV_EXP_RX_HASH_SRC_PORT_UDP |
799 IBV_EXP_RX_HASH_DST_PORT_UDP);
800 udp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
801 *udp = (struct ibv_exp_flow_spec_tcp_udp) {
802 .type = flow->inner | IBV_EXP_FLOW_SPEC_UDP,
809 udp->val.dst_port = spec->hdr.dst_port;
810 udp->val.src_port = spec->hdr.src_port;
811 udp->mask.dst_port = mask->hdr.dst_port;
812 udp->mask.src_port = mask->hdr.src_port;
813 /* Remove unwanted bits from values. */
814 udp->val.src_port &= udp->mask.src_port;
815 udp->val.dst_port &= udp->mask.dst_port;
820 * Convert TCP item to Verbs specification.
823 * Item specification.
824 * @param[in] default_mask
825 * Default bit-masks to use when item->mask is not provided.
826 * @param[in, out] data
/*
 * Writes a TCP Verbs spec at ibv_attr + offset, counts it in num_of_specs
 * and sets the rule priority to 0. The TCP port bits are ORed into
 * hash_fields, so the bits set by the preceding L3 item are kept.
 */
830 mlx5_flow_create_tcp(const struct rte_flow_item *item,
831 const void *default_mask,
834 const struct rte_flow_item_tcp *spec = item->spec;
835 const struct rte_flow_item_tcp *mask = item->mask;
836 struct mlx5_flow *flow = (struct mlx5_flow *)data;
837 struct ibv_exp_flow_spec_tcp_udp *tcp;
838 unsigned int tcp_size = sizeof(struct ibv_exp_flow_spec_tcp_udp);
840 ++flow->ibv_attr->num_of_specs;
841 flow->ibv_attr->priority = 0;
842 flow->hash_fields |= (IBV_EXP_RX_HASH_SRC_PORT_TCP |
843 IBV_EXP_RX_HASH_DST_PORT_TCP);
844 tcp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
845 *tcp = (struct ibv_exp_flow_spec_tcp_udp) {
846 .type = flow->inner | IBV_EXP_FLOW_SPEC_TCP,
853 tcp->val.dst_port = spec->hdr.dst_port;
854 tcp->val.src_port = spec->hdr.src_port;
855 tcp->mask.dst_port = mask->hdr.dst_port;
856 tcp->mask.src_port = mask->hdr.src_port;
857 /* Remove unwanted bits from values. */
858 tcp->val.src_port &= tcp->mask.src_port;
859 tcp->val.dst_port &= tcp->mask.dst_port;
864 * Convert VXLAN item to Verbs specification.
867 * Item specification.
868 * @param[in] default_mask
869 * Default bit-masks to use when item->mask is not provided.
870 * @param[in, out] data
/*
 * Writes a VXLAN tunnel Verbs spec at ibv_attr + offset, counts it in
 * num_of_specs and sets the rule priority to 0. It then sets flow->inner so
 * that every spec written afterwards is tagged IBV_EXP_FLOW_SPEC_INNER.
 * The 3-byte VNI is copied into bytes 1..3 of "id" and read back through
 * id.vlan_id to form the tunnel id; the declaration of "id" is on a line
 * not visible here.
 */
874 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
875 const void *default_mask,
878 const struct rte_flow_item_vxlan *spec = item->spec;
879 const struct rte_flow_item_vxlan *mask = item->mask;
880 struct mlx5_flow *flow = (struct mlx5_flow *)data;
881 struct ibv_exp_flow_spec_tunnel *vxlan;
882 unsigned int size = sizeof(struct ibv_exp_flow_spec_tunnel);
888 ++flow->ibv_attr->num_of_specs;
889 flow->ibv_attr->priority = 0;
891 vxlan = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
892 *vxlan = (struct ibv_exp_flow_spec_tunnel) {
893 .type = flow->inner | IBV_EXP_FLOW_SPEC_VXLAN_TUNNEL,
/* From here on, following items describe the encapsulated packet. */
896 flow->inner = IBV_EXP_FLOW_SPEC_INNER;
901 memcpy(&id.vni[1], spec->vni, 3);
902 vxlan->val.tunnel_id = id.vlan_id;
903 memcpy(&id.vni[1], mask->vni, 3);
904 vxlan->mask.tunnel_id = id.vlan_id;
905 /* Remove unwanted bits from values. */
906 vxlan->val.tunnel_id &= vxlan->mask.tunnel_id;
911 * Convert mark/flag action to Verbs specification.
914 * Pointer to MLX5 flow structure.
/*
 * Writes an action-tag Verbs spec carrying mlx5_flow_mark_set(mark_id) at
 * ibv_attr + offset and counts it in num_of_specs. flow->offset is not
 * advanced in the visible lines; priv_flow_create() adds the tag size after
 * the call.
 */
919 mlx5_flow_create_flag_mark(struct mlx5_flow *flow, uint32_t mark_id)
921 struct ibv_exp_flow_spec_action_tag *tag;
922 unsigned int size = sizeof(struct ibv_exp_flow_spec_action_tag);
924 tag = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
925 *tag = (struct ibv_exp_flow_spec_action_tag){
926 .type = IBV_EXP_FLOW_SPEC_ACTION_TAG,
928 .tag_id = mlx5_flow_mark_set(mark_id),
930 ++flow->ibv_attr->num_of_specs;
935 * Complete flow rule creation with a drop queue.
938 * Pointer to private structure.
940 * MLX5 flow attributes (filled by mlx5_flow_validate()).
942 * Perform verbose error reporting if not NULL.
945 * A flow if the rule could be created.
/*
 * Builds the flow handle for a drop rule. It allocates a zeroed rte_flow,
 * takes over flow->ibv_attr, borrows the QP of the shared drop queue
 * (priv->flow_drop_queue) and creates the Verbs flow on it.
 */
947 static struct rte_flow *
948 priv_flow_create_action_queue_drop(struct priv *priv,
949 struct mlx5_flow *flow,
950 struct rte_flow_error *error)
952 struct rte_flow *rte_flow;
956 rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow), 0);
958 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
959 NULL, "cannot allocate flow memory");
963 rte_flow->ibv_attr = flow->ibv_attr;
/* The QP belongs to the shared drop queue, not to this flow. */
964 rte_flow->qp = priv->flow_drop_queue->qp;
967 rte_flow->ibv_flow = ibv_exp_create_flow(rte_flow->qp,
969 if (!rte_flow->ibv_flow) {
970 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
971 NULL, "flow rule creation failure");
982 * Complete flow rule creation.
985 * Pointer to private structure.
987 * MLX5 flow attributes (filled by mlx5_flow_validate()).
989 * Target action structure.
991 * Perform verbose error reporting if not NULL.
994 * A flow if the rule could be created.
/*
 * Builds the flow handle for a queue/RSS rule. It allocates the rte_flow
 * with trailing room for action->queues_n rxq pointers, records the target
 * queues, then creates in order an indirection table, a hash RX QP and the
 * Verbs flow. The tail of the function destroys the QP and the indirection
 * table; the guards and labels around that cleanup are not visible here.
 */
996 static struct rte_flow *
997 priv_flow_create_action_queue(struct priv *priv,
998 struct mlx5_flow *flow,
999 struct mlx5_flow_action *action,
1000 struct rte_flow_error *error)
1002 struct rte_flow *rte_flow;
/*
 * wqs[] is a variable-length array on the stack with
 * 1 << log2above(queues_n) entries, the same log2 value that is passed as
 * log_ind_tbl_size below.
 */
1005 const unsigned int wqs_n = 1 << log2above(action->queues_n);
1006 struct ibv_exp_wq *wqs[wqs_n];
1010 assert(!action->drop);
1011 rte_flow = rte_calloc(__func__, 1,
1012 sizeof(*rte_flow) + sizeof(struct rxq *) *
1013 action->queues_n, 0);
1015 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1016 NULL, "cannot allocate flow memory");
/*
 * NOTE(review): the last operand of this offset expression is on a line
 * not visible here. Confirm the resulting pointer lands in the trailing
 * storage allocated above (i.e. past sizeof(*rte_flow)) and does not
 * overlap the handle itself.
 */
1019 rte_flow->rxqs = (struct rxq *(*)[])((uintptr_t)rte_flow +
1020 sizeof(struct rxq *) *
1022 for (i = 0; i < action->queues_n; ++i) {
1023 struct rxq_ctrl *rxq;
1025 rxq = container_of((*priv->rxqs)[action->queues[i]],
1026 struct rxq_ctrl, rxq);
1028 (*rte_flow->rxqs)[i] = &rxq->rxq;
1030 rxq->rxq.mark |= action->mark;
1032 /* Finalise the indirection table: i continues from the loop above up to wqs_n. */
1033 for (j = 0; i < wqs_n; ++i, ++j) {
1035 if (j == action->queues_n)
1038 rte_flow->mark = action->mark;
1039 rte_flow->ibv_attr = flow->ibv_attr;
1040 rte_flow->hash_fields = flow->hash_fields;
1041 rte_flow->ind_table = ibv_exp_create_rwq_ind_table(
1043 &(struct ibv_exp_rwq_ind_table_init_attr){
1045 .log_ind_tbl_size = log2above(action->queues_n),
1049 if (!rte_flow->ind_table) {
1050 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1051 NULL, "cannot allocate indirection table");
1054 rte_flow->qp = ibv_exp_create_qp(
1056 &(struct ibv_exp_qp_init_attr){
1057 .qp_type = IBV_QPT_RAW_PACKET,
1059 IBV_EXP_QP_INIT_ATTR_PD |
1060 IBV_EXP_QP_INIT_ATTR_PORT |
1061 IBV_EXP_QP_INIT_ATTR_RX_HASH,
1063 .rx_hash_conf = &(struct ibv_exp_rx_hash_conf){
1065 IBV_EXP_RX_HASH_FUNC_TOEPLITZ,
1066 .rx_hash_key_len = rss_hash_default_key_len,
1067 .rx_hash_key = rss_hash_default_key,
1068 .rx_hash_fields_mask = rte_flow->hash_fields,
1069 .rwq_ind_tbl = rte_flow->ind_table,
1071 .port_num = priv->port,
1073 if (!rte_flow->qp) {
1074 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1075 NULL, "cannot allocate QP");
1080 rte_flow->ibv_flow = ibv_exp_create_flow(rte_flow->qp,
1081 rte_flow->ibv_attr);
1082 if (!rte_flow->ibv_flow) {
1083 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1084 NULL, "flow rule creation failure");
1091 ibv_destroy_qp(rte_flow->qp);
1092 if (rte_flow->ind_table)
1093 ibv_exp_destroy_rwq_ind_table(rte_flow->ind_table);
1102 * Pointer to private structure.
1104 * Flow rule attributes.
1105 * @param[in] pattern
1106 * Pattern specification (list terminated by the END pattern item).
1107 * @param[in] actions
1108 * Associated actions (list terminated by the END action).
1110 * Perform verbose error reporting if not NULL.
1113 * A flow on success, NULL otherwise.
/*
 * Converts an rte_flow rule into a driver flow in two passes over
 * priv_flow_validate(). The first pass runs with flow.ibv_attr unset and
 * leaves the required buffer size in flow.offset. The buffer is then
 * allocated, the offset is rewound to just past the attribute header, and a
 * second pass writes the Verbs specs. The action list is then summarized
 * into "action" and the rule is handed to the drop or queue constructor.
 * flow.ibv_attr is released with rte_free() near the end of the function;
 * the control flow leading there is not visible in this view.
 */
1115 static struct rte_flow *
1116 priv_flow_create(struct priv *priv,
1117 const struct rte_flow_attr *attr,
1118 const struct rte_flow_item items[],
1119 const struct rte_flow_action actions[],
1120 struct rte_flow_error *error)
1122 struct rte_flow *rte_flow;
1123 struct mlx5_flow_action action;
1124 struct mlx5_flow flow = { .offset = sizeof(struct ibv_exp_flow_attr), };
1127 err = priv_flow_validate(priv, attr, items, actions, error, &flow);
/* flow.offset now holds the total size measured by the pass above. */
1130 flow.ibv_attr = rte_malloc(__func__, flow.offset, 0);
1131 flow.offset = sizeof(struct ibv_exp_flow_attr);
1132 if (!flow.ibv_attr) {
1133 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1134 NULL, "cannot allocate ibv_attr memory");
1137 *flow.ibv_attr = (struct ibv_exp_flow_attr){
1138 .type = IBV_EXP_FLOW_ATTR_NORMAL,
1139 .size = sizeof(struct ibv_exp_flow_attr),
1140 .priority = attr->priority,
1147 flow.hash_fields = 0;
/* Second pass: ibv_attr is set, so the converters write their specs. */
1148 claim_zero(priv_flow_validate(priv, attr, items, actions,
1150 action = (struct mlx5_flow_action){
1154 .mark_id = MLX5_FLOW_MARK_DEFAULT,
1156 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
1157 if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
1159 } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
1161 action.queues[action.queues_n++] =
1162 ((const struct rte_flow_action_queue *)
1163 actions->conf)->index;
1164 } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
1165 const struct rte_flow_action_rss *rss =
1166 (const struct rte_flow_action_rss *)
1171 action.queues_n = rss->num;
1172 for (n = 0; n < rss->num; ++n)
1173 action.queues[n] = rss->queue[n];
1174 } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
1177 } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
1178 const struct rte_flow_action_mark *mark =
1179 (const struct rte_flow_action_mark *)
1183 action.mark_id = mark->id;
/* Mark is enabled only if no DROP has been seen so far in the list. */
1184 action.mark = !action.drop;
1185 } else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
1188 rte_flow_error_set(error, ENOTSUP,
1189 RTE_FLOW_ERROR_TYPE_ACTION,
1190 actions, "unsupported action");
1195 mlx5_flow_create_flag_mark(&flow, action.mark_id);
1196 flow.offset += sizeof(struct ibv_exp_flow_spec_action_tag);
1200 priv_flow_create_action_queue_drop(priv, &flow, error);
1202 rte_flow = priv_flow_create_action_queue(priv, &flow, &action,
1208 rte_free(flow.ibv_attr);
1215 * @see rte_flow_create()
/*
 * rte_flow create entry point. Delegates to priv_flow_create() and links
 * the resulting flow at the head of priv->flows.
 */
1219 mlx5_flow_create(struct rte_eth_dev *dev,
1220 const struct rte_flow_attr *attr,
1221 const struct rte_flow_item items[],
1222 const struct rte_flow_action actions[],
1223 struct rte_flow_error *error)
1225 struct priv *priv = dev->data->dev_private;
1226 struct rte_flow *flow;
1229 flow = priv_flow_create(priv, attr, items, actions, error);
1231 LIST_INSERT_HEAD(&priv->flows, flow, next);
1232 DEBUG("Flow created %p", (void *)flow);
1242 * Pointer to private structure.
/*
 * Unlinks a flow from priv->flows and releases its Verbs objects and its
 * ibv_attr buffer. It then recomputes the mark bit of each rx queue the
 * flow used by scanning the remaining flows. The flow is removed from the
 * list before the scan, so it does not count itself.
 *
 * NOTE(review): mark_n is declared and zeroed once, before the per-queue
 * loop, and no reset is visible inside the loop. If it is not reset on an
 * elided line, a match found for an earlier queue keeps the mark set on
 * later queues; confirm against the full file.
 */
1247 priv_flow_destroy(struct priv *priv,
1248 struct rte_flow *flow)
1251 LIST_REMOVE(flow, next);
1253 claim_zero(ibv_exp_destroy_flow(flow->ibv_flow));
1257 claim_zero(ibv_destroy_qp(flow->qp));
1258 if (flow->ind_table)
1259 claim_zero(ibv_exp_destroy_rwq_ind_table(flow->ind_table));
1260 if (flow->drop && flow->wq)
1261 claim_zero(ibv_exp_destroy_wq(flow->wq));
1262 if (flow->drop && flow->cq)
1263 claim_zero(ibv_destroy_cq(flow->cq));
1265 struct rte_flow *tmp;
1267 uint32_t mark_n = 0;
1271 * To remove the mark from the queue, the queue must not be
1272 * present in any other marked flow (RSS or not).
1274 for (queue_n = 0; queue_n < flow->rxqs_n; ++queue_n) {
1275 rxq = (*flow->rxqs)[queue_n];
1276 for (tmp = LIST_FIRST(&priv->flows);
1278 tmp = LIST_NEXT(tmp, next)) {
1284 tqueue_n < tmp->rxqs_n;
1288 trxq = (*tmp->rxqs)[tqueue_n];
1293 rxq->mark = !!mark_n;
1297 rte_free(flow->ibv_attr);
1298 DEBUG("Flow destroyed %p", (void *)flow);
1305 * @see rte_flow_destroy()
/*
 * rte_flow destroy entry point. Delegates to priv_flow_destroy(). No use of
 * "error" is visible in this view.
 */
1309 mlx5_flow_destroy(struct rte_eth_dev *dev,
1310 struct rte_flow *flow,
1311 struct rte_flow_error *error)
1313 struct priv *priv = dev->data->dev_private;
1317 priv_flow_destroy(priv, flow);
1323 * Destroy all flows.
1326 * Pointer to private structure.
/*
 * Destroys every flow on priv->flows. The loop terminates because
 * priv_flow_destroy() unlinks the flow it is given from the list.
 */
1329 priv_flow_flush(struct priv *priv)
1331 while (!LIST_EMPTY(&priv->flows)) {
1332 struct rte_flow *flow;
1334 flow = LIST_FIRST(&priv->flows);
1335 priv_flow_destroy(priv, flow);
1340 * Destroy all flows.
1342 * @see rte_flow_flush()
/*
 * rte_flow flush entry point. Delegates to priv_flow_flush(). No use of
 * "error" is visible in this view.
 */
1346 mlx5_flow_flush(struct rte_eth_dev *dev,
1347 struct rte_flow_error *error)
1349 struct priv *priv = dev->data->dev_private;
1353 priv_flow_flush(priv);
1359 * Create drop queue.
1362 * Pointer to private structure.
/*
 * Creates the shared drop queue: a CQ, MLX5_DROP_WQ_N work queues, an
 * indirection table over them and a hash RX QP with an empty hash field
 * mask. The result is published in priv->flow_drop_queue. The tail of the
 * function releases the objects in reverse order of creation and clears
 * priv->flow_drop_queue; the guards and labels around that cleanup are not
 * visible in this view.
 */
1368 priv_flow_create_drop_queue(struct priv *priv)
1370 struct rte_flow_drop *fdq = NULL;
1375 fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
1377 WARN("cannot allocate memory for drop queue");
1380 fdq->cq = ibv_exp_create_cq(priv->ctx, 1, NULL, NULL, 0,
1381 &(struct ibv_exp_cq_init_attr){
1385 WARN("cannot allocate CQ for drop queue");
1388 for (i = 0; i != MLX5_DROP_WQ_N; ++i) {
1389 fdq->wqs[i] = ibv_exp_create_wq(priv->ctx,
1390 &(struct ibv_exp_wq_init_attr){
1391 .wq_type = IBV_EXP_WQT_RQ,
1398 WARN("cannot allocate WQ for drop queue");
/*
 * NOTE(review): log_ind_tbl_size is 0 while MLX5_DROP_WQ_N (4) work queues
 * are created and passed as ind_tbl. Confirm that a log size of 0 is
 * intended and how many of the WQs the table actually references.
 */
1402 fdq->ind_table = ibv_exp_create_rwq_ind_table(priv->ctx,
1403 &(struct ibv_exp_rwq_ind_table_init_attr){
1405 .log_ind_tbl_size = 0,
1406 .ind_tbl = fdq->wqs,
1409 if (!fdq->ind_table) {
1410 WARN("cannot allocate indirection table for drop queue");
1413 fdq->qp = ibv_exp_create_qp(priv->ctx,
1414 &(struct ibv_exp_qp_init_attr){
1415 .qp_type = IBV_QPT_RAW_PACKET,
1417 IBV_EXP_QP_INIT_ATTR_PD |
1418 IBV_EXP_QP_INIT_ATTR_PORT |
1419 IBV_EXP_QP_INIT_ATTR_RX_HASH,
1421 .rx_hash_conf = &(struct ibv_exp_rx_hash_conf){
1423 IBV_EXP_RX_HASH_FUNC_TOEPLITZ,
1424 .rx_hash_key_len = rss_hash_default_key_len,
1425 .rx_hash_key = rss_hash_default_key,
1426 .rx_hash_fields_mask = 0,
1427 .rwq_ind_tbl = fdq->ind_table,
1429 .port_num = priv->port,
1432 WARN("cannot allocate QP for drop queue");
1435 priv->flow_drop_queue = fdq;
/* Cleanup, in reverse order of creation: QP, table, WQs, CQ. */
1439 claim_zero(ibv_destroy_qp(fdq->qp));
1441 claim_zero(ibv_exp_destroy_rwq_ind_table(fdq->ind_table));
1442 for (i = 0; i != MLX5_DROP_WQ_N; ++i) {
1444 claim_zero(ibv_exp_destroy_wq(fdq->wqs[i]));
1447 claim_zero(ibv_destroy_cq(fdq->cq));
1450 priv->flow_drop_queue = NULL;
1455 * Delete drop queue.
1458 * Pointer to private structure.
/*
 * Tears down the shared drop queue in reverse order of creation (QP,
 * indirection table, work queues, CQ) and clears priv->flow_drop_queue.
 * Every work queue is asserted to exist before it is destroyed.
 */
1461 priv_flow_delete_drop_queue(struct priv *priv)
1463 struct rte_flow_drop *fdq = priv->flow_drop_queue;
1466 claim_zero(ibv_destroy_qp(fdq->qp));
1467 claim_zero(ibv_exp_destroy_rwq_ind_table(fdq->ind_table));
1468 for (i = 0; i != MLX5_DROP_WQ_N; ++i) {
1469 assert(fdq->wqs[i]);
1470 claim_zero(ibv_exp_destroy_wq(fdq->wqs[i]));
1472 claim_zero(ibv_destroy_cq(fdq->cq));
1474 priv->flow_drop_queue = NULL;
1480 * Called by dev_stop() to remove all flows.
1483 * Pointer to private structure.
/*
 * Detaches every flow from the hardware without forgetting it. For each
 * flow the Verbs flow is destroyed and its pointer cleared, and the mark
 * bit of the rx queues it references is reset. The rte_flow objects stay on
 * priv->flows with their ibv_attr intact. The shared drop queue is deleted
 * at the end.
 */
1486 priv_flow_stop(struct priv *priv)
1488 struct rte_flow *flow;
1490 for (flow = LIST_FIRST(&priv->flows);
1492 flow = LIST_NEXT(flow, next)) {
1493 claim_zero(ibv_exp_destroy_flow(flow->ibv_flow));
1494 flow->ibv_flow = NULL;
1498 for (n = 0; n < flow->rxqs_n; ++n)
1499 (*flow->rxqs)[n]->mark = 0;
1501 DEBUG("Flow %p removed", (void *)flow);
1503 priv_flow_delete_drop_queue(priv);
1510 * Pointer to private structure.
1513 * 0 on success, an errno value otherwise and rte_errno is set.
1516 priv_flow_start(struct priv *priv)
1519 struct rte_flow *flow;
1521 ret = priv_flow_create_drop_queue(priv);
1524 for (flow = LIST_FIRST(&priv->flows);
1526 flow = LIST_NEXT(flow, next)) {
1530 qp = priv->flow_drop_queue->qp;
1533 flow->ibv_flow = ibv_exp_create_flow(qp, flow->ibv_attr);
1534 if (!flow->ibv_flow) {
1535 DEBUG("Flow %p cannot be applied", (void *)flow);
1539 DEBUG("Flow %p applied", (void *)flow);
1543 for (n = 0; n < flow->rxqs_n; ++n)
1544 (*flow->rxqs)[n]->mark = 1;