/*-
 *   BSD LICENSE
 *
 *   Copyright 2016 6WIND S.A.
 *   Copyright 2016 Mellanox.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of 6WIND S.A. nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <sys/queue.h>
#include <string.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_ethdev.h>
#include <rte_flow.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>

#include "mlx5.h"
#include "mlx5_prm.h"
/* Number of Work Queues necessary for the drop queue. */
#define MLX5_DROP_WQ_N 4
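/*
 * Drop is emulated rather than offloaded in this Verbs API: the drop queue
 * built by priv_flow_create_drop_queue() below is an ordinary hash RX QP
 * whose MLX5_DROP_WQ_N work queues never have receive buffers posted, so
 * packets steered to it are never delivered anywhere and are effectively
 * discarded.
 */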
static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data);

static int
mlx5_flow_create_vlan(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data);

static int
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data);

static int
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data);

static int
mlx5_flow_create_udp(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data);

static int
mlx5_flow_create_tcp(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data);

static int
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
		       const void *default_mask,
		       void *data);
struct rte_flow {
	LIST_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
	struct ibv_exp_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
	struct ibv_exp_rwq_ind_table *ind_table; /**< Indirection table. */
	struct ibv_qp *qp; /**< Verbs queue pair. */
	struct ibv_exp_flow *ibv_flow; /**< Verbs flow. */
	struct ibv_exp_wq *wq; /**< Verbs work queue. */
	struct ibv_cq *cq; /**< Verbs completion queue. */
	struct rxq *(*rxqs)[]; /**< Pointer to the queues array. */
	uint16_t rxqs_n; /**< Number of queues in this flow, 0 if drop queue. */
	uint32_t mark:1; /**< Set if the flow is marked. */
	uint32_t drop:1; /**< Drop queue. */
	uint64_t hash_fields; /**< Fields that participate in the hash. */
};
/** Static initializer for items. */
#define ITEMS(...) \
	(const enum rte_flow_item_type []){ \
		__VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
	}
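/*
 * For example, ITEMS(RTE_FLOW_ITEM_TYPE_UDP, RTE_FLOW_ITEM_TYPE_TCP)
 * expands to the compound literal
 *
 *	(const enum rte_flow_item_type []){
 *		RTE_FLOW_ITEM_TYPE_UDP, RTE_FLOW_ITEM_TYPE_TCP,
 *		RTE_FLOW_ITEM_TYPE_END,
 *	}
 *
 * i.e. an anonymous END-terminated array suitable as an .items initializer.
 */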
/** Structure to generate a simple graph of layers supported by the NIC. */
struct mlx5_flow_items {
	/** List of possible actions for these items. */
	const enum rte_flow_action_type *const actions;
	/** Bit-masks corresponding to the possibilities for the item. */
	const void *mask;
	/**
	 * Default bit-masks to use when item->mask is not provided. When
	 * \default_mask is also NULL, the full supported bit-mask (\mask) is
	 * used instead.
	 */
	const void *default_mask;
	/** Bit-masks size in bytes. */
	const unsigned int mask_sz;
	/**
	 * Conversion function from rte_flow to NIC specific flow.
	 *
	 * @param item
	 *   rte_flow item to convert.
	 * @param default_mask
	 *   Default bit-masks to use when item->mask is not provided.
	 * @param data
	 *   Internal structure to store the conversion.
	 *
	 * @return
	 *   0 on success, negative value otherwise.
	 */
	int (*convert)(const struct rte_flow_item *item,
		       const void *default_mask,
		       void *data);
	/** Size in bytes of the destination structure. */
	const unsigned int dst_sz;
	/** List of possible following items. */
	const enum rte_flow_item_type *const items;
};
/** Valid actions for this PMD. */
static const enum rte_flow_action_type valid_actions[] = {
	RTE_FLOW_ACTION_TYPE_DROP,
	RTE_FLOW_ACTION_TYPE_QUEUE,
	RTE_FLOW_ACTION_TYPE_MARK,
	RTE_FLOW_ACTION_TYPE_FLAG,
	RTE_FLOW_ACTION_TYPE_END,
};
/** Graph of supported items and associated actions. */
static const struct mlx5_flow_items mlx5_flow_items[] = {
	[RTE_FLOW_ITEM_TYPE_END] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
			       RTE_FLOW_ITEM_TYPE_VXLAN),
	},
	[RTE_FLOW_ITEM_TYPE_ETH] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
			       RTE_FLOW_ITEM_TYPE_IPV4,
			       RTE_FLOW_ITEM_TYPE_IPV6),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_eth){
			.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
			.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
			.type = -1,
		},
		.default_mask = &rte_flow_item_eth_mask,
		.mask_sz = sizeof(struct rte_flow_item_eth),
		.convert = mlx5_flow_create_eth,
		.dst_sz = sizeof(struct ibv_exp_flow_spec_eth),
	},
	[RTE_FLOW_ITEM_TYPE_VLAN] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
			       RTE_FLOW_ITEM_TYPE_IPV6),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_vlan){
			.tci = -1,
		},
		.default_mask = &rte_flow_item_vlan_mask,
		.mask_sz = sizeof(struct rte_flow_item_vlan),
		.convert = mlx5_flow_create_vlan,
		.dst_sz = 0,
	},
	[RTE_FLOW_ITEM_TYPE_IPV4] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
			       RTE_FLOW_ITEM_TYPE_TCP),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_ipv4){
			.hdr = {
				.src_addr = -1,
				.dst_addr = -1,
				.type_of_service = -1,
				.next_proto_id = -1,
			},
		},
		.default_mask = &rte_flow_item_ipv4_mask,
		.mask_sz = sizeof(struct rte_flow_item_ipv4),
		.convert = mlx5_flow_create_ipv4,
		.dst_sz = sizeof(struct ibv_exp_flow_spec_ipv4_ext),
	},
	[RTE_FLOW_ITEM_TYPE_IPV6] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
			       RTE_FLOW_ITEM_TYPE_TCP),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_ipv6){
			.hdr = {
				.src_addr = {
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
				},
				.dst_addr = {
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
				},
				.vtc_flow = -1,
				.proto = -1,
				.hop_limits = -1,
			},
		},
		.default_mask = &rte_flow_item_ipv6_mask,
		.mask_sz = sizeof(struct rte_flow_item_ipv6),
		.convert = mlx5_flow_create_ipv6,
		.dst_sz = sizeof(struct ibv_exp_flow_spec_ipv6_ext),
	},
	[RTE_FLOW_ITEM_TYPE_UDP] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_udp){
			.hdr = {
				.src_port = -1,
				.dst_port = -1,
			},
		},
		.default_mask = &rte_flow_item_udp_mask,
		.mask_sz = sizeof(struct rte_flow_item_udp),
		.convert = mlx5_flow_create_udp,
		.dst_sz = sizeof(struct ibv_exp_flow_spec_tcp_udp),
	},
	[RTE_FLOW_ITEM_TYPE_TCP] = {
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_tcp){
			.hdr = {
				.src_port = -1,
				.dst_port = -1,
			},
		},
		.default_mask = &rte_flow_item_tcp_mask,
		.mask_sz = sizeof(struct rte_flow_item_tcp),
		.convert = mlx5_flow_create_tcp,
		.dst_sz = sizeof(struct ibv_exp_flow_spec_tcp_udp),
	},
	[RTE_FLOW_ITEM_TYPE_VXLAN] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_vxlan){
			.vni = "\xff\xff\xff",
		},
		.default_mask = &rte_flow_item_vxlan_mask,
		.mask_sz = sizeof(struct rte_flow_item_vxlan),
		.convert = mlx5_flow_create_vxlan,
		.dst_sz = sizeof(struct ibv_exp_flow_spec_tunnel),
	},
};
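/*
 * Reading the graph: validation starts from the END entry, and an item is
 * accepted only when it appears in the .items list of its predecessor. For
 * example ETH -> VLAN -> IPV4 -> UDP -> VXLAN -> ETH is a valid walk
 * describing a VXLAN overlay; after VXLAN the walk re-enters the graph at
 * ETH for the inner layers.
 */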
/** Structure to pass to the conversion function. */
struct mlx5_flow {
	struct ibv_exp_flow_attr *ibv_attr; /**< Verbs attribute. */
	unsigned int offset; /**< Offset in bytes in the ibv_attr buffer. */
	uint32_t inner; /**< Set once VXLAN is encountered. */
	uint64_t hash_fields; /**< Fields that participate in the hash. */
};
/** Structure for the drop queue. */
struct rte_flow_drop {
	struct ibv_exp_rwq_ind_table *ind_table; /**< Indirection table. */
	struct ibv_qp *qp; /**< Verbs queue pair. */
	struct ibv_exp_wq *wqs[MLX5_DROP_WQ_N]; /**< Verbs work queues. */
	struct ibv_cq *cq; /**< Verbs completion queue. */
};
struct mlx5_flow_action {
	uint32_t queue:1; /**< Target is a receive queue. */
	uint32_t drop:1; /**< Target is a drop queue. */
	uint32_t mark:1; /**< Mark is present in the flow. */
	uint32_t mark_id; /**< Mark identifier. */
	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indexes to use. */
	uint16_t queues_n; /**< Number of entries in queues[]. */
};
/**
 * Check support for a given item.
 *
 * @param item[in]
 *   Item specification.
 * @param mask[in]
 *   Bit-masks covering supported fields to compare with spec, last and mask in
 *   \item.
 * @param size
 *   Bit-mask size in bytes.
 *
 * @return
 *   0 on success.
 */
static int
mlx5_flow_item_validate(const struct rte_flow_item *item,
			const uint8_t *mask, unsigned int size)
{
	int ret = 0;

	if (!item->spec && (item->mask || item->last))
		return -1;
	if (item->spec && !item->mask) {
		unsigned int i;
		const uint8_t *spec = item->spec;

		for (i = 0; i < size; ++i)
			if ((spec[i] | mask[i]) != mask[i])
				return -1;
	}
	if (item->last && !item->mask) {
		unsigned int i;
		const uint8_t *spec = item->last;

		for (i = 0; i < size; ++i)
			if ((spec[i] | mask[i]) != mask[i])
				return -1;
	}
	if (item->mask) {
		unsigned int i;
		const uint8_t *spec = item->mask;

		for (i = 0; i < size; ++i)
			if ((spec[i] | mask[i]) != mask[i])
				return -1;
	}
	if (item->spec && item->last) {
		uint8_t spec[size];
		uint8_t last[size];
		const uint8_t *apply = mask;
		unsigned int i;

		if (item->mask)
			apply = item->mask;
		for (i = 0; i < size; ++i) {
			spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
			last[i] = ((const uint8_t *)item->last)[i] & apply[i];
		}
		ret = memcmp(spec, last, size);
	}
	return ret;
}
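/*
 * The spec/last comparison above only accepts ranges that collapse to a
 * single value under the applied mask. For example, an IPv4 item with
 * spec 10.0.0.1, last 10.0.0.255 and mask 255.255.255.0 is accepted since
 * both bounds mask to 10.0.0.0, while the same range with mask
 * 255.255.255.255 makes memcmp() fail and the item is rejected.
 */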
/**
 * Validate a flow supported by the NIC.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] items
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] flow
 *   Flow structure to update.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_validate(struct priv *priv,
		   const struct rte_flow_attr *attr,
		   const struct rte_flow_item items[],
		   const struct rte_flow_action actions[],
		   struct rte_flow_error *error,
		   struct mlx5_flow *flow)
{
	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
	struct mlx5_flow_action action = {
		.queue = 0,
		.drop = 0,
		.mark = 0,
		.mark_id = MLX5_FLOW_MARK_DEFAULT,
		.queues_n = 0,
	};

	if (attr->group) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
				   NULL,
				   "groups are not supported");
		return -rte_errno;
	}
	if (attr->priority) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
				   NULL,
				   "priorities are not supported");
		return -rte_errno;
	}
	if (attr->egress) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
				   NULL,
				   "egress is not supported");
		return -rte_errno;
	}
	if (!attr->ingress) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
				   NULL,
				   "only ingress is supported");
		return -rte_errno;
	}
	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
		const struct mlx5_flow_items *token = NULL;
		unsigned int i;
		int err;

		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
			continue;
		for (i = 0;
		     cur_item->items &&
		     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
		     ++i) {
			if (cur_item->items[i] == items->type) {
				token = &mlx5_flow_items[items->type];
				break;
			}
		}
		if (!token)
			goto exit_item_not_supported;
		cur_item = token;
		err = mlx5_flow_item_validate(items,
					      (const uint8_t *)cur_item->mask,
					      cur_item->mask_sz);
		if (err)
			goto exit_item_not_supported;
		if (flow->ibv_attr && cur_item->convert) {
			err = cur_item->convert(items,
						(cur_item->default_mask ?
						 cur_item->default_mask :
						 cur_item->mask),
						flow);
			if (err)
				goto exit_item_not_supported;
		} else if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
			if (flow->inner) {
				rte_flow_error_set(error, ENOTSUP,
						   RTE_FLOW_ERROR_TYPE_ITEM,
						   items,
						   "cannot recognize multiple"
						   " VXLAN encapsulations");
				return -rte_errno;
			}
			flow->inner = 1;
		}
		flow->offset += cur_item->dst_sz;
	}
	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
			continue;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
			action.drop = 1;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
			const struct rte_flow_action_queue *queue =
				(const struct rte_flow_action_queue *)
				actions->conf;
			uint16_t n;
			uint16_t found = 0;

			if (!queue || (queue->index > (priv->rxqs_n - 1)))
				goto exit_action_not_supported;
			for (n = 0; n < action.queues_n; ++n) {
				if (action.queues[n] == queue->index) {
					found = 1;
					break;
				}
			}
			if (action.queues_n && !found) {
				rte_flow_error_set(error, ENOTSUP,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "queue action not in RSS"
						   " queues");
				return -rte_errno;
			}
			action.queue = 1;
			action.queues_n = 1;
			action.queues[0] = queue->index;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
			const struct rte_flow_action_rss *rss =
				(const struct rte_flow_action_rss *)
				actions->conf;
			uint16_t n;

			if (action.queues_n == 1) {
				uint16_t found = 0;

				assert(action.queues_n);
				for (n = 0; n < rss->num; ++n) {
					if (action.queues[0] ==
					    rss->queue[n]) {
						found = 1;
						break;
					}
				}
				if (!found) {
					rte_flow_error_set(error, ENOTSUP,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "queue action not in RSS"
						   " queues");
					return -rte_errno;
				}
			}
			action.queue = 1;
			for (n = 0; n < rss->num; ++n)
				action.queues[n] = rss->queue[n];
			action.queues_n = rss->num;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
			const struct rte_flow_action_mark *mark =
				(const struct rte_flow_action_mark *)
				actions->conf;

			if (!mark) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "mark must be defined");
				return -rte_errno;
			} else if (mark->id >= MLX5_FLOW_MARK_MAX) {
				rte_flow_error_set(error, ENOTSUP,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "mark must be between 0"
						   " and 16777199");
				return -rte_errno;
			}
			action.mark = 1;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
			action.mark = 1;
		} else {
			goto exit_action_not_supported;
		}
	}
	if (action.mark && !flow->ibv_attr && !action.drop)
		flow->offset += sizeof(struct ibv_exp_flow_spec_action_tag);
	if (!action.queue && !action.drop) {
		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "no valid action");
		return -rte_errno;
	}
	return 0;
exit_item_not_supported:
	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
			   items, "item not supported");
	return -rte_errno;
exit_action_not_supported:
	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
			   actions, "action not supported");
	return -rte_errno;
}
/**
 * Validate a flow supported by the NIC.
 *
 * @see rte_flow_validate()
 * @see rte_flow_ops
 */
int
mlx5_flow_validate(struct rte_eth_dev *dev,
		   const struct rte_flow_attr *attr,
		   const struct rte_flow_item items[],
		   const struct rte_flow_action actions[],
		   struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;
	int ret;
	struct mlx5_flow flow = { .offset = sizeof(struct ibv_exp_flow_attr) };

	priv_lock(priv);
	ret = priv_flow_validate(priv, attr, items, actions, error, &flow);
	priv_unlock(priv);
	return ret;
}
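/*
 * Illustrative sketch only (not part of the driver): an application reaches
 * the entry points above through the generic rte_flow API. For instance, to
 * match UDP over IPv4 and steer it to queue 0 (port_id and flow are assumed
 * to be declared by the caller):
 *
 *	struct rte_flow_error err;
 *	struct rte_flow_attr attr = { .ingress = 1 };
 *	struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_UDP },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 *	struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE,
 *		  .conf = &(struct rte_flow_action_queue){ .index = 0 } },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 *
 *	if (!rte_flow_validate(port_id, &attr, pattern, actions, &err))
 *		flow = rte_flow_create(port_id, &attr, pattern, actions, &err);
 *
 * ETH -> IPV4 -> UDP is a path through mlx5_flow_items, so this pattern
 * validates.
 */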
/**
 * Convert Ethernet item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data)
{
	const struct rte_flow_item_eth *spec = item->spec;
	const struct rte_flow_item_eth *mask = item->mask;
	struct mlx5_flow *flow = (struct mlx5_flow *)data;
	struct ibv_exp_flow_spec_eth *eth;
	const unsigned int eth_size = sizeof(struct ibv_exp_flow_spec_eth);
	unsigned int i;

	++flow->ibv_attr->num_of_specs;
	flow->ibv_attr->priority = 2;
	flow->hash_fields = 0;
	eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
	*eth = (struct ibv_exp_flow_spec_eth) {
		.type = flow->inner | IBV_EXP_FLOW_SPEC_ETH,
		.size = eth_size,
	};
	if (!spec)
		return 0;
	if (!mask)
		mask = default_mask;
	memcpy(eth->val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
	memcpy(eth->val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
	eth->val.ether_type = spec->type;
	memcpy(eth->mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
	memcpy(eth->mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
	eth->mask.ether_type = mask->type;
	/* Remove unwanted bits from values. */
	for (i = 0; i < ETHER_ADDR_LEN; ++i) {
		eth->val.dst_mac[i] &= eth->mask.dst_mac[i];
		eth->val.src_mac[i] &= eth->mask.src_mac[i];
	}
	eth->val.ether_type &= eth->mask.ether_type;
	return 0;
}
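/*
 * All mlx5_flow_create_*() converters share this layout convention: each
 * Verbs specification is appended at the running byte offset flow->offset
 * right behind the ibv_exp_flow_attr header, and priv_flow_validate()
 * advances the offset by the item's dst_sz, so the validation pass doubles
 * as the size computation for the attribute buffer.
 */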
/**
 * Convert VLAN item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_vlan(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data)
{
	const struct rte_flow_item_vlan *spec = item->spec;
	const struct rte_flow_item_vlan *mask = item->mask;
	struct mlx5_flow *flow = (struct mlx5_flow *)data;
	struct ibv_exp_flow_spec_eth *eth;
	const unsigned int eth_size = sizeof(struct ibv_exp_flow_spec_eth);

	eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset - eth_size);
	if (!spec)
		return 0;
	if (!mask)
		mask = default_mask;
	eth->val.vlan_tag = spec->tci;
	eth->mask.vlan_tag = mask->tci;
	eth->val.vlan_tag &= eth->mask.vlan_tag;
	return 0;
}
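/*
 * VLAN emits no specification of its own: stepping back by eth_size patches
 * the vlan_tag fields of the Ethernet specification written just before it,
 * which is also why the VLAN entry in mlx5_flow_items carries a dst_sz of
 * zero.
 */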
/**
 * Convert IPv4 item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data)
{
	const struct rte_flow_item_ipv4 *spec = item->spec;
	const struct rte_flow_item_ipv4 *mask = item->mask;
	struct mlx5_flow *flow = (struct mlx5_flow *)data;
	struct ibv_exp_flow_spec_ipv4_ext *ipv4;
	unsigned int ipv4_size = sizeof(struct ibv_exp_flow_spec_ipv4_ext);

	++flow->ibv_attr->num_of_specs;
	flow->ibv_attr->priority = 1;
	flow->hash_fields = (IBV_EXP_RX_HASH_SRC_IPV4 |
			     IBV_EXP_RX_HASH_DST_IPV4);
	ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
	*ipv4 = (struct ibv_exp_flow_spec_ipv4_ext) {
		.type = flow->inner | IBV_EXP_FLOW_SPEC_IPV4_EXT,
		.size = ipv4_size,
	};
	if (!spec)
		return 0;
	if (!mask)
		mask = default_mask;
	ipv4->val = (struct ibv_exp_flow_ipv4_ext_filter){
		.src_ip = spec->hdr.src_addr,
		.dst_ip = spec->hdr.dst_addr,
		.proto = spec->hdr.next_proto_id,
		.tos = spec->hdr.type_of_service,
	};
	ipv4->mask = (struct ibv_exp_flow_ipv4_ext_filter){
		.src_ip = mask->hdr.src_addr,
		.dst_ip = mask->hdr.dst_addr,
		.proto = mask->hdr.next_proto_id,
		.tos = mask->hdr.type_of_service,
	};
	/* Remove unwanted bits from values. */
	ipv4->val.src_ip &= ipv4->mask.src_ip;
	ipv4->val.dst_ip &= ipv4->mask.dst_ip;
	ipv4->val.proto &= ipv4->mask.proto;
	ipv4->val.tos &= ipv4->mask.tos;
	return 0;
}
/**
 * Convert IPv6 item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data)
{
	const struct rte_flow_item_ipv6 *spec = item->spec;
	const struct rte_flow_item_ipv6 *mask = item->mask;
	struct mlx5_flow *flow = (struct mlx5_flow *)data;
	struct ibv_exp_flow_spec_ipv6_ext *ipv6;
	unsigned int ipv6_size = sizeof(struct ibv_exp_flow_spec_ipv6_ext);

	++flow->ibv_attr->num_of_specs;
	flow->ibv_attr->priority = 1;
	flow->hash_fields = (IBV_EXP_RX_HASH_SRC_IPV6 |
			     IBV_EXP_RX_HASH_DST_IPV6);
	ipv6 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
	*ipv6 = (struct ibv_exp_flow_spec_ipv6_ext) {
		.type = flow->inner | IBV_EXP_FLOW_SPEC_IPV6_EXT,
		.size = ipv6_size,
	};
	if (!spec)
		return 0;
	if (!mask)
		mask = default_mask;
	memcpy(ipv6->val.src_ip, spec->hdr.src_addr,
	       RTE_DIM(ipv6->val.src_ip));
	memcpy(ipv6->val.dst_ip, spec->hdr.dst_addr,
	       RTE_DIM(ipv6->val.dst_ip));
	memcpy(ipv6->mask.src_ip, mask->hdr.src_addr,
	       RTE_DIM(ipv6->mask.src_ip));
	memcpy(ipv6->mask.dst_ip, mask->hdr.dst_addr,
	       RTE_DIM(ipv6->mask.dst_ip));
	ipv6->val.flow_label = spec->hdr.vtc_flow;
	ipv6->val.next_hdr = spec->hdr.proto;
	ipv6->val.hop_limit = spec->hdr.hop_limits;
	ipv6->mask.flow_label = mask->hdr.vtc_flow;
	ipv6->mask.next_hdr = mask->hdr.proto;
	ipv6->mask.hop_limit = mask->hdr.hop_limits;
	/* Remove unwanted bits from values. */
	ipv6->val.flow_label &= ipv6->mask.flow_label;
	ipv6->val.next_hdr &= ipv6->mask.next_hdr;
	ipv6->val.hop_limit &= ipv6->mask.hop_limit;
	return 0;
}
/**
 * Convert UDP item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_udp(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data)
{
	const struct rte_flow_item_udp *spec = item->spec;
	const struct rte_flow_item_udp *mask = item->mask;
	struct mlx5_flow *flow = (struct mlx5_flow *)data;
	struct ibv_exp_flow_spec_tcp_udp *udp;
	unsigned int udp_size = sizeof(struct ibv_exp_flow_spec_tcp_udp);

	++flow->ibv_attr->num_of_specs;
	flow->ibv_attr->priority = 0;
	flow->hash_fields |= (IBV_EXP_RX_HASH_SRC_PORT_UDP |
			      IBV_EXP_RX_HASH_DST_PORT_UDP);
	udp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
	*udp = (struct ibv_exp_flow_spec_tcp_udp) {
		.type = flow->inner | IBV_EXP_FLOW_SPEC_UDP,
		.size = udp_size,
	};
	if (!spec)
		return 0;
	if (!mask)
		mask = default_mask;
	udp->val.dst_port = spec->hdr.dst_port;
	udp->val.src_port = spec->hdr.src_port;
	udp->mask.dst_port = mask->hdr.dst_port;
	udp->mask.src_port = mask->hdr.src_port;
	/* Remove unwanted bits from values. */
	udp->val.src_port &= udp->mask.src_port;
	udp->val.dst_port &= udp->mask.dst_port;
	return 0;
}
/**
 * Convert TCP item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_tcp(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data)
{
	const struct rte_flow_item_tcp *spec = item->spec;
	const struct rte_flow_item_tcp *mask = item->mask;
	struct mlx5_flow *flow = (struct mlx5_flow *)data;
	struct ibv_exp_flow_spec_tcp_udp *tcp;
	unsigned int tcp_size = sizeof(struct ibv_exp_flow_spec_tcp_udp);

	++flow->ibv_attr->num_of_specs;
	flow->ibv_attr->priority = 0;
	flow->hash_fields |= (IBV_EXP_RX_HASH_SRC_PORT_TCP |
			      IBV_EXP_RX_HASH_DST_PORT_TCP);
	tcp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
	*tcp = (struct ibv_exp_flow_spec_tcp_udp) {
		.type = flow->inner | IBV_EXP_FLOW_SPEC_TCP,
		.size = tcp_size,
	};
	if (!spec)
		return 0;
	if (!mask)
		mask = default_mask;
	tcp->val.dst_port = spec->hdr.dst_port;
	tcp->val.src_port = spec->hdr.src_port;
	tcp->mask.dst_port = mask->hdr.dst_port;
	tcp->mask.src_port = mask->hdr.src_port;
	/* Remove unwanted bits from values. */
	tcp->val.src_port &= tcp->mask.src_port;
	tcp->val.dst_port &= tcp->mask.dst_port;
	return 0;
}
/**
 * Convert VXLAN item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
		       const void *default_mask,
		       void *data)
{
	const struct rte_flow_item_vxlan *spec = item->spec;
	const struct rte_flow_item_vxlan *mask = item->mask;
	struct mlx5_flow *flow = (struct mlx5_flow *)data;
	struct ibv_exp_flow_spec_tunnel *vxlan;
	unsigned int size = sizeof(struct ibv_exp_flow_spec_tunnel);
	union vni {
		uint32_t vlan_id;
		uint8_t vni[4];
	} id;

	++flow->ibv_attr->num_of_specs;
	flow->ibv_attr->priority = 0;
	id.vni[0] = 0;
	vxlan = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
	*vxlan = (struct ibv_exp_flow_spec_tunnel) {
		.type = flow->inner | IBV_EXP_FLOW_SPEC_VXLAN_TUNNEL,
		.size = size,
	};
	flow->inner = IBV_EXP_FLOW_SPEC_INNER;
	if (!spec)
		return 0;
	if (!mask)
		mask = default_mask;
	memcpy(&id.vni[1], spec->vni, 3);
	vxlan->val.tunnel_id = id.vlan_id;
	memcpy(&id.vni[1], mask->vni, 3);
	vxlan->mask.tunnel_id = id.vlan_id;
	/* Remove unwanted bits from values. */
	vxlan->val.tunnel_id &= vxlan->mask.tunnel_id;
	return 0;
}
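/*
 * The VXLAN network identifier is a 24-bit field. The union above clears
 * byte 0 and copies the three VNI bytes into bytes 1-3, then reads the
 * result back as the 32-bit tunnel_id expected by the Verbs tunnel filter;
 * the mask gets the same treatment.
 */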
/**
 * Convert mark/flag action to Verbs specification.
 *
 * @param flow
 *   Pointer to MLX5 flow structure.
 * @param mark_id
 *   Mark identifier.
 */
static int
mlx5_flow_create_flag_mark(struct mlx5_flow *flow, uint32_t mark_id)
{
	struct ibv_exp_flow_spec_action_tag *tag;
	unsigned int size = sizeof(struct ibv_exp_flow_spec_action_tag);

	tag = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
	*tag = (struct ibv_exp_flow_spec_action_tag){
		.type = IBV_EXP_FLOW_SPEC_ACTION_TAG,
		.size = size,
		.tag_id = mlx5_flow_mark_set(mark_id),
	};
	++flow->ibv_attr->num_of_specs;
	return 0;
}
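/*
 * mlx5_flow_mark_set() is assumed here to be the Rx datapath helper that
 * translates the rte_flow mark/flag identifier into the on-wire tag value;
 * the Rx burst functions apply the inverse mapping before exposing the mark
 * through the mbuf, which is why validation bounds identifiers by
 * MLX5_FLOW_MARK_MAX.
 */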
/**
 * Complete flow rule creation with a drop queue.
 *
 * @param priv
 *   Pointer to private structure.
 * @param flow
 *   MLX5 flow attributes (filled by mlx5_flow_validate()).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A flow if the rule could be created.
 */
static struct rte_flow *
priv_flow_create_action_queue_drop(struct priv *priv,
				   struct mlx5_flow *flow,
				   struct rte_flow_error *error)
{
	struct rte_flow *rte_flow;

	assert(priv->pd);
	assert(priv->ctx);
	rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow), 0);
	if (!rte_flow) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "cannot allocate flow memory");
		return NULL;
	}
	rte_flow->drop = 1;
	rte_flow->ibv_attr = flow->ibv_attr;
	rte_flow->qp = priv->flow_drop_queue->qp;
	if (!priv->started)
		return rte_flow;
	rte_flow->ibv_flow = ibv_exp_create_flow(rte_flow->qp,
						 rte_flow->ibv_attr);
	if (!rte_flow->ibv_flow) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "flow rule creation failure");
		rte_free(rte_flow);
		return NULL;
	}
	return rte_flow;
}
/**
 * Complete flow rule creation.
 *
 * @param priv
 *   Pointer to private structure.
 * @param flow
 *   MLX5 flow attributes (filled by mlx5_flow_validate()).
 * @param action
 *   Target action structure.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A flow if the rule could be created.
 */
static struct rte_flow *
priv_flow_create_action_queue(struct priv *priv,
			      struct mlx5_flow *flow,
			      struct mlx5_flow_action *action,
			      struct rte_flow_error *error)
{
	struct rte_flow *rte_flow;
	unsigned int i;
	unsigned int j;
	const unsigned int wqs_n = 1 << log2above(action->queues_n);
	struct ibv_exp_wq *wqs[wqs_n];

	assert(priv->pd);
	assert(priv->ctx);
	assert(!action->drop);
	rte_flow = rte_calloc(__func__, 1,
			      sizeof(*rte_flow) + sizeof(struct rxq *) *
			      action->queues_n, 0);
	if (!rte_flow) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "cannot allocate flow memory");
		return NULL;
	}
	rte_flow->rxqs = (struct rxq *(*)[])((uintptr_t)rte_flow +
					     sizeof(*rte_flow));
	for (i = 0; i < action->queues_n; ++i) {
		struct rxq_ctrl *rxq;

		rxq = container_of((*priv->rxqs)[action->queues[i]],
				   struct rxq_ctrl, rxq);
		wqs[i] = rxq->wq;
		(*rte_flow->rxqs)[i] = &rxq->rxq;
		++rte_flow->rxqs_n;
		rxq->rxq.mark |= action->mark;
	}
	/* Finalize indirection table. */
	for (j = 0; i < wqs_n; ++i, ++j) {
		wqs[i] = wqs[j];
		if (j == action->queues_n)
			j = 0;
	}
	rte_flow->mark = action->mark;
	rte_flow->ibv_attr = flow->ibv_attr;
	rte_flow->hash_fields = flow->hash_fields;
	rte_flow->ind_table = ibv_exp_create_rwq_ind_table(
		priv->ctx,
		&(struct ibv_exp_rwq_ind_table_init_attr){
			.pd = priv->pd,
			.log_ind_tbl_size = log2above(action->queues_n),
			.ind_tbl = wqs,
			.comp_mask = 0,
		});
	if (!rte_flow->ind_table) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "cannot allocate indirection table");
		goto error;
	}
	rte_flow->qp = ibv_exp_create_qp(
		priv->ctx,
		&(struct ibv_exp_qp_init_attr){
			.qp_type = IBV_QPT_RAW_PACKET,
			.comp_mask =
				IBV_EXP_QP_INIT_ATTR_PD |
				IBV_EXP_QP_INIT_ATTR_PORT |
				IBV_EXP_QP_INIT_ATTR_RX_HASH,
			.pd = priv->pd,
			.rx_hash_conf = &(struct ibv_exp_rx_hash_conf){
				.rx_hash_function =
					IBV_EXP_RX_HASH_FUNC_TOEPLITZ,
				.rx_hash_key_len = rss_hash_default_key_len,
				.rx_hash_key = rss_hash_default_key,
				.rx_hash_fields_mask = rte_flow->hash_fields,
				.rwq_ind_tbl = rte_flow->ind_table,
			},
			.port_num = priv->port,
		});
	if (!rte_flow->qp) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "cannot allocate QP");
		goto error;
	}
	if (!priv->started)
		return rte_flow;
	rte_flow->ibv_flow = ibv_exp_create_flow(rte_flow->qp,
						 rte_flow->ibv_attr);
	if (!rte_flow->ibv_flow) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "flow rule creation failure");
		goto error;
	}
	return rte_flow;
error:
	assert(rte_flow);
	if (rte_flow->qp)
		ibv_destroy_qp(rte_flow->qp);
	if (rte_flow->ind_table)
		ibv_exp_destroy_rwq_ind_table(rte_flow->ind_table);
	rte_free(rte_flow);
	return NULL;
}
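/*
 * The indirection table only comes in power-of-two sizes (its dimension is
 * given as log_ind_tbl_size), hence wqs_n = 1 << log2above(queues_n) above:
 * when the RSS queue count is not a power of two, the "finalize" loop pads
 * the table by cycling over the configured queues again, which slightly
 * biases the hash distribution toward the repeated queues.
 */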
/**
 * Convert a flow.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] items
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A flow on success, NULL otherwise.
 */
static struct rte_flow *
priv_flow_create(struct priv *priv,
		 const struct rte_flow_attr *attr,
		 const struct rte_flow_item items[],
		 const struct rte_flow_action actions[],
		 struct rte_flow_error *error)
{
	struct rte_flow *rte_flow;
	struct mlx5_flow_action action;
	struct mlx5_flow flow = { .offset = sizeof(struct ibv_exp_flow_attr), };
	int err;

	err = priv_flow_validate(priv, attr, items, actions, error, &flow);
	if (err)
		goto exit;
	flow.ibv_attr = rte_malloc(__func__, flow.offset, 0);
	flow.offset = sizeof(struct ibv_exp_flow_attr);
	if (!flow.ibv_attr) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "cannot allocate ibv_attr memory");
		goto exit;
	}
	*flow.ibv_attr = (struct ibv_exp_flow_attr){
		.type = IBV_EXP_FLOW_ATTR_NORMAL,
		.size = sizeof(struct ibv_exp_flow_attr),
		.priority = attr->priority,
		.num_of_specs = 0,
		.port = 0,
		.flags = 0,
		.reserved = 0,
	};
	flow.inner = 0;
	flow.hash_fields = 0;
	claim_zero(priv_flow_validate(priv, attr, items, actions,
				      error, &flow));
	action = (struct mlx5_flow_action){
		.queue = 0,
		.drop = 0,
		.mark = 0,
		.mark_id = MLX5_FLOW_MARK_DEFAULT,
	};
	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
			continue;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
			action.queue = 1;
			action.queues[action.queues_n++] =
				((const struct rte_flow_action_queue *)
				 actions->conf)->index;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
			const struct rte_flow_action_rss *rss =
				(const struct rte_flow_action_rss *)
				actions->conf;
			uint16_t n;

			action.queue = 1;
			action.queues_n = rss->num;
			for (n = 0; n < rss->num; ++n)
				action.queues[n] = rss->queue[n];
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
			action.drop = 1;
			action.mark = 0;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
			const struct rte_flow_action_mark *mark =
				(const struct rte_flow_action_mark *)
				actions->conf;

			if (mark)
				action.mark_id = mark->id;
			action.mark = !action.drop;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
			action.mark = !action.drop;
		} else {
			rte_flow_error_set(error, ENOTSUP,
					   RTE_FLOW_ERROR_TYPE_ACTION,
					   actions, "unsupported action");
			goto exit;
		}
	}
	if (action.mark) {
		mlx5_flow_create_flag_mark(&flow, action.mark_id);
		flow.offset += sizeof(struct ibv_exp_flow_spec_action_tag);
	}
	if (action.drop)
		rte_flow =
			priv_flow_create_action_queue_drop(priv, &flow, error);
	else
		rte_flow = priv_flow_create_action_queue(priv, &flow, &action,
							 error);
	if (!rte_flow)
		goto exit;
	return rte_flow;
exit:
	rte_free(flow.ibv_attr);
	return NULL;
}
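/*
 * Rule construction is a two-pass design: the first priv_flow_validate()
 * call runs without flow.ibv_attr so only flow.offset accumulates, yielding
 * the exact buffer size to allocate; the second call, wrapped in
 * claim_zero() because it cannot fail once the first has succeeded, replays
 * the same items and writes the Verbs specifications in place.
 */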
/**
 * Create a flow.
 *
 * @see rte_flow_create()
 * @see rte_flow_ops
 */
struct rte_flow *
mlx5_flow_create(struct rte_eth_dev *dev,
		 const struct rte_flow_attr *attr,
		 const struct rte_flow_item items[],
		 const struct rte_flow_action actions[],
		 struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;
	struct rte_flow *flow;

	priv_lock(priv);
	flow = priv_flow_create(priv, attr, items, actions, error);
	if (flow) {
		LIST_INSERT_HEAD(&priv->flows, flow, next);
		DEBUG("Flow created %p", (void *)flow);
	}
	priv_unlock(priv);
	return flow;
}
/**
 * Destroy a flow.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] flow
 *   Flow to destroy.
 */
static void
priv_flow_destroy(struct priv *priv,
		  struct rte_flow *flow)
{
	LIST_REMOVE(flow, next);
	if (flow->ibv_flow)
		claim_zero(ibv_exp_destroy_flow(flow->ibv_flow));
	/* The QP and indirection table of a drop flow are shared. */
	if (!flow->drop && flow->qp)
		claim_zero(ibv_destroy_qp(flow->qp));
	if (!flow->drop && flow->ind_table)
		claim_zero(ibv_exp_destroy_rwq_ind_table(flow->ind_table));
	if (flow->drop && flow->wq)
		claim_zero(ibv_exp_destroy_wq(flow->wq));
	if (flow->drop && flow->cq)
		claim_zero(ibv_destroy_cq(flow->cq));
	if (flow->mark) {
		struct rte_flow *tmp;
		struct rxq *rxq;
		uint32_t mark_n = 0;
		uint32_t queue_n;

		/*
		 * To remove the mark from the queue, the queue must not be
		 * present in any other marked flow (RSS or not).
		 */
		for (queue_n = 0; queue_n < flow->rxqs_n; ++queue_n) {
			rxq = (*flow->rxqs)[queue_n];
			for (tmp = LIST_FIRST(&priv->flows);
			     tmp;
			     tmp = LIST_NEXT(tmp, next)) {
				uint32_t tqueue_n;

				if (tmp->drop)
					continue;
				for (tqueue_n = 0;
				     tqueue_n < tmp->rxqs_n;
				     ++tqueue_n) {
					struct rxq *trxq;

					trxq = (*tmp->rxqs)[tqueue_n];
					if (rxq == trxq)
						++mark_n;
				}
			}
			rxq->mark = !!mark_n;
			mark_n = 0;
		}
	}
	rte_free(flow->ibv_attr);
	DEBUG("Flow destroyed %p", (void *)flow);
	rte_free(flow);
}
/**
 * Destroy a flow.
 *
 * @see rte_flow_destroy()
 * @see rte_flow_ops
 */
int
mlx5_flow_destroy(struct rte_eth_dev *dev,
		  struct rte_flow *flow,
		  struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	(void)error;
	priv_lock(priv);
	priv_flow_destroy(priv, flow);
	priv_unlock(priv);
	return 0;
}
/**
 * Destroy all flows.
 *
 * @param priv
 *   Pointer to private structure.
 */
static void
priv_flow_flush(struct priv *priv)
{
	while (!LIST_EMPTY(&priv->flows)) {
		struct rte_flow *flow;

		flow = LIST_FIRST(&priv->flows);
		priv_flow_destroy(priv, flow);
	}
}
/**
 * Destroy all flows.
 *
 * @see rte_flow_flush()
 * @see rte_flow_ops
 */
int
mlx5_flow_flush(struct rte_eth_dev *dev,
		struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	(void)error;
	priv_lock(priv);
	priv_flow_flush(priv);
	priv_unlock(priv);
	return 0;
}
/**
 * Create drop queue.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   0 on success.
 */
static int
priv_flow_create_drop_queue(struct priv *priv)
{
	struct rte_flow_drop *fdq = NULL;
	unsigned int i;

	assert(priv->pd);
	assert(priv->ctx);
	fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
	if (!fdq) {
		WARN("cannot allocate memory for drop queue");
		return -1;
	}
	fdq->cq = ibv_exp_create_cq(priv->ctx, 1, NULL, NULL, 0,
			&(struct ibv_exp_cq_init_attr){
			.comp_mask = 0,
			});
	if (!fdq->cq) {
		WARN("cannot allocate CQ for drop queue");
		goto error;
	}
	for (i = 0; i != MLX5_DROP_WQ_N; ++i) {
		fdq->wqs[i] = ibv_exp_create_wq(priv->ctx,
				&(struct ibv_exp_wq_init_attr){
				.wq_type = IBV_EXP_WQT_RQ,
				.max_recv_wr = 1,
				.max_recv_sge = 1,
				.pd = priv->pd,
				.cq = fdq->cq,
				});
		if (!fdq->wqs[i]) {
			WARN("cannot allocate WQ for drop queue");
			goto error;
		}
	}
	fdq->ind_table = ibv_exp_create_rwq_ind_table(priv->ctx,
			&(struct ibv_exp_rwq_ind_table_init_attr){
			.pd = priv->pd,
			.log_ind_tbl_size = 0,
			.ind_tbl = fdq->wqs,
			.comp_mask = 0,
			});
	if (!fdq->ind_table) {
		WARN("cannot allocate indirection table for drop queue");
		goto error;
	}
	fdq->qp = ibv_exp_create_qp(priv->ctx,
		&(struct ibv_exp_qp_init_attr){
			.qp_type = IBV_QPT_RAW_PACKET,
			.comp_mask =
				IBV_EXP_QP_INIT_ATTR_PD |
				IBV_EXP_QP_INIT_ATTR_PORT |
				IBV_EXP_QP_INIT_ATTR_RX_HASH,
			.pd = priv->pd,
			.rx_hash_conf = &(struct ibv_exp_rx_hash_conf){
				.rx_hash_function =
					IBV_EXP_RX_HASH_FUNC_TOEPLITZ,
				.rx_hash_key_len = rss_hash_default_key_len,
				.rx_hash_key = rss_hash_default_key,
				.rx_hash_fields_mask = 0,
				.rwq_ind_tbl = fdq->ind_table,
			},
			.port_num = priv->port,
		});
	if (!fdq->qp) {
		WARN("cannot allocate QP for drop queue");
		goto error;
	}
	priv->flow_drop_queue = fdq;
	return 0;
error:
	if (fdq->qp)
		claim_zero(ibv_destroy_qp(fdq->qp));
	if (fdq->ind_table)
		claim_zero(ibv_exp_destroy_rwq_ind_table(fdq->ind_table));
	for (i = 0; i != MLX5_DROP_WQ_N; ++i) {
		if (fdq->wqs[i])
			claim_zero(ibv_exp_destroy_wq(fdq->wqs[i]));
	}
	if (fdq->cq)
		claim_zero(ibv_destroy_cq(fdq->cq));
	rte_free(fdq);
	priv->flow_drop_queue = NULL;
	return -1;
}
/**
 * Delete drop queue.
 *
 * @param priv
 *   Pointer to private structure.
 */
static void
priv_flow_delete_drop_queue(struct priv *priv)
{
	struct rte_flow_drop *fdq = priv->flow_drop_queue;
	unsigned int i;

	claim_zero(ibv_destroy_qp(fdq->qp));
	claim_zero(ibv_exp_destroy_rwq_ind_table(fdq->ind_table));
	for (i = 0; i != MLX5_DROP_WQ_N; ++i) {
		assert(fdq->wqs[i]);
		claim_zero(ibv_exp_destroy_wq(fdq->wqs[i]));
	}
	claim_zero(ibv_destroy_cq(fdq->cq));
	rte_free(fdq);
	priv->flow_drop_queue = NULL;
}
/**
 * Remove all flows.
 *
 * Called by dev_stop() to remove all flows.
 *
 * @param priv
 *   Pointer to private structure.
 */
void
priv_flow_stop(struct priv *priv)
{
	struct rte_flow *flow;

	for (flow = LIST_FIRST(&priv->flows);
	     flow;
	     flow = LIST_NEXT(flow, next)) {
		claim_zero(ibv_exp_destroy_flow(flow->ibv_flow));
		flow->ibv_flow = NULL;
		if (flow->mark) {
			unsigned int n;

			for (n = 0; n < flow->rxqs_n; ++n)
				(*flow->rxqs)[n]->mark = 0;
		}
		DEBUG("Flow %p removed", (void *)flow);
	}
	priv_flow_delete_drop_queue(priv);
}
/**
 * Add all flows.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   0 on success, an errno value otherwise and rte_errno is set.
 */
int
priv_flow_start(struct priv *priv)
{
	int ret;
	struct rte_flow *flow;

	ret = priv_flow_create_drop_queue(priv);
	if (ret)
		return ret;
	for (flow = LIST_FIRST(&priv->flows);
	     flow;
	     flow = LIST_NEXT(flow, next)) {
		struct ibv_qp *qp;

		if (flow->drop)
			qp = priv->flow_drop_queue->qp;
		else
			qp = flow->qp;
		flow->ibv_flow = ibv_exp_create_flow(qp, flow->ibv_attr);
		if (!flow->ibv_flow) {
			DEBUG("Flow %p cannot be applied", (void *)flow);
			rte_errno = EINVAL;
			return rte_errno;
		}
		DEBUG("Flow %p applied", (void *)flow);
		if (flow->mark) {
			unsigned int n;

			for (n = 0; n < flow->rxqs_n; ++n)
				(*flow->rxqs)[n]->mark = 1;
		}
	}
	return 0;
}