/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2016 6WIND S.A.
 * Copyright 2016 Mellanox Technologies, Ltd
 */

#include <sys/queue.h>
#include <stdint.h>
#include <string.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_common.h>
#include <rte_ether.h>
#include <rte_eth_ctrl.h>
#include <rte_ethdev_driver.h>
#include <rte_flow.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>
#include <rte_ip.h>

#include "mlx5.h"
#include "mlx5_defs.h"
#include "mlx5_prm.h"
#include "mlx5_glue.h"

/* Dev ops structure defined in mlx5.c */
extern const struct eth_dev_ops mlx5_dev_ops;
extern const struct eth_dev_ops mlx5_dev_ops_isolate;

/* Pattern Layer bits. */
#define MLX5_FLOW_LAYER_OUTER_L2 (1u << 0)
#define MLX5_FLOW_LAYER_OUTER_L3_IPV4 (1u << 1)
#define MLX5_FLOW_LAYER_OUTER_L3_IPV6 (1u << 2)
#define MLX5_FLOW_LAYER_OUTER_L4_UDP (1u << 3)
#define MLX5_FLOW_LAYER_OUTER_L4_TCP (1u << 4)
#define MLX5_FLOW_LAYER_OUTER_VLAN (1u << 5)

#define MLX5_FLOW_LAYER_OUTER_L3 \
	(MLX5_FLOW_LAYER_OUTER_L3_IPV4 | MLX5_FLOW_LAYER_OUTER_L3_IPV6)
#define MLX5_FLOW_LAYER_OUTER_L4 \
	(MLX5_FLOW_LAYER_OUTER_L4_UDP | MLX5_FLOW_LAYER_OUTER_L4_TCP)

/* Actions that modify the fate of matching traffic. */
#define MLX5_FLOW_FATE_DROP (1u << 0)
#define MLX5_FLOW_FATE_QUEUE (1u << 1)

/* Modify a packet. */
#define MLX5_FLOW_MOD_FLAG (1u << 0)
#define MLX5_FLOW_MOD_MARK (1u << 1)

/* Possible L3 layer protocols to filter on. */
#define MLX5_IP_PROTOCOL_TCP 6
#define MLX5_IP_PROTOCOL_UDP 17

/** Verbs flow specification and related resources. */
struct mlx5_flow_verbs {
	unsigned int size; /**< Size of the attribute. */
	struct {
		struct ibv_flow_attr *attr;
		/**< Pointer to the Specification buffer. */
		uint8_t *specs; /**< Pointer to the specifications. */
	};
	struct ibv_flow *flow; /**< Verbs flow pointer. */
	struct mlx5_hrxq *hrxq; /**< Hash Rx queue object. */
};

/* Flow structure. */
struct rte_flow {
	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
	struct rte_flow_attr attributes; /**< User flow attribute. */
	uint32_t l3_protocol_en:1; /**< Protocol filtering requested. */
	uint32_t layers;
	/**< Bit-fields of present layers, see MLX5_FLOW_LAYER_*. */
	uint32_t modifier;
	/**< Bit-fields of present modifiers, see MLX5_FLOW_MOD_*. */
	uint32_t fate;
	/**< Bit-fields of present fate actions, see MLX5_FLOW_FATE_*. */
	uint8_t l3_protocol; /**< Valid when l3_protocol_en is set. */
	struct mlx5_flow_verbs verbs; /**< Verbs flow. */
	uint16_t queue; /**< Destination queue to redirect traffic to. */
};

static const struct rte_flow_ops mlx5_flow_ops = {
	.validate = mlx5_flow_validate,
	.create = mlx5_flow_create,
	.destroy = mlx5_flow_destroy,
	.flush = mlx5_flow_flush,
	.isolate = mlx5_flow_isolate,
};

/* Convert FDIR request to Generic flow. */
struct mlx5_fdir {
	struct rte_flow_attr attr;
	struct rte_flow_action actions[2];
	struct rte_flow_item items[4];
	struct rte_flow_item_eth l2;
	struct rte_flow_item_eth l2_mask;
	union {
		struct rte_flow_item_ipv4 ipv4;
		struct rte_flow_item_ipv6 ipv6;
	} l3;
	union {
		struct rte_flow_item_ipv4 ipv4;
		struct rte_flow_item_ipv6 ipv6;
	} l3_mask;
	union {
		struct rte_flow_item_udp udp;
		struct rte_flow_item_tcp tcp;
	} l4;
	union {
		struct rte_flow_item_udp udp;
		struct rte_flow_item_tcp tcp;
	} l4_mask;
	struct rte_flow_action_queue queue;
};

/* Verbs specification header. */
struct ibv_spec_header {
	enum ibv_flow_spec_type type;
	uint16_t size;
};

/**
 * Discover the maximum number of flow priorities available.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   The number of supported flow priorities on success, a negative errno
 *   value otherwise and rte_errno is set.
 */
int
mlx5_flow_discover_priorities(struct rte_eth_dev *dev)
{
	struct {
		struct ibv_flow_attr attr;
		struct ibv_flow_spec_eth eth;
		struct ibv_flow_spec_action_drop drop;
	} flow_attr = {
		.attr = {
			.num_of_specs = 2,
		},
		.eth = {
			.type = IBV_FLOW_SPEC_ETH,
			.size = sizeof(struct ibv_flow_spec_eth),
		},
		.drop = {
			.size = sizeof(struct ibv_flow_spec_action_drop),
			.type = IBV_FLOW_SPEC_ACTION_DROP,
		},
	};
	struct ibv_flow *flow;
	struct mlx5_hrxq *drop = mlx5_hrxq_drop_new(dev);
	uint16_t vprio[] = { 8, 16 };
	int i;

	if (!drop) {
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	for (i = 0; i != RTE_DIM(vprio); i++) {
		flow_attr.attr.priority = vprio[i] - 1;
		flow = mlx5_glue->create_flow(drop->qp, &flow_attr.attr);
		if (!flow)
			break;
		claim_zero(mlx5_glue->destroy_flow(flow));
	}
	mlx5_hrxq_drop_release(dev);
	DRV_LOG(INFO, "port %u flow maximum priority: %d",
		dev->data->port_id, vprio[i - 1]);
	return vprio[i - 1];
}

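/*
 * Illustrative note (an assumption, not upstream text): the probe code in
 * mlx5.c is expected to store this result for later use by
 * mlx5_flow_attributes() below, roughly:
 *
 *	priv->config.flow_prio = mlx5_flow_discover_priorities(eth_dev);
 */
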
/**
 * Verify the @p attributes will be correctly understood by the NIC and store
 * them in the @p flow if everything is correct.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[in] attributes
 *   Pointer to flow attributes.
 * @param[in, out] flow
 *   Pointer to the rte_flow structure.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_attributes(struct rte_eth_dev *dev,
		     const struct rte_flow_attr *attributes,
		     struct rte_flow *flow,
		     struct rte_flow_error *error)
{
	uint32_t priority_max =
		((struct priv *)dev->data->dev_private)->config.flow_prio;

	if (attributes->group)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
					  NULL,
					  "groups are not supported");
	if (attributes->priority >= priority_max)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
					  NULL,
					  "priority out of range");
	if (attributes->egress)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
					  NULL,
					  "egress is not supported");
	if (attributes->transfer)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
					  NULL,
					  "transfer is not supported");
	if (!attributes->ingress)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
					  NULL,
					  "ingress attribute is mandatory");
	flow->attributes = *attributes;
	return 0;
}

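/*
 * Example (illustrative, not part of the upstream file): the only attribute
 * combination accepted above is a plain ingress rule whose priority lies
 * within the discovered range:
 *
 *	const struct rte_flow_attr attr = {
 *		.group = 0,
 *		.priority = 0,
 *		.ingress = 1,
 *	};
 */
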
/**
 * Verify the @p item specifications (spec, last, mask) are compatible with
 * the NIC capabilities.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] mask
 *   @p item->mask or flow default bit-masks.
 * @param[in] nic_mask
 *   Bit-masks covering supported fields by the NIC to compare with user mask.
 * @param[in] size
 *   Bit-masks size in bytes.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_item_acceptable(const struct rte_flow_item *item,
			  const uint8_t *mask,
			  const uint8_t *nic_mask,
			  unsigned int size,
			  struct rte_flow_error *error)
{
	unsigned int i;

	assert(nic_mask);
	for (i = 0; i < size; ++i)
		if ((nic_mask[i] | mask[i]) != nic_mask[i])
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  item,
						  "mask enables non-supported"
						  " bits");
	if (!item->spec && (item->mask || item->last))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "mask/last without a spec is not"
					  " supported");
	if (item->spec && item->last) {
		uint8_t spec[size];
		uint8_t last[size];
		unsigned int i;
		int ret;

		for (i = 0; i < size; ++i) {
			spec[i] = ((const uint8_t *)item->spec)[i] & mask[i];
			last[i] = ((const uint8_t *)item->last)[i] & mask[i];
		}
		ret = memcmp(spec, last, size);
		if (ret != 0)
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  item,
						  "range is not supported");
	}
	return 0;
}

/**
 * Add a verbs specification into @p flow.
 *
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] src
 *   Specification to copy.
 * @param[in] size
 *   Size in bytes of the specification to copy.
 */
static void
mlx5_flow_spec_verbs_add(struct rte_flow *flow, void *src, unsigned int size)
{
	if (flow->verbs.specs) {
		void *dst;

		dst = (void *)(flow->verbs.specs + flow->verbs.size);
		memcpy(dst, src, size);
		++flow->verbs.attr->num_of_specs;
	}
	flow->verbs.size += size;
}

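/*
 * Illustrative note (an assumption, not upstream text): during the sizing
 * pass of mlx5_flow_merge() the converters run against a local dummy flow
 * whose specs pointer is NULL, so the helper above stays side-effect free
 * while the required size keeps accumulating:
 *
 *	mlx5_flow_spec_verbs_add(flow, &eth, size);
 *	// flow->verbs.size grew by size; the copy and num_of_specs update
 *	// happened only if flow->verbs.specs was non-NULL.
 */
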
/**
 * Convert the @p item into a Verbs specification after ensuring the NIC
 * will understand and process it correctly.
 * If the necessary size for the conversion is greater than @p flow_size,
 * nothing is written in @p flow but the validation is still performed.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is less than or equal to @p flow_size, the @p item has fully been
 *   converted, otherwise another call with this returned memory size should
 *   be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_item_eth(const struct rte_flow_item *item, struct rte_flow *flow,
		   const size_t flow_size, struct rte_flow_error *error)
{
	const struct rte_flow_item_eth *spec = item->spec;
	const struct rte_flow_item_eth *mask = item->mask;
	const struct rte_flow_item_eth nic_mask = {
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
		.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
		.type = RTE_BE16(0xffff),
	};
	const unsigned int size = sizeof(struct ibv_flow_spec_eth);
	struct ibv_flow_spec_eth eth = {
		.type = IBV_FLOW_SPEC_ETH,
		.size = size,
	};
	int ret;

	if (flow->layers & MLX5_FLOW_LAYER_OUTER_L2)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "L2 layers already configured");
	if (!mask)
		mask = &rte_flow_item_eth_mask;
	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
					(const uint8_t *)&nic_mask,
					sizeof(struct rte_flow_item_eth),
					error);
	if (ret)
		return ret;
	flow->layers |= MLX5_FLOW_LAYER_OUTER_L2;
	if (size > flow_size)
		return size;
	if (spec) {
		unsigned int i;

		memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
		memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
		eth.val.ether_type = spec->type;
		memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
		memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
		eth.mask.ether_type = mask->type;
		/* Remove unwanted bits from values. */
		for (i = 0; i < ETHER_ADDR_LEN; ++i) {
			eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
			eth.val.src_mac[i] &= eth.mask.src_mac[i];
		}
		eth.val.ether_type &= eth.mask.ether_type;
	}
	mlx5_flow_spec_verbs_add(flow, &eth, size);
	return size;
}

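/*
 * Example (illustrative): a pattern item matching a single destination MAC,
 * converted by the function above into an ibv_flow_spec_eth:
 *
 *	struct rte_flow_item_eth spec = {
 *		.dst.addr_bytes = "\x00\x11\x22\x33\x44\x55",
 *	};
 *	struct rte_flow_item_eth mask = {
 *		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *	};
 *	const struct rte_flow_item item = {
 *		.type = RTE_FLOW_ITEM_TYPE_ETH,
 *		.spec = &spec,
 *		.mask = &mask,
 *	};
 */
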
/**
 * Update the VLAN tag in the Verbs Ethernet specification.
 *
 * @param[in, out] attr
 *   Pointer to Verbs attributes structure.
 * @param[in] eth
 *   Verbs structure containing the VLAN information to copy.
 */
static void
mlx5_flow_item_vlan_update(struct ibv_flow_attr *attr,
			   struct ibv_flow_spec_eth *eth)
{
	unsigned int i;
	enum ibv_flow_spec_type search = IBV_FLOW_SPEC_ETH;
	struct ibv_spec_header *hdr = (struct ibv_spec_header *)
		((uint8_t *)attr + sizeof(struct ibv_flow_attr));

	for (i = 0; i != attr->num_of_specs; ++i) {
		if (hdr->type == search) {
			struct ibv_flow_spec_eth *e =
				(struct ibv_flow_spec_eth *)hdr;

			e->val.vlan_tag = eth->val.vlan_tag;
			e->mask.vlan_tag = eth->mask.vlan_tag;
			e->val.ether_type = eth->val.ether_type;
			e->mask.ether_type = eth->mask.ether_type;
			break;
		}
		hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
	}
}

/**
 * Convert the @p item into @p flow (or by updating the already present
 * Ethernet Verbs specification) after ensuring the NIC will understand and
 * process it correctly.
 * If the necessary size for the conversion is greater than @p flow_size,
 * nothing is written in @p flow but the validation is still performed.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is less than or equal to @p flow_size, the @p item has fully been
 *   converted, otherwise another call with this returned memory size should
 *   be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_item_vlan(const struct rte_flow_item *item, struct rte_flow *flow,
		    const size_t flow_size, struct rte_flow_error *error)
{
	const struct rte_flow_item_vlan *spec = item->spec;
	const struct rte_flow_item_vlan *mask = item->mask;
	const struct rte_flow_item_vlan nic_mask = {
		.tci = RTE_BE16(0x0fff),
		.inner_type = RTE_BE16(0xffff),
	};
	unsigned int size = sizeof(struct ibv_flow_spec_eth);
	struct ibv_flow_spec_eth eth = {
		.type = IBV_FLOW_SPEC_ETH,
		.size = size,
	};
	int ret;
	const uint32_t l34m = MLX5_FLOW_LAYER_OUTER_L3 |
			MLX5_FLOW_LAYER_OUTER_L4;
	const uint32_t vlanm = MLX5_FLOW_LAYER_OUTER_VLAN;
	const uint32_t l2m = MLX5_FLOW_LAYER_OUTER_L2;

	if (flow->layers & vlanm)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "VLAN layer already configured");
	else if ((flow->layers & l34m) != 0)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "L2 layer cannot follow L3/L4 layer");
	if (!mask)
		mask = &rte_flow_item_vlan_mask;
	ret = mlx5_flow_item_acceptable
		(item, (const uint8_t *)mask,
		 (const uint8_t *)&nic_mask,
		 sizeof(struct rte_flow_item_vlan), error);
	if (ret)
		return ret;
	if (spec) {
		eth.val.vlan_tag = spec->tci;
		eth.mask.vlan_tag = mask->tci;
		eth.val.vlan_tag &= eth.mask.vlan_tag;
		eth.val.ether_type = spec->inner_type;
		eth.mask.ether_type = mask->inner_type;
		eth.val.ether_type &= eth.mask.ether_type;
	}
	/*
	 * From verbs perspective an empty VLAN is equivalent
	 * to a packet without VLAN layer.
	 */
	if (!eth.mask.vlan_tag)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
					  item->spec,
					  "VLAN cannot be empty");
	if (!(flow->layers & l2m)) {
		if (size <= flow_size)
			mlx5_flow_spec_verbs_add(flow, &eth, size);
	} else {
		if (flow->verbs.attr)
			mlx5_flow_item_vlan_update(flow->verbs.attr, &eth);
		size = 0; /* Only an update is done in eth specification. */
	}
	flow->layers |= MLX5_FLOW_LAYER_OUTER_L2 |
		MLX5_FLOW_LAYER_OUTER_VLAN;
	return size;
}

/**
 * Convert the @p item into a Verbs specification after ensuring the NIC
 * will understand and process it correctly.
 * If the necessary size for the conversion is greater than @p flow_size,
 * nothing is written in @p flow but the validation is still performed.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is less than or equal to @p flow_size, the @p item has fully been
 *   converted, otherwise another call with this returned memory size should
 *   be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_item_ipv4(const struct rte_flow_item *item, struct rte_flow *flow,
		    const size_t flow_size, struct rte_flow_error *error)
{
	const struct rte_flow_item_ipv4 *spec = item->spec;
	const struct rte_flow_item_ipv4 *mask = item->mask;
	const struct rte_flow_item_ipv4 nic_mask = {
		.hdr = {
			.src_addr = RTE_BE32(0xffffffff),
			.dst_addr = RTE_BE32(0xffffffff),
			.type_of_service = 0xff,
			.next_proto_id = 0xff,
		},
	};
	unsigned int size = sizeof(struct ibv_flow_spec_ipv4_ext);
	struct ibv_flow_spec_ipv4_ext ipv4 = {
		.type = IBV_FLOW_SPEC_IPV4_EXT,
		.size = size,
	};
	int ret;

	if (flow->layers & MLX5_FLOW_LAYER_OUTER_L3)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "multiple L3 layers not supported");
	else if (flow->layers & MLX5_FLOW_LAYER_OUTER_L4)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "L3 cannot follow an L4 layer");
	if (!mask)
		mask = &rte_flow_item_ipv4_mask;
	ret = mlx5_flow_item_acceptable
		(item, (const uint8_t *)mask,
		 (const uint8_t *)&nic_mask,
		 sizeof(struct rte_flow_item_ipv4), error);
	if (ret < 0)
		return ret;
	flow->layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
	if (spec) {
		ipv4.val = (struct ibv_flow_ipv4_ext_filter){
			.src_ip = spec->hdr.src_addr,
			.dst_ip = spec->hdr.dst_addr,
			.proto = spec->hdr.next_proto_id,
			.tos = spec->hdr.type_of_service,
		};
		ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
			.src_ip = mask->hdr.src_addr,
			.dst_ip = mask->hdr.dst_addr,
			.proto = mask->hdr.next_proto_id,
			.tos = mask->hdr.type_of_service,
		};
		/* Remove unwanted bits from values. */
		ipv4.val.src_ip &= ipv4.mask.src_ip;
		ipv4.val.dst_ip &= ipv4.mask.dst_ip;
		ipv4.val.proto &= ipv4.mask.proto;
		ipv4.val.tos &= ipv4.mask.tos;
	}
	flow->l3_protocol_en = !!ipv4.mask.proto;
	flow->l3_protocol = ipv4.val.proto;
	if (size <= flow_size)
		mlx5_flow_spec_verbs_add(flow, &ipv4, size);
	return size;
}

/**
 * Convert the @p item into a Verbs specification after ensuring the NIC
 * will understand and process it correctly.
 * If the necessary size for the conversion is greater than @p flow_size,
 * nothing is written in @p flow but the validation is still performed.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is less than or equal to @p flow_size, the @p item has fully been
 *   converted, otherwise another call with this returned memory size should
 *   be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_item_ipv6(const struct rte_flow_item *item, struct rte_flow *flow,
		    const size_t flow_size, struct rte_flow_error *error)
{
	const struct rte_flow_item_ipv6 *spec = item->spec;
	const struct rte_flow_item_ipv6 *mask = item->mask;
	const struct rte_flow_item_ipv6 nic_mask = {
		.hdr = {
			.src_addr =
				"\xff\xff\xff\xff\xff\xff\xff\xff"
				"\xff\xff\xff\xff\xff\xff\xff\xff",
			.dst_addr =
				"\xff\xff\xff\xff\xff\xff\xff\xff"
				"\xff\xff\xff\xff\xff\xff\xff\xff",
			.vtc_flow = RTE_BE32(0xffffffff),
			.proto = 0xff,
			.hop_limits = 0xff,
		},
	};
	unsigned int size = sizeof(struct ibv_flow_spec_ipv6);
	struct ibv_flow_spec_ipv6 ipv6 = {
		.type = IBV_FLOW_SPEC_IPV6,
		.size = size,
	};
	int ret;

	if (flow->layers & MLX5_FLOW_LAYER_OUTER_L3)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "multiple L3 layers not supported");
	else if (flow->layers & MLX5_FLOW_LAYER_OUTER_L4)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "L3 cannot follow an L4 layer");
	if (!mask)
		mask = &rte_flow_item_ipv6_mask;
	ret = mlx5_flow_item_acceptable
		(item, (const uint8_t *)mask,
		 (const uint8_t *)&nic_mask,
		 sizeof(struct rte_flow_item_ipv6), error);
	if (ret < 0)
		return ret;
	flow->layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
	if (spec) {
		unsigned int i;
		uint32_t vtc_flow_val;
		uint32_t vtc_flow_mask;

		memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
		       RTE_DIM(ipv6.val.src_ip));
		memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
		       RTE_DIM(ipv6.val.dst_ip));
		memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
		       RTE_DIM(ipv6.mask.src_ip));
		memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
		       RTE_DIM(ipv6.mask.dst_ip));
		vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
		vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
		ipv6.val.flow_label =
			rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
					 IPV6_HDR_FL_SHIFT);
		ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
					 IPV6_HDR_TC_SHIFT;
		ipv6.val.next_hdr = spec->hdr.proto;
		ipv6.val.hop_limit = spec->hdr.hop_limits;
		ipv6.mask.flow_label =
			rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
					 IPV6_HDR_FL_SHIFT);
		ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
					  IPV6_HDR_TC_SHIFT;
		ipv6.mask.next_hdr = mask->hdr.proto;
		ipv6.mask.hop_limit = mask->hdr.hop_limits;
		/* Remove unwanted bits from values. */
		for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
			ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
			ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
		}
		ipv6.val.flow_label &= ipv6.mask.flow_label;
		ipv6.val.traffic_class &= ipv6.mask.traffic_class;
		ipv6.val.next_hdr &= ipv6.mask.next_hdr;
		ipv6.val.hop_limit &= ipv6.mask.hop_limit;
	}
	flow->l3_protocol_en = !!ipv6.mask.next_hdr;
	flow->l3_protocol = ipv6.val.next_hdr;
	if (size <= flow_size)
		mlx5_flow_spec_verbs_add(flow, &ipv6, size);
	return size;
}

/**
 * Convert the @p item into a Verbs specification after ensuring the NIC
 * will understand and process it correctly.
 * If the necessary size for the conversion is greater than @p flow_size,
 * nothing is written in @p flow but the validation is still performed.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is less than or equal to @p flow_size, the @p item has fully been
 *   converted, otherwise another call with this returned memory size should
 *   be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_item_udp(const struct rte_flow_item *item, struct rte_flow *flow,
		   const size_t flow_size, struct rte_flow_error *error)
{
	const struct rte_flow_item_udp *spec = item->spec;
	const struct rte_flow_item_udp *mask = item->mask;
	unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
	struct ibv_flow_spec_tcp_udp udp = {
		.type = IBV_FLOW_SPEC_UDP,
		.size = size,
	};
	int ret;

	if (!(flow->layers & MLX5_FLOW_LAYER_OUTER_L3))
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "L3 is mandatory to filter on L4");
	if (flow->layers & MLX5_FLOW_LAYER_OUTER_L4)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "L4 layer is already present");
	if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_UDP)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "protocol filtering not compatible"
					  " with UDP layer");
	if (!mask)
		mask = &rte_flow_item_udp_mask;
	ret = mlx5_flow_item_acceptable
		(item, (const uint8_t *)mask,
		 (const uint8_t *)&rte_flow_item_udp_mask,
		 sizeof(struct rte_flow_item_udp), error);
	if (ret < 0)
		return ret;
	flow->layers |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
	if (size > flow_size)
		return size;
	if (spec) {
		udp.val.dst_port = spec->hdr.dst_port;
		udp.val.src_port = spec->hdr.src_port;
		udp.mask.dst_port = mask->hdr.dst_port;
		udp.mask.src_port = mask->hdr.src_port;
		/* Remove unwanted bits from values. */
		udp.val.src_port &= udp.mask.src_port;
		udp.val.dst_port &= udp.mask.dst_port;
	}
	mlx5_flow_spec_verbs_add(flow, &udp, size);
	return size;
}

/**
 * Convert the @p item into a Verbs specification after ensuring the NIC
 * will understand and process it correctly.
 * If the necessary size for the conversion is greater than @p flow_size,
 * nothing is written in @p flow but the validation is still performed.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is less than or equal to @p flow_size, the @p item has fully been
 *   converted, otherwise another call with this returned memory size should
 *   be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_item_tcp(const struct rte_flow_item *item, struct rte_flow *flow,
		   const size_t flow_size, struct rte_flow_error *error)
{
	const struct rte_flow_item_tcp *spec = item->spec;
	const struct rte_flow_item_tcp *mask = item->mask;
	unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
	struct ibv_flow_spec_tcp_udp tcp = {
		.type = IBV_FLOW_SPEC_TCP,
		.size = size,
	};
	int ret;

	if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_TCP)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "protocol filtering not compatible"
					  " with TCP layer");
	if (!(flow->layers & MLX5_FLOW_LAYER_OUTER_L3))
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "L3 is mandatory to filter on L4");
	if (flow->layers & MLX5_FLOW_LAYER_OUTER_L4)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "L4 layer is already present");
	if (!mask)
		mask = &rte_flow_item_tcp_mask;
	ret = mlx5_flow_item_acceptable
		(item, (const uint8_t *)mask,
		 (const uint8_t *)&rte_flow_item_tcp_mask,
		 sizeof(struct rte_flow_item_tcp), error);
	if (ret < 0)
		return ret;
	flow->layers |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
	if (size > flow_size)
		return size;
	if (spec) {
		tcp.val.dst_port = spec->hdr.dst_port;
		tcp.val.src_port = spec->hdr.src_port;
		tcp.mask.dst_port = mask->hdr.dst_port;
		tcp.mask.src_port = mask->hdr.src_port;
		/* Remove unwanted bits from values. */
		tcp.val.src_port &= tcp.mask.src_port;
		tcp.val.dst_port &= tcp.mask.dst_port;
	}
	mlx5_flow_spec_verbs_add(flow, &tcp, size);
	return size;
}

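/*
 * Example (illustrative): the l3_protocol check above ties L3 and L4
 * together; an IPv4 item carrying next_proto_id = MLX5_IP_PROTOCOL_UDP
 * followed by a TCP item is rejected:
 *
 *	const struct rte_flow_item_ipv4 ip_spec = {
 *		.hdr.next_proto_id = MLX5_IP_PROTOCOL_UDP,
 *	};
 *	// ETH -> IPV4 (ip_spec, proto masked) -> TCP  ==> ENOTSUP
 */
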
/**
 * Convert the @p pattern into Verbs specifications after ensuring the NIC
 * will understand and process it correctly.
 * The conversion is performed item per item, each of them is written into
 * the @p flow if its size is less than or equal to @p flow_size.
 * Validation and memory consumption computation are still performed until the
 * end of @p pattern, unless an error is encountered.
 *
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in, out] flow
 *   Pointer to the rte_flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small some
 *   garbage may be present.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is less than or equal to @p flow_size, the @p pattern has fully been
 *   converted, otherwise another call with this returned memory size should
 *   be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_items(const struct rte_flow_item pattern[],
		struct rte_flow *flow, const size_t flow_size,
		struct rte_flow_error *error)
{
	int remain = flow_size;
	size_t size = 0;

	for (; pattern->type != RTE_FLOW_ITEM_TYPE_END; pattern++) {
		int ret = 0;

		switch (pattern->type) {
		case RTE_FLOW_ITEM_TYPE_VOID:
			break;
		case RTE_FLOW_ITEM_TYPE_ETH:
			ret = mlx5_flow_item_eth(pattern, flow, remain, error);
			break;
		case RTE_FLOW_ITEM_TYPE_VLAN:
			ret = mlx5_flow_item_vlan(pattern, flow, remain, error);
			break;
		case RTE_FLOW_ITEM_TYPE_IPV4:
			ret = mlx5_flow_item_ipv4(pattern, flow, remain, error);
			break;
		case RTE_FLOW_ITEM_TYPE_IPV6:
			ret = mlx5_flow_item_ipv6(pattern, flow, remain, error);
			break;
		case RTE_FLOW_ITEM_TYPE_UDP:
			ret = mlx5_flow_item_udp(pattern, flow, remain, error);
			break;
		case RTE_FLOW_ITEM_TYPE_TCP:
			ret = mlx5_flow_item_tcp(pattern, flow, remain, error);
			break;
		default:
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  pattern,
						  "item not supported");
		}
		if (ret < 0)
			return ret;
		if (remain > ret)
			remain -= ret;
		else
			remain = 0;
		size += ret;
	}
	if (!flow->layers) {
		const struct rte_flow_item item = {
			.type = RTE_FLOW_ITEM_TYPE_ETH,
		};

		return mlx5_flow_item_eth(&item, flow, flow_size, error);
	}
	return size;
}

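/*
 * Example (illustrative): a pattern accepted by the loop above, matching UDP
 * over IPv4 from any MAC address:
 *
 *	const struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_UDP },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 */
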
/**
 * Convert the @p action into a Verbs specification after ensuring the NIC
 * will understand and process it correctly.
 * If the necessary size for the conversion is greater than @p flow_size,
 * nothing is written in @p flow but the validation is still performed.
 *
 * @param[in] action
 *   Action configuration.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is less than or equal to @p flow_size, the @p action has fully been
 *   converted, otherwise another call with this returned memory size should
 *   be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_action_drop(const struct rte_flow_action *action,
		      struct rte_flow *flow, const size_t flow_size,
		      struct rte_flow_error *error)
{
	unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
	struct ibv_flow_spec_action_drop drop = {
		.type = IBV_FLOW_SPEC_ACTION_DROP,
		.size = size,
	};

	if (flow->fate)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION,
					  action,
					  "multiple fate actions are not"
					  " supported");
	if (flow->modifier & (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK))
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION,
					  action,
					  "drop is not compatible with"
					  " flag/mark action");
	if (size <= flow_size)
		mlx5_flow_spec_verbs_add(flow, &drop, size);
	flow->fate |= MLX5_FLOW_FATE_DROP;
	return size;
}

/**
 * Convert the @p action into @p flow after ensuring the NIC will understand
 * and process it correctly.
 *
 * @param[in] dev
 *   Pointer to Ethernet device structure.
 * @param[in] action
 *   Action configuration.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_action_queue(struct rte_eth_dev *dev,
		       const struct rte_flow_action *action,
		       struct rte_flow *flow,
		       struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;
	const struct rte_flow_action_queue *queue = action->conf;

	if (flow->fate)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION,
					  action,
					  "multiple fate actions are not"
					  " supported");
	if (queue->index >= priv->rxqs_n)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &queue->index,
					  "queue index out of range");
	if (!(*priv->rxqs)[queue->index])
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &queue->index,
					  "queue is not configured");
	flow->queue = queue->index;
	flow->fate |= MLX5_FLOW_FATE_QUEUE;
	return 0;
}

/**
 * Convert the @p action into a Verbs specification after ensuring the NIC
 * will understand and process it correctly.
 * If the necessary size for the conversion is greater than @p flow_size,
 * nothing is written in @p flow but the validation is still performed.
 *
 * @param[in] action
 *   Action configuration.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is less than or equal to @p flow_size, the @p action has fully been
 *   converted, otherwise another call with this returned memory size should
 *   be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_action_flag(const struct rte_flow_action *action,
		      struct rte_flow *flow, const size_t flow_size,
		      struct rte_flow_error *error)
{
	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
	struct ibv_flow_spec_action_tag tag = {
		.type = IBV_FLOW_SPEC_ACTION_TAG,
		.size = size,
		.tag_id = mlx5_flow_mark_set(MLX5_FLOW_MARK_DEFAULT),
	};

	if (flow->modifier & MLX5_FLOW_MOD_FLAG)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION,
					  action,
					  "flag action already present");
	if (flow->fate & MLX5_FLOW_FATE_DROP)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION,
					  action,
					  "flag is not compatible with drop"
					  " action");
	if (flow->modifier & MLX5_FLOW_MOD_MARK)
		size = 0;
	flow->modifier |= MLX5_FLOW_MOD_FLAG;
	if (size <= flow_size)
		mlx5_flow_spec_verbs_add(flow, &tag, size);
	return size;
}

/**
 * Update verbs specification to modify the flag to mark.
 *
 * @param[in, out] flow
 *   Pointer to the rte_flow structure.
 * @param[in] mark_id
 *   Mark identifier to replace the flag.
 */
static void
mlx5_flow_verbs_mark_update(struct rte_flow *flow, uint32_t mark_id)
{
	struct ibv_spec_header *hdr;
	int i;

	/* Update Verbs specification. */
	hdr = (struct ibv_spec_header *)flow->verbs.specs;
	if (!hdr)
		return;
	for (i = 0; i != flow->verbs.attr->num_of_specs; ++i) {
		if (hdr->type == IBV_FLOW_SPEC_ACTION_TAG) {
			struct ibv_flow_spec_action_tag *t =
				(struct ibv_flow_spec_action_tag *)hdr;

			t->tag_id = mlx5_flow_mark_set(mark_id);
		}
		hdr = (struct ibv_spec_header *)((uintptr_t)hdr + hdr->size);
	}
}

/**
 * Convert the @p action into @p flow (or by updating the already present
 * Flag Verbs specification) after ensuring the NIC will understand and
 * process it correctly.
 * If the necessary size for the conversion is greater than @p flow_size,
 * nothing is written in @p flow but the validation is still performed.
 *
 * @param[in] action
 *   Action configuration.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is less than or equal to @p flow_size, the @p action has fully been
 *   converted, otherwise another call with this returned memory size should
 *   be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_action_mark(const struct rte_flow_action *action,
		      struct rte_flow *flow, const size_t flow_size,
		      struct rte_flow_error *error)
{
	const struct rte_flow_action_mark *mark = action->conf;
	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
	struct ibv_flow_spec_action_tag tag = {
		.type = IBV_FLOW_SPEC_ACTION_TAG,
		.size = size,
	};

	if (!mark)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION,
					  action,
					  "configuration cannot be null");
	if (mark->id >= MLX5_FLOW_MARK_MAX)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &mark->id,
					  "mark id must be in 0 <= id < "
					  RTE_STR(MLX5_FLOW_MARK_MAX));
	if (flow->modifier & MLX5_FLOW_MOD_MARK)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION,
					  action,
					  "mark action already present");
	if (flow->fate & MLX5_FLOW_FATE_DROP)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION,
					  action,
					  "mark is not compatible with drop"
					  " action");
	if (flow->modifier & MLX5_FLOW_MOD_FLAG) {
		mlx5_flow_verbs_mark_update(flow, mark->id);
		size = 0; /**< Only an update is done in the specification. */
	} else {
		tag.tag_id = mlx5_flow_mark_set(mark->id);
		if (size <= flow_size)
			mlx5_flow_spec_verbs_add(flow, &tag, size);
	}
	flow->modifier |= MLX5_FLOW_MOD_MARK;
	return size;
}

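/*
 * Example (illustrative): a MARK action accepted above; matching packets are
 * expected to carry the id back in mbuf->hash.fdir.hi with PKT_RX_FDIR_ID
 * set by the Rx burst function:
 *
 *	const struct rte_flow_action_mark mark = { .id = 42 };
 *	const struct rte_flow_action action = {
 *		.type = RTE_FLOW_ACTION_TYPE_MARK,
 *		.conf = &mark,
 *	};
 */
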
/**
 * Convert the @p actions into @p flow after ensuring the NIC will understand
 * and process it correctly.
 * The conversion is performed action per action, each of them is written into
 * the @p flow if its size is less than or equal to @p flow_size.
 * Validation and memory consumption computation are still performed until the
 * end of @p actions, unless an error is encountered.
 *
 * @param[in] dev
 *   Pointer to Ethernet device structure.
 * @param[in] actions
 *   Pointer to flow actions array.
 * @param[in, out] flow
 *   Pointer to the rte_flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small some
 *   garbage may be present.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is less than or equal to @p flow_size, the @p actions have fully been
 *   converted, otherwise another call with this returned memory size should
 *   be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_actions(struct rte_eth_dev *dev,
		  const struct rte_flow_action actions[],
		  struct rte_flow *flow, const size_t flow_size,
		  struct rte_flow_error *error)
{
	size_t size = 0;
	int remain = flow_size;
	int ret = 0;

	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
		switch (actions->type) {
		case RTE_FLOW_ACTION_TYPE_VOID:
			break;
		case RTE_FLOW_ACTION_TYPE_FLAG:
			ret = mlx5_flow_action_flag(actions, flow, remain,
						    error);
			break;
		case RTE_FLOW_ACTION_TYPE_MARK:
			ret = mlx5_flow_action_mark(actions, flow, remain,
						    error);
			break;
		case RTE_FLOW_ACTION_TYPE_DROP:
			ret = mlx5_flow_action_drop(actions, flow, remain,
						    error);
			break;
		case RTE_FLOW_ACTION_TYPE_QUEUE:
			ret = mlx5_flow_action_queue(dev, actions, flow, error);
			break;
		default:
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ACTION,
						  actions,
						  "action not supported");
		}
		if (ret < 0)
			return ret;
		if (remain > ret)
			remain -= ret;
		else
			remain = 0;
		size += ret;
	}
	if (!flow->fate)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
					  NULL,
					  "no fate action found");
	return size;
}

/**
 * Convert the @p attributes, @p pattern and @p actions into a flow for the
 * NIC after ensuring the NIC will understand and process it correctly.
 * The conversion is performed item per item and action per action, each of
 * them is written into the @p flow if its size is less than or equal to
 * @p flow_size.
 * Validation and memory consumption computation are still performed until the
 * end, unless an error is encountered.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small some
 *   garbage may be present.
 * @param[in] attributes
 *   Flow rule attributes.
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is less than or equal to @p flow_size, the flow has fully been converted
 *   and can be applied, otherwise another call with this returned memory
 *   size should be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_merge(struct rte_eth_dev *dev, struct rte_flow *flow,
		const size_t flow_size,
		const struct rte_flow_attr *attributes,
		const struct rte_flow_item pattern[],
		const struct rte_flow_action actions[],
		struct rte_flow_error *error)
{
	struct rte_flow local_flow = { .layers = 0, };
	size_t size = sizeof(*flow) + sizeof(struct ibv_flow_attr);
	int remain = (flow_size > size) ? flow_size - size : 0;
	int ret;

	if (!remain)
		flow = &local_flow;
	ret = mlx5_flow_attributes(dev, attributes, flow, error);
	if (ret < 0)
		return ret;
	ret = mlx5_flow_items(pattern, flow, remain, error);
	if (ret < 0)
		return ret;
	size += ret;
	remain = (flow_size > size) ? flow_size - size : 0;
	ret = mlx5_flow_actions(dev, actions, flow, remain, error);
	if (ret < 0)
		return ret;
	size += ret;
	if (size <= flow_size)
		flow->verbs.attr->priority = flow->attributes.priority;
	return size;
}

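/*
 * Illustrative sketch of the two-call convention implemented above and used
 * by mlx5_flow_list_create() below: a first call without a buffer returns
 * the required size, a second call performs the conversion:
 *
 *	int ret = mlx5_flow_merge(dev, NULL, 0, attr, items, actions, error);
 *
 *	if (ret < 0)
 *		return ret;
 *	flow = rte_zmalloc(__func__, ret, 0);
 *	// ... set flow->verbs.attr and flow->verbs.specs ...
 *	ret = mlx5_flow_merge(dev, flow, ret, attr, items, actions, error);
 */
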
/**
 * Set the Rx queue mark flag if the flow has a mark or flag modifier.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[in] flow
 *   Pointer to flow structure.
 */
static void
mlx5_flow_rxq_mark_set(struct rte_eth_dev *dev, struct rte_flow *flow)
{
	struct priv *priv = dev->data->dev_private;

	if (flow->modifier & (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK)) {
		struct mlx5_rxq_ctrl *rxq_ctrl =
			container_of((*priv->rxqs)[flow->queue],
				     struct mlx5_rxq_ctrl, rxq);

		rxq_ctrl->rxq.mark = 1;
		rxq_ctrl->flow_mark_n++;
	}
}

/**
 * Clear the Rx queue mark associated with the @p flow if no other flow uses
 * it with a mark request.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[in] flow
 *   Pointer to the flow.
 */
static void
mlx5_flow_rxq_mark_trim(struct rte_eth_dev *dev, struct rte_flow *flow)
{
	struct priv *priv = dev->data->dev_private;

	if (flow->modifier & (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK)) {
		struct mlx5_rxq_ctrl *rxq_ctrl =
			container_of((*priv->rxqs)[flow->queue],
				     struct mlx5_rxq_ctrl, rxq);

		rxq_ctrl->flow_mark_n--;
		rxq_ctrl->rxq.mark = !!rxq_ctrl->flow_mark_n;
	}
}

/**
 * Clear the mark bit in all Rx queues.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
static void
mlx5_flow_rxq_mark_clear(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	unsigned int i;

	for (i = 0; i != priv->rxqs_n; ++i) {
		struct mlx5_rxq_ctrl *rxq_ctrl;

		if (!(*priv->rxqs)[i])
			continue;
		rxq_ctrl = container_of((*priv->rxqs)[i],
					struct mlx5_rxq_ctrl, rxq);
		rxq_ctrl->flow_mark_n = 0;
		rxq_ctrl->rxq.mark = 0;
	}
}

/**
 * Validate a flow supported by the NIC.
 *
 * @see rte_flow_validate()
 * @see rte_flow_ops
 */
int
mlx5_flow_validate(struct rte_eth_dev *dev,
		   const struct rte_flow_attr *attr,
		   const struct rte_flow_item items[],
		   const struct rte_flow_action actions[],
		   struct rte_flow_error *error)
{
	int ret = mlx5_flow_merge(dev, NULL, 0, attr, items, actions, error);

	if (ret < 0)
		return ret;
	return 0;
}

/**
 * Remove the flow.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[in, out] flow
 *   Pointer to flow structure.
 */
static void
mlx5_flow_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
{
	if (flow->verbs.flow) {
		claim_zero(mlx5_glue->destroy_flow(flow->verbs.flow));
		flow->verbs.flow = NULL;
	}
	if (flow->verbs.hrxq) {
		if (flow->fate & MLX5_FLOW_FATE_DROP)
			mlx5_hrxq_drop_release(dev);
		else if (flow->fate & MLX5_FLOW_FATE_QUEUE)
			mlx5_hrxq_release(dev, flow->verbs.hrxq);
		flow->verbs.hrxq = NULL;
	}
}

/**
 * Apply the flow.
 *
 * @param[in] dev
 *   Pointer to Ethernet device structure.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
		struct rte_flow_error *error)
{
	if (flow->fate & MLX5_FLOW_FATE_DROP) {
		flow->verbs.hrxq = mlx5_hrxq_drop_new(dev);
		if (!flow->verbs.hrxq)
			return rte_flow_error_set
				(error, errno,
				 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				 NULL,
				 "cannot allocate Drop queue");
	} else if (flow->fate & MLX5_FLOW_FATE_QUEUE) {
		struct mlx5_hrxq *hrxq;

		hrxq = mlx5_hrxq_get(dev, rss_hash_default_key,
				     rss_hash_default_key_len, 0,
				     &flow->queue, 1, 0, 0);
		if (!hrxq)
			hrxq = mlx5_hrxq_new(dev, rss_hash_default_key,
					     rss_hash_default_key_len, 0,
					     &flow->queue, 1, 0, 0);
		if (!hrxq)
			return rte_flow_error_set(error, rte_errno,
					RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
					NULL,
					"cannot create flow");
		flow->verbs.hrxq = hrxq;
	}
	flow->verbs.flow =
		mlx5_glue->create_flow(flow->verbs.hrxq->qp, flow->verbs.attr);
	if (!flow->verbs.flow) {
		if (flow->fate & MLX5_FLOW_FATE_DROP)
			mlx5_hrxq_drop_release(dev);
		else
			mlx5_hrxq_release(dev, flow->verbs.hrxq);
		flow->verbs.hrxq = NULL;
		return rte_flow_error_set(error, errno,
					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
					  NULL,
					  "kernel module refuses to create"
					  " flow");
	}
	return 0;
}

/**
 * Create a flow and add it to @p list.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param list
 *   Pointer to a TAILQ flow list.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] items
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A flow on success, NULL otherwise and rte_errno is set.
 */
static struct rte_flow *
mlx5_flow_list_create(struct rte_eth_dev *dev,
		      struct mlx5_flows *list,
		      const struct rte_flow_attr *attr,
		      const struct rte_flow_item items[],
		      const struct rte_flow_action actions[],
		      struct rte_flow_error *error)
{
	struct rte_flow *flow;
	size_t size;
	int ret;

	ret = mlx5_flow_merge(dev, NULL, 0, attr, items, actions, error);
	if (ret < 0)
		return NULL;
	size = ret;
	flow = rte_zmalloc(__func__, size, 0);
	if (!flow) {
		rte_flow_error_set(error, ENOMEM,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "cannot allocate memory");
		return NULL;
	}
	flow->verbs.attr = (struct ibv_flow_attr *)(flow + 1);
	flow->verbs.specs = (uint8_t *)(flow->verbs.attr + 1);
	ret = mlx5_flow_merge(dev, flow, size, attr, items, actions, error);
	if (ret < 0)
		goto error;
	assert((size_t)ret == size);
	if (dev->data->dev_started) {
		ret = mlx5_flow_apply(dev, flow, error);
		if (ret < 0)
			goto error;
	}
	mlx5_flow_rxq_mark_set(dev, flow);
	TAILQ_INSERT_TAIL(list, flow, next);
	return flow;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	mlx5_flow_remove(dev, flow);
	rte_free(flow);
	rte_errno = ret; /* Restore rte_errno. */
	return NULL;
}

/**
 * Create a flow.
 *
 * @see rte_flow_create()
 * @see rte_flow_ops
 */
struct rte_flow *
mlx5_flow_create(struct rte_eth_dev *dev,
		 const struct rte_flow_attr *attr,
		 const struct rte_flow_item items[],
		 const struct rte_flow_action actions[],
		 struct rte_flow_error *error)
{
	return mlx5_flow_list_create
		(dev, &((struct priv *)dev->data->dev_private)->flows,
		 attr, items, actions, error);
}

/**
 * Destroy a flow in a list.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param list
 *   Pointer to a TAILQ flow list.
 * @param[in] flow
 *   Flow to destroy.
 */
static void
mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
		       struct rte_flow *flow)
{
	mlx5_flow_remove(dev, flow);
	TAILQ_REMOVE(list, flow, next);
	mlx5_flow_rxq_mark_trim(dev, flow);
	rte_free(flow);
}

/**
 * Destroy all flows.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param list
 *   Pointer to a TAILQ flow list.
 */
void
mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
{
	while (!TAILQ_EMPTY(list)) {
		struct rte_flow *flow;

		flow = TAILQ_FIRST(list);
		mlx5_flow_list_destroy(dev, list, flow);
	}
}

/**
 * Remove all flows.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param list
 *   Pointer to a TAILQ flow list.
 */
void
mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
{
	struct rte_flow *flow;

	TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next)
		mlx5_flow_remove(dev, flow);
	mlx5_flow_rxq_mark_clear(dev);
}

/**
 * Add all flows.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param list
 *   Pointer to a TAILQ flow list.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
{
	struct rte_flow *flow;
	struct rte_flow_error error;
	int ret = 0;

	TAILQ_FOREACH(flow, list, next) {
		ret = mlx5_flow_apply(dev, flow, &error);
		if (ret < 0)
			goto error;
		mlx5_flow_rxq_mark_set(dev, flow);
	}
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	mlx5_flow_stop(dev, list);
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}

/**
 * Verify the flow list is empty.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return the number of flows not released.
 */
int
mlx5_flow_verify(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	struct rte_flow *flow;
	int ret = 0;

	TAILQ_FOREACH(flow, &priv->flows, next) {
		DRV_LOG(DEBUG, "port %u flow %p still referenced",
			dev->data->port_id, (void *)flow);
		++ret;
	}
	return ret;
}

/**
 * Enable a control flow configured from the control plane.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param eth_spec
 *   An Ethernet flow spec to apply.
 * @param eth_mask
 *   An Ethernet flow mask to apply.
 * @param vlan_spec
 *   A VLAN flow spec to apply.
 * @param vlan_mask
 *   A VLAN flow mask to apply.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
		    struct rte_flow_item_eth *eth_spec,
		    struct rte_flow_item_eth *eth_mask,
		    struct rte_flow_item_vlan *vlan_spec,
		    struct rte_flow_item_vlan *vlan_mask)
{
	struct priv *priv = dev->data->dev_private;
	const struct rte_flow_attr attr = {
		.ingress = 1,
		.priority = priv->config.flow_prio - 1,
	};
	struct rte_flow_item items[] = {
		{
			.type = RTE_FLOW_ITEM_TYPE_ETH,
			.spec = eth_spec,
			.last = NULL,
			.mask = eth_mask,
		},
		{
			.type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
				RTE_FLOW_ITEM_TYPE_END,
			.spec = vlan_spec,
			.last = NULL,
			.mask = vlan_mask,
		},
		{
			.type = RTE_FLOW_ITEM_TYPE_END,
		},
	};
	uint16_t queue[priv->reta_idx_n];
	struct rte_flow_action_rss action_rss = {
		.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
		.level = 0,
		.types = priv->rss_conf.rss_hf,
		.key_len = priv->rss_conf.rss_key_len,
		.queue_num = priv->reta_idx_n,
		.key = priv->rss_conf.rss_key,
		.queue = queue,
	};
	struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_RSS,
			.conf = &action_rss,
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		},
	};
	struct rte_flow *flow;
	struct rte_flow_error error;
	unsigned int i;

	if (!priv->reta_idx_n) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	for (i = 0; i != priv->reta_idx_n; ++i)
		queue[i] = (*priv->reta_idx)[i];
	flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
				     actions, &error);
	if (!flow)
		return -rte_errno;
	return 0;
}

/**
 * Enable a control flow configured from the control plane.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param eth_spec
 *   An Ethernet flow spec to apply.
 * @param eth_mask
 *   An Ethernet flow mask to apply.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_ctrl_flow(struct rte_eth_dev *dev,
	       struct rte_flow_item_eth *eth_spec,
	       struct rte_flow_item_eth *eth_mask)
{
	return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
}

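/*
 * Example (illustrative): the trigger code in mlx5_trigger.c enables the
 * broadcast control flow roughly as follows:
 *
 *	struct rte_flow_item_eth bcast = {
 *		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *	};
 *
 *	mlx5_ctrl_flow(dev, &bcast, &bcast);
 */
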
/**
 * Destroy a flow.
 *
 * @see rte_flow_destroy()
 * @see rte_flow_ops
 */
int
mlx5_flow_destroy(struct rte_eth_dev *dev,
		  struct rte_flow *flow,
		  struct rte_flow_error *error __rte_unused)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_destroy(dev, &priv->flows, flow);
	return 0;
}

/**
 * Destroy all flows.
 *
 * @see rte_flow_flush()
 * @see rte_flow_ops
 */
int
mlx5_flow_flush(struct rte_eth_dev *dev,
		struct rte_flow_error *error __rte_unused)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_flush(dev, &priv->flows);
	return 0;
}

/**
 * Isolated mode.
 *
 * @see rte_flow_isolate()
 * @see rte_flow_ops
 */
int
mlx5_flow_isolate(struct rte_eth_dev *dev,
		  int enable,
		  struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	if (dev->data->dev_started) {
		rte_flow_error_set(error, EBUSY,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "port must be stopped first");
		return -rte_errno;
	}
	priv->isolated = !!enable;
	if (enable)
		dev->dev_ops = &mlx5_dev_ops_isolate;
	else
		dev->dev_ops = &mlx5_dev_ops;
	return 0;
}

/**
 * Convert a flow director filter to a generic flow.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Flow director filter to add.
 * @param attributes
 *   Generic flow parameters structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
			 const struct rte_eth_fdir_filter *fdir_filter,
			 struct mlx5_fdir *attributes)
{
	struct priv *priv = dev->data->dev_private;
	const struct rte_eth_fdir_input *input = &fdir_filter->input;
	const struct rte_eth_fdir_masks *mask =
		&dev->data->dev_conf.fdir_conf.mask;

	/* Validate queue number. */
	if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
		DRV_LOG(ERR, "port %u invalid queue number %d",
			dev->data->port_id, fdir_filter->action.rx_queue);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	attributes->attr.ingress = 1;
	attributes->items[0] = (struct rte_flow_item) {
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.spec = &attributes->l2,
		.mask = &attributes->l2_mask,
	};
	switch (fdir_filter->action.behavior) {
	case RTE_ETH_FDIR_ACCEPT:
		attributes->actions[0] = (struct rte_flow_action){
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &attributes->queue,
		};
		break;
	case RTE_ETH_FDIR_REJECT:
		attributes->actions[0] = (struct rte_flow_action){
			.type = RTE_FLOW_ACTION_TYPE_DROP,
		};
		break;
	default:
		DRV_LOG(ERR, "port %u invalid behavior %d",
			dev->data->port_id,
			fdir_filter->action.behavior);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	attributes->queue.index = fdir_filter->action.rx_queue;
	/* Handle L3. */
	switch (fdir_filter->input.flow_type) {
	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = input->flow.ip4_flow.src_ip,
			.dst_addr = input->flow.ip4_flow.dst_ip,
			.time_to_live = input->flow.ip4_flow.ttl,
			.type_of_service = input->flow.ip4_flow.tos,
			.next_proto_id = input->flow.ip4_flow.proto,
		};
		attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = mask->ipv4_mask.src_ip,
			.dst_addr = mask->ipv4_mask.dst_ip,
			.time_to_live = mask->ipv4_mask.ttl,
			.type_of_service = mask->ipv4_mask.tos,
			.next_proto_id = mask->ipv4_mask.proto,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV4,
			.spec = &attributes->l3,
			.mask = &attributes->l3_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
			.hop_limits = input->flow.ipv6_flow.hop_limits,
			.proto = input->flow.ipv6_flow.proto,
		};

		memcpy(attributes->l3.ipv6.hdr.src_addr,
		       input->flow.ipv6_flow.src_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
		memcpy(attributes->l3.ipv6.hdr.dst_addr,
		       input->flow.ipv6_flow.dst_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
		memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
		       mask->ipv6_mask.src_ip,
		       RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
		memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
		       mask->ipv6_mask.dst_ip,
		       RTE_DIM(attributes->l3_mask.ipv6.hdr.dst_addr));
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV6,
			.spec = &attributes->l3,
			.mask = &attributes->l3_mask,
		};
		break;
	default:
		DRV_LOG(ERR, "port %u invalid flow type %d",
			dev->data->port_id, fdir_filter->input.flow_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	/* Handle L4. */
	switch (fdir_filter->input.flow_type) {
	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
		attributes->l4.udp.hdr = (struct udp_hdr){
			.src_port = input->flow.udp4_flow.src_port,
			.dst_port = input->flow.udp4_flow.dst_port,
		};
		attributes->l4_mask.udp.hdr = (struct udp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_UDP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
		attributes->l4.tcp.hdr = (struct tcp_hdr){
			.src_port = input->flow.tcp4_flow.src_port,
			.dst_port = input->flow.tcp4_flow.dst_port,
		};
		attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_TCP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
		attributes->l4.udp.hdr = (struct udp_hdr){
			.src_port = input->flow.udp6_flow.src_port,
			.dst_port = input->flow.udp6_flow.dst_port,
		};
		attributes->l4_mask.udp.hdr = (struct udp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_UDP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
		attributes->l4.tcp.hdr = (struct tcp_hdr){
			.src_port = input->flow.tcp6_flow.src_port,
			.dst_port = input->flow.tcp6_flow.dst_port,
		};
		attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_TCP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
		break;
	default:
		DRV_LOG(ERR, "port %u invalid flow type %d",
			dev->data->port_id, fdir_filter->input.flow_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	return 0;
}

/**
 * Add new flow director filter and store it in list.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Flow director filter to add.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_add(struct rte_eth_dev *dev,
		     const struct rte_eth_fdir_filter *fdir_filter)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_fdir attributes = {
		.attr.group = 0,
		.l2_mask = {
			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.type = 0,
		},
	};
	struct rte_flow_error error;
	struct rte_flow *flow;
	int ret;

	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
	if (ret)
		return ret;
	flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
				     attributes.items, attributes.actions,
				     &error);
	if (flow) {
		DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
			(void *)flow);
		return 0;
	}
	return -rte_errno;
}

/**
 * Delete specific filter.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Filter to be deleted.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_delete(struct rte_eth_dev *dev __rte_unused,
			const struct rte_eth_fdir_filter *fdir_filter
			__rte_unused)
{
	rte_errno = ENOTSUP;
	return -rte_errno;
}

/**
 * Update queue for specific filter.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Filter to be updated.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_update(struct rte_eth_dev *dev,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	int ret;

	ret = mlx5_fdir_filter_delete(dev, fdir_filter);
	if (ret)
		return ret;
	return mlx5_fdir_filter_add(dev, fdir_filter);
}

/**
 * Flush all filters.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
static void
mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_flush(dev, &priv->flows);
}

/**
 * Get flow director information.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[out] fdir_info
 *   Resulting flow director information.
 */
static void
mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
{
	struct rte_eth_fdir_masks *mask =
		&dev->data->dev_conf.fdir_conf.mask;

	fdir_info->mode = dev->data->dev_conf.fdir_conf.mode;
	fdir_info->guarant_spc = 0;
	rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
	fdir_info->max_flexpayload = 0;
	fdir_info->flow_types_mask[0] = 0;
	fdir_info->flex_payload_unit = 0;
	fdir_info->max_flex_payload_segment_num = 0;
	fdir_info->flex_payload_limit = 0;
	memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
}

/**
 * Deal with flow director operations.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
		    void *arg)
{
	enum rte_fdir_mode fdir_mode =
		dev->data->dev_conf.fdir_conf.mode;

	if (filter_op == RTE_ETH_FILTER_NOP)
		return 0;
	if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
	    fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
		DRV_LOG(ERR, "port %u flow director mode %d not supported",
			dev->data->port_id, fdir_mode);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	switch (filter_op) {
	case RTE_ETH_FILTER_ADD:
		return mlx5_fdir_filter_add(dev, arg);
	case RTE_ETH_FILTER_UPDATE:
		return mlx5_fdir_filter_update(dev, arg);
	case RTE_ETH_FILTER_DELETE:
		return mlx5_fdir_filter_delete(dev, arg);
	case RTE_ETH_FILTER_FLUSH:
		mlx5_fdir_filter_flush(dev);
		break;
	case RTE_ETH_FILTER_INFO:
		mlx5_fdir_info_get(dev, arg);
		break;
	default:
		DRV_LOG(DEBUG, "port %u unknown operation %u",
			dev->data->port_id, filter_op);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	return 0;
}

/**
 * Manage filter operations.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param filter_type
 *   Filter type.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
		     enum rte_filter_type filter_type,
		     enum rte_filter_op filter_op,
		     void *arg)
{
	switch (filter_type) {
	case RTE_ETH_FILTER_GENERIC:
		if (filter_op != RTE_ETH_FILTER_GET) {
			rte_errno = EINVAL;
			return -rte_errno;
		}
		*(const void **)arg = &mlx5_flow_ops;
		return 0;
	case RTE_ETH_FILTER_FDIR:
		return mlx5_fdir_ctrl_func(dev, filter_op, arg);
	default:
		DRV_LOG(ERR, "port %u filter type (%d) not supported",
			dev->data->port_id, filter_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	return 0;
}