/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2016 6WIND S.A.
 * Copyright 2016 Mellanox Technologies, Ltd
 */

#include <sys/queue.h>
#include <stdint.h>
#include <string.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_common.h>
#include <rte_ether.h>
#include <rte_eth_ctrl.h>
#include <rte_ethdev_driver.h>
#include <rte_flow.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>
#include <rte_ip.h>

#include "mlx5.h"
#include "mlx5_defs.h"
#include "mlx5_prm.h"
#include "mlx5_glue.h"

/* Dev ops structure defined in mlx5.c */
extern const struct eth_dev_ops mlx5_dev_ops;
extern const struct eth_dev_ops mlx5_dev_ops_isolate;

/* Pattern layer bits. */
#define MLX5_FLOW_LAYER_OUTER_L2 (1u << 0)
#define MLX5_FLOW_LAYER_OUTER_L3_IPV4 (1u << 1)
#define MLX5_FLOW_LAYER_OUTER_L3_IPV6 (1u << 2)
#define MLX5_FLOW_LAYER_OUTER_L4_UDP (1u << 3)
#define MLX5_FLOW_LAYER_OUTER_L4_TCP (1u << 4)
#define MLX5_FLOW_LAYER_OUTER_VLAN (1u << 5)

/* Masks covering all outer L3 and L4 layer bits. */
#define MLX5_FLOW_LAYER_OUTER_L3 \
	(MLX5_FLOW_LAYER_OUTER_L3_IPV4 | MLX5_FLOW_LAYER_OUTER_L3_IPV6)
#define MLX5_FLOW_LAYER_OUTER_L4 \
	(MLX5_FLOW_LAYER_OUTER_L4_UDP | MLX5_FLOW_LAYER_OUTER_L4_TCP)
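/*
 * Example: after parsing an ETH / IPV4 / UDP pattern, flow->layers equals
 * MLX5_FLOW_LAYER_OUTER_L2 | MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
 * MLX5_FLOW_LAYER_OUTER_L4_UDP, so (flow->layers & MLX5_FLOW_LAYER_OUTER_L3)
 * is non-zero and any further L3 item is rejected.
 */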
/* Actions that modify the fate of matching traffic. */
#define MLX5_FLOW_FATE_DROP (1u << 0)
#define MLX5_FLOW_FATE_QUEUE (1u << 1)
#define MLX5_FLOW_FATE_RSS (1u << 2)

/* Modify a packet. */
#define MLX5_FLOW_MOD_FLAG (1u << 0)
#define MLX5_FLOW_MOD_MARK (1u << 1)

/* Possible L3 protocols for filtering. */
#define MLX5_IP_PROTOCOL_TCP 6
#define MLX5_IP_PROTOCOL_UDP 17

/* Priority reserved for default flows. */
#define MLX5_FLOW_PRIO_RSVD ((uint32_t)-1)
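/*
 * MLX5_FLOW_PRIO_RSVD is a sentinel: when a flow is created with this
 * priority, mlx5_flow_attributes() replaces it with the lowest usable
 * priority (flow_prio - 1).
 */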
/** Internal pattern items used by the RSS expansion. */
enum mlx5_expansion {
	MLX5_EXPANSION_ROOT,
	MLX5_EXPANSION_ETH,
	MLX5_EXPANSION_IPV4,
	MLX5_EXPANSION_IPV4_UDP,
	MLX5_EXPANSION_IPV4_TCP,
	MLX5_EXPANSION_IPV6,
	MLX5_EXPANSION_IPV6_UDP,
	MLX5_EXPANSION_IPV6_TCP,
};

/** Supported expansion of items. */
static const struct rte_flow_expand_node mlx5_support_expansion[] = {
	[MLX5_EXPANSION_ROOT] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
						 MLX5_EXPANSION_IPV4,
						 MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_END,
	},
	[MLX5_EXPANSION_ETH] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
						 MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_ETH,
	},
	[MLX5_EXPANSION_IPV4] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4_UDP,
						 MLX5_EXPANSION_IPV4_TCP),
		.type = RTE_FLOW_ITEM_TYPE_IPV4,
		.rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
			ETH_RSS_NONFRAG_IPV4_OTHER,
	},
	[MLX5_EXPANSION_IPV4_UDP] = {
		.type = RTE_FLOW_ITEM_TYPE_UDP,
		.rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
	},
	[MLX5_EXPANSION_IPV4_TCP] = {
		.type = RTE_FLOW_ITEM_TYPE_TCP,
		.rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
	},
	[MLX5_EXPANSION_IPV6] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV6_UDP,
						 MLX5_EXPANSION_IPV6_TCP),
		.type = RTE_FLOW_ITEM_TYPE_IPV6,
		.rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
			ETH_RSS_NONFRAG_IPV6_OTHER,
	},
	[MLX5_EXPANSION_IPV6_UDP] = {
		.type = RTE_FLOW_ITEM_TYPE_UDP,
		.rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
	},
	[MLX5_EXPANSION_IPV6_TCP] = {
		.type = RTE_FLOW_ITEM_TYPE_TCP,
		.rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
	},
};
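/*
 * For example, with rss.types covering IPv4 TCP and UDP, the user pattern
 * ETH / IPV4 / END expands into ETH / IPV4 / END, ETH / IPV4 / UDP / END and
 * ETH / IPV4 / TCP / END, one Verbs flow being created per entry.
 */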
/** A Verbs flow: attribute, specifications and related handles. */
struct mlx5_flow_verbs {
	LIST_ENTRY(mlx5_flow_verbs) next;
	unsigned int size; /**< Size of the attribute. */
	struct {
		struct ibv_flow_attr *attr;
		/**< Pointer to the specification buffer. */
		uint8_t *specs; /**< Pointer to the specifications. */
	};
	struct ibv_flow *flow; /**< Verbs flow pointer. */
	struct mlx5_hrxq *hrxq; /**< Hash Rx queue object. */
	uint64_t hash_fields; /**< Verbs hash Rx queue hash fields. */
};

/* Flow structure. */
struct rte_flow {
	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
	struct rte_flow_attr attributes; /**< User flow attribute. */
	uint32_t l3_protocol_en:1; /**< Protocol filtering requested. */
	uint32_t layers;
	/**< Bit-fields of present layers, see MLX5_FLOW_LAYER_*. */
	uint32_t modifier;
	/**< Bit-fields of present modifiers, see MLX5_FLOW_MOD_*. */
	uint32_t fate;
	/**< Bit-fields of present fate actions, see MLX5_FLOW_FATE_*. */
	uint8_t l3_protocol; /**< Valid when l3_protocol_en is set. */
	LIST_HEAD(verbs, mlx5_flow_verbs) verbs; /**< Verbs flows list. */
	struct mlx5_flow_verbs *cur_verbs;
	/**< Current Verbs flow structure being filled. */
	struct rte_flow_action_rss rss; /**< RSS context. */
	uint8_t key[MLX5_RSS_HASH_KEY_LEN]; /**< RSS hash key. */
	uint16_t (*queue)[]; /**< Destination queues to redirect traffic to. */
};
static const struct rte_flow_ops mlx5_flow_ops = {
	.validate = mlx5_flow_validate,
	.create = mlx5_flow_create,
	.destroy = mlx5_flow_destroy,
	.flush = mlx5_flow_flush,
	.isolate = mlx5_flow_isolate,
};

/* Convert FDIR request to Generic flow. */
struct mlx5_fdir {
	struct rte_flow_attr attr;
	struct rte_flow_action actions[2];
	struct rte_flow_item items[4];
	struct rte_flow_item_eth l2;
	struct rte_flow_item_eth l2_mask;
	union {
		struct rte_flow_item_ipv4 ipv4;
		struct rte_flow_item_ipv6 ipv6;
	} l3;
	union {
		struct rte_flow_item_ipv4 ipv4;
		struct rte_flow_item_ipv6 ipv6;
	} l3_mask;
	union {
		struct rte_flow_item_udp udp;
		struct rte_flow_item_tcp tcp;
	} l4;
	union {
		struct rte_flow_item_udp udp;
		struct rte_flow_item_tcp tcp;
	} l4_mask;
	struct rte_flow_action_queue queue;
};

/* Verbs specification header. */
struct ibv_spec_header {
	enum ibv_flow_spec_type type;
	uint16_t size;
};

/**
 * Number of sub priorities.
 * For each kind of pattern matching, i.e. L2, L3, L4, to get a correct
 * match on the NIC (firmware dependent), L4 must have the highest priority,
 * followed by L3 and ending with L2.
 */
#define MLX5_PRIORITY_MAP_L2 2
#define MLX5_PRIORITY_MAP_L3 1
#define MLX5_PRIORITY_MAP_L4 0
#define MLX5_PRIORITY_MAP_MAX 3
/* Map of Verbs to Flow priority with 8 Verbs priorities. */
static const uint32_t priority_map_3[][MLX5_PRIORITY_MAP_MAX] = {
	{ 0, 1, 2 }, { 2, 3, 4 }, { 5, 6, 7 },
};

/* Map of Verbs to Flow priority with 16 Verbs priorities. */
static const uint32_t priority_map_5[][MLX5_PRIORITY_MAP_MAX] = {
	{ 0, 1, 2 }, { 3, 4, 5 }, { 6, 7, 8 },
	{ 9, 10, 11 }, { 12, 13, 14 },
};
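/*
 * Example: with 16 Verbs priorities, a flow created at priority 1 whose
 * deepest layer is L3 gets Verbs priority
 * priority_map_5[1][MLX5_PRIORITY_MAP_L3] == 4.
 */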
/**
 * Discover the maximum number of priorities available.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   The number of supported flow priorities on success, a negative errno
 *   value otherwise and rte_errno is set.
 */
static int
mlx5_flow_discover_priorities(struct rte_eth_dev *dev)
{
	struct {
		struct ibv_flow_attr attr;
		struct ibv_flow_spec_eth eth;
		struct ibv_flow_spec_action_drop drop;
	} flow_attr = {
		.attr = {
			.num_of_specs = 2,
		},
		.eth = {
			.type = IBV_FLOW_SPEC_ETH,
			.size = sizeof(struct ibv_flow_spec_eth),
		},
		.drop = {
			.size = sizeof(struct ibv_flow_spec_action_drop),
			.type = IBV_FLOW_SPEC_ACTION_DROP,
		},
	};
	struct ibv_flow *flow;
	struct mlx5_hrxq *drop = mlx5_hrxq_drop_new(dev);
	uint16_t vprio[] = { 8, 16 };
	int i;
	int priority = 0;

	if (!drop) {
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	for (i = 0; i != RTE_DIM(vprio); i++) {
		flow_attr.attr.priority = vprio[i] - 1;
		flow = mlx5_glue->create_flow(drop->qp, &flow_attr.attr);
		if (!flow)
			break;
		claim_zero(mlx5_glue->destroy_flow(flow));
		priority = vprio[i];
	}
	switch (priority) {
	case 8:
		priority = RTE_DIM(priority_map_3);
		break;
	case 16:
		priority = RTE_DIM(priority_map_5);
		break;
	default:
		rte_errno = ENOTSUP;
		DRV_LOG(ERR,
			"port %u verbs maximum priority: %d expected 8/16",
			dev->data->port_id, vprio[i]);
		return -rte_errno;
	}
	mlx5_hrxq_drop_release(dev);
	DRV_LOG(INFO, "port %u flow maximum priority: %d",
		dev->data->port_id, priority);
	return priority;
}
/**
 * Adjust flow priority.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param flow
 *   Pointer to an rte flow.
 */
static void
mlx5_flow_adjust_priority(struct rte_eth_dev *dev, struct rte_flow *flow)
{
	struct priv *priv = dev->data->dev_private;
	uint32_t priority = flow->attributes.priority;
	uint32_t subpriority = flow->cur_verbs->attr->priority;

	switch (priv->config.flow_prio) {
	case RTE_DIM(priority_map_3):
		priority = priority_map_3[priority][subpriority];
		break;
	case RTE_DIM(priority_map_5):
		priority = priority_map_5[priority][subpriority];
		break;
	}
	flow->cur_verbs->attr->priority = priority;
}
/**
 * Verify the @p attributes will be correctly understood by the NIC and store
 * them in the @p flow if everything is correct.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[in] attributes
 *   Pointer to flow attributes.
 * @param[in, out] flow
 *   Pointer to the rte_flow structure.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_attributes(struct rte_eth_dev *dev,
		     const struct rte_flow_attr *attributes,
		     struct rte_flow *flow,
		     struct rte_flow_error *error)
{
	uint32_t priority_max =
		((struct priv *)dev->data->dev_private)->config.flow_prio - 1;

	if (attributes->group)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
					  NULL,
					  "groups are not supported");
	if (attributes->priority != MLX5_FLOW_PRIO_RSVD &&
	    attributes->priority >= priority_max)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
					  NULL,
					  "priority out of range");
	if (attributes->egress)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
					  NULL,
					  "egress is not supported");
	if (attributes->transfer)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
					  NULL,
					  "transfer is not supported");
	if (!attributes->ingress)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
					  NULL,
					  "ingress attribute is mandatory");
	flow->attributes = *attributes;
	if (attributes->priority == MLX5_FLOW_PRIO_RSVD)
		flow->attributes.priority = priority_max;
	return 0;
}
/**
 * Verify the @p item specifications (spec, last, mask) are compatible with the
 * NIC capabilities.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] mask
 *   @p item->mask or flow default bit-masks.
 * @param[in] nic_mask
 *   Bit-masks covering supported fields by the NIC to compare with user mask.
 * @param[in] size
 *   Bit-masks size in bytes.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_item_acceptable(const struct rte_flow_item *item,
			  const uint8_t *mask,
			  const uint8_t *nic_mask,
			  unsigned int size,
			  struct rte_flow_error *error)
{
	unsigned int i;

	for (i = 0; i < size; ++i)
		if ((nic_mask[i] | mask[i]) != nic_mask[i])
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  item,
						  "mask enables non supported"
						  " bits");
	if (!item->spec && (item->mask || item->last))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "mask/last without a spec is not"
					  " supported");
	if (item->spec && item->last) {
		uint8_t spec[size];
		uint8_t last[size];
		int ret;

		for (i = 0; i < size; ++i) {
			spec[i] = ((const uint8_t *)item->spec)[i] & mask[i];
			last[i] = ((const uint8_t *)item->last)[i] & mask[i];
		}
		ret = memcmp(spec, last, size);
		if (ret != 0)
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  item,
						  "range is not supported");
	}
	return 0;
}
/**
 * Add a verbs item specification into @p flow.
 *
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] src
 *   Specification to copy.
 * @param[in] size
 *   Size in bytes of the specification to copy.
 */
static void
mlx5_flow_spec_verbs_add(struct rte_flow *flow, void *src, unsigned int size)
{
	struct mlx5_flow_verbs *verbs = flow->cur_verbs;
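	/*
	 * Specifications are laid out back-to-back right after the
	 * ibv_flow_attr; verbs->size tracks the running offset so the next
	 * specification lands after the previous one.
	 */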
	if (verbs->specs) {
		void *dst;

		dst = (void *)(verbs->specs + verbs->size);
		memcpy(dst, src, size);
		++verbs->attr->num_of_specs;
	}
	verbs->size += size;
}
/**
 * Convert the @p item into a Verbs specification after ensuring the NIC
 * will understand and process it correctly.
 * If the necessary size for the conversion is greater than the @p flow_size,
 * nothing is written in @p flow, the validation is still performed.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is less than or equal to @p flow_size, the @p item has fully been
 *   converted, otherwise another call with this returned memory size should
 *   be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_item_eth(const struct rte_flow_item *item, struct rte_flow *flow,
		   const size_t flow_size, struct rte_flow_error *error)
{
	const struct rte_flow_item_eth *spec = item->spec;
	const struct rte_flow_item_eth *mask = item->mask;
	const struct rte_flow_item_eth nic_mask = {
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
		.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
		.type = RTE_BE16(0xffff),
	};
	const unsigned int size = sizeof(struct ibv_flow_spec_eth);
	struct ibv_flow_spec_eth eth = {
		.type = IBV_FLOW_SPEC_ETH,
		.size = size,
	};
	int ret;

	if (flow->layers & MLX5_FLOW_LAYER_OUTER_L2)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "L2 layers already configured");
	if (!mask)
		mask = &rte_flow_item_eth_mask;
	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
					(const uint8_t *)&nic_mask,
					sizeof(struct rte_flow_item_eth),
					error);
	if (ret)
		return ret;
	flow->layers |= MLX5_FLOW_LAYER_OUTER_L2;
	if (size > flow_size)
		return size;
	if (spec) {
		unsigned int i;

		memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
		memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
		eth.val.ether_type = spec->type;
		memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
		memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
		eth.mask.ether_type = mask->type;
		/* Remove unwanted bits from values. */
		for (i = 0; i < ETHER_ADDR_LEN; ++i) {
			eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
			eth.val.src_mac[i] &= eth.mask.src_mac[i];
		}
		eth.val.ether_type &= eth.mask.ether_type;
	}
	flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
	mlx5_flow_spec_verbs_add(flow, &eth, size);
	return size;
}
/**
 * Update the VLAN tag in the Verbs Ethernet specification.
 *
 * @param[in, out] attr
 *   Pointer to Verbs attributes structure.
 * @param[in] eth
 *   Verbs structure containing the VLAN information to copy.
 */
static void
mlx5_flow_item_vlan_update(struct ibv_flow_attr *attr,
			   struct ibv_flow_spec_eth *eth)
{
	unsigned int i;
	enum ibv_flow_spec_type search = IBV_FLOW_SPEC_ETH;
	struct ibv_spec_header *hdr = (struct ibv_spec_header *)
		((uint8_t *)attr + sizeof(struct ibv_flow_attr));
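	/* Walk the variable-size specification list using hdr->size. */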
	for (i = 0; i != attr->num_of_specs; ++i) {
		if (hdr->type == search) {
			struct ibv_flow_spec_eth *e =
				(struct ibv_flow_spec_eth *)hdr;

			e->val.vlan_tag = eth->val.vlan_tag;
			e->mask.vlan_tag = eth->mask.vlan_tag;
			e->val.ether_type = eth->val.ether_type;
			e->mask.ether_type = eth->mask.ether_type;
			break;
		}
		hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
	}
}
/**
 * Convert the @p item into @p flow (or by updating the already present
 * Ethernet Verbs) specification after ensuring the NIC will understand and
 * process it correctly.
 * If the necessary size for the conversion is greater than the @p flow_size,
 * nothing is written in @p flow, the validation is still performed.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is less than or equal to @p flow_size, the @p item has fully been
 *   converted, otherwise another call with this returned memory size should
 *   be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_item_vlan(const struct rte_flow_item *item, struct rte_flow *flow,
		    const size_t flow_size, struct rte_flow_error *error)
{
	const struct rte_flow_item_vlan *spec = item->spec;
	const struct rte_flow_item_vlan *mask = item->mask;
	const struct rte_flow_item_vlan nic_mask = {
		.tci = RTE_BE16(0x0fff),
		.inner_type = RTE_BE16(0xffff),
	};
	unsigned int size = sizeof(struct ibv_flow_spec_eth);
	struct mlx5_flow_verbs *verbs = flow->cur_verbs;
	struct ibv_flow_spec_eth eth = {
		.type = IBV_FLOW_SPEC_ETH,
		.size = size,
	};
	int ret;
	const uint32_t l34m = MLX5_FLOW_LAYER_OUTER_L3 |
			MLX5_FLOW_LAYER_OUTER_L4;
	const uint32_t vlanm = MLX5_FLOW_LAYER_OUTER_VLAN;
	const uint32_t l2m = MLX5_FLOW_LAYER_OUTER_L2;

	if (flow->layers & vlanm)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "VLAN layer already configured");
	else if ((flow->layers & l34m) != 0)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "L2 layer cannot follow L3/L4 layer");
	if (!mask)
		mask = &rte_flow_item_vlan_mask;
	ret = mlx5_flow_item_acceptable
		(item, (const uint8_t *)mask,
		 (const uint8_t *)&nic_mask,
		 sizeof(struct rte_flow_item_vlan), error);
	if (ret)
		return ret;
	if (spec) {
		eth.val.vlan_tag = spec->tci;
		eth.mask.vlan_tag = mask->tci;
		eth.val.vlan_tag &= eth.mask.vlan_tag;
		eth.val.ether_type = spec->inner_type;
		eth.mask.ether_type = mask->inner_type;
		eth.val.ether_type &= eth.mask.ether_type;
	}
	/*
	 * From Verbs perspective an empty VLAN is equivalent
	 * to a packet without VLAN layer.
	 */
	if (!eth.mask.vlan_tag)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
					  item->spec,
					  "VLAN cannot be empty");
	if (!(flow->layers & l2m)) {
		if (size <= flow_size) {
			flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
			mlx5_flow_spec_verbs_add(flow, &eth, size);
		}
	} else {
		if (verbs)
			mlx5_flow_item_vlan_update(verbs->attr, &eth);
		size = 0; /* Only an update is done in eth specification. */
	}
	flow->layers |= MLX5_FLOW_LAYER_OUTER_L2 | MLX5_FLOW_LAYER_OUTER_VLAN;
	return size;
}
/**
 * Convert the @p item into a Verbs specification after ensuring the NIC
 * will understand and process it correctly.
 * If the necessary size for the conversion is greater than the @p flow_size,
 * nothing is written in @p flow, the validation is still performed.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is less than or equal to @p flow_size, the @p item has fully been
 *   converted, otherwise another call with this returned memory size should
 *   be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_item_ipv4(const struct rte_flow_item *item, struct rte_flow *flow,
		    const size_t flow_size, struct rte_flow_error *error)
{
	const struct rte_flow_item_ipv4 *spec = item->spec;
	const struct rte_flow_item_ipv4 *mask = item->mask;
	const struct rte_flow_item_ipv4 nic_mask = {
		.hdr = {
			.src_addr = RTE_BE32(0xffffffff),
			.dst_addr = RTE_BE32(0xffffffff),
			.type_of_service = 0xff,
			.next_proto_id = 0xff,
		},
	};
	unsigned int size = sizeof(struct ibv_flow_spec_ipv4_ext);
	struct ibv_flow_spec_ipv4_ext ipv4 = {
		.type = IBV_FLOW_SPEC_IPV4_EXT,
		.size = size,
	};
	int ret;

	if (flow->layers & MLX5_FLOW_LAYER_OUTER_L3)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "multiple L3 layers not supported");
	else if (flow->layers & MLX5_FLOW_LAYER_OUTER_L4)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "L3 cannot follow an L4 layer");
	if (!mask)
		mask = &rte_flow_item_ipv4_mask;
	ret = mlx5_flow_item_acceptable
		(item, (const uint8_t *)mask,
		 (const uint8_t *)&nic_mask,
		 sizeof(struct rte_flow_item_ipv4), error);
	if (ret < 0)
		return ret;
	flow->layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
	if (spec) {
		ipv4.val = (struct ibv_flow_ipv4_ext_filter){
			.src_ip = spec->hdr.src_addr,
			.dst_ip = spec->hdr.dst_addr,
			.proto = spec->hdr.next_proto_id,
			.tos = spec->hdr.type_of_service,
		};
		ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
			.src_ip = mask->hdr.src_addr,
			.dst_ip = mask->hdr.dst_addr,
			.proto = mask->hdr.next_proto_id,
			.tos = mask->hdr.type_of_service,
		};
		/* Remove unwanted bits from values. */
		ipv4.val.src_ip &= ipv4.mask.src_ip;
		ipv4.val.dst_ip &= ipv4.mask.dst_ip;
		ipv4.val.proto &= ipv4.mask.proto;
		ipv4.val.tos &= ipv4.mask.tos;
	}
	flow->l3_protocol_en = !!ipv4.mask.proto;
	flow->l3_protocol = ipv4.val.proto;
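	/*
	 * The protocol recorded here lets the UDP/TCP items check that any
	 * next_proto_id filter agrees with the L4 item that follows.
	 */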
	if (size <= flow_size) {
		uint64_t hash_fields = IBV_RX_HASH_SRC_IPV4 |
			IBV_RX_HASH_DST_IPV4;

		if (!(flow->rss.types &
		      (ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
		       ETH_RSS_NONFRAG_IPV4_OTHER)))
			hash_fields = 0;
		flow->cur_verbs->hash_fields |= hash_fields;
		flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L3;
		mlx5_flow_spec_verbs_add(flow, &ipv4, size);
	}
	return size;
}
/**
 * Convert the @p item into a Verbs specification after ensuring the NIC
 * will understand and process it correctly.
 * If the necessary size for the conversion is greater than the @p flow_size,
 * nothing is written in @p flow, the validation is still performed.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is less than or equal to @p flow_size, the @p item has fully been
 *   converted, otherwise another call with this returned memory size should
 *   be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_item_ipv6(const struct rte_flow_item *item, struct rte_flow *flow,
		    const size_t flow_size, struct rte_flow_error *error)
{
	const struct rte_flow_item_ipv6 *spec = item->spec;
	const struct rte_flow_item_ipv6 *mask = item->mask;
	const struct rte_flow_item_ipv6 nic_mask = {
		.hdr = {
			.src_addr =
				"\xff\xff\xff\xff\xff\xff\xff\xff"
				"\xff\xff\xff\xff\xff\xff\xff\xff",
			.dst_addr =
				"\xff\xff\xff\xff\xff\xff\xff\xff"
				"\xff\xff\xff\xff\xff\xff\xff\xff",
			.vtc_flow = RTE_BE32(0xffffffff),
			.proto = 0xff,
			.hop_limits = 0xff,
		},
	};
	unsigned int size = sizeof(struct ibv_flow_spec_ipv6);
	struct ibv_flow_spec_ipv6 ipv6 = {
		.type = IBV_FLOW_SPEC_IPV6,
		.size = size,
	};
	int ret;

	if (flow->layers & MLX5_FLOW_LAYER_OUTER_L3)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "multiple L3 layers not supported");
	else if (flow->layers & MLX5_FLOW_LAYER_OUTER_L4)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "L3 cannot follow an L4 layer");
	if (!mask)
		mask = &rte_flow_item_ipv6_mask;
	ret = mlx5_flow_item_acceptable
		(item, (const uint8_t *)mask,
		 (const uint8_t *)&nic_mask,
		 sizeof(struct rte_flow_item_ipv6), error);
	if (ret < 0)
		return ret;
	flow->layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
	if (spec) {
		unsigned int i;
		uint32_t vtc_flow_val;
		uint32_t vtc_flow_mask;

		memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
		       RTE_DIM(ipv6.val.src_ip));
		memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
		       RTE_DIM(ipv6.val.dst_ip));
		memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
		       RTE_DIM(ipv6.mask.src_ip));
		memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
		       RTE_DIM(ipv6.mask.dst_ip));
		vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
		vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
		ipv6.val.flow_label =
			rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
					 IPV6_HDR_FL_SHIFT);
		ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
					 IPV6_HDR_TC_SHIFT;
		ipv6.val.next_hdr = spec->hdr.proto;
		ipv6.val.hop_limit = spec->hdr.hop_limits;
		ipv6.mask.flow_label =
			rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
					 IPV6_HDR_FL_SHIFT);
		ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
					  IPV6_HDR_TC_SHIFT;
		ipv6.mask.next_hdr = mask->hdr.proto;
		ipv6.mask.hop_limit = mask->hdr.hop_limits;
		/* Remove unwanted bits from values. */
		for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
			ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
			ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
		}
		ipv6.val.flow_label &= ipv6.mask.flow_label;
		ipv6.val.traffic_class &= ipv6.mask.traffic_class;
		ipv6.val.next_hdr &= ipv6.mask.next_hdr;
		ipv6.val.hop_limit &= ipv6.mask.hop_limit;
	}
	flow->l3_protocol_en = !!ipv6.mask.next_hdr;
	flow->l3_protocol = ipv6.val.next_hdr;
	if (size <= flow_size) {
		uint64_t hash_fields = IBV_RX_HASH_SRC_IPV6 |
			IBV_RX_HASH_DST_IPV6;

		if (!(flow->rss.types &
		      (ETH_RSS_IPV6 | ETH_RSS_NONFRAG_IPV6_OTHER)))
			hash_fields = 0;
		flow->cur_verbs->hash_fields |= hash_fields;
		flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L3;
		mlx5_flow_spec_verbs_add(flow, &ipv6, size);
	}
	return size;
}
/**
 * Convert the @p item into a Verbs specification after ensuring the NIC
 * will understand and process it correctly.
 * If the necessary size for the conversion is greater than the @p flow_size,
 * nothing is written in @p flow, the validation is still performed.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is less than or equal to @p flow_size, the @p item has fully been
 *   converted, otherwise another call with this returned memory size should
 *   be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_item_udp(const struct rte_flow_item *item, struct rte_flow *flow,
		   const size_t flow_size, struct rte_flow_error *error)
{
	const struct rte_flow_item_udp *spec = item->spec;
	const struct rte_flow_item_udp *mask = item->mask;
	unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
	struct ibv_flow_spec_tcp_udp udp = {
		.type = IBV_FLOW_SPEC_UDP,
		.size = size,
	};
	int ret;

	if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_UDP)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "protocol filtering not compatible"
					  " with UDP layer");
	if (!(flow->layers & MLX5_FLOW_LAYER_OUTER_L3))
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "L3 is mandatory to filter"
					  " on L4");
	if (flow->layers & MLX5_FLOW_LAYER_OUTER_L4)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "L4 layer is already"
					  " present");
	if (!mask)
		mask = &rte_flow_item_udp_mask;
	ret = mlx5_flow_item_acceptable
		(item, (const uint8_t *)mask,
		 (const uint8_t *)&rte_flow_item_udp_mask,
		 sizeof(struct rte_flow_item_udp), error);
	if (ret < 0)
		return ret;
	flow->layers |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
	if (spec) {
		udp.val.dst_port = spec->hdr.dst_port;
		udp.val.src_port = spec->hdr.src_port;
		udp.mask.dst_port = mask->hdr.dst_port;
		udp.mask.src_port = mask->hdr.src_port;
		/* Remove unwanted bits from values. */
		udp.val.src_port &= udp.mask.src_port;
		udp.val.dst_port &= udp.mask.dst_port;
	}
	if (size <= flow_size) {
		uint64_t hash_fields = IBV_RX_HASH_SRC_PORT_UDP |
			IBV_RX_HASH_DST_PORT_UDP;

		if (!(flow->rss.types & ETH_RSS_UDP))
			hash_fields = 0;
		flow->cur_verbs->hash_fields |= hash_fields;
		flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L4;
		mlx5_flow_spec_verbs_add(flow, &udp, size);
	}
	return size;
}
/**
 * Convert the @p item into a Verbs specification after ensuring the NIC
 * will understand and process it correctly.
 * If the necessary size for the conversion is greater than the @p flow_size,
 * nothing is written in @p flow, the validation is still performed.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is less than or equal to @p flow_size, the @p item has fully been
 *   converted, otherwise another call with this returned memory size should
 *   be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_item_tcp(const struct rte_flow_item *item, struct rte_flow *flow,
		   const size_t flow_size, struct rte_flow_error *error)
{
	const struct rte_flow_item_tcp *spec = item->spec;
	const struct rte_flow_item_tcp *mask = item->mask;
	unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
	struct ibv_flow_spec_tcp_udp tcp = {
		.type = IBV_FLOW_SPEC_TCP,
		.size = size,
	};
	int ret;

	if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_TCP)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "protocol filtering not compatible"
					  " with TCP layer");
	if (!(flow->layers & MLX5_FLOW_LAYER_OUTER_L3))
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "L3 is mandatory to filter on L4");
	if (flow->layers & MLX5_FLOW_LAYER_OUTER_L4)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "L4 layer is already present");
	if (!mask)
		mask = &rte_flow_item_tcp_mask;
	ret = mlx5_flow_item_acceptable
		(item, (const uint8_t *)mask,
		 (const uint8_t *)&rte_flow_item_tcp_mask,
		 sizeof(struct rte_flow_item_tcp), error);
	if (ret < 0)
		return ret;
	flow->layers |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
	if (spec) {
		tcp.val.dst_port = spec->hdr.dst_port;
		tcp.val.src_port = spec->hdr.src_port;
		tcp.mask.dst_port = mask->hdr.dst_port;
		tcp.mask.src_port = mask->hdr.src_port;
		/* Remove unwanted bits from values. */
		tcp.val.src_port &= tcp.mask.src_port;
		tcp.val.dst_port &= tcp.mask.dst_port;
	}
	if (size <= flow_size) {
		uint64_t hash_fields = IBV_RX_HASH_SRC_PORT_TCP |
			IBV_RX_HASH_DST_PORT_TCP;

		if (!(flow->rss.types & ETH_RSS_TCP))
			hash_fields = 0;
		flow->cur_verbs->hash_fields |= hash_fields;
		flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L4;
		mlx5_flow_spec_verbs_add(flow, &tcp, size);
	}
	return size;
}
/**
 * Convert the @p pattern into a Verbs specifications after ensuring the NIC
 * will understand and process it correctly.
 * The conversion is performed item per item, each of them is written into
 * the @p flow if its size is less than or equal to @p flow_size.
 * Validation and memory consumption computation are still performed until the
 * end of @p pattern, unless an error is encountered.
 *
 * @param[in] pattern
 *   Flow pattern.
 * @param[in, out] flow
 *   Pointer to the rte_flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small some
 *   garbage may be present.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is less than or equal to @p flow_size, the @p pattern has fully been
 *   converted, otherwise another call with this returned memory size should
 *   be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_items(const struct rte_flow_item pattern[],
		struct rte_flow *flow, const size_t flow_size,
		struct rte_flow_error *error)
{
	int remain = flow_size;
	size_t size = 0;

	for (; pattern->type != RTE_FLOW_ITEM_TYPE_END; pattern++) {
		int ret = 0;

		switch (pattern->type) {
		case RTE_FLOW_ITEM_TYPE_VOID:
			break;
		case RTE_FLOW_ITEM_TYPE_ETH:
			ret = mlx5_flow_item_eth(pattern, flow, remain, error);
			break;
		case RTE_FLOW_ITEM_TYPE_VLAN:
			ret = mlx5_flow_item_vlan(pattern, flow, remain, error);
			break;
		case RTE_FLOW_ITEM_TYPE_IPV4:
			ret = mlx5_flow_item_ipv4(pattern, flow, remain, error);
			break;
		case RTE_FLOW_ITEM_TYPE_IPV6:
			ret = mlx5_flow_item_ipv6(pattern, flow, remain, error);
			break;
		case RTE_FLOW_ITEM_TYPE_UDP:
			ret = mlx5_flow_item_udp(pattern, flow, remain, error);
			break;
		case RTE_FLOW_ITEM_TYPE_TCP:
			ret = mlx5_flow_item_tcp(pattern, flow, remain, error);
			break;
		default:
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  pattern,
						  "item not supported");
		}
		if (ret < 0)
			return ret;
		if (remain > ret)
			remain -= ret;
		else
			remain = 0;
		size += ret;
	}
	if (!flow->layers) {
		const struct rte_flow_item item = {
			.type = RTE_FLOW_ITEM_TYPE_ETH,
		};

		return mlx5_flow_item_eth(&item, flow, flow_size, error);
	}
	return size;
}
/**
 * Convert the @p action into a Verbs specification after ensuring the NIC
 * will understand and process it correctly.
 * If the necessary size for the conversion is greater than the @p flow_size,
 * nothing is written in @p flow, the validation is still performed.
 *
 * @param[in] action
 *   Action configuration.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is less than or equal to @p flow_size, the @p action has fully been
 *   converted, otherwise another call with this returned memory size should
 *   be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_action_drop(const struct rte_flow_action *action,
		      struct rte_flow *flow, const size_t flow_size,
		      struct rte_flow_error *error)
{
	unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
	struct ibv_flow_spec_action_drop drop = {
		.type = IBV_FLOW_SPEC_ACTION_DROP,
		.size = size,
	};

	if (flow->fate)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION,
					  action,
					  "multiple fate actions are not"
					  " supported");
	if (flow->modifier & (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK))
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION,
					  action,
					  "drop is not compatible with"
					  " flag/mark action");
	if (size <= flow_size)
		mlx5_flow_spec_verbs_add(flow, &drop, size);
	flow->fate |= MLX5_FLOW_FATE_DROP;
	return size;
}
/**
 * Convert the @p action into @p flow after ensuring the NIC will understand
 * and process it correctly.
 *
 * @param[in] dev
 *   Pointer to Ethernet device structure.
 * @param[in] action
 *   Action configuration.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_action_queue(struct rte_eth_dev *dev,
		       const struct rte_flow_action *action,
		       struct rte_flow *flow,
		       struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;
	const struct rte_flow_action_queue *queue = action->conf;

	if (flow->fate)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION,
					  action,
					  "multiple fate actions are not"
					  " supported");
	if (queue->index >= priv->rxqs_n)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &queue->index,
					  "queue index out of range");
	if (!(*priv->rxqs)[queue->index])
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &queue->index,
					  "queue is not configured");
	if (flow->queue)
		(*flow->queue)[0] = queue->index;
	flow->rss.queue_num = 1;
	flow->fate |= MLX5_FLOW_FATE_QUEUE;
	return 0;
}
/**
 * Ensure the @p action will be understood and used correctly by the NIC.
 *
 * @param[in] dev
 *   Pointer to Ethernet device structure.
 * @param[in] action
 *   Pointer to flow actions array.
 * @param[in, out] flow
 *   Pointer to the rte_flow structure.
 * @param[in, out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success @p flow->queue array and @p flow->rss are filled and valid.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_action_rss(struct rte_eth_dev *dev,
		     const struct rte_flow_action *action,
		     struct rte_flow *flow,
		     struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;
	const struct rte_flow_action_rss *rss = action->conf;
	unsigned int i;

	if (flow->fate)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION,
					  action,
					  "multiple fate actions are not"
					  " supported");
	if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT &&
	    rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &rss->func,
					  "RSS hash function not supported");
	if (rss->level > 1)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &rss->level,
					  "tunnel RSS is not supported");
	if (rss->key_len < MLX5_RSS_HASH_KEY_LEN)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &rss->key_len,
					  "RSS hash key too small");
	if (rss->key_len > MLX5_RSS_HASH_KEY_LEN)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &rss->key_len,
					  "RSS hash key too large");
	if (rss->queue_num > priv->config.ind_table_max_size)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &rss->queue_num,
					  "number of queues too large");
	if (rss->types & MLX5_RSS_HF_MASK)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &rss->types,
					  "some RSS protocols are not"
					  " supported");
	for (i = 0; i != rss->queue_num; ++i) {
		if (!(*priv->rxqs)[rss->queue[i]])
			return rte_flow_error_set
				(error, EINVAL,
				 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
				 &rss->queue[i],
				 "queue is not configured");
	}
	if (flow->queue)
		memcpy((*flow->queue), rss->queue,
		       rss->queue_num * sizeof(uint16_t));
	flow->rss.queue_num = rss->queue_num;
	memcpy(flow->key, rss->key, MLX5_RSS_HASH_KEY_LEN);
	flow->rss.types = rss->types;
	flow->fate |= MLX5_FLOW_FATE_RSS;
	return 0;
}
/**
 * Convert the @p action into a Verbs specification after ensuring the NIC
 * will understand and process it correctly.
 * If the necessary size for the conversion is greater than the @p flow_size,
 * nothing is written in @p flow, the validation is still performed.
 *
 * @param[in] action
 *   Action configuration.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is less than or equal to @p flow_size, the @p action has fully been
 *   converted, otherwise another call with this returned memory size should
 *   be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_action_flag(const struct rte_flow_action *action,
		      struct rte_flow *flow, const size_t flow_size,
		      struct rte_flow_error *error)
{
	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
	struct ibv_flow_spec_action_tag tag = {
		.type = IBV_FLOW_SPEC_ACTION_TAG,
		.size = size,
		.tag_id = mlx5_flow_mark_set(MLX5_FLOW_MARK_DEFAULT),
	};
	struct mlx5_flow_verbs *verbs = flow->cur_verbs;

	if (flow->modifier & MLX5_FLOW_MOD_FLAG)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION,
					  action,
					  "flag action already present");
	if (flow->fate & MLX5_FLOW_FATE_DROP)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION,
					  action,
					  "flag is not compatible with drop"
					  " action");
	if (flow->modifier & MLX5_FLOW_MOD_MARK)
		size = 0;
	else if (size <= flow_size && verbs)
		mlx5_flow_spec_verbs_add(flow, &tag, size);
	flow->modifier |= MLX5_FLOW_MOD_FLAG;
	return size;
}
/**
 * Update verbs specification to modify the flag to mark.
 *
 * @param[in, out] verbs
 *   Pointer to the mlx5_flow_verbs structure.
 * @param[in] mark_id
 *   Mark identifier to replace the flag.
 */
static void
mlx5_flow_verbs_mark_update(struct mlx5_flow_verbs *verbs, uint32_t mark_id)
{
	struct ibv_spec_header *hdr;
	int i;

	if (!verbs)
		return;
	/* Update Verbs specification. */
	hdr = (struct ibv_spec_header *)verbs->specs;
	if (!hdr)
		return;
	for (i = 0; i != verbs->attr->num_of_specs; ++i) {
		if (hdr->type == IBV_FLOW_SPEC_ACTION_TAG) {
			struct ibv_flow_spec_action_tag *t =
				(struct ibv_flow_spec_action_tag *)hdr;

			t->tag_id = mlx5_flow_mark_set(mark_id);
			break;
		}
		hdr = (struct ibv_spec_header *)((uintptr_t)hdr + hdr->size);
	}
}
/**
 * Convert the @p action into @p flow (or by updating the already present
 * Flag Verbs specification) after ensuring the NIC will understand and
 * process it correctly.
 * If the necessary size for the conversion is greater than the @p flow_size,
 * nothing is written in @p flow, the validation is still performed.
 *
 * @param[in] action
 *   Action configuration.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is less than or equal to @p flow_size, the @p action has fully been
 *   converted, otherwise another call with this returned memory size should
 *   be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_action_mark(const struct rte_flow_action *action,
		      struct rte_flow *flow, const size_t flow_size,
		      struct rte_flow_error *error)
{
	const struct rte_flow_action_mark *mark = action->conf;
	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
	struct ibv_flow_spec_action_tag tag = {
		.type = IBV_FLOW_SPEC_ACTION_TAG,
		.size = size,
	};
	struct mlx5_flow_verbs *verbs = flow->cur_verbs;

	if (!mark)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION,
					  action,
					  "configuration cannot be null");
	if (mark->id >= MLX5_FLOW_MARK_MAX)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &mark->id,
					  "mark id must be in the range"
					  " 0 <= id < "
					  RTE_STR(MLX5_FLOW_MARK_MAX));
	if (flow->modifier & MLX5_FLOW_MOD_MARK)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION,
					  action,
					  "mark action already present");
	if (flow->fate & MLX5_FLOW_FATE_DROP)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION,
					  action,
					  "mark is not compatible with drop"
					  " action");
	if (flow->modifier & MLX5_FLOW_MOD_FLAG) {
		mlx5_flow_verbs_mark_update(verbs, mark->id);
		size = 0;
	} else if (size <= flow_size) {
		tag.tag_id = mlx5_flow_mark_set(mark->id);
		mlx5_flow_spec_verbs_add(flow, &tag, size);
	}
	flow->modifier |= MLX5_FLOW_MOD_MARK;
	return size;
}
/**
 * Convert the @p actions into @p flow after ensuring the NIC will understand
 * and process it correctly.
 * The conversion is performed action per action, each of them is written into
 * the @p flow if its size is less than or equal to @p flow_size.
 * Validation and memory consumption computation are still performed until the
 * end of @p actions, unless an error is encountered.
 *
 * @param[in] dev
 *   Pointer to Ethernet device structure.
 * @param[in] actions
 *   Pointer to flow actions array.
 * @param[in, out] flow
 *   Pointer to the rte_flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small some
 *   garbage may be present.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is less than or equal to @p flow_size, the @p actions have fully been
 *   converted, otherwise another call with this returned memory size should
 *   be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_actions(struct rte_eth_dev *dev,
		  const struct rte_flow_action actions[],
		  struct rte_flow *flow, const size_t flow_size,
		  struct rte_flow_error *error)
{
	size_t size = 0;
	int remain = flow_size;
	int ret = 0;

	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
		switch (actions->type) {
		case RTE_FLOW_ACTION_TYPE_VOID:
			break;
		case RTE_FLOW_ACTION_TYPE_FLAG:
			ret = mlx5_flow_action_flag(actions, flow, remain,
						    error);
			break;
		case RTE_FLOW_ACTION_TYPE_MARK:
			ret = mlx5_flow_action_mark(actions, flow, remain,
						    error);
			break;
		case RTE_FLOW_ACTION_TYPE_DROP:
			ret = mlx5_flow_action_drop(actions, flow, remain,
						    error);
			break;
		case RTE_FLOW_ACTION_TYPE_QUEUE:
			ret = mlx5_flow_action_queue(dev, actions, flow, error);
			break;
		case RTE_FLOW_ACTION_TYPE_RSS:
			ret = mlx5_flow_action_rss(dev, actions, flow, error);
			break;
		default:
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ACTION,
						  actions,
						  "action not supported");
		}
		if (ret < 0)
			return ret;
		if (remain > ret)
			remain -= ret;
		else
			remain = 0;
		size += ret;
	}
	if (!flow->fate)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
					  NULL,
					  "no fate action found");
	return size;
}
/**
 * Convert the @p attributes, @p pattern, @p actions, into a flow for the NIC
 * after ensuring the NIC will understand and process it correctly.
 * The conversion is only performed item/action per item/action, each of
 * them is written into the @p flow if its size is less than or equal to @p
 * flow_size.
 * Validation and memory consumption computation are still performed until the
 * end, unless an error is encountered.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small some
 *   garbage may be present.
 * @param[in] attributes
 *   Flow rule attributes.
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is less than or equal to @p flow_size, the flow has fully been converted
 *   and can be applied, otherwise another call with this returned memory
 *   size should be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_merge(struct rte_eth_dev *dev, struct rte_flow *flow,
		const size_t flow_size,
		const struct rte_flow_attr *attributes,
		const struct rte_flow_item pattern[],
		const struct rte_flow_action actions[],
		struct rte_flow_error *error)
{
	struct rte_flow local_flow = { .layers = 0, };
	size_t size = sizeof(*flow);
	union {
		struct rte_flow_expand_rss buf;
		uint8_t buffer[2048];
	} expand_buffer;
	struct rte_flow_expand_rss *buf = &expand_buffer.buf;
	struct mlx5_flow_verbs *original_verbs = NULL;
	size_t original_verbs_size = 0;
	uint32_t original_layers = 0;
	int expanded_pattern_idx = 0;
	int ret;
	uint32_t i;

	if (size > flow_size)
		flow = &local_flow;
	ret = mlx5_flow_attributes(dev, attributes, flow, error);
	if (ret < 0)
		return ret;
	ret = mlx5_flow_actions(dev, actions, &local_flow, 0, error);
	if (ret < 0)
		return ret;
	if (local_flow.rss.types) {
		ret = rte_flow_expand_rss(buf, sizeof(expand_buffer.buffer),
					  pattern, local_flow.rss.types,
					  mlx5_support_expansion,
					  MLX5_EXPANSION_ROOT);
		assert(ret > 0 &&
		       (unsigned int)ret < sizeof(expand_buffer.buffer));
	} else {
		buf->entries = 1;
		buf->entry[0].pattern = (void *)(uintptr_t)pattern;
	}
	size += RTE_ALIGN_CEIL(local_flow.rss.queue_num * sizeof(uint16_t),
			       sizeof(void *));
	if (size <= flow_size)
		flow->queue = (void *)(flow + 1);
	LIST_INIT(&flow->verbs);
	flow->layers = 0;
	flow->modifier = 0;
	flow->fate = 0;
	for (i = 0; i != buf->entries; ++i) {
		size_t off = size;
		size_t off2;

		flow->layers = original_layers;
		size += sizeof(struct ibv_flow_attr) +
			sizeof(struct mlx5_flow_verbs);
		off2 = size;
		if (size < flow_size) {
			flow->cur_verbs = (void *)((uintptr_t)flow + off);
			flow->cur_verbs->attr = (void *)(flow->cur_verbs + 1);
			flow->cur_verbs->specs =
				(void *)(flow->cur_verbs->attr + 1);
		}
		/* First iteration convert the pattern into Verbs. */
		if (i == 0) {
			/* Actions don't need to be converted several times. */
			ret = mlx5_flow_actions(dev, actions, flow,
						(size < flow_size) ?
						flow_size - size : 0,
						error);
			if (ret < 0)
				return ret;
			size += ret;
		} else {
			/*
			 * Next iteration means the pattern has already been
			 * converted and an expansion is necessary to match
			 * the user RSS request. For that only the expanded
			 * items will be converted, the common part with the
			 * user pattern is just copied into the next buffer
			 * zone.
			 */
			size += original_verbs_size;
			if (size < flow_size) {
				rte_memcpy(flow->cur_verbs->attr,
					   original_verbs->attr,
					   original_verbs_size +
					   sizeof(struct ibv_flow_attr));
				flow->cur_verbs->size = original_verbs_size;
			}
		}
		ret = mlx5_flow_items
			((const struct rte_flow_item *)
			 &buf->entry[i].pattern[expanded_pattern_idx],
			 flow,
			 (size < flow_size) ? flow_size - size : 0, error);
		if (ret < 0)
			return ret;
		size += ret;
		if (size <= flow_size) {
			mlx5_flow_adjust_priority(dev, flow);
			LIST_INSERT_HEAD(&flow->verbs, flow->cur_verbs, next);
		}
		/*
		 * Keep a pointer of the first Verbs conversion and the layers
		 * it has encountered.
		 */
		if (i == 0) {
			original_verbs = flow->cur_verbs;
			original_verbs_size = size - off2;
			original_layers = flow->layers;
			/*
			 * Move the index of the expanded pattern to the
			 * first item not addressed yet.
			 */
			if (pattern->type == RTE_FLOW_ITEM_TYPE_END) {
				expanded_pattern_idx++;
			} else {
				const struct rte_flow_item *item = pattern;

				for (item = pattern;
				     item->type != RTE_FLOW_ITEM_TYPE_END;
				     ++item)
					expanded_pattern_idx++;
			}
		}
	}
	/* Restore the origin layers in the flow. */
	flow->layers = original_layers;
	return size;
}
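/*
 * Callers typically invoke mlx5_flow_merge() twice: once with a zero
 * flow_size to learn the required memory size, then again on a buffer of
 * that size, as mlx5_flow_list_create() does below.
 */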
/**
 * Set the Rx queue mark flag if the flow has a mark or flag modifier.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[in] flow
 *   Pointer to flow structure.
 */
static void
mlx5_flow_rxq_mark_set(struct rte_eth_dev *dev, struct rte_flow *flow)
{
	struct priv *priv = dev->data->dev_private;

	if (flow->modifier & (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK)) {
		unsigned int i;

		for (i = 0; i != flow->rss.queue_num; ++i) {
			int idx = (*flow->queue)[i];
			struct mlx5_rxq_ctrl *rxq_ctrl =
				container_of((*priv->rxqs)[idx],
					     struct mlx5_rxq_ctrl, rxq);

			rxq_ctrl->rxq.mark = 1;
			rxq_ctrl->flow_mark_n++;
		}
	}
}
/**
 * Clear the Rx queue mark associated with the @p flow if no other flow uses
 * it with a mark request.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[in] flow
 *   Pointer to the flow.
 */
static void
mlx5_flow_rxq_mark_trim(struct rte_eth_dev *dev, struct rte_flow *flow)
{
	struct priv *priv = dev->data->dev_private;

	if (flow->modifier & (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK)) {
		unsigned int i;

		for (i = 0; i != flow->rss.queue_num; ++i) {
			int idx = (*flow->queue)[i];
			struct mlx5_rxq_ctrl *rxq_ctrl =
				container_of((*priv->rxqs)[idx],
					     struct mlx5_rxq_ctrl, rxq);

			rxq_ctrl->flow_mark_n--;
			rxq_ctrl->rxq.mark = !!rxq_ctrl->flow_mark_n;
		}
	}
}
/**
 * Clear the mark bit in all Rx queues.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 */
static void
mlx5_flow_rxq_mark_clear(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	unsigned int i;
	unsigned int idx;

	for (idx = 0, i = 0; idx != priv->rxqs_n; ++i) {
		struct mlx5_rxq_ctrl *rxq_ctrl;

		if (!(*priv->rxqs)[i])
			continue;
		rxq_ctrl = container_of((*priv->rxqs)[i],
					struct mlx5_rxq_ctrl, rxq);
		rxq_ctrl->flow_mark_n = 0;
		rxq_ctrl->rxq.mark = 0;
		++idx;
	}
}
/**
 * Validate a flow supported by the NIC.
 *
 * @see rte_flow_validate()
 * @see rte_flow_ops
 */
int
mlx5_flow_validate(struct rte_eth_dev *dev,
		   const struct rte_flow_attr *attr,
		   const struct rte_flow_item items[],
		   const struct rte_flow_action actions[],
		   struct rte_flow_error *error)
{
	int ret = mlx5_flow_merge(dev, NULL, 0, attr, items, actions, error);

	if (ret < 0)
		return ret;
	return 0;
}
/**
 * Remove the flow.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[in, out] flow
 *   Pointer to flow structure.
 */
static void
mlx5_flow_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
{
	struct mlx5_flow_verbs *verbs;

	LIST_FOREACH(verbs, &flow->verbs, next) {
		if (verbs->flow) {
			claim_zero(mlx5_glue->destroy_flow(verbs->flow));
			verbs->flow = NULL;
		}
		if (verbs->hrxq) {
			if (flow->fate & MLX5_FLOW_FATE_DROP)
				mlx5_hrxq_drop_release(dev);
			else
				mlx5_hrxq_release(dev, verbs->hrxq);
			verbs->hrxq = NULL;
		}
	}
}
/**
 * Apply the flow.
 *
 * @param[in] dev
 *   Pointer to Ethernet device structure.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
		struct rte_flow_error *error)
{
	struct mlx5_flow_verbs *verbs;
	int err;

	LIST_FOREACH(verbs, &flow->verbs, next) {
		if (flow->fate & MLX5_FLOW_FATE_DROP) {
			verbs->hrxq = mlx5_hrxq_drop_new(dev);
			if (!verbs->hrxq) {
				rte_flow_error_set
					(error, errno,
					 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
					 NULL, "cannot get drop hash queue");
				goto error;
			}
		} else {
			struct mlx5_hrxq *hrxq;

			hrxq = mlx5_hrxq_get(dev, flow->key,
					     MLX5_RSS_HASH_KEY_LEN,
					     verbs->hash_fields,
					     (*flow->queue),
					     flow->rss.queue_num);
			if (!hrxq)
				hrxq = mlx5_hrxq_new(dev, flow->key,
						     MLX5_RSS_HASH_KEY_LEN,
						     verbs->hash_fields,
						     (*flow->queue),
						     flow->rss.queue_num);
			if (!hrxq) {
				rte_flow_error_set
					(error, rte_errno,
					 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
					 NULL, "cannot get hash queue");
				goto error;
			}
			verbs->hrxq = hrxq;
		}
		verbs->flow =
			mlx5_glue->create_flow(verbs->hrxq->qp, verbs->attr);
		if (!verbs->flow) {
			rte_flow_error_set(error, errno,
					   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
					   NULL,
					   "hardware refuses to create flow");
			goto error;
		}
	}
	return 0;
error:
	err = rte_errno; /* Save rte_errno before cleanup. */
	LIST_FOREACH(verbs, &flow->verbs, next) {
		if (verbs->hrxq) {
			if (flow->fate & MLX5_FLOW_FATE_DROP)
				mlx5_hrxq_drop_release(dev);
			else
				mlx5_hrxq_release(dev, verbs->hrxq);
			verbs->hrxq = NULL;
		}
	}
	rte_errno = err; /* Restore rte_errno. */
	return -rte_errno;
}
/**
 * Create a flow and add it to @p list.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param list
 *   Pointer to a TAILQ flow list.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] items
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A flow on success, NULL otherwise and rte_errno is set.
 */
static struct rte_flow *
mlx5_flow_list_create(struct rte_eth_dev *dev,
		      struct mlx5_flows *list,
		      const struct rte_flow_attr *attr,
		      const struct rte_flow_item items[],
		      const struct rte_flow_action actions[],
		      struct rte_flow_error *error)
{
	struct rte_flow *flow = NULL;
	size_t size = 0;
	int ret;

	ret = mlx5_flow_merge(dev, flow, size, attr, items, actions, error);
	if (ret < 0)
		return NULL;
	size = ret;
	flow = rte_calloc(__func__, 1, size, 0);
	if (!flow) {
		rte_flow_error_set(error, ENOMEM,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "not enough memory to create flow");
		return NULL;
	}
	ret = mlx5_flow_merge(dev, flow, size, attr, items, actions, error);
	if (ret < 0) {
		rte_free(flow);
		return NULL;
	}
	assert((size_t)ret == size);
	if (dev->data->dev_started) {
		ret = mlx5_flow_apply(dev, flow, error);
		if (ret < 0) {
			ret = rte_errno; /* Save rte_errno before cleanup. */
			if (flow) {
				mlx5_flow_remove(dev, flow);
				rte_free(flow);
			}
			rte_errno = ret; /* Restore rte_errno. */
			return NULL;
		}
	}
	mlx5_flow_rxq_mark_set(dev, flow);
	TAILQ_INSERT_TAIL(list, flow, next);
	return flow;
}
/**
 * Create a flow.
 *
 * @see rte_flow_create()
 * @see rte_flow_ops
 */
struct rte_flow *
mlx5_flow_create(struct rte_eth_dev *dev,
		 const struct rte_flow_attr *attr,
		 const struct rte_flow_item items[],
		 const struct rte_flow_action actions[],
		 struct rte_flow_error *error)
{
	return mlx5_flow_list_create
		(dev, &((struct priv *)dev->data->dev_private)->flows,
		 attr, items, actions, error);
}
/**
 * Destroy a flow in a list.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param list
 *   Pointer to a TAILQ flow list.
 * @param[in] flow
 *   Flow to destroy.
 */
static void
mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
		       struct rte_flow *flow)
{
	mlx5_flow_remove(dev, flow);
	TAILQ_REMOVE(list, flow, next);
	mlx5_flow_rxq_mark_trim(dev, flow);
	rte_free(flow);
}
/**
 * Destroy all flows.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param list
 *   Pointer to a TAILQ flow list.
 */
void
mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
{
	while (!TAILQ_EMPTY(list)) {
		struct rte_flow *flow;

		flow = TAILQ_FIRST(list);
		mlx5_flow_list_destroy(dev, list, flow);
	}
}
/**
 * Remove all flows.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param list
 *   Pointer to a TAILQ flow list.
 */
void
mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
{
	struct rte_flow *flow;

	TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next)
		mlx5_flow_remove(dev, flow);
	mlx5_flow_rxq_mark_clear(dev);
}
/**
 * Add all flows.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param list
 *   Pointer to a TAILQ flow list.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
{
	struct rte_flow *flow;
	struct rte_flow_error error;
	int ret = 0;

	TAILQ_FOREACH(flow, list, next) {
		ret = mlx5_flow_apply(dev, flow, &error);
		if (ret < 0)
			goto error;
		mlx5_flow_rxq_mark_set(dev, flow);
	}
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	mlx5_flow_stop(dev, list);
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}
/**
 * Verify the flow list is empty.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   The number of flows not released.
 */
int
mlx5_flow_verify(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	struct rte_flow *flow;
	int ret = 0;

	TAILQ_FOREACH(flow, &priv->flows, next) {
		DRV_LOG(DEBUG, "port %u flow %p still referenced",
			dev->data->port_id, (void *)flow);
		++ret;
	}
	return ret;
}
/**
 * Enable a control flow configured from the control plane.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param eth_spec
 *   An Ethernet flow spec to apply.
 * @param eth_mask
 *   An Ethernet flow mask to apply.
 * @param vlan_spec
 *   A VLAN flow spec to apply.
 * @param vlan_mask
 *   A VLAN flow mask to apply.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
		    struct rte_flow_item_eth *eth_spec,
		    struct rte_flow_item_eth *eth_mask,
		    struct rte_flow_item_vlan *vlan_spec,
		    struct rte_flow_item_vlan *vlan_mask)
{
	struct priv *priv = dev->data->dev_private;
	const struct rte_flow_attr attr = {
		.ingress = 1,
		.priority = MLX5_FLOW_PRIO_RSVD,
	};
	struct rte_flow_item items[] = {
		{
			.type = RTE_FLOW_ITEM_TYPE_ETH,
			.spec = eth_spec,
			.mask = eth_mask,
		},
		{
			.type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
				RTE_FLOW_ITEM_TYPE_END,
			.spec = vlan_spec,
			.mask = vlan_mask,
		},
		{
			.type = RTE_FLOW_ITEM_TYPE_END,
		},
	};
	uint16_t queue[priv->reta_idx_n];
	struct rte_flow_action_rss action_rss = {
		.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
		.level = 0,
		.types = priv->rss_conf.rss_hf,
		.key_len = priv->rss_conf.rss_key_len,
		.queue_num = priv->reta_idx_n,
		.key = priv->rss_conf.rss_key,
		.queue = queue,
	};
	struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_RSS,
			.conf = &action_rss,
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		},
	};
	struct rte_flow *flow;
	struct rte_flow_error error;
	unsigned int i;

	if (!priv->reta_idx_n) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	for (i = 0; i != priv->reta_idx_n; ++i)
		queue[i] = (*priv->reta_idx)[i];
	flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
				     actions, &error);
	if (!flow)
		return -rte_errno;
	return 0;
}
/**
 * Enable a flow control configured from the control plane.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param eth_spec
 *   An Ethernet flow spec to apply.
 * @param eth_mask
 *   An Ethernet flow mask to apply.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_ctrl_flow(struct rte_eth_dev *dev,
	       struct rte_flow_item_eth *eth_spec,
	       struct rte_flow_item_eth *eth_mask)
{
	return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
}
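/*
 * Usage sketch (illustrative only): matching every broadcast frame; the
 * same structure can serve as both spec and mask.
 *
 *	struct rte_flow_item_eth bcast = {
 *		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *	};
 *
 *	if (mlx5_ctrl_flow(dev, &bcast, &bcast))
 *		return -rte_errno;
 */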
/**
 * Destroy a flow.
 *
 * @see rte_flow_destroy()
 * @see rte_flow_ops
 */
int
mlx5_flow_destroy(struct rte_eth_dev *dev,
		  struct rte_flow *flow,
		  struct rte_flow_error *error __rte_unused)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_destroy(dev, &priv->flows, flow);
	return 0;
}
/**
 * Destroy all flows.
 *
 * @see rte_flow_flush()
 * @see rte_flow_ops
 */
int
mlx5_flow_flush(struct rte_eth_dev *dev,
		struct rte_flow_error *error __rte_unused)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_flush(dev, &priv->flows);
	return 0;
}
/**
 * Isolated mode.
 *
 * @see rte_flow_isolate()
 * @see rte_flow_ops
 */
int
mlx5_flow_isolate(struct rte_eth_dev *dev,
		  int enable,
		  struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	if (dev->data->dev_started) {
		rte_flow_error_set(error, EBUSY,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "port must be stopped first");
		return -rte_errno;
	}
	priv->isolated = !!enable;
	if (enable)
		dev->dev_ops = &mlx5_dev_ops_isolate;
	else
		dev->dev_ops = &mlx5_dev_ops;
	return 0;
}
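/*
 * Usage sketch (illustrative only): applications enter isolated mode
 * through rte_flow_isolate() while the port is stopped; "port_id" is an
 * assumption.
 *
 *	struct rte_flow_error error;
 *
 *	if (rte_flow_isolate(port_id, 1, &error))
 *		printf("cannot isolate port: %s\n", error.message);
 *	rte_eth_dev_start(port_id);
 */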
/**
 * Convert a flow director filter to a generic flow.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Flow director filter to add.
 * @param attributes
 *   Generic flow parameters structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
			 const struct rte_eth_fdir_filter *fdir_filter,
			 struct mlx5_fdir *attributes)
{
	struct priv *priv = dev->data->dev_private;
	const struct rte_eth_fdir_input *input = &fdir_filter->input;
	const struct rte_eth_fdir_masks *mask =
		&dev->data->dev_conf.fdir_conf.mask;

	/* Validate queue number. */
	if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
		DRV_LOG(ERR, "port %u invalid queue number %d",
			dev->data->port_id, fdir_filter->action.rx_queue);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	attributes->attr.ingress = 1;
	attributes->items[0] = (struct rte_flow_item) {
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.spec = &attributes->l2,
		.mask = &attributes->l2_mask,
	};
	switch (fdir_filter->action.behavior) {
	case RTE_ETH_FDIR_ACCEPT:
		attributes->actions[0] = (struct rte_flow_action){
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &attributes->queue,
		};
		break;
	case RTE_ETH_FDIR_REJECT:
		attributes->actions[0] = (struct rte_flow_action){
			.type = RTE_FLOW_ACTION_TYPE_DROP,
		};
		break;
	default:
		DRV_LOG(ERR, "port %u invalid behavior %d",
			dev->data->port_id,
			fdir_filter->action.behavior);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	attributes->queue.index = fdir_filter->action.rx_queue;
	/* Handle L3. */
	switch (fdir_filter->input.flow_type) {
	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = input->flow.ip4_flow.src_ip,
			.dst_addr = input->flow.ip4_flow.dst_ip,
			.time_to_live = input->flow.ip4_flow.ttl,
			.type_of_service = input->flow.ip4_flow.tos,
			.next_proto_id = input->flow.ip4_flow.proto,
		};
		attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = mask->ipv4_mask.src_ip,
			.dst_addr = mask->ipv4_mask.dst_ip,
			.time_to_live = mask->ipv4_mask.ttl,
			.type_of_service = mask->ipv4_mask.tos,
			.next_proto_id = mask->ipv4_mask.proto,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV4,
			.spec = &attributes->l3,
			.mask = &attributes->l3_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
			.hop_limits = input->flow.ipv6_flow.hop_limits,
			.proto = input->flow.ipv6_flow.proto,
		};
		memcpy(attributes->l3.ipv6.hdr.src_addr,
		       input->flow.ipv6_flow.src_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
		memcpy(attributes->l3.ipv6.hdr.dst_addr,
		       input->flow.ipv6_flow.dst_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
		memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
		       mask->ipv6_mask.src_ip,
		       RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
		memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
		       mask->ipv6_mask.dst_ip,
		       RTE_DIM(attributes->l3_mask.ipv6.hdr.dst_addr));
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV6,
			.spec = &attributes->l3,
			.mask = &attributes->l3_mask,
		};
		break;
	default:
		DRV_LOG(ERR, "port %u invalid flow type %d",
			dev->data->port_id, fdir_filter->input.flow_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	/* Handle L4. */
	switch (fdir_filter->input.flow_type) {
	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
		attributes->l4.udp.hdr = (struct udp_hdr){
			.src_port = input->flow.udp4_flow.src_port,
			.dst_port = input->flow.udp4_flow.dst_port,
		};
		attributes->l4_mask.udp.hdr = (struct udp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_UDP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
		attributes->l4.tcp.hdr = (struct tcp_hdr){
			.src_port = input->flow.tcp4_flow.src_port,
			.dst_port = input->flow.tcp4_flow.dst_port,
		};
		attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_TCP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
		attributes->l4.udp.hdr = (struct udp_hdr){
			.src_port = input->flow.udp6_flow.src_port,
			.dst_port = input->flow.udp6_flow.dst_port,
		};
		attributes->l4_mask.udp.hdr = (struct udp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_UDP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
		attributes->l4.tcp.hdr = (struct tcp_hdr){
			.src_port = input->flow.tcp6_flow.src_port,
			.dst_port = input->flow.tcp6_flow.dst_port,
		};
		attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_TCP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
		break;
	default:
		DRV_LOG(ERR, "port %u invalid flow type %d",
			dev->data->port_id, fdir_filter->input.flow_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	return 0;
}
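/*
 * Input sketch (illustrative only): a filter this function converts into
 * the ETH/IPV4/UDP pattern above, steering DNS traffic to queue 3; all
 * field values are assumptions.
 *
 *	struct rte_eth_fdir_filter filter = {
 *		.input = {
 *			.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
 *			.flow.udp4_flow = {
 *				.ip.dst_ip = RTE_BE32(IPv4(192, 168, 0, 1)),
 *				.dst_port = RTE_BE16(53),
 *			},
 *		},
 *		.action = {
 *			.rx_queue = 3,
 *			.behavior = RTE_ETH_FDIR_ACCEPT,
 *		},
 *	};
 */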
/**
 * Add new flow director filter and store it in list.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Flow director filter to add.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_add(struct rte_eth_dev *dev,
		     const struct rte_eth_fdir_filter *fdir_filter)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_fdir attributes = {
		.attr.group = 0,
		.l2_mask = {
			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.type = 0,
		},
	};
	struct rte_flow_error error;
	struct rte_flow *flow;
	int ret;

	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
	if (ret)
		return ret;
	flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
				     attributes.items, attributes.actions,
				     &error);
	if (flow) {
		DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
			(void *)flow);
		return 0;
	}
	return -rte_errno;
}
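/*
 * Usage sketch (illustrative only): applications reach this path through
 * the legacy filter API; "filter" as sketched above, "port_id" assumed.
 *
 *	int ret = rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
 *					  RTE_ETH_FILTER_ADD, &filter);
 *	if (ret)
 *		printf("flow director add failed: %s\n", strerror(-ret));
 */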
/**
 * Delete specific filter.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Filter to be deleted.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_delete(struct rte_eth_dev *dev __rte_unused,
			const struct rte_eth_fdir_filter *fdir_filter
			__rte_unused)
{
	rte_errno = ENOTSUP;
	return -rte_errno;
}
/**
 * Update queue for specific filter.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Filter to be updated.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_update(struct rte_eth_dev *dev,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	int ret;

	ret = mlx5_fdir_filter_delete(dev, fdir_filter);
	if (ret)
		return ret;
	return mlx5_fdir_filter_add(dev, fdir_filter);
}
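/*
 * Note (behavioral sketch, application side, "port_id" and "filter"
 * assumed): since mlx5_fdir_filter_delete() above always fails with
 * ENOTSUP, an update request currently returns that error before the add
 * step is reached.
 *
 *	int ret = rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
 *					  RTE_ETH_FILTER_UPDATE, &filter);
 *	// currently always -ENOTSUP: the delete step fails first
 */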
/**
 * Flush all filters.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
static void
mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_flush(dev, &priv->flows);
}
/**
 * Get flow director information.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[out] fdir_info
 *   Resulting flow director information.
 */
static void
mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
{
	struct rte_eth_fdir_masks *mask =
		&dev->data->dev_conf.fdir_conf.mask;

	fdir_info->mode = dev->data->dev_conf.fdir_conf.mode;
	fdir_info->guarant_spc = 0;
	rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
	fdir_info->max_flexpayload = 0;
	fdir_info->flow_types_mask[0] = 0;
	fdir_info->flex_payload_unit = 0;
	fdir_info->max_flex_payload_segment_num = 0;
	fdir_info->flex_payload_limit = 0;
	memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
}
/**
 * Deal with flow director operations.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
		    void *arg)
{
	enum rte_fdir_mode fdir_mode =
		dev->data->dev_conf.fdir_conf.mode;

	if (filter_op == RTE_ETH_FILTER_NOP)
		return 0;
	if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
	    fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
		DRV_LOG(ERR, "port %u flow director mode %d not supported",
			dev->data->port_id, fdir_mode);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	switch (filter_op) {
	case RTE_ETH_FILTER_ADD:
		return mlx5_fdir_filter_add(dev, arg);
	case RTE_ETH_FILTER_UPDATE:
		return mlx5_fdir_filter_update(dev, arg);
	case RTE_ETH_FILTER_DELETE:
		return mlx5_fdir_filter_delete(dev, arg);
	case RTE_ETH_FILTER_FLUSH:
		mlx5_fdir_filter_flush(dev);
		break;
	case RTE_ETH_FILTER_INFO:
		mlx5_fdir_info_get(dev, arg);
		break;
	default:
		DRV_LOG(DEBUG, "port %u unknown operation %u",
			dev->data->port_id, filter_op);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	return 0;
}
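/*
 * Usage sketch (illustrative only): querying flow director information
 * goes through the same entry point; "port_id" is an assumption.
 *
 *	struct rte_eth_fdir_info info;
 *
 *	memset(&info, 0, sizeof(info));
 *	rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
 *				RTE_ETH_FILTER_INFO, &info);
 */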
/**
 * Manage filter operations.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param filter_type
 *   Filter type.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
		     enum rte_filter_type filter_type,
		     enum rte_filter_op filter_op,
		     void *arg)
{
	switch (filter_type) {
	case RTE_ETH_FILTER_GENERIC:
		if (filter_op != RTE_ETH_FILTER_GET) {
			rte_errno = EINVAL;
			return -rte_errno;
		}
		*(const void **)arg = &mlx5_flow_ops;
		return 0;
	case RTE_ETH_FILTER_FDIR:
		return mlx5_fdir_ctrl_func(dev, filter_op, arg);
	default:
		DRV_LOG(ERR, "port %u filter type (%d) not supported",
			dev->data->port_id, filter_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	return 0;
}
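/*
 * Usage sketch (illustrative only): this is how the ethdev layer fetches
 * the generic flow ops before dispatching rte_flow_*() calls to this
 * driver (see rte_flow_ops_get() in librte_ethdev); "port_id" assumed.
 *
 *	const struct rte_flow_ops *ops = NULL;
 *
 *	rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_GENERIC,
 *				RTE_ETH_FILTER_GET, &ops);
 *	// on success, ops now points to mlx5_flow_ops
 */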