/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2016 6WIND S.A.
 * Copyright 2016 Mellanox Technologies, Ltd
 */

#include <sys/queue.h>
#include <stdint.h>
#include <string.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_common.h>
#include <rte_ether.h>
#include <rte_eth_ctrl.h>
#include <rte_ethdev_driver.h>
#include <rte_flow.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>
#include <rte_ip.h>

#include "mlx5.h"
#include "mlx5_defs.h"
#include "mlx5_prm.h"
#include "mlx5_rxtx.h"
#include "mlx5_glue.h"

/* Dev ops structure defined in mlx5.c */
extern const struct eth_dev_ops mlx5_dev_ops;
extern const struct eth_dev_ops mlx5_dev_ops_isolate;

/* Pattern outer layer bits. */
#define MLX5_FLOW_LAYER_OUTER_L2 (1u << 0)
#define MLX5_FLOW_LAYER_OUTER_L3_IPV4 (1u << 1)
#define MLX5_FLOW_LAYER_OUTER_L3_IPV6 (1u << 2)
#define MLX5_FLOW_LAYER_OUTER_L4_UDP (1u << 3)
#define MLX5_FLOW_LAYER_OUTER_L4_TCP (1u << 4)
#define MLX5_FLOW_LAYER_OUTER_VLAN (1u << 5)

/* Pattern inner layer bits. */
#define MLX5_FLOW_LAYER_INNER_L2 (1u << 6)
#define MLX5_FLOW_LAYER_INNER_L3_IPV4 (1u << 7)
#define MLX5_FLOW_LAYER_INNER_L3_IPV6 (1u << 8)
#define MLX5_FLOW_LAYER_INNER_L4_UDP (1u << 9)
#define MLX5_FLOW_LAYER_INNER_L4_TCP (1u << 10)
#define MLX5_FLOW_LAYER_INNER_VLAN (1u << 11)

/* Outer masks. */
#define MLX5_FLOW_LAYER_OUTER_L3 \
	(MLX5_FLOW_LAYER_OUTER_L3_IPV4 | MLX5_FLOW_LAYER_OUTER_L3_IPV6)
#define MLX5_FLOW_LAYER_OUTER_L4 \
	(MLX5_FLOW_LAYER_OUTER_L4_UDP | MLX5_FLOW_LAYER_OUTER_L4_TCP)
#define MLX5_FLOW_LAYER_OUTER \
	(MLX5_FLOW_LAYER_OUTER_L2 | MLX5_FLOW_LAYER_OUTER_L3 | \
	 MLX5_FLOW_LAYER_OUTER_L4)

/* Tunnel masks (no tunnel item is supported yet). */
#define MLX5_FLOW_LAYER_TUNNEL 0

/* Inner masks. */
#define MLX5_FLOW_LAYER_INNER_L3 \
	(MLX5_FLOW_LAYER_INNER_L3_IPV4 | MLX5_FLOW_LAYER_INNER_L3_IPV6)
#define MLX5_FLOW_LAYER_INNER_L4 \
	(MLX5_FLOW_LAYER_INNER_L4_UDP | MLX5_FLOW_LAYER_INNER_L4_TCP)
#define MLX5_FLOW_LAYER_INNER \
	(MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_L3 | \
	 MLX5_FLOW_LAYER_INNER_L4)
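
/*
 * Editorial note with a short usage sketch (not in the original sources):
 * throughout this file the layer bit-field is queried at the encapsulation
 * level selected by the tunnel bit, e.g.:
 *
 * @code
 * const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
 *
 * if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
 *		       MLX5_FLOW_LAYER_OUTER_L3))
 *	; // an L3 item was already parsed at this level
 * @endcode
 */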

/* Actions that modify the fate of matching traffic. */
#define MLX5_FLOW_FATE_DROP (1u << 0)
#define MLX5_FLOW_FATE_QUEUE (1u << 1)
#define MLX5_FLOW_FATE_RSS (1u << 2)

/* Actions that modify a matched packet. */
#define MLX5_FLOW_MOD_FLAG (1u << 0)
#define MLX5_FLOW_MOD_MARK (1u << 1)

/* Possible L3 layer protocols for filtering. */
#define MLX5_IP_PROTOCOL_TCP 6
#define MLX5_IP_PROTOCOL_UDP 17

/* Priority reserved for default flows. */
#define MLX5_FLOW_PRIO_RSVD ((uint32_t)-1)

enum mlx5_expansion {
	MLX5_EXPANSION_ROOT,
	MLX5_EXPANSION_ROOT_OUTER,
	MLX5_EXPANSION_OUTER_ETH,
	MLX5_EXPANSION_OUTER_IPV4,
	MLX5_EXPANSION_OUTER_IPV4_UDP,
	MLX5_EXPANSION_OUTER_IPV4_TCP,
	MLX5_EXPANSION_OUTER_IPV6,
	MLX5_EXPANSION_OUTER_IPV6_UDP,
	MLX5_EXPANSION_OUTER_IPV6_TCP,
	MLX5_EXPANSION_ETH,
	MLX5_EXPANSION_IPV4,
	MLX5_EXPANSION_IPV4_UDP,
	MLX5_EXPANSION_IPV4_TCP,
	MLX5_EXPANSION_IPV6,
	MLX5_EXPANSION_IPV6_UDP,
	MLX5_EXPANSION_IPV6_TCP,
};

/** Supported expansion of items. */
static const struct rte_flow_expand_node mlx5_support_expansion[] = {
	[MLX5_EXPANSION_ROOT] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
						 MLX5_EXPANSION_IPV4,
						 MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_END,
	},
	[MLX5_EXPANSION_ROOT_OUTER] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH,
						 MLX5_EXPANSION_OUTER_IPV4,
						 MLX5_EXPANSION_OUTER_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_END,
	},
	[MLX5_EXPANSION_OUTER_ETH] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
						 MLX5_EXPANSION_OUTER_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.rss_types = 0,
	},
	[MLX5_EXPANSION_OUTER_IPV4] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT
			(MLX5_EXPANSION_OUTER_IPV4_UDP,
			 MLX5_EXPANSION_OUTER_IPV4_TCP),
		.type = RTE_FLOW_ITEM_TYPE_IPV4,
		.rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
			ETH_RSS_NONFRAG_IPV4_OTHER,
	},
	[MLX5_EXPANSION_OUTER_IPV4_UDP] = {
		.type = RTE_FLOW_ITEM_TYPE_UDP,
		.rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
	},
	[MLX5_EXPANSION_OUTER_IPV4_TCP] = {
		.type = RTE_FLOW_ITEM_TYPE_TCP,
		.rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
	},
	[MLX5_EXPANSION_OUTER_IPV6] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT
			(MLX5_EXPANSION_OUTER_IPV6_UDP,
			 MLX5_EXPANSION_OUTER_IPV6_TCP),
		.type = RTE_FLOW_ITEM_TYPE_IPV6,
		.rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
			ETH_RSS_NONFRAG_IPV6_OTHER,
	},
	[MLX5_EXPANSION_OUTER_IPV6_UDP] = {
		.type = RTE_FLOW_ITEM_TYPE_UDP,
		.rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
	},
	[MLX5_EXPANSION_OUTER_IPV6_TCP] = {
		.type = RTE_FLOW_ITEM_TYPE_TCP,
		.rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
	},
	[MLX5_EXPANSION_ETH] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
						 MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_ETH,
	},
	[MLX5_EXPANSION_IPV4] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4_UDP,
						 MLX5_EXPANSION_IPV4_TCP),
		.type = RTE_FLOW_ITEM_TYPE_IPV4,
		.rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
			ETH_RSS_NONFRAG_IPV4_OTHER,
	},
	[MLX5_EXPANSION_IPV4_UDP] = {
		.type = RTE_FLOW_ITEM_TYPE_UDP,
		.rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
	},
	[MLX5_EXPANSION_IPV4_TCP] = {
		.type = RTE_FLOW_ITEM_TYPE_TCP,
		.rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
	},
	[MLX5_EXPANSION_IPV6] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV6_UDP,
						 MLX5_EXPANSION_IPV6_TCP),
		.type = RTE_FLOW_ITEM_TYPE_IPV6,
		.rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
			ETH_RSS_NONFRAG_IPV6_OTHER,
	},
	[MLX5_EXPANSION_IPV6_UDP] = {
		.type = RTE_FLOW_ITEM_TYPE_UDP,
		.rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
	},
	[MLX5_EXPANSION_IPV6_TCP] = {
		.type = RTE_FLOW_ITEM_TYPE_TCP,
		.rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
	},
};
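
/*
 * Editorial note (not in the original sources): rte_flow_expand_rss() walks
 * the graph above to complete a user pattern according to the requested RSS
 * types. For instance the pattern "eth / ipv4 / end", with rss.types
 * covering both UDP and TCP, expands to:
 *
 *   eth / ipv4 / end
 *   eth / ipv4 / udp / end
 *   eth / ipv4 / tcp / end
 *
 * and each expanded pattern is converted into its own Verbs flow by
 * mlx5_flow_merge() below.
 */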

/** Verbs specification and handles associated with a flow. */
struct mlx5_flow_verbs {
	LIST_ENTRY(mlx5_flow_verbs) next;
	unsigned int size; /**< Size of the attribute. */
	struct {
		struct ibv_flow_attr *attr;
		/**< Pointer to the Specification buffer. */
		uint8_t *specs; /**< Pointer to the specifications. */
	};
	struct ibv_flow *flow; /**< Verbs flow pointer. */
	struct mlx5_hrxq *hrxq; /**< Hash Rx queue object. */
	uint64_t hash_fields; /**< Verbs hash Rx queue hash fields. */
};

/* Flow structure. */
struct rte_flow {
	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
	struct rte_flow_attr attributes; /**< User flow attribute. */
	uint32_t l3_protocol_en:1; /**< Protocol filtering requested. */
	uint32_t layers;
	/**< Bit-fields of present layers, see MLX5_FLOW_LAYER_*. */
	uint32_t modifier;
	/**< Bit-fields of present modifiers, see MLX5_FLOW_MOD_*. */
	uint32_t fate;
	/**< Bit-fields of present fate actions, see MLX5_FLOW_FATE_*. */
	uint8_t l3_protocol; /**< Valid when l3_protocol_en is set. */
	LIST_HEAD(verbs, mlx5_flow_verbs) verbs; /**< Verbs flows list. */
	struct mlx5_flow_verbs *cur_verbs;
	/**< Current Verbs flow structure being filled. */
	struct rte_flow_action_rss rss; /**< RSS context. */
	uint8_t key[MLX5_RSS_HASH_KEY_LEN]; /**< RSS hash key. */
	uint16_t (*queue)[]; /**< Destination queues to redirect traffic to. */
};

static const struct rte_flow_ops mlx5_flow_ops = {
	.validate = mlx5_flow_validate,
	.create = mlx5_flow_create,
	.destroy = mlx5_flow_destroy,
	.flush = mlx5_flow_flush,
	.isolate = mlx5_flow_isolate,
};

/* Convert FDIR request to Generic flow. */
struct mlx5_fdir {
	struct rte_flow_attr attr;
	struct rte_flow_action actions[2];
	struct rte_flow_item items[4];
	struct rte_flow_item_eth l2;
	struct rte_flow_item_eth l2_mask;
	union {
		struct rte_flow_item_ipv4 ipv4;
		struct rte_flow_item_ipv6 ipv6;
	} l3;
	union {
		struct rte_flow_item_ipv4 ipv4;
		struct rte_flow_item_ipv6 ipv6;
	} l3_mask;
	union {
		struct rte_flow_item_udp udp;
		struct rte_flow_item_tcp tcp;
	} l4;
	union {
		struct rte_flow_item_udp udp;
		struct rte_flow_item_tcp tcp;
	} l4_mask;
	struct rte_flow_action_queue queue;
};

/* Verbs specification header. */
struct ibv_spec_header {
	enum ibv_flow_spec_type type;
	uint16_t size;
};

/**
 * Number of sub priorities.
 * For each kind of pattern matching, i.e. L2, L3, L4, to get correct
 * matching on the NIC (firmware dependent), L4 must have the highest
 * priority, followed by L3 and ending with L2.
 */
#define MLX5_PRIORITY_MAP_L2 2
#define MLX5_PRIORITY_MAP_L3 1
#define MLX5_PRIORITY_MAP_L4 0
#define MLX5_PRIORITY_MAP_MAX 3

/* Map of Verbs to Flow priority with 8 Verbs priorities. */
static const uint32_t priority_map_3[][MLX5_PRIORITY_MAP_MAX] = {
	{ 0, 1, 2 }, { 2, 3, 4 }, { 5, 6, 7 },
};

/* Map of Verbs to Flow priority with 16 Verbs priorities. */
static const uint32_t priority_map_5[][MLX5_PRIORITY_MAP_MAX] = {
	{ 0, 1, 2 }, { 3, 4, 5 }, { 6, 7, 8 },
	{ 9, 10, 11 }, { 12, 13, 14 },
};
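
/*
 * Editorial worked example (not in the original sources): with 16 Verbs
 * priorities (priority_map_5), a flow created at user priority 1 whose last
 * converted item is an L3 one gets:
 *
 *   priority_map_5[1][MLX5_PRIORITY_MAP_L3] == 4
 *
 * so its matching precedence sits just below the L4 sub-priority (3) of the
 * same user priority, as required by the firmware ordering described above.
 */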

/**
 * Discover the maximum number of priorities available.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   Number of supported flow priorities on success, a negative errno
 *   value otherwise and rte_errno is set.
 */
int
mlx5_flow_discover_priorities(struct rte_eth_dev *dev)
{
	struct {
		struct ibv_flow_attr attr;
		struct ibv_flow_spec_eth eth;
		struct ibv_flow_spec_action_drop drop;
	} flow_attr = {
		.attr = {
			.num_of_specs = 2,
		},
		.eth = {
			.type = IBV_FLOW_SPEC_ETH,
			.size = sizeof(struct ibv_flow_spec_eth),
		},
		.drop = {
			.size = sizeof(struct ibv_flow_spec_action_drop),
			.type = IBV_FLOW_SPEC_ACTION_DROP,
		},
	};
	struct ibv_flow *flow;
	struct mlx5_hrxq *drop = mlx5_hrxq_drop_new(dev);
	uint16_t vprio[] = { 8, 16 };
	int i;
	int priority = 0;

	if (!drop) {
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	for (i = 0; i != RTE_DIM(vprio); i++) {
		flow_attr.attr.priority = vprio[i] - 1;
		flow = mlx5_glue->create_flow(drop->qp, &flow_attr.attr);
		if (!flow)
			break;
		claim_zero(mlx5_glue->destroy_flow(flow));
		priority = vprio[i];
	}
	switch (priority) {
	case 8:
		priority = RTE_DIM(priority_map_3);
		break;
	case 16:
		priority = RTE_DIM(priority_map_5);
		break;
	default:
		rte_errno = ENOTSUP;
		DRV_LOG(ERR,
			"port %u verbs maximum priority: %d expected 8/16",
			dev->data->port_id, vprio[i]);
		return -rte_errno;
	}
	mlx5_hrxq_drop_release(dev);
	DRV_LOG(INFO, "port %u flow maximum priority: %d",
		dev->data->port_id, priority);
	return priority;
}

/**
 * Adjust flow priority.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param flow
 *   Pointer to an rte flow.
 */
static void
mlx5_flow_adjust_priority(struct rte_eth_dev *dev, struct rte_flow *flow)
{
	struct priv *priv = dev->data->dev_private;
	uint32_t priority = flow->attributes.priority;
	uint32_t subpriority = flow->cur_verbs->attr->priority;

	switch (priv->config.flow_prio) {
	case RTE_DIM(priority_map_3):
		priority = priority_map_3[priority][subpriority];
		break;
	case RTE_DIM(priority_map_5):
		priority = priority_map_5[priority][subpriority];
		break;
	}
	flow->cur_verbs->attr->priority = priority;
}

/**
 * Verify the @p attributes will be correctly understood by the NIC and store
 * them in the @p flow if everything is correct.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[in] attributes
 *   Pointer to flow attributes.
 * @param[in, out] flow
 *   Pointer to the rte_flow structure.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_attributes(struct rte_eth_dev *dev,
		     const struct rte_flow_attr *attributes,
		     struct rte_flow *flow,
		     struct rte_flow_error *error)
{
	uint32_t priority_max =
		((struct priv *)dev->data->dev_private)->config.flow_prio - 1;

	if (attributes->group)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
					  NULL,
					  "groups are not supported");
	if (attributes->priority != MLX5_FLOW_PRIO_RSVD &&
	    attributes->priority >= priority_max)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
					  NULL,
					  "priority out of range");
	if (attributes->egress)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
					  NULL,
					  "egress is not supported");
	if (attributes->transfer)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
					  NULL,
					  "transfer is not supported");
	if (!attributes->ingress)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
					  NULL,
					  "ingress attribute is mandatory");
	flow->attributes = *attributes;
	if (attributes->priority == MLX5_FLOW_PRIO_RSVD)
		flow->attributes.priority = priority_max;
	return 0;
}

/**
 * Verify the @p item specifications (spec, last, mask) are compatible with the
 * NIC capabilities.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] mask
 *   @p item->mask or flow default bit-masks.
 * @param[in] nic_mask
 *   Bit-masks covering supported fields by the NIC to compare with user mask.
 * @param[in] size
 *   Bit-masks size in bytes.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_item_acceptable(const struct rte_flow_item *item,
			  const uint8_t *mask,
			  const uint8_t *nic_mask,
			  unsigned int size,
			  struct rte_flow_error *error)
{
	unsigned int i;

	for (i = 0; i < size; ++i)
		if ((nic_mask[i] | mask[i]) != nic_mask[i])
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  item,
						  "mask enables non supported"
						  " bits");
	if (!item->spec && (item->mask || item->last))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "mask/last without a spec is not"
					  " supported");
	if (item->spec && item->last) {
		uint8_t spec[size];
		uint8_t last[size];
		int ret;

		for (i = 0; i < size; ++i) {
			spec[i] = ((const uint8_t *)item->spec)[i] & mask[i];
			last[i] = ((const uint8_t *)item->last)[i] & mask[i];
		}
		ret = memcmp(spec, last, size);
		if (ret != 0)
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  item,
						  "range is not supported");
	}
	return 0;
}

/**
 * Add a verbs item specification into @p flow.
 *
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] src
 *   Pointer to the specification to copy.
 * @param[in] size
 *   Size in bytes of the specification to copy.
 */
static void
mlx5_flow_spec_verbs_add(struct rte_flow *flow, void *src, unsigned int size)
{
	struct mlx5_flow_verbs *verbs = flow->cur_verbs;

	if (verbs->specs) {
		void *dst;

		dst = (void *)(verbs->specs + verbs->size);
		memcpy(dst, src, size);
		++verbs->attr->num_of_specs;
	}
	verbs->size += size;
}

/**
 * Adjust verbs hash fields according to the @p flow information.
 *
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] tunnel
 *   1 when the hash field is for a tunnel item.
 * @param[in] layer_types
 *   ETH_RSS_* types.
 * @param[in] hash_fields
 *   Item hash fields.
 */
static void
mlx5_flow_verbs_hashfields_adjust(struct rte_flow *flow,
				  int tunnel __rte_unused,
				  uint32_t layer_types, uint64_t hash_fields)
{
#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
	hash_fields |= (tunnel ? IBV_RX_HASH_INNER : 0);
	if (flow->rss.level == 2 && !tunnel)
		hash_fields = 0;
	else if (flow->rss.level < 2 && tunnel)
		hash_fields = 0;
#endif
	if (!(flow->rss.types & layer_types))
		hash_fields = 0;
	flow->cur_verbs->hash_fields |= hash_fields;
}
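
/*
 * Editorial example (not in the original sources): while converting an outer
 * IPv4 item for a flow whose RSS action only requested ETH_RSS_UDP, the
 * layer_types test above zeroes hash_fields, so the IPv4 source/destination
 * addresses do not contribute to the Verbs hash configuration of that flow.
 */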

/**
 * Convert the @p item into a Verbs specification after ensuring the NIC
 * will understand and process it correctly.
 * If the necessary size for the conversion is greater than the @p flow_size,
 * nothing is written in @p flow, the validation is still performed.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is less than or equal to @p flow_size, the @p item has fully been
 *   converted, otherwise another call with this returned memory size should
 *   be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_item_eth(const struct rte_flow_item *item, struct rte_flow *flow,
		   const size_t flow_size, struct rte_flow_error *error)
{
	const struct rte_flow_item_eth *spec = item->spec;
	const struct rte_flow_item_eth *mask = item->mask;
	const struct rte_flow_item_eth nic_mask = {
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
		.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
		.type = RTE_BE16(0xffff),
	};
	const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
	const unsigned int size = sizeof(struct ibv_flow_spec_eth);
	struct ibv_flow_spec_eth eth = {
		.type = IBV_FLOW_SPEC_ETH | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
		.size = size,
	};
	int ret;

	if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
			    MLX5_FLOW_LAYER_OUTER_L2))
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "L2 layers already configured");
	if (!mask)
		mask = &rte_flow_item_eth_mask;
	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
					(const uint8_t *)&nic_mask,
					sizeof(struct rte_flow_item_eth),
					error);
	if (ret)
		return ret;
	flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
		MLX5_FLOW_LAYER_OUTER_L2;
	if (size > flow_size)
		return size;
	if (spec) {
		unsigned int i;

		memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
		memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
		eth.val.ether_type = spec->type;
		memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
		memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
		eth.mask.ether_type = mask->type;
		/* Remove unwanted bits from values. */
		for (i = 0; i < ETHER_ADDR_LEN; ++i) {
			eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
			eth.val.src_mac[i] &= eth.mask.src_mac[i];
		}
		eth.val.ether_type &= eth.mask.ether_type;
	}
	flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
	mlx5_flow_spec_verbs_add(flow, &eth, size);
	return size;
}

/**
 * Update the VLAN tag in the Verbs Ethernet specification.
 *
 * @param[in, out] attr
 *   Pointer to Verbs attributes structure.
 * @param[in] eth
 *   Verbs structure containing the VLAN information to copy.
 */
static void
mlx5_flow_item_vlan_update(struct ibv_flow_attr *attr,
			   struct ibv_flow_spec_eth *eth)
{
	unsigned int i;
	const enum ibv_flow_spec_type search = eth->type;
	struct ibv_spec_header *hdr = (struct ibv_spec_header *)
		((uint8_t *)attr + sizeof(struct ibv_flow_attr));

	for (i = 0; i != attr->num_of_specs; ++i) {
		if (hdr->type == search) {
			struct ibv_flow_spec_eth *e =
				(struct ibv_flow_spec_eth *)hdr;

			e->val.vlan_tag = eth->val.vlan_tag;
			e->mask.vlan_tag = eth->mask.vlan_tag;
			e->val.ether_type = eth->val.ether_type;
			e->mask.ether_type = eth->mask.ether_type;
			break;
		}
		hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
	}
}

/**
 * Convert the @p item into @p flow (or update the already present Ethernet
 * Verbs specification) after ensuring the NIC will understand and process it
 * correctly.
 * If the necessary size for the conversion is greater than the @p flow_size,
 * nothing is written in @p flow, the validation is still performed.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is less than or equal to @p flow_size, the @p item has fully been
 *   converted, otherwise another call with this returned memory size should
 *   be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_item_vlan(const struct rte_flow_item *item, struct rte_flow *flow,
		    const size_t flow_size, struct rte_flow_error *error)
{
	const struct rte_flow_item_vlan *spec = item->spec;
	const struct rte_flow_item_vlan *mask = item->mask;
	const struct rte_flow_item_vlan nic_mask = {
		.tci = RTE_BE16(0x0fff),
		.inner_type = RTE_BE16(0xffff),
	};
	unsigned int size = sizeof(struct ibv_flow_spec_eth);
	const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
	struct ibv_flow_spec_eth eth = {
		.type = IBV_FLOW_SPEC_ETH | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
		.size = size,
	};
	int ret;
	const uint32_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 |
					MLX5_FLOW_LAYER_INNER_L4) :
		(MLX5_FLOW_LAYER_OUTER_L3 | MLX5_FLOW_LAYER_OUTER_L4);
	const uint32_t vlanm = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
		MLX5_FLOW_LAYER_OUTER_VLAN;
	const uint32_t l2m = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
		MLX5_FLOW_LAYER_OUTER_L2;

	if (flow->layers & vlanm)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "VLAN layer already configured");
	else if ((flow->layers & l34m) != 0)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "L2 layer cannot follow L3/L4 layer");
	if (!mask)
		mask = &rte_flow_item_vlan_mask;
	ret = mlx5_flow_item_acceptable
		(item, (const uint8_t *)mask,
		 (const uint8_t *)&nic_mask,
		 sizeof(struct rte_flow_item_vlan), error);
	if (ret)
		return ret;
	if (spec) {
		eth.val.vlan_tag = spec->tci;
		eth.mask.vlan_tag = mask->tci;
		eth.val.vlan_tag &= eth.mask.vlan_tag;
		eth.val.ether_type = spec->inner_type;
		eth.mask.ether_type = mask->inner_type;
		eth.val.ether_type &= eth.mask.ether_type;
	}
	/*
	 * From verbs perspective an empty VLAN is equivalent
	 * to a packet without VLAN layer.
	 */
	if (!eth.mask.vlan_tag)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
					  item->spec,
					  "VLAN cannot be empty");
	if (!(flow->layers & l2m)) {
		if (size <= flow_size) {
			flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
			mlx5_flow_spec_verbs_add(flow, &eth, size);
		}
	} else {
		if (flow->cur_verbs)
			mlx5_flow_item_vlan_update(flow->cur_verbs->attr,
						   &eth);
		size = 0; /* Only an update is done in eth specification. */
	}
	flow->layers |= tunnel ?
		(MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_VLAN) :
		(MLX5_FLOW_LAYER_OUTER_L2 | MLX5_FLOW_LAYER_OUTER_VLAN);
	return size;
}

/**
 * Convert the @p item into a Verbs specification after ensuring the NIC
 * will understand and process it correctly.
 * If the necessary size for the conversion is greater than the @p flow_size,
 * nothing is written in @p flow, the validation is still performed.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is less than or equal to @p flow_size, the @p item has fully been
 *   converted, otherwise another call with this returned memory size should
 *   be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_item_ipv4(const struct rte_flow_item *item, struct rte_flow *flow,
		    const size_t flow_size, struct rte_flow_error *error)
{
	const struct rte_flow_item_ipv4 *spec = item->spec;
	const struct rte_flow_item_ipv4 *mask = item->mask;
	const struct rte_flow_item_ipv4 nic_mask = {
		.hdr = {
			.src_addr = RTE_BE32(0xffffffff),
			.dst_addr = RTE_BE32(0xffffffff),
			.type_of_service = 0xff,
			.next_proto_id = 0xff,
		},
	};
	const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
	unsigned int size = sizeof(struct ibv_flow_spec_ipv4_ext);
	struct ibv_flow_spec_ipv4_ext ipv4 = {
		.type = IBV_FLOW_SPEC_IPV4_EXT |
			(tunnel ? IBV_FLOW_SPEC_INNER : 0),
		.size = size,
	};
	int ret;

	if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
			    MLX5_FLOW_LAYER_OUTER_L3))
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "multiple L3 layers not supported");
	else if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
				 MLX5_FLOW_LAYER_OUTER_L4))
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "L3 cannot follow an L4 layer.");
	if (!mask)
		mask = &rte_flow_item_ipv4_mask;
	ret = mlx5_flow_item_acceptable
		(item, (const uint8_t *)mask,
		 (const uint8_t *)&nic_mask,
		 sizeof(struct rte_flow_item_ipv4), error);
	if (ret < 0)
		return ret;
	flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
		MLX5_FLOW_LAYER_OUTER_L3_IPV4;
	if (spec) {
		ipv4.val = (struct ibv_flow_ipv4_ext_filter){
			.src_ip = spec->hdr.src_addr,
			.dst_ip = spec->hdr.dst_addr,
			.proto = spec->hdr.next_proto_id,
			.tos = spec->hdr.type_of_service,
		};
		ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
			.src_ip = mask->hdr.src_addr,
			.dst_ip = mask->hdr.dst_addr,
			.proto = mask->hdr.next_proto_id,
			.tos = mask->hdr.type_of_service,
		};
		/* Remove unwanted bits from values. */
		ipv4.val.src_ip &= ipv4.mask.src_ip;
		ipv4.val.dst_ip &= ipv4.mask.dst_ip;
		ipv4.val.proto &= ipv4.mask.proto;
		ipv4.val.tos &= ipv4.mask.tos;
	}
	flow->l3_protocol_en = !!ipv4.mask.proto;
	flow->l3_protocol = ipv4.val.proto;
	if (size <= flow_size) {
		mlx5_flow_verbs_hashfields_adjust
			(flow, tunnel,
			 (ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
			  ETH_RSS_NONFRAG_IPV4_OTHER),
			 (IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4));
		flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L3;
		mlx5_flow_spec_verbs_add(flow, &ipv4, size);
	}
	return size;
}

/**
 * Convert the @p item into a Verbs specification after ensuring the NIC
 * will understand and process it correctly.
 * If the necessary size for the conversion is greater than the @p flow_size,
 * nothing is written in @p flow, the validation is still performed.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is less than or equal to @p flow_size, the @p item has fully been
 *   converted, otherwise another call with this returned memory size should
 *   be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_item_ipv6(const struct rte_flow_item *item, struct rte_flow *flow,
		    const size_t flow_size, struct rte_flow_error *error)
{
	const struct rte_flow_item_ipv6 *spec = item->spec;
	const struct rte_flow_item_ipv6 *mask = item->mask;
	const struct rte_flow_item_ipv6 nic_mask = {
		.hdr = {
			.src_addr =
				"\xff\xff\xff\xff\xff\xff\xff\xff"
				"\xff\xff\xff\xff\xff\xff\xff\xff",
			.dst_addr =
				"\xff\xff\xff\xff\xff\xff\xff\xff"
				"\xff\xff\xff\xff\xff\xff\xff\xff",
			.vtc_flow = RTE_BE32(0xffffffff),
			.proto = 0xff,
			.hop_limits = 0xff,
		},
	};
	const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
	unsigned int size = sizeof(struct ibv_flow_spec_ipv6);
	struct ibv_flow_spec_ipv6 ipv6 = {
		.type = IBV_FLOW_SPEC_IPV6 | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
		.size = size,
	};
	int ret;

	if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
			    MLX5_FLOW_LAYER_OUTER_L3))
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "multiple L3 layers not supported");
	else if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
				 MLX5_FLOW_LAYER_OUTER_L4))
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "L3 cannot follow an L4 layer.");
	if (!mask)
		mask = &rte_flow_item_ipv6_mask;
	ret = mlx5_flow_item_acceptable
		(item, (const uint8_t *)mask,
		 (const uint8_t *)&nic_mask,
		 sizeof(struct rte_flow_item_ipv6), error);
	if (ret < 0)
		return ret;
	flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
		MLX5_FLOW_LAYER_OUTER_L3_IPV6;
	if (spec) {
		uint32_t vtc_flow_val;
		uint32_t vtc_flow_mask;
		unsigned int i;

		memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
		       RTE_DIM(ipv6.val.src_ip));
		memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
		       RTE_DIM(ipv6.val.dst_ip));
		memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
		       RTE_DIM(ipv6.mask.src_ip));
		memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
		       RTE_DIM(ipv6.mask.dst_ip));
		vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
		vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
		ipv6.val.flow_label =
			rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
					 IPV6_HDR_FL_SHIFT);
		ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
					 IPV6_HDR_TC_SHIFT;
		ipv6.val.next_hdr = spec->hdr.proto;
		ipv6.val.hop_limit = spec->hdr.hop_limits;
		ipv6.mask.flow_label =
			rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
					 IPV6_HDR_FL_SHIFT);
		ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
					  IPV6_HDR_TC_SHIFT;
		ipv6.mask.next_hdr = mask->hdr.proto;
		ipv6.mask.hop_limit = mask->hdr.hop_limits;
		/* Remove unwanted bits from values. */
		for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
			ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
			ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
		}
		ipv6.val.flow_label &= ipv6.mask.flow_label;
		ipv6.val.traffic_class &= ipv6.mask.traffic_class;
		ipv6.val.next_hdr &= ipv6.mask.next_hdr;
		ipv6.val.hop_limit &= ipv6.mask.hop_limit;
	}
	flow->l3_protocol_en = !!ipv6.mask.next_hdr;
	flow->l3_protocol = ipv6.val.next_hdr;
	if (size <= flow_size) {
		mlx5_flow_verbs_hashfields_adjust
			(flow, tunnel,
			 (ETH_RSS_IPV6 | ETH_RSS_NONFRAG_IPV6_OTHER),
			 (IBV_RX_HASH_SRC_IPV6 | IBV_RX_HASH_DST_IPV6));
		flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L3;
		mlx5_flow_spec_verbs_add(flow, &ipv6, size);
	}
	return size;
}

/**
 * Convert the @p item into a Verbs specification after ensuring the NIC
 * will understand and process it correctly.
 * If the necessary size for the conversion is greater than the @p flow_size,
 * nothing is written in @p flow, the validation is still performed.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is less than or equal to @p flow_size, the @p item has fully been
 *   converted, otherwise another call with this returned memory size should
 *   be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_item_udp(const struct rte_flow_item *item, struct rte_flow *flow,
		   const size_t flow_size, struct rte_flow_error *error)
{
	const struct rte_flow_item_udp *spec = item->spec;
	const struct rte_flow_item_udp *mask = item->mask;
	const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
	unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
	struct ibv_flow_spec_tcp_udp udp = {
		.type = IBV_FLOW_SPEC_UDP | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
		.size = size,
	};
	int ret;

	if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_UDP)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "protocol filtering not compatible"
					  " with UDP layer");
	if (!(flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
			      MLX5_FLOW_LAYER_OUTER_L3)))
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "L3 is mandatory to filter"
					  " on L4");
	if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
			    MLX5_FLOW_LAYER_OUTER_L4))
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "L4 layer is already"
					  " present");
	if (!mask)
		mask = &rte_flow_item_udp_mask;
	ret = mlx5_flow_item_acceptable
		(item, (const uint8_t *)mask,
		 (const uint8_t *)&rte_flow_item_udp_mask,
		 sizeof(struct rte_flow_item_udp), error);
	if (ret < 0)
		return ret;
	flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
		MLX5_FLOW_LAYER_OUTER_L4_UDP;
	if (spec) {
		udp.val.dst_port = spec->hdr.dst_port;
		udp.val.src_port = spec->hdr.src_port;
		udp.mask.dst_port = mask->hdr.dst_port;
		udp.mask.src_port = mask->hdr.src_port;
		/* Remove unwanted bits from values. */
		udp.val.src_port &= udp.mask.src_port;
		udp.val.dst_port &= udp.mask.dst_port;
	}
	if (size <= flow_size) {
		mlx5_flow_verbs_hashfields_adjust(flow, tunnel, ETH_RSS_UDP,
						  (IBV_RX_HASH_SRC_PORT_UDP |
						   IBV_RX_HASH_DST_PORT_UDP));
		flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L4;
		mlx5_flow_spec_verbs_add(flow, &udp, size);
	}
	return size;
}

/**
 * Convert the @p item into a Verbs specification after ensuring the NIC
 * will understand and process it correctly.
 * If the necessary size for the conversion is greater than the @p flow_size,
 * nothing is written in @p flow, the validation is still performed.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is less than or equal to @p flow_size, the @p item has fully been
 *   converted, otherwise another call with this returned memory size should
 *   be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_item_tcp(const struct rte_flow_item *item, struct rte_flow *flow,
		   const size_t flow_size, struct rte_flow_error *error)
{
	const struct rte_flow_item_tcp *spec = item->spec;
	const struct rte_flow_item_tcp *mask = item->mask;
	const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
	unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
	struct ibv_flow_spec_tcp_udp tcp = {
		.type = IBV_FLOW_SPEC_TCP | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
		.size = size,
	};
	int ret;

	if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_TCP)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "protocol filtering not compatible"
					  " with TCP layer");
	if (!(flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
			      MLX5_FLOW_LAYER_OUTER_L3)))
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "L3 is mandatory to filter on L4");
	if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
			    MLX5_FLOW_LAYER_OUTER_L4))
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "L4 layer is already present");
	if (!mask)
		mask = &rte_flow_item_tcp_mask;
	ret = mlx5_flow_item_acceptable
		(item, (const uint8_t *)mask,
		 (const uint8_t *)&rte_flow_item_tcp_mask,
		 sizeof(struct rte_flow_item_tcp), error);
	if (ret < 0)
		return ret;
	flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
		MLX5_FLOW_LAYER_OUTER_L4_TCP;
	if (spec) {
		tcp.val.dst_port = spec->hdr.dst_port;
		tcp.val.src_port = spec->hdr.src_port;
		tcp.mask.dst_port = mask->hdr.dst_port;
		tcp.mask.src_port = mask->hdr.src_port;
		/* Remove unwanted bits from values. */
		tcp.val.src_port &= tcp.mask.src_port;
		tcp.val.dst_port &= tcp.mask.dst_port;
	}
	if (size <= flow_size) {
		mlx5_flow_verbs_hashfields_adjust(flow, tunnel, ETH_RSS_TCP,
						  (IBV_RX_HASH_SRC_PORT_TCP |
						   IBV_RX_HASH_DST_PORT_TCP));
		flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L4;
		mlx5_flow_spec_verbs_add(flow, &tcp, size);
	}
	return size;
}

/**
 * Convert the @p pattern into a Verbs specification after ensuring the NIC
 * will understand and process it correctly.
 * The conversion is performed item per item, each of them is written into
 * the @p flow if its size is less than or equal to @p flow_size.
 * Validation and memory consumption computation are still performed until the
 * end of @p pattern, unless an error is encountered.
 *
 * @param[in] pattern
 *   Flow pattern.
 * @param[in, out] flow
 *   Pointer to the rte_flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small some
 *   garbage may be present.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is less than or equal to @p flow_size, the @p pattern has fully been
 *   converted, otherwise another call with this returned memory size should
 *   be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_items(const struct rte_flow_item pattern[],
		struct rte_flow *flow, const size_t flow_size,
		struct rte_flow_error *error)
{
	int remain = flow_size;
	size_t size = 0;

	for (; pattern->type != RTE_FLOW_ITEM_TYPE_END; pattern++) {
		int ret = 0;

		switch (pattern->type) {
		case RTE_FLOW_ITEM_TYPE_VOID:
			break;
		case RTE_FLOW_ITEM_TYPE_ETH:
			ret = mlx5_flow_item_eth(pattern, flow, remain, error);
			break;
		case RTE_FLOW_ITEM_TYPE_VLAN:
			ret = mlx5_flow_item_vlan(pattern, flow, remain, error);
			break;
		case RTE_FLOW_ITEM_TYPE_IPV4:
			ret = mlx5_flow_item_ipv4(pattern, flow, remain, error);
			break;
		case RTE_FLOW_ITEM_TYPE_IPV6:
			ret = mlx5_flow_item_ipv6(pattern, flow, remain, error);
			break;
		case RTE_FLOW_ITEM_TYPE_UDP:
			ret = mlx5_flow_item_udp(pattern, flow, remain, error);
			break;
		case RTE_FLOW_ITEM_TYPE_TCP:
			ret = mlx5_flow_item_tcp(pattern, flow, remain, error);
			break;
		default:
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  pattern,
						  "item not supported");
		}
		if (ret < 0)
			return ret;
		if (remain > ret)
			remain -= ret;
		else
			remain = 0;
		size += ret;
	}
	if (!flow->layers) {
		const struct rte_flow_item item = {
			.type = RTE_FLOW_ITEM_TYPE_ETH,
		};

		return mlx5_flow_item_eth(&item, flow, flow_size, error);
	}
	return size;
}

/**
 * Convert the @p action into a Verbs specification after ensuring the NIC
 * will understand and process it correctly.
 * If the necessary size for the conversion is greater than the @p flow_size,
 * nothing is written in @p flow, the validation is still performed.
 *
 * @param[in] action
 *   Action configuration.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is less than or equal to @p flow_size, the @p action has fully been
 *   converted, otherwise another call with this returned memory size should
 *   be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_action_drop(const struct rte_flow_action *action,
		      struct rte_flow *flow, const size_t flow_size,
		      struct rte_flow_error *error)
{
	unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
	struct ibv_flow_spec_action_drop drop = {
		.type = IBV_FLOW_SPEC_ACTION_DROP,
		.size = size,
	};

	if (flow->fate)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION,
					  action,
					  "multiple fate actions are not"
					  " supported");
	if (flow->modifier & (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK))
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION,
					  action,
					  "drop is not compatible with"
					  " flag/mark action");
	if (size < flow_size)
		mlx5_flow_spec_verbs_add(flow, &drop, size);
	flow->fate |= MLX5_FLOW_FATE_DROP;
	return size;
}

/**
 * Convert the @p action into @p flow after ensuring the NIC will understand
 * and process it correctly.
 *
 * @param[in] dev
 *   Pointer to Ethernet device structure.
 * @param[in] action
 *   Action configuration.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_action_queue(struct rte_eth_dev *dev,
		       const struct rte_flow_action *action,
		       struct rte_flow *flow,
		       struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;
	const struct rte_flow_action_queue *queue = action->conf;

	if (flow->fate)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION,
					  action,
					  "multiple fate actions are not"
					  " supported");
	if (queue->index >= priv->rxqs_n)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &queue->index,
					  "queue index out of range");
	if (!(*priv->rxqs)[queue->index])
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &queue->index,
					  "queue is not configured");
	if (flow->queue)
		(*flow->queue)[0] = queue->index;
	flow->rss.queue_num = 1;
	flow->fate |= MLX5_FLOW_FATE_QUEUE;
	return 0;
}

/**
 * Ensure the @p action will be understood and used correctly by the NIC.
 *
 * @param[in] dev
 *   Pointer to Ethernet device structure.
 * @param[in] action
 *   Pointer to flow actions array.
 * @param[in, out] flow
 *   Pointer to the rte_flow structure.
 * @param[in, out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success @p flow->queue array and @p flow->rss are filled and valid.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_action_rss(struct rte_eth_dev *dev,
		     const struct rte_flow_action *action,
		     struct rte_flow *flow,
		     struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;
	const struct rte_flow_action_rss *rss = action->conf;
	unsigned int i;

	if (flow->fate)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION,
					  action,
					  "multiple fate actions are not"
					  " supported");
	if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT &&
	    rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &rss->func,
					  "RSS hash function not supported");
#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
	if (rss->level > 2)
#else
	if (rss->level > 1)
#endif
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &rss->level,
					  "tunnel RSS is not supported");
	if (rss->key_len < MLX5_RSS_HASH_KEY_LEN)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &rss->key_len,
					  "RSS hash key too small");
	if (rss->key_len > MLX5_RSS_HASH_KEY_LEN)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &rss->key_len,
					  "RSS hash key too large");
	if (rss->queue_num > priv->config.ind_table_max_size)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &rss->queue_num,
					  "number of queues too large");
	if (rss->types & MLX5_RSS_HF_MASK)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &rss->types,
					  "some RSS protocols are not"
					  " supported");
	for (i = 0; i != rss->queue_num; ++i) {
		if (!(*priv->rxqs)[rss->queue[i]])
			return rte_flow_error_set
				(error, EINVAL,
				 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
				 &rss->queue[i],
				 "queue is not configured");
	}
	if (flow->queue)
		memcpy((*flow->queue), rss->queue,
		       rss->queue_num * sizeof(uint16_t));
	flow->rss.queue_num = rss->queue_num;
	memcpy(flow->key, rss->key, MLX5_RSS_HASH_KEY_LEN);
	flow->rss.types = rss->types;
	flow->rss.level = rss->level;
	flow->fate |= MLX5_FLOW_FATE_RSS;
	return 0;
}
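
/*
 * Editorial usage sketch (hypothetical values, not in the original sources):
 * an RSS action accepted by the checks above looks like:
 *
 * @code
 * uint16_t queues[2] = { 0, 1 };
 * uint8_t rss_key[MLX5_RSS_HASH_KEY_LEN]; // filled by the application
 * struct rte_flow_action_rss conf = {
 *	.func = RTE_ETH_HASH_FUNCTION_TOEPLITZ,
 *	.level = 1,
 *	.types = ETH_RSS_IP,
 *	.key_len = MLX5_RSS_HASH_KEY_LEN, // smaller or larger is refused
 *	.key = rss_key,
 *	.queue_num = 2,
 *	.queue = queues, // every queue must be configured
 * };
 * @endcode
 */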

/**
 * Convert the @p action into a Verbs specification after ensuring the NIC
 * will understand and process it correctly.
 * If the necessary size for the conversion is greater than the @p flow_size,
 * nothing is written in @p flow, the validation is still performed.
 *
 * @param[in] action
 *   Action configuration.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is less than or equal to @p flow_size, the @p action has fully been
 *   converted, otherwise another call with this returned memory size should
 *   be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_action_flag(const struct rte_flow_action *action,
		      struct rte_flow *flow, const size_t flow_size,
		      struct rte_flow_error *error)
{
	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
	struct ibv_flow_spec_action_tag tag = {
		.type = IBV_FLOW_SPEC_ACTION_TAG,
		.size = size,
		.tag_id = mlx5_flow_mark_set(MLX5_FLOW_MARK_DEFAULT),
	};
	struct mlx5_flow_verbs *verbs = flow->cur_verbs;

	if (flow->modifier & MLX5_FLOW_MOD_FLAG)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION,
					  action,
					  "flag action already present");
	if (flow->fate & MLX5_FLOW_FATE_DROP)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION,
					  action,
					  "flag is not compatible with drop"
					  " action");
	if (flow->modifier & MLX5_FLOW_MOD_MARK)
		size = 0;
	else if (size <= flow_size && verbs)
		mlx5_flow_spec_verbs_add(flow, &tag, size);
	flow->modifier |= MLX5_FLOW_MOD_FLAG;
	return size;
}

/**
 * Update verbs specification to modify the flag to mark.
 *
 * @param[in, out] verbs
 *   Pointer to the mlx5_flow_verbs structure.
 * @param[in] mark_id
 *   Mark identifier to replace the flag.
 */
static void
mlx5_flow_verbs_mark_update(struct mlx5_flow_verbs *verbs, uint32_t mark_id)
{
	struct ibv_spec_header *hdr;
	int i;

	if (!verbs)
		return;
	/* Update Verbs specification. */
	hdr = (struct ibv_spec_header *)verbs->specs;
	if (!hdr)
		return;
	for (i = 0; i != verbs->attr->num_of_specs; ++i) {
		if (hdr->type == IBV_FLOW_SPEC_ACTION_TAG) {
			struct ibv_flow_spec_action_tag *t =
				(struct ibv_flow_spec_action_tag *)hdr;

			t->tag_id = mlx5_flow_mark_set(mark_id);
		}
		hdr = (struct ibv_spec_header *)((uintptr_t)hdr + hdr->size);
	}
}

/**
 * Convert the @p action into @p flow (or update the already present Flag
 * Verbs specification) after ensuring the NIC will understand and process it
 * correctly.
 * If the necessary size for the conversion is greater than the @p flow_size,
 * nothing is written in @p flow, the validation is still performed.
 *
 * @param[in] action
 *   Action configuration.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is less than or equal to @p flow_size, the @p action has fully been
 *   converted, otherwise another call with this returned memory size should
 *   be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_action_mark(const struct rte_flow_action *action,
		      struct rte_flow *flow, const size_t flow_size,
		      struct rte_flow_error *error)
{
	const struct rte_flow_action_mark *mark = action->conf;
	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
	struct ibv_flow_spec_action_tag tag = {
		.type = IBV_FLOW_SPEC_ACTION_TAG,
		.size = size,
	};
	struct mlx5_flow_verbs *verbs = flow->cur_verbs;

	if (!mark)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION,
					  action,
					  "configuration cannot be null");
	if (mark->id >= MLX5_FLOW_MARK_MAX)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &mark->id,
					  "mark id must be in 0 <= id < "
					  RTE_STR(MLX5_FLOW_MARK_MAX));
	if (flow->modifier & MLX5_FLOW_MOD_MARK)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION,
					  action,
					  "mark action already present");
	if (flow->fate & MLX5_FLOW_FATE_DROP)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION,
					  action,
					  "mark is not compatible with drop"
					  " action");
	if (flow->modifier & MLX5_FLOW_MOD_FLAG) {
		mlx5_flow_verbs_mark_update(verbs, mark->id);
		size = 0;
	} else if (size <= flow_size) {
		tag.tag_id = mlx5_flow_mark_set(mark->id);
		mlx5_flow_spec_verbs_add(flow, &tag, size);
	}
	flow->modifier |= MLX5_FLOW_MOD_MARK;
	return size;
}

/**
 * Convert the @p actions into @p flow after ensuring the NIC will understand
 * and process it correctly.
 * The conversion is performed action per action, each of them is written into
 * the @p flow if its size is less than or equal to @p flow_size.
 * Validation and memory consumption computation are still performed until the
 * end of @p actions, unless an error is encountered.
 *
 * @param[in] dev
 *   Pointer to Ethernet device structure.
 * @param[in] actions
 *   Pointer to flow actions array.
 * @param[in, out] flow
 *   Pointer to the rte_flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small some
 *   garbage may be present.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is less than or equal to @p flow_size, the @p actions have fully been
 *   converted, otherwise another call with this returned memory size should
 *   be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_actions(struct rte_eth_dev *dev,
		  const struct rte_flow_action actions[],
		  struct rte_flow *flow, const size_t flow_size,
		  struct rte_flow_error *error)
{
	size_t size = 0;
	int remain = flow_size;
	int ret = 0;

	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
		switch (actions->type) {
		case RTE_FLOW_ACTION_TYPE_VOID:
			break;
		case RTE_FLOW_ACTION_TYPE_FLAG:
			ret = mlx5_flow_action_flag(actions, flow, remain,
						    error);
			break;
		case RTE_FLOW_ACTION_TYPE_MARK:
			ret = mlx5_flow_action_mark(actions, flow, remain,
						    error);
			break;
		case RTE_FLOW_ACTION_TYPE_DROP:
			ret = mlx5_flow_action_drop(actions, flow, remain,
						    error);
			break;
		case RTE_FLOW_ACTION_TYPE_QUEUE:
			ret = mlx5_flow_action_queue(dev, actions, flow, error);
			break;
		case RTE_FLOW_ACTION_TYPE_RSS:
			ret = mlx5_flow_action_rss(dev, actions, flow, error);
			break;
		default:
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ACTION,
						  actions,
						  "action not supported");
		}
		if (ret < 0)
			return ret;
		if (remain > ret)
			remain -= ret;
		else
			remain = 0;
		size += ret;
	}
	if (!flow->fate)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
					  NULL,
					  "no fate action found");
	return size;
}

/**
 * Convert the @p attributes, @p pattern, @p actions, into a flow for the NIC
 * after ensuring the NIC will understand and process it correctly.
 * The conversion is only performed item/action per item/action, each of
 * them is written into the @p flow if its size is less than or equal to @p
 * flow_size.
 * Validation and memory consumption computation are still performed until the
 * end, unless an error is encountered.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small some
 *   garbage may be present.
 * @param[in] attributes
 *   Flow rule attributes.
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is less than or equal to @p flow_size, the flow has fully been converted
 *   and can be applied, otherwise another call with this returned memory
 *   size should be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_merge(struct rte_eth_dev *dev, struct rte_flow *flow,
		const size_t flow_size,
		const struct rte_flow_attr *attributes,
		const struct rte_flow_item pattern[],
		const struct rte_flow_action actions[],
		struct rte_flow_error *error)
{
	struct rte_flow local_flow = { .layers = 0, };
	size_t size = sizeof(*flow);
	union {
		struct rte_flow_expand_rss buf;
		uint8_t buffer[2048];
	} expand_buffer;
	struct rte_flow_expand_rss *buf = &expand_buffer.buf;
	struct mlx5_flow_verbs *original_verbs = NULL;
	size_t original_verbs_size = 0;
	uint32_t original_layers = 0;
	int expanded_pattern_idx = 0;
	int ret;
	uint32_t i;

	if (size > flow_size)
		flow = &local_flow;
	ret = mlx5_flow_attributes(dev, attributes, flow, error);
	if (ret < 0)
		return ret;
	ret = mlx5_flow_actions(dev, actions, &local_flow, 0, error);
	if (ret < 0)
		return ret;
	if (local_flow.rss.types) {
		ret = rte_flow_expand_rss(buf, sizeof(expand_buffer.buffer),
					  pattern, local_flow.rss.types,
					  mlx5_support_expansion,
					  local_flow.rss.level < 2 ?
					  MLX5_EXPANSION_ROOT :
					  MLX5_EXPANSION_ROOT_OUTER);
		assert(ret > 0 &&
		       (unsigned int)ret < sizeof(expand_buffer.buffer));
	} else {
		buf->entries = 1;
		buf->entry[0].pattern = (void *)(uintptr_t)pattern;
	}
	size += RTE_ALIGN_CEIL(local_flow.rss.queue_num * sizeof(uint16_t),
			       sizeof(void *));
	if (size <= flow_size)
		flow->queue = (void *)(flow + 1);
	LIST_INIT(&flow->verbs);
	flow->layers = 0;
	flow->modifier = 0;
	flow->fate = 0;
	for (i = 0; i != buf->entries; ++i) {
		size_t off = size;
		size_t off2;

		flow->layers = original_layers;
		size += sizeof(struct ibv_flow_attr) +
			sizeof(struct mlx5_flow_verbs);
		off2 = size;
		if (size < flow_size) {
			flow->cur_verbs = (void *)((uintptr_t)flow + off);
			flow->cur_verbs->attr = (void *)(flow->cur_verbs + 1);
			flow->cur_verbs->specs =
				(void *)(flow->cur_verbs->attr + 1);
		}
		/* First iteration convert the pattern into Verbs. */
		if (i == 0) {
			/* Actions don't need to be converted several times. */
			ret = mlx5_flow_actions(dev, actions, flow,
						(size < flow_size) ?
						flow_size - size : 0,
						error);
			if (ret < 0)
				return ret;
			size += ret;
		} else {
			/*
			 * Next iteration means the pattern has already been
			 * converted and an expansion is necessary to match
			 * the user RSS request.  For that only the expanded
			 * items will be converted, the common part with the
			 * user pattern is just copied into the next buffer
			 * zone.
			 */
			size += original_verbs_size;
			if (size < flow_size) {
				rte_memcpy(flow->cur_verbs->attr,
					   original_verbs->attr,
					   original_verbs_size +
					   sizeof(struct ibv_flow_attr));
				flow->cur_verbs->size = original_verbs_size;
			}
		}
		ret = mlx5_flow_items
			((const struct rte_flow_item *)
			 &buf->entry[i].pattern[expanded_pattern_idx],
			 flow,
			 (size < flow_size) ? flow_size - size : 0, error);
		if (ret < 0)
			return ret;
		size += ret;
		if (size <= flow_size) {
			mlx5_flow_adjust_priority(dev, flow);
			LIST_INSERT_HEAD(&flow->verbs, flow->cur_verbs, next);
		}
		/*
		 * Keep a pointer of the first verbs conversion and the layers
		 * it has encountered.
		 */
		if (i == 0) {
			original_verbs = flow->cur_verbs;
			original_verbs_size = size - off2;
			original_layers = flow->layers;
			/*
			 * Move the index of the expanded pattern to the
			 * first item not addressed yet.
			 */
			if (pattern->type == RTE_FLOW_ITEM_TYPE_END) {
				expanded_pattern_idx++;
			} else {
				const struct rte_flow_item *item;

				for (item = pattern;
				     item->type != RTE_FLOW_ITEM_TYPE_END;
				     ++item)
					expanded_pattern_idx++;
			}
		}
	}
	/* Restore the origin layers in the flow. */
	flow->layers = original_layers;
	return size;
}
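
/*
 * Editorial usage sketch (not in the original sources): mlx5_flow_merge()
 * implements a two-pass size negotiation, the same pattern used by
 * mlx5_flow_list_create() below:
 *
 * @code
 * int size = mlx5_flow_merge(dev, NULL, 0, attr, items, actions, error);
 *
 * if (size < 0)
 *	return size; // validation failed
 * flow = rte_calloc(__func__, 1, size, 0);
 * if (flow)
 *	size = mlx5_flow_merge(dev, flow, size, attr, items, actions, error);
 * @endcode
 *
 * The first call only validates and measures, the second one fills the
 * buffer it sized.
 */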

/**
 * Set the mark flag on the Rx queues used by the @p flow if it has a mark
 * or flag modifier.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[in] flow
 *   Pointer to flow structure.
 */
static void
mlx5_flow_rxq_mark_set(struct rte_eth_dev *dev, struct rte_flow *flow)
{
	struct priv *priv = dev->data->dev_private;

	if (flow->modifier & (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK)) {
		unsigned int i;

		for (i = 0; i != flow->rss.queue_num; ++i) {
			int idx = (*flow->queue)[i];
			struct mlx5_rxq_ctrl *rxq_ctrl =
				container_of((*priv->rxqs)[idx],
					     struct mlx5_rxq_ctrl, rxq);

			rxq_ctrl->rxq.mark = 1;
			rxq_ctrl->flow_mark_n++;
		}
	}
}

/**
 * Clear the Rx queue mark associated with the @p flow if no other flow uses
 * it with a mark request.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[in] flow
 *   Pointer to the flow.
 */
static void
mlx5_flow_rxq_mark_trim(struct rte_eth_dev *dev, struct rte_flow *flow)
{
	struct priv *priv = dev->data->dev_private;

	if (flow->modifier & (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK)) {
		unsigned int i;

		for (i = 0; i != flow->rss.queue_num; ++i) {
			int idx = (*flow->queue)[i];
			struct mlx5_rxq_ctrl *rxq_ctrl =
				container_of((*priv->rxqs)[idx],
					     struct mlx5_rxq_ctrl, rxq);

			rxq_ctrl->flow_mark_n--;
			rxq_ctrl->rxq.mark = !!rxq_ctrl->flow_mark_n;
		}
	}
}

/**
 * Clear the mark bit in all Rx queues.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 */
static void
mlx5_flow_rxq_mark_clear(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	unsigned int idx;

	for (idx = 0; idx != priv->rxqs_n; ++idx) {
		struct mlx5_rxq_ctrl *rxq_ctrl;

		/* Skip queues not (yet) configured. */
		if (!(*priv->rxqs)[idx])
			continue;
		rxq_ctrl = container_of((*priv->rxqs)[idx],
					struct mlx5_rxq_ctrl, rxq);
		rxq_ctrl->flow_mark_n = 0;
		rxq_ctrl->rxq.mark = 0;
	}
}

/**
 * Validate a flow supported by the NIC.
 *
 * @see rte_flow_validate()
 * @see rte_flow_ops
 */
int
mlx5_flow_validate(struct rte_eth_dev *dev,
		   const struct rte_flow_attr *attr,
		   const struct rte_flow_item items[],
		   const struct rte_flow_action actions[],
		   struct rte_flow_error *error)
{
	int ret = mlx5_flow_merge(dev, NULL, 0, attr, items, actions, error);

	if (ret < 0)
		return ret;
	return 0;
}

/**
 * Remove the flow.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[in, out] flow
 *   Pointer to flow structure.
 */
static void
mlx5_flow_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
{
	struct mlx5_flow_verbs *verbs;

	LIST_FOREACH(verbs, &flow->verbs, next) {
		if (verbs->flow) {
			claim_zero(mlx5_glue->destroy_flow(verbs->flow));
			verbs->flow = NULL;
		}
		if (verbs->hrxq) {
			if (flow->fate & MLX5_FLOW_FATE_DROP)
				mlx5_hrxq_drop_release(dev);
			else
				mlx5_hrxq_release(dev, verbs->hrxq);
			verbs->hrxq = NULL;
		}
	}
}

/**
 * Apply the flow.
 *
 * @param[in] dev
 *   Pointer to Ethernet device structure.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
		struct rte_flow_error *error)
{
	struct mlx5_flow_verbs *verbs;
	int err;

	LIST_FOREACH(verbs, &flow->verbs, next) {
		if (flow->fate & MLX5_FLOW_FATE_DROP) {
			verbs->hrxq = mlx5_hrxq_drop_new(dev);
			if (!verbs->hrxq) {
				rte_flow_error_set
					(error, errno,
					 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
					 NULL,
					 "cannot get drop hash queue");
				goto error;
			}
		} else {
			struct mlx5_hrxq *hrxq;

			hrxq = mlx5_hrxq_get(dev, flow->key,
					     MLX5_RSS_HASH_KEY_LEN,
					     verbs->hash_fields,
					     (*flow->queue),
					     flow->rss.queue_num);
			if (!hrxq)
				hrxq = mlx5_hrxq_new(dev, flow->key,
						     MLX5_RSS_HASH_KEY_LEN,
						     verbs->hash_fields,
						     (*flow->queue),
						     flow->rss.queue_num);
			if (!hrxq) {
				rte_flow_error_set
					(error, rte_errno,
					 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
					 NULL,
					 "cannot get hash queue");
				goto error;
			}
			verbs->hrxq = hrxq;
		}
		verbs->flow =
			mlx5_glue->create_flow(verbs->hrxq->qp, verbs->attr);
		if (!verbs->flow) {
			rte_flow_error_set(error, errno,
					   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
					   NULL,
					   "hardware refuses to create flow");
			goto error;
		}
	}
	return 0;
error:
	err = rte_errno; /* Save rte_errno before cleanup. */
	LIST_FOREACH(verbs, &flow->verbs, next) {
		if (verbs->hrxq) {
			if (flow->fate & MLX5_FLOW_FATE_DROP)
				mlx5_hrxq_drop_release(dev);
			else
				mlx5_hrxq_release(dev, verbs->hrxq);
			verbs->hrxq = NULL;
		}
	}
	rte_errno = err; /* Restore rte_errno. */
	return -rte_errno;
}

/**
 * Create a flow and add it to @p list.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param list
 *   Pointer to a TAILQ flow list.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] items
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A flow on success, NULL otherwise and rte_errno is set.
 */
static struct rte_flow *
mlx5_flow_list_create(struct rte_eth_dev *dev,
		      struct mlx5_flows *list,
		      const struct rte_flow_attr *attr,
		      const struct rte_flow_item items[],
		      const struct rte_flow_action actions[],
		      struct rte_flow_error *error)
{
	struct rte_flow *flow = NULL;
	size_t size = 0;
	int ret;

	/* First pass: compute the amount of memory needed. */
	ret = mlx5_flow_merge(dev, flow, size, attr, items, actions, error);
	if (ret < 0)
		return NULL;
	size = ret;
	flow = rte_calloc(__func__, 1, size, 0);
	if (!flow) {
		rte_flow_error_set(error, ENOMEM,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "not enough memory to create flow");
		return NULL;
	}
	/* Second pass: fill the allocated flow. */
	ret = mlx5_flow_merge(dev, flow, size, attr, items, actions, error);
	if (ret < 0) {
		rte_free(flow);
		return NULL;
	}
	assert((size_t)ret == size);
	if (dev->data->dev_started) {
		ret = mlx5_flow_apply(dev, flow, error);
		if (ret < 0) {
			ret = rte_errno; /* Save rte_errno before cleanup. */
			mlx5_flow_remove(dev, flow);
			rte_free(flow);
			rte_errno = ret; /* Restore rte_errno. */
			return NULL;
		}
	}
	TAILQ_INSERT_TAIL(list, flow, next);
	mlx5_flow_rxq_mark_set(dev, flow);
	return flow;
}

/**
 * Create a flow.
 *
 * @see rte_flow_create()
 */
struct rte_flow *
mlx5_flow_create(struct rte_eth_dev *dev,
		 const struct rte_flow_attr *attr,
		 const struct rte_flow_item items[],
		 const struct rte_flow_action actions[],
		 struct rte_flow_error *error)
{
	return mlx5_flow_list_create
		(dev, &((struct priv *)dev->data->dev_private)->flows,
		 attr, items, actions, error);
}
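
/*
 * Illustrative usage sketch (hypothetical values): creating a rule that
 * steers matching packets to Rx queue 0 lands here via mlx5_flow_ops,
 * reusing the attr/pattern arrays shown in the sketch above.
 *
 *	struct rte_flow_action_queue queue_conf = { .index = 0 };
 *	struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue_conf },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 *	struct rte_flow *f;
 *
 *	f = rte_flow_create(port_id, &attr, pattern, actions, &err);
 *	if (!f)
 *		printf("cannot create flow: %s\n", err.message);
 */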

/**
 * Destroy a flow in a list.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param list
 *   Pointer to a TAILQ flow list.
 * @param[in] flow
 *   Flow to destroy.
 */
static void
mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
		       struct rte_flow *flow)
{
	mlx5_flow_remove(dev, flow);
	TAILQ_REMOVE(list, flow, next);
	mlx5_flow_rxq_mark_trim(dev, flow);
	rte_free(flow);
}

/**
 * Destroy all flows.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param list
 *   Pointer to a TAILQ flow list.
 */
void
mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
{
	while (!TAILQ_EMPTY(list)) {
		struct rte_flow *flow = TAILQ_FIRST(list);

		mlx5_flow_list_destroy(dev, list, flow);
	}
}

/**
 * Remove all flows.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param list
 *   Pointer to a TAILQ flow list.
 */
void
mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
{
	struct rte_flow *flow;

	TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next)
		mlx5_flow_remove(dev, flow);
	mlx5_flow_rxq_mark_clear(dev);
}

/**
 * Add all flows.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param list
 *   Pointer to a TAILQ flow list.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
{
	struct rte_flow *flow;
	struct rte_flow_error error;
	int ret = 0;

	TAILQ_FOREACH(flow, list, next) {
		ret = mlx5_flow_apply(dev, flow, &error);
		if (ret < 0)
			goto error;
		mlx5_flow_rxq_mark_set(dev, flow);
	}
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	mlx5_flow_stop(dev, list);
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}
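
/*
 * mlx5_flow_stop() and mlx5_flow_start() form the PMD's suspend/resume
 * pair. A sketch of the expected call from the port start path
 * (hypothetical error handling):
 *
 *	if (mlx5_flow_start(dev, &priv->flows)) {
 *		DRV_LOG(ERR, "port %u cannot re-apply flows: %s",
 *			dev->data->port_id, strerror(rte_errno));
 *		mlx5_flow_stop(dev, &priv->flows);
 *	}
 */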

/**
 * Verify the flow list is empty.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   The number of flows not released.
 */
int
mlx5_flow_verify(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	struct rte_flow *flow;
	int ret = 0;

	TAILQ_FOREACH(flow, &priv->flows, next) {
		DRV_LOG(DEBUG, "port %u flow %p still referenced",
			dev->data->port_id, (void *)flow);
		++ret;
	}
	return ret;
}
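
/*
 * Sketch of the intended use (assumption: called from the device close
 * path to report rules the application failed to destroy):
 *
 *	if (mlx5_flow_verify(dev))
 *		DRV_LOG(WARNING, "port %u some flows still remain",
 *			dev->data->port_id);
 */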

/**
 * Enable a control flow configured from the control plane.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param eth_spec
 *   An Ethernet flow spec to apply.
 * @param eth_mask
 *   An Ethernet flow mask to apply.
 * @param vlan_spec
 *   A VLAN flow spec to apply.
 * @param vlan_mask
 *   A VLAN flow mask to apply.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
		    struct rte_flow_item_eth *eth_spec,
		    struct rte_flow_item_eth *eth_mask,
		    struct rte_flow_item_vlan *vlan_spec,
		    struct rte_flow_item_vlan *vlan_mask)
{
	struct priv *priv = dev->data->dev_private;
	const struct rte_flow_attr attr = {
		.ingress = 1,
		.priority = MLX5_FLOW_PRIO_RSVD,
	};
	struct rte_flow_item items[] = {
		{
			.type = RTE_FLOW_ITEM_TYPE_ETH,
			.spec = eth_spec,
			.mask = eth_mask,
		},
		{
			.type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
					      RTE_FLOW_ITEM_TYPE_END,
			.spec = vlan_spec,
			.mask = vlan_mask,
		},
		{
			.type = RTE_FLOW_ITEM_TYPE_END,
		},
	};
	uint16_t queue[priv->reta_idx_n];
	struct rte_flow_action_rss action_rss = {
		.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
		.level = 0,
		.types = priv->rss_conf.rss_hf,
		.key_len = priv->rss_conf.rss_key_len,
		.queue_num = priv->reta_idx_n,
		.key = priv->rss_conf.rss_key,
		.queue = queue,
	};
	struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_RSS,
			.conf = &action_rss,
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		},
	};
	struct rte_flow *flow;
	struct rte_flow_error error;
	unsigned int i;

	if (!priv->reta_idx_n) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	for (i = 0; i != priv->reta_idx_n; ++i)
		queue[i] = (*priv->reta_idx)[i];
	flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
				     actions, &error);
	if (!flow)
		return -rte_errno;
	return 0;
}

/**
 * Enable a control flow configured from the control plane.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param eth_spec
 *   An Ethernet flow spec to apply.
 * @param eth_mask
 *   An Ethernet flow mask to apply.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_ctrl_flow(struct rte_eth_dev *dev,
	       struct rte_flow_item_eth *eth_spec,
	       struct rte_flow_item_eth *eth_mask)
{
	return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
}
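
/*
 * Illustrative sketch: enabling broadcast reception through this helper
 * amounts to matching the all-ones destination MAC, along these lines:
 *
 *	struct rte_flow_item_eth bcast = {
 *		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *	};
 *
 *	mlx5_ctrl_flow(dev, &bcast, &bcast);
 */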

/**
 * Destroy a flow.
 *
 * @see rte_flow_destroy()
 */
int
mlx5_flow_destroy(struct rte_eth_dev *dev,
		  struct rte_flow *flow,
		  struct rte_flow_error *error __rte_unused)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_destroy(dev, &priv->flows, flow);
	return 0;
}

/**
 * Destroy all flows.
 *
 * @see rte_flow_flush()
 */
int
mlx5_flow_flush(struct rte_eth_dev *dev,
		struct rte_flow_error *error __rte_unused)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_flush(dev, &priv->flows);
	return 0;
}

/**
 * Isolated mode.
 *
 * @see rte_flow_isolate()
 */
int
mlx5_flow_isolate(struct rte_eth_dev *dev,
		  int enable,
		  struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	if (dev->data->dev_started) {
		rte_flow_error_set(error, EBUSY,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "port must be stopped first");
		return -rte_errno;
	}
	priv->isolated = !!enable;
	if (enable)
		dev->dev_ops = &mlx5_dev_ops_isolate;
	else
		dev->dev_ops = &mlx5_dev_ops;
	return 0;
}
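
/*
 * Illustrative usage sketch through the generic API; isolation must be
 * requested while the port is stopped, typically right after
 * rte_eth_dev_configure():
 *
 *	struct rte_flow_error err;
 *
 *	if (rte_flow_isolate(port_id, 1, &err))
 *		printf("cannot enter isolated mode: %s\n", err.message);
 */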

/**
 * Convert a flow director filter to a generic flow.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Flow director filter to add.
 * @param attributes
 *   Generic flow parameters structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
			 const struct rte_eth_fdir_filter *fdir_filter,
			 struct mlx5_fdir *attributes)
{
	struct priv *priv = dev->data->dev_private;
	const struct rte_eth_fdir_input *input = &fdir_filter->input;
	const struct rte_eth_fdir_masks *mask =
		&dev->data->dev_conf.fdir_conf.mask;

	/* Validate queue number. */
	if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
		DRV_LOG(ERR, "port %u invalid queue number %d",
			dev->data->port_id, fdir_filter->action.rx_queue);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	attributes->attr.ingress = 1;
	attributes->items[0] = (struct rte_flow_item) {
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.spec = &attributes->l2,
		.mask = &attributes->l2_mask,
	};
	switch (fdir_filter->action.behavior) {
	case RTE_ETH_FDIR_ACCEPT:
		attributes->actions[0] = (struct rte_flow_action){
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &attributes->queue,
		};
		break;
	case RTE_ETH_FDIR_REJECT:
		attributes->actions[0] = (struct rte_flow_action){
			.type = RTE_FLOW_ACTION_TYPE_DROP,
		};
		break;
	default:
		DRV_LOG(ERR, "port %u invalid behavior %d",
			dev->data->port_id,
			fdir_filter->action.behavior);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	attributes->queue.index = fdir_filter->action.rx_queue;
	/* Handle L3. */
	switch (fdir_filter->input.flow_type) {
	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = input->flow.ip4_flow.src_ip,
			.dst_addr = input->flow.ip4_flow.dst_ip,
			.time_to_live = input->flow.ip4_flow.ttl,
			.type_of_service = input->flow.ip4_flow.tos,
			.next_proto_id = input->flow.ip4_flow.proto,
		};
		attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = mask->ipv4_mask.src_ip,
			.dst_addr = mask->ipv4_mask.dst_ip,
			.time_to_live = mask->ipv4_mask.ttl,
			.type_of_service = mask->ipv4_mask.tos,
			.next_proto_id = mask->ipv4_mask.proto,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV4,
			.spec = &attributes->l3,
			.mask = &attributes->l3_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
			.hop_limits = input->flow.ipv6_flow.hop_limits,
			.proto = input->flow.ipv6_flow.proto,
		};
		memcpy(attributes->l3.ipv6.hdr.src_addr,
		       input->flow.ipv6_flow.src_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
		memcpy(attributes->l3.ipv6.hdr.dst_addr,
		       input->flow.ipv6_flow.dst_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
		memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
		       mask->ipv6_mask.src_ip,
		       RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
		memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
		       mask->ipv6_mask.dst_ip,
		       RTE_DIM(attributes->l3_mask.ipv6.hdr.dst_addr));
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV6,
			.spec = &attributes->l3,
			.mask = &attributes->l3_mask,
		};
		break;
	default:
		DRV_LOG(ERR, "port %u invalid flow type %d",
			dev->data->port_id, fdir_filter->input.flow_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	/* Handle L4. */
	switch (fdir_filter->input.flow_type) {
	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
		attributes->l4.udp.hdr = (struct udp_hdr){
			.src_port = input->flow.udp4_flow.src_port,
			.dst_port = input->flow.udp4_flow.dst_port,
		};
		attributes->l4_mask.udp.hdr = (struct udp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_UDP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
		attributes->l4.tcp.hdr = (struct tcp_hdr){
			.src_port = input->flow.tcp4_flow.src_port,
			.dst_port = input->flow.tcp4_flow.dst_port,
		};
		attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_TCP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
		attributes->l4.udp.hdr = (struct udp_hdr){
			.src_port = input->flow.udp6_flow.src_port,
			.dst_port = input->flow.udp6_flow.dst_port,
		};
		attributes->l4_mask.udp.hdr = (struct udp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_UDP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
		attributes->l4.tcp.hdr = (struct tcp_hdr){
			.src_port = input->flow.tcp6_flow.src_port,
			.dst_port = input->flow.tcp6_flow.dst_port,
		};
		attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_TCP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
		break;
	default:
		DRV_LOG(ERR, "port %u invalid flow type %d",
			dev->data->port_id, fdir_filter->input.flow_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	return 0;
}
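
/*
 * Example of an input this function converts (hypothetical values): an
 * IPv4/UDP perfect filter steering packets to Rx queue 3 becomes an
 * ETH / IPV4 / UDP pattern with a QUEUE action.
 *
 *	struct rte_eth_fdir_filter f = {
 *		.input = {
 *			.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
 *			.flow.udp4_flow = {
 *				.ip.dst_ip = rte_cpu_to_be_32(IPv4(10, 0, 0, 1)),
 *				.dst_port = rte_cpu_to_be_16(4789),
 *			},
 *		},
 *		.action = {
 *			.behavior = RTE_ETH_FDIR_ACCEPT,
 *			.rx_queue = 3,
 *		},
 *	};
 */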

/**
 * Add new flow director filter and store it in list.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Flow director filter to add.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_add(struct rte_eth_dev *dev,
		     const struct rte_eth_fdir_filter *fdir_filter)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_fdir attributes = {
		.attr.group = 0,
		.l2_mask = {
			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.type = 0,
		},
	};
	struct rte_flow_error error;
	struct rte_flow *flow;
	int ret;

	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
	if (ret)
		return ret;
	flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
				     attributes.items, attributes.actions,
				     &error);
	if (flow) {
		DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
			(void *)flow);
		return 0;
	}
	return -rte_errno;
}

/**
 * Delete specific filter.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Filter to be deleted.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_delete(struct rte_eth_dev *dev __rte_unused,
			const struct rte_eth_fdir_filter *fdir_filter
			__rte_unused)
{
	rte_errno = ENOTSUP;
	return -rte_errno;
}

/**
 * Update queue for specific filter.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Filter to be updated.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_update(struct rte_eth_dev *dev,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	int ret;

	ret = mlx5_fdir_filter_delete(dev, fdir_filter);
	if (ret)
		return ret;
	return mlx5_fdir_filter_add(dev, fdir_filter);
}

/**
 * Flush all filters.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
static void
mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_flush(dev, &priv->flows);
}

/**
 * Get flow director information.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[out] fdir_info
 *   Resulting flow director information.
 */
static void
mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
{
	struct rte_eth_fdir_masks *mask =
		&dev->data->dev_conf.fdir_conf.mask;

	fdir_info->mode = dev->data->dev_conf.fdir_conf.mode;
	fdir_info->guarant_spc = 0;
	rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
	fdir_info->max_flexpayload = 0;
	fdir_info->flow_types_mask[0] = 0;
	fdir_info->flex_payload_unit = 0;
	fdir_info->max_flex_payload_segment_num = 0;
	fdir_info->flex_payload_limit = 0;
	memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
}

/**
 * Deal with flow director operations.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
		    void *arg)
{
	enum rte_fdir_mode fdir_mode =
		dev->data->dev_conf.fdir_conf.mode;

	if (filter_op == RTE_ETH_FILTER_NOP)
		return 0;
	if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
	    fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
		DRV_LOG(ERR, "port %u flow director mode %d not supported",
			dev->data->port_id, fdir_mode);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	switch (filter_op) {
	case RTE_ETH_FILTER_ADD:
		return mlx5_fdir_filter_add(dev, arg);
	case RTE_ETH_FILTER_UPDATE:
		return mlx5_fdir_filter_update(dev, arg);
	case RTE_ETH_FILTER_DELETE:
		return mlx5_fdir_filter_delete(dev, arg);
	case RTE_ETH_FILTER_FLUSH:
		mlx5_fdir_filter_flush(dev);
		break;
	case RTE_ETH_FILTER_INFO:
		mlx5_fdir_info_get(dev, arg);
		break;
	default:
		DRV_LOG(DEBUG, "port %u unknown operation %u",
			dev->data->port_id, filter_op);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	return 0;
}

/**
 * Manage filter operations.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param filter_type
 *   Filter type.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
		     enum rte_filter_type filter_type,
		     enum rte_filter_op filter_op,
		     void *arg)
{
	switch (filter_type) {
	case RTE_ETH_FILTER_GENERIC:
		if (filter_op != RTE_ETH_FILTER_GET) {
			rte_errno = EINVAL;
			return -rte_errno;
		}
		*(const void **)arg = &mlx5_flow_ops;
		return 0;
	case RTE_ETH_FILTER_FDIR:
		return mlx5_fdir_ctrl_func(dev, filter_op, arg);
	default:
		DRV_LOG(ERR, "port %u filter type (%d) not supported",
			dev->data->port_id, filter_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	return 0;
}
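
/*
 * Illustrative sketch: ethdev resolves the generic flow ops through this
 * callback, roughly as below, which is why RTE_ETH_FILTER_GENERIC with
 * RTE_ETH_FILTER_GET must expose the mlx5_flow_ops table:
 *
 *	const struct rte_flow_ops *ops = NULL;
 *
 *	mlx5_dev_filter_ctrl(dev, RTE_ETH_FILTER_GENERIC,
 *			     RTE_ETH_FILTER_GET, &ops);
 */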