1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2016 6WIND S.A.
3 * Copyright 2016 Mellanox Technologies, Ltd
6 #include <netinet/in.h>
13 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
15 #pragma GCC diagnostic ignored "-Wpedantic"
17 #include <infiniband/verbs.h>
19 #pragma GCC diagnostic error "-Wpedantic"
22 #include <rte_common.h>
23 #include <rte_ether.h>
24 #include <rte_ethdev_driver.h>
26 #include <rte_flow_driver.h>
27 #include <rte_malloc.h>
31 #include "mlx5_defs.h"
32 #include "mlx5_flow.h"
33 #include "mlx5_glue.h"
35 #include "mlx5_rxtx.h"
37 /* Dev ops structures defined in mlx5.c. */
38 extern const struct eth_dev_ops mlx5_dev_ops;
39 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
41 /** Device flow drivers. */
42 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
43 extern const struct mlx5_flow_driver_ops mlx5_flow_dv_drv_ops;
45 extern const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops;
47 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops;
49 const struct mlx5_flow_driver_ops *flow_drv_ops[] = {
50 [MLX5_FLOW_TYPE_MIN] = &mlx5_flow_null_drv_ops,
51 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
52 [MLX5_FLOW_TYPE_DV] = &mlx5_flow_dv_drv_ops,
54 [MLX5_FLOW_TYPE_VERBS] = &mlx5_flow_verbs_drv_ops,
55 [MLX5_FLOW_TYPE_MAX] = &mlx5_flow_null_drv_ops
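/*
 * flow_drv_ops is indexed by the device flow type, so driver dispatch is a
 * plain table lookup. A minimal sketch (assuming a helper such as
 * flow_get_drv_type(), defined elsewhere in this file, resolves the type):
 *
 *	enum mlx5_flow_drv_type type = flow_get_drv_type(dev, attr);
 *	const struct mlx5_flow_driver_ops *fops = flow_drv_ops[type];
 *	ret = fops->validate(dev, attr, items, actions, external, error);
 */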
60 MLX5_EXPANSION_ROOT_OUTER,
61 MLX5_EXPANSION_ROOT_ETH_VLAN,
62 MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN,
63 MLX5_EXPANSION_OUTER_ETH,
64 MLX5_EXPANSION_OUTER_ETH_VLAN,
65 MLX5_EXPANSION_OUTER_VLAN,
66 MLX5_EXPANSION_OUTER_IPV4,
67 MLX5_EXPANSION_OUTER_IPV4_UDP,
68 MLX5_EXPANSION_OUTER_IPV4_TCP,
69 MLX5_EXPANSION_OUTER_IPV6,
70 MLX5_EXPANSION_OUTER_IPV6_UDP,
71 MLX5_EXPANSION_OUTER_IPV6_TCP,
73 MLX5_EXPANSION_VXLAN_GPE,
77 MLX5_EXPANSION_ETH_VLAN,
80 MLX5_EXPANSION_IPV4_UDP,
81 MLX5_EXPANSION_IPV4_TCP,
83 MLX5_EXPANSION_IPV6_UDP,
84 MLX5_EXPANSION_IPV6_TCP,
87 /** Supported expansion of items. */
88 static const struct rte_flow_expand_node mlx5_support_expansion[] = {
89 [MLX5_EXPANSION_ROOT] = {
90 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
93 .type = RTE_FLOW_ITEM_TYPE_END,
95 [MLX5_EXPANSION_ROOT_OUTER] = {
96 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH,
97 MLX5_EXPANSION_OUTER_IPV4,
98 MLX5_EXPANSION_OUTER_IPV6),
99 .type = RTE_FLOW_ITEM_TYPE_END,
101 [MLX5_EXPANSION_ROOT_ETH_VLAN] = {
102 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH_VLAN),
103 .type = RTE_FLOW_ITEM_TYPE_END,
105 [MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN] = {
106 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH_VLAN),
107 .type = RTE_FLOW_ITEM_TYPE_END,
109 [MLX5_EXPANSION_OUTER_ETH] = {
110 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
111 MLX5_EXPANSION_OUTER_IPV6,
112 MLX5_EXPANSION_MPLS),
113 .type = RTE_FLOW_ITEM_TYPE_ETH,
116 [MLX5_EXPANSION_OUTER_ETH_VLAN] = {
117 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_VLAN),
118 .type = RTE_FLOW_ITEM_TYPE_ETH,
121 [MLX5_EXPANSION_OUTER_VLAN] = {
122 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
123 MLX5_EXPANSION_OUTER_IPV6),
124 .type = RTE_FLOW_ITEM_TYPE_VLAN,
126 [MLX5_EXPANSION_OUTER_IPV4] = {
127 .next = RTE_FLOW_EXPAND_RSS_NEXT
128 (MLX5_EXPANSION_OUTER_IPV4_UDP,
129 MLX5_EXPANSION_OUTER_IPV4_TCP,
132 MLX5_EXPANSION_IPV6),
133 .type = RTE_FLOW_ITEM_TYPE_IPV4,
134 .rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
135 ETH_RSS_NONFRAG_IPV4_OTHER,
137 [MLX5_EXPANSION_OUTER_IPV4_UDP] = {
138 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
139 MLX5_EXPANSION_VXLAN_GPE),
140 .type = RTE_FLOW_ITEM_TYPE_UDP,
141 .rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
143 [MLX5_EXPANSION_OUTER_IPV4_TCP] = {
144 .type = RTE_FLOW_ITEM_TYPE_TCP,
145 .rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
147 [MLX5_EXPANSION_OUTER_IPV6] = {
148 .next = RTE_FLOW_EXPAND_RSS_NEXT
149 (MLX5_EXPANSION_OUTER_IPV6_UDP,
150 MLX5_EXPANSION_OUTER_IPV6_TCP,
152 MLX5_EXPANSION_IPV6),
153 .type = RTE_FLOW_ITEM_TYPE_IPV6,
154 .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
155 ETH_RSS_NONFRAG_IPV6_OTHER,
157 [MLX5_EXPANSION_OUTER_IPV6_UDP] = {
158 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
159 MLX5_EXPANSION_VXLAN_GPE),
160 .type = RTE_FLOW_ITEM_TYPE_UDP,
161 .rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
163 [MLX5_EXPANSION_OUTER_IPV6_TCP] = {
164 .type = RTE_FLOW_ITEM_TYPE_TCP,
165 .rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
167 [MLX5_EXPANSION_VXLAN] = {
168 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH),
169 .type = RTE_FLOW_ITEM_TYPE_VXLAN,
171 [MLX5_EXPANSION_VXLAN_GPE] = {
172 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
174 MLX5_EXPANSION_IPV6),
175 .type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
177 [MLX5_EXPANSION_GRE] = {
178 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4),
179 .type = RTE_FLOW_ITEM_TYPE_GRE,
181 [MLX5_EXPANSION_MPLS] = {
182 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
183 MLX5_EXPANSION_IPV6),
184 .type = RTE_FLOW_ITEM_TYPE_MPLS,
186 [MLX5_EXPANSION_ETH] = {
187 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
188 MLX5_EXPANSION_IPV6),
189 .type = RTE_FLOW_ITEM_TYPE_ETH,
191 [MLX5_EXPANSION_ETH_VLAN] = {
192 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VLAN),
193 .type = RTE_FLOW_ITEM_TYPE_ETH,
195 [MLX5_EXPANSION_VLAN] = {
196 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
197 MLX5_EXPANSION_IPV6),
198 .type = RTE_FLOW_ITEM_TYPE_VLAN,
200 [MLX5_EXPANSION_IPV4] = {
201 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4_UDP,
202 MLX5_EXPANSION_IPV4_TCP),
203 .type = RTE_FLOW_ITEM_TYPE_IPV4,
204 .rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
205 ETH_RSS_NONFRAG_IPV4_OTHER,
207 [MLX5_EXPANSION_IPV4_UDP] = {
208 .type = RTE_FLOW_ITEM_TYPE_UDP,
209 .rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
211 [MLX5_EXPANSION_IPV4_TCP] = {
212 .type = RTE_FLOW_ITEM_TYPE_TCP,
213 .rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
215 [MLX5_EXPANSION_IPV6] = {
216 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV6_UDP,
217 MLX5_EXPANSION_IPV6_TCP),
218 .type = RTE_FLOW_ITEM_TYPE_IPV6,
219 .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
220 ETH_RSS_NONFRAG_IPV6_OTHER,
222 [MLX5_EXPANSION_IPV6_UDP] = {
223 .type = RTE_FLOW_ITEM_TYPE_UDP,
224 .rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
226 [MLX5_EXPANSION_IPV6_TCP] = {
227 .type = RTE_FLOW_ITEM_TYPE_TCP,
228 .rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
232 static const struct rte_flow_ops mlx5_flow_ops = {
233 .validate = mlx5_flow_validate,
234 .create = mlx5_flow_create,
235 .destroy = mlx5_flow_destroy,
236 .flush = mlx5_flow_flush,
237 .isolate = mlx5_flow_isolate,
238 .query = mlx5_flow_query,
241 /* Convert an FDIR request to a generic flow. */
243 struct rte_flow_attr attr;
244 struct rte_flow_item items[4];
245 struct rte_flow_item_eth l2;
246 struct rte_flow_item_eth l2_mask;
248 struct rte_flow_item_ipv4 ipv4;
249 struct rte_flow_item_ipv6 ipv6;
252 struct rte_flow_item_ipv4 ipv4;
253 struct rte_flow_item_ipv6 ipv6;
256 struct rte_flow_item_udp udp;
257 struct rte_flow_item_tcp tcp;
260 struct rte_flow_item_udp udp;
261 struct rte_flow_item_tcp tcp;
263 struct rte_flow_action actions[2];
264 struct rte_flow_action_queue queue;
267 /* Map of Verbs to Flow priority with 8 Verbs priorities. */
268 static const uint32_t priority_map_3[][MLX5_PRIORITY_MAP_MAX] = {
269 { 0, 1, 2 }, { 2, 3, 4 }, { 5, 6, 7 },
272 /* Map of Verbs to Flow priority with 16 Verbs priorities. */
273 static const uint32_t priority_map_5[][MLX5_PRIORITY_MAP_MAX] = {
274 { 0, 1, 2 }, { 3, 4, 5 }, { 6, 7, 8 },
275 { 9, 10, 11 }, { 12, 13, 14 },
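/*
 * A worked example: with 16 Verbs priorities the driver selects
 * priority_map_5, indexed as [base priority][subpriority]. A rule with
 * base priority 1 and subpriority 2 therefore lands on Verbs priority
 * priority_map_5[1][2] == 5 (see mlx5_flow_adjust_priority() below).
 */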
278 /* Tunnel information. */
279 struct mlx5_flow_tunnel_info {
280 uint64_t tunnel; /**< Tunnel bit (see MLX5_FLOW_*). */
281 uint32_t ptype; /**< Tunnel Ptype (see RTE_PTYPE_*). */
284 static struct mlx5_flow_tunnel_info tunnels_info[] = {
286 .tunnel = MLX5_FLOW_LAYER_VXLAN,
287 .ptype = RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP,
290 .tunnel = MLX5_FLOW_LAYER_GENEVE,
291 .ptype = RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L4_UDP,
294 .tunnel = MLX5_FLOW_LAYER_VXLAN_GPE,
295 .ptype = RTE_PTYPE_TUNNEL_VXLAN_GPE | RTE_PTYPE_L4_UDP,
298 .tunnel = MLX5_FLOW_LAYER_GRE,
299 .ptype = RTE_PTYPE_TUNNEL_GRE,
302 .tunnel = MLX5_FLOW_LAYER_MPLS | MLX5_FLOW_LAYER_OUTER_L4_UDP,
303 .ptype = RTE_PTYPE_TUNNEL_MPLS_IN_UDP | RTE_PTYPE_L4_UDP,
306 .tunnel = MLX5_FLOW_LAYER_MPLS,
307 .ptype = RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
310 .tunnel = MLX5_FLOW_LAYER_NVGRE,
311 .ptype = RTE_PTYPE_TUNNEL_NVGRE,
314 .tunnel = MLX5_FLOW_LAYER_IPIP,
315 .ptype = RTE_PTYPE_TUNNEL_IP,
318 .tunnel = MLX5_FLOW_LAYER_IPV6_ENCAP,
319 .ptype = RTE_PTYPE_TUNNEL_IP,
324 * Translate tag ID to register.
327 * Pointer to the Ethernet device structure.
329 * The feature that requests the register.
331 * The requested register ID.
333 * Error description in case of failure.
336 * The requested register on success, a negative errno
337 * value otherwise and rte_errno is set.
340 mlx5_flow_get_reg_id(struct rte_eth_dev *dev,
341 enum mlx5_feature_name feature,
343 struct rte_flow_error *error)
345 struct mlx5_priv *priv = dev->data->dev_private;
346 struct mlx5_dev_config *config = &priv->config;
349 case MLX5_HAIRPIN_RX:
351 case MLX5_HAIRPIN_TX:
353 case MLX5_METADATA_RX:
354 switch (config->dv_xmeta_en) {
355 case MLX5_XMETA_MODE_LEGACY:
357 case MLX5_XMETA_MODE_META16:
359 case MLX5_XMETA_MODE_META32:
363 case MLX5_METADATA_TX:
365 case MLX5_METADATA_FDB:
368 switch (config->dv_xmeta_en) {
369 case MLX5_XMETA_MODE_LEGACY:
371 case MLX5_XMETA_MODE_META16:
373 case MLX5_XMETA_MODE_META32:
381 * Registers reg_c_2 .. reg_c_7 could be engaged here, but
382 * reg_c_2 is reserved for coloring by meters and reg_c_3 is
383 * reserved for the split-flow TAG, so tag IDs start at reg_c_4.
385 if (id > (REG_C_7 - REG_C_4))
386 return rte_flow_error_set
388 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
389 NULL, "invalid tag id");
390 if (config->flow_mreg_c[id + REG_C_4 - REG_C_0] == REG_NONE)
391 return rte_flow_error_set
393 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
394 NULL, "unsupported tag id");
395 return config->flow_mreg_c[id + REG_C_4 - REG_C_0];
398 return rte_flow_error_set(error, EINVAL,
399 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
400 NULL, "invalid feature name");
404 * Check extensive flow metadata register support.
407 * Pointer to rte_eth_dev structure.
410 * True if device supports extensive flow metadata register, otherwise false.
413 mlx5_flow_ext_mreg_supported(struct rte_eth_dev *dev)
415 struct mlx5_priv *priv = dev->data->dev_private;
416 struct mlx5_dev_config *config = &priv->config;
419 * Having an available reg_c can be regarded as supporting
420 * extensive flow metadata registers, which implies:
421 * - the metadata register copy action by modify header.
422 * - 16 modify header actions are supported.
423 * - reg_c's are preserved across different domains (FDB and NIC) on
424 * packet loopback by flow lookup miss.
426 return config->flow_mreg_c[2] != REG_NONE;
430 * Discover the maximum number of priorities available.
433 * Pointer to the Ethernet device structure.
436 * Number of supported flow priorities on success, a negative errno
437 * value otherwise and rte_errno is set.
440 mlx5_flow_discover_priorities(struct rte_eth_dev *dev)
442 struct mlx5_priv *priv = dev->data->dev_private;
444 struct ibv_flow_attr attr;
445 struct ibv_flow_spec_eth eth;
446 struct ibv_flow_spec_action_drop drop;
450 .port = (uint8_t)priv->ibv_port,
453 .type = IBV_FLOW_SPEC_ETH,
454 .size = sizeof(struct ibv_flow_spec_eth),
457 .size = sizeof(struct ibv_flow_spec_action_drop),
458 .type = IBV_FLOW_SPEC_ACTION_DROP,
461 struct ibv_flow *flow;
462 struct mlx5_hrxq *drop = mlx5_hrxq_drop_new(dev);
463 uint16_t vprio[] = { 8, 16 };
471 for (i = 0; i != RTE_DIM(vprio); i++) {
472 flow_attr.attr.priority = vprio[i] - 1;
473 flow = mlx5_glue->create_flow(drop->qp, &flow_attr.attr);
476 claim_zero(mlx5_glue->destroy_flow(flow));
479 mlx5_hrxq_drop_release(dev);
482 priority = RTE_DIM(priority_map_3);
485 priority = RTE_DIM(priority_map_5);
490 "port %u verbs maximum priority: %d expected 8/16",
491 dev->data->port_id, priority);
494 DRV_LOG(INFO, "port %u flow maximum priority: %d",
495 dev->data->port_id, priority);
500 * Adjust flow priority based on the highest layer and the requested priority.
503 * Pointer to the Ethernet device structure.
504 * @param[in] priority
505 * The rule base priority.
506 * @param[in] subpriority
507 * The priority based on the items.
512 uint32_t mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
513 uint32_t subpriority)
516 struct mlx5_priv *priv = dev->data->dev_private;
518 switch (priv->config.flow_prio) {
519 case RTE_DIM(priority_map_3):
520 res = priority_map_3[priority][subpriority];
522 case RTE_DIM(priority_map_5):
523 res = priority_map_5[priority][subpriority];
530 * Verify the @p item specifications (spec, last, mask) are compatible with the
534 * Item specification.
536 * @p item->mask or flow default bit-masks.
537 * @param[in] nic_mask
538 * Bit-masks covering the fields supported by the NIC, to compare with the user mask.
540 * Bit-mask size in bytes.
542 * Pointer to error structure.
545 * 0 on success, a negative errno value otherwise and rte_errno is set.
548 mlx5_flow_item_acceptable(const struct rte_flow_item *item,
550 const uint8_t *nic_mask,
552 struct rte_flow_error *error)
557 for (i = 0; i < size; ++i)
558 if ((nic_mask[i] | mask[i]) != nic_mask[i])
559 return rte_flow_error_set(error, ENOTSUP,
560 RTE_FLOW_ERROR_TYPE_ITEM,
562 "mask enables non supported"
564 if (!item->spec && (item->mask || item->last))
565 return rte_flow_error_set(error, EINVAL,
566 RTE_FLOW_ERROR_TYPE_ITEM, item,
567 "mask/last without a spec is not"
569 if (item->spec && item->last) {
575 for (i = 0; i < size; ++i) {
576 spec[i] = ((const uint8_t *)item->spec)[i] & mask[i];
577 last[i] = ((const uint8_t *)item->last)[i] & mask[i];
579 ret = memcmp(spec, last, size);
581 return rte_flow_error_set(error, EINVAL,
582 RTE_FLOW_ERROR_TYPE_ITEM,
584 "range is not valid");
590 * Adjust the hash fields according to the @p flow information.
592 * @param[in] dev_flow
593 * Pointer to the mlx5_flow.
595 * 1 when the hash field is for a tunnel item.
596 * @param[in] layer_types
598 * @param[in] hash_fields
602 * The hash fields that should be used.
605 mlx5_flow_hashfields_adjust(struct mlx5_flow *dev_flow,
606 int tunnel __rte_unused, uint64_t layer_types,
607 uint64_t hash_fields)
609 struct rte_flow *flow = dev_flow->flow;
610 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
611 int rss_request_inner = flow->rss.level >= 2;
613 /* Check RSS hash level for tunnel. */
614 if (tunnel && rss_request_inner)
615 hash_fields |= IBV_RX_HASH_INNER;
616 else if (tunnel || rss_request_inner)
619 /* Check if requested layer matches RSS hash fields. */
620 if (!(flow->rss.types & layer_types))
626 * Look up and set the ptype in the Rx queue data. A single ptype can be used;
627 * if several tunnel rules are used on this queue, the tunnel ptype will be
631 * Rx queue to update.
634 flow_rxq_tunnel_ptype_update(struct mlx5_rxq_ctrl *rxq_ctrl)
637 uint32_t tunnel_ptype = 0;
639 /* Look up the ptype to use. */
640 for (i = 0; i != MLX5_FLOW_TUNNEL; ++i) {
641 if (!rxq_ctrl->flow_tunnels_n[i])
644 tunnel_ptype = tunnels_info[i].ptype;
650 rxq_ctrl->rxq.tunnel = tunnel_ptype;
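/*
 * Example: a queue carrying only VXLAN rules reports
 * RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP, while a queue carrying both
 * VXLAN and GRE rules cannot use a single ptype and ends up with
 * rxq.tunnel == 0, per the comment above.
 */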
654 * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) according to the device
658 * Pointer to the Ethernet device structure.
659 * @param[in] dev_flow
660 * Pointer to device flow structure.
663 flow_drv_rxq_flags_set(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow)
665 struct mlx5_priv *priv = dev->data->dev_private;
666 struct rte_flow *flow = dev_flow->flow;
667 const int mark = !!(dev_flow->actions &
668 (MLX5_FLOW_ACTION_FLAG | MLX5_FLOW_ACTION_MARK));
669 const int tunnel = !!(dev_flow->layers & MLX5_FLOW_LAYER_TUNNEL);
672 for (i = 0; i != flow->rss.queue_num; ++i) {
673 int idx = (*flow->rss.queue)[i];
674 struct mlx5_rxq_ctrl *rxq_ctrl =
675 container_of((*priv->rxqs)[idx],
676 struct mlx5_rxq_ctrl, rxq);
679 * To support metadata register copy on Tx loopback,
680 * this must always be enabled (metadata may arrive
681 * from another port, not from local flows only).
683 if (priv->config.dv_flow_en &&
684 priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY &&
685 mlx5_flow_ext_mreg_supported(dev)) {
686 rxq_ctrl->rxq.mark = 1;
687 rxq_ctrl->flow_mark_n = 1;
689 rxq_ctrl->rxq.mark = 1;
690 rxq_ctrl->flow_mark_n++;
695 /* Increase the counter matching the flow. */
696 for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
697 if ((tunnels_info[j].tunnel &
699 tunnels_info[j].tunnel) {
700 rxq_ctrl->flow_tunnels_n[j]++;
704 flow_rxq_tunnel_ptype_update(rxq_ctrl);
710 * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) for a flow
713 * Pointer to the Ethernet device structure.
715 * Pointer to flow structure.
718 flow_rxq_flags_set(struct rte_eth_dev *dev, struct rte_flow *flow)
720 struct mlx5_flow *dev_flow;
722 LIST_FOREACH(dev_flow, &flow->dev_flows, next)
723 flow_drv_rxq_flags_set(dev, dev_flow);
727 * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
728 * device flow if no other flow uses it with the same kind of request.
731 * Pointer to Ethernet device.
732 * @param[in] dev_flow
733 * Pointer to the device flow.
736 flow_drv_rxq_flags_trim(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow)
738 struct mlx5_priv *priv = dev->data->dev_private;
739 struct rte_flow *flow = dev_flow->flow;
740 const int mark = !!(dev_flow->actions &
741 (MLX5_FLOW_ACTION_FLAG | MLX5_FLOW_ACTION_MARK));
742 const int tunnel = !!(dev_flow->layers & MLX5_FLOW_LAYER_TUNNEL);
745 assert(dev->data->dev_started);
746 for (i = 0; i != flow->rss.queue_num; ++i) {
747 int idx = (*flow->rss.queue)[i];
748 struct mlx5_rxq_ctrl *rxq_ctrl =
749 container_of((*priv->rxqs)[idx],
750 struct mlx5_rxq_ctrl, rxq);
752 if (priv->config.dv_flow_en &&
753 priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY &&
754 mlx5_flow_ext_mreg_supported(dev)) {
755 rxq_ctrl->rxq.mark = 1;
756 rxq_ctrl->flow_mark_n = 1;
758 rxq_ctrl->flow_mark_n--;
759 rxq_ctrl->rxq.mark = !!rxq_ctrl->flow_mark_n;
764 /* Decrease the counter matching the flow. */
765 for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
766 if ((tunnels_info[j].tunnel &
768 tunnels_info[j].tunnel) {
769 rxq_ctrl->flow_tunnels_n[j]--;
773 flow_rxq_tunnel_ptype_update(rxq_ctrl);
779 * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
780 * @p flow if no other flow uses it with the same kind of request.
783 * Pointer to Ethernet device.
785 * Pointer to the flow.
788 flow_rxq_flags_trim(struct rte_eth_dev *dev, struct rte_flow *flow)
790 struct mlx5_flow *dev_flow;
792 LIST_FOREACH(dev_flow, &flow->dev_flows, next)
793 flow_drv_rxq_flags_trim(dev, dev_flow);
797 * Clear the Mark/Flag and Tunnel ptype information in all Rx queues.
800 * Pointer to Ethernet device.
803 flow_rxq_flags_clear(struct rte_eth_dev *dev)
805 struct mlx5_priv *priv = dev->data->dev_private;
808 for (i = 0; i != priv->rxqs_n; ++i) {
809 struct mlx5_rxq_ctrl *rxq_ctrl;
812 if (!(*priv->rxqs)[i])
814 rxq_ctrl = container_of((*priv->rxqs)[i],
815 struct mlx5_rxq_ctrl, rxq);
816 rxq_ctrl->flow_mark_n = 0;
817 rxq_ctrl->rxq.mark = 0;
818 for (j = 0; j != MLX5_FLOW_TUNNEL; ++j)
819 rxq_ctrl->flow_tunnels_n[j] = 0;
820 rxq_ctrl->rxq.tunnel = 0;
825 * Return a pointer to the desired action in the list of actions.
828 * The list of actions to search the action in.
830 * The action to find.
833 * Pointer to the action in the list, if found. NULL otherwise.
835 const struct rte_flow_action *
836 mlx5_flow_find_action(const struct rte_flow_action *actions,
837 enum rte_flow_action_type action)
841 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++)
842 if (actions->type == action)
848 * Validate the flag action.
850 * @param[in] action_flags
851 * Bit-fields that hold the actions detected until now.
853 * Attributes of flow that includes this action.
855 * Pointer to error structure.
858 * 0 on success, a negative errno value otherwise and rte_errno is set.
861 mlx5_flow_validate_action_flag(uint64_t action_flags,
862 const struct rte_flow_attr *attr,
863 struct rte_flow_error *error)
866 if (action_flags & MLX5_FLOW_ACTION_DROP)
867 return rte_flow_error_set(error, EINVAL,
868 RTE_FLOW_ERROR_TYPE_ACTION, NULL,
869 "can't drop and flag in same flow");
870 if (action_flags & MLX5_FLOW_ACTION_MARK)
871 return rte_flow_error_set(error, EINVAL,
872 RTE_FLOW_ERROR_TYPE_ACTION, NULL,
873 "can't mark and flag in same flow");
874 if (action_flags & MLX5_FLOW_ACTION_FLAG)
875 return rte_flow_error_set(error, EINVAL,
876 RTE_FLOW_ERROR_TYPE_ACTION, NULL,
878 " actions in same flow");
880 return rte_flow_error_set(error, ENOTSUP,
881 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
882 "flag action not supported for "
888 * Validate the mark action.
891 * Pointer to the mark action.
892 * @param[in] action_flags
893 * Bit-fields that hold the actions detected until now.
895 * Attributes of flow that includes this action.
897 * Pointer to error structure.
900 * 0 on success, a negative errno value otherwise and rte_errno is set.
903 mlx5_flow_validate_action_mark(const struct rte_flow_action *action,
904 uint64_t action_flags,
905 const struct rte_flow_attr *attr,
906 struct rte_flow_error *error)
908 const struct rte_flow_action_mark *mark = action->conf;
911 return rte_flow_error_set(error, EINVAL,
912 RTE_FLOW_ERROR_TYPE_ACTION,
914 "configuration cannot be null");
915 if (mark->id >= MLX5_FLOW_MARK_MAX)
916 return rte_flow_error_set(error, EINVAL,
917 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
919 "mark id must in 0 <= id < "
920 RTE_STR(MLX5_FLOW_MARK_MAX));
921 if (action_flags & MLX5_FLOW_ACTION_DROP)
922 return rte_flow_error_set(error, EINVAL,
923 RTE_FLOW_ERROR_TYPE_ACTION, NULL,
924 "can't drop and mark in same flow");
925 if (action_flags & MLX5_FLOW_ACTION_FLAG)
926 return rte_flow_error_set(error, EINVAL,
927 RTE_FLOW_ERROR_TYPE_ACTION, NULL,
928 "can't flag and mark in same flow");
929 if (action_flags & MLX5_FLOW_ACTION_MARK)
930 return rte_flow_error_set(error, EINVAL,
931 RTE_FLOW_ERROR_TYPE_ACTION, NULL,
932 "can't have 2 mark actions in same"
935 return rte_flow_error_set(error, ENOTSUP,
936 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
937 "mark action not supported for "
943 * Validate the drop action.
945 * @param[in] action_flags
946 * Bit-fields that hold the actions detected until now.
948 * Attributes of flow that includes this action.
950 * Pointer to error structure.
953 * 0 on success, a negative errno value otherwise and rte_errno is set.
956 mlx5_flow_validate_action_drop(uint64_t action_flags,
957 const struct rte_flow_attr *attr,
958 struct rte_flow_error *error)
960 if (action_flags & MLX5_FLOW_ACTION_FLAG)
961 return rte_flow_error_set(error, EINVAL,
962 RTE_FLOW_ERROR_TYPE_ACTION, NULL,
963 "can't drop and flag in same flow");
964 if (action_flags & MLX5_FLOW_ACTION_MARK)
965 return rte_flow_error_set(error, EINVAL,
966 RTE_FLOW_ERROR_TYPE_ACTION, NULL,
967 "can't drop and mark in same flow");
968 if (action_flags & (MLX5_FLOW_FATE_ACTIONS |
969 MLX5_FLOW_FATE_ESWITCH_ACTIONS))
970 return rte_flow_error_set(error, EINVAL,
971 RTE_FLOW_ERROR_TYPE_ACTION, NULL,
972 "can't have 2 fate actions in"
975 return rte_flow_error_set(error, ENOTSUP,
976 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
977 "drop action not supported for "
983 * Validate the queue action.
986 * Pointer to the queue action.
987 * @param[in] action_flags
988 * Bit-fields that hold the actions detected until now.
990 * Pointer to the Ethernet device structure.
992 * Attributes of flow that includes this action.
994 * Pointer to error structure.
997 * 0 on success, a negative errno value otherwise and rte_errno is set.
1000 mlx5_flow_validate_action_queue(const struct rte_flow_action *action,
1001 uint64_t action_flags,
1002 struct rte_eth_dev *dev,
1003 const struct rte_flow_attr *attr,
1004 struct rte_flow_error *error)
1006 struct mlx5_priv *priv = dev->data->dev_private;
1007 const struct rte_flow_action_queue *queue = action->conf;
1009 if (action_flags & MLX5_FLOW_FATE_ACTIONS)
1010 return rte_flow_error_set(error, EINVAL,
1011 RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1012 "can't have 2 fate actions in"
1015 return rte_flow_error_set(error, EINVAL,
1016 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1017 NULL, "No Rx queues configured");
1018 if (queue->index >= priv->rxqs_n)
1019 return rte_flow_error_set(error, EINVAL,
1020 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1022 "queue index out of range");
1023 if (!(*priv->rxqs)[queue->index])
1024 return rte_flow_error_set(error, EINVAL,
1025 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1027 "queue is not configured");
1029 return rte_flow_error_set(error, ENOTSUP,
1030 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1031 "queue action not supported for "
1037 * Validate the rss action.
1040 * Pointer to the RSS action.
1041 * @param[in] action_flags
1042 * Bit-fields that hold the actions detected until now.
1044 * Pointer to the Ethernet device structure.
1046 * Attributes of flow that includes this action.
1047 * @param[in] item_flags
1048 * Items that were detected.
1050 * Pointer to error structure.
1053 * 0 on success, a negative errno value otherwise and rte_errno is set.
1056 mlx5_flow_validate_action_rss(const struct rte_flow_action *action,
1057 uint64_t action_flags,
1058 struct rte_eth_dev *dev,
1059 const struct rte_flow_attr *attr,
1060 uint64_t item_flags,
1061 struct rte_flow_error *error)
1063 struct mlx5_priv *priv = dev->data->dev_private;
1064 const struct rte_flow_action_rss *rss = action->conf;
1065 int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1068 if (action_flags & MLX5_FLOW_FATE_ACTIONS)
1069 return rte_flow_error_set(error, EINVAL,
1070 RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1071 "can't have 2 fate actions"
1073 if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT &&
1074 rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ)
1075 return rte_flow_error_set(error, ENOTSUP,
1076 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1078 "RSS hash function not supported");
1079 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
1084 return rte_flow_error_set(error, ENOTSUP,
1085 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1087 "tunnel RSS is not supported");
1088 /* Allow RSS key_len 0 in case of NULL (default) RSS key. */
1089 if (rss->key_len == 0 && rss->key != NULL)
1090 return rte_flow_error_set(error, ENOTSUP,
1091 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1093 "RSS hash key length 0");
1094 if (rss->key_len > 0 && rss->key_len < MLX5_RSS_HASH_KEY_LEN)
1095 return rte_flow_error_set(error, ENOTSUP,
1096 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1098 "RSS hash key too small");
1099 if (rss->key_len > MLX5_RSS_HASH_KEY_LEN)
1100 return rte_flow_error_set(error, ENOTSUP,
1101 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1103 "RSS hash key too large");
1104 if (rss->queue_num > priv->config.ind_table_max_size)
1105 return rte_flow_error_set(error, ENOTSUP,
1106 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1108 "number of queues too large");
1109 if (rss->types & MLX5_RSS_HF_MASK)
1110 return rte_flow_error_set(error, ENOTSUP,
1111 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1113 "some RSS protocols are not"
1116 return rte_flow_error_set(error, EINVAL,
1117 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1118 NULL, "No Rx queues configured");
1119 if (!rss->queue_num)
1120 return rte_flow_error_set(error, EINVAL,
1121 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1122 NULL, "No queues configured");
1123 for (i = 0; i != rss->queue_num; ++i) {
1124 if (!(*priv->rxqs)[rss->queue[i]])
1125 return rte_flow_error_set
1126 (error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1127 &rss->queue[i], "queue is not configured");
1130 return rte_flow_error_set(error, ENOTSUP,
1131 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1132 "rss action not supported for "
1134 if (rss->level > 1 && !tunnel)
1135 return rte_flow_error_set(error, EINVAL,
1136 RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1137 "inner RSS is not supported for "
1138 "non-tunnel flows");
1143 * Validate the count action.
1146 * Pointer to the Ethernet device structure.
1148 * Attributes of flow that includes this action.
1150 * Pointer to error structure.
1153 * 0 on success, a negative errno value otherwise and rte_errno is set.
1156 mlx5_flow_validate_action_count(struct rte_eth_dev *dev __rte_unused,
1157 const struct rte_flow_attr *attr,
1158 struct rte_flow_error *error)
1161 return rte_flow_error_set(error, ENOTSUP,
1162 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1163 "count action not supported for "
1169 * Verify the @p attributes will be correctly understood by the NIC and store
1170 * them in the @p flow if everything is correct.
1173 * Pointer to the Ethernet device structure.
1174 * @param[in] attributes
1175 * Pointer to flow attributes
1177 * Pointer to error structure.
1180 * 0 on success, a negative errno value otherwise and rte_errno is set.
1183 mlx5_flow_validate_attributes(struct rte_eth_dev *dev,
1184 const struct rte_flow_attr *attributes,
1185 struct rte_flow_error *error)
1187 struct mlx5_priv *priv = dev->data->dev_private;
1188 uint32_t priority_max = priv->config.flow_prio - 1;
1190 if (attributes->group)
1191 return rte_flow_error_set(error, ENOTSUP,
1192 RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
1193 NULL, "groups is not supported");
1194 if (attributes->priority != MLX5_FLOW_PRIO_RSVD &&
1195 attributes->priority >= priority_max)
1196 return rte_flow_error_set(error, ENOTSUP,
1197 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
1198 NULL, "priority out of range");
1199 if (attributes->egress)
1200 return rte_flow_error_set(error, ENOTSUP,
1201 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1202 "egress is not supported");
1203 if (attributes->transfer && !priv->config.dv_esw_en)
1204 return rte_flow_error_set(error, ENOTSUP,
1205 RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
1206 NULL, "transfer is not supported");
1207 if (!attributes->ingress)
1208 return rte_flow_error_set(error, EINVAL,
1209 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
1211 "ingress attribute is mandatory");
1216 * Validate ICMP6 item.
1219 * Item specification.
1220 * @param[in] item_flags
1221 * Bit-fields that hold the items detected until now.
1223 * Pointer to error structure.
1226 * 0 on success, a negative errno value otherwise and rte_errno is set.
1229 mlx5_flow_validate_item_icmp6(const struct rte_flow_item *item,
1230 uint64_t item_flags,
1231 uint8_t target_protocol,
1232 struct rte_flow_error *error)
1234 const struct rte_flow_item_icmp6 *mask = item->mask;
1235 const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1236 const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
1237 MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1238 const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1239 MLX5_FLOW_LAYER_OUTER_L4;
1242 if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMPV6)
1243 return rte_flow_error_set(error, EINVAL,
1244 RTE_FLOW_ERROR_TYPE_ITEM, item,
1245 "protocol filtering not compatible"
1246 " with ICMP6 layer");
1247 if (!(item_flags & l3m))
1248 return rte_flow_error_set(error, EINVAL,
1249 RTE_FLOW_ERROR_TYPE_ITEM, item,
1250 "IPv6 is mandatory to filter on"
1252 if (item_flags & l4m)
1253 return rte_flow_error_set(error, EINVAL,
1254 RTE_FLOW_ERROR_TYPE_ITEM, item,
1255 "multiple L4 layers not supported");
1257 mask = &rte_flow_item_icmp6_mask;
1258 ret = mlx5_flow_item_acceptable
1259 (item, (const uint8_t *)mask,
1260 (const uint8_t *)&rte_flow_item_icmp6_mask,
1261 sizeof(struct rte_flow_item_icmp6), error);
1268 * Validate ICMP item.
1271 * Item specification.
1272 * @param[in] item_flags
1273 * Bit-fields that hold the items detected until now.
1275 * Pointer to error structure.
1278 * 0 on success, a negative errno value otherwise and rte_errno is set.
1281 mlx5_flow_validate_item_icmp(const struct rte_flow_item *item,
1282 uint64_t item_flags,
1283 uint8_t target_protocol,
1284 struct rte_flow_error *error)
1286 const struct rte_flow_item_icmp *mask = item->mask;
1287 const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1288 const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
1289 MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1290 const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1291 MLX5_FLOW_LAYER_OUTER_L4;
1294 if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMP)
1295 return rte_flow_error_set(error, EINVAL,
1296 RTE_FLOW_ERROR_TYPE_ITEM, item,
1297 "protocol filtering not compatible"
1298 " with ICMP layer");
1299 if (!(item_flags & l3m))
1300 return rte_flow_error_set(error, EINVAL,
1301 RTE_FLOW_ERROR_TYPE_ITEM, item,
1302 "IPv4 is mandatory to filter"
1304 if (item_flags & l4m)
1305 return rte_flow_error_set(error, EINVAL,
1306 RTE_FLOW_ERROR_TYPE_ITEM, item,
1307 "multiple L4 layers not supported");
1309 mask = &rte_flow_item_icmp_mask;
1310 ret = mlx5_flow_item_acceptable
1311 (item, (const uint8_t *)mask,
1312 (const uint8_t *)&rte_flow_item_icmp_mask,
1313 sizeof(struct rte_flow_item_icmp), error);
1320 * Validate Ethernet item.
1323 * Item specification.
1324 * @param[in] item_flags
1325 * Bit-fields that hold the items detected until now.
1327 * Pointer to error structure.
1330 * 0 on success, a negative errno value otherwise and rte_errno is set.
1333 mlx5_flow_validate_item_eth(const struct rte_flow_item *item,
1334 uint64_t item_flags,
1335 struct rte_flow_error *error)
1337 const struct rte_flow_item_eth *mask = item->mask;
1338 const struct rte_flow_item_eth nic_mask = {
1339 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1340 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1341 .type = RTE_BE16(0xffff),
1344 int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1345 const uint64_t ethm = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
1346 MLX5_FLOW_LAYER_OUTER_L2;
1348 if (item_flags & ethm)
1349 return rte_flow_error_set(error, ENOTSUP,
1350 RTE_FLOW_ERROR_TYPE_ITEM, item,
1351 "multiple L2 layers not supported");
1352 if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_L3)) ||
1353 (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_L3)))
1354 return rte_flow_error_set(error, EINVAL,
1355 RTE_FLOW_ERROR_TYPE_ITEM, item,
1356 "L2 layer should not follow "
1358 if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_VLAN)) ||
1359 (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_VLAN)))
1360 return rte_flow_error_set(error, EINVAL,
1361 RTE_FLOW_ERROR_TYPE_ITEM, item,
1362 "L2 layer should not follow VLAN");
1364 mask = &rte_flow_item_eth_mask;
1365 ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
1366 (const uint8_t *)&nic_mask,
1367 sizeof(struct rte_flow_item_eth),
1373 * Validate VLAN item.
1376 * Item specification.
1377 * @param[in] item_flags
1378 * Bit-fields that hold the items detected until now.
1380 * Ethernet device flow is being created on.
1382 * Pointer to error structure.
1385 * 0 on success, a negative errno value otherwise and rte_errno is set.
1388 mlx5_flow_validate_item_vlan(const struct rte_flow_item *item,
1389 uint64_t item_flags,
1390 struct rte_eth_dev *dev,
1391 struct rte_flow_error *error)
1393 const struct rte_flow_item_vlan *spec = item->spec;
1394 const struct rte_flow_item_vlan *mask = item->mask;
1395 const struct rte_flow_item_vlan nic_mask = {
1396 .tci = RTE_BE16(UINT16_MAX),
1397 .inner_type = RTE_BE16(UINT16_MAX),
1399 uint16_t vlan_tag = 0;
1400 const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1402 const uint64_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 |
1403 MLX5_FLOW_LAYER_INNER_L4) :
1404 (MLX5_FLOW_LAYER_OUTER_L3 |
1405 MLX5_FLOW_LAYER_OUTER_L4);
1406 const uint64_t vlanm = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
1407 MLX5_FLOW_LAYER_OUTER_VLAN;
1409 if (item_flags & vlanm)
1410 return rte_flow_error_set(error, EINVAL,
1411 RTE_FLOW_ERROR_TYPE_ITEM, item,
1412 "multiple VLAN layers not supported");
1413 else if ((item_flags & l34m) != 0)
1414 return rte_flow_error_set(error, EINVAL,
1415 RTE_FLOW_ERROR_TYPE_ITEM, item,
1416 "VLAN cannot follow L3/L4 layer");
1418 mask = &rte_flow_item_vlan_mask;
1419 ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
1420 (const uint8_t *)&nic_mask,
1421 sizeof(struct rte_flow_item_vlan),
1425 if (!tunnel && mask->tci != RTE_BE16(0x0fff)) {
1426 struct mlx5_priv *priv = dev->data->dev_private;
1428 if (priv->vmwa_context) {
1430 * A non-NULL context means we have a virtual machine
1431 * with SR-IOV enabled, so we have to create a VLAN
1432 * interface to make the hypervisor set up the E-Switch
1433 * vport context correctly. We avoid creating multiple
1434 * VLAN interfaces, so we cannot support a VLAN tag mask.
1436 return rte_flow_error_set(error, EINVAL,
1437 RTE_FLOW_ERROR_TYPE_ITEM,
1439 "VLAN tag mask is not"
1440 " supported in virtual"
1445 vlan_tag = spec->tci;
1446 vlan_tag &= mask->tci;
1449 * From the Verbs perspective an empty VLAN is equivalent
1450 * to a packet without a VLAN layer.
1453 return rte_flow_error_set(error, EINVAL,
1454 RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
1456 "VLAN cannot be empty");
1461 * Validate IPV4 item.
1464 * Item specification.
1465 * @param[in] item_flags
1466 * Bit-fields that hold the items detected until now.
1467 * @param[in] acc_mask
1468 * Acceptable mask; if NULL, the default internal mask
1469 * will be used to check whether item fields are supported.
1471 * Pointer to error structure.
1474 * 0 on success, a negative errno value otherwise and rte_errno is set.
1477 mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item,
1478 uint64_t item_flags,
1480 uint16_t ether_type,
1481 const struct rte_flow_item_ipv4 *acc_mask,
1482 struct rte_flow_error *error)
1484 const struct rte_flow_item_ipv4 *mask = item->mask;
1485 const struct rte_flow_item_ipv4 *spec = item->spec;
1486 const struct rte_flow_item_ipv4 nic_mask = {
1488 .src_addr = RTE_BE32(0xffffffff),
1489 .dst_addr = RTE_BE32(0xffffffff),
1490 .type_of_service = 0xff,
1491 .next_proto_id = 0xff,
1494 const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1495 const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
1496 MLX5_FLOW_LAYER_OUTER_L3;
1497 const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1498 MLX5_FLOW_LAYER_OUTER_L4;
1500 uint8_t next_proto = 0xFF;
1501 const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 |
1502 MLX5_FLOW_LAYER_OUTER_VLAN |
1503 MLX5_FLOW_LAYER_INNER_VLAN);
1505 if ((last_item & l2_vlan) && ether_type &&
1506 ether_type != RTE_ETHER_TYPE_IPV4)
1507 return rte_flow_error_set(error, EINVAL,
1508 RTE_FLOW_ERROR_TYPE_ITEM, item,
1509 "IPv4 cannot follow L2/VLAN layer "
1510 "which ether type is not IPv4");
1511 if (item_flags & MLX5_FLOW_LAYER_IPIP) {
1513 next_proto = mask->hdr.next_proto_id &
1514 spec->hdr.next_proto_id;
1515 if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6)
1516 return rte_flow_error_set(error, EINVAL,
1517 RTE_FLOW_ERROR_TYPE_ITEM,
1522 if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP)
1523 return rte_flow_error_set(error, EINVAL,
1524 RTE_FLOW_ERROR_TYPE_ITEM, item,
1525 "wrong tunnel type - IPv6 specified "
1526 "but IPv4 item provided");
1527 if (item_flags & l3m)
1528 return rte_flow_error_set(error, ENOTSUP,
1529 RTE_FLOW_ERROR_TYPE_ITEM, item,
1530 "multiple L3 layers not supported");
1531 else if (item_flags & l4m)
1532 return rte_flow_error_set(error, EINVAL,
1533 RTE_FLOW_ERROR_TYPE_ITEM, item,
1534 "L3 cannot follow an L4 layer.");
1535 else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) &&
1536 !(item_flags & MLX5_FLOW_LAYER_INNER_L2))
1537 return rte_flow_error_set(error, EINVAL,
1538 RTE_FLOW_ERROR_TYPE_ITEM, item,
1539 "L3 cannot follow an NVGRE layer.");
1541 mask = &rte_flow_item_ipv4_mask;
1542 else if (mask->hdr.next_proto_id != 0 &&
1543 mask->hdr.next_proto_id != 0xff)
1544 return rte_flow_error_set(error, EINVAL,
1545 RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1546 "partial mask is not supported"
1548 ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
1549 acc_mask ? (const uint8_t *)acc_mask
1550 : (const uint8_t *)&nic_mask,
1551 sizeof(struct rte_flow_item_ipv4),
1559 * Validate IPV6 item.
1562 * Item specification.
1563 * @param[in] item_flags
1564 * Bit-fields that hold the items detected until now.
1565 * @param[in] acc_mask
1566 * Acceptable mask; if NULL, the default internal mask
1567 * will be used to check whether item fields are supported.
1569 * Pointer to error structure.
1572 * 0 on success, a negative errno value otherwise and rte_errno is set.
1575 mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item,
1576 uint64_t item_flags,
1578 uint16_t ether_type,
1579 const struct rte_flow_item_ipv6 *acc_mask,
1580 struct rte_flow_error *error)
1582 const struct rte_flow_item_ipv6 *mask = item->mask;
1583 const struct rte_flow_item_ipv6 *spec = item->spec;
1584 const struct rte_flow_item_ipv6 nic_mask = {
1587 "\xff\xff\xff\xff\xff\xff\xff\xff"
1588 "\xff\xff\xff\xff\xff\xff\xff\xff",
1590 "\xff\xff\xff\xff\xff\xff\xff\xff"
1591 "\xff\xff\xff\xff\xff\xff\xff\xff",
1592 .vtc_flow = RTE_BE32(0xffffffff),
1597 const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1598 const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
1599 MLX5_FLOW_LAYER_OUTER_L3;
1600 const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1601 MLX5_FLOW_LAYER_OUTER_L4;
1603 uint8_t next_proto = 0xFF;
1604 const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 |
1605 MLX5_FLOW_LAYER_OUTER_VLAN |
1606 MLX5_FLOW_LAYER_INNER_VLAN);
1608 if ((last_item & l2_vlan) && ether_type &&
1609 ether_type != RTE_ETHER_TYPE_IPV6)
1610 return rte_flow_error_set(error, EINVAL,
1611 RTE_FLOW_ERROR_TYPE_ITEM, item,
1612 "IPv6 cannot follow L2/VLAN layer "
1613 "which ether type is not IPv6");
1614 if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP) {
1616 next_proto = mask->hdr.proto & spec->hdr.proto;
1617 if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6)
1618 return rte_flow_error_set(error, EINVAL,
1619 RTE_FLOW_ERROR_TYPE_ITEM,
1624 if (item_flags & MLX5_FLOW_LAYER_IPIP)
1625 return rte_flow_error_set(error, EINVAL,
1626 RTE_FLOW_ERROR_TYPE_ITEM, item,
1627 "wrong tunnel type - IPv4 specified "
1628 "but IPv6 item provided");
1629 if (item_flags & l3m)
1630 return rte_flow_error_set(error, ENOTSUP,
1631 RTE_FLOW_ERROR_TYPE_ITEM, item,
1632 "multiple L3 layers not supported");
1633 else if (item_flags & l4m)
1634 return rte_flow_error_set(error, EINVAL,
1635 RTE_FLOW_ERROR_TYPE_ITEM, item,
1636 "L3 cannot follow an L4 layer.");
1637 else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) &&
1638 !(item_flags & MLX5_FLOW_LAYER_INNER_L2))
1639 return rte_flow_error_set(error, EINVAL,
1640 RTE_FLOW_ERROR_TYPE_ITEM, item,
1641 "L3 cannot follow an NVGRE layer.");
1643 mask = &rte_flow_item_ipv6_mask;
1644 ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
1645 acc_mask ? (const uint8_t *)acc_mask
1646 : (const uint8_t *)&nic_mask,
1647 sizeof(struct rte_flow_item_ipv6),
1655 * Validate UDP item.
1658 * Item specification.
1659 * @param[in] item_flags
1660 * Bit-fields that hold the items detected until now.
1661 * @param[in] target_protocol
1662 * The next protocol in the previous item.
1663 * @param[in] flow_mask
1664 * mlx5 flow-specific (DV, verbs, etc.) supported header fields mask.
1666 * Pointer to error structure.
1669 * 0 on success, a negative errno value otherwise and rte_errno is set.
1672 mlx5_flow_validate_item_udp(const struct rte_flow_item *item,
1673 uint64_t item_flags,
1674 uint8_t target_protocol,
1675 struct rte_flow_error *error)
1677 const struct rte_flow_item_udp *mask = item->mask;
1678 const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1679 const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
1680 MLX5_FLOW_LAYER_OUTER_L3;
1681 const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1682 MLX5_FLOW_LAYER_OUTER_L4;
1685 if (target_protocol != 0xff && target_protocol != IPPROTO_UDP)
1686 return rte_flow_error_set(error, EINVAL,
1687 RTE_FLOW_ERROR_TYPE_ITEM, item,
1688 "protocol filtering not compatible"
1690 if (!(item_flags & l3m))
1691 return rte_flow_error_set(error, EINVAL,
1692 RTE_FLOW_ERROR_TYPE_ITEM, item,
1693 "L3 is mandatory to filter on L4");
1694 if (item_flags & l4m)
1695 return rte_flow_error_set(error, EINVAL,
1696 RTE_FLOW_ERROR_TYPE_ITEM, item,
1697 "multiple L4 layers not supported");
1699 mask = &rte_flow_item_udp_mask;
1700 ret = mlx5_flow_item_acceptable
1701 (item, (const uint8_t *)mask,
1702 (const uint8_t *)&rte_flow_item_udp_mask,
1703 sizeof(struct rte_flow_item_udp), error);
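/*
 * Example of an accepted placement: "eth / ipv4 / udp" works because an
 * L3 layer precedes the UDP item; "eth / udp" is rejected with
 * "L3 is mandatory to filter on L4", and a second L4 item is rejected as
 * "multiple L4 layers not supported".
 */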
1710 * Validate TCP item.
1713 * Item specification.
1714 * @param[in] item_flags
1715 * Bit-fields that holds the items detected until now.
1716 * @param[in] target_protocol
1717 * The next protocol in the previous item.
1719 * Pointer to error structure.
1722 * 0 on success, a negative errno value otherwise and rte_errno is set.
1725 mlx5_flow_validate_item_tcp(const struct rte_flow_item *item,
1726 uint64_t item_flags,
1727 uint8_t target_protocol,
1728 const struct rte_flow_item_tcp *flow_mask,
1729 struct rte_flow_error *error)
1731 const struct rte_flow_item_tcp *mask = item->mask;
1732 const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1733 const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
1734 MLX5_FLOW_LAYER_OUTER_L3;
1735 const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1736 MLX5_FLOW_LAYER_OUTER_L4;
1740 if (target_protocol != 0xff && target_protocol != IPPROTO_TCP)
1741 return rte_flow_error_set(error, EINVAL,
1742 RTE_FLOW_ERROR_TYPE_ITEM, item,
1743 "protocol filtering not compatible"
1745 if (!(item_flags & l3m))
1746 return rte_flow_error_set(error, EINVAL,
1747 RTE_FLOW_ERROR_TYPE_ITEM, item,
1748 "L3 is mandatory to filter on L4");
1749 if (item_flags & l4m)
1750 return rte_flow_error_set(error, EINVAL,
1751 RTE_FLOW_ERROR_TYPE_ITEM, item,
1752 "multiple L4 layers not supported");
1754 mask = &rte_flow_item_tcp_mask;
1755 ret = mlx5_flow_item_acceptable
1756 (item, (const uint8_t *)mask,
1757 (const uint8_t *)flow_mask,
1758 sizeof(struct rte_flow_item_tcp), error);
1765 * Validate VXLAN item.
1768 * Item specification.
1769 * @param[in] item_flags
1770 * Bit-fields that hold the items detected until now.
1771 * @param[in] target_protocol
1772 * The next protocol in the previous item.
1774 * Pointer to error structure.
1777 * 0 on success, a negative errno value otherwise and rte_errno is set.
1780 mlx5_flow_validate_item_vxlan(const struct rte_flow_item *item,
1781 uint64_t item_flags,
1782 struct rte_flow_error *error)
1784 const struct rte_flow_item_vxlan *spec = item->spec;
1785 const struct rte_flow_item_vxlan *mask = item->mask;
1790 } id = { .vlan_id = 0, };
1791 uint32_t vlan_id = 0;
1794 if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
1795 return rte_flow_error_set(error, ENOTSUP,
1796 RTE_FLOW_ERROR_TYPE_ITEM, item,
1797 "multiple tunnel layers not"
1800 * Verify only UDPv4 is present as defined in
1801 * https://tools.ietf.org/html/rfc7348
1803 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
1804 return rte_flow_error_set(error, EINVAL,
1805 RTE_FLOW_ERROR_TYPE_ITEM, item,
1806 "no outer UDP layer found");
1808 mask = &rte_flow_item_vxlan_mask;
1809 ret = mlx5_flow_item_acceptable
1810 (item, (const uint8_t *)mask,
1811 (const uint8_t *)&rte_flow_item_vxlan_mask,
1812 sizeof(struct rte_flow_item_vxlan),
1817 memcpy(&id.vni[1], spec->vni, 3);
1818 vlan_id = id.vlan_id;
1819 memcpy(&id.vni[1], mask->vni, 3);
1820 vlan_id &= id.vlan_id;
1823 * Tunnel ID 0 is equivalent to not adding a VXLAN layer: if
1824 * only this layer is defined in the Verbs specification it is
1825 * interpreted as a wildcard and all packets will match this
1826 * rule; if it follows a full stack of layers (e.g. eth / ipv4 /
1827 * udp), all packets matching those layers will also
1828 * match this rule. To avoid such a situation, VNI 0 is
1829 * currently refused.
1832 return rte_flow_error_set(error, ENOTSUP,
1833 RTE_FLOW_ERROR_TYPE_ITEM, item,
1834 "VXLAN vni cannot be 0");
1835 if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
1836 return rte_flow_error_set(error, ENOTSUP,
1837 RTE_FLOW_ERROR_TYPE_ITEM, item,
1838 "VXLAN tunnel must be fully defined");
1843 * Validate VXLAN_GPE item.
1846 * Item specification.
1847 * @param[in] item_flags
1848 * Bit-fields that hold the items detected until now.
1850 * Pointer to the private data structure.
1851 * @param[in] target_protocol
1852 * The next protocol in the previous item.
1854 * Pointer to error structure.
1857 * 0 on success, a negative errno value otherwise and rte_errno is set.
1860 mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item,
1861 uint64_t item_flags,
1862 struct rte_eth_dev *dev,
1863 struct rte_flow_error *error)
1865 struct mlx5_priv *priv = dev->data->dev_private;
1866 const struct rte_flow_item_vxlan_gpe *spec = item->spec;
1867 const struct rte_flow_item_vxlan_gpe *mask = item->mask;
1872 } id = { .vlan_id = 0, };
1873 uint32_t vlan_id = 0;
1875 if (!priv->config.l3_vxlan_en)
1876 return rte_flow_error_set(error, ENOTSUP,
1877 RTE_FLOW_ERROR_TYPE_ITEM, item,
1878 "L3 VXLAN is not enabled by device"
1879 " parameter and/or not configured in"
1881 if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
1882 return rte_flow_error_set(error, ENOTSUP,
1883 RTE_FLOW_ERROR_TYPE_ITEM, item,
1884 "multiple tunnel layers not"
1887 * Verify only UDPv4 is present as defined in
1888 * https://tools.ietf.org/html/rfc7348
1890 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
1891 return rte_flow_error_set(error, EINVAL,
1892 RTE_FLOW_ERROR_TYPE_ITEM, item,
1893 "no outer UDP layer found");
1895 mask = &rte_flow_item_vxlan_gpe_mask;
1896 ret = mlx5_flow_item_acceptable
1897 (item, (const uint8_t *)mask,
1898 (const uint8_t *)&rte_flow_item_vxlan_gpe_mask,
1899 sizeof(struct rte_flow_item_vxlan_gpe),
1905 return rte_flow_error_set(error, ENOTSUP,
1906 RTE_FLOW_ERROR_TYPE_ITEM,
1908 "VxLAN-GPE protocol"
1910 memcpy(&id.vni[1], spec->vni, 3);
1911 vlan_id = id.vlan_id;
1912 memcpy(&id.vni[1], mask->vni, 3);
1913 vlan_id &= id.vlan_id;
1916 * Tunnel ID 0 is equivalent to not adding a VXLAN layer: if only this
1917 * layer is defined in the Verbs specification it is interpreted as a
1918 * wildcard and all packets will match this rule; if it follows a full
1919 * stack of layers (e.g. eth / ipv4 / udp), all packets matching those
1920 * layers will also match this rule. To avoid such a situation, VNI 0
1921 * is currently refused.
1924 return rte_flow_error_set(error, ENOTSUP,
1925 RTE_FLOW_ERROR_TYPE_ITEM, item,
1926 "VXLAN-GPE vni cannot be 0");
1927 if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
1928 return rte_flow_error_set(error, ENOTSUP,
1929 RTE_FLOW_ERROR_TYPE_ITEM, item,
1930 "VXLAN-GPE tunnel must be fully"
1935 * Validate GRE Key item.
1938 * Item specification.
1939 * @param[in] item_flags
1940 * Bit flags to mark detected items.
1941 * @param[in] gre_item
1942 * Pointer to gre_item
1944 * Pointer to error structure.
1947 * 0 on success, a negative errno value otherwise and rte_errno is set.
1950 mlx5_flow_validate_item_gre_key(const struct rte_flow_item *item,
1951 uint64_t item_flags,
1952 const struct rte_flow_item *gre_item,
1953 struct rte_flow_error *error)
1955 const rte_be32_t *mask = item->mask;
1957 rte_be32_t gre_key_default_mask = RTE_BE32(UINT32_MAX);
1958 const struct rte_flow_item_gre *gre_spec = gre_item->spec;
1959 const struct rte_flow_item_gre *gre_mask = gre_item->mask;
1961 if (item_flags & MLX5_FLOW_LAYER_GRE_KEY)
1962 return rte_flow_error_set(error, ENOTSUP,
1963 RTE_FLOW_ERROR_TYPE_ITEM, item,
1964 "Multiple GRE key not support");
1965 if (!(item_flags & MLX5_FLOW_LAYER_GRE))
1966 return rte_flow_error_set(error, ENOTSUP,
1967 RTE_FLOW_ERROR_TYPE_ITEM, item,
1968 "No preceding GRE header");
1969 if (item_flags & MLX5_FLOW_LAYER_INNER)
1970 return rte_flow_error_set(error, ENOTSUP,
1971 RTE_FLOW_ERROR_TYPE_ITEM, item,
1972 "GRE key following a wrong item");
1974 gre_mask = &rte_flow_item_gre_mask;
1975 if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x2000)) &&
1976 !(gre_spec->c_rsvd0_ver & RTE_BE16(0x2000)))
1977 return rte_flow_error_set(error, EINVAL,
1978 RTE_FLOW_ERROR_TYPE_ITEM, item,
1979 "Key bit must be on");
1982 mask = &gre_key_default_mask;
1983 ret = mlx5_flow_item_acceptable
1984 (item, (const uint8_t *)mask,
1985 (const uint8_t *)&gre_key_default_mask,
1986 sizeof(rte_be32_t), error);
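/*
 * Example: "... / gre / gre_key" is accepted when the GRE item's mask and
 * spec both have the key bit (0x2000) set in c_rsvd0_ver; a GRE_KEY item
 * after a GRE spec that clears the key bit is rejected with
 * "Key bit must be on".
 */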
1991 * Validate GRE item.
1994 * Item specification.
1995 * @param[in] item_flags
1996 * Bit flags to mark detected items.
1997 * @param[in] target_protocol
1998 * The next protocol in the previous item.
2000 * Pointer to error structure.
2003 * 0 on success, a negative errno value otherwise and rte_errno is set.
2006 mlx5_flow_validate_item_gre(const struct rte_flow_item *item,
2007 uint64_t item_flags,
2008 uint8_t target_protocol,
2009 struct rte_flow_error *error)
2011 const struct rte_flow_item_gre *spec __rte_unused = item->spec;
2012 const struct rte_flow_item_gre *mask = item->mask;
2014 const struct rte_flow_item_gre nic_mask = {
2015 .c_rsvd0_ver = RTE_BE16(0xB000),
2016 .protocol = RTE_BE16(UINT16_MAX),
2019 if (target_protocol != 0xff && target_protocol != IPPROTO_GRE)
2020 return rte_flow_error_set(error, EINVAL,
2021 RTE_FLOW_ERROR_TYPE_ITEM, item,
2022 "protocol filtering not compatible"
2023 " with this GRE layer");
2024 if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2025 return rte_flow_error_set(error, ENOTSUP,
2026 RTE_FLOW_ERROR_TYPE_ITEM, item,
2027 "multiple tunnel layers not"
2029 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
2030 return rte_flow_error_set(error, ENOTSUP,
2031 RTE_FLOW_ERROR_TYPE_ITEM, item,
2032 "L3 Layer is missing");
2034 mask = &rte_flow_item_gre_mask;
2035 ret = mlx5_flow_item_acceptable
2036 (item, (const uint8_t *)mask,
2037 (const uint8_t *)&nic_mask,
2038 sizeof(struct rte_flow_item_gre), error);
2041 #ifndef HAVE_MLX5DV_DR
2042 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
2043 if (spec && (spec->protocol & mask->protocol))
2044 return rte_flow_error_set(error, ENOTSUP,
2045 RTE_FLOW_ERROR_TYPE_ITEM, item,
2046 "without MPLS support the"
2047 " specification cannot be used for"
2055 * Validate Geneve item.
2058 * Item specification.
2059 * @param[in] item_flags
2060 * Bit-fields that hold the items detected until now.
2062 * Pointer to the private data structure.
2064 * Pointer to error structure.
2067 * 0 on success, a negative errno value otherwise and rte_errno is set.
2071 mlx5_flow_validate_item_geneve(const struct rte_flow_item *item,
2072 uint64_t item_flags,
2073 struct rte_eth_dev *dev,
2074 struct rte_flow_error *error)
2076 struct mlx5_priv *priv = dev->data->dev_private;
2077 const struct rte_flow_item_geneve *spec = item->spec;
2078 const struct rte_flow_item_geneve *mask = item->mask;
2081 uint8_t opt_len = priv->config.hca_attr.geneve_max_opt_len ?
2082 MLX5_GENEVE_OPT_LEN_1 : MLX5_GENEVE_OPT_LEN_0;
2083 const struct rte_flow_item_geneve nic_mask = {
2084 .ver_opt_len_o_c_rsvd0 = RTE_BE16(0x3f80),
2085 .vni = "\xff\xff\xff",
2086 .protocol = RTE_BE16(UINT16_MAX),
2089 if (!(priv->config.hca_attr.flex_parser_protocols &
2090 MLX5_HCA_FLEX_GENEVE_ENABLED) ||
2091 !priv->config.hca_attr.tunnel_stateless_geneve_rx)
2092 return rte_flow_error_set(error, ENOTSUP,
2093 RTE_FLOW_ERROR_TYPE_ITEM, item,
2094 "L3 Geneve is not enabled by device"
2095 " parameter and/or not configured in"
2097 if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2098 return rte_flow_error_set(error, ENOTSUP,
2099 RTE_FLOW_ERROR_TYPE_ITEM, item,
2100 "multiple tunnel layers not"
2103 * Verify an outer UDP layer is present, as required for
2104 * Geneve encapsulation.
2106 if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
2107 return rte_flow_error_set(error, EINVAL,
2108 RTE_FLOW_ERROR_TYPE_ITEM, item,
2109 "no outer UDP layer found");
2111 mask = &rte_flow_item_geneve_mask;
2112 ret = mlx5_flow_item_acceptable
2113 (item, (const uint8_t *)mask,
2114 (const uint8_t *)&nic_mask,
2115 sizeof(struct rte_flow_item_geneve), error);
2119 gbhdr = rte_be_to_cpu_16(spec->ver_opt_len_o_c_rsvd0);
2120 if (MLX5_GENEVE_VER_VAL(gbhdr) ||
2121 MLX5_GENEVE_CRITO_VAL(gbhdr) ||
2122 MLX5_GENEVE_RSVD_VAL(gbhdr) || spec->rsvd1)
2123 return rte_flow_error_set(error, ENOTSUP,
2124 RTE_FLOW_ERROR_TYPE_ITEM,
2126 "Geneve protocol unsupported"
2127 " fields are being used");
2128 if (MLX5_GENEVE_OPTLEN_VAL(gbhdr) > opt_len)
2129 return rte_flow_error_set
2131 RTE_FLOW_ERROR_TYPE_ITEM,
2133 "Unsupported Geneve options length");
2135 if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
2136 return rte_flow_error_set
2138 RTE_FLOW_ERROR_TYPE_ITEM, item,
2139 "Geneve tunnel must be fully defined");
2144 * Validate MPLS item.
2147 * Pointer to the rte_eth_dev structure.
2149 * Item specification.
2150 * @param[in] item_flags
2151 * Bit-fields that hold the items detected until now.
2152 * @param[in] prev_layer
2153 * The protocol layer indicated in the previous item.
2155 * Pointer to error structure.
2158 * 0 on success, a negative errno value otherwise and rte_errno is set.
int
mlx5_flow_validate_item_mpls(struct rte_eth_dev *dev __rte_unused,
			     const struct rte_flow_item *item __rte_unused,
			     uint64_t item_flags __rte_unused,
			     uint64_t prev_layer __rte_unused,
			     struct rte_flow_error *error)
{
#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
	const struct rte_flow_item_mpls *mask = item->mask;
	struct mlx5_priv *priv = dev->data->dev_private;
	int ret;

	if (!priv->config.mpls_en)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "MPLS not supported or"
					  " disabled in firmware"
					  " configuration.");
	/* MPLS over IP, UDP, GRE is allowed */
	if (!(prev_layer & (MLX5_FLOW_LAYER_OUTER_L3 |
			    MLX5_FLOW_LAYER_OUTER_L4_UDP |
			    MLX5_FLOW_LAYER_GRE)))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "protocol filtering not compatible"
					  " with MPLS layer");
	/* Multi-tunnel isn't allowed but MPLS over GRE is an exception. */
	if ((item_flags & MLX5_FLOW_LAYER_TUNNEL) &&
	    !(item_flags & MLX5_FLOW_LAYER_GRE))
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "multiple tunnel layers not"
					  " supported");
	if (!mask)
		mask = &rte_flow_item_mpls_mask;
	ret = mlx5_flow_item_acceptable
				(item, (const uint8_t *)mask,
				 (const uint8_t *)&rte_flow_item_mpls_mask,
				 sizeof(struct rte_flow_item_mpls), error);
	if (ret < 0)
		return ret;
	return 0;
#endif
	return rte_flow_error_set(error, ENOTSUP,
				  RTE_FLOW_ERROR_TYPE_ITEM, item,
				  "MPLS is not supported by Verbs, please"
				  " update.");
}
/**
 * Validate NVGRE item.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] item_flags
 *   Bit flags to mark detected items.
 * @param[in] target_protocol
 *   The next protocol in the previous item.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_item_nvgre(const struct rte_flow_item *item,
			      uint64_t item_flags,
			      uint8_t target_protocol,
			      struct rte_flow_error *error)
{
	const struct rte_flow_item_nvgre *mask = item->mask;
	int ret;

	if (target_protocol != 0xff && target_protocol != IPPROTO_GRE)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "protocol filtering not compatible"
					  " with this GRE layer");
	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "multiple tunnel layers not"
					  " supported");
	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "L3 Layer is missing");
	if (!mask)
		mask = &rte_flow_item_nvgre_mask;
	ret = mlx5_flow_item_acceptable
		(item, (const uint8_t *)mask,
		 (const uint8_t *)&rte_flow_item_nvgre_mask,
		 sizeof(struct rte_flow_item_nvgre), error);
	if (ret < 0)
		return ret;
	return 0;
}
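/*
 * Illustrative sketch (not part of the driver): NVGRE on top of an outer L3,
 * as the checks above require; the TNI value is arbitrary example data.
 *
 * @code
 *	struct rte_flow_item_nvgre nvgre_spec = {
 *		.tni = "\x00\x00\x64",	// TNI 100
 *	};
 *	struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_NVGRE, .spec = &nvgre_spec },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 * @endcode
 */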
/* Allocate unique ID for the split Q/RSS subflows. */
static uint32_t
flow_qrss_get_id(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	uint32_t qrss_id, ret;

	ret = mlx5_flow_id_get(priv->qrss_id_pool, &qrss_id);
	if (ret)
		return 0;
	assert(qrss_id);
	return qrss_id;
}

/* Free unique ID for the split Q/RSS subflows. */
static void
flow_qrss_free_id(struct rte_eth_dev *dev, uint32_t qrss_id)
{
	struct mlx5_priv *priv = dev->data->dev_private;

	if (qrss_id)
		mlx5_flow_id_release(priv->qrss_id_pool, qrss_id);
}
/**
 * Release resources related to the QUEUE/RSS action split.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param flow
 *   Flow to release id's from.
 */
static void
flow_mreg_split_qrss_release(struct rte_eth_dev *dev,
			     struct rte_flow *flow)
{
	struct mlx5_flow *dev_flow;

	LIST_FOREACH(dev_flow, &flow->dev_flows, next)
		if (dev_flow->qrss_id)
			flow_qrss_free_id(dev, dev_flow->qrss_id);
}
static int
flow_null_validate(struct rte_eth_dev *dev __rte_unused,
		   const struct rte_flow_attr *attr __rte_unused,
		   const struct rte_flow_item items[] __rte_unused,
		   const struct rte_flow_action actions[] __rte_unused,
		   bool external __rte_unused,
		   struct rte_flow_error *error)
{
	return rte_flow_error_set(error, ENOTSUP,
				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
}

static struct mlx5_flow *
flow_null_prepare(const struct rte_flow_attr *attr __rte_unused,
		  const struct rte_flow_item items[] __rte_unused,
		  const struct rte_flow_action actions[] __rte_unused,
		  struct rte_flow_error *error)
{
	rte_flow_error_set(error, ENOTSUP,
			   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
	return NULL;
}

static int
flow_null_translate(struct rte_eth_dev *dev __rte_unused,
		    struct mlx5_flow *dev_flow __rte_unused,
		    const struct rte_flow_attr *attr __rte_unused,
		    const struct rte_flow_item items[] __rte_unused,
		    const struct rte_flow_action actions[] __rte_unused,
		    struct rte_flow_error *error)
{
	return rte_flow_error_set(error, ENOTSUP,
				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
}

static int
flow_null_apply(struct rte_eth_dev *dev __rte_unused,
		struct rte_flow *flow __rte_unused,
		struct rte_flow_error *error)
{
	return rte_flow_error_set(error, ENOTSUP,
				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
}

static void
flow_null_remove(struct rte_eth_dev *dev __rte_unused,
		 struct rte_flow *flow __rte_unused)
{
}

static void
flow_null_destroy(struct rte_eth_dev *dev __rte_unused,
		  struct rte_flow *flow __rte_unused)
{
}

static int
flow_null_query(struct rte_eth_dev *dev __rte_unused,
		struct rte_flow *flow __rte_unused,
		const struct rte_flow_action *actions __rte_unused,
		void *data __rte_unused,
		struct rte_flow_error *error)
{
	return rte_flow_error_set(error, ENOTSUP,
				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
}

/* Void driver to protect from null pointer dereference. */
const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops = {
	.validate = flow_null_validate,
	.prepare = flow_null_prepare,
	.translate = flow_null_translate,
	.apply = flow_null_apply,
	.remove = flow_null_remove,
	.destroy = flow_null_destroy,
	.query = flow_null_query,
};
/**
 * Select flow driver type according to flow attributes and device
 * configuration.
 *
 * @param[in] dev
 *   Pointer to the dev structure.
 * @param[in] attr
 *   Pointer to the flow attributes.
 *
 * @return
 *   flow driver type, MLX5_FLOW_TYPE_MAX otherwise.
 */
static enum mlx5_flow_drv_type
flow_get_drv_type(struct rte_eth_dev *dev, const struct rte_flow_attr *attr)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	enum mlx5_flow_drv_type type = MLX5_FLOW_TYPE_MAX;

	if (attr->transfer && priv->config.dv_esw_en)
		type = MLX5_FLOW_TYPE_DV;
	if (!attr->transfer)
		type = priv->config.dv_flow_en ? MLX5_FLOW_TYPE_DV :
						 MLX5_FLOW_TYPE_VERBS;
	return type;
}

#define flow_get_drv_ops(type) flow_drv_ops[type]
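/*
 * Typical dispatch sequence built from the two helpers above (sketch only;
 * it mirrors what the flow_drv_*() wrappers below do):
 *
 * @code
 *	enum mlx5_flow_drv_type type = flow_get_drv_type(dev, attr);
 *	const struct mlx5_flow_driver_ops *fops = flow_get_drv_ops(type);
 *
 *	fops->validate(dev, attr, items, actions, external, error);
 * @endcode
 */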
/**
 * Flow driver validation API. This abstracts calling driver specific
 * functions. The type of flow driver is determined according to flow
 * attributes.
 *
 * @param[in] dev
 *   Pointer to the dev structure.
 * @param[in] attr
 *   Pointer to the flow attributes.
 * @param[in] items
 *   Pointer to the list of items.
 * @param[in] actions
 *   Pointer to the list of actions.
 * @param[in] external
 *   Set when this flow rule is created by a request external to the PMD.
 * @param[out] error
 *   Pointer to the error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static inline int
flow_drv_validate(struct rte_eth_dev *dev,
		  const struct rte_flow_attr *attr,
		  const struct rte_flow_item items[],
		  const struct rte_flow_action actions[],
		  bool external, struct rte_flow_error *error)
{
	const struct mlx5_flow_driver_ops *fops;
	enum mlx5_flow_drv_type type = flow_get_drv_type(dev, attr);

	fops = flow_get_drv_ops(type);
	return fops->validate(dev, attr, items, actions, external, error);
}
/**
 * Flow driver preparation API. This abstracts calling driver specific
 * functions. Parent flow (rte_flow) should have driver type (drv_type). It
 * calculates the size of memory required for device flow, allocates the
 * memory, initializes the device flow and returns the pointer.
 *
 * @note
 *   This function initializes the device flow structure such as dv or verbs
 *   in struct mlx5_flow. However, it is the caller's responsibility to
 *   initialize the rest. For example, adding the returned device flow to the
 *   flow->dev_flows list and setting the backward reference to the flow
 *   should be done outside of this function. The layers field is not filled
 *   either.
 *
 * @param[in] attr
 *   Pointer to the flow attributes.
 * @param[in] items
 *   Pointer to the list of items.
 * @param[in] actions
 *   Pointer to the list of actions.
 * @param[out] error
 *   Pointer to the error structure.
 *
 * @return
 *   Pointer to device flow on success, otherwise NULL and rte_errno is set.
 */
static inline struct mlx5_flow *
flow_drv_prepare(const struct rte_flow *flow,
		 const struct rte_flow_attr *attr,
		 const struct rte_flow_item items[],
		 const struct rte_flow_action actions[],
		 struct rte_flow_error *error)
{
	const struct mlx5_flow_driver_ops *fops;
	enum mlx5_flow_drv_type type = flow->drv_type;

	assert(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
	fops = flow_get_drv_ops(type);
	return fops->prepare(attr, items, actions, error);
}
/**
 * Flow driver translation API. This abstracts calling driver specific
 * functions. Parent flow (rte_flow) should have driver type (drv_type). It
 * translates a generic flow into a driver flow. flow_drv_prepare() must
 * precede.
 *
 * @note
 *   dev_flow->layers could be filled as a result of parsing during
 *   translation if needed by flow_drv_apply(). dev_flow->flow->actions can
 *   also be filled if necessary. As a flow can have multiple dev_flows by RSS
 *   flow expansion, flow->actions could be overwritten even though all the
 *   expanded dev_flows have the same actions.
 *
 * @param[in] dev
 *   Pointer to the rte dev structure.
 * @param[in, out] dev_flow
 *   Pointer to the mlx5 flow.
 * @param[in] attr
 *   Pointer to the flow attributes.
 * @param[in] items
 *   Pointer to the list of items.
 * @param[in] actions
 *   Pointer to the list of actions.
 * @param[out] error
 *   Pointer to the error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static inline int
flow_drv_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
		   const struct rte_flow_attr *attr,
		   const struct rte_flow_item items[],
		   const struct rte_flow_action actions[],
		   struct rte_flow_error *error)
{
	const struct mlx5_flow_driver_ops *fops;
	enum mlx5_flow_drv_type type = dev_flow->flow->drv_type;

	assert(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
	fops = flow_get_drv_ops(type);
	return fops->translate(dev, dev_flow, attr, items, actions, error);
}
/**
 * Flow driver apply API. This abstracts calling driver specific functions.
 * Parent flow (rte_flow) should have driver type (drv_type). It applies
 * translated driver flows onto the device. flow_drv_translate() must precede.
 *
 * @param[in] dev
 *   Pointer to Ethernet device structure.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static inline int
flow_drv_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
	       struct rte_flow_error *error)
{
	const struct mlx5_flow_driver_ops *fops;
	enum mlx5_flow_drv_type type = flow->drv_type;

	assert(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
	fops = flow_get_drv_ops(type);
	return fops->apply(dev, flow, error);
}
/**
 * Flow driver remove API. This abstracts calling driver specific functions.
 * Parent flow (rte_flow) should have driver type (drv_type). It removes a
 * flow on the device. All the resources of the flow should be freed by
 * calling flow_drv_destroy().
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[in, out] flow
 *   Pointer to flow structure.
 */
static inline void
flow_drv_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
{
	const struct mlx5_flow_driver_ops *fops;
	enum mlx5_flow_drv_type type = flow->drv_type;

	assert(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
	fops = flow_get_drv_ops(type);
	fops->remove(dev, flow);
}
/**
 * Flow driver destroy API. This abstracts calling driver specific functions.
 * Parent flow (rte_flow) should have driver type (drv_type). It removes a
 * flow on the device and releases the resources of the flow.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[in, out] flow
 *   Pointer to flow structure.
 */
static inline void
flow_drv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
{
	const struct mlx5_flow_driver_ops *fops;
	enum mlx5_flow_drv_type type = flow->drv_type;

	flow_mreg_split_qrss_release(dev, flow);
	assert(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
	fops = flow_get_drv_ops(type);
	fops->destroy(dev, flow);
}
/**
 * Validate a flow supported by the NIC.
 *
 * @see rte_flow_validate()
 * @see rte_flow_ops
 */
int
mlx5_flow_validate(struct rte_eth_dev *dev,
		   const struct rte_flow_attr *attr,
		   const struct rte_flow_item items[],
		   const struct rte_flow_action actions[],
		   struct rte_flow_error *error)
{
	int ret;

	ret = flow_drv_validate(dev, attr, items, actions, true, error);
	if (ret < 0)
		return ret;
	return 0;
}
/**
 * Get RSS action from the action list.
 *
 * @param[in] actions
 *   Pointer to the list of actions.
 *
 * @return
 *   Pointer to the RSS action if it exists, NULL otherwise.
 */
static const struct rte_flow_action_rss*
flow_get_rss_action(const struct rte_flow_action actions[])
{
	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
		switch (actions->type) {
		case RTE_FLOW_ACTION_TYPE_RSS:
			return (const struct rte_flow_action_rss *)
			       actions->conf;
		default:
			break;
		}
	}
	return NULL;
}
static unsigned int
find_graph_root(const struct rte_flow_item pattern[], uint32_t rss_level)
{
	const struct rte_flow_item *item;
	unsigned int has_vlan = 0;

	for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
		if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) {
			has_vlan = 1;
			break;
		}
	}
	if (has_vlan)
		return rss_level < 2 ? MLX5_EXPANSION_ROOT_ETH_VLAN :
				       MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN;
	return rss_level < 2 ? MLX5_EXPANSION_ROOT :
			       MLX5_EXPANSION_ROOT_OUTER;
}
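/*
 * Sketch: for a pattern that contains a VLAN item, RSS level 0/1 selects
 * MLX5_EXPANSION_ROOT_ETH_VLAN while level 2 (inner RSS) selects
 * MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN, e.g.
 *
 * @code
 *	unsigned int graph_root = find_graph_root(items, rss->level);
 * @endcode
 */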
/**
 * Get QUEUE/RSS action from the action list.
 *
 * @param[in] actions
 *   Pointer to the list of actions.
 * @param[out] qrss
 *   Pointer to the return pointer; left untouched if no QUEUE/RSS action
 *   is found.
 *
 * @return
 *   Total number of actions.
 */
static int
flow_parse_qrss_action(const struct rte_flow_action actions[],
		       const struct rte_flow_action **qrss)
{
	int actions_n = 0;

	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
		switch (actions->type) {
		case RTE_FLOW_ACTION_TYPE_QUEUE:
		case RTE_FLOW_ACTION_TYPE_RSS:
			*qrss = actions;
			break;
		default:
			break;
		}
		actions_n++;
	}
	/* Count RTE_FLOW_ACTION_TYPE_END. */
	return actions_n + 1;
}
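/*
 * Sketch: for the action list [MARK, RSS, END], flow_parse_qrss_action()
 * points *qrss at the RSS action and returns 3 (the END action included).
 *
 * @code
 *	const struct rte_flow_action *qrss = NULL;
 *	int actions_n = flow_parse_qrss_action(actions, &qrss);
 * @endcode
 */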
/**
 * Check if the flow should be split due to hairpin.
 * The reason for the split is that in current HW we can't
 * support encap on Rx, so if a flow has encap we move it
 * to Tx.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 *
 * @return
 *   > 0 the number of actions and the flow should be split,
 *   0 when no split required.
 */
static int
flow_check_hairpin_split(struct rte_eth_dev *dev,
			 const struct rte_flow_attr *attr,
			 const struct rte_flow_action actions[])
{
	int queue_action = 0;
	int action_n = 0;
	int encap = 0;
	const struct rte_flow_action_queue *queue;
	const struct rte_flow_action_rss *rss;
	const struct rte_flow_action_raw_encap *raw_encap;

	if (!attr->ingress)
		return 0;
	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
		switch (actions->type) {
		case RTE_FLOW_ACTION_TYPE_QUEUE:
			queue = actions->conf;
			if (mlx5_rxq_get_type(dev, queue->index) !=
			    MLX5_RXQ_TYPE_HAIRPIN)
				return 0;
			queue_action = 1;
			action_n++;
			break;
		case RTE_FLOW_ACTION_TYPE_RSS:
			rss = actions->conf;
			if (mlx5_rxq_get_type(dev, rss->queue[0]) !=
			    MLX5_RXQ_TYPE_HAIRPIN)
				return 0;
			queue_action = 1;
			action_n++;
			break;
		case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
		case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
			encap = 1;
			action_n++;
			break;
		case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
			raw_encap = actions->conf;
			if (raw_encap->size >
			    (sizeof(struct rte_flow_item_eth) +
			     sizeof(struct rte_flow_item_ipv4)))
				encap = 1;
			action_n++;
			break;
		default:
			action_n++;
			break;
		}
	}
	if (encap == 1 && queue_action)
		return action_n;
	return 0;
}
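/*
 * Sketch: an ingress list [VXLAN_ENCAP, QUEUE(hairpin queue), END] makes the
 * check above return 2 (encap plus hairpin queue action), requesting a
 * split; the same list targeting a regular Rx queue returns 0.
 *
 * @code
 *	int split = flow_check_hairpin_split(dev, attr, actions);
 * @endcode
 */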
/* Declare flow create/destroy prototype in advance. */
static struct rte_flow *
flow_list_create(struct rte_eth_dev *dev, struct mlx5_flows *list,
		 const struct rte_flow_attr *attr,
		 const struct rte_flow_item items[],
		 const struct rte_flow_action actions[],
		 bool external, struct rte_flow_error *error);

static void
flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
		  struct rte_flow *flow);
/**
 * Add a flow of copying flow metadata registers in RX_CP_TBL.
 *
 * As mark_id is unique, if there's already a registered flow for the mark_id,
 * return by increasing the reference counter of the resource. Otherwise,
 * create the resource (mcp_res) and flow.
 *
 * Flow looks like,
 *   - If ingress port is ANY and reg_c[1] is mark_id,
 *     flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL.
 *
 * For default flow (zero mark_id), flow is like,
 *   - If ingress port is ANY,
 *     reg_b := reg_c[0] and jump to RX_ACT_TBL.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param mark_id
 *   ID of MARK action, zero means default flow for META.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   Associated resource on success, NULL otherwise and rte_errno is set.
 */
static struct mlx5_flow_mreg_copy_resource *
flow_mreg_add_copy_action(struct rte_eth_dev *dev, uint32_t mark_id,
			  struct rte_flow_error *error)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct rte_flow_attr attr = {
		.group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
		.ingress = 1,
	};
	struct mlx5_rte_flow_item_tag tag_spec = {
		.data = mark_id,
	};
	struct rte_flow_item items[] = {
		[1] = { .type = RTE_FLOW_ITEM_TYPE_END, },
	};
	struct rte_flow_action_mark ftag = {
		.id = mark_id,
	};
	struct mlx5_flow_action_copy_mreg cp_mreg = {
		.dst = REG_B,
		.src = 0,
	};
	struct rte_flow_action_jump jump = {
		.group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
	};
	struct rte_flow_action actions[] = {
		[3] = { .type = RTE_FLOW_ACTION_TYPE_END, },
	};
	struct mlx5_flow_mreg_copy_resource *mcp_res;
	int ret;

	/* Fill the register fields in the flow. */
	ret = mlx5_flow_get_reg_id(dev, MLX5_FLOW_MARK, 0, error);
	if (ret < 0)
		return NULL;
	tag_spec.id = ret;
	ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error);
	if (ret < 0)
		return NULL;
	cp_mreg.src = ret;
	/* Check if already registered. */
	assert(priv->mreg_cp_tbl);
	mcp_res = (void *)mlx5_hlist_lookup(priv->mreg_cp_tbl, mark_id);
	if (mcp_res) {
		/* For non-default rule. */
		if (mark_id)
			mcp_res->refcnt++;
		assert(mark_id || mcp_res->refcnt == 1);
		return mcp_res;
	}
	/* Provide the full width of FLAG specific value. */
	if (mark_id == (priv->sh->dv_regc0_mask & MLX5_FLOW_MARK_DEFAULT))
		tag_spec.data = MLX5_FLOW_MARK_DEFAULT;
	/* Build a new flow. */
	if (mark_id) {
		items[0] = (struct rte_flow_item){
			.type = MLX5_RTE_FLOW_ITEM_TYPE_TAG,
			.spec = &tag_spec,
		};
		items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_END,
		};
		actions[0] = (struct rte_flow_action){
			.type = MLX5_RTE_FLOW_ACTION_TYPE_MARK,
			.conf = &ftag,
		};
		actions[1] = (struct rte_flow_action){
			.type = MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
			.conf = &cp_mreg,
		};
		actions[2] = (struct rte_flow_action){
			.type = RTE_FLOW_ACTION_TYPE_JUMP,
			.conf = &jump,
		};
		actions[3] = (struct rte_flow_action){
			.type = RTE_FLOW_ACTION_TYPE_END,
		};
	} else {
		/* Default rule, wildcard match. */
		attr.priority = MLX5_FLOW_PRIO_RSVD;
		items[0] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_END,
		};
		actions[0] = (struct rte_flow_action){
			.type = MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
			.conf = &cp_mreg,
		};
		actions[1] = (struct rte_flow_action){
			.type = RTE_FLOW_ACTION_TYPE_JUMP,
			.conf = &jump,
		};
		actions[2] = (struct rte_flow_action){
			.type = RTE_FLOW_ACTION_TYPE_END,
		};
	}
	/* Build a new entry. */
	mcp_res = rte_zmalloc(__func__, sizeof(*mcp_res), 0);
	if (!mcp_res) {
		rte_errno = ENOMEM;
		return NULL;
	}
	/*
	 * The copy Flows are not included in any list. These ones are
	 * referenced from other Flows and cannot be applied, removed or
	 * deleted in arbitrary order by list traversing.
	 */
	mcp_res->flow = flow_list_create(dev, NULL, &attr, items,
					 actions, false, error);
	if (!mcp_res->flow)
		goto error;
	mcp_res->refcnt++;
	mcp_res->hlist_ent.key = mark_id;
	ret = mlx5_hlist_insert(priv->mreg_cp_tbl,
				&mcp_res->hlist_ent);
	assert(!ret);
	if (ret)
		goto error;
	return mcp_res;
error:
	if (mcp_res->flow)
		flow_list_destroy(dev, NULL, mcp_res->flow);
	rte_free(mcp_res);
	return NULL;
}
/**
 * Release flow in RX_CP_TBL.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param flow
 *   Parent flow for which copying is provided.
 */
static void
flow_mreg_del_copy_action(struct rte_eth_dev *dev,
			  struct rte_flow *flow)
{
	struct mlx5_flow_mreg_copy_resource *mcp_res = flow->mreg_copy;
	struct mlx5_priv *priv = dev->data->dev_private;

	if (!mcp_res || !priv->mreg_cp_tbl)
		return;
	if (flow->copy_applied) {
		assert(mcp_res->appcnt);
		flow->copy_applied = 0;
		--mcp_res->appcnt;
		if (!mcp_res->appcnt)
			flow_drv_remove(dev, mcp_res->flow);
	}
	/*
	 * We do not check availability of metadata registers here,
	 * because copy resources are allocated in this case.
	 */
	if (--mcp_res->refcnt)
		return;
	assert(mcp_res->flow);
	flow_list_destroy(dev, NULL, mcp_res->flow);
	mlx5_hlist_remove(priv->mreg_cp_tbl, &mcp_res->hlist_ent);
	rte_free(mcp_res);
	flow->mreg_copy = NULL;
}
/**
 * Start flow in RX_CP_TBL.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param flow
 *   Parent flow for which copying is provided.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
flow_mreg_start_copy_action(struct rte_eth_dev *dev,
			    struct rte_flow *flow)
{
	struct mlx5_flow_mreg_copy_resource *mcp_res = flow->mreg_copy;
	int ret;

	if (!mcp_res || flow->copy_applied)
		return 0;
	if (!mcp_res->appcnt) {
		ret = flow_drv_apply(dev, mcp_res->flow, NULL);
		if (ret)
			return ret;
	}
	++mcp_res->appcnt;
	flow->copy_applied = 1;
	return 0;
}
/**
 * Stop flow in RX_CP_TBL.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param flow
 *   Parent flow for which copying is provided.
 */
static void
flow_mreg_stop_copy_action(struct rte_eth_dev *dev,
			   struct rte_flow *flow)
{
	struct mlx5_flow_mreg_copy_resource *mcp_res = flow->mreg_copy;

	if (!mcp_res || !flow->copy_applied)
		return;
	assert(mcp_res->appcnt);
	--mcp_res->appcnt;
	flow->copy_applied = 0;
	if (!mcp_res->appcnt)
		flow_drv_remove(dev, mcp_res->flow);
}
/**
 * Remove the default copy action from RX_CP_TBL.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
static void
flow_mreg_del_default_copy_action(struct rte_eth_dev *dev)
{
	struct mlx5_flow_mreg_copy_resource *mcp_res;
	struct mlx5_priv *priv = dev->data->dev_private;

	/* Check if default flow is registered. */
	if (!priv->mreg_cp_tbl)
		return;
	mcp_res = (void *)mlx5_hlist_lookup(priv->mreg_cp_tbl, 0ULL);
	if (!mcp_res)
		return;
	assert(mcp_res->flow);
	flow_list_destroy(dev, NULL, mcp_res->flow);
	mlx5_hlist_remove(priv->mreg_cp_tbl, &mcp_res->hlist_ent);
	rte_free(mcp_res);
}
/**
 * Add the default copy action in RX_CP_TBL.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 for success, negative value otherwise and rte_errno is set.
 */
static int
flow_mreg_add_default_copy_action(struct rte_eth_dev *dev,
				  struct rte_flow_error *error)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_flow_mreg_copy_resource *mcp_res;

	/* Check whether extensive metadata feature is engaged. */
	if (!priv->config.dv_flow_en ||
	    priv->config.dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
	    !mlx5_flow_ext_mreg_supported(dev) ||
	    !priv->sh->dv_regc0_mask)
		return 0;
	mcp_res = flow_mreg_add_copy_action(dev, 0, error);
	if (!mcp_res)
		return -rte_errno;
	return 0;
}
/**
 * Add a flow of copying flow metadata registers in RX_CP_TBL.
 *
 * All the flows having Q/RSS action should be split by
 * flow_mreg_split_qrss_prep() to pass by RX_CP_TBL. A flow in the RX_CP_TBL
 * performs the following,
 *   - CQE->flow_tag := reg_c[1] (MARK)
 *   - CQE->flow_table_metadata (reg_b) := reg_c[0] (META)
 * As CQE's flow_tag is not a register, it can't be simply copied from
 * reg_c[1], but there should be a flow per each MARK ID set by MARK action.
 *
 * For the aforementioned reason, if there's a MARK action in flow's action
 * list, a corresponding flow should be added to the RX_CP_TBL in order to
 * copy the MARK ID to CQE's flow_tag like,
 *   - If reg_c[1] is mark_id,
 *     flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL.
 *
 * For SET_META action which stores value in reg_c[0], as the destination is
 * also a flow metadata register (reg_b), adding a default flow is enough.
 * Zero MARK ID means the default flow. The default flow looks like,
 *   - For all flows, reg_b := reg_c[0] and jump to RX_ACT_TBL.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param flow
 *   Pointer to flow structure.
 * @param[in] actions
 *   Pointer to the list of actions.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, negative value otherwise and rte_errno is set.
 */
static int
flow_mreg_update_copy_table(struct rte_eth_dev *dev,
			    struct rte_flow *flow,
			    const struct rte_flow_action *actions,
			    struct rte_flow_error *error)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_config *config = &priv->config;
	struct mlx5_flow_mreg_copy_resource *mcp_res;
	const struct rte_flow_action_mark *mark;

	/* Check whether extensive metadata feature is engaged. */
	if (!config->dv_flow_en ||
	    config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
	    !mlx5_flow_ext_mreg_supported(dev) ||
	    !priv->sh->dv_regc0_mask)
		return 0;
	/* Find MARK action. */
	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
		switch (actions->type) {
		case RTE_FLOW_ACTION_TYPE_FLAG:
			mcp_res = flow_mreg_add_copy_action
				(dev, MLX5_FLOW_MARK_DEFAULT, error);
			if (!mcp_res)
				return -rte_errno;
			flow->mreg_copy = mcp_res;
			if (dev->data->dev_started) {
				mcp_res->appcnt++;
				flow->copy_applied = 1;
			}
			return 0;
		case RTE_FLOW_ACTION_TYPE_MARK:
			mark = (const struct rte_flow_action_mark *)
				actions->conf;
			mcp_res =
				flow_mreg_add_copy_action(dev, mark->id, error);
			if (!mcp_res)
				return -rte_errno;
			flow->mreg_copy = mcp_res;
			if (dev->data->dev_started) {
				mcp_res->appcnt++;
				flow->copy_applied = 1;
			}
			return 0;
		default:
			break;
		}
	}
	return 0;
}
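/*
 * Sketch: an external ingress flow whose actions are
 * [MARK(id = 0x1234), QUEUE, END] (example values) makes the routine above
 * register a companion RX_CP_TBL flow for mark ID 0x1234 through
 * flow_mreg_add_copy_action(), so the CQE flow tag can be restored from
 * reg_c[1] after the Q/RSS split.
 */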
#define MLX5_MAX_SPLIT_ACTIONS 24
#define MLX5_MAX_SPLIT_ITEMS 24

/**
 * Split the hairpin flow.
 * Since HW can't support encap on Rx we move the encap to Tx.
 * If the count action is after the encap then we also
 * move the count action. In this case the count will also measure
 * the outer bytes.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] actions_rx
 *   Rx flow actions.
 * @param[out] actions_tx
 *   Tx flow actions.
 * @param[out] pattern_tx
 *   The pattern items for the Tx flow.
 * @param[out] flow_id
 *   The flow ID connected to this flow.
 *
 * @return
 *   0 on success.
 */
static int
flow_hairpin_split(struct rte_eth_dev *dev,
		   const struct rte_flow_action actions[],
		   struct rte_flow_action actions_rx[],
		   struct rte_flow_action actions_tx[],
		   struct rte_flow_item pattern_tx[],
		   uint32_t *flow_id)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	const struct rte_flow_action_raw_encap *raw_encap;
	const struct rte_flow_action_raw_decap *raw_decap;
	struct mlx5_rte_flow_action_set_tag *set_tag;
	struct rte_flow_action *tag_action;
	struct mlx5_rte_flow_item_tag *tag_item;
	struct rte_flow_item *item;
	char *addr;
	int encap = 0;

	mlx5_flow_id_get(priv->sh->flow_id_pool, flow_id);
	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
		switch (actions->type) {
		case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
		case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
			rte_memcpy(actions_tx, actions,
				   sizeof(struct rte_flow_action));
			actions_tx++;
			break;
		case RTE_FLOW_ACTION_TYPE_COUNT:
			if (encap) {
				rte_memcpy(actions_tx, actions,
					   sizeof(struct rte_flow_action));
				actions_tx++;
			} else {
				rte_memcpy(actions_rx, actions,
					   sizeof(struct rte_flow_action));
				actions_rx++;
			}
			break;
		case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
			raw_encap = actions->conf;
			if (raw_encap->size >
			    (sizeof(struct rte_flow_item_eth) +
			     sizeof(struct rte_flow_item_ipv4))) {
				memcpy(actions_tx, actions,
				       sizeof(struct rte_flow_action));
				actions_tx++;
				encap = 1;
			} else {
				rte_memcpy(actions_rx, actions,
					   sizeof(struct rte_flow_action));
				actions_rx++;
			}
			break;
		case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
			raw_decap = actions->conf;
			if (raw_decap->size <
			    (sizeof(struct rte_flow_item_eth) +
			     sizeof(struct rte_flow_item_ipv4))) {
				memcpy(actions_tx, actions,
				       sizeof(struct rte_flow_action));
				actions_tx++;
			} else {
				rte_memcpy(actions_rx, actions,
					   sizeof(struct rte_flow_action));
				actions_rx++;
			}
			break;
		default:
			rte_memcpy(actions_rx, actions,
				   sizeof(struct rte_flow_action));
			actions_rx++;
			break;
		}
	}
	/* Add set meta action and end action for the Rx flow. */
	tag_action = actions_rx;
	tag_action->type = MLX5_RTE_FLOW_ACTION_TYPE_TAG;
	actions_rx++;
	rte_memcpy(actions_rx, actions, sizeof(struct rte_flow_action));
	actions_rx++;
	set_tag = (void *)actions_rx;
	set_tag->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_RX, 0, NULL);
	assert(set_tag->id > REG_NONE);
	set_tag->data = *flow_id;
	tag_action->conf = set_tag;
	/* Create Tx item list. */
	rte_memcpy(actions_tx, actions, sizeof(struct rte_flow_action));
	addr = (void *)&pattern_tx[2];
	item = pattern_tx;
	item->type = MLX5_RTE_FLOW_ITEM_TYPE_TAG;
	tag_item = (void *)addr;
	tag_item->data = *flow_id;
	tag_item->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_TX, 0, NULL);
	assert(set_tag->id > REG_NONE);
	item->spec = tag_item;
	addr += sizeof(struct mlx5_rte_flow_item_tag);
	tag_item = (void *)addr;
	tag_item->data = UINT32_MAX;
	tag_item->id = UINT16_MAX;
	item->mask = tag_item;
	addr += sizeof(struct mlx5_rte_flow_item_tag);
	item->last = NULL;
	item++;
	item->type = RTE_FLOW_ITEM_TYPE_END;
	return 0;
}
/**
 * The last stage of splitting chain, just creates the subflow
 * without any modification.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[in] flow
 *   Parent flow structure pointer.
 * @param[in, out] sub_flow
 *   Pointer to return the created subflow, may be NULL.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] items
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[in] external
 *   This flow rule is created by request external to PMD.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @return
 *   0 on success, negative value otherwise
 */
static int
flow_create_split_inner(struct rte_eth_dev *dev,
			struct rte_flow *flow,
			struct mlx5_flow **sub_flow,
			const struct rte_flow_attr *attr,
			const struct rte_flow_item items[],
			const struct rte_flow_action actions[],
			bool external, struct rte_flow_error *error)
{
	struct mlx5_flow *dev_flow;

	dev_flow = flow_drv_prepare(flow, attr, items, actions, error);
	if (!dev_flow)
		return -rte_errno;
	dev_flow->flow = flow;
	dev_flow->external = external;
	/* Subflow object was created, we must include one in the list. */
	LIST_INSERT_HEAD(&flow->dev_flows, dev_flow, next);
	if (sub_flow)
		*sub_flow = dev_flow;
	return flow_drv_translate(dev, dev_flow, attr, items, actions, error);
}
/**
 * Split action list having QUEUE/RSS for metadata register copy.
 *
 * Once Q/RSS action is detected in user's action list, the flow action
 * should be split in order to copy metadata registers, which will happen in
 * RX_CP_TBL like,
 *   - CQE->flow_tag := reg_c[1] (MARK)
 *   - CQE->flow_table_metadata (reg_b) := reg_c[0] (META)
 * The Q/RSS action will be performed on RX_ACT_TBL after passing by
 * RX_CP_TBL. This is because the last action of each flow must be a terminal
 * action (QUEUE, RSS or DROP).
 *
 * Flow ID must be allocated to identify actions in the RX_ACT_TBL and it is
 * stored and kept in the mlx5_flow structure per each sub_flow.
 *
 * The Q/RSS action is replaced with,
 *   - SET_TAG, setting the allocated flow ID to reg_c[2].
 * And the following JUMP action is added at the end,
 *   - JUMP, to RX_CP_TBL.
 *
 * A flow to perform the remaining Q/RSS action will be created in RX_ACT_TBL
 * by the flow_create_split_metadata() routine. The flow will look like,
 *   - If flow ID matches (reg_c[2]), perform Q/RSS.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[out] split_actions
 *   Pointer to store split actions to jump to CP_TBL.
 * @param[in] actions
 *   Pointer to the list of original flow actions.
 * @param[in] qrss
 *   Pointer to the Q/RSS action.
 * @param[in] actions_n
 *   Number of original actions.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   non-zero unique flow_id on success, otherwise 0 and
 *   error/rte_errno are set.
 */
static uint32_t
flow_mreg_split_qrss_prep(struct rte_eth_dev *dev,
			  struct rte_flow_action *split_actions,
			  const struct rte_flow_action *actions,
			  const struct rte_flow_action *qrss,
			  int actions_n, struct rte_flow_error *error)
{
	struct mlx5_rte_flow_action_set_tag *set_tag;
	struct rte_flow_action_jump *jump;
	const int qrss_idx = qrss - actions;
	uint32_t flow_id;
	int ret;

	/*
	 * Given actions will be split
	 * - Replace QUEUE/RSS action with SET_TAG to set flow ID.
	 * - Add jump to mreg CP_TBL.
	 * As a result, there will be one more action.
	 */
	++actions_n;
	/*
	 * Allocate the new subflow ID. This one is unique within
	 * device and not shared with representors. Otherwise,
	 * we would have to resolve multi-thread access synch
	 * issue. Each flow on the shared device is appended
	 * with source vport identifier, so the resulting
	 * flows will be unique in the shared (by master and
	 * representors) domain even if they have coinciding
	 * IDs.
	 */
	flow_id = flow_qrss_get_id(dev);
	if (!flow_id)
		return rte_flow_error_set(error, ENOMEM,
					  RTE_FLOW_ERROR_TYPE_ACTION,
					  NULL, "can't allocate id "
					  "for split Q/RSS subflow");
	/* Internal SET_TAG action to set flow ID. */
	set_tag = (void *)(split_actions + actions_n);
	*set_tag = (struct mlx5_rte_flow_action_set_tag){
		.data = flow_id,
	};
	ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0, error);
	if (ret < 0)
		return 0;
	set_tag->id = ret;
	/* JUMP action to jump to mreg copy table (CP_TBL). */
	jump = (void *)(set_tag + 1);
	*jump = (struct rte_flow_action_jump){
		.group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
	};
	/* Construct new actions array. */
	memcpy(split_actions, actions, sizeof(*split_actions) * actions_n);
	/* Replace QUEUE/RSS action. */
	split_actions[qrss_idx] = (struct rte_flow_action){
		.type = MLX5_RTE_FLOW_ACTION_TYPE_TAG,
		.conf = set_tag,
	};
	split_actions[actions_n - 2] = (struct rte_flow_action){
		.type = RTE_FLOW_ACTION_TYPE_JUMP,
		.conf = jump,
	};
	split_actions[actions_n - 1] = (struct rte_flow_action){
		.type = RTE_FLOW_ACTION_TYPE_END,
	};
	return flow_id;
}
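/*
 * Sketch of the transformation above for an original list [MARK, RSS, END]
 * (actions_n = 3, incremented to 4): split_actions becomes
 * [MARK, SET_TAG(flow_id), JUMP(CP_TBL), END], with the SET_TAG/JUMP
 * configurations stored past the action array in the same allocation.
 */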
/**
 * Extend the given action list for Tx metadata copy.
 *
 * Copy the given action list to the ext_actions and add flow metadata
 * register copy action in order to copy reg_a set by WQE to reg_c[0].
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[out] ext_actions
 *   Pointer to the extended action list.
 * @param[in] actions
 *   Pointer to the list of actions.
 * @param[in] actions_n
 *   Number of actions in the list.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, negative value otherwise
 */
static int
flow_mreg_tx_copy_prep(struct rte_eth_dev *dev,
		       struct rte_flow_action *ext_actions,
		       const struct rte_flow_action *actions,
		       int actions_n, struct rte_flow_error *error)
{
	struct mlx5_flow_action_copy_mreg *cp_mreg =
		(struct mlx5_flow_action_copy_mreg *)
			(ext_actions + actions_n + 1);
	int ret;

	ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error);
	if (ret < 0)
		return ret;
	cp_mreg->dst = ret;
	ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_TX, 0, error);
	if (ret < 0)
		return ret;
	cp_mreg->src = ret;
	memcpy(ext_actions, actions,
	       sizeof(*ext_actions) * actions_n);
	ext_actions[actions_n - 1] = (struct rte_flow_action){
		.type = MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
		.conf = cp_mreg,
	};
	ext_actions[actions_n] = (struct rte_flow_action){
		.type = RTE_FLOW_ACTION_TYPE_END,
	};
	return 0;
}
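/*
 * Sketch of the resulting layout for actions_n = 3 ([A0, A1, END]):
 * ext_actions becomes [A0, A1, COPY_MREG, END] and the
 * mlx5_flow_action_copy_mreg configuration is stored right past the
 * (actions_n + 1)-entry action array within the same allocation.
 */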
/**
 * The splitting for metadata feature.
 *
 * - Q/RSS action on NIC Rx should be split in order to pass by
 *   the mreg copy table (RX_CP_TBL) and then it jumps to the
 *   action table (RX_ACT_TBL) which has the split Q/RSS action.
 *
 * - All the actions on NIC Tx should have a mreg copy action to
 *   copy reg_a from WQE to reg_c[0].
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[in] flow
 *   Parent flow structure pointer.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] items
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[in] external
 *   This flow rule is created by request external to PMD.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @return
 *   0 on success, negative value otherwise
 */
static int
flow_create_split_metadata(struct rte_eth_dev *dev,
			   struct rte_flow *flow,
			   const struct rte_flow_attr *attr,
			   const struct rte_flow_item items[],
			   const struct rte_flow_action actions[],
			   bool external, struct rte_flow_error *error)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_config *config = &priv->config;
	const struct rte_flow_action *qrss = NULL;
	struct rte_flow_action *ext_actions = NULL;
	struct mlx5_flow *dev_flow = NULL;
	uint32_t qrss_id = 0;
	size_t act_size;
	int actions_n;
	int ret;

	/* Check whether extensive metadata feature is engaged. */
	if (!config->dv_flow_en ||
	    config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
	    !mlx5_flow_ext_mreg_supported(dev))
		return flow_create_split_inner(dev, flow, NULL, attr, items,
					       actions, external, error);
	actions_n = flow_parse_qrss_action(actions, &qrss);
	if (qrss) {
		/* Exclude hairpin flows from splitting. */
		if (qrss->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
			const struct rte_flow_action_queue *queue;

			queue = qrss->conf;
			if (mlx5_rxq_get_type(dev, queue->index) ==
			    MLX5_RXQ_TYPE_HAIRPIN)
				qrss = NULL;
		} else if (qrss->type == RTE_FLOW_ACTION_TYPE_RSS) {
			const struct rte_flow_action_rss *rss;

			rss = qrss->conf;
			if (mlx5_rxq_get_type(dev, rss->queue[0]) ==
			    MLX5_RXQ_TYPE_HAIRPIN)
				qrss = NULL;
		}
	}
	if (qrss) {
		/*
		 * Q/RSS action on NIC Rx should be split in order to pass by
		 * the mreg copy table (RX_CP_TBL) and then it jumps to the
		 * action table (RX_ACT_TBL) which has the split Q/RSS action.
		 */
		act_size = sizeof(struct rte_flow_action) * (actions_n + 1) +
			   sizeof(struct rte_flow_action_set_tag) +
			   sizeof(struct rte_flow_action_jump);
		ext_actions = rte_zmalloc(__func__, act_size, 0);
		if (!ext_actions)
			return rte_flow_error_set(error, ENOMEM,
						  RTE_FLOW_ERROR_TYPE_ACTION,
						  NULL, "no memory to split "
						  "metadata flow");
		/*
		 * Create the new actions list with removed Q/RSS action
		 * and appended set tag and jump to register copy table
		 * (RX_CP_TBL). We should preallocate unique tag ID here
		 * in advance, because it is needed for set tag action.
		 */
		qrss_id = flow_mreg_split_qrss_prep(dev, ext_actions, actions,
						    qrss, actions_n, error);
		if (!qrss_id) {
			ret = -rte_errno;
			goto exit;
		}
	} else if (attr->egress && !attr->transfer) {
		/*
		 * All the actions on NIC Tx should have a metadata register
		 * copy action to copy reg_a from WQE to reg_c[meta]
		 */
		act_size = sizeof(struct rte_flow_action) * (actions_n + 1) +
			   sizeof(struct mlx5_flow_action_copy_mreg);
		ext_actions = rte_zmalloc(__func__, act_size, 0);
		if (!ext_actions)
			return rte_flow_error_set(error, ENOMEM,
						  RTE_FLOW_ERROR_TYPE_ACTION,
						  NULL, "no memory to split "
						  "metadata flow");
		/* Create the action list appended with copy register. */
		ret = flow_mreg_tx_copy_prep(dev, ext_actions, actions,
					     actions_n, error);
		if (ret < 0)
			goto exit;
	}
	/* Add the unmodified original or prefix subflow. */
	ret = flow_create_split_inner(dev, flow, &dev_flow, attr, items,
				      ext_actions ? ext_actions : actions,
				      external, error);
	if (ret < 0)
		goto exit;
	assert(dev_flow);
	if (qrss_id) {
		const struct rte_flow_attr q_attr = {
			.group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
			.ingress = 1,
		};
		/* Internal PMD action to set register. */
		struct mlx5_rte_flow_item_tag q_tag_spec = {
			.data = qrss_id,
			.id = 0,
		};
		struct rte_flow_item q_items[] = {
			{
				.type = MLX5_RTE_FLOW_ITEM_TYPE_TAG,
				.spec = &q_tag_spec,
				.last = NULL,
				.mask = NULL,
			},
			{
				.type = RTE_FLOW_ITEM_TYPE_END,
			},
		};
		struct rte_flow_action q_actions[] = {
			{
				.type = qrss->type,
				.conf = qrss->conf,
			},
			{
				.type = RTE_FLOW_ACTION_TYPE_END,
			},
		};
		uint64_t hash_fields = dev_flow->hash_fields;

		/*
		 * Put the unique id into the prefix flow because the prefix
		 * flow is destroyed after the suffix flow, and the id will be
		 * freed only when no actual flows use it anymore, making
		 * identifier reallocation possible (for example, for other
		 * flows in other threads).
		 */
		dev_flow->qrss_id = qrss_id;
		qrss_id = 0;
		dev_flow = NULL;
		ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0, error);
		if (ret < 0)
			goto exit;
		q_tag_spec.id = ret;
		/* Add suffix subflow to execute Q/RSS. */
		ret = flow_create_split_inner(dev, flow, &dev_flow,
					      &q_attr, q_items, q_actions,
					      external, error);
		if (ret < 0)
			goto exit;
		assert(dev_flow);
		dev_flow->hash_fields = hash_fields;
	}

exit:
	/*
	 * We do not destroy the partially created sub_flows in case of error.
	 * These ones are included into parent flow list and will be destroyed
	 * by flow_drv_destroy.
	 */
	flow_qrss_free_id(dev, qrss_id);
	rte_free(ext_actions);
	return ret;
}
/**
 * Split the flow to subflow set. The splitters might be linked
 * in the chain, like this:
 * flow_create_split_outer() calls:
 *   flow_create_split_meter() calls:
 *     flow_create_split_metadata(meter_subflow_0) calls:
 *       flow_create_split_inner(metadata_subflow_0)
 *       flow_create_split_inner(metadata_subflow_1)
 *       flow_create_split_inner(metadata_subflow_2)
 *     flow_create_split_metadata(meter_subflow_1) calls:
 *       flow_create_split_inner(metadata_subflow_0)
 *       flow_create_split_inner(metadata_subflow_1)
 *       flow_create_split_inner(metadata_subflow_2)
 *
 * This provides a flexible way to add new levels of flow splitting.
 * All successfully created subflows are included in the
 * parent flow dev_flow list.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[in] flow
 *   Parent flow structure pointer.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] items
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[in] external
 *   This flow rule is created by request external to PMD.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @return
 *   0 on success, negative value otherwise
 */
static int
flow_create_split_outer(struct rte_eth_dev *dev,
			struct rte_flow *flow,
			const struct rte_flow_attr *attr,
			const struct rte_flow_item items[],
			const struct rte_flow_action actions[],
			bool external, struct rte_flow_error *error)
{
	int ret;

	ret = flow_create_split_metadata(dev, flow, attr, items,
					 actions, external, error);
	assert(ret <= 0);
	return ret;
}
/**
 * Create a flow and add it to @p list.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param list
 *   Pointer to a TAILQ flow list. If this parameter is NULL,
 *   no list insertion occurs, the flow is just created, and
 *   it is the caller's responsibility to track the
 *   flow memory, removal and destruction.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] items
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[in] external
 *   This flow rule is created by request external to PMD.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A flow on success, NULL otherwise and rte_errno is set.
 */
static struct rte_flow *
flow_list_create(struct rte_eth_dev *dev, struct mlx5_flows *list,
		 const struct rte_flow_attr *attr,
		 const struct rte_flow_item items[],
		 const struct rte_flow_action actions[],
		 bool external, struct rte_flow_error *error)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct rte_flow *flow = NULL;
	struct mlx5_flow *dev_flow;
	const struct rte_flow_action_rss *rss;
	union {
		struct rte_flow_expand_rss buf;
		uint8_t buffer[2048];
	} expand_buffer;
	union {
		struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS];
		uint8_t buffer[2048];
	} actions_rx;
	union {
		struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS];
		uint8_t buffer[2048];
	} actions_hairpin_tx;
	union {
		struct rte_flow_item items[MLX5_MAX_SPLIT_ITEMS];
		uint8_t buffer[2048];
	} items_tx;
	struct rte_flow_expand_rss *buf = &expand_buffer.buf;
	const struct rte_flow_action *p_actions_rx = actions;
	int ret;
	uint32_t i;
	uint32_t flow_size;
	int hairpin_flow = 0;
	uint32_t hairpin_id = 0;
	struct rte_flow_attr attr_tx = { .priority = 0 };

	hairpin_flow = flow_check_hairpin_split(dev, attr, actions);
	if (hairpin_flow > 0) {
		if (hairpin_flow > MLX5_MAX_SPLIT_ACTIONS) {
			rte_errno = EINVAL;
			return NULL;
		}
		flow_hairpin_split(dev, actions, actions_rx.actions,
				   actions_hairpin_tx.actions, items_tx.items,
				   &hairpin_id);
		p_actions_rx = actions_rx.actions;
	}
	ret = flow_drv_validate(dev, attr, items, p_actions_rx, external,
				error);
	if (ret < 0)
		goto error_before_flow;
	flow_size = sizeof(struct rte_flow);
	rss = flow_get_rss_action(p_actions_rx);
	if (rss)
		flow_size += RTE_ALIGN_CEIL(rss->queue_num * sizeof(uint16_t),
					    sizeof(void *));
	else
		flow_size += RTE_ALIGN_CEIL(sizeof(uint16_t), sizeof(void *));
	flow = rte_calloc(__func__, 1, flow_size, 0);
	if (!flow) {
		rte_errno = ENOMEM;
		goto error_before_flow;
	}
	flow->drv_type = flow_get_drv_type(dev, attr);
	if (hairpin_id != 0)
		flow->hairpin_flow_id = hairpin_id;
	assert(flow->drv_type > MLX5_FLOW_TYPE_MIN &&
	       flow->drv_type < MLX5_FLOW_TYPE_MAX);
	flow->rss.queue = (void *)(flow + 1);
	if (rss) {
		/*
		 * The following information is required by
		 * mlx5_flow_hashfields_adjust() in advance.
		 */
		flow->rss.level = rss->level;
		/* RSS type 0 indicates default RSS type (ETH_RSS_IP). */
		flow->rss.types = !rss->types ? ETH_RSS_IP : rss->types;
	}
	LIST_INIT(&flow->dev_flows);
	if (rss && rss->types) {
		unsigned int graph_root;

		graph_root = find_graph_root(items, rss->level);
		ret = rte_flow_expand_rss(buf, sizeof(expand_buffer.buffer),
					  items, rss->types,
					  mlx5_support_expansion,
					  graph_root);
		assert(ret > 0 &&
		       (unsigned int)ret < sizeof(expand_buffer.buffer));
	} else {
		buf->entries = 1;
		buf->entry[0].pattern = (void *)(uintptr_t)items;
	}
	for (i = 0; i < buf->entries; ++i) {
		/*
		 * The splitter may create multiple dev_flows,
		 * depending on configuration. In the simplest
		 * case it just creates unmodified original flow.
		 */
		ret = flow_create_split_outer(dev, flow, attr,
					      buf->entry[i].pattern,
					      p_actions_rx, external,
					      error);
		if (ret < 0)
			goto error;
	}
	/* Create the tx flow. */
	if (hairpin_flow) {
		attr_tx.group = MLX5_HAIRPIN_TX_TABLE;
		attr_tx.ingress = 0;
		attr_tx.egress = 1;
		dev_flow = flow_drv_prepare(flow, &attr_tx, items_tx.items,
					    actions_hairpin_tx.actions, error);
		if (!dev_flow)
			goto error;
		dev_flow->flow = flow;
		dev_flow->external = 0;
		LIST_INSERT_HEAD(&flow->dev_flows, dev_flow, next);
		ret = flow_drv_translate(dev, dev_flow, &attr_tx,
					 items_tx.items,
					 actions_hairpin_tx.actions, error);
		if (ret < 0)
			goto error;
	}
	/*
	 * Update the metadata register copy table. If extensive
	 * metadata feature is enabled and registers are supported
	 * we might create the extra rte_flow for each unique
	 * MARK/FLAG action ID.
	 *
	 * The table is updated for ingress Flows only, because
	 * the egress Flows belong to the different device and
	 * copy table should be updated in peer NIC Rx domain.
	 */
	if (attr->ingress &&
	    (external || attr->group != MLX5_FLOW_MREG_CP_TABLE_GROUP)) {
		ret = flow_mreg_update_copy_table(dev, flow, actions, error);
		if (ret)
			goto error;
	}
	if (dev->data->dev_started) {
		ret = flow_drv_apply(dev, flow, error);
		if (ret < 0)
			goto error;
	}
	if (list)
		TAILQ_INSERT_TAIL(list, flow, next);
	flow_rxq_flags_set(dev, flow);
	return flow;
error_before_flow:
	if (hairpin_id)
		mlx5_flow_id_release(priv->sh->flow_id_pool,
				     hairpin_id);
	return NULL;
error:
	assert(flow);
	flow_mreg_del_copy_action(dev, flow);
	ret = rte_errno; /* Save rte_errno before cleanup. */
	if (flow->hairpin_flow_id)
		mlx5_flow_id_release(priv->sh->flow_id_pool,
				     flow->hairpin_flow_id);
	flow_drv_destroy(dev, flow);
	rte_free(flow);
	rte_errno = ret; /* Restore rte_errno. */
	return NULL;
}
/**
 * Create a dedicated flow rule on e-switch table 0 (root table), to direct
 * all incoming packets to table 1.
 *
 * Other flow rules, requested for group n, will be created in
 * e-switch table n+1.
 * Jump action to e-switch group n will be created to group n+1.
 *
 * Used when working in switchdev mode, to utilise advantages of table 1
 * and above.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   Pointer to flow on success, NULL otherwise and rte_errno is set.
 */
struct rte_flow *
mlx5_flow_create_esw_table_zero_flow(struct rte_eth_dev *dev)
{
	const struct rte_flow_attr attr = {
		.group = 0,
		.priority = 0,
		.ingress = 1,
		.egress = 0,
		.transfer = 1,
	};
	const struct rte_flow_item pattern = {
		.type = RTE_FLOW_ITEM_TYPE_END,
	};
	struct rte_flow_action_jump jump = {
		.group = 1,
	};
	const struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_JUMP,
			.conf = &jump,
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		},
	};
	struct mlx5_priv *priv = dev->data->dev_private;
	struct rte_flow_error error;

	return flow_list_create(dev, &priv->ctrl_flows, &attr, &pattern,
				actions, false, &error);
}
/**
 * Create a flow.
 *
 * @see rte_flow_create()
 * @see rte_flow_ops
 */
struct rte_flow *
mlx5_flow_create(struct rte_eth_dev *dev,
		 const struct rte_flow_attr *attr,
		 const struct rte_flow_item items[],
		 const struct rte_flow_action actions[],
		 struct rte_flow_error *error)
{
	struct mlx5_priv *priv = dev->data->dev_private;

	return flow_list_create(dev, &priv->flows,
				attr, items, actions, true, error);
}
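/*
 * Application-side sketch reaching the entry point above through
 * rte_flow_create(); values are examples and error handling is elided.
 *
 * @code
 *	struct rte_flow_attr attr = { .ingress = 1 };
 *	struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 *	struct rte_flow_action_queue queue = { .index = 0 };
 *	struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 *	struct rte_flow_error err;
 *	struct rte_flow *f;
 *
 *	f = rte_flow_create(port_id, &attr, pattern, actions, &err);
 * @endcode
 */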
/**
 * Destroy a flow in a list.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param list
 *   Pointer to a TAILQ flow list. If this parameter is NULL,
 *   there is no flow removal from the list.
 * @param[in] flow
 *   Flow to destroy.
 */
static void
flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
		  struct rte_flow *flow)
{
	struct mlx5_priv *priv = dev->data->dev_private;

	/*
	 * Update RX queue flags only if port is started, otherwise it is
	 * already clean.
	 */
	if (dev->data->dev_started)
		flow_rxq_flags_trim(dev, flow);
	if (flow->hairpin_flow_id)
		mlx5_flow_id_release(priv->sh->flow_id_pool,
				     flow->hairpin_flow_id);
	flow_drv_destroy(dev, flow);
	if (list)
		TAILQ_REMOVE(list, flow, next);
	flow_mreg_del_copy_action(dev, flow);
	rte_free(flow->fdir);
	rte_free(flow);
}
/**
 * Destroy all flows.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param list
 *   Pointer to a TAILQ flow list.
 */
void
mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
{
	while (!TAILQ_EMPTY(list)) {
		struct rte_flow *flow;

		flow = TAILQ_FIRST(list);
		flow_list_destroy(dev, list, flow);
	}
}
/**
 * Remove all flows.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param list
 *   Pointer to a TAILQ flow list.
 */
void
mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
{
	struct rte_flow *flow;

	TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
		flow_drv_remove(dev, flow);
		flow_mreg_stop_copy_action(dev, flow);
	}
	flow_mreg_del_default_copy_action(dev);
	flow_rxq_flags_clear(dev);
}
/**
 * Add all flows.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param list
 *   Pointer to a TAILQ flow list.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
{
	struct rte_flow *flow;
	struct rte_flow_error error;
	int ret = 0;

	/* Make sure default copy action (reg_c[0] -> reg_b) is created. */
	ret = flow_mreg_add_default_copy_action(dev, &error);
	if (ret < 0)
		return -rte_errno;
	/* Apply Flows created by application. */
	TAILQ_FOREACH(flow, list, next) {
		ret = flow_mreg_start_copy_action(dev, flow);
		if (ret < 0)
			goto error;
		ret = flow_drv_apply(dev, flow, &error);
		if (ret < 0)
			goto error;
		flow_rxq_flags_set(dev, flow);
	}
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	mlx5_flow_stop(dev, list);
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}
/**
 * Verify the flow list is empty.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return the number of flows not released.
 */
int
mlx5_flow_verify(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct rte_flow *flow;
	int ret = 0;

	TAILQ_FOREACH(flow, &priv->flows, next) {
		DRV_LOG(DEBUG, "port %u flow %p still referenced",
			dev->data->port_id, (void *)flow);
		++ret;
	}
	return ret;
}
/**
 * Enable default hairpin egress flow.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param queue
 *   The queue index.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_ctrl_flow_source_queue(struct rte_eth_dev *dev,
			    uint32_t queue)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	const struct rte_flow_attr attr = {
		.egress = 1,
		.priority = 0,
	};
	struct mlx5_rte_flow_item_tx_queue queue_spec = {
		.queue = queue,
	};
	struct mlx5_rte_flow_item_tx_queue queue_mask = {
		.queue = UINT32_MAX,
	};
	struct rte_flow_item items[] = {
		{
			.type = MLX5_RTE_FLOW_ITEM_TYPE_TX_QUEUE,
			.spec = &queue_spec,
			.last = NULL,
			.mask = &queue_mask,
		},
		{
			.type = RTE_FLOW_ITEM_TYPE_END,
		},
	};
	struct rte_flow_action_jump jump = {
		.group = MLX5_HAIRPIN_TX_TABLE,
	};
	struct rte_flow_action actions[2];
	struct rte_flow *flow;
	struct rte_flow_error error;

	actions[0].type = RTE_FLOW_ACTION_TYPE_JUMP;
	actions[0].conf = &jump;
	actions[1].type = RTE_FLOW_ACTION_TYPE_END;
	flow = flow_list_create(dev, &priv->ctrl_flows,
				&attr, items, actions, false, &error);
	if (!flow) {
		DRV_LOG(DEBUG,
			"Failed to create ctrl flow: rte_errno(%d),"
			" type(%d), message(%s)",
			rte_errno, error.type,
			error.message ? error.message : " (no stated reason)");
		return -rte_errno;
	}
	return 0;
}
/**
 * Enable a control flow configured from the control plane.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param eth_spec
 *   An Ethernet flow spec to apply.
 * @param eth_mask
 *   An Ethernet flow mask to apply.
 * @param vlan_spec
 *   A VLAN flow spec to apply.
 * @param vlan_mask
 *   A VLAN flow mask to apply.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
		    struct rte_flow_item_eth *eth_spec,
		    struct rte_flow_item_eth *eth_mask,
		    struct rte_flow_item_vlan *vlan_spec,
		    struct rte_flow_item_vlan *vlan_mask)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	const struct rte_flow_attr attr = {
		.ingress = 1,
		.priority = MLX5_FLOW_PRIO_RSVD,
	};
	struct rte_flow_item items[] = {
		{
			.type = RTE_FLOW_ITEM_TYPE_ETH,
			.spec = eth_spec,
			.last = NULL,
			.mask = eth_mask,
		},
		{
			.type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
					      RTE_FLOW_ITEM_TYPE_END,
			.spec = vlan_spec,
			.last = NULL,
			.mask = vlan_mask,
		},
		{
			.type = RTE_FLOW_ITEM_TYPE_END,
		},
	};
	uint16_t queue[priv->reta_idx_n];
	struct rte_flow_action_rss action_rss = {
		.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
		.level = 0,
		.types = priv->rss_conf.rss_hf,
		.key_len = priv->rss_conf.rss_key_len,
		.queue_num = priv->reta_idx_n,
		.key = priv->rss_conf.rss_key,
		.queue = queue,
	};
	struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_RSS,
			.conf = &action_rss,
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		},
	};
	struct rte_flow *flow;
	struct rte_flow_error error;
	unsigned int i;

	if (!priv->reta_idx_n || !priv->rxqs_n) {
		return 0;
	}
	for (i = 0; i != priv->reta_idx_n; ++i)
		queue[i] = (*priv->reta_idx)[i];
	flow = flow_list_create(dev, &priv->ctrl_flows,
				&attr, items, actions, false, &error);
	if (!flow)
		return -rte_errno;
	return 0;
}
/**
 * Enable a control flow configured from the control plane.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param eth_spec
 *   An Ethernet flow spec to apply.
 * @param eth_mask
 *   An Ethernet flow mask to apply.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_ctrl_flow(struct rte_eth_dev *dev,
	       struct rte_flow_item_eth *eth_spec,
	       struct rte_flow_item_eth *eth_mask)
{
	return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
}
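/*
 * Sketch: enabling a broadcast control flow through the wrapper above
 * (example values, not taken from this file).
 *
 * @code
 *	struct rte_flow_item_eth bcast = {
 *		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *	};
 *
 *	mlx5_ctrl_flow(dev, &bcast, &bcast);
 * @endcode
 */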
/**
 * Destroy a flow.
 *
 * @see rte_flow_destroy()
 * @see rte_flow_ops
 */
int
mlx5_flow_destroy(struct rte_eth_dev *dev,
		  struct rte_flow *flow,
		  struct rte_flow_error *error __rte_unused)
{
	struct mlx5_priv *priv = dev->data->dev_private;

	flow_list_destroy(dev, &priv->flows, flow);
	return 0;
}
/**
 * Destroy all flows.
 *
 * @see rte_flow_flush()
 * @see rte_flow_ops
 */
int
mlx5_flow_flush(struct rte_eth_dev *dev,
		struct rte_flow_error *error __rte_unused)
{
	struct mlx5_priv *priv = dev->data->dev_private;

	mlx5_flow_list_flush(dev, &priv->flows);
	return 0;
}
/**
 * Isolated mode.
 *
 * @see rte_flow_isolate()
 * @see rte_flow_ops
 */
int
mlx5_flow_isolate(struct rte_eth_dev *dev,
		  int enable,
		  struct rte_flow_error *error)
{
	struct mlx5_priv *priv = dev->data->dev_private;

	if (dev->data->dev_started) {
		rte_flow_error_set(error, EBUSY,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "port must be stopped first");
		return -rte_errno;
	}
	priv->isolated = !!enable;
	if (enable)
		dev->dev_ops = &mlx5_dev_ops_isolate;
	else
		dev->dev_ops = &mlx5_dev_ops;
	return 0;
}
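/*
 * Application-side sketch: isolated mode must be selected while the port is
 * stopped (example call; error handling elided).
 *
 * @code
 *	struct rte_flow_error err;
 *
 *	rte_flow_isolate(port_id, 1, &err);	// before rte_eth_dev_start()
 * @endcode
 */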
/**
 * Query a flow.
 *
 * @see rte_flow_query()
 * @see rte_flow_ops
 */
static int
flow_drv_query(struct rte_eth_dev *dev,
	       struct rte_flow *flow,
	       const struct rte_flow_action *actions,
	       void *data,
	       struct rte_flow_error *error)
{
	const struct mlx5_flow_driver_ops *fops;
	enum mlx5_flow_drv_type ftype = flow->drv_type;

	assert(ftype > MLX5_FLOW_TYPE_MIN && ftype < MLX5_FLOW_TYPE_MAX);
	fops = flow_get_drv_ops(ftype);

	return fops->query(dev, flow, actions, data, error);
}
/**
 * Query a flow.
 *
 * @see rte_flow_query()
 * @see rte_flow_ops
 */
int
mlx5_flow_query(struct rte_eth_dev *dev,
		struct rte_flow *flow,
		const struct rte_flow_action *actions,
		void *data,
		struct rte_flow_error *error)
{
	int ret;

	ret = flow_drv_query(dev, flow, actions, data, error);
	if (ret < 0)
		return ret;
	return 0;
}
/**
 * Convert a flow director filter to a generic flow.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Flow director filter to add.
 * @param attributes
 *   Generic flow parameters structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
4412 flow_fdir_filter_convert(struct rte_eth_dev *dev,
4413 const struct rte_eth_fdir_filter *fdir_filter,
4414 struct mlx5_fdir *attributes)
4416 struct mlx5_priv *priv = dev->data->dev_private;
4417 const struct rte_eth_fdir_input *input = &fdir_filter->input;
4418 const struct rte_eth_fdir_masks *mask =
4419 &dev->data->dev_conf.fdir_conf.mask;
4421 /* Validate queue number. */
4422 if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
4423 DRV_LOG(ERR, "port %u invalid queue number %d",
4424 dev->data->port_id, fdir_filter->action.rx_queue);
4428 attributes->attr.ingress = 1;
4429 attributes->items[0] = (struct rte_flow_item) {
4430 .type = RTE_FLOW_ITEM_TYPE_ETH,
4431 .spec = &attributes->l2,
4432 .mask = &attributes->l2_mask,
4434 switch (fdir_filter->action.behavior) {
4435 case RTE_ETH_FDIR_ACCEPT:
4436 attributes->actions[0] = (struct rte_flow_action){
4437 .type = RTE_FLOW_ACTION_TYPE_QUEUE,
4438 .conf = &attributes->queue,
4441 case RTE_ETH_FDIR_REJECT:
4442 attributes->actions[0] = (struct rte_flow_action){
4443 .type = RTE_FLOW_ACTION_TYPE_DROP,
4447 DRV_LOG(ERR, "port %u invalid behavior %d",
4449 fdir_filter->action.behavior);
4450 rte_errno = ENOTSUP;
4453 attributes->queue.index = fdir_filter->action.rx_queue;
4455 switch (fdir_filter->input.flow_type) {
4456 case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
4457 case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
4458 case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
4459 attributes->l3.ipv4.hdr = (struct rte_ipv4_hdr){
4460 .src_addr = input->flow.ip4_flow.src_ip,
4461 .dst_addr = input->flow.ip4_flow.dst_ip,
4462 .time_to_live = input->flow.ip4_flow.ttl,
4463 .type_of_service = input->flow.ip4_flow.tos,
4465 attributes->l3_mask.ipv4.hdr = (struct rte_ipv4_hdr){
4466 .src_addr = mask->ipv4_mask.src_ip,
4467 .dst_addr = mask->ipv4_mask.dst_ip,
4468 .time_to_live = mask->ipv4_mask.ttl,
4469 .type_of_service = mask->ipv4_mask.tos,
4470 .next_proto_id = mask->ipv4_mask.proto,
4472 attributes->items[1] = (struct rte_flow_item){
4473 .type = RTE_FLOW_ITEM_TYPE_IPV4,
4474 .spec = &attributes->l3,
4475 .mask = &attributes->l3_mask,
4478 case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
4479 case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
4480 case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
4481 attributes->l3.ipv6.hdr = (struct rte_ipv6_hdr){
4482 .hop_limits = input->flow.ipv6_flow.hop_limits,
4483 .proto = input->flow.ipv6_flow.proto,
4486 memcpy(attributes->l3.ipv6.hdr.src_addr,
4487 input->flow.ipv6_flow.src_ip,
4488 RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
4489 memcpy(attributes->l3.ipv6.hdr.dst_addr,
4490 input->flow.ipv6_flow.dst_ip,
4491 RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
4492 memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
4493 mask->ipv6_mask.src_ip,
4494 RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
4495 memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
4496 mask->ipv6_mask.dst_ip,
4497 RTE_DIM(attributes->l3_mask.ipv6.hdr.dst_addr));
4498 attributes->items[1] = (struct rte_flow_item){
4499 .type = RTE_FLOW_ITEM_TYPE_IPV6,
4500 .spec = &attributes->l3,
4501 .mask = &attributes->l3_mask,
4505 DRV_LOG(ERR, "port %u invalid flow type %d",
4506 dev->data->port_id, fdir_filter->input.flow_type);
4507 rte_errno = ENOTSUP;
4511 switch (fdir_filter->input.flow_type) {
4512 case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
4513 attributes->l4.udp.hdr = (struct rte_udp_hdr){
4514 .src_port = input->flow.udp4_flow.src_port,
4515 .dst_port = input->flow.udp4_flow.dst_port,
4517 attributes->l4_mask.udp.hdr = (struct rte_udp_hdr){
4518 .src_port = mask->src_port_mask,
4519 .dst_port = mask->dst_port_mask,
4521 attributes->items[2] = (struct rte_flow_item){
4522 .type = RTE_FLOW_ITEM_TYPE_UDP,
4523 .spec = &attributes->l4,
4524 .mask = &attributes->l4_mask,
4527 case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
4528 attributes->l4.tcp.hdr = (struct rte_tcp_hdr){
4529 .src_port = input->flow.tcp4_flow.src_port,
4530 .dst_port = input->flow.tcp4_flow.dst_port,
4532 attributes->l4_mask.tcp.hdr = (struct rte_tcp_hdr){
4533 .src_port = mask->src_port_mask,
4534 .dst_port = mask->dst_port_mask,
4536 attributes->items[2] = (struct rte_flow_item){
4537 .type = RTE_FLOW_ITEM_TYPE_TCP,
4538 .spec = &attributes->l4,
4539 .mask = &attributes->l4_mask,
4542 case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
4543 attributes->l4.udp.hdr = (struct rte_udp_hdr){
4544 .src_port = input->flow.udp6_flow.src_port,
4545 .dst_port = input->flow.udp6_flow.dst_port,
4547 attributes->l4_mask.udp.hdr = (struct rte_udp_hdr){
4548 .src_port = mask->src_port_mask,
4549 .dst_port = mask->dst_port_mask,
4551 attributes->items[2] = (struct rte_flow_item){
4552 .type = RTE_FLOW_ITEM_TYPE_UDP,
4553 .spec = &attributes->l4,
4554 .mask = &attributes->l4_mask,
4557 case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
4558 attributes->l4.tcp.hdr = (struct rte_tcp_hdr){
4559 .src_port = input->flow.tcp6_flow.src_port,
4560 .dst_port = input->flow.tcp6_flow.dst_port,
4562 attributes->l4_mask.tcp.hdr = (struct rte_tcp_hdr){
4563 .src_port = mask->src_port_mask,
4564 .dst_port = mask->dst_port_mask,
4566 attributes->items[2] = (struct rte_flow_item){
4567 .type = RTE_FLOW_ITEM_TYPE_TCP,
4568 .spec = &attributes->l4,
4569 .mask = &attributes->l4_mask,
4572 case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
4573 case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
4576 DRV_LOG(ERR, "port %u invalid flow type %d",
4577 dev->data->port_id, fdir_filter->input.flow_type);
4578 rte_errno = ENOTSUP;
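/*
 * Editor's sketch, not part of the original source: a filter that the
 * converter above turns into ETH / IPV4 / UDP items plus a QUEUE action.
 * Addresses, ports and the queue index are placeholder values (the API
 * expects them in network byte order). The guard macro is hypothetical.
 */
#ifdef MLX5_FLOW_DOC_EXAMPLES
static const struct rte_eth_fdir_filter example_fdir_udp4 = {
	.input = {
		.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
		.flow.udp4_flow = {
			.ip = {
				.src_ip = RTE_BE32(RTE_IPV4(192, 168, 0, 1)),
				.dst_ip = RTE_BE32(RTE_IPV4(192, 168, 0, 2)),
			},
			.src_port = RTE_BE16(1234),
			.dst_port = RTE_BE16(5678),
		},
	},
	.action = {
		.rx_queue = 3,
		.behavior = RTE_ETH_FDIR_ACCEPT,
	},
};
#endif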
4584 #define FLOW_FDIR_CMP(f1, f2, fld) \
4585 memcmp(&(f1)->fld, &(f2)->fld, sizeof((f1)->fld))
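/*
 * Editor's note, not part of the original source: FLOW_FDIR_CMP compares
 * whole sub-structures with memcmp(), which is only reliable when any
 * compiler padding holds equal bytes on both sides; the zeroed allocations
 * used for struct mlx5_fdir below (rte_zmalloc() and zero-initialized
 * locals) keep that assumption true.
 */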
4588 * Compare two FDIR flows. If items and actions are identical, the two flows
4589 * are regarded as the same.
4594 * FDIR flow to compare.
4596 * FDIR flow to compare.
4599 * Zero on match, 1 otherwise.
4602 flow_fdir_cmp(const struct mlx5_fdir *f1, const struct mlx5_fdir *f2)
4604 if (FLOW_FDIR_CMP(f1, f2, attr) ||
4605 FLOW_FDIR_CMP(f1, f2, l2) ||
4606 FLOW_FDIR_CMP(f1, f2, l2_mask) ||
4607 FLOW_FDIR_CMP(f1, f2, l3) ||
4608 FLOW_FDIR_CMP(f1, f2, l3_mask) ||
4609 FLOW_FDIR_CMP(f1, f2, l4) ||
4610 FLOW_FDIR_CMP(f1, f2, l4_mask) ||
4611 FLOW_FDIR_CMP(f1, f2, actions[0].type))
4613 if (f1->actions[0].type == RTE_FLOW_ACTION_TYPE_QUEUE &&
4614 FLOW_FDIR_CMP(f1, f2, queue))
4620 * Search the device flow list for a matching FDIR flow.
4623 * Pointer to Ethernet device.
4625 * FDIR flow to lookup.
4628 * Pointer of flow if found, NULL otherwise.
4630 static struct rte_flow *
4631 flow_fdir_filter_lookup(struct rte_eth_dev *dev, struct mlx5_fdir *fdir_flow)
4633 struct mlx5_priv *priv = dev->data->dev_private;
4634 struct rte_flow *flow = NULL;
4637 TAILQ_FOREACH(flow, &priv->flows, next) {
4638 if (flow->fdir && !flow_fdir_cmp(flow->fdir, fdir_flow)) {
4639 DRV_LOG(DEBUG, "port %u found FDIR flow %p",
4640 dev->data->port_id, (void *)flow);
4648 * Add a new flow director filter and store it in the list.
4651 * Pointer to Ethernet device.
4652 * @param fdir_filter
4653 * Flow director filter to add.
4656 * 0 on success, a negative errno value otherwise and rte_errno is set.
4659 flow_fdir_filter_add(struct rte_eth_dev *dev,
4660 const struct rte_eth_fdir_filter *fdir_filter)
4662 struct mlx5_priv *priv = dev->data->dev_private;
4663 struct mlx5_fdir *fdir_flow;
4664 struct rte_flow *flow;
4667 fdir_flow = rte_zmalloc(__func__, sizeof(*fdir_flow), 0);
4672 ret = flow_fdir_filter_convert(dev, fdir_filter, fdir_flow);
4675 flow = flow_fdir_filter_lookup(dev, fdir_flow);
4680 flow = flow_list_create(dev, &priv->flows, &fdir_flow->attr,
4681 fdir_flow->items, fdir_flow->actions, true,
4685 assert(!flow->fdir);
4686 flow->fdir = fdir_flow;
4687 DRV_LOG(DEBUG, "port %u created FDIR flow %p",
4688 dev->data->port_id, (void *)flow);
4691 rte_free(fdir_flow);
4696 * Delete a specific filter.
4699 * Pointer to Ethernet device.
4700 * @param fdir_filter
4701 * Filter to be deleted.
4704 * 0 on success, a negative errno value otherwise and rte_errno is set.
4707 flow_fdir_filter_delete(struct rte_eth_dev *dev,
4708 const struct rte_eth_fdir_filter *fdir_filter)
4710 struct mlx5_priv *priv = dev->data->dev_private;
4711 struct rte_flow *flow;
4712 struct mlx5_fdir fdir_flow = {
4717 ret = flow_fdir_filter_convert(dev, fdir_filter, &fdir_flow);
4720 flow = flow_fdir_filter_lookup(dev, &fdir_flow);
4725 flow_list_destroy(dev, &priv->flows, flow);
4726 DRV_LOG(DEBUG, "port %u deleted FDIR flow %p",
4727 dev->data->port_id, (void *)flow);
4732 * Update the queue for a specific filter.
4735 * Pointer to Ethernet device.
4736 * @param fdir_filter
4737 * Filter to be updated.
4740 * 0 on success, a negative errno value otherwise and rte_errno is set.
4743 flow_fdir_filter_update(struct rte_eth_dev *dev,
4744 const struct rte_eth_fdir_filter *fdir_filter)
4748 ret = flow_fdir_filter_delete(dev, fdir_filter);
4751 return flow_fdir_filter_add(dev, fdir_filter);
4755 * Flush all filters.
4758 * Pointer to Ethernet device.
4761 flow_fdir_filter_flush(struct rte_eth_dev *dev)
4763 struct mlx5_priv *priv = dev->data->dev_private;
4765 mlx5_flow_list_flush(dev, &priv->flows);
4769 * Get flow director information.
4772 * Pointer to Ethernet device.
4773 * @param[out] fdir_info
4774 * Resulting flow director information.
4777 flow_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
4779 struct rte_eth_fdir_masks *mask =
4780 &dev->data->dev_conf.fdir_conf.mask;
4782 fdir_info->mode = dev->data->dev_conf.fdir_conf.mode;
4783 fdir_info->guarant_spc = 0;
4784 rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
4785 fdir_info->max_flexpayload = 0;
4786 fdir_info->flow_types_mask[0] = 0;
4787 fdir_info->flex_payload_unit = 0;
4788 fdir_info->max_flex_payload_segment_num = 0;
4789 fdir_info->flex_payload_limit = 0;
4790 memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
4794 * Handle flow director operations.
4797 * Pointer to Ethernet device.
4799 * Operation to perform.
4801 * Pointer to operation-specific structure.
4804 * 0 on success, a negative errno value otherwise and rte_errno is set.
4807 flow_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
4810 enum rte_fdir_mode fdir_mode =
4811 dev->data->dev_conf.fdir_conf.mode;
4813 if (filter_op == RTE_ETH_FILTER_NOP)
4815 if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
4816 fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
4817 DRV_LOG(ERR, "port %u flow director mode %d not supported",
4818 dev->data->port_id, fdir_mode);
4822 switch (filter_op) {
4823 case RTE_ETH_FILTER_ADD:
4824 return flow_fdir_filter_add(dev, arg);
4825 case RTE_ETH_FILTER_UPDATE:
4826 return flow_fdir_filter_update(dev, arg);
4827 case RTE_ETH_FILTER_DELETE:
4828 return flow_fdir_filter_delete(dev, arg);
4829 case RTE_ETH_FILTER_FLUSH:
4830 flow_fdir_filter_flush(dev);
4832 case RTE_ETH_FILTER_INFO:
4833 flow_fdir_info_get(dev, arg);
4836 DRV_LOG(DEBUG, "port %u unknown operation %u",
4837 dev->data->port_id, filter_op);
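/*
 * Editor's sketch, not part of the original source: adding a flow director
 * filter through the legacy filter-control API, which dispatches to
 * flow_fdir_ctrl_func() above. example_fdir_udp4 is the illustrative filter
 * defined earlier; the function and guard macro names are hypothetical.
 */
#ifdef MLX5_FLOW_DOC_EXAMPLES
static int
example_fdir_add(uint16_t port_id)
{
	return rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
				       RTE_ETH_FILTER_ADD,
				       (void *)(uintptr_t)&example_fdir_udp4);
}
#endif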
4845 * Manage filter operations.
4848 * Pointer to Ethernet device structure.
4849 * @param filter_type
4852 * Operation to perform.
4854 * Pointer to operation-specific structure.
4857 * 0 on success, a negative errno value otherwise and rte_errno is set.
4860 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
4861 enum rte_filter_type filter_type,
4862 enum rte_filter_op filter_op,
4865 switch (filter_type) {
4866 case RTE_ETH_FILTER_GENERIC:
4867 if (filter_op != RTE_ETH_FILTER_GET) {
4871 *(const void **)arg = &mlx5_flow_ops;
4873 case RTE_ETH_FILTER_FDIR:
4874 return flow_fdir_ctrl_func(dev, filter_op, arg);
4876 DRV_LOG(ERR, "port %u filter type (%d) not supported",
4877 dev->data->port_id, filter_type);
4878 rte_errno = ENOTSUP;
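/*
 * Editor's note, not part of the original source: for RTE_ETH_FILTER_GENERIC
 * the function above only hands back the rte_flow ops table; this mirrors
 * what the rte_flow library does when resolving a port's flow callbacks.
 * A minimal sketch with a hypothetical name and guard macro:
 */
#ifdef MLX5_FLOW_DOC_EXAMPLES
static const struct rte_flow_ops *
example_get_flow_ops(struct rte_eth_dev *dev)
{
	const struct rte_flow_ops *ops = NULL;

	if (mlx5_dev_filter_ctrl(dev, RTE_ETH_FILTER_GENERIC,
				 RTE_ETH_FILTER_GET, &ops))
		return NULL;
	return ops;
}
#endif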
4884 #define MLX5_POOL_QUERY_FREQ_US 1000000
4887 * Set the periodic procedure for triggering asynchronous batch queries for all
4888 * the counter pools.
4891 * Pointer to mlx5_ibv_shared object.
4894 mlx5_set_query_alarm(struct mlx5_ibv_shared *sh)
4896 struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(sh, 0, 0);
4897 uint32_t pools_n = rte_atomic16_read(&cont->n_valid);
4900 cont = MLX5_CNT_CONTAINER(sh, 1, 0);
4901 pools_n += rte_atomic16_read(&cont->n_valid);
4902 us = MLX5_POOL_QUERY_FREQ_US / pools_n;
4903 DRV_LOG(DEBUG, "Set alarm for %u pools each %u us", pools_n, us);
4904 if (rte_eal_alarm_set(us, mlx5_flow_query_alarm, sh)) {
4905 sh->cmng.query_thread_on = 0;
4906 DRV_LOG(ERR, "Cannot reinitialize query alarm");
4908 sh->cmng.query_thread_on = 1;
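/*
 * Editor's note, not part of the original source: the alarm period is
 * MLX5_POOL_QUERY_FREQ_US / pools_n and each expiry queries a single pool,
 * so every pool is refreshed about once per second regardless of the pool
 * count (e.g. 4 pools -> one query every 250000 us). The division assumes
 * at least one valid pool exists when the alarm is armed.
 */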
4913 * The periodic procedure for triggering asynchronous batch queries for all the
4914 * counter pools. This function is expected to be called from the host thread.
4917 * The parameter for the alarm process.
4920 mlx5_flow_query_alarm(void *arg)
4922 struct mlx5_ibv_shared *sh = arg;
4923 struct mlx5_devx_obj *dcs;
4926 uint8_t batch = sh->cmng.batch;
4927 uint16_t pool_index = sh->cmng.pool_index;
4928 struct mlx5_pools_container *cont;
4929 struct mlx5_pools_container *mcont;
4930 struct mlx5_flow_counter_pool *pool;
4932 if (sh->cmng.pending_queries >= MLX5_MAX_PENDING_QUERIES)
4935 cont = MLX5_CNT_CONTAINER(sh, batch, 1);
4936 mcont = MLX5_CNT_CONTAINER(sh, batch, 0);
4937 /* Check if a resize was done and the container needs to be flipped. */
4938 if (cont != mcont) {
4940 /* Clean the old container. */
4941 rte_free(cont->pools);
4942 memset(cont, 0, sizeof(*cont));
4945 /* Flip the host container. */
4946 sh->cmng.mhi[batch] ^= (uint8_t)2;
4950 /* The case of two empty containers is unexpected. */
4951 if (unlikely(batch != sh->cmng.batch))
4955 goto next_container;
4957 pool = cont->pools[pool_index];
4959 /* There is a pool query in progress. */
4962 LIST_FIRST(&sh->cmng.free_stat_raws);
4964 /* No free counter statistics raw memory. */
4966 dcs = (struct mlx5_devx_obj *)(uintptr_t)rte_atomic64_read
4968 offset = batch ? 0 : dcs->id % MLX5_COUNTERS_PER_POOL;
4969 ret = mlx5_devx_cmd_flow_counter_query(dcs, 0, MLX5_COUNTERS_PER_POOL -
4971 pool->raw_hw->mem_mng->dm->id,
4973 (pool->raw_hw->data + offset),
4975 (uint64_t)(uintptr_t)pool);
4977 DRV_LOG(ERR, "Failed to trigger asynchronous query for dcs ID"
4978 " %d", pool->min_dcs->id);
4979 pool->raw_hw = NULL;
4982 pool->raw_hw->min_dcs_id = dcs->id;
4983 LIST_REMOVE(pool->raw_hw, next);
4984 sh->cmng.pending_queries++;
4986 if (pool_index >= rte_atomic16_read(&cont->n_valid)) {
4991 sh->cmng.batch = batch;
4992 sh->cmng.pool_index = pool_index;
4993 mlx5_set_query_alarm(sh);
4997 * Handler for the HW response carrying ready values from an asynchronous batch
4998 * query. This function is expected to be called from the host thread.
5001 * The pointer to the shared IB device context.
5002 * @param[in] async_id
5003 * The Devx async ID.
5005 * The status of the completion.
5008 mlx5_flow_async_pool_query_handle(struct mlx5_ibv_shared *sh,
5009 uint64_t async_id, int status)
5011 struct mlx5_flow_counter_pool *pool =
5012 (struct mlx5_flow_counter_pool *)(uintptr_t)async_id;
5013 struct mlx5_counter_stats_raw *raw_to_free;
5015 if (unlikely(status)) {
5016 raw_to_free = pool->raw_hw;
5018 raw_to_free = pool->raw;
5019 rte_spinlock_lock(&pool->sl);
5020 pool->raw = pool->raw_hw;
5021 rte_spinlock_unlock(&pool->sl);
5022 rte_atomic64_add(&pool->query_gen, 1);
5023 /* Be sure the new raw counter data is updated in memory. */
5026 LIST_INSERT_HEAD(&sh->cmng.free_stat_raws, raw_to_free, next);
5027 pool->raw_hw = NULL;
5028 sh->cmng.pending_queries--;
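/*
 * Editor's note, not part of the original source: counter statistics are
 * double-buffered per pool. "raw_hw" receives the asynchronous DevX query
 * results while readers keep using "raw"; on successful completion the two
 * are swapped under pool->sl and query_gen is bumped so readers can detect
 * the flip, then the stale buffer returns to the free_stat_raws list.
 */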
5032 * Translate the rte_flow group index to HW table value.
5034 * @param[in] attributes
5035 * Pointer to flow attributes
5036 * @param[in] external
5037 * True if the flow rule was created by a request external to the PMD.
5039 * rte_flow group index value.
5043 * Pointer to error structure.
5046 * 0 on success, a negative errno value otherwise and rte_errno is set.
5049 mlx5_flow_group_to_table(const struct rte_flow_attr *attributes, bool external,
5050 uint32_t group, uint32_t *table,
5051 struct rte_flow_error *error)
5053 if (attributes->transfer && external) {
5054 if (group == UINT32_MAX)
5055 return rte_flow_error_set
5057 RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
5059 "group index not supported");
5068 * Discover availability of metadata reg_c's.
5070 * Iteratively use test flows to check availability.
5073 * Pointer to the Ethernet device structure.
5076 * 0 on success, a negative errno value otherwise and rte_errno is set.
5079 mlx5_flow_discover_mreg_c(struct rte_eth_dev *dev)
5081 struct mlx5_priv *priv = dev->data->dev_private;
5082 struct mlx5_dev_config *config = &priv->config;
5083 enum modify_reg idx;
5086 /* reg_c[0] and reg_c[1] are reserved. */
5087 config->flow_mreg_c[n++] = REG_C_0;
5088 config->flow_mreg_c[n++] = REG_C_1;
5089 /* Discover availability of other reg_c's. */
5090 for (idx = REG_C_2; idx <= REG_C_7; ++idx) {
5091 struct rte_flow_attr attr = {
5092 .group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
5093 .priority = MLX5_FLOW_PRIO_RSVD,
5096 struct rte_flow_item items[] = {
5098 .type = RTE_FLOW_ITEM_TYPE_END,
5101 struct rte_flow_action actions[] = {
5103 .type = MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
5104 .conf = &(struct mlx5_flow_action_copy_mreg){
5110 .type = RTE_FLOW_ACTION_TYPE_JUMP,
5111 .conf = &(struct rte_flow_action_jump){
5112 .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
5116 .type = RTE_FLOW_ACTION_TYPE_END,
5119 struct rte_flow *flow;
5120 struct rte_flow_error error;
5122 if (!config->dv_flow_en)
5124 /* Create internal flow, validation skips copy action. */
5125 flow = flow_list_create(dev, NULL, &attr, items,
5126 actions, false, &error);
5129 if (dev->data->dev_started || !flow_drv_apply(dev, flow, NULL))
5130 config->flow_mreg_c[n++] = idx;
5131 flow_list_destroy(dev, NULL, flow);
5133 for (; n < MLX5_MREG_C_NUM; ++n)
5134 config->flow_mreg_c[n] = REG_NONE;
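/*
 * Editor's sketch, not part of the original source: after discovery the
 * flow_mreg_c[] array holds the usable registers first, padded with
 * REG_NONE; extended metadata support can therefore be tested through the
 * first non-reserved slot. The function and guard macro names are
 * hypothetical.
 */
#ifdef MLX5_FLOW_DOC_EXAMPLES
static int
example_ext_mreg_supported(const struct mlx5_dev_config *config)
{
	/* reg_c[0]/reg_c[1] are reserved; reg_c[2] is the first usable one. */
	return config->flow_mreg_c[2] != REG_NONE;
}
#endif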