1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2016 6WIND S.A.
3 * Copyright 2016 Mellanox Technologies, Ltd
11 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
13 #pragma GCC diagnostic ignored "-Wpedantic"
15 #include <infiniband/verbs.h>
17 #pragma GCC diagnostic error "-Wpedantic"
20 #include <rte_common.h>
21 #include <rte_ether.h>
22 #include <rte_eth_ctrl.h>
23 #include <rte_ethdev_driver.h>
25 #include <rte_flow_driver.h>
26 #include <rte_malloc.h>
30 #include "mlx5_defs.h"
32 #include "mlx5_glue.h"
34 /* Flow priority for control plane flows. */
35 #define MLX5_CTRL_FLOW_PRIORITY 1
37 /* Internet Protocol versions. */
42 #ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
43 struct ibv_flow_spec_counter_action {
48 /* Dev ops structure defined in mlx5.c */
49 extern const struct eth_dev_ops mlx5_dev_ops;
50 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
52 /** Structure given to the conversion functions. */
53 struct mlx5_flow_data {
54 struct rte_eth_dev *dev; /** Ethernet device. */
55 struct mlx5_flow_parse *parser; /** Parser context. */
56 struct rte_flow_error *error; /** Error context. */
60 mlx5_flow_create_eth(const struct rte_flow_item *item,
61 const void *default_mask,
62 struct mlx5_flow_data *data);
65 mlx5_flow_create_vlan(const struct rte_flow_item *item,
66 const void *default_mask,
67 struct mlx5_flow_data *data);
70 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
71 const void *default_mask,
72 struct mlx5_flow_data *data);
75 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
76 const void *default_mask,
77 struct mlx5_flow_data *data);
80 mlx5_flow_create_udp(const struct rte_flow_item *item,
81 const void *default_mask,
82 struct mlx5_flow_data *data);
85 mlx5_flow_create_tcp(const struct rte_flow_item *item,
86 const void *default_mask,
87 struct mlx5_flow_data *data);
90 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
91 const void *default_mask,
92 struct mlx5_flow_data *data);
95 mlx5_flow_create_vxlan_gpe(const struct rte_flow_item *item,
96 const void *default_mask,
97 struct mlx5_flow_data *data);
100 mlx5_flow_create_gre(const struct rte_flow_item *item,
101 const void *default_mask,
102 struct mlx5_flow_data *data);
105 mlx5_flow_create_mpls(const struct rte_flow_item *item,
106 const void *default_mask,
107 struct mlx5_flow_data *data);
109 struct mlx5_flow_parse;
112 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
116 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);
119 mlx5_flow_create_count(struct rte_eth_dev *dev, struct mlx5_flow_parse *parser);
121 /* Hash RX queue types. */
133 /* Initialization data for hash RX queue. */
134 struct hash_rxq_init {
135 uint64_t hash_fields; /* Fields that participate in the hash. */
136 uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
137 unsigned int flow_priority; /* Flow priority to use. */
138 unsigned int ip_version; /* Internet protocol. */
141 /* Initialization data for hash RX queues. */
142 const struct hash_rxq_init hash_rxq_init[] = {
144 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
145 IBV_RX_HASH_DST_IPV4 |
146 IBV_RX_HASH_SRC_PORT_TCP |
147 IBV_RX_HASH_DST_PORT_TCP),
148 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
150 .ip_version = MLX5_IPV4,
153 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
154 IBV_RX_HASH_DST_IPV4 |
155 IBV_RX_HASH_SRC_PORT_UDP |
156 IBV_RX_HASH_DST_PORT_UDP),
157 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
159 .ip_version = MLX5_IPV4,
162 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
163 IBV_RX_HASH_DST_IPV4),
164 .dpdk_rss_hf = (ETH_RSS_IPV4 |
167 .ip_version = MLX5_IPV4,
170 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
171 IBV_RX_HASH_DST_IPV6 |
172 IBV_RX_HASH_SRC_PORT_TCP |
173 IBV_RX_HASH_DST_PORT_TCP),
174 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
176 .ip_version = MLX5_IPV6,
179 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
180 IBV_RX_HASH_DST_IPV6 |
181 IBV_RX_HASH_SRC_PORT_UDP |
182 IBV_RX_HASH_DST_PORT_UDP),
183 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
185 .ip_version = MLX5_IPV6,
188 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
189 IBV_RX_HASH_DST_IPV6),
190 .dpdk_rss_hf = (ETH_RSS_IPV6 |
193 .ip_version = MLX5_IPV6,
202 /* Number of entries in hash_rxq_init[]. */
203 const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
205 /** Structure for holding counter stats. */
206 struct mlx5_flow_counter_stats {
207 uint64_t hits; /**< Number of packets matched by the rule. */
208 uint64_t bytes; /**< Number of bytes matched by the rule. */
211 /** Structure for Drop queue. */
212 struct mlx5_hrxq_drop {
213 struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
214 struct ibv_qp *qp; /**< Verbs queue pair. */
215 struct ibv_wq *wq; /**< Verbs work queue. */
216 struct ibv_cq *cq; /**< Verbs completion queue. */
219 /* Flows structures. */
221 uint64_t hash_fields; /**< Fields that participate in the hash. */
222 struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
223 struct ibv_flow *ibv_flow; /**< Verbs flow. */
224 struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
227 /* Drop flows structures. */
228 struct mlx5_flow_drop {
229 struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
230 struct ibv_flow *ibv_flow; /**< Verbs flow. */
234 TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
235 uint32_t mark:1; /**< Set if the flow is marked. */
236 uint32_t drop:1; /**< Drop queue. */
237 struct rte_flow_action_rss rss_conf; /**< RSS configuration */
238 uint16_t (*queues)[]; /**< Queues indexes to use. */
239 uint8_t rss_key[40]; /**< copy of the RSS key. */
240 uint32_t tunnel; /**< Tunnel type of RTE_PTYPE_TUNNEL_XXX. */
241 struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
242 struct mlx5_flow_counter_stats counter_stats;/**<The counter stats. */
243 struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
244 /**< Flow with Rx queue. */
247 /** Static initializer for items. */
249 (const enum rte_flow_item_type []){ \
250 __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
253 #define IS_TUNNEL(type) ( \
254 (type) == RTE_FLOW_ITEM_TYPE_VXLAN || \
255 (type) == RTE_FLOW_ITEM_TYPE_VXLAN_GPE || \
256 (type) == RTE_FLOW_ITEM_TYPE_GRE || \
257 (type) == RTE_FLOW_ITEM_TYPE_MPLS)
259 const uint32_t flow_ptype[] = {
260 [RTE_FLOW_ITEM_TYPE_VXLAN] = RTE_PTYPE_TUNNEL_VXLAN,
261 [RTE_FLOW_ITEM_TYPE_VXLAN_GPE] = RTE_PTYPE_TUNNEL_VXLAN_GPE,
262 [RTE_FLOW_ITEM_TYPE_GRE] = RTE_PTYPE_TUNNEL_GRE,
263 [RTE_FLOW_ITEM_TYPE_MPLS] = RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
266 #define PTYPE_IDX(t) ((RTE_PTYPE_TUNNEL_MASK & (t)) >> 12)
268 const uint32_t ptype_ext[] = {
269 [PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] = RTE_PTYPE_TUNNEL_VXLAN |
271 [PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN_GPE)] = RTE_PTYPE_TUNNEL_VXLAN_GPE |
273 [PTYPE_IDX(RTE_PTYPE_TUNNEL_GRE)] = RTE_PTYPE_TUNNEL_GRE,
274 [PTYPE_IDX(RTE_PTYPE_TUNNEL_MPLS_IN_GRE)] =
275 RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
276 [PTYPE_IDX(RTE_PTYPE_TUNNEL_MPLS_IN_UDP)] =
277 RTE_PTYPE_TUNNEL_MPLS_IN_GRE | RTE_PTYPE_L4_UDP,
280 /** Structure to generate a simple graph of layers supported by the NIC. */
281 struct mlx5_flow_items {
282 /** List of possible actions for these items. */
283 const enum rte_flow_action_type *const actions;
284 /** Bit-masks corresponding to the possibilities for the item. */
287 * Default bit-masks to use when item->mask is not provided. When
288 * \default_mask is also NULL, the full supported bit-mask (\mask) is
291 const void *default_mask;
292 /** Bit-masks size in bytes. */
293 const unsigned int mask_sz;
295 * Conversion function from rte_flow to NIC specific flow.
298 * rte_flow item to convert.
299 * @param default_mask
300 * Default bit-masks to use when item->mask is not provided.
302 * Internal structure to store the conversion.
305 * 0 on success, a negative errno value otherwise and rte_errno is
308 int (*convert)(const struct rte_flow_item *item,
309 const void *default_mask,
310 struct mlx5_flow_data *data);
311 /** Size in bytes of the destination structure. */
312 const unsigned int dst_sz;
313 /** List of possible following items. */
314 const enum rte_flow_item_type *const items;
317 /** Valid action for this PMD. */
318 static const enum rte_flow_action_type valid_actions[] = {
319 RTE_FLOW_ACTION_TYPE_DROP,
320 RTE_FLOW_ACTION_TYPE_QUEUE,
321 RTE_FLOW_ACTION_TYPE_MARK,
322 RTE_FLOW_ACTION_TYPE_FLAG,
323 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
324 RTE_FLOW_ACTION_TYPE_COUNT,
326 RTE_FLOW_ACTION_TYPE_END,
329 /** Graph of supported items and associated actions. */
330 static const struct mlx5_flow_items mlx5_flow_items[] = {
331 [RTE_FLOW_ITEM_TYPE_END] = {
332 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
333 RTE_FLOW_ITEM_TYPE_VXLAN,
334 RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
335 RTE_FLOW_ITEM_TYPE_GRE),
337 [RTE_FLOW_ITEM_TYPE_ETH] = {
338 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
339 RTE_FLOW_ITEM_TYPE_IPV4,
340 RTE_FLOW_ITEM_TYPE_IPV6),
341 .actions = valid_actions,
342 .mask = &(const struct rte_flow_item_eth){
343 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
344 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
347 .default_mask = &rte_flow_item_eth_mask,
348 .mask_sz = sizeof(struct rte_flow_item_eth),
349 .convert = mlx5_flow_create_eth,
350 .dst_sz = sizeof(struct ibv_flow_spec_eth),
352 [RTE_FLOW_ITEM_TYPE_VLAN] = {
353 .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
354 RTE_FLOW_ITEM_TYPE_IPV6),
355 .actions = valid_actions,
356 .mask = &(const struct rte_flow_item_vlan){
360 .default_mask = &rte_flow_item_vlan_mask,
361 .mask_sz = sizeof(struct rte_flow_item_vlan),
362 .convert = mlx5_flow_create_vlan,
365 [RTE_FLOW_ITEM_TYPE_IPV4] = {
366 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
367 RTE_FLOW_ITEM_TYPE_TCP,
368 RTE_FLOW_ITEM_TYPE_GRE),
369 .actions = valid_actions,
370 .mask = &(const struct rte_flow_item_ipv4){
374 .type_of_service = -1,
378 .default_mask = &rte_flow_item_ipv4_mask,
379 .mask_sz = sizeof(struct rte_flow_item_ipv4),
380 .convert = mlx5_flow_create_ipv4,
381 .dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
383 [RTE_FLOW_ITEM_TYPE_IPV6] = {
384 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
385 RTE_FLOW_ITEM_TYPE_TCP,
386 RTE_FLOW_ITEM_TYPE_GRE),
387 .actions = valid_actions,
388 .mask = &(const struct rte_flow_item_ipv6){
391 0xff, 0xff, 0xff, 0xff,
392 0xff, 0xff, 0xff, 0xff,
393 0xff, 0xff, 0xff, 0xff,
394 0xff, 0xff, 0xff, 0xff,
397 0xff, 0xff, 0xff, 0xff,
398 0xff, 0xff, 0xff, 0xff,
399 0xff, 0xff, 0xff, 0xff,
400 0xff, 0xff, 0xff, 0xff,
407 .default_mask = &rte_flow_item_ipv6_mask,
408 .mask_sz = sizeof(struct rte_flow_item_ipv6),
409 .convert = mlx5_flow_create_ipv6,
410 .dst_sz = sizeof(struct ibv_flow_spec_ipv6),
412 [RTE_FLOW_ITEM_TYPE_UDP] = {
413 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN,
414 RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
415 RTE_FLOW_ITEM_TYPE_MPLS),
416 .actions = valid_actions,
417 .mask = &(const struct rte_flow_item_udp){
423 .default_mask = &rte_flow_item_udp_mask,
424 .mask_sz = sizeof(struct rte_flow_item_udp),
425 .convert = mlx5_flow_create_udp,
426 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
428 [RTE_FLOW_ITEM_TYPE_TCP] = {
429 .actions = valid_actions,
430 .mask = &(const struct rte_flow_item_tcp){
436 .default_mask = &rte_flow_item_tcp_mask,
437 .mask_sz = sizeof(struct rte_flow_item_tcp),
438 .convert = mlx5_flow_create_tcp,
439 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
441 [RTE_FLOW_ITEM_TYPE_GRE] = {
442 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
443 RTE_FLOW_ITEM_TYPE_IPV4,
444 RTE_FLOW_ITEM_TYPE_IPV6,
445 RTE_FLOW_ITEM_TYPE_MPLS),
446 .actions = valid_actions,
447 .mask = &(const struct rte_flow_item_gre){
450 .default_mask = &rte_flow_item_gre_mask,
451 .mask_sz = sizeof(struct rte_flow_item_gre),
452 .convert = mlx5_flow_create_gre,
453 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
454 .dst_sz = sizeof(struct ibv_flow_spec_gre),
456 .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
459 [RTE_FLOW_ITEM_TYPE_MPLS] = {
460 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
461 RTE_FLOW_ITEM_TYPE_IPV4,
462 RTE_FLOW_ITEM_TYPE_IPV6),
463 .actions = valid_actions,
464 .mask = &(const struct rte_flow_item_mpls){
465 .label_tc_s = "\xff\xff\xf0",
467 .default_mask = &rte_flow_item_mpls_mask,
468 .mask_sz = sizeof(struct rte_flow_item_mpls),
469 .convert = mlx5_flow_create_mpls,
470 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
471 .dst_sz = sizeof(struct ibv_flow_spec_mpls),
474 [RTE_FLOW_ITEM_TYPE_VXLAN] = {
475 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
476 RTE_FLOW_ITEM_TYPE_IPV4, /* For L3 VXLAN. */
477 RTE_FLOW_ITEM_TYPE_IPV6), /* For L3 VXLAN. */
478 .actions = valid_actions,
479 .mask = &(const struct rte_flow_item_vxlan){
480 .vni = "\xff\xff\xff",
482 .default_mask = &rte_flow_item_vxlan_mask,
483 .mask_sz = sizeof(struct rte_flow_item_vxlan),
484 .convert = mlx5_flow_create_vxlan,
485 .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
487 [RTE_FLOW_ITEM_TYPE_VXLAN_GPE] = {
488 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
489 RTE_FLOW_ITEM_TYPE_IPV4,
490 RTE_FLOW_ITEM_TYPE_IPV6),
491 .actions = valid_actions,
492 .mask = &(const struct rte_flow_item_vxlan_gpe){
493 .vni = "\xff\xff\xff",
495 .default_mask = &rte_flow_item_vxlan_gpe_mask,
496 .mask_sz = sizeof(struct rte_flow_item_vxlan_gpe),
497 .convert = mlx5_flow_create_vxlan_gpe,
498 .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
502 /** Structure to pass to the conversion function. */
503 struct mlx5_flow_parse {
504 uint32_t inner; /**< Verbs value, set once tunnel is encountered. */
506 /**< Whether resources should remain after a validate. */
507 uint32_t drop:1; /**< Target is a drop queue. */
508 uint32_t mark:1; /**< Mark is present in the flow. */
509 uint32_t count:1; /**< Count is present in the flow. */
510 uint32_t mark_id; /**< Mark identifier. */
511 struct rte_flow_action_rss rss_conf; /**< RSS configuration */
512 uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use. */
513 uint8_t rss_key[40]; /**< copy of the RSS key. */
514 enum hash_rxq_type layer; /**< Last pattern layer detected. */
515 enum hash_rxq_type out_layer; /**< Last outer pattern layer detected. */
516 uint32_t tunnel; /**< Tunnel type of RTE_PTYPE_TUNNEL_XXX. */
517 struct ibv_counter_set *cs; /**< Holds the counter set for the rule */
519 struct ibv_flow_attr *ibv_attr;
520 /**< Pointer to Verbs attributes. */
522 /**< Current position or total size of the attribute. */
523 uint64_t hash_fields; /**< Verbs hash fields. */
524 } queue[RTE_DIM(hash_rxq_init)];
527 static const struct rte_flow_ops mlx5_flow_ops = {
528 .validate = mlx5_flow_validate,
529 .create = mlx5_flow_create,
530 .destroy = mlx5_flow_destroy,
531 .flush = mlx5_flow_flush,
532 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
533 .query = mlx5_flow_query,
537 .isolate = mlx5_flow_isolate,
540 /* Convert FDIR request to Generic flow. */
542 struct rte_flow_attr attr;
543 struct rte_flow_action actions[2];
544 struct rte_flow_item items[4];
545 struct rte_flow_item_eth l2;
546 struct rte_flow_item_eth l2_mask;
548 struct rte_flow_item_ipv4 ipv4;
549 struct rte_flow_item_ipv6 ipv6;
552 struct rte_flow_item_ipv4 ipv4;
553 struct rte_flow_item_ipv6 ipv6;
556 struct rte_flow_item_udp udp;
557 struct rte_flow_item_tcp tcp;
560 struct rte_flow_item_udp udp;
561 struct rte_flow_item_tcp tcp;
563 struct rte_flow_action_queue queue;
566 /* Verbs specification header. */
567 struct ibv_spec_header {
568 enum ibv_flow_spec_type type;
573 * Check item is fully supported by the NIC matching capability.
576 * Item specification.
578 * Bit-masks covering supported fields to compare with spec, last and mask in
581 * Bit-Mask size in bytes.
584 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * NOTE(review): interior lines of this function are missing from this
 * extraction; the comments below cover only what the visible lines show.
 */
587 mlx5_flow_item_validate(const struct rte_flow_item *item,
588 const uint8_t *mask, unsigned int size)
/* Fall back to the PMD-supplied default mask when the item carries none. */
591 const uint8_t *spec = item->spec;
592 const uint8_t *last = item->last;
593 const uint8_t *m = item->mask ? item->mask : mask;
/* A mask or a range (last) without a spec is meaningless — reject it. */
595 if (!spec && (item->mask || last))
600 * Single-pass check to make sure that:
601 * - item->mask is supported, no bits are set outside mask.
602 * - Both masked item->spec and item->last are equal (no range
605 for (i = 0; i < size; i++) {
/* Any mask bit outside the supported bit-mask is unsupported. */
608 if ((m[i] | mask[i]) != mask[i])
/* Ranges are unsupported: masked spec and last must be byte-equal. */
610 if (last && ((spec[i] & m[i]) != (last[i] & m[i])))
620 * Extract attribute to the parser.
623 * Flow rule attributes.
625 * Perform verbose error reporting if not NULL.
628 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Reject flow attributes the PMD cannot honor: non-default groups,
 * priorities other than MLX5_CTRL_FLOW_PRIORITY, egress and transfer;
 * only ingress is accepted. Each rejection reports ENOTSUP through
 * rte_flow_error_set(). (Interior lines — condition headers and returns —
 * are missing from this extraction.)
 */
631 mlx5_flow_convert_attributes(const struct rte_flow_attr *attr,
632 struct rte_flow_error *error)
635 rte_flow_error_set(error, ENOTSUP,
636 RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
638 "groups are not supported");
641 if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
642 rte_flow_error_set(error, ENOTSUP,
643 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
645 "priorities are not supported");
649 rte_flow_error_set(error, ENOTSUP,
650 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
652 "egress is not supported");
655 if (attr->transfer) {
656 rte_flow_error_set(error, ENOTSUP,
657 RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
659 "transfer is not supported");
662 if (!attr->ingress) {
663 rte_flow_error_set(error, ENOTSUP,
664 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
666 "only ingress is supported");
673 * Extract actions request to the parser.
676 * Pointer to Ethernet device.
678 * Associated actions (list terminated by the END action).
680 * Perform verbose error reporting if not NULL.
681 * @param[in, out] parser
682 * Internal parser structure.
685 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Parse the action list into the parser: exactly one fate action
 * (DROP/QUEUE/RSS) is allowed — "overlap" tracks which categories have
 * been seen and duplicates jump to exit_action_overlap. MARK/FLAG and
 * COUNT are optional extras. (Interior lines are missing from this
 * extraction; comments cover only the visible logic.)
 */
688 mlx5_flow_convert_actions(struct rte_eth_dev *dev,
689 const struct rte_flow_action actions[],
690 struct rte_flow_error *error,
691 struct mlx5_flow_parse *parser)
/* Bit flags for the mutually-exclusive action categories. */
693 enum { FATE = 1, MARK = 2, COUNT = 4, };
694 uint32_t overlap = 0;
695 struct priv *priv = dev->data->dev_private;
/* Walk actions until the END terminator; VOID entries are skipped. */
697 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
698 if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
700 } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
702 goto exit_action_overlap;
705 } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
706 const struct rte_flow_action_queue *queue =
707 (const struct rte_flow_action_queue *)
711 goto exit_action_overlap;
/* Queue index must be within the configured Rx queue count. */
713 if (!queue || (queue->index > (priv->rxqs_n - 1)))
714 goto exit_action_not_supported;
715 parser->queues[0] = queue->index;
716 parser->rss_conf = (struct rte_flow_action_rss){
718 .queue = parser->queues,
720 } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
721 const struct rte_flow_action_rss *rss =
722 (const struct rte_flow_action_rss *)
724 const uint8_t *rss_key;
725 uint32_t rss_key_len;
729 goto exit_action_overlap;
732 rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ) {
733 rte_flow_error_set(error, EINVAL,
734 RTE_FLOW_ERROR_TYPE_ACTION,
736 "the only supported RSS hash"
737 " function is Toeplitz");
740 #ifndef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
741 if (parser->rss_conf.level > 1) {
742 rte_flow_error_set(error, EINVAL,
743 RTE_FLOW_ERROR_TYPE_ACTION,
745 "a nonzero RSS encapsulation"
746 " level is not supported");
750 if (parser->rss_conf.level > 2) {
751 rte_flow_error_set(error, EINVAL,
752 RTE_FLOW_ERROR_TYPE_ACTION,
754 "RSS encapsulation level"
755 " > 1 is not supported");
758 if (rss->types & MLX5_RSS_HF_MASK) {
759 rte_flow_error_set(error, EINVAL,
760 RTE_FLOW_ERROR_TYPE_ACTION,
762 "unsupported RSS type"
767 rss_key_len = rss->key_len;
770 rss_key_len = rss_hash_default_key_len;
771 rss_key = rss_hash_default_key;
/* The 40-byte key length matches parser->rss_key's fixed size. */
773 if (rss_key_len != RTE_DIM(parser->rss_key)) {
774 rte_flow_error_set(error, EINVAL,
775 RTE_FLOW_ERROR_TYPE_ACTION,
777 "RSS hash key must be"
778 " exactly 40 bytes long");
781 if (!rss->queue_num) {
782 rte_flow_error_set(error, EINVAL,
783 RTE_FLOW_ERROR_TYPE_ACTION,
788 if (rss->queue_num > RTE_DIM(parser->queues)) {
789 rte_flow_error_set(error, EINVAL,
790 RTE_FLOW_ERROR_TYPE_ACTION,
792 "too many queues for RSS"
796 for (n = 0; n < rss->queue_num; ++n) {
797 if (rss->queue[n] >= priv->rxqs_n) {
798 rte_flow_error_set(error, EINVAL,
799 RTE_FLOW_ERROR_TYPE_ACTION,
801 "queue id > number of"
/* Keep private copies of the key and queue list in the parser. */
806 parser->rss_conf = (struct rte_flow_action_rss){
807 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
810 .key_len = rss_key_len,
811 .queue_num = rss->queue_num,
812 .key = memcpy(parser->rss_key, rss_key,
813 sizeof(*rss_key) * rss_key_len),
814 .queue = memcpy(parser->queues, rss->queue,
815 sizeof(*rss->queue) *
818 } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
819 const struct rte_flow_action_mark *mark =
820 (const struct rte_flow_action_mark *)
824 goto exit_action_overlap;
827 rte_flow_error_set(error, EINVAL,
828 RTE_FLOW_ERROR_TYPE_ACTION,
830 "mark must be defined");
832 } else if (mark->id >= MLX5_FLOW_MARK_MAX) {
833 rte_flow_error_set(error, ENOTSUP,
834 RTE_FLOW_ERROR_TYPE_ACTION,
836 "mark must be between 0"
841 parser->mark_id = mark->id;
842 } else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
844 goto exit_action_overlap;
/* COUNT is only accepted when the device enables flow counters. */
847 } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
848 priv->config.flow_counter_en) {
850 goto exit_action_overlap;
854 goto exit_action_not_supported;
857 /* When fate is unknown, drop traffic. */
858 if (!(overlap & FATE))
860 if (parser->drop && parser->mark)
862 if (!parser->rss_conf.queue_num && !parser->drop) {
863 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
864 NULL, "no valid action");
/* Error exits: both report ENOTSUP on the offending action. */
868 exit_action_not_supported:
869 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
870 actions, "action not supported");
873 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
874 actions, "overlapping actions are not supported");
882 * Pattern specification (list terminated by the END pattern item).
884 * Perform verbose error reporting if not NULL.
885 * @param[in, out] parser
886 * Internal parser structure.
889 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Validate the pattern against the supported-item graph (mlx5_flow_items)
 * and accumulate, per hash Rx queue type, the byte size each item's verbs
 * specification will need (parser->queue[i].offset). (Interior lines are
 * missing from this extraction.)
 */
892 mlx5_flow_convert_items_validate(struct rte_eth_dev *dev,
893 const struct rte_flow_item items[],
894 struct rte_flow_error *error,
895 struct mlx5_flow_parse *parser)
897 struct priv *priv = dev->data->dev_private;
898 const struct mlx5_flow_items *cur_item = mlx5_flow_items;
900 unsigned int last_voids = 0;
903 /* Initialise the offsets to start after verbs attribute. */
904 for (i = 0; i != hash_rxq_init_n; ++i)
905 parser->queue[i].offset = sizeof(struct ibv_flow_attr);
/* Each item must be reachable from the previous one in the graph. */
906 for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
907 const struct mlx5_flow_items *token = NULL;
910 if (items->type == RTE_FLOW_ITEM_TYPE_VOID) {
916 cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
918 if (cur_item->items[i] == items->type) {
919 token = &mlx5_flow_items[items->type];
925 goto exit_item_not_supported;
928 ret = mlx5_flow_item_validate(items,
929 (const uint8_t *)cur_item->mask,
932 goto exit_item_not_supported;
/*
 * Tunnel items: only a GRE item immediately (ignoring VOIDs)
 * followed by MPLS may stack; any other second tunnel is rejected.
 */
933 if (IS_TUNNEL(items->type)) {
934 if (parser->tunnel &&
935 !((items - last_voids - 1)->type ==
936 RTE_FLOW_ITEM_TYPE_GRE && items->type ==
937 RTE_FLOW_ITEM_TYPE_MPLS)) {
938 rte_flow_error_set(error, ENOTSUP,
939 RTE_FLOW_ERROR_TYPE_ITEM,
941 "Cannot recognize multiple"
942 " tunnel encapsulations.");
945 if (items->type == RTE_FLOW_ITEM_TYPE_MPLS &&
946 !priv->config.mpls_en) {
947 rte_flow_error_set(error, ENOTSUP,
948 RTE_FLOW_ERROR_TYPE_ITEM,
950 "MPLS not supported or"
951 " disabled in firmware"
955 if (!priv->config.tunnel_en &&
956 parser->rss_conf.level > 1) {
957 rte_flow_error_set(error, ENOTSUP,
958 RTE_FLOW_ERROR_TYPE_ITEM,
960 "RSS on tunnel is not supported");
/* Remember the tunnel: switch to inner specs from here on. */
963 parser->inner = IBV_FLOW_SPEC_INNER;
964 parser->tunnel = flow_ptype[items->type];
967 parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
969 for (n = 0; n != hash_rxq_init_n; ++n)
970 parser->queue[n].offset += cur_item->dst_sz;
/* Account for trailing action specs (drop / tag / counter). */
975 parser->queue[HASH_RXQ_ETH].offset +=
976 sizeof(struct ibv_flow_spec_action_drop);
979 for (i = 0; i != hash_rxq_init_n; ++i)
980 parser->queue[i].offset +=
981 sizeof(struct ibv_flow_spec_action_tag);
984 unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
986 for (i = 0; i != hash_rxq_init_n; ++i)
987 parser->queue[i].offset += size;
990 exit_item_not_supported:
991 return rte_flow_error_set(error, -ret, RTE_FLOW_ERROR_TYPE_ITEM,
992 items, "item not supported");
996 * Allocate memory space to store verbs flow attributes.
999 * Amount of byte to allocate.
1001 * Perform verbose error reporting if not NULL.
1004 * A verbs flow attribute on success, NULL otherwise and rte_errno is set.
1006 static struct ibv_flow_attr *
1007 mlx5_flow_convert_allocate(unsigned int size, struct rte_flow_error *error)
1009 struct ibv_flow_attr *ibv_attr;
/* Zero-initialized allocation; the failure branch (missing from this
 * extraction) reports ENOMEM to the caller via the error context. */
1011 ibv_attr = rte_calloc(__func__, 1, size, 0);
1013 rte_flow_error_set(error, ENOMEM,
1014 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1016 "cannot allocate verbs spec attributes");
1023 * Make inner packet matching with an higher priority from the non Inner
1027 * Pointer to Ethernet device.
1028 * @param[in, out] parser
1029 * Internal parser structure.
1031 * User flow attribute.
/*
 * Compute the final Verbs priority for each allocated attribute: base it on
 * the user priority scaled by the 8-priority block, lower non-tunnel flows
 * so inner (tunnel) matches win, then add each hash Rx queue type's own
 * flow_priority. (Interior lines are missing from this extraction.)
 */
1034 mlx5_flow_update_priority(struct rte_eth_dev *dev,
1035 struct mlx5_flow_parse *parser,
1036 const struct rte_flow_attr *attr)
1038 struct priv *priv = dev->data->dev_private;
1042 /* 8 priorities >= 16 priorities
1043 * Control flow: 4-7 8-15
1044 * User normal flow: 1-3 4-7
1045 * User tunnel flow: 0-2 0-3
1047 priority = attr->priority * MLX5_VERBS_FLOW_PRIO_8;
1048 if (priv->config.max_verbs_prio == MLX5_VERBS_FLOW_PRIO_8)
1051 * Lower non-tunnel flow Verbs priority 1 if only support 8 Verbs
1052 * priorities, lower 4 otherwise.
1054 if (!parser->inner) {
1055 if (priv->config.max_verbs_prio == MLX5_VERBS_FLOW_PRIO_8)
1058 priority += MLX5_VERBS_FLOW_PRIO_8 / 2;
1061 parser->queue[HASH_RXQ_ETH].ibv_attr->priority = priority +
1062 hash_rxq_init[HASH_RXQ_ETH].flow_priority;
/* Apply only to queue types whose attribute was actually allocated. */
1065 for (i = 0; i != hash_rxq_init_n; ++i) {
1066 if (!parser->queue[i].ibv_attr)
1068 parser->queue[i].ibv_attr->priority = priority +
1069 hash_rxq_init[i].flow_priority;
1074 * Finalise verbs flow attributes.
1076 * @param[in, out] parser
1077 * Internal parser structure.
/*
 * For each hash Rx queue type other than the one matched by the pattern,
 * synthesize the missing L3/L4 verbs specs (or just grow the offset when
 * the attribute is not yet allocated) so every queue type carries a
 * complete specification. (Interior lines are missing from this
 * extraction.)
 */
1080 mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
1083 uint32_t inner = parser->inner;
1085 /* Don't create extra flows for outer RSS. */
1086 if (parser->tunnel && parser->rss_conf.level < 2)
1089 * Fill missing layers in verbs specifications, or compute the correct
1090 * offset to allocate the memory space for the attributes and
/* Scratch union large enough for any of the specs filled below. */
1093 for (i = 0; i != hash_rxq_init_n - 1; ++i) {
1095 struct ibv_flow_spec_ipv4_ext ipv4;
1096 struct ibv_flow_spec_ipv6 ipv6;
1097 struct ibv_flow_spec_tcp_udp udp_tcp;
1098 struct ibv_flow_spec_eth eth;
/* The layer the pattern already specified needs nothing added. */
1103 if (i == parser->layer)
1105 if (parser->layer == HASH_RXQ_ETH ||
1106 parser->layer == HASH_RXQ_TUNNEL) {
1107 if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
1108 size = sizeof(struct ibv_flow_spec_ipv4_ext);
1109 specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
1110 .type = inner | IBV_FLOW_SPEC_IPV4_EXT,
1114 size = sizeof(struct ibv_flow_spec_ipv6);
1115 specs.ipv6 = (struct ibv_flow_spec_ipv6){
1116 .type = inner | IBV_FLOW_SPEC_IPV6,
/* Append the spec only when the attribute buffer exists;
 * the offset advances either way. */
1120 if (parser->queue[i].ibv_attr) {
1121 dst = (void *)((uintptr_t)
1122 parser->queue[i].ibv_attr +
1123 parser->queue[i].offset);
1124 memcpy(dst, &specs, size);
1125 ++parser->queue[i].ibv_attr->num_of_specs;
1127 parser->queue[i].offset += size;
/* L4 queue types additionally need a TCP/UDP spec. */
1129 if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
1130 (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
1131 size = sizeof(struct ibv_flow_spec_tcp_udp);
1132 specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
1133 .type = inner | ((i == HASH_RXQ_UDPV4 ||
1134 i == HASH_RXQ_UDPV6) ?
1139 if (parser->queue[i].ibv_attr) {
1140 dst = (void *)((uintptr_t)
1141 parser->queue[i].ibv_attr +
1142 parser->queue[i].offset);
1143 memcpy(dst, &specs, size);
1144 ++parser->queue[i].ibv_attr->num_of_specs;
1146 parser->queue[i].offset += size;
1152 * Update flows according to pattern and RSS hash fields.
1154 * @param[in, out] parser
1155 * Internal parser structure.
1158 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Reconcile the requested RSS types with the pattern's last layer: pick
 * the hash fields for the matching layer, free the per-queue-type verbs
 * attributes that the RSS configuration makes unused, and expand RSS over
 * L4 variants where allowed. (Interior lines are missing from this
 * extraction.)
 */
1161 mlx5_flow_convert_rss(struct mlx5_flow_parse *parser)
1164 enum hash_rxq_type start;
1165 enum hash_rxq_type layer;
/* Outer RSS: tunnel present but encapsulation level < 2. */
1166 int outer = parser->tunnel && parser->rss_conf.level < 2;
1167 uint64_t rss = parser->rss_conf.types;
1169 /* Default to outer RSS. */
1170 if (!parser->rss_conf.level)
1171 parser->rss_conf.level = 1;
1172 layer = outer ? parser->out_layer : parser->layer;
1173 if (layer == HASH_RXQ_TUNNEL)
1174 layer = HASH_RXQ_ETH;
1176 /* Only one hash type for outer RSS. */
1177 if (rss && layer == HASH_RXQ_ETH) {
1178 start = HASH_RXQ_TCPV4;
1179 } else if (rss && layer != HASH_RXQ_ETH &&
1180 !(rss & hash_rxq_init[layer].dpdk_rss_hf)) {
1181 /* If RSS not match L4 pattern, try L3 RSS. */
1182 if (layer < HASH_RXQ_IPV4)
1183 layer = HASH_RXQ_IPV4;
1184 else if (layer > HASH_RXQ_IPV4 && layer < HASH_RXQ_IPV6)
1185 layer = HASH_RXQ_IPV6;
1190 /* Scan first valid hash type. */
1191 for (i = start; rss && i <= layer; ++i) {
1192 if (!parser->queue[i].ibv_attr)
1194 if (hash_rxq_init[i].dpdk_rss_hf & rss)
1197 if (rss && i <= layer)
1198 parser->queue[layer].hash_fields =
1199 hash_rxq_init[i].hash_fields;
1200 /* Trim unused hash types. */
1201 for (i = 0; i != hash_rxq_init_n; ++i) {
1202 if (parser->queue[i].ibv_attr && i != layer) {
1203 rte_free(parser->queue[i].ibv_attr);
1204 parser->queue[i].ibv_attr = NULL;
1208 /* Expand for inner or normal RSS. */
1209 if (rss && (layer == HASH_RXQ_ETH || layer == HASH_RXQ_IPV4))
1210 start = HASH_RXQ_TCPV4;
1211 else if (rss && layer == HASH_RXQ_IPV6)
1212 start = HASH_RXQ_TCPV6;
1215 /* For L4 pattern, try L3 RSS if no L4 RSS. */
1216 /* Trim unused hash types. */
1217 for (i = 0; i != hash_rxq_init_n; ++i) {
1218 if (!parser->queue[i].ibv_attr)
1220 if (i < start || i > layer) {
1221 rte_free(parser->queue[i].ibv_attr);
1222 parser->queue[i].ibv_attr = NULL;
1227 if (hash_rxq_init[i].dpdk_rss_hf & rss) {
1228 parser->queue[i].hash_fields =
1229 hash_rxq_init[i].hash_fields;
1230 } else if (i != layer) {
1231 /* Remove unused RSS expansion. */
1232 rte_free(parser->queue[i].ibv_attr);
1233 parser->queue[i].ibv_attr = NULL;
1234 } else if (layer < HASH_RXQ_IPV4 &&
1235 (hash_rxq_init[HASH_RXQ_IPV4].dpdk_rss_hf &
1237 /* Allow IPv4 RSS on L4 pattern. */
1238 parser->queue[i].hash_fields =
1239 hash_rxq_init[HASH_RXQ_IPV4]
1241 } else if (i > HASH_RXQ_IPV4 && i < HASH_RXQ_IPV6 &&
1242 (hash_rxq_init[HASH_RXQ_IPV6].dpdk_rss_hf &
1244 /* Allow IPv6 RSS on L4 pattern. */
1245 parser->queue[i].hash_fields =
1246 hash_rxq_init[HASH_RXQ_IPV6]
1255 * Validate and convert a flow supported by the NIC.
1258 * Pointer to Ethernet device.
1260 * Flow rule attributes.
1261 * @param[in] pattern
1262 * Pattern specification (list terminated by the END pattern item).
1263 * @param[in] actions
1264 * Associated actions (list terminated by the END action).
1266 * Perform verbose error reporting if not NULL.
1267 * @param[in, out] parser
1268 * Internal parser structure.
1271 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Top-level validate-and-convert pipeline: (1) validate attributes,
 * actions and items while sizing the verbs buffers, (2) allocate the
 * per-queue-type attributes, (3) run each item's convert() callback to
 * fill the specs, then apply RSS trimming, finalisation, priority, mark
 * and counter handling. On validation-only runs all attribute buffers are
 * released before returning. (Interior lines are missing from this
 * extraction.)
 */
1274 mlx5_flow_convert(struct rte_eth_dev *dev,
1275 const struct rte_flow_attr *attr,
1276 const struct rte_flow_item items[],
1277 const struct rte_flow_action actions[],
1278 struct rte_flow_error *error,
1279 struct mlx5_flow_parse *parser)
1281 const struct mlx5_flow_items *cur_item = mlx5_flow_items;
1285 /* First step. Validate the attributes, items and actions. */
/* Reset the parser but keep the caller's create flag. */
1286 *parser = (struct mlx5_flow_parse){
1287 .create = parser->create,
1288 .layer = HASH_RXQ_ETH,
1289 .mark_id = MLX5_FLOW_MARK_DEFAULT,
1291 ret = mlx5_flow_convert_attributes(attr, error);
1294 ret = mlx5_flow_convert_actions(dev, actions, error, parser);
1297 ret = mlx5_flow_convert_items_validate(dev, items, error, parser);
1300 mlx5_flow_convert_finalise(parser);
1303 * Allocate the memory space to store verbs specifications.
/* Drop flows only need the HASH_RXQ_ETH attribute. */
1306 unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;
1308 parser->queue[HASH_RXQ_ETH].ibv_attr =
1309 mlx5_flow_convert_allocate(offset, error);
1310 if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
/* Reset offset to write specs right after the attribute header. */
1312 parser->queue[HASH_RXQ_ETH].offset =
1313 sizeof(struct ibv_flow_attr);
1315 for (i = 0; i != hash_rxq_init_n; ++i) {
1316 unsigned int offset;
1318 offset = parser->queue[i].offset;
1319 parser->queue[i].ibv_attr =
1320 mlx5_flow_convert_allocate(offset, error);
1321 if (!parser->queue[i].ibv_attr)
1323 parser->queue[i].offset = sizeof(struct ibv_flow_attr);
1326 /* Third step. Conversion parse, fill the specifications. */
1329 parser->layer = HASH_RXQ_ETH;
1330 for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
1331 struct mlx5_flow_data data = {
1337 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
/* Each item's convert() fills its verbs spec, using the
 * per-item default mask when the item has none. */
1339 cur_item = &mlx5_flow_items[items->type];
1340 ret = cur_item->convert(items,
1341 (cur_item->default_mask ?
1342 cur_item->default_mask :
1348 if (!parser->drop) {
1349 /* RSS check, remove unused hash types. */
1350 ret = mlx5_flow_convert_rss(parser);
1353 /* Complete missing specification. */
1354 mlx5_flow_convert_finalise(parser);
1356 mlx5_flow_update_priority(dev, parser, attr);
1358 mlx5_flow_create_flag_mark(parser, parser->mark_id);
1359 if (parser->count && parser->create) {
1360 mlx5_flow_create_count(dev, parser);
1362 goto exit_count_error;
1365 /* Only verification is expected, all resources should be released. */
1366 if (!parser->create) {
1367 for (i = 0; i != hash_rxq_init_n; ++i) {
1368 if (parser->queue[i].ibv_attr) {
1369 rte_free(parser->queue[i].ibv_attr);
1370 parser->queue[i].ibv_attr = NULL;
/* Error paths: free every allocated attribute before reporting. */
1376 for (i = 0; i != hash_rxq_init_n; ++i) {
1377 if (parser->queue[i].ibv_attr) {
1378 rte_free(parser->queue[i].ibv_attr);
1379 parser->queue[i].ibv_attr = NULL;
1382 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1383 NULL, "cannot allocate verbs spec attributes");
1386 rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1387 NULL, "cannot create counter");
1392 * Copy the specification created into the flow.
1395 * Internal parser structure.
1397 * Create specification.
1399 * Size in bytes of the specification to copy.
1402 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
/* Append the spec to every allocated per-queue Verbs attribute. */
1408 for (i = 0; i != hash_rxq_init_n; ++i) {
1409 if (!parser->queue[i].ibv_attr)
/* Destination is the current write offset inside the attribute blob. */
1411 dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1412 parser->queue[i].offset);
1413 memcpy(dst, src, size);
/* Keep the spec count and write offset in sync with the copy. */
1414 ++parser->queue[i].ibv_attr->num_of_specs;
1415 parser->queue[i].offset += size;
1420 * Convert Ethernet item to Verbs specification.
1423 * Item specification.
1424 * @param default_mask[in]
1425 * Default bit-masks to use when item->mask is not provided.
1426 * @param data[in, out]
1430 * 0 on success, a negative errno value otherwise and rte_errno is set.
1433 mlx5_flow_create_eth(const struct rte_flow_item *item,
1434 const void *default_mask,
1435 struct mlx5_flow_data *data)
1437 const struct rte_flow_item_eth *spec = item->spec;
1438 const struct rte_flow_item_eth *mask = item->mask;
1439 struct mlx5_flow_parse *parser = data->parser;
1440 const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1441 struct ibv_flow_spec_eth eth = {
1442 .type = parser->inner | IBV_FLOW_SPEC_ETH,
1446 parser->layer = HASH_RXQ_ETH;
1451 mask = default_mask;
/*
 * NOTE(review): the "ð" below is an HTML-entity mangling of "&eth"
 * (&eth.val.dst_mac etc.) introduced by extraction — the real source
 * takes the address of the local 'eth' spec. Kept verbatim here.
 */
1452 memcpy(ð.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
1453 memcpy(ð.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
1454 eth.val.ether_type = spec->type;
1455 memcpy(ð.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
1456 memcpy(ð.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
1457 eth.mask.ether_type = mask->type;
1458 /* Remove unwanted bits from values. */
1459 for (i = 0; i < ETHER_ADDR_LEN; ++i) {
1460 eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
1461 eth.val.src_mac[i] &= eth.mask.src_mac[i];
1463 eth.val.ether_type &= eth.mask.ether_type;
/* Append the spec to every per-queue Verbs attribute. */
1465 mlx5_flow_create_copy(parser, ð, eth_size);
1470 * Convert VLAN item to Verbs specification.
1473 * Item specification.
1474 * @param default_mask[in]
1475 * Default bit-masks to use when item->mask is not provided.
1476 * @param data[in, out]
1480 * 0 on success, a negative errno value otherwise and rte_errno is set.
1483 mlx5_flow_create_vlan(const struct rte_flow_item *item,
1484 const void *default_mask,
1485 struct mlx5_flow_data *data)
1487 const struct rte_flow_item_vlan *spec = item->spec;
1488 const struct rte_flow_item_vlan *mask = item->mask;
1489 struct mlx5_flow_parse *parser = data->parser;
1490 struct ibv_flow_spec_eth *eth;
1491 const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1492 const char *msg = "VLAN cannot be empty";
1497 mask = default_mask;
/*
 * VLAN has no Verbs spec of its own: patch the TCI into the Ethernet
 * spec that was already written at the tail of each queue's attribute.
 */
1499 for (i = 0; i != hash_rxq_init_n; ++i) {
1500 if (!parser->queue[i].ibv_attr)
1503 eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1504 parser->queue[i].offset - eth_size);
1505 eth->val.vlan_tag = spec->tci;
1506 eth->mask.vlan_tag = mask->tci;
1507 eth->val.vlan_tag &= eth->mask.vlan_tag;
1509 * From verbs perspective an empty VLAN is equivalent
1510 * to a packet without VLAN layer.
1512 if (!eth->mask.vlan_tag)
1514 /* Outer TPID cannot be matched. */
1515 if (eth->mask.ether_type) {
1516 msg = "VLAN TPID matching is not supported";
/* inner_type of the VLAN item replaces the Ethernet ether_type match. */
1519 eth->val.ether_type = spec->inner_type;
1520 eth->mask.ether_type = mask->inner_type;
1521 eth->val.ether_type &= eth->mask.ether_type;
/* Error exit (label elided): report with the message selected above. */
1526 return rte_flow_error_set(data->error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM,
1531 * Convert IPv4 item to Verbs specification.
1534 * Item specification.
1535 * @param default_mask[in]
1536 * Default bit-masks to use when item->mask is not provided.
1537 * @param data[in, out]
1541 * 0 on success, a negative errno value otherwise and rte_errno is set.
1544 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
1545 const void *default_mask,
1546 struct mlx5_flow_data *data)
1548 struct priv *priv = data->dev->data->dev_private;
1549 const struct rte_flow_item_ipv4 *spec = item->spec;
1550 const struct rte_flow_item_ipv4 *mask = item->mask;
1551 struct mlx5_flow_parse *parser = data->parser;
1552 unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
1553 struct ibv_flow_spec_ipv4_ext ipv4 = {
1554 .type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
/* Inner IPv4 directly after VXLAN requires the l3_vxlan_en devarg. */
1558 if (parser->layer == HASH_RXQ_TUNNEL &&
1559 parser->tunnel == ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] &&
1560 !priv->config.l3_vxlan_en)
1561 return rte_flow_error_set(data->error, EINVAL,
1562 RTE_FLOW_ERROR_TYPE_ITEM,
1564 "L3 VXLAN not enabled by device"
1565 " parameter and/or not configured"
1567 parser->layer = HASH_RXQ_IPV4;
1570 mask = default_mask;
1571 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
1572 .src_ip = spec->hdr.src_addr,
1573 .dst_ip = spec->hdr.dst_addr,
1574 .proto = spec->hdr.next_proto_id,
1575 .tos = spec->hdr.type_of_service,
1577 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
1578 .src_ip = mask->hdr.src_addr,
1579 .dst_ip = mask->hdr.dst_addr,
1580 .proto = mask->hdr.next_proto_id,
1581 .tos = mask->hdr.type_of_service,
1583 /* Remove unwanted bits from values. */
1584 ipv4.val.src_ip &= ipv4.mask.src_ip;
1585 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
1586 ipv4.val.proto &= ipv4.mask.proto;
1587 ipv4.val.tos &= ipv4.mask.tos;
1589 mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
1594 * Convert IPv6 item to Verbs specification.
1597 * Item specification.
1598 * @param default_mask[in]
1599 * Default bit-masks to use when item->mask is not provided.
1600 * @param data[in, out]
1604 * 0 on success, a negative errno value otherwise and rte_errno is set.
1607 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
1608 const void *default_mask,
1609 struct mlx5_flow_data *data)
1611 struct priv *priv = data->dev->data->dev_private;
1612 const struct rte_flow_item_ipv6 *spec = item->spec;
1613 const struct rte_flow_item_ipv6 *mask = item->mask;
1614 struct mlx5_flow_parse *parser = data->parser;
1615 unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
1616 struct ibv_flow_spec_ipv6 ipv6 = {
1617 .type = parser->inner | IBV_FLOW_SPEC_IPV6,
/* Inner IPv6 directly after VXLAN requires the l3_vxlan_en devarg. */
1621 if (parser->layer == HASH_RXQ_TUNNEL &&
1622 parser->tunnel == ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] &&
1623 !priv->config.l3_vxlan_en)
1624 return rte_flow_error_set(data->error, EINVAL,
1625 RTE_FLOW_ERROR_TYPE_ITEM,
1627 "L3 VXLAN not enabled by device"
1628 " parameter and/or not configured"
1630 parser->layer = HASH_RXQ_IPV6;
1633 uint32_t vtc_flow_val;
1634 uint32_t vtc_flow_mask;
1637 mask = default_mask;
1638 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
1639 RTE_DIM(ipv6.val.src_ip));
1640 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
1641 RTE_DIM(ipv6.val.dst_ip));
1642 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
1643 RTE_DIM(ipv6.mask.src_ip));
1644 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
1645 RTE_DIM(ipv6.mask.dst_ip));
/*
 * vtc_flow packs version/TC/flow-label; split it into the separate
 * traffic_class and flow_label fields of the Verbs spec, converting
 * endianness around the shift (shift amounts elided from this view).
 */
1646 vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
1647 vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
1648 ipv6.val.flow_label =
1649 rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
1651 ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
1653 ipv6.val.next_hdr = spec->hdr.proto;
1654 ipv6.val.hop_limit = spec->hdr.hop_limits;
1655 ipv6.mask.flow_label =
1656 rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
1658 ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
1660 ipv6.mask.next_hdr = mask->hdr.proto;
1661 ipv6.mask.hop_limit = mask->hdr.hop_limits;
1662 /* Remove unwanted bits from values. */
1663 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
1664 ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
1665 ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
1667 ipv6.val.flow_label &= ipv6.mask.flow_label;
1668 ipv6.val.traffic_class &= ipv6.mask.traffic_class;
1669 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
1670 ipv6.val.hop_limit &= ipv6.mask.hop_limit;
1672 mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
1677 * Convert UDP item to Verbs specification.
1680 * Item specification.
1681 * @param default_mask[in]
1682 * Default bit-masks to use when item->mask is not provided.
1683 * @param data[in, out]
1687 * 0 on success, a negative errno value otherwise and rte_errno is set.
1690 mlx5_flow_create_udp(const struct rte_flow_item *item,
1691 const void *default_mask,
1692 struct mlx5_flow_data *data)
1694 const struct rte_flow_item_udp *spec = item->spec;
1695 const struct rte_flow_item_udp *mask = item->mask;
1696 struct mlx5_flow_parse *parser = data->parser;
1697 unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1698 struct ibv_flow_spec_tcp_udp udp = {
1699 .type = parser->inner | IBV_FLOW_SPEC_UDP,
/* Promote the parser layer to the matching UDP-over-IP hash type. */
1703 if (parser->layer == HASH_RXQ_IPV4)
1704 parser->layer = HASH_RXQ_UDPV4;
1706 parser->layer = HASH_RXQ_UDPV6;
1709 mask = default_mask;
1710 udp.val.dst_port = spec->hdr.dst_port;
1711 udp.val.src_port = spec->hdr.src_port;
1712 udp.mask.dst_port = mask->hdr.dst_port;
1713 udp.mask.src_port = mask->hdr.src_port;
1714 /* Remove unwanted bits from values. */
1715 udp.val.src_port &= udp.mask.src_port;
1716 udp.val.dst_port &= udp.mask.dst_port;
1718 mlx5_flow_create_copy(parser, &udp, udp_size);
1723 * Convert TCP item to Verbs specification.
1726 * Item specification.
1727 * @param default_mask[in]
1728 * Default bit-masks to use when item->mask is not provided.
1729 * @param data[in, out]
1733 * 0 on success, a negative errno value otherwise and rte_errno is set.
1736 mlx5_flow_create_tcp(const struct rte_flow_item *item,
1737 const void *default_mask,
1738 struct mlx5_flow_data *data)
1740 const struct rte_flow_item_tcp *spec = item->spec;
1741 const struct rte_flow_item_tcp *mask = item->mask;
1742 struct mlx5_flow_parse *parser = data->parser;
1743 unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1744 struct ibv_flow_spec_tcp_udp tcp = {
1745 .type = parser->inner | IBV_FLOW_SPEC_TCP,
/* Promote the parser layer to the matching TCP-over-IP hash type. */
1749 if (parser->layer == HASH_RXQ_IPV4)
1750 parser->layer = HASH_RXQ_TCPV4;
1752 parser->layer = HASH_RXQ_TCPV6;
1755 mask = default_mask;
1756 tcp.val.dst_port = spec->hdr.dst_port;
1757 tcp.val.src_port = spec->hdr.src_port;
1758 tcp.mask.dst_port = mask->hdr.dst_port;
1759 tcp.mask.src_port = mask->hdr.src_port;
1760 /* Remove unwanted bits from values. */
1761 tcp.val.src_port &= tcp.mask.src_port;
1762 tcp.val.dst_port &= tcp.mask.dst_port;
1764 mlx5_flow_create_copy(parser, &tcp, tcp_size);
1769 * Convert VXLAN item to Verbs specification.
1772 * Item specification.
1773 * @param default_mask[in]
1774 * Default bit-masks to use when item->mask is not provided.
1775 * @param data[in, out]
1779 * 0 on success, a negative errno value otherwise and rte_errno is set.
1782 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
1783 const void *default_mask,
1784 struct mlx5_flow_data *data)
1786 const struct rte_flow_item_vxlan *spec = item->spec;
1787 const struct rte_flow_item_vxlan *mask = item->mask;
1788 struct mlx5_flow_parse *parser = data->parser;
1789 unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1790 struct ibv_flow_spec_tunnel vxlan = {
1791 .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
/* Every item after this one matches the inner headers. */
1800 parser->inner = IBV_FLOW_SPEC_INNER;
1801 parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)];
1802 parser->out_layer = parser->layer;
1803 parser->layer = HASH_RXQ_TUNNEL;
1804 /* Default VXLAN to outer RSS. */
1805 if (!parser->rss_conf.level)
1806 parser->rss_conf.level = 1;
1809 mask = default_mask;
/*
 * The 24-bit VNI is copied into the upper bytes of a union (declaration
 * elided) so it can be read back as a 32-bit tunnel_id.
 */
1810 memcpy(&id.vni[1], spec->vni, 3);
1811 vxlan.val.tunnel_id = id.vlan_id;
1812 memcpy(&id.vni[1], mask->vni, 3);
1813 vxlan.mask.tunnel_id = id.vlan_id;
1814 /* Remove unwanted bits from values. */
1815 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1818 * Tunnel id 0 is equivalent as not adding a VXLAN layer, if only this
1819 * layer is defined in the Verbs specification it is interpreted as
1820 * wildcard and all packets will match this rule, if it follows a full
1821 * stack layer (ex: eth / ipv4 / udp), all packets matching the layers
1822 * before will also match this rule.
1823 * To avoid such situation, VNI 0 is currently refused.
1825 /* Only allow tunnel w/o tunnel id pattern after proper outer spec. */
1826 if (parser->out_layer == HASH_RXQ_ETH && !vxlan.val.tunnel_id)
1827 return rte_flow_error_set(data->error, EINVAL,
1828 RTE_FLOW_ERROR_TYPE_ITEM,
1830 "VxLAN vni cannot be 0");
1831 mlx5_flow_create_copy(parser, &vxlan, size);
1836 * Convert VXLAN-GPE item to Verbs specification.
1839 * Item specification.
1840 * @param default_mask[in]
1841 * Default bit-masks to use when item->mask is not provided.
1842 * @param data[in, out]
1846 * 0 on success, a negative errno value otherwise and rte_errno is set.
1849 mlx5_flow_create_vxlan_gpe(const struct rte_flow_item *item,
1850 const void *default_mask,
1851 struct mlx5_flow_data *data)
1853 struct priv *priv = data->dev->data->dev_private;
1854 const struct rte_flow_item_vxlan_gpe *spec = item->spec;
1855 const struct rte_flow_item_vxlan_gpe *mask = item->mask;
1856 struct mlx5_flow_parse *parser = data->parser;
1857 unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1858 struct ibv_flow_spec_tunnel vxlan = {
1859 .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
/* VXLAN-GPE support is gated on the same l3_vxlan_en devarg. */
1867 if (!priv->config.l3_vxlan_en)
1868 return rte_flow_error_set(data->error, EINVAL,
1869 RTE_FLOW_ERROR_TYPE_ITEM,
1871 "L3 VXLAN not enabled by device"
1872 " parameter and/or not configured"
/* Every item after this one matches the inner headers. */
1875 parser->inner = IBV_FLOW_SPEC_INNER;
1876 parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN_GPE)];
1877 parser->out_layer = parser->layer;
1878 parser->layer = HASH_RXQ_TUNNEL;
1879 /* Default VXLAN-GPE to outer RSS. */
1880 if (!parser->rss_conf.level)
1881 parser->rss_conf.level = 1;
1884 mask = default_mask;
/* Pack the 24-bit VNI into a 32-bit tunnel_id via the 'id' union. */
1885 memcpy(&id.vni[1], spec->vni, 3);
1886 vxlan.val.tunnel_id = id.vlan_id;
1887 memcpy(&id.vni[1], mask->vni, 3);
1888 vxlan.mask.tunnel_id = id.vlan_id;
/* Protocol matching on GPE is rejected (condition elided from view). */
1890 return rte_flow_error_set(data->error, EINVAL,
1891 RTE_FLOW_ERROR_TYPE_ITEM,
1893 "VxLAN-GPE protocol not"
1895 /* Remove unwanted bits from values. */
1896 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1899 * Tunnel id 0 is equivalent as not adding a VXLAN layer, if only this
1900 * layer is defined in the Verbs specification it is interpreted as
1901 * wildcard and all packets will match this rule, if it follows a full
1902 * stack layer (ex: eth / ipv4 / udp), all packets matching the layers
1903 * before will also match this rule.
1904 * To avoid such situation, VNI 0 is currently refused.
1906 /* Only allow tunnel w/o tunnel id pattern after proper outer spec. */
1907 if (parser->out_layer == HASH_RXQ_ETH && !vxlan.val.tunnel_id)
1908 return rte_flow_error_set(data->error, EINVAL,
1909 RTE_FLOW_ERROR_TYPE_ITEM,
1911 "VxLAN-GPE vni cannot be 0");
1912 mlx5_flow_create_copy(parser, &vxlan, size);
1917 * Convert GRE item to Verbs specification.
1920 * Item specification.
1921 * @param default_mask[in]
1922 * Default bit-masks to use when item->mask is not provided.
1923 * @param data[in, out]
1927 * 0 on success, a negative errno value otherwise and rte_errno is set.
1930 mlx5_flow_create_gre(const struct rte_flow_item *item,
1931 const void *default_mask,
1932 struct mlx5_flow_data *data)
1934 struct mlx5_flow_parse *parser = data->parser;
/*
 * Without MPLS support in rdma-core there is no dedicated GRE spec:
 * fall back to a generic tunnel spec; otherwise use ibv_flow_spec_gre.
 */
1935 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
1937 unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1938 struct ibv_flow_spec_tunnel tunnel = {
1939 .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1943 const struct rte_flow_item_gre *spec = item->spec;
1944 const struct rte_flow_item_gre *mask = item->mask;
1945 unsigned int size = sizeof(struct ibv_flow_spec_gre);
1946 struct ibv_flow_spec_gre tunnel = {
1947 .type = parser->inner | IBV_FLOW_SPEC_GRE,
1951 struct ibv_flow_spec_ipv4_ext *ipv4;
1952 struct ibv_flow_spec_ipv6 *ipv6;
/* Every item after this one matches the inner headers. */
1955 parser->inner = IBV_FLOW_SPEC_INNER;
1956 parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_GRE)];
1957 parser->out_layer = parser->layer;
1958 parser->layer = HASH_RXQ_TUNNEL;
1959 /* Default GRE to inner RSS. */
1960 if (!parser->rss_conf.level)
1961 parser->rss_conf.level = 2;
1962 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
1965 mask = default_mask;
1966 tunnel.val.c_ks_res0_ver = spec->c_rsvd0_ver;
1967 tunnel.val.protocol = spec->protocol;
1968 tunnel.mask.c_ks_res0_ver = mask->c_rsvd0_ver;
1969 tunnel.mask.protocol = mask->protocol;
1970 /* Remove unwanted bits from values. */
1971 tunnel.val.c_ks_res0_ver &= tunnel.mask.c_ks_res0_ver;
1972 tunnel.val.protocol &= tunnel.mask.protocol;
1973 tunnel.val.key &= tunnel.mask.key;
1976 /* Update encapsulation IP layer protocol. */
/*
 * Force the outer L3 protocol to GRE (47); reject the flow if the
 * pattern already constrained it to something else.
 */
1977 for (i = 0; i != hash_rxq_init_n; ++i) {
1978 if (!parser->queue[i].ibv_attr)
1980 if (parser->out_layer == HASH_RXQ_IPV4) {
1981 ipv4 = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1982 parser->queue[i].offset -
1983 sizeof(struct ibv_flow_spec_ipv4_ext));
1984 if (ipv4->mask.proto && ipv4->val.proto != MLX5_GRE)
1986 ipv4->val.proto = MLX5_GRE;
1987 ipv4->mask.proto = 0xff;
1988 } else if (parser->out_layer == HASH_RXQ_IPV6) {
1989 ipv6 = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1990 parser->queue[i].offset -
1991 sizeof(struct ibv_flow_spec_ipv6));
1992 if (ipv6->mask.next_hdr &&
1993 ipv6->val.next_hdr != MLX5_GRE)
1995 ipv6->val.next_hdr = MLX5_GRE;
1996 ipv6->mask.next_hdr = 0xff;
/* Loop exited early (break elided) means a protocol conflict. */
1999 if (i != hash_rxq_init_n)
2000 return rte_flow_error_set(data->error, EINVAL,
2001 RTE_FLOW_ERROR_TYPE_ITEM,
2003 "IP protocol of GRE must be 47");
2004 mlx5_flow_create_copy(parser, &tunnel, size);
2009 * Convert MPLS item to Verbs specification.
2010 * MPLS tunnel types currently supported are MPLS-in-GRE and MPLS-in-UDP.
2013 * Item specification.
2014 * @param default_mask[in]
2015 * Default bit-masks to use when item->mask is not provided.
2016 * @param data[in, out]
2020 * 0 on success, a negative errno value otherwise and rte_errno is set.
2023 mlx5_flow_create_mpls(const struct rte_flow_item *item,
2024 const void *default_mask,
2025 struct mlx5_flow_data *data)
/* MPLS requires rdma-core support; otherwise reject with ENOTSUP. */
2027 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
2029 return rte_flow_error_set(data->error, ENOTSUP,
2030 RTE_FLOW_ERROR_TYPE_ITEM,
2032 "MPLS is not supported by driver");
2034 const struct rte_flow_item_mpls *spec = item->spec;
2035 const struct rte_flow_item_mpls *mask = item->mask;
2036 struct mlx5_flow_parse *parser = data->parser;
2037 unsigned int size = sizeof(struct ibv_flow_spec_mpls);
2038 struct ibv_flow_spec_mpls mpls = {
2039 .type = IBV_FLOW_SPEC_MPLS,
2043 parser->inner = IBV_FLOW_SPEC_INNER;
/* UDP outer layer means MPLS-in-UDP, otherwise MPLS-in-GRE. */
2044 if (parser->layer == HASH_RXQ_UDPV4 ||
2045 parser->layer == HASH_RXQ_UDPV6) {
2047 ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_MPLS_IN_UDP)];
2048 parser->out_layer = parser->layer;
2051 ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_MPLS_IN_GRE)];
2052 /* parser->out_layer stays as in GRE out_layer. */
2054 parser->layer = HASH_RXQ_TUNNEL;
2056 * For MPLS-in-GRE, RSS level should have been set.
2057 * For MPLS-in-UDP, use outer RSS.
2059 if (!parser->rss_conf.level)
2060 parser->rss_conf.level = 1;
2063 mask = default_mask;
2065 * The verbs label field includes the entire MPLS header:
2066 * bits 0:19 - label value field.
2067 * bits 20:22 - traffic class field.
2068 * bits 23 - bottom of stack bit.
2069 * bits 24:31 - ttl field.
2071 mpls.val.label = *(const uint32_t *)spec;
2072 mpls.mask.label = *(const uint32_t *)mask;
2073 /* Remove unwanted bits from values. */
2074 mpls.val.label &= mpls.mask.label;
2076 mlx5_flow_create_copy(parser, &mpls, size);
2082 * Convert mark/flag action to Verbs specification.
2085 * Internal parser structure.
2090 * 0 on success, a negative errno value otherwise and rte_errno is set.
2093 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
2095 unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
2096 struct ibv_flow_spec_action_tag tag = {
2097 .type = IBV_FLOW_SPEC_ACTION_TAG,
/* mlx5_flow_mark_set() encodes the user mark id into the HW tag. */
2099 .tag_id = mlx5_flow_mark_set(mark_id),
/* Caller must have requested mark/flag on this parser. */
2102 assert(parser->mark);
2103 mlx5_flow_create_copy(parser, &tag, size);
2108 * Convert count action to Verbs specification.
2111 * Pointer to Ethernet device.
2113 * Pointer to MLX5 flow parser structure.
2116 * 0 on success, a negative errno value otherwise and rte_errno is set.
2119 mlx5_flow_create_count(struct rte_eth_dev *dev __rte_unused,
2120 struct mlx5_flow_parse *parser __rte_unused)
/* No-op unless rdma-core exposes counter sets. */
2122 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
2123 struct priv *priv = dev->data->dev_private;
2124 unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
2125 struct ibv_counter_set_init_attr init_attr = {0};
2126 struct ibv_flow_spec_counter_action counter = {
2127 .type = IBV_FLOW_SPEC_ACTION_COUNT,
2129 .counter_set_handle = 0,
2132 init_attr.counter_set_id = 0;
/* Counter set ownership is stored on the parser (freed by callers). */
2133 parser->cs = mlx5_glue->create_counter_set(priv->ctx, &init_attr);
2138 counter.counter_set_handle = parser->cs->handle;
2139 mlx5_flow_create_copy(parser, &counter, size);
2145 * Complete flow rule creation with a drop queue.
2148 * Pointer to Ethernet device.
2150 * Internal parser structure.
2152 * Pointer to the rte_flow.
2154 * Perform verbose error reporting if not NULL.
2157 * 0 on success, a negative errno value otherwise and rte_errno is set.
2160 mlx5_flow_create_action_queue_drop(struct rte_eth_dev *dev,
2161 struct mlx5_flow_parse *parser,
2162 struct rte_flow *flow,
2163 struct rte_flow_error *error)
2165 struct priv *priv = dev->data->dev_private;
2166 struct ibv_flow_spec_action_drop *drop;
2167 unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
/* Append the drop action spec in-place at the current offset. */
2172 drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
2173 parser->queue[HASH_RXQ_ETH].offset);
2174 *drop = (struct ibv_flow_spec_action_drop){
2175 .type = IBV_FLOW_SPEC_ACTION_DROP,
2178 ++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
2179 parser->queue[HASH_RXQ_ETH].offset += size;
/* Ownership of the Verbs attribute moves from parser to flow. */
2180 flow->frxq[HASH_RXQ_ETH].ibv_attr =
2181 parser->queue[HASH_RXQ_ETH].ibv_attr;
2183 flow->cs = parser->cs;
/* Defer HW rule creation until the port is started. */
2184 if (!dev->data->dev_started)
2186 parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
2187 flow->frxq[HASH_RXQ_ETH].ibv_flow =
2188 mlx5_glue->create_flow(priv->flow_drop_queue->qp,
2189 flow->frxq[HASH_RXQ_ETH].ibv_attr);
2190 if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2191 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
2192 NULL, "flow rule creation failure");
/* Error path (label elided): undo flow/attr/counter allocations. */
2198 if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2199 claim_zero(mlx5_glue->destroy_flow
2200 (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2201 flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
2203 if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
2204 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
2205 flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
2208 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2216 * Create hash Rx queues when RSS is enabled.
2219 * Pointer to Ethernet device.
2221 * Internal parser structure.
2223 * Pointer to the rte_flow.
2225 * Perform verbose error reporting if not NULL.
2228 * 0 on success, a negative errno value otherwise and rte_errno is set.
2231 mlx5_flow_create_action_queue_rss(struct rte_eth_dev *dev,
2232 struct mlx5_flow_parse *parser,
2233 struct rte_flow *flow,
2234 struct rte_flow_error *error)
2238 for (i = 0; i != hash_rxq_init_n; ++i) {
2239 if (!parser->queue[i].ibv_attr)
/* Transfer attribute ownership from the parser to the flow. */
2241 flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
2242 parser->queue[i].ibv_attr = NULL;
2243 flow->frxq[i].hash_fields = parser->queue[i].hash_fields;
/* Hash Rx queues are only instantiated on a started port. */
2244 if (!dev->data->dev_started)
/* First try to reuse an existing hrxq, then create a new one
 * (mlx5_hrxq_get/mlx5_hrxq_new call names elided from this view). */
2246 flow->frxq[i].hrxq =
2248 parser->rss_conf.key,
2249 parser->rss_conf.key_len,
2250 flow->frxq[i].hash_fields,
2251 parser->rss_conf.queue,
2252 parser->rss_conf.queue_num,
2254 parser->rss_conf.level);
2255 if (flow->frxq[i].hrxq)
2257 flow->frxq[i].hrxq =
2259 parser->rss_conf.key,
2260 parser->rss_conf.key_len,
2261 flow->frxq[i].hash_fields,
2262 parser->rss_conf.queue,
2263 parser->rss_conf.queue_num,
2265 parser->rss_conf.level);
2266 if (!flow->frxq[i].hrxq) {
2267 return rte_flow_error_set(error, ENOMEM,
2268 RTE_FLOW_ERROR_TYPE_HANDLE,
2270 "cannot create hash rxq");
2277 * RXQ update after flow rule creation.
2280 * Pointer to Ethernet device.
2282 * Pointer to the flow rule.
2285 mlx5_flow_create_update_rxqs(struct rte_eth_dev *dev, struct rte_flow *flow)
2287 struct priv *priv = dev->data->dev_private;
/* Nothing to propagate while the port is stopped. */
2291 if (!dev->data->dev_started)
2293 for (i = 0; i != flow->rss_conf.queue_num; ++i) {
2294 struct mlx5_rxq_data *rxq_data = (*priv->rxqs)
2295 [(*flow->queues)[i]];
2296 struct mlx5_rxq_ctrl *rxq_ctrl =
2297 container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
2298 uint8_t tunnel = PTYPE_IDX(flow->tunnel);
/* Propagate mark flag to every Rx queue this flow touches. */
2300 rxq_data->mark |= flow->mark;
2303 rxq_ctrl->tunnel_types[tunnel] += 1;
2304 /* Clear tunnel type if more than one tunnel types set. */
2305 for (j = 0; j != RTE_DIM(rxq_ctrl->tunnel_types); ++j) {
2308 if (rxq_ctrl->tunnel_types[j] > 0) {
2309 rxq_data->tunnel = 0;
/* Single tunnel type on this queue: report it in rxq_data. */
2313 if (j == RTE_DIM(rxq_ctrl->tunnel_types))
2314 rxq_data->tunnel = flow->tunnel;
2319 * Dump flow hash RX queue detail.
2322 * Pointer to Ethernet device.
2324 * Pointer to the rte_flow.
2326 * Hash RX queue index.
2329 mlx5_flow_dump(struct rte_eth_dev *dev __rte_unused,
2330 struct rte_flow *flow __rte_unused,
2331 unsigned int hrxq_idx __rte_unused)
/* Debug-only helper (body compiled out otherwise — guards elided). */
2338 uint64_t extra_hash_fields = 0;
2340 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
2341 if (flow->tunnel && flow->rss_conf.level > 1)
2342 extra_hash_fields = (uint32_t)IBV_RX_HASH_INNER;
/* Walk the packed spec list to print "type(size)" pairs. */
2344 spec_ptr = (uintptr_t)(flow->frxq[hrxq_idx].ibv_attr + 1);
2345 for (j = 0, off = 0; j < flow->frxq[hrxq_idx].ibv_attr->num_of_specs;
2347 struct ibv_flow_spec *spec = (void *)spec_ptr;
2348 off += sprintf(buf + off, " %x(%hu)", spec->hdr.type,
2350 spec_ptr += spec->hdr.size;
2353 "port %u Verbs flow %p type %u: hrxq:%p qp:%p ind:%p,"
2354 " hash:%" PRIx64 "/%u specs:%hhu(%hu), priority:%hu, type:%d,"
2355 " flags:%x, comp_mask:%x specs:%s",
2356 dev->data->port_id, (void *)flow, hrxq_idx,
2357 (void *)flow->frxq[hrxq_idx].hrxq,
2358 (void *)flow->frxq[hrxq_idx].hrxq->qp,
2359 (void *)flow->frxq[hrxq_idx].hrxq->ind_table,
2360 (flow->frxq[hrxq_idx].hash_fields | extra_hash_fields),
2361 flow->rss_conf.queue_num,
2362 flow->frxq[hrxq_idx].ibv_attr->num_of_specs,
2363 flow->frxq[hrxq_idx].ibv_attr->size,
2364 flow->frxq[hrxq_idx].ibv_attr->priority,
2365 flow->frxq[hrxq_idx].ibv_attr->type,
2366 flow->frxq[hrxq_idx].ibv_attr->flags,
2367 flow->frxq[hrxq_idx].ibv_attr->comp_mask,
2373 * Complete flow rule creation.
2376 * Pointer to Ethernet device.
2378 * Internal parser structure.
2380 * Pointer to the rte_flow.
2382 * Perform verbose error reporting if not NULL.
2385 * 0 on success, a negative errno value otherwise and rte_errno is set.
2388 mlx5_flow_create_action_queue(struct rte_eth_dev *dev,
2389 struct mlx5_flow_parse *parser,
2390 struct rte_flow *flow,
2391 struct rte_flow_error *error)
2393 struct priv *priv __rte_unused = dev->data->dev_private;
2396 unsigned int flows_n = 0;
/* Drop flows are handled by mlx5_flow_create_action_queue_drop(). */
2400 assert(!parser->drop);
2401 ret = mlx5_flow_create_action_queue_rss(dev, parser, flow, error);
2405 flow->cs = parser->cs;
/* Defer HW rule creation until the port is started. */
2406 if (!dev->data->dev_started)
2408 for (i = 0; i != hash_rxq_init_n; ++i) {
2409 if (!flow->frxq[i].hrxq)
2411 flow->frxq[i].ibv_flow =
2412 mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
2413 flow->frxq[i].ibv_attr);
2414 mlx5_flow_dump(dev, flow, i);
2415 if (!flow->frxq[i].ibv_flow) {
2416 rte_flow_error_set(error, ENOMEM,
2417 RTE_FLOW_ERROR_TYPE_HANDLE,
2418 NULL, "flow rule creation failure");
/* No HW flow created at all is an internal inconsistency. */
2424 rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE,
2425 NULL, "internal error in flow creation");
2428 mlx5_flow_create_update_rxqs(dev, flow);
/* Error path (label elided): tear down partially created state. */
2431 ret = rte_errno; /* Save rte_errno before cleanup. */
2433 for (i = 0; i != hash_rxq_init_n; ++i) {
2434 if (flow->frxq[i].ibv_flow) {
2435 struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;
2437 claim_zero(mlx5_glue->destroy_flow(ibv_flow));
2439 if (flow->frxq[i].hrxq)
2440 mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
2441 if (flow->frxq[i].ibv_attr)
2442 rte_free(flow->frxq[i].ibv_attr);
2445 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2449 rte_errno = ret; /* Restore rte_errno. */
2457 * Pointer to Ethernet device.
2459 * Pointer to a TAILQ flow list.
2461 * Flow rule attributes.
2462 * @param[in] pattern
2463 * Pattern specification (list terminated by the END pattern item).
2464 * @param[in] actions
2465 * Associated actions (list terminated by the END action).
2467 * Perform verbose error reporting if not NULL.
2470 * A flow on success, NULL otherwise and rte_errno is set.
2472 static struct rte_flow *
2473 mlx5_flow_list_create(struct rte_eth_dev *dev,
2474 struct mlx5_flows *list,
2475 const struct rte_flow_attr *attr,
2476 const struct rte_flow_item items[],
2477 const struct rte_flow_action actions[],
2478 struct rte_flow_error *error)
2480 struct mlx5_flow_parse parser = { .create = 1, };
2481 struct rte_flow *flow = NULL;
/* Validate + convert first; this fills the parser's Verbs specs. */
2485 ret = mlx5_flow_convert(dev, attr, items, actions, error, &parser);
/* Single allocation: rte_flow followed by the queue index array. */
2488 flow = rte_calloc(__func__, 1,
2490 parser.rss_conf.queue_num * sizeof(uint16_t),
2493 rte_flow_error_set(error, ENOMEM,
2494 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2496 "cannot allocate flow memory");
2499 /* Copy configuration. */
2500 flow->queues = (uint16_t (*)[])(flow + 1);
2501 flow->tunnel = parser.tunnel;
2502 flow->rss_conf = (struct rte_flow_action_rss){
2503 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
2505 .types = parser.rss_conf.types,
2506 .key_len = parser.rss_conf.key_len,
2507 .queue_num = parser.rss_conf.queue_num,
2508 .key = memcpy(flow->rss_key, parser.rss_conf.key,
2509 sizeof(*parser.rss_conf.key) *
2510 parser.rss_conf.key_len),
2511 .queue = memcpy(flow->queues, parser.rss_conf.queue,
2512 sizeof(*parser.rss_conf.queue) *
2513 parser.rss_conf.queue_num),
2515 flow->mark = parser.mark;
2516 /* finalise the flow. */
2518 ret = mlx5_flow_create_action_queue_drop(dev, &parser, flow,
2521 ret = mlx5_flow_create_action_queue(dev, &parser, flow, error);
2524 TAILQ_INSERT_TAIL(list, flow, next);
2525 DRV_LOG(DEBUG, "port %u flow created %p", dev->data->port_id,
/* Error path (label elided): free parser attrs and the flow. */
2529 DRV_LOG(ERR, "port %u flow creation error: %s", dev->data->port_id,
2531 for (i = 0; i != hash_rxq_init_n; ++i) {
2532 if (parser.queue[i].ibv_attr)
2533 rte_free(parser.queue[i].ibv_attr);
2540 * Validate a flow supported by the NIC.
2542 * @see rte_flow_validate()
/*
 * Validate a flow without creating it: run the conversion pass only
 * (parser.create == 0), returning whatever mlx5_flow_convert() reports.
 */
2546 mlx5_flow_validate(struct rte_eth_dev *dev,
2547 const struct rte_flow_attr *attr,
2548 const struct rte_flow_item items[],
2549 const struct rte_flow_action actions[],
2550 struct rte_flow_error *error)
2552 struct mlx5_flow_parse parser = { .create = 0, };
2554 return mlx5_flow_convert(dev, attr, items, actions, error, &parser);
2560 * @see rte_flow_create()
/*
 * rte_flow create callback: delegate to mlx5_flow_list_create() on the
 * device's private flow list.
 */
2564 mlx5_flow_create(struct rte_eth_dev *dev,
2565 const struct rte_flow_attr *attr,
2566 const struct rte_flow_item items[],
2567 const struct rte_flow_action actions[],
2568 struct rte_flow_error *error)
2570 struct priv *priv = dev->data->dev_private;
2572 return mlx5_flow_list_create(dev, &priv->flows, attr, items, actions,
2577 * Destroy a flow in a list.
2580 * Pointer to Ethernet device.
2582 * Pointer to a TAILQ flow list.
/*
 * Remove @p flow from @p list and release its resources, updating the
 * tunnel and mark bookkeeping of the Rx queues it referenced.
 *
 * NOTE(review): elided excerpt — loop-variable declarations and several
 * braces/labels are not visible here.
 */
2587 mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
2588 struct rte_flow *flow)
2590 struct priv *priv = dev->data->dev_private;
/* Queue bookkeeping is skipped for drop flows or a stopped device. */
2593 if (flow->drop || !dev->data->dev_started)
2595 for (i = 0; flow->tunnel && i != flow->rss_conf.queue_num; ++i) {
2596 /* Update queue tunnel type. */
2597 struct mlx5_rxq_data *rxq_data = (*priv->rxqs)
2598 [(*flow->queues)[i]];
2599 struct mlx5_rxq_ctrl *rxq_ctrl =
2600 container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
2601 uint8_t tunnel = PTYPE_IDX(flow->tunnel);
/* Drop this flow's reference on the tunnel type for the Rx queue. */
2603 assert(rxq_ctrl->tunnel_types[tunnel] > 0);
2604 rxq_ctrl->tunnel_types[tunnel] -= 1;
2605 if (!rxq_ctrl->tunnel_types[tunnel]) {
2606 /* Update tunnel type. */
2611 for (j = 0; j < RTE_DIM(rxq_ctrl->tunnel_types); j++)
2612 if (rxq_ctrl->tunnel_types[j]) {
2616 /* Keep same if more than one tunnel types left. */
2618 rxq_data->tunnel = ptype_ext[last];
2619 else if (types == 0)
2620 /* No tunnel type left. */
2621 rxq_data->tunnel = 0;
/* Clear the mark flag on each queue unless another marked flow uses it. */
2624 for (i = 0; flow->mark && i != flow->rss_conf.queue_num; ++i) {
2625 struct rte_flow *tmp;
2629 * To remove the mark from the queue, the queue must not be
2630 * present in any other marked flow (RSS or not).
2632 TAILQ_FOREACH(tmp, list, next) {
2634 uint16_t *tqs = NULL;
2639 for (j = 0; j != hash_rxq_init_n; ++j) {
2640 if (!tmp->frxq[j].hrxq)
2642 tqs = tmp->frxq[j].hrxq->ind_table->queues;
2643 tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
2647 for (j = 0; (j != tq_n) && !mark; j++)
2648 if (tqs[j] == (*flow->queues)[i])
2651 (*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
/* Destroy Verbs flows and release the hash Rx queues. */
2655 if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
2656 claim_zero(mlx5_glue->destroy_flow
2657 (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2658 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
2660 for (i = 0; i != hash_rxq_init_n; ++i) {
2661 struct mlx5_flow *frxq = &flow->frxq[i];
2664 claim_zero(mlx5_glue->destroy_flow
2667 mlx5_hrxq_release(dev, frxq->hrxq);
2669 rte_free(frxq->ibv_attr);
/* Release the counter set, if the flow had one. */
2673 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2676 TAILQ_REMOVE(list, flow, next);
2677 DRV_LOG(DEBUG, "port %u flow destroyed %p", dev->data->port_id,
2683 * Destroy all flows.
2686 * Pointer to Ethernet device.
2688 * Pointer to a TAILQ flow list.
/* Destroy every flow in @p list, one at a time, until it is empty. */
2691 mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
2693 while (!TAILQ_EMPTY(list)) {
2694 struct rte_flow *flow;
2696 flow = TAILQ_FIRST(list);
2697 mlx5_flow_list_destroy(dev, list, flow);
2702 * Create drop queue.
2705 * Pointer to Ethernet device.
2708 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Allocate the Verbs objects backing the drop queue: a CQ, a WQ, a
 * single-entry indirection table and a RAW_PACKET hash QP, stored in
 * priv->flow_drop_queue.
 *
 * NOTE(review): elided excerpt — the NULL checks after each create call
 * and the error-path labels between the cleanup claim_zero() calls at the
 * bottom are not visible here.
 */
2711 mlx5_flow_create_drop_queue(struct rte_eth_dev *dev)
2713 struct priv *priv = dev->data->dev_private;
2714 struct mlx5_hrxq_drop *fdq = NULL;
2718 fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
2721 "port %u cannot allocate memory for drop queue",
2722 dev->data->port_id);
/* Minimal CQ: drop traffic never generates completions to consume. */
2726 fdq->cq = mlx5_glue->create_cq(priv->ctx, 1, NULL, NULL, 0);
2728 DRV_LOG(WARNING, "port %u cannot allocate CQ for drop queue",
2729 dev->data->port_id);
2733 fdq->wq = mlx5_glue->create_wq
2735 &(struct ibv_wq_init_attr){
2736 .wq_type = IBV_WQT_RQ,
2743 DRV_LOG(WARNING, "port %u cannot allocate WQ for drop queue",
2744 dev->data->port_id);
/* Single-entry indirection table pointing at the drop WQ. */
2748 fdq->ind_table = mlx5_glue->create_rwq_ind_table
2750 &(struct ibv_rwq_ind_table_init_attr){
2751 .log_ind_tbl_size = 0,
2752 .ind_tbl = &fdq->wq,
2755 if (!fdq->ind_table) {
2757 "port %u cannot allocate indirection table for drop"
2759 dev->data->port_id);
/* Hash QP with a zero fields mask: all packets land on the drop WQ. */
2763 fdq->qp = mlx5_glue->create_qp_ex
2765 &(struct ibv_qp_init_attr_ex){
2766 .qp_type = IBV_QPT_RAW_PACKET,
2768 IBV_QP_INIT_ATTR_PD |
2769 IBV_QP_INIT_ATTR_IND_TABLE |
2770 IBV_QP_INIT_ATTR_RX_HASH,
2771 .rx_hash_conf = (struct ibv_rx_hash_conf){
2773 IBV_RX_HASH_FUNC_TOEPLITZ,
2774 .rx_hash_key_len = rss_hash_default_key_len,
2775 .rx_hash_key = rss_hash_default_key,
2776 .rx_hash_fields_mask = 0,
2778 .rwq_ind_tbl = fdq->ind_table,
2782 DRV_LOG(WARNING, "port %u cannot allocate QP for drop queue",
2783 dev->data->port_id);
2787 priv->flow_drop_queue = fdq;
/* Error path: tear down whatever was created, in reverse order. */
2791 claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2793 claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2795 claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2797 claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2800 priv->flow_drop_queue = NULL;
2805 * Delete drop queue.
2808 * Pointer to Ethernet device.
/*
 * Destroy the drop queue Verbs objects in reverse creation order
 * (QP, indirection table, WQ, CQ) and reset priv->flow_drop_queue.
 */
2811 mlx5_flow_delete_drop_queue(struct rte_eth_dev *dev)
2813 struct priv *priv = dev->data->dev_private;
2814 struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
2819 claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2821 claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2823 claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2825 claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2827 priv->flow_drop_queue = NULL;
2834 * Pointer to Ethernet device.
2836 * Pointer to a TAILQ flow list.
/*
 * Remove all flows in @p list from hardware: destroy their Verbs flows,
 * release hash Rx queues, clear Rx queue mark flags and wipe Rx queue
 * tunnel bookkeeping. The rte_flow objects themselves stay in the list
 * so the flows can be re-applied later.
 *
 * NOTE(review): elided excerpt — loop-variable declarations and several
 * continue/break statements are not visible here.
 */
2839 mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
2841 struct priv *priv = dev->data->dev_private;
2842 struct rte_flow *flow;
/* Reverse order so dependent flows are removed before their peers. */
2845 TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
2846 struct mlx5_ind_table_ibv *ind_tbl = NULL;
/* Drop flows only hold the single HASH_RXQ_ETH Verbs flow. */
2849 if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
2851 claim_zero(mlx5_glue->destroy_flow
2852 (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2853 flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
2854 DRV_LOG(DEBUG, "port %u flow %p removed",
2855 dev->data->port_id, (void *)flow);
2859 /* Verify the flow has not already been cleaned. */
2860 for (i = 0; i != hash_rxq_init_n; ++i) {
2861 if (!flow->frxq[i].ibv_flow)
2864 * Indirection table may be necessary to remove the
2865 * flags in the Rx queues.
2866 * This helps to speed-up the process by avoiding
2869 ind_tbl = flow->frxq[i].hrxq->ind_table;
2872 if (i == hash_rxq_init_n)
/* Clear the mark flag on every queue of the indirection table. */
2876 for (i = 0; i != ind_tbl->queues_n; ++i)
2877 (*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
2879 for (i = 0; i != hash_rxq_init_n; ++i) {
2880 if (!flow->frxq[i].ibv_flow)
2882 claim_zero(mlx5_glue->destroy_flow
2883 (flow->frxq[i].ibv_flow));
2884 flow->frxq[i].ibv_flow = NULL;
2885 mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
2886 flow->frxq[i].hrxq = NULL;
2888 DRV_LOG(DEBUG, "port %u flow %p removed", dev->data->port_id,
2891 /* Cleanup Rx queue tunnel info. */
2892 for (i = 0; i != priv->rxqs_n; ++i) {
2893 struct mlx5_rxq_data *q = (*priv->rxqs)[i];
2894 struct mlx5_rxq_ctrl *rxq_ctrl =
2895 container_of(q, struct mlx5_rxq_ctrl, rxq);
2899 memset((void *)rxq_ctrl->tunnel_types, 0,
2900 sizeof(rxq_ctrl->tunnel_types));
2909 * Pointer to Ethernet device.
2911 * Pointer to a TAILQ flow list.
2914 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Re-apply every flow in @p list to hardware: re-create drop flows on the
 * drop queue QP, and for regular flows get-or-create the hash Rx queues
 * before re-creating their Verbs flows.
 *
 * NOTE(review): elided excerpt — loop-variable declarations, the drop/
 * non-drop branch condition and the error returns after each DRV_LOG are
 * not visible here.
 */
2917 mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
2919 struct priv *priv = dev->data->dev_private;
2920 struct rte_flow *flow;
2922 TAILQ_FOREACH(flow, list, next) {
/* Drop flows attach directly to the drop queue QP. */
2926 flow->frxq[HASH_RXQ_ETH].ibv_flow =
2927 mlx5_glue->create_flow
2928 (priv->flow_drop_queue->qp,
2929 flow->frxq[HASH_RXQ_ETH].ibv_attr);
2930 if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2932 "port %u flow %p cannot be applied",
2933 dev->data->port_id, (void *)flow);
2937 DRV_LOG(DEBUG, "port %u flow %p applied",
2938 dev->data->port_id, (void *)flow);
/* Regular flows: one hash Rx queue per populated hash type. */
2942 for (i = 0; i != hash_rxq_init_n; ++i) {
2943 if (!flow->frxq[i].ibv_attr)
/* Reuse an existing hash Rx queue when possible... */
2945 flow->frxq[i].hrxq =
2946 mlx5_hrxq_get(dev, flow->rss_conf.key,
2947 flow->rss_conf.key_len,
2948 flow->frxq[i].hash_fields,
2949 flow->rss_conf.queue,
2950 flow->rss_conf.queue_num,
2952 flow->rss_conf.level);
2953 if (flow->frxq[i].hrxq)
/* ...otherwise create a new one. */
2955 flow->frxq[i].hrxq =
2956 mlx5_hrxq_new(dev, flow->rss_conf.key,
2957 flow->rss_conf.key_len,
2958 flow->frxq[i].hash_fields,
2959 flow->rss_conf.queue,
2960 flow->rss_conf.queue_num,
2962 flow->rss_conf.level);
2963 if (!flow->frxq[i].hrxq) {
2965 "port %u flow %p cannot create hash"
2967 dev->data->port_id, (void *)flow);
2972 mlx5_flow_dump(dev, flow, i);
2973 flow->frxq[i].ibv_flow =
2974 mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
2975 flow->frxq[i].ibv_attr);
2976 if (!flow->frxq[i].ibv_flow) {
2978 "port %u flow %p type %u cannot be"
2980 dev->data->port_id, (void *)flow, i);
/* Refresh Rx queue mark/tunnel state for the re-applied flow. */
2985 mlx5_flow_create_update_rxqs(dev, flow);
2991 * Verify the flow list is empty.
2994 * Pointer to Ethernet device.
2996 * @return the number of flows not released.
/*
 * Log every flow still present in priv->flows; used to detect leaked
 * flows. Returns the number of flows still referenced (per the header
 * comment above; the counter increment is elided from this excerpt).
 */
2999 mlx5_flow_verify(struct rte_eth_dev *dev)
3001 struct priv *priv = dev->data->dev_private;
3002 struct rte_flow *flow;
3005 TAILQ_FOREACH(flow, &priv->flows, next) {
3006 DRV_LOG(DEBUG, "port %u flow %p still referenced",
3007 dev->data->port_id, (void *)flow);
3014 * Enable a control flow configured from the control plane.
3017 * Pointer to Ethernet device.
3019 * An Ethernet flow spec to apply.
3021 * An Ethernet flow mask to apply.
3023 * A VLAN flow spec to apply.
3025 * A VLAN flow mask to apply.
3028 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Build and create a control-plane flow (ETH, optionally followed by VLAN)
 * with an RSS action spreading over the current RETA queues, inserted into
 * priv->ctrl_flows.
 *
 * NOTE(review): elided excerpt — the spec/mask assignments inside the item
 * initializers and the early-return when reta_idx_n is zero are not fully
 * visible here.
 */
3031 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
3032 struct rte_flow_item_eth *eth_spec,
3033 struct rte_flow_item_eth *eth_mask,
3034 struct rte_flow_item_vlan *vlan_spec,
3035 struct rte_flow_item_vlan *vlan_mask)
3037 struct priv *priv = dev->data->dev_private;
3038 const struct rte_flow_attr attr = {
3040 .priority = MLX5_CTRL_FLOW_PRIORITY,
3042 struct rte_flow_item items[] = {
3044 .type = RTE_FLOW_ITEM_TYPE_ETH,
/* The VLAN item is only present when a VLAN spec was supplied. */
3050 .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
3051 RTE_FLOW_ITEM_TYPE_END,
3057 .type = RTE_FLOW_ITEM_TYPE_END,
/* VLA sized by the current redirection table. */
3060 uint16_t queue[priv->reta_idx_n];
3061 struct rte_flow_action_rss action_rss = {
3062 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
3064 .types = priv->rss_conf.rss_hf,
3065 .key_len = priv->rss_conf.rss_key_len,
3066 .queue_num = priv->reta_idx_n,
3067 .key = priv->rss_conf.rss_key,
3070 struct rte_flow_action actions[] = {
3072 .type = RTE_FLOW_ACTION_TYPE_RSS,
3073 .conf = &action_rss,
3076 .type = RTE_FLOW_ACTION_TYPE_END,
3079 struct rte_flow *flow;
3080 struct rte_flow_error error;
3083 if (!priv->reta_idx_n) {
/* Copy the RETA entries into the RSS queue array. */
3087 for (i = 0; i != priv->reta_idx_n; ++i)
3088 queue[i] = (*priv->reta_idx)[i];
3089 flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
3097 * Enable a flow control configured from the control plane.
3100 * Pointer to Ethernet device.
3102 * An Ethernet flow spec to apply.
3104 * An Ethernet flow mask to apply.
3107 * 0 on success, a negative errno value otherwise and rte_errno is set.
/* Convenience wrapper: control flow matching only on Ethernet (no VLAN). */
3110 mlx5_ctrl_flow(struct rte_eth_dev *dev,
3111 struct rte_flow_item_eth *eth_spec,
3112 struct rte_flow_item_eth *eth_mask)
3114 return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
3120 * @see rte_flow_destroy()
/* rte_flow destroy callback: remove the flow from the device's list. */
3124 mlx5_flow_destroy(struct rte_eth_dev *dev,
3125 struct rte_flow *flow,
3126 struct rte_flow_error *error __rte_unused)
3128 struct priv *priv = dev->data->dev_private;
3130 mlx5_flow_list_destroy(dev, &priv->flows, flow);
3135 * Destroy all flows.
3137 * @see rte_flow_flush()
/* rte_flow flush callback: destroy every flow on the device's list. */
3141 mlx5_flow_flush(struct rte_eth_dev *dev,
3142 struct rte_flow_error *error __rte_unused)
3144 struct priv *priv = dev->data->dev_private;
3146 mlx5_flow_list_flush(dev, &priv->flows);
3150 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
3152 * Query flow counter.
3156 * @param counter_value
3157 * returned data from the counter.
3160 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Read hits/bytes from a Verbs counter set and report them relative to the
 * stats snapshot stored in @p counter_stats; on @p query_count->reset the
 * snapshot is advanced to the current raw values.
 *
 * NOTE(review): elided excerpt — the query_cs_attr/query_out fields binding
 * @p cs and the counters[] buffer to the query are not fully visible here.
 */
3163 mlx5_flow_query_count(struct ibv_counter_set *cs,
3164 struct mlx5_flow_counter_stats *counter_stats,
3165 struct rte_flow_query_count *query_count,
3166 struct rte_flow_error *error)
/* counters[0] = hits, counters[1] = bytes (see assignments below). */
3168 uint64_t counters[2];
3169 struct ibv_query_counter_set_attr query_cs_attr = {
3171 .query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
3173 struct ibv_counter_set_data query_out = {
3175 .outlen = 2 * sizeof(uint64_t),
3177 int err = mlx5_glue->query_counter_set(&query_cs_attr, &query_out);
3180 return rte_flow_error_set(error, err,
3181 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3183 "cannot read counter");
3184 query_count->hits_set = 1;
3185 query_count->bytes_set = 1;
/* Report deltas relative to the last reset snapshot. */
3186 query_count->hits = counters[0] - counter_stats->hits;
3187 query_count->bytes = counters[1] - counter_stats->bytes;
3188 if (query_count->reset) {
3189 counter_stats->hits = counters[0];
3190 counter_stats->bytes = counters[1];
3198 * @see rte_flow_query()
/*
 * rte_flow query callback: only the COUNT query is supported, and only
 * when the flow carries a counter set (flow->cs); otherwise fail with
 * EINVAL.
 */
3202 mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
3203 struct rte_flow *flow,
3204 const struct rte_flow_action *action __rte_unused,
3206 struct rte_flow_error *error)
3211 ret = mlx5_flow_query_count(flow->cs,
3212 &flow->counter_stats,
3213 (struct rte_flow_query_count *)data,
3218 return rte_flow_error_set(error, EINVAL,
3219 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3221 "no counter found for flow");
3230 * @see rte_flow_isolate()
/*
 * rte_flow isolate callback: toggle isolated mode (refused while the port
 * is started) and swap the device ops table accordingly.
 */
3234 mlx5_flow_isolate(struct rte_eth_dev *dev,
3236 struct rte_flow_error *error)
3238 struct priv *priv = dev->data->dev_private;
/* Isolation can only change while the port is stopped. */
3240 if (dev->data->dev_started) {
3241 rte_flow_error_set(error, EBUSY,
3242 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3244 "port must be stopped first");
3247 priv->isolated = !!enable;
/* Isolated mode uses a reduced eth_dev_ops table (defined in mlx5.c). */
3249 dev->dev_ops = &mlx5_dev_ops_isolate;
3251 dev->dev_ops = &mlx5_dev_ops;
3256 * Convert a flow director filter to a generic flow.
3259 * Pointer to Ethernet device.
3260 * @param fdir_filter
3261 * Flow director filter to add.
3263 * Generic flow parameters structure.
3266 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Translate a flow director filter into a generic rte_flow description
 * stored in @p attributes: items[0] = ETH, items[1] = IPv4/IPv6,
 * items[2] = UDP/TCP (when applicable), plus a QUEUE or DROP action.
 *
 * NOTE(review): elided excerpt — several error returns (rte_errno
 * assignments followed by return) and closing braces are not visible here.
 */
3269 mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
3270 const struct rte_eth_fdir_filter *fdir_filter,
3271 struct mlx5_fdir *attributes)
3273 struct priv *priv = dev->data->dev_private;
3274 const struct rte_eth_fdir_input *input = &fdir_filter->input;
/* Masks come from the device's flow director configuration. */
3275 const struct rte_eth_fdir_masks *mask =
3276 &dev->data->dev_conf.fdir_conf.mask;
3278 /* Validate queue number. */
3279 if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
3280 DRV_LOG(ERR, "port %u invalid queue number %d",
3281 dev->data->port_id, fdir_filter->action.rx_queue);
3285 attributes->attr.ingress = 1;
3286 attributes->items[0] = (struct rte_flow_item) {
3287 .type = RTE_FLOW_ITEM_TYPE_ETH,
3288 .spec = &attributes->l2,
3289 .mask = &attributes->l2_mask,
/* ACCEPT maps to a QUEUE action, REJECT to DROP. */
3291 switch (fdir_filter->action.behavior) {
3292 case RTE_ETH_FDIR_ACCEPT:
3293 attributes->actions[0] = (struct rte_flow_action){
3294 .type = RTE_FLOW_ACTION_TYPE_QUEUE,
3295 .conf = &attributes->queue,
3298 case RTE_ETH_FDIR_REJECT:
3299 attributes->actions[0] = (struct rte_flow_action){
3300 .type = RTE_FLOW_ACTION_TYPE_DROP,
3304 DRV_LOG(ERR, "port %u invalid behavior %d",
3306 fdir_filter->action.behavior);
3307 rte_errno = ENOTSUP;
3310 attributes->queue.index = fdir_filter->action.rx_queue;
/* L3 item: IPv4 or IPv6 header fields from the filter input. */
3312 switch (fdir_filter->input.flow_type) {
3313 case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
3314 case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
3315 case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
3316 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
3317 .src_addr = input->flow.ip4_flow.src_ip,
3318 .dst_addr = input->flow.ip4_flow.dst_ip,
3319 .time_to_live = input->flow.ip4_flow.ttl,
3320 .type_of_service = input->flow.ip4_flow.tos,
3321 .next_proto_id = input->flow.ip4_flow.proto,
3323 attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){
3324 .src_addr = mask->ipv4_mask.src_ip,
3325 .dst_addr = mask->ipv4_mask.dst_ip,
3326 .time_to_live = mask->ipv4_mask.ttl,
3327 .type_of_service = mask->ipv4_mask.tos,
3328 .next_proto_id = mask->ipv4_mask.proto,
3330 attributes->items[1] = (struct rte_flow_item){
3331 .type = RTE_FLOW_ITEM_TYPE_IPV4,
3332 .spec = &attributes->l3,
3333 .mask = &attributes->l3_mask,
3336 case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
3337 case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
3338 case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
3339 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
3340 .hop_limits = input->flow.ipv6_flow.hop_limits,
3341 .proto = input->flow.ipv6_flow.proto,
3344 memcpy(attributes->l3.ipv6.hdr.src_addr,
3345 input->flow.ipv6_flow.src_ip,
3346 RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
3347 memcpy(attributes->l3.ipv6.hdr.dst_addr,
3348 input->flow.ipv6_flow.dst_ip,
/* NOTE(review): dst copies are sized with src_addr; same RTE_DIM
 * either way, but dst_addr would be the matching field — confirm. */
3349 RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
3350 memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
3351 mask->ipv6_mask.src_ip,
3352 RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
3353 memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
3354 mask->ipv6_mask.dst_ip,
3355 RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
3356 attributes->items[1] = (struct rte_flow_item){
3357 .type = RTE_FLOW_ITEM_TYPE_IPV6,
3358 .spec = &attributes->l3,
3359 .mask = &attributes->l3_mask,
3363 DRV_LOG(ERR, "port %u invalid flow type%d",
3364 dev->data->port_id, fdir_filter->input.flow_type);
3365 rte_errno = ENOTSUP;
/* L4 item: UDP or TCP ports; the OTHER flow types have no L4 item. */
3369 switch (fdir_filter->input.flow_type) {
3370 case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
3371 attributes->l4.udp.hdr = (struct udp_hdr){
3372 .src_port = input->flow.udp4_flow.src_port,
3373 .dst_port = input->flow.udp4_flow.dst_port,
3375 attributes->l4_mask.udp.hdr = (struct udp_hdr){
3376 .src_port = mask->src_port_mask,
3377 .dst_port = mask->dst_port_mask,
3379 attributes->items[2] = (struct rte_flow_item){
3380 .type = RTE_FLOW_ITEM_TYPE_UDP,
3381 .spec = &attributes->l4,
3382 .mask = &attributes->l4_mask,
3385 case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
3386 attributes->l4.tcp.hdr = (struct tcp_hdr){
3387 .src_port = input->flow.tcp4_flow.src_port,
3388 .dst_port = input->flow.tcp4_flow.dst_port,
3390 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
3391 .src_port = mask->src_port_mask,
3392 .dst_port = mask->dst_port_mask,
3394 attributes->items[2] = (struct rte_flow_item){
3395 .type = RTE_FLOW_ITEM_TYPE_TCP,
3396 .spec = &attributes->l4,
3397 .mask = &attributes->l4_mask,
3400 case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
3401 attributes->l4.udp.hdr = (struct udp_hdr){
3402 .src_port = input->flow.udp6_flow.src_port,
3403 .dst_port = input->flow.udp6_flow.dst_port,
3405 attributes->l4_mask.udp.hdr = (struct udp_hdr){
3406 .src_port = mask->src_port_mask,
3407 .dst_port = mask->dst_port_mask,
3409 attributes->items[2] = (struct rte_flow_item){
3410 .type = RTE_FLOW_ITEM_TYPE_UDP,
3411 .spec = &attributes->l4,
3412 .mask = &attributes->l4_mask,
3415 case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
3416 attributes->l4.tcp.hdr = (struct tcp_hdr){
3417 .src_port = input->flow.tcp6_flow.src_port,
3418 .dst_port = input->flow.tcp6_flow.dst_port,
3420 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
3421 .src_port = mask->src_port_mask,
3422 .dst_port = mask->dst_port_mask,
3424 attributes->items[2] = (struct rte_flow_item){
3425 .type = RTE_FLOW_ITEM_TYPE_TCP,
3426 .spec = &attributes->l4,
3427 .mask = &attributes->l4_mask,
3430 case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
3431 case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
3434 DRV_LOG(ERR, "port %u invalid flow type%d",
3435 dev->data->port_id, fdir_filter->input.flow_type);
3436 rte_errno = ENOTSUP;
3443 * Add new flow director filter and store it in list.
3446 * Pointer to Ethernet device.
3447 * @param fdir_filter
3448 * Flow director filter to add.
3451 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Add a flow director filter: convert it to a generic flow description,
 * validate it through mlx5_flow_convert(), then create it on the device's
 * flow list.
 */
3454 mlx5_fdir_filter_add(struct rte_eth_dev *dev,
3455 const struct rte_eth_fdir_filter *fdir_filter)
3457 struct priv *priv = dev->data->dev_private;
3458 struct mlx5_fdir attributes = {
/* Wildcard L2 match: all-zero MAC addresses. */
3461 .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
3462 .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
3466 struct mlx5_flow_parse parser = {
3467 .layer = HASH_RXQ_ETH,
3469 struct rte_flow_error error;
3470 struct rte_flow *flow;
3473 ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
/* Dry-run the conversion before actually creating the flow. */
3476 ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
3477 attributes.actions, &error, &parser);
3480 flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
3481 attributes.items, attributes.actions,
3484 DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
3492 * Delete specific filter.
3495 * Pointer to Ethernet device.
3496 * @param fdir_filter
3497 * Filter to be deleted.
3500 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Delete a flow director filter: rebuild its Verbs attributes, then scan
 * priv->flows for a flow whose attribute block and spec list match
 * byte-for-byte, and destroy it.
 *
 * NOTE(review): elided excerpt — declarations (i, spec, flow_spec, ret),
 * the drop-branch condition and the goto labels (wrong_flow/exit) are not
 * visible here.
 */
3503 mlx5_fdir_filter_delete(struct rte_eth_dev *dev,
3504 const struct rte_eth_fdir_filter *fdir_filter)
3506 struct priv *priv = dev->data->dev_private;
3507 struct mlx5_fdir attributes = {
3510 struct mlx5_flow_parse parser = {
3512 .layer = HASH_RXQ_ETH,
3514 struct rte_flow_error error;
3515 struct rte_flow *flow;
3519 ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
3522 ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
3523 attributes.actions, &error, &parser);
3527 * Special case for drop action which is only set in the
3528 * specifications when the flow is created. In this situation the
3529 * drop specification is missing.
/* Append the drop spec manually so comparison matches created flows. */
3532 struct ibv_flow_spec_action_drop *drop;
3534 drop = (void *)((uintptr_t)parser.queue[HASH_RXQ_ETH].ibv_attr +
3535 parser.queue[HASH_RXQ_ETH].offset);
3536 *drop = (struct ibv_flow_spec_action_drop){
3537 .type = IBV_FLOW_SPEC_ACTION_DROP,
3538 .size = sizeof(struct ibv_flow_spec_action_drop),
3540 parser.queue[HASH_RXQ_ETH].ibv_attr->num_of_specs++;
3542 TAILQ_FOREACH(flow, &priv->flows, next) {
3543 struct ibv_flow_attr *attr;
3544 struct ibv_spec_header *attr_h;
3546 struct ibv_flow_attr *flow_attr;
3547 struct ibv_spec_header *flow_h;
3549 unsigned int specs_n;
3550 unsigned int queue_id = parser.drop ? HASH_RXQ_ETH :
3553 attr = parser.queue[queue_id].ibv_attr;
3554 flow_attr = flow->frxq[queue_id].ibv_attr;
3555 /* Compare first the attributes. */
3557 memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
3559 if (attr->num_of_specs == 0)
/* Walk both spec lists in lockstep and compare each spec. */
3561 spec = (void *)((uintptr_t)attr +
3562 sizeof(struct ibv_flow_attr));
3563 flow_spec = (void *)((uintptr_t)flow_attr +
3564 sizeof(struct ibv_flow_attr));
3565 specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
3566 for (i = 0; i != specs_n; ++i) {
3569 if (memcmp(spec, flow_spec,
3570 RTE_MIN(attr_h->size, flow_h->size)))
3572 spec = (void *)((uintptr_t)spec + attr_h->size);
3573 flow_spec = (void *)((uintptr_t)flow_spec +
3576 /* At this point, the flow match. */
3579 /* The flow does not match. */
3582 ret = rte_errno; /* Save rte_errno before cleanup. */
3584 mlx5_flow_list_destroy(dev, &priv->flows, flow);
/* Release the parser's temporary Verbs attributes. */
3586 for (i = 0; i != hash_rxq_init_n; ++i) {
3587 if (parser.queue[i].ibv_attr)
3588 rte_free(parser.queue[i].ibv_attr);
3590 rte_errno = ret; /* Restore rte_errno. */
3595 * Update queue for specific filter.
3598 * Pointer to Ethernet device.
3599 * @param fdir_filter
3600 * Filter to be updated.
3603 * 0 on success, a negative errno value otherwise and rte_errno is set.
/* Update a flow director filter by deleting then re-adding it. */
3606 mlx5_fdir_filter_update(struct rte_eth_dev *dev,
3607 const struct rte_eth_fdir_filter *fdir_filter)
3611 ret = mlx5_fdir_filter_delete(dev, fdir_filter);
3614 return mlx5_fdir_filter_add(dev, fdir_filter);
3618 * Flush all filters.
3621 * Pointer to Ethernet device.
/* Flush all flow director filters by flushing the device's flow list. */
3624 mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
3626 struct priv *priv = dev->data->dev_private;
3628 mlx5_flow_list_flush(dev, &priv->flows);
3632 * Get flow director information.
3635 * Pointer to Ethernet device.
3636 * @param[out] fdir_info
3637 * Resulting flow director information.
/*
 * Report flow director capabilities: only mode and masks are meaningful;
 * every flex-payload/guaranteed-space field is reported as zero since the
 * PMD does not support those features.
 */
3640 mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
3642 struct rte_eth_fdir_masks *mask =
3643 &dev->data->dev_conf.fdir_conf.mask;
3645 fdir_info->mode = dev->data->dev_conf.fdir_conf.mode;
3646 fdir_info->guarant_spc = 0;
3647 rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
3648 fdir_info->max_flexpayload = 0;
3649 fdir_info->flow_types_mask[0] = 0;
3650 fdir_info->flex_payload_unit = 0;
3651 fdir_info->max_flex_payload_segment_num = 0;
3652 fdir_info->flex_payload_limit = 0;
3653 memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
3657 * Deal with flow director operations.
3660 * Pointer to Ethernet device.
3662 * Operation to perform.
3664 * Pointer to operation-specific structure.
3667 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Dispatch a flow director filter operation (add/update/delete/flush/info).
 * Only PERFECT and PERFECT_MAC_VLAN modes are accepted.
 */
3670 mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
3673 enum rte_fdir_mode fdir_mode =
3674 dev->data->dev_conf.fdir_conf.mode;
3676 if (filter_op == RTE_ETH_FILTER_NOP)
3678 if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
3679 fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
3680 DRV_LOG(ERR, "port %u flow director mode %d not supported",
3681 dev->data->port_id, fdir_mode);
3685 switch (filter_op) {
3686 case RTE_ETH_FILTER_ADD:
3687 return mlx5_fdir_filter_add(dev, arg);
3688 case RTE_ETH_FILTER_UPDATE:
3689 return mlx5_fdir_filter_update(dev, arg);
3690 case RTE_ETH_FILTER_DELETE:
3691 return mlx5_fdir_filter_delete(dev, arg);
3692 case RTE_ETH_FILTER_FLUSH:
3693 mlx5_fdir_filter_flush(dev);
3695 case RTE_ETH_FILTER_INFO:
3696 mlx5_fdir_info_get(dev, arg);
3699 DRV_LOG(DEBUG, "port %u unknown operation %u",
3700 dev->data->port_id, filter_op);
3708 * Manage filter operations.
3711 * Pointer to Ethernet device structure.
3712 * @param filter_type
3715 * Operation to perform.
3717 * Pointer to operation-specific structure.
3720 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Top-level filter_ctrl dev op: GENERIC returns the rte_flow ops table,
 * FDIR delegates to mlx5_fdir_ctrl_func(), anything else is ENOTSUP.
 */
3723 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
3724 enum rte_filter_type filter_type,
3725 enum rte_filter_op filter_op,
3728 switch (filter_type) {
3729 case RTE_ETH_FILTER_GENERIC:
/* Only GET is valid for the generic (rte_flow) filter type. */
3730 if (filter_op != RTE_ETH_FILTER_GET) {
3734 *(const void **)arg = &mlx5_flow_ops;
3736 case RTE_ETH_FILTER_FDIR:
3737 return mlx5_fdir_ctrl_func(dev, filter_op, arg);
3739 DRV_LOG(ERR, "port %u filter type (%d) not supported",
3740 dev->data->port_id, filter_type);
3741 rte_errno = ENOTSUP;
3748 * Detect number of Verbs flow priorities supported.
3751 * Pointer to Ethernet device.
3754 * number of supported Verbs flow priority.
3757 mlx5_get_max_verbs_prio(struct rte_eth_dev *dev)
3759 struct priv *priv = dev->data->dev_private;
3760 unsigned int verb_priorities = MLX5_VERBS_FLOW_PRIO_8;
3762 struct ibv_flow_attr attr;
3763 struct ibv_flow_spec_eth eth;
3764 struct ibv_flow_spec_action_drop drop;
3770 .type = IBV_FLOW_SPEC_ETH,
3771 .size = sizeof(struct ibv_flow_spec_eth),
3774 .size = sizeof(struct ibv_flow_spec_action_drop),
3775 .type = IBV_FLOW_SPEC_ACTION_DROP,
3778 struct ibv_flow *flow;
3781 flow_attr.attr.priority = verb_priorities - 1;
3782 flow = mlx5_glue->create_flow(priv->flow_drop_queue->qp,
3785 claim_zero(mlx5_glue->destroy_flow(flow));
3786 /* Try more priorities. */
3787 verb_priorities *= 2;
3789 /* Failed, restore last right number. */
3790 verb_priorities /= 2;
3794 DRV_LOG(DEBUG, "port %u Verbs flow priorities: %d,"
3795 " user flow priorities: %d",
3796 dev->data->port_id, verb_priorities, MLX5_CTRL_FLOW_PRIORITY);
3797 return verb_priorities;