/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2016 6WIND S.A.
 * Copyright 2016 Mellanox Technologies, Ltd
 */

/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#pragma GCC diagnostic ignored "-Wpedantic"
#include <infiniband/verbs.h>
#pragma GCC diagnostic error "-Wpedantic"

#include <rte_common.h>
#include <rte_ether.h>
#include <rte_eth_ctrl.h>
#include <rte_ethdev_driver.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>

#include "mlx5_defs.h"
#include "mlx5_glue.h"

/* Flow priority for control plane flows. */
#define MLX5_CTRL_FLOW_PRIORITY 1

/* Internet Protocol versions. */

#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
struct ibv_flow_spec_counter_action {

/* Dev ops structure defined in mlx5.c */
extern const struct eth_dev_ops mlx5_dev_ops;
extern const struct eth_dev_ops mlx5_dev_ops_isolate;
/** Structure given to the conversion functions. */
struct mlx5_flow_data {
	struct rte_eth_dev *dev; /**< Ethernet device. */
	struct mlx5_flow_parse *parser; /**< Parser context. */
	struct rte_flow_error *error; /**< Error context. */
mlx5_flow_create_eth(const struct rte_flow_item *item,
		     const void *default_mask,
		     struct mlx5_flow_data *data);

mlx5_flow_create_vlan(const struct rte_flow_item *item,
		      const void *default_mask,
		      struct mlx5_flow_data *data);

mlx5_flow_create_ipv4(const struct rte_flow_item *item,
		      const void *default_mask,
		      struct mlx5_flow_data *data);

mlx5_flow_create_ipv6(const struct rte_flow_item *item,
		      const void *default_mask,
		      struct mlx5_flow_data *data);

mlx5_flow_create_udp(const struct rte_flow_item *item,
		     const void *default_mask,
		     struct mlx5_flow_data *data);

mlx5_flow_create_tcp(const struct rte_flow_item *item,
		     const void *default_mask,
		     struct mlx5_flow_data *data);

mlx5_flow_create_vxlan(const struct rte_flow_item *item,
		       const void *default_mask,
		       struct mlx5_flow_data *data);

mlx5_flow_create_vxlan_gpe(const struct rte_flow_item *item,
			   const void *default_mask,
			   struct mlx5_flow_data *data);

mlx5_flow_create_gre(const struct rte_flow_item *item,
		     const void *default_mask,
		     struct mlx5_flow_data *data);

mlx5_flow_create_mpls(const struct rte_flow_item *item,
		      const void *default_mask,
		      struct mlx5_flow_data *data);

struct mlx5_flow_parse;

mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,

mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);

mlx5_flow_create_count(struct rte_eth_dev *dev, struct mlx5_flow_parse *parser);
/* Hash RX queue types. */

/* Initialization data for hash RX queue. */
struct hash_rxq_init {
	uint64_t hash_fields; /* Fields that participate in the hash. */
	uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
	unsigned int flow_priority; /* Flow priority to use. */
	unsigned int ip_version; /* Internet protocol. */
/* Initialization data for hash RX queues. */
const struct hash_rxq_init hash_rxq_init[] = {
		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
				IBV_RX_HASH_DST_IPV4 |
				IBV_RX_HASH_SRC_PORT_TCP |
				IBV_RX_HASH_DST_PORT_TCP),
		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
		.ip_version = MLX5_IPV4,

		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
				IBV_RX_HASH_DST_IPV4 |
				IBV_RX_HASH_SRC_PORT_UDP |
				IBV_RX_HASH_DST_PORT_UDP),
		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
		.ip_version = MLX5_IPV4,

		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
				IBV_RX_HASH_DST_IPV4),
		.dpdk_rss_hf = (ETH_RSS_IPV4 |
		.ip_version = MLX5_IPV4,

		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
				IBV_RX_HASH_DST_IPV6 |
				IBV_RX_HASH_SRC_PORT_TCP |
				IBV_RX_HASH_DST_PORT_TCP),
		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
		.ip_version = MLX5_IPV6,

		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
				IBV_RX_HASH_DST_IPV6 |
				IBV_RX_HASH_SRC_PORT_UDP |
				IBV_RX_HASH_DST_PORT_UDP),
		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
		.ip_version = MLX5_IPV6,

		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
				IBV_RX_HASH_DST_IPV6),
		.dpdk_rss_hf = (ETH_RSS_IPV6 |
		.ip_version = MLX5_IPV6,

/* Number of entries in hash_rxq_init[]. */
const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
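/*
 * Illustrative usage sketch (not part of the driver logic): the conversion
 * code below walks this table to map DPDK RSS types onto Verbs hash fields,
 * e.g.:
 *
 *	for (i = 0; i != hash_rxq_init_n; ++i)
 *		if (hash_rxq_init[i].dpdk_rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
 *			break;
 *
 * after which i indexes the IPv4/UDP entry.
 */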
/** Structure for holding counter stats. */
struct mlx5_flow_counter_stats {
	uint64_t hits; /**< Number of packets matched by the rule. */
	uint64_t bytes; /**< Number of bytes matched by the rule. */

/** Structure for Drop queue. */
struct mlx5_hrxq_drop {
	struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
	struct ibv_qp *qp; /**< Verbs queue pair. */
	struct ibv_wq *wq; /**< Verbs work queue. */
	struct ibv_cq *cq; /**< Verbs completion queue. */
/* Flow structures. */
	uint64_t hash_fields; /**< Fields that participate in the hash. */
	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
	struct ibv_flow *ibv_flow; /**< Verbs flow. */
	struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
/* Drop flow structures. */
struct mlx5_flow_drop {
	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
	struct ibv_flow *ibv_flow; /**< Verbs flow. */

	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
	uint32_t mark:1; /**< Set if the flow is marked. */
	uint32_t drop:1; /**< Drop queue. */
	struct rte_flow_action_rss rss_conf; /**< RSS configuration. */
	uint16_t (*queues)[]; /**< Queue indexes to use. */
	uint8_t rss_key[40]; /**< Copy of the RSS key. */
	uint32_t tunnel; /**< Tunnel type of RTE_PTYPE_TUNNEL_XXX. */
	struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
	struct mlx5_flow_counter_stats counter_stats; /**< The counter stats. */
	struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
	/**< Flow with Rx queue. */
/** Static initializer for items. */
	(const enum rte_flow_item_type []){ \
		__VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \

#define IS_TUNNEL(type) ( \
	(type) == RTE_FLOW_ITEM_TYPE_VXLAN || \
	(type) == RTE_FLOW_ITEM_TYPE_VXLAN_GPE || \
	(type) == RTE_FLOW_ITEM_TYPE_GRE || \
	(type) == RTE_FLOW_ITEM_TYPE_MPLS)

const uint32_t flow_ptype[] = {
	[RTE_FLOW_ITEM_TYPE_VXLAN] = RTE_PTYPE_TUNNEL_VXLAN,
	[RTE_FLOW_ITEM_TYPE_VXLAN_GPE] = RTE_PTYPE_TUNNEL_VXLAN_GPE,
	[RTE_FLOW_ITEM_TYPE_GRE] = RTE_PTYPE_TUNNEL_GRE,
	[RTE_FLOW_ITEM_TYPE_MPLS] = RTE_PTYPE_TUNNEL_MPLS_IN_GRE,

#define PTYPE_IDX(t) ((RTE_PTYPE_TUNNEL_MASK & (t)) >> 12)
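/*
 * Worked example (illustrative): tunnel ptypes occupy a 4-bit field under
 * RTE_PTYPE_TUNNEL_MASK, so PTYPE_IDX() compresses them into small array
 * indexes, e.g. PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN) == 0x3000 >> 12 == 3.
 */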
const uint32_t ptype_ext[] = {
	[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] = RTE_PTYPE_TUNNEL_VXLAN |
	[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN_GPE)] = RTE_PTYPE_TUNNEL_VXLAN_GPE |
	[PTYPE_IDX(RTE_PTYPE_TUNNEL_GRE)] = RTE_PTYPE_TUNNEL_GRE,
	[PTYPE_IDX(RTE_PTYPE_TUNNEL_MPLS_IN_GRE)] =
		RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
	[PTYPE_IDX(RTE_PTYPE_TUNNEL_MPLS_IN_UDP)] =
		RTE_PTYPE_TUNNEL_MPLS_IN_GRE | RTE_PTYPE_L4_UDP,
/** Structure to generate a simple graph of layers supported by the NIC. */
struct mlx5_flow_items {
	/** List of possible actions for these items. */
	const enum rte_flow_action_type *const actions;
	/** Bit-masks corresponding to the possibilities for the item. */
	/**
	 * Default bit-masks to use when item->mask is not provided. When
	 * \default_mask is also NULL, the full supported bit-mask (\mask) is
	 * used instead.
	 */
	const void *default_mask;
	/** Bit-masks size in bytes. */
	const unsigned int mask_sz;
	/**
	 * Conversion function from rte_flow to NIC specific flow.
	 *   rte_flow item to convert.
	 * @param default_mask
	 *   Default bit-masks to use when item->mask is not provided.
	 *   Internal structure to store the conversion.
	 *   0 on success, a negative errno value otherwise and rte_errno is
	 *   set.
	 */
	int (*convert)(const struct rte_flow_item *item,
		       const void *default_mask,
		       struct mlx5_flow_data *data);
	/** Size in bytes of the destination structure. */
	const unsigned int dst_sz;
	/** List of possible following items. */
	const enum rte_flow_item_type *const items;
/** Valid actions for this PMD. */
static const enum rte_flow_action_type valid_actions[] = {
	RTE_FLOW_ACTION_TYPE_DROP,
	RTE_FLOW_ACTION_TYPE_QUEUE,
	RTE_FLOW_ACTION_TYPE_MARK,
	RTE_FLOW_ACTION_TYPE_FLAG,
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
	RTE_FLOW_ACTION_TYPE_COUNT,
	RTE_FLOW_ACTION_TYPE_END,
/** Graph of supported items and associated actions. */
static const struct mlx5_flow_items mlx5_flow_items[] = {
	[RTE_FLOW_ITEM_TYPE_END] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
			       RTE_FLOW_ITEM_TYPE_VXLAN,
			       RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
			       RTE_FLOW_ITEM_TYPE_GRE),
	[RTE_FLOW_ITEM_TYPE_ETH] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
			       RTE_FLOW_ITEM_TYPE_IPV4,
			       RTE_FLOW_ITEM_TYPE_IPV6),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_eth){
			.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
			.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
		.default_mask = &rte_flow_item_eth_mask,
		.mask_sz = sizeof(struct rte_flow_item_eth),
		.convert = mlx5_flow_create_eth,
		.dst_sz = sizeof(struct ibv_flow_spec_eth),
	[RTE_FLOW_ITEM_TYPE_VLAN] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
			       RTE_FLOW_ITEM_TYPE_IPV6),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_vlan){
		.default_mask = &rte_flow_item_vlan_mask,
		.mask_sz = sizeof(struct rte_flow_item_vlan),
		.convert = mlx5_flow_create_vlan,
	[RTE_FLOW_ITEM_TYPE_IPV4] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
			       RTE_FLOW_ITEM_TYPE_TCP,
			       RTE_FLOW_ITEM_TYPE_GRE),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_ipv4){
				.type_of_service = -1,
		.default_mask = &rte_flow_item_ipv4_mask,
		.mask_sz = sizeof(struct rte_flow_item_ipv4),
		.convert = mlx5_flow_create_ipv4,
		.dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
	[RTE_FLOW_ITEM_TYPE_IPV6] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
			       RTE_FLOW_ITEM_TYPE_TCP,
			       RTE_FLOW_ITEM_TYPE_GRE),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_ipv6){
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
		.default_mask = &rte_flow_item_ipv6_mask,
		.mask_sz = sizeof(struct rte_flow_item_ipv6),
		.convert = mlx5_flow_create_ipv6,
		.dst_sz = sizeof(struct ibv_flow_spec_ipv6),
	[RTE_FLOW_ITEM_TYPE_UDP] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN,
			       RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
			       RTE_FLOW_ITEM_TYPE_MPLS),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_udp){
		.default_mask = &rte_flow_item_udp_mask,
		.mask_sz = sizeof(struct rte_flow_item_udp),
		.convert = mlx5_flow_create_udp,
		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
	[RTE_FLOW_ITEM_TYPE_TCP] = {
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_tcp){
		.default_mask = &rte_flow_item_tcp_mask,
		.mask_sz = sizeof(struct rte_flow_item_tcp),
		.convert = mlx5_flow_create_tcp,
		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
	[RTE_FLOW_ITEM_TYPE_GRE] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
			       RTE_FLOW_ITEM_TYPE_IPV4,
			       RTE_FLOW_ITEM_TYPE_IPV6,
			       RTE_FLOW_ITEM_TYPE_MPLS),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_gre){
		.default_mask = &rte_flow_item_gre_mask,
		.mask_sz = sizeof(struct rte_flow_item_gre),
		.convert = mlx5_flow_create_gre,
#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
		.dst_sz = sizeof(struct ibv_flow_spec_gre),
		.dst_sz = sizeof(struct ibv_flow_spec_tunnel),
	[RTE_FLOW_ITEM_TYPE_MPLS] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
			       RTE_FLOW_ITEM_TYPE_IPV4,
			       RTE_FLOW_ITEM_TYPE_IPV6),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_mpls){
			.label_tc_s = "\xff\xff\xf0",
		.default_mask = &rte_flow_item_mpls_mask,
		.mask_sz = sizeof(struct rte_flow_item_mpls),
		.convert = mlx5_flow_create_mpls,
#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
		.dst_sz = sizeof(struct ibv_flow_spec_mpls),
	[RTE_FLOW_ITEM_TYPE_VXLAN] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
			       RTE_FLOW_ITEM_TYPE_IPV4, /* For L3 VXLAN. */
			       RTE_FLOW_ITEM_TYPE_IPV6), /* For L3 VXLAN. */
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_vxlan){
			.vni = "\xff\xff\xff",
		.default_mask = &rte_flow_item_vxlan_mask,
		.mask_sz = sizeof(struct rte_flow_item_vxlan),
		.convert = mlx5_flow_create_vxlan,
		.dst_sz = sizeof(struct ibv_flow_spec_tunnel),
	[RTE_FLOW_ITEM_TYPE_VXLAN_GPE] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
			       RTE_FLOW_ITEM_TYPE_IPV4,
			       RTE_FLOW_ITEM_TYPE_IPV6),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_vxlan_gpe){
			.vni = "\xff\xff\xff",
		.default_mask = &rte_flow_item_vxlan_gpe_mask,
		.mask_sz = sizeof(struct rte_flow_item_vxlan_gpe),
		.convert = mlx5_flow_create_vxlan_gpe,
		.dst_sz = sizeof(struct ibv_flow_spec_tunnel),
/** Structure to pass to the conversion function. */
struct mlx5_flow_parse {
	uint32_t inner; /**< Verbs value, set once tunnel is encountered. */
	/**< Whether resources should remain after a validate. */
	uint32_t drop:1; /**< Target is a drop queue. */
	uint32_t mark:1; /**< Mark is present in the flow. */
	uint32_t count:1; /**< Count is present in the flow. */
	uint32_t mark_id; /**< Mark identifier. */
	struct rte_flow_action_rss rss_conf; /**< RSS configuration. */
	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indexes to use. */
	uint8_t rss_key[40]; /**< Copy of the RSS key. */
	enum hash_rxq_type layer; /**< Last pattern layer detected. */
	enum hash_rxq_type out_layer; /**< Last outer pattern layer detected. */
	uint32_t tunnel; /**< Tunnel type of RTE_PTYPE_TUNNEL_XXX. */
	struct ibv_counter_set *cs; /**< Holds the counter set for the rule. */
		struct ibv_flow_attr *ibv_attr;
		/**< Pointer to Verbs attributes. */
		/**< Current position or total size of the attribute. */
		uint64_t hash_fields; /**< Verbs hash fields. */
	} queue[RTE_DIM(hash_rxq_init)];
static const struct rte_flow_ops mlx5_flow_ops = {
	.validate = mlx5_flow_validate,
	.create = mlx5_flow_create,
	.destroy = mlx5_flow_destroy,
	.flush = mlx5_flow_flush,
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
	.query = mlx5_flow_query,
	.isolate = mlx5_flow_isolate,
/* Convert FDIR request to Generic flow. */
	struct rte_flow_attr attr;
	struct rte_flow_action actions[2];
	struct rte_flow_item items[4];
	struct rte_flow_item_eth l2;
	struct rte_flow_item_eth l2_mask;
		struct rte_flow_item_ipv4 ipv4;
		struct rte_flow_item_ipv6 ipv6;
		struct rte_flow_item_ipv4 ipv4;
		struct rte_flow_item_ipv6 ipv6;
		struct rte_flow_item_udp udp;
		struct rte_flow_item_tcp tcp;
		struct rte_flow_item_udp udp;
		struct rte_flow_item_tcp tcp;
	struct rte_flow_action_queue queue;
/* Verbs specification header. */
struct ibv_spec_header {
	enum ibv_flow_spec_type type;
 * Check that an item is fully supported by the NIC matching capability.
 *   Item specification.
 *   Bit-masks covering supported fields to compare with spec, last and mask in
 *   Bit-mask size in bytes.
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_item_validate(const struct rte_flow_item *item,
			const uint8_t *mask, unsigned int size)
	const uint8_t *spec = item->spec;
	const uint8_t *last = item->last;
	const uint8_t *m = item->mask ? item->mask : mask;

	if (!spec && (item->mask || last))
	/*
	 * Single-pass check to make sure that:
	 * - item->mask is supported, no bits are set outside mask.
	 * - Both masked item->spec and item->last are equal (no range
	 *   supported).
	 */
	for (i = 0; i < size; i++) {
		if ((m[i] | mask[i]) != mask[i])
		if (last && ((spec[i] & m[i]) != (last[i] & m[i])))
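/*
 * Worked example for the range check above (illustrative): with mask
 * 255.255.255.0 on an IPv4 address, spec 10.0.0.1 and last 10.0.0.255 pass
 * because the masked bytes are identical, while last 10.0.1.255 fails since
 * byte 2 differs under the mask.
 */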
 * Extract attribute to the parser.
 *   Flow rule attributes.
 *   Perform verbose error reporting if not NULL.
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_convert_attributes(const struct rte_flow_attr *attr,
			     struct rte_flow_error *error)
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
				   "groups are not supported");
	if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
				   "priorities are not supported");
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
				   "egress is not supported");
	if (attr->transfer) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
				   "transfer is not supported");
	if (!attr->ingress) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
				   "only ingress is supported");
 * Extract actions request to the parser.
 *   Pointer to Ethernet device.
 *   Associated actions (list terminated by the END action).
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_convert_actions(struct rte_eth_dev *dev,
			  const struct rte_flow_action actions[],
			  struct rte_flow_error *error,
			  struct mlx5_flow_parse *parser)
	enum { FATE = 1, MARK = 2, COUNT = 4, };
	uint32_t overlap = 0;
	struct priv *priv = dev->data->dev_private;
	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
				goto exit_action_overlap;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
			const struct rte_flow_action_queue *queue =
				(const struct rte_flow_action_queue *)
				goto exit_action_overlap;
			if (!queue || (queue->index > (priv->rxqs_n - 1)))
				goto exit_action_not_supported;
			parser->queues[0] = queue->index;
			parser->rss_conf = (struct rte_flow_action_rss){
				.queue = parser->queues,
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
			const struct rte_flow_action_rss *rss =
				(const struct rte_flow_action_rss *)
			const uint8_t *rss_key;
			uint32_t rss_key_len;

				goto exit_action_overlap;
			    rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   "the only supported RSS hash"
						   " function is Toeplitz");
#ifndef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
			if (parser->rss_conf.level > 1) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   "a nonzero RSS encapsulation"
						   " level is not supported");
			if (parser->rss_conf.level > 2) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   "RSS encapsulation level"
						   " > 1 is not supported");
			if (rss->types & MLX5_RSS_HF_MASK) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   "unsupported RSS type"
				rss_key_len = rss->key_len;
				rss_key_len = rss_hash_default_key_len;
				rss_key = rss_hash_default_key;
			if (rss_key_len != RTE_DIM(parser->rss_key)) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   "RSS hash key must be"
						   " exactly 40 bytes long");
			if (!rss->queue_num) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
			if (rss->queue_num > RTE_DIM(parser->queues)) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   "too many queues for RSS"
			for (n = 0; n < rss->queue_num; ++n) {
				if (rss->queue[n] >= priv->rxqs_n) {
					rte_flow_error_set(error, EINVAL,
						RTE_FLOW_ERROR_TYPE_ACTION,
						"queue id > number of"
			parser->rss_conf = (struct rte_flow_action_rss){
				.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
				.key_len = rss_key_len,
				.queue_num = rss->queue_num,
				.key = memcpy(parser->rss_key, rss_key,
					      sizeof(*rss_key) * rss_key_len),
				.queue = memcpy(parser->queues, rss->queue,
						sizeof(*rss->queue) *
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
			const struct rte_flow_action_mark *mark =
				(const struct rte_flow_action_mark *)
				goto exit_action_overlap;
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   "mark must be defined");
			} else if (mark->id >= MLX5_FLOW_MARK_MAX) {
				rte_flow_error_set(error, ENOTSUP,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   "mark must be between 0"
			parser->mark_id = mark->id;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
				goto exit_action_overlap;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
			   priv->config.flow_counter_en) {
				goto exit_action_overlap;
			goto exit_action_not_supported;
	/* When fate is unknown, drop traffic. */
	if (!(overlap & FATE))
	if (parser->drop && parser->mark)
	if (!parser->rss_conf.queue_num && !parser->drop) {
		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "no valid action");
exit_action_not_supported:
	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
			   actions, "action not supported");
	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
			   actions, "overlapping actions are not supported");
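/*
 * Sketch of an RSS action that satisfies the checks above (illustrative;
 * "key" and "queues" stand for caller-provided arrays, not defined here):
 *
 *	struct rte_flow_action_rss rss = {
 *		.func = RTE_ETH_HASH_FUNCTION_TOEPLITZ,
 *		.level = 1,
 *		.types = ETH_RSS_NONFRAG_IPV4_UDP,
 *		.key_len = 40,
 *		.key = key,
 *		.queue_num = 2,
 *		.queue = queues,
 *	};
 *
 * The key must be exactly 40 bytes and every queue index below priv->rxqs_n.
 */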
 *   Pattern specification (list terminated by the END pattern item).
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_convert_items_validate(struct rte_eth_dev *dev,
				 const struct rte_flow_item items[],
				 struct rte_flow_error *error,
				 struct mlx5_flow_parse *parser)
	struct priv *priv = dev->data->dev_private;
	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
	unsigned int last_voids = 0;
	/* Initialise the offsets to start after verbs attribute. */
	for (i = 0; i != hash_rxq_init_n; ++i)
		parser->queue[i].offset = sizeof(struct ibv_flow_attr);
	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
		const struct mlx5_flow_items *token = NULL;

		if (items->type == RTE_FLOW_ITEM_TYPE_VOID) {
		     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
			if (cur_item->items[i] == items->type) {
				token = &mlx5_flow_items[items->type];
			goto exit_item_not_supported;
		ret = mlx5_flow_item_validate(items,
					      (const uint8_t *)cur_item->mask,
			goto exit_item_not_supported;
		if (IS_TUNNEL(items->type)) {
			if (parser->tunnel &&
			    !((items - last_voids - 1)->type ==
			      RTE_FLOW_ITEM_TYPE_GRE && items->type ==
			      RTE_FLOW_ITEM_TYPE_MPLS)) {
				rte_flow_error_set(error, ENOTSUP,
						   RTE_FLOW_ERROR_TYPE_ITEM,
						   "Cannot recognize multiple"
						   " tunnel encapsulations.");
			if (items->type == RTE_FLOW_ITEM_TYPE_MPLS &&
			    !priv->config.mpls_en) {
				rte_flow_error_set(error, ENOTSUP,
						   RTE_FLOW_ERROR_TYPE_ITEM,
						   "MPLS not supported or"
						   " disabled in firmware"
			if (!priv->config.tunnel_en &&
			    parser->rss_conf.level > 1) {
				rte_flow_error_set(error, ENOTSUP,
						   RTE_FLOW_ERROR_TYPE_ITEM,
						   "RSS on tunnel is not supported");
			parser->inner = IBV_FLOW_SPEC_INNER;
			parser->tunnel = flow_ptype[items->type];
			parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
			for (n = 0; n != hash_rxq_init_n; ++n)
				parser->queue[n].offset += cur_item->dst_sz;
		parser->queue[HASH_RXQ_ETH].offset +=
			sizeof(struct ibv_flow_spec_action_drop);
		for (i = 0; i != hash_rxq_init_n; ++i)
			parser->queue[i].offset +=
				sizeof(struct ibv_flow_spec_action_tag);
		unsigned int size = sizeof(struct ibv_flow_spec_counter_action);

		for (i = 0; i != hash_rxq_init_n; ++i)
			parser->queue[i].offset += size;
exit_item_not_supported:
	return rte_flow_error_set(error, -ret, RTE_FLOW_ERROR_TYPE_ITEM,
				  items, "item not supported");
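/*
 * Offset bookkeeping example (illustrative): for a pattern such as
 * "eth / ipv4 / udp", each queue offset accumulates
 * sizeof(struct ibv_flow_attr) + sizeof(struct ibv_flow_spec_eth) +
 * sizeof(struct ibv_flow_spec_ipv4_ext) +
 * sizeof(struct ibv_flow_spec_tcp_udp); this is the buffer size later
 * requested from mlx5_flow_convert_allocate().
 */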
 * Allocate memory space to store verbs flow attributes.
 *   Amount of bytes to allocate.
 *   Perform verbose error reporting if not NULL.
 *   A verbs flow attribute on success, NULL otherwise and rte_errno is set.
static struct ibv_flow_attr *
mlx5_flow_convert_allocate(unsigned int size, struct rte_flow_error *error)
	struct ibv_flow_attr *ibv_attr;

	ibv_attr = rte_calloc(__func__, 1, size, 0);
		rte_flow_error_set(error, ENOMEM,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   "cannot allocate verbs spec attributes");
 * Give inner packet matching a higher priority than non-inner matching.
 *   Pointer to Ethernet device.
 * @param[in, out] parser
 *   Internal parser structure.
 *   User flow attribute.
mlx5_flow_update_priority(struct rte_eth_dev *dev,
			  struct mlx5_flow_parse *parser,
			  const struct rte_flow_attr *attr)
	struct priv *priv = dev->data->dev_private;
	/* Priority mapping:	8 priorities	>= 16 priorities
	 * Control flow:	4-7		8-15
	 * User normal flow:	1-3		4-7
	 * User tunnel flow:	0-2		0-3
	 */
	priority = attr->priority * MLX5_VERBS_FLOW_PRIO_8;
	if (priv->config.max_verbs_prio == MLX5_VERBS_FLOW_PRIO_8)
	/*
	 * Lower non-tunnel flow priority by 1 when only 8 Verbs priorities
	 * are supported, by 4 otherwise.
	 */
	if (!parser->inner) {
		if (priv->config.max_verbs_prio == MLX5_VERBS_FLOW_PRIO_8)
			priority += MLX5_VERBS_FLOW_PRIO_8 / 2;
	parser->queue[HASH_RXQ_ETH].ibv_attr->priority = priority +
		hash_rxq_init[HASH_RXQ_ETH].flow_priority;

	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (!parser->queue[i].ibv_attr)
		parser->queue[i].ibv_attr->priority = priority +
			hash_rxq_init[i].flow_priority;
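/*
 * Worked example (illustrative): with 16 Verbs priorities, a non-tunnel
 * user flow created with attr->priority == 0 gets base priority
 * 0 * 8 + 4 = 4, to which the per-layer flow_priority from hash_rxq_init[]
 * is added, landing in the 4-7 "user normal flow" band of the table above.
 */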
 * Finalise verbs flow attributes.
 * @param[in, out] parser
 *   Internal parser structure.
mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
	uint32_t inner = parser->inner;

	/* Don't create extra flows for outer RSS. */
	if (parser->tunnel && parser->rss_conf.level < 2)
	/*
	 * Fill missing layers in verbs specifications, or compute the correct
	 * offset to allocate the memory space for the attributes and
	 * specifications.
	 */
	for (i = 0; i != hash_rxq_init_n - 1; ++i) {
			struct ibv_flow_spec_ipv4_ext ipv4;
			struct ibv_flow_spec_ipv6 ipv6;
			struct ibv_flow_spec_tcp_udp udp_tcp;
			struct ibv_flow_spec_eth eth;

		if (i == parser->layer)
		if (parser->layer == HASH_RXQ_ETH ||
		    parser->layer == HASH_RXQ_TUNNEL) {
			if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
				size = sizeof(struct ibv_flow_spec_ipv4_ext);
				specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
					.type = inner | IBV_FLOW_SPEC_IPV4_EXT,
				size = sizeof(struct ibv_flow_spec_ipv6);
				specs.ipv6 = (struct ibv_flow_spec_ipv6){
					.type = inner | IBV_FLOW_SPEC_IPV6,
			if (parser->queue[i].ibv_attr) {
				dst = (void *)((uintptr_t)
					       parser->queue[i].ibv_attr +
					       parser->queue[i].offset);
				memcpy(dst, &specs, size);
				++parser->queue[i].ibv_attr->num_of_specs;
			parser->queue[i].offset += size;
		if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
		    (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
			size = sizeof(struct ibv_flow_spec_tcp_udp);
			specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
				.type = inner | ((i == HASH_RXQ_UDPV4 ||
						  i == HASH_RXQ_UDPV6) ?
			if (parser->queue[i].ibv_attr) {
				dst = (void *)((uintptr_t)
					       parser->queue[i].ibv_attr +
					       parser->queue[i].offset);
				memcpy(dst, &specs, size);
				++parser->queue[i].ibv_attr->num_of_specs;
			parser->queue[i].offset += size;
 * Update flows according to pattern and RSS hash fields.
 * @param[in, out] parser
 *   Internal parser structure.
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_convert_rss(struct mlx5_flow_parse *parser)
	enum hash_rxq_type start;
	enum hash_rxq_type layer;
	int outer = parser->tunnel && parser->rss_conf.level < 2;
	uint64_t rss = parser->rss_conf.types;

	/* Default to outer RSS. */
	if (!parser->rss_conf.level)
		parser->rss_conf.level = 1;
	layer = outer ? parser->out_layer : parser->layer;
	if (layer == HASH_RXQ_TUNNEL)
		layer = HASH_RXQ_ETH;
	/* Only one hash type for outer RSS. */
	if (rss && layer == HASH_RXQ_ETH) {
		start = HASH_RXQ_TCPV4;
	} else if (rss && layer != HASH_RXQ_ETH &&
		   !(rss & hash_rxq_init[layer].dpdk_rss_hf)) {
		/* If RSS does not match the L4 pattern, try L3 RSS. */
		if (layer < HASH_RXQ_IPV4)
			layer = HASH_RXQ_IPV4;
		else if (layer > HASH_RXQ_IPV4 && layer < HASH_RXQ_IPV6)
			layer = HASH_RXQ_IPV6;
	/* Scan first valid hash type. */
	for (i = start; rss && i <= layer; ++i) {
		if (!parser->queue[i].ibv_attr)
		if (hash_rxq_init[i].dpdk_rss_hf & rss)
	if (rss && i <= layer)
		parser->queue[layer].hash_fields =
			hash_rxq_init[i].hash_fields;
	/* Trim unused hash types. */
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser->queue[i].ibv_attr && i != layer) {
			rte_free(parser->queue[i].ibv_attr);
			parser->queue[i].ibv_attr = NULL;
	/* Expand for inner or normal RSS. */
	if (rss && (layer == HASH_RXQ_ETH || layer == HASH_RXQ_IPV4))
		start = HASH_RXQ_TCPV4;
	else if (rss && layer == HASH_RXQ_IPV6)
		start = HASH_RXQ_TCPV6;
	/* For L4 pattern, try L3 RSS if no L4 RSS. */
	/* Trim unused hash types. */
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (!parser->queue[i].ibv_attr)
		if (i < start || i > layer) {
			rte_free(parser->queue[i].ibv_attr);
			parser->queue[i].ibv_attr = NULL;
		if (hash_rxq_init[i].dpdk_rss_hf & rss) {
			parser->queue[i].hash_fields =
				hash_rxq_init[i].hash_fields;
		} else if (i != layer) {
			/* Remove unused RSS expansion. */
			rte_free(parser->queue[i].ibv_attr);
			parser->queue[i].ibv_attr = NULL;
		} else if (layer < HASH_RXQ_IPV4 &&
			   (hash_rxq_init[HASH_RXQ_IPV4].dpdk_rss_hf &
			/* Allow IPv4 RSS on L4 pattern. */
			parser->queue[i].hash_fields =
				hash_rxq_init[HASH_RXQ_IPV4]
		} else if (i > HASH_RXQ_IPV4 && i < HASH_RXQ_IPV6 &&
			   (hash_rxq_init[HASH_RXQ_IPV6].dpdk_rss_hf &
			/* Allow IPv6 RSS on L4 pattern. */
			parser->queue[i].hash_fields =
				hash_rxq_init[HASH_RXQ_IPV6]
 * Validate and convert a flow supported by the NIC.
 *   Pointer to Ethernet device.
 *   Flow rule attributes.
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_convert(struct rte_eth_dev *dev,
		  const struct rte_flow_attr *attr,
		  const struct rte_flow_item items[],
		  const struct rte_flow_action actions[],
		  struct rte_flow_error *error,
		  struct mlx5_flow_parse *parser)
	const struct mlx5_flow_items *cur_item = mlx5_flow_items;

	/* First step. Validate the attributes, items and actions. */
	*parser = (struct mlx5_flow_parse){
		.create = parser->create,
		.layer = HASH_RXQ_ETH,
		.mark_id = MLX5_FLOW_MARK_DEFAULT,
	ret = mlx5_flow_convert_attributes(attr, error);
	ret = mlx5_flow_convert_actions(dev, actions, error, parser);
	ret = mlx5_flow_convert_items_validate(dev, items, error, parser);
	mlx5_flow_convert_finalise(parser);
	/*
	 * Allocate the memory space to store verbs specifications.
	 */
		unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;

		parser->queue[HASH_RXQ_ETH].ibv_attr =
			mlx5_flow_convert_allocate(offset, error);
		if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
		parser->queue[HASH_RXQ_ETH].offset =
			sizeof(struct ibv_flow_attr);
		for (i = 0; i != hash_rxq_init_n; ++i) {
			unsigned int offset;

			offset = parser->queue[i].offset;
			parser->queue[i].ibv_attr =
				mlx5_flow_convert_allocate(offset, error);
			if (!parser->queue[i].ibv_attr)
			parser->queue[i].offset = sizeof(struct ibv_flow_attr);
	/* Third step. Conversion parse, fill the specifications. */
	parser->layer = HASH_RXQ_ETH;
	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
		struct mlx5_flow_data data = {

		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
		cur_item = &mlx5_flow_items[items->type];
		ret = cur_item->convert(items,
					(cur_item->default_mask ?
					 cur_item->default_mask :
	if (!parser->drop) {
		/* RSS check, remove unused hash types. */
		ret = mlx5_flow_convert_rss(parser);
		/* Complete missing specification. */
		mlx5_flow_convert_finalise(parser);
	mlx5_flow_update_priority(dev, parser, attr);
		mlx5_flow_create_flag_mark(parser, parser->mark_id);
	if (parser->count && parser->create) {
		mlx5_flow_create_count(dev, parser);
			goto exit_count_error;
	/* Only verification is expected, all resources should be released. */
	if (!parser->create) {
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (parser->queue[i].ibv_attr) {
				rte_free(parser->queue[i].ibv_attr);
				parser->queue[i].ibv_attr = NULL;
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser->queue[i].ibv_attr) {
			rte_free(parser->queue[i].ibv_attr);
			parser->queue[i].ibv_attr = NULL;
	rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
			   NULL, "cannot allocate verbs spec attributes");
	rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
			   NULL, "cannot create counter");
 * Copy the specification created into the flow.
 *   Internal parser structure.
 *   Create specification.
 *   Size in bytes of the specification to copy.
mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (!parser->queue[i].ibv_attr)
		dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
				parser->queue[i].offset);
		memcpy(dst, src, size);
		++parser->queue[i].ibv_attr->num_of_specs;
		parser->queue[i].offset += size;
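/*
 * Usage sketch (illustrative): each mlx5_flow_create_*() conversion below
 * builds one Verbs specification on the stack and appends it to every
 * active queue through this helper, e.g.:
 *
 *	struct ibv_flow_spec_eth eth = {
 *		.type = parser->inner | IBV_FLOW_SPEC_ETH,
 *	};
 *	mlx5_flow_create_copy(parser, &eth, sizeof(eth));
 */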
 * Convert Ethernet item to Verbs specification.
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_create_eth(const struct rte_flow_item *item,
		     const void *default_mask,
		     struct mlx5_flow_data *data)
	const struct rte_flow_item_eth *spec = item->spec;
	const struct rte_flow_item_eth *mask = item->mask;
	struct mlx5_flow_parse *parser = data->parser;
	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
	struct ibv_flow_spec_eth eth = {
		.type = parser->inner | IBV_FLOW_SPEC_ETH,

	parser->layer = HASH_RXQ_ETH;
			mask = default_mask;
		memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
		memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
		eth.val.ether_type = spec->type;
		memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
		memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
		eth.mask.ether_type = mask->type;
		/* Remove unwanted bits from values. */
		for (i = 0; i < ETHER_ADDR_LEN; ++i) {
			eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
			eth.val.src_mac[i] &= eth.mask.src_mac[i];
		eth.val.ether_type &= eth.mask.ether_type;
	mlx5_flow_create_copy(parser, &eth, eth_size);
 * Convert VLAN item to Verbs specification.
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_create_vlan(const struct rte_flow_item *item,
		      const void *default_mask,
		      struct mlx5_flow_data *data)
	const struct rte_flow_item_vlan *spec = item->spec;
	const struct rte_flow_item_vlan *mask = item->mask;
	struct mlx5_flow_parse *parser = data->parser;
	struct ibv_flow_spec_eth *eth;
	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
	const char *msg = "VLAN cannot be empty";

			mask = default_mask;
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (!parser->queue[i].ibv_attr)
			eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
				       parser->queue[i].offset - eth_size);
			eth->val.vlan_tag = spec->tci;
			eth->mask.vlan_tag = mask->tci;
			eth->val.vlan_tag &= eth->mask.vlan_tag;
			/*
			 * From verbs perspective an empty VLAN is equivalent
			 * to a packet without VLAN layer.
			 */
			if (!eth->mask.vlan_tag)
			/* Outer TPID cannot be matched. */
			if (eth->mask.ether_type) {
				msg = "VLAN TPID matching is not supported";
			eth->val.ether_type = spec->inner_type;
			eth->mask.ether_type = mask->inner_type;
			eth->val.ether_type &= eth->mask.ether_type;
	return rte_flow_error_set(data->error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM,
 * Convert IPv4 item to Verbs specification.
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
		      const void *default_mask,
		      struct mlx5_flow_data *data)
	struct priv *priv = data->dev->data->dev_private;
	const struct rte_flow_item_ipv4 *spec = item->spec;
	const struct rte_flow_item_ipv4 *mask = item->mask;
	struct mlx5_flow_parse *parser = data->parser;
	unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
	struct ibv_flow_spec_ipv4_ext ipv4 = {
		.type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,

	if (parser->layer == HASH_RXQ_TUNNEL &&
	    parser->tunnel == ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] &&
	    !priv->config.l3_vxlan_en)
		return rte_flow_error_set(data->error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  "L3 VXLAN not enabled by device"
					  " parameter and/or not configured"
	parser->layer = HASH_RXQ_IPV4;
			mask = default_mask;
		ipv4.val = (struct ibv_flow_ipv4_ext_filter){
			.src_ip = spec->hdr.src_addr,
			.dst_ip = spec->hdr.dst_addr,
			.proto = spec->hdr.next_proto_id,
			.tos = spec->hdr.type_of_service,
		ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
			.src_ip = mask->hdr.src_addr,
			.dst_ip = mask->hdr.dst_addr,
			.proto = mask->hdr.next_proto_id,
			.tos = mask->hdr.type_of_service,
		/* Remove unwanted bits from values. */
		ipv4.val.src_ip &= ipv4.mask.src_ip;
		ipv4.val.dst_ip &= ipv4.mask.dst_ip;
		ipv4.val.proto &= ipv4.mask.proto;
		ipv4.val.tos &= ipv4.mask.tos;
	mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
 * Convert IPv6 item to Verbs specification.
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
		      const void *default_mask,
		      struct mlx5_flow_data *data)
	struct priv *priv = data->dev->data->dev_private;
	const struct rte_flow_item_ipv6 *spec = item->spec;
	const struct rte_flow_item_ipv6 *mask = item->mask;
	struct mlx5_flow_parse *parser = data->parser;
	unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
	struct ibv_flow_spec_ipv6 ipv6 = {
		.type = parser->inner | IBV_FLOW_SPEC_IPV6,

	if (parser->layer == HASH_RXQ_TUNNEL &&
	    parser->tunnel == ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] &&
	    !priv->config.l3_vxlan_en)
		return rte_flow_error_set(data->error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  "L3 VXLAN not enabled by device"
					  " parameter and/or not configured"
	parser->layer = HASH_RXQ_IPV6;
		uint32_t vtc_flow_val;
		uint32_t vtc_flow_mask;

			mask = default_mask;
		memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
		       RTE_DIM(ipv6.val.src_ip));
		memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
		       RTE_DIM(ipv6.val.dst_ip));
		memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
		       RTE_DIM(ipv6.mask.src_ip));
		memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
		       RTE_DIM(ipv6.mask.dst_ip));
		vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
		vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
		ipv6.val.flow_label =
			rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
		ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
		ipv6.val.next_hdr = spec->hdr.proto;
		ipv6.val.hop_limit = spec->hdr.hop_limits;
		ipv6.mask.flow_label =
			rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
		ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
		ipv6.mask.next_hdr = mask->hdr.proto;
		ipv6.mask.hop_limit = mask->hdr.hop_limits;
		/* Remove unwanted bits from values. */
		for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
			ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
			ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
		ipv6.val.flow_label &= ipv6.mask.flow_label;
		ipv6.val.traffic_class &= ipv6.mask.traffic_class;
		ipv6.val.next_hdr &= ipv6.mask.next_hdr;
		ipv6.val.hop_limit &= ipv6.mask.hop_limit;
	mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
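/*
 * Layout reminder for the vtc_flow handling above: in the IPv6 header word
 * (host order after rte_be_to_cpu_32()), bits 31:28 carry the version,
 * bits 27:20 the traffic class and bits 19:0 the flow label, which
 * IPV6_HDR_TC_MASK and IPV6_HDR_FL_MASK select respectively.
 */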
 * Convert UDP item to Verbs specification.
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_create_udp(const struct rte_flow_item *item,
		     const void *default_mask,
		     struct mlx5_flow_data *data)
	const struct rte_flow_item_udp *spec = item->spec;
	const struct rte_flow_item_udp *mask = item->mask;
	struct mlx5_flow_parse *parser = data->parser;
	unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
	struct ibv_flow_spec_tcp_udp udp = {
		.type = parser->inner | IBV_FLOW_SPEC_UDP,

	if (parser->layer == HASH_RXQ_IPV4)
		parser->layer = HASH_RXQ_UDPV4;
		parser->layer = HASH_RXQ_UDPV6;
			mask = default_mask;
		udp.val.dst_port = spec->hdr.dst_port;
		udp.val.src_port = spec->hdr.src_port;
		udp.mask.dst_port = mask->hdr.dst_port;
		udp.mask.src_port = mask->hdr.src_port;
		/* Remove unwanted bits from values. */
		udp.val.src_port &= udp.mask.src_port;
		udp.val.dst_port &= udp.mask.dst_port;
	mlx5_flow_create_copy(parser, &udp, udp_size);
 * Convert TCP item to Verbs specification.
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_create_tcp(const struct rte_flow_item *item,
		     const void *default_mask,
		     struct mlx5_flow_data *data)
	const struct rte_flow_item_tcp *spec = item->spec;
	const struct rte_flow_item_tcp *mask = item->mask;
	struct mlx5_flow_parse *parser = data->parser;
	unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
	struct ibv_flow_spec_tcp_udp tcp = {
		.type = parser->inner | IBV_FLOW_SPEC_TCP,

	if (parser->layer == HASH_RXQ_IPV4)
		parser->layer = HASH_RXQ_TCPV4;
		parser->layer = HASH_RXQ_TCPV6;
			mask = default_mask;
		tcp.val.dst_port = spec->hdr.dst_port;
		tcp.val.src_port = spec->hdr.src_port;
		tcp.mask.dst_port = mask->hdr.dst_port;
		tcp.mask.src_port = mask->hdr.src_port;
		/* Remove unwanted bits from values. */
		tcp.val.src_port &= tcp.mask.src_port;
		tcp.val.dst_port &= tcp.mask.dst_port;
	mlx5_flow_create_copy(parser, &tcp, tcp_size);
 * Convert VXLAN item to Verbs specification.
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
		       const void *default_mask,
		       struct mlx5_flow_data *data)
	const struct rte_flow_item_vxlan *spec = item->spec;
	const struct rte_flow_item_vxlan *mask = item->mask;
	struct mlx5_flow_parse *parser = data->parser;
	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
	struct ibv_flow_spec_tunnel vxlan = {
		.type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,

	parser->inner = IBV_FLOW_SPEC_INNER;
	parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)];
	parser->out_layer = parser->layer;
	parser->layer = HASH_RXQ_TUNNEL;
	/* Default VXLAN to outer RSS. */
	if (!parser->rss_conf.level)
		parser->rss_conf.level = 1;
			mask = default_mask;
		memcpy(&id.vni[1], spec->vni, 3);
		vxlan.val.tunnel_id = id.vlan_id;
		memcpy(&id.vni[1], mask->vni, 3);
		vxlan.mask.tunnel_id = id.vlan_id;
		/* Remove unwanted bits from values. */
		vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
	/*
	 * Tunnel id 0 is equivalent to not adding a VXLAN layer: if this is
	 * the only layer defined in the Verbs specification, it is
	 * interpreted as a wildcard and every packet matches the rule; if it
	 * follows a full stack layer (e.g. eth / ipv4 / udp), every packet
	 * matching those layers also matches the rule.
	 * To avoid such a situation, VNI 0 is currently refused.
	 */
	/* Only allow a tunnel without a tunnel id pattern after a proper outer spec. */
	if (parser->out_layer == HASH_RXQ_ETH && !vxlan.val.tunnel_id)
		return rte_flow_error_set(data->error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  "VxLAN vni cannot be 0");
	mlx5_flow_create_copy(parser, &vxlan, size);
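/*
 * VNI packing example (illustrative): the 24-bit VNI is copied into the
 * three low-order bytes of the big-endian 32-bit tunnel_id, so VNI 0x123456
 * yields the bytes 00 12 34 56 in vxlan.val.tunnel_id.
 */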
 * Convert VXLAN-GPE item to Verbs specification.
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_create_vxlan_gpe(const struct rte_flow_item *item,
			   const void *default_mask,
			   struct mlx5_flow_data *data)
	struct priv *priv = data->dev->data->dev_private;
	const struct rte_flow_item_vxlan_gpe *spec = item->spec;
	const struct rte_flow_item_vxlan_gpe *mask = item->mask;
	struct mlx5_flow_parse *parser = data->parser;
	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
	struct ibv_flow_spec_tunnel vxlan = {
		.type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,

	if (!priv->config.l3_vxlan_en)
		return rte_flow_error_set(data->error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  "L3 VXLAN not enabled by device"
					  " parameter and/or not configured"
	parser->inner = IBV_FLOW_SPEC_INNER;
	parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN_GPE)];
	parser->out_layer = parser->layer;
	parser->layer = HASH_RXQ_TUNNEL;
	/* Default VXLAN-GPE to outer RSS. */
	if (!parser->rss_conf.level)
		parser->rss_conf.level = 1;
			mask = default_mask;
		memcpy(&id.vni[1], spec->vni, 3);
		vxlan.val.tunnel_id = id.vlan_id;
		memcpy(&id.vni[1], mask->vni, 3);
		vxlan.mask.tunnel_id = id.vlan_id;
			return rte_flow_error_set(data->error, EINVAL,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  "VxLAN-GPE protocol not"
		/* Remove unwanted bits from values. */
		vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
	/*
	 * Tunnel id 0 is equivalent to not adding a VXLAN layer: if this is
	 * the only layer defined in the Verbs specification, it is
	 * interpreted as a wildcard and every packet matches the rule; if it
	 * follows a full stack layer (e.g. eth / ipv4 / udp), every packet
	 * matching those layers also matches the rule.
	 * To avoid such a situation, VNI 0 is currently refused.
	 */
	/* Only allow a tunnel without a tunnel id pattern after a proper outer spec. */
	if (parser->out_layer == HASH_RXQ_ETH && !vxlan.val.tunnel_id)
		return rte_flow_error_set(data->error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  "VxLAN-GPE vni cannot be 0");
	mlx5_flow_create_copy(parser, &vxlan, size);
 * Convert GRE item to Verbs specification.
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_create_gre(const struct rte_flow_item *item,
		     const void *default_mask,
		     struct mlx5_flow_data *data)
	struct mlx5_flow_parse *parser = data->parser;
#ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
	struct ibv_flow_spec_tunnel tunnel = {
		.type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
	const struct rte_flow_item_gre *spec = item->spec;
	const struct rte_flow_item_gre *mask = item->mask;
	unsigned int size = sizeof(struct ibv_flow_spec_gre);
	struct ibv_flow_spec_gre tunnel = {
		.type = parser->inner | IBV_FLOW_SPEC_GRE,
	struct ibv_flow_spec_ipv4_ext *ipv4;
	struct ibv_flow_spec_ipv6 *ipv6;

	parser->inner = IBV_FLOW_SPEC_INNER;
	parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_GRE)];
	parser->out_layer = parser->layer;
	parser->layer = HASH_RXQ_TUNNEL;
	/* Default GRE to inner RSS. */
	if (!parser->rss_conf.level)
		parser->rss_conf.level = 2;
#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
			mask = default_mask;
		tunnel.val.c_ks_res0_ver = spec->c_rsvd0_ver;
		tunnel.val.protocol = spec->protocol;
		tunnel.mask.c_ks_res0_ver = mask->c_rsvd0_ver;
		tunnel.mask.protocol = mask->protocol;
		/* Remove unwanted bits from values. */
		tunnel.val.c_ks_res0_ver &= tunnel.mask.c_ks_res0_ver;
		tunnel.val.protocol &= tunnel.mask.protocol;
		tunnel.val.key &= tunnel.mask.key;
	/* Update encapsulation IP layer protocol. */
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (!parser->queue[i].ibv_attr)
		if (parser->out_layer == HASH_RXQ_IPV4) {
			ipv4 = (void *)((uintptr_t)parser->queue[i].ibv_attr +
					parser->queue[i].offset -
					sizeof(struct ibv_flow_spec_ipv4_ext));
			if (ipv4->mask.proto && ipv4->val.proto != MLX5_GRE)
			ipv4->val.proto = MLX5_GRE;
			ipv4->mask.proto = 0xff;
		} else if (parser->out_layer == HASH_RXQ_IPV6) {
			ipv6 = (void *)((uintptr_t)parser->queue[i].ibv_attr +
					parser->queue[i].offset -
					sizeof(struct ibv_flow_spec_ipv6));
			if (ipv6->mask.next_hdr &&
			    ipv6->val.next_hdr != MLX5_GRE)
			ipv6->val.next_hdr = MLX5_GRE;
			ipv6->mask.next_hdr = 0xff;
	if (i != hash_rxq_init_n)
		return rte_flow_error_set(data->error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  "IP protocol of GRE must be 47");
	mlx5_flow_create_copy(parser, &tunnel, size);
 * Convert MPLS item to Verbs specification.
 * MPLS tunnel types currently supported are MPLS-in-GRE and MPLS-in-UDP.
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_create_mpls(const struct rte_flow_item *item,
		      const void *default_mask,
		      struct mlx5_flow_data *data)
#ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
	return rte_flow_error_set(data->error, ENOTSUP,
				  RTE_FLOW_ERROR_TYPE_ITEM,
				  "MPLS is not supported by driver");
	const struct rte_flow_item_mpls *spec = item->spec;
	const struct rte_flow_item_mpls *mask = item->mask;
	struct mlx5_flow_parse *parser = data->parser;
	unsigned int size = sizeof(struct ibv_flow_spec_mpls);
	struct ibv_flow_spec_mpls mpls = {
		.type = IBV_FLOW_SPEC_MPLS,

	parser->inner = IBV_FLOW_SPEC_INNER;
	if (parser->layer == HASH_RXQ_UDPV4 ||
	    parser->layer == HASH_RXQ_UDPV6) {
			ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_MPLS_IN_UDP)];
		parser->out_layer = parser->layer;
			ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_MPLS_IN_GRE)];
		/* parser->out_layer stays as in GRE out_layer. */
	parser->layer = HASH_RXQ_TUNNEL;
	/*
	 * For MPLS-in-GRE, the RSS level should already have been set.
	 * For MPLS-in-UDP, use outer RSS.
	 */
	if (!parser->rss_conf.level)
		parser->rss_conf.level = 1;
			mask = default_mask;
		/*
		 * The Verbs label field includes the entire MPLS header:
		 * bits 0:19 - label value field.
		 * bits 20:22 - traffic class field.
		 * bit 23 - bottom of stack bit.
		 * bits 24:31 - TTL field.
		 */
		mpls.val.label = *(const uint32_t *)spec;
		mpls.mask.label = *(const uint32_t *)mask;
		/* Remove unwanted bits from values. */
		mpls.val.label &= mpls.mask.label;
	mlx5_flow_create_copy(parser, &mpls, size);
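/*
 * Mask example (illustrative): the supported .label_tc_s mask
 * "\xff\xff\xf0" in mlx5_flow_items[] sets exactly 20 bits, i.e. it selects
 * the label only and leaves traffic class, bottom-of-stack and TTL
 * unmatched.
 */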
 * Convert mark/flag action to Verbs specification.
 *   Internal parser structure.
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
	struct ibv_flow_spec_action_tag tag = {
		.type = IBV_FLOW_SPEC_ACTION_TAG,
		.tag_id = mlx5_flow_mark_set(mark_id),

	assert(parser->mark);
	mlx5_flow_create_copy(parser, &tag, size);

 * Convert count action to Verbs specification.
 *   Pointer to Ethernet device.
 *   Pointer to MLX5 flow parser structure.
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_create_count(struct rte_eth_dev *dev __rte_unused,
		       struct mlx5_flow_parse *parser __rte_unused)
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
	struct priv *priv = dev->data->dev_private;
	unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
	struct ibv_counter_set_init_attr init_attr = {0};
	struct ibv_flow_spec_counter_action counter = {
		.type = IBV_FLOW_SPEC_ACTION_COUNT,
		.counter_set_handle = 0,

	init_attr.counter_set_id = 0;
	parser->cs = mlx5_glue->create_counter_set(priv->ctx, &init_attr);
	counter.counter_set_handle = parser->cs->handle;
	mlx5_flow_create_copy(parser, &counter, size);
 * Complete flow rule creation with a drop queue.
 *   Pointer to Ethernet device.
 *   Internal parser structure.
 *   Pointer to the rte_flow.
 *   Perform verbose error reporting if not NULL.
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_create_action_queue_drop(struct rte_eth_dev *dev,
				   struct mlx5_flow_parse *parser,
				   struct rte_flow *flow,
				   struct rte_flow_error *error)
	struct priv *priv = dev->data->dev_private;
	struct ibv_flow_spec_action_drop *drop;
	unsigned int size = sizeof(struct ibv_flow_spec_action_drop);

	drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
			parser->queue[HASH_RXQ_ETH].offset);
	*drop = (struct ibv_flow_spec_action_drop){
		.type = IBV_FLOW_SPEC_ACTION_DROP,
	++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
	parser->queue[HASH_RXQ_ETH].offset += size;
	flow->frxq[HASH_RXQ_ETH].ibv_attr =
		parser->queue[HASH_RXQ_ETH].ibv_attr;
		flow->cs = parser->cs;
	if (!dev->data->dev_started)
	parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
	flow->frxq[HASH_RXQ_ETH].ibv_flow =
		mlx5_glue->create_flow(priv->flow_drop_queue->qp,
				       flow->frxq[HASH_RXQ_ETH].ibv_attr);
	if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "flow rule creation failure");
	if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
		claim_zero(mlx5_glue->destroy_flow
			   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
		flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
	if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
		rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
		flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
		claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
 * Create hash Rx queues when RSS is enabled.
 *   Pointer to Ethernet device.
 *   Internal parser structure.
 *   Pointer to the rte_flow.
 *   Perform verbose error reporting if not NULL.
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_create_action_queue_rss(struct rte_eth_dev *dev,
				  struct mlx5_flow_parse *parser,
				  struct rte_flow *flow,
				  struct rte_flow_error *error)
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (!parser->queue[i].ibv_attr)
		flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
		parser->queue[i].ibv_attr = NULL;
		flow->frxq[i].hash_fields = parser->queue[i].hash_fields;
		if (!dev->data->dev_started)
		flow->frxq[i].hrxq =
				      parser->rss_conf.key,
				      parser->rss_conf.key_len,
				      flow->frxq[i].hash_fields,
				      parser->rss_conf.queue,
				      parser->rss_conf.queue_num,
				      parser->rss_conf.level);
		if (flow->frxq[i].hrxq)
		flow->frxq[i].hrxq =
				      parser->rss_conf.key,
				      parser->rss_conf.key_len,
				      flow->frxq[i].hash_fields,
				      parser->rss_conf.queue,
				      parser->rss_conf.queue_num,
				      parser->rss_conf.level);
		if (!flow->frxq[i].hrxq) {
			return rte_flow_error_set(error, ENOMEM,
						  RTE_FLOW_ERROR_TYPE_HANDLE,
						  "cannot create hash rxq");

/**
 * RXQ update after flow rule creation.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param flow
 *   Pointer to the flow rule.
 */
static void
mlx5_flow_create_update_rxqs(struct rte_eth_dev *dev, struct rte_flow *flow)
{
	struct priv *priv = dev->data->dev_private;
	unsigned int i;
	unsigned int j;

	if (!dev->data->dev_started)
		return;
	for (i = 0; i != flow->rss_conf.queue_num; ++i) {
		struct mlx5_rxq_data *rxq_data = (*priv->rxqs)
						 [(*flow->queues)[i]];
		struct mlx5_rxq_ctrl *rxq_ctrl =
			container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
		uint8_t tunnel = PTYPE_IDX(flow->tunnel);

		rxq_data->mark |= flow->mark;
		if (!tunnel)
			continue;
		rxq_ctrl->tunnel_types[tunnel] += 1;
		/* Clear tunnel type if more than one tunnel types set. */
		for (j = 0; j != RTE_DIM(rxq_ctrl->tunnel_types); ++j) {
			if (j == tunnel)
				continue;
			if (rxq_ctrl->tunnel_types[j] > 0) {
				rxq_data->tunnel = 0;
				break;
			}
		}
		if (j == RTE_DIM(rxq_ctrl->tunnel_types))
			rxq_data->tunnel = flow->tunnel;
	}
}
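
/*
 * Worked example for the loop above: a queue referenced only by VXLAN flows
 * keeps rxq_data->tunnel set to the VXLAN packet type; as soon as a GRE flow
 * also lands on that queue, two tunnel_types[] counters become non-zero and
 * rxq_data->tunnel is cleared, since a single type can no longer be reported.
 */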

/**
 * Dump flow hash RX queue detail.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param flow
 *   Pointer to the rte_flow.
 * @param hrxq_idx
 *   Hash RX queue index.
 */
static void
mlx5_flow_dump(struct rte_eth_dev *dev __rte_unused,
	       struct rte_flow *flow __rte_unused,
	       unsigned int hrxq_idx __rte_unused)
{
#ifndef NDEBUG
	uintptr_t spec_ptr;
	uint16_t j;
	char buf[256];
	uint8_t off;

	spec_ptr = (uintptr_t)(flow->frxq[hrxq_idx].ibv_attr + 1);
	for (j = 0, off = 0; j < flow->frxq[hrxq_idx].ibv_attr->num_of_specs;
	     j++) {
		struct ibv_flow_spec *spec = (void *)spec_ptr;

		off += sprintf(buf + off, " %x(%hu)", spec->hdr.type,
			       spec->hdr.size);
		spec_ptr += spec->hdr.size;
	}
	DRV_LOG(DEBUG,
		"port %u Verbs flow %p type %u: hrxq:%p qp:%p ind:%p,"
		" hash:%" PRIx64 "/%u specs:%hhu(%hu), priority:%hu, type:%d,"
		" flags:%x, comp_mask:%x specs:%s",
		dev->data->port_id, (void *)flow, hrxq_idx,
		(void *)flow->frxq[hrxq_idx].hrxq,
		(void *)flow->frxq[hrxq_idx].hrxq->qp,
		(void *)flow->frxq[hrxq_idx].hrxq->ind_table,
		flow->frxq[hrxq_idx].hash_fields |
		(flow->tunnel &&
		 flow->rss_conf.level > 1 ? (uint32_t)IBV_RX_HASH_INNER : 0),
		flow->rss_conf.queue_num,
		flow->frxq[hrxq_idx].ibv_attr->num_of_specs,
		flow->frxq[hrxq_idx].ibv_attr->size,
		flow->frxq[hrxq_idx].ibv_attr->priority,
		flow->frxq[hrxq_idx].ibv_attr->type,
		flow->frxq[hrxq_idx].ibv_attr->flags,
		flow->frxq[hrxq_idx].ibv_attr->comp_mask,
		buf);
#endif
}

/**
 * Complete flow rule creation.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param parser
 *   Internal parser structure.
 * @param flow
 *   Pointer to the rte_flow.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_action_queue(struct rte_eth_dev *dev,
			      struct mlx5_flow_parse *parser,
			      struct rte_flow *flow,
			      struct rte_flow_error *error)
{
	struct priv *priv __rte_unused = dev->data->dev_private;
	int ret;
	unsigned int i;
	unsigned int flows_n = 0;

	assert(priv->pd);
	assert(priv->ctx);
	assert(!parser->drop);
	ret = mlx5_flow_create_action_queue_rss(dev, parser, flow, error);
	if (ret)
		goto error;
	if (parser->count)
		flow->cs = parser->cs;
	if (!dev->data->dev_started)
		return 0;
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (!flow->frxq[i].hrxq)
			continue;
		flow->frxq[i].ibv_flow =
			mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
					       flow->frxq[i].ibv_attr);
		mlx5_flow_dump(dev, flow, i);
		if (!flow->frxq[i].ibv_flow) {
			rte_flow_error_set(error, ENOMEM,
					   RTE_FLOW_ERROR_TYPE_HANDLE,
					   NULL, "flow rule creation failure");
			goto error;
		}
		++flows_n;
	}
	if (!flows_n) {
		rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "internal error in flow creation");
		goto error;
	}
	mlx5_flow_create_update_rxqs(dev, flow);
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	assert(flow);
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (flow->frxq[i].ibv_flow) {
			struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;

			claim_zero(mlx5_glue->destroy_flow(ibv_flow));
		}
		if (flow->frxq[i].hrxq)
			mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
		if (flow->frxq[i].ibv_attr)
			rte_free(flow->frxq[i].ibv_attr);
	}
	if (flow->cs) {
		claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
		flow->cs = NULL;
		parser->cs = NULL;
	}
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}

/**
 * Convert a flow.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param list
 *   Pointer to a TAILQ flow list.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] items
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A flow on success, NULL otherwise and rte_errno is set.
 */
static struct rte_flow *
mlx5_flow_list_create(struct rte_eth_dev *dev,
		      struct mlx5_flows *list,
		      const struct rte_flow_attr *attr,
		      const struct rte_flow_item items[],
		      const struct rte_flow_action actions[],
		      struct rte_flow_error *error)
{
	struct mlx5_flow_parse parser = { .create = 1, };
	struct rte_flow *flow = NULL;
	unsigned int i;
	int ret;

	ret = mlx5_flow_convert(dev, attr, items, actions, error, &parser);
	if (ret)
		goto exit;
	flow = rte_calloc(__func__, 1,
			  sizeof(*flow) +
			  parser.rss_conf.queue_num * sizeof(uint16_t),
			  0);
	if (!flow) {
		rte_flow_error_set(error, ENOMEM,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "cannot allocate flow memory");
		return NULL;
	}
	/* Copy configuration. */
	flow->queues = (uint16_t (*)[])(flow + 1);
	flow->tunnel = parser.tunnel;
	flow->rss_conf = (struct rte_flow_action_rss){
		.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
		.level = 0,
		.types = parser.rss_conf.types,
		.key_len = parser.rss_conf.key_len,
		.queue_num = parser.rss_conf.queue_num,
		.key = memcpy(flow->rss_key, parser.rss_conf.key,
			      sizeof(*parser.rss_conf.key) *
			      parser.rss_conf.key_len),
		.queue = memcpy(flow->queues, parser.rss_conf.queue,
				sizeof(*parser.rss_conf.queue) *
				parser.rss_conf.queue_num),
	};
	flow->mark = parser.mark;
	/* Finalise the flow. */
	if (parser.drop)
		ret = mlx5_flow_create_action_queue_drop(dev, &parser, flow,
							 error);
	else
		ret = mlx5_flow_create_action_queue(dev, &parser, flow, error);
	if (ret)
		goto exit;
	TAILQ_INSERT_TAIL(list, flow, next);
	DRV_LOG(DEBUG, "port %u flow created %p", dev->data->port_id,
		(void *)flow);
	return flow;
exit:
	DRV_LOG(ERR, "port %u flow creation error: %s", dev->data->port_id,
		error->message);
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser.queue[i].ibv_attr)
			rte_free(parser.queue[i].ibv_attr);
	}
	rte_free(flow);
	return NULL;
}
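
/*
 * Illustrative, application-side sketch (kept out of the build): this
 * function backs rte_flow_create() below.  The hypothetical helper creates
 * a minimal rule steering every IPv4 packet to Rx queue 0.
 */
#if 0
static struct rte_flow *
example_ipv4_to_queue0(uint16_t port_id)
{
	struct rte_flow_attr attr = { .ingress = 1 };
	struct rte_flow_item pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	struct rte_flow_action_queue queue = { .index = 0 };
	struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};
	struct rte_flow_error error;

	return rte_flow_create(port_id, &attr, pattern, actions, &error);
}
#endif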

/**
 * Validate a flow supported by the NIC.
 *
 * @see rte_flow_validate()
 * @see rte_flow_ops
 */
int
mlx5_flow_validate(struct rte_eth_dev *dev,
		   const struct rte_flow_attr *attr,
		   const struct rte_flow_item items[],
		   const struct rte_flow_action actions[],
		   struct rte_flow_error *error)
{
	struct mlx5_flow_parse parser = { .create = 0, };

	return mlx5_flow_convert(dev, attr, items, actions, error, &parser);
}

/**
 * Create a flow.
 *
 * @see rte_flow_create()
 * @see rte_flow_ops
 */
struct rte_flow *
mlx5_flow_create(struct rte_eth_dev *dev,
		 const struct rte_flow_attr *attr,
		 const struct rte_flow_item items[],
		 const struct rte_flow_action actions[],
		 struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	return mlx5_flow_list_create(dev, &priv->flows, attr, items, actions,
				     error);
}

/**
 * Destroy a flow in a list.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param list
 *   Pointer to a TAILQ flow list.
 * @param[in] flow
 *   Flow to destroy.
 */
static void
mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
		       struct rte_flow *flow)
{
	struct priv *priv = dev->data->dev_private;
	unsigned int i;

	if (flow->drop || !dev->data->dev_started)
		goto free;
	for (i = 0; flow->tunnel && i != flow->rss_conf.queue_num; ++i) {
		/* Update queue tunnel type. */
		struct mlx5_rxq_data *rxq_data = (*priv->rxqs)
						 [(*flow->queues)[i]];
		struct mlx5_rxq_ctrl *rxq_ctrl =
			container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
		uint8_t tunnel = PTYPE_IDX(flow->tunnel);

		assert(rxq_ctrl->tunnel_types[tunnel] > 0);
		rxq_ctrl->tunnel_types[tunnel] -= 1;
		if (!rxq_ctrl->tunnel_types[tunnel]) {
			/* Update tunnel type. */
			uint8_t j;
			uint8_t types = 0;
			uint8_t last;

			for (j = 0; j < RTE_DIM(rxq_ctrl->tunnel_types); j++)
				if (rxq_ctrl->tunnel_types[j]) {
					types += 1;
					last = j;
				}
			/* Keep same if more than one tunnel types left. */
			if (types == 1)
				rxq_data->tunnel = ptype_ext[last];
			else if (types == 0)
				/* No tunnel type left. */
				rxq_data->tunnel = 0;
		}
	}
	for (i = 0; flow->mark && i != flow->rss_conf.queue_num; ++i) {
		struct rte_flow *tmp;
		int mark = 0;

		/*
		 * To remove the mark from the queue, the queue must not be
		 * present in any other marked flow (RSS or not).
		 */
		TAILQ_FOREACH(tmp, list, next) {
			unsigned int j;
			uint16_t *tqs = NULL;
			uint16_t tq_n = 0;

			if (!tmp->mark)
				continue;
			for (j = 0; j != hash_rxq_init_n; ++j) {
				if (!tmp->frxq[j].hrxq)
					continue;
				tqs = tmp->frxq[j].hrxq->ind_table->queues;
				tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
			}
			if (!tq_n)
				continue;
			for (j = 0; (j != tq_n) && !mark; j++)
				if (tqs[j] == (*flow->queues)[i])
					mark = 1;
		}
		(*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
	}
free:
	if (flow->drop) {
		if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
			claim_zero(mlx5_glue->destroy_flow
				   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
		rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
	} else {
		for (i = 0; i != hash_rxq_init_n; ++i) {
			struct mlx5_flow *frxq = &flow->frxq[i];

			if (frxq->ibv_flow)
				claim_zero(mlx5_glue->destroy_flow
					   (frxq->ibv_flow));
			if (frxq->hrxq)
				mlx5_hrxq_release(dev, frxq->hrxq);
			if (frxq->ibv_attr)
				rte_free(frxq->ibv_attr);
		}
	}
	if (flow->cs) {
		claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
		flow->cs = NULL;
	}
	TAILQ_REMOVE(list, flow, next);
	DRV_LOG(DEBUG, "port %u flow destroyed %p", dev->data->port_id,
		(void *)flow);
	rte_free(flow);
}

/**
 * Destroy all flows.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param list
 *   Pointer to a TAILQ flow list.
 */
void
mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
{
	while (!TAILQ_EMPTY(list)) {
		struct rte_flow *flow;

		flow = TAILQ_FIRST(list);
		mlx5_flow_list_destroy(dev, list, flow);
	}
}

/**
 * Create drop queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_create_drop_queue(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_hrxq_drop *fdq = NULL;

	assert(priv->pd);
	assert(priv->ctx);
	fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
	if (!fdq) {
		DRV_LOG(WARNING,
			"port %u cannot allocate memory for drop queue",
			dev->data->port_id);
		rte_errno = ENOMEM;
		return -rte_errno;
	}
	fdq->cq = mlx5_glue->create_cq(priv->ctx, 1, NULL, NULL, 0);
	if (!fdq->cq) {
		DRV_LOG(WARNING, "port %u cannot allocate CQ for drop queue",
			dev->data->port_id);
		rte_errno = errno;
		goto error;
	}
	fdq->wq = mlx5_glue->create_wq
		(priv->ctx,
		 &(struct ibv_wq_init_attr){
			.wq_type = IBV_WQT_RQ,
			.max_wr = 1,
			.max_sge = 1,
			.pd = priv->pd,
			.cq = fdq->cq,
		 });
	if (!fdq->wq) {
		DRV_LOG(WARNING, "port %u cannot allocate WQ for drop queue",
			dev->data->port_id);
		rte_errno = errno;
		goto error;
	}
	fdq->ind_table = mlx5_glue->create_rwq_ind_table
		(priv->ctx,
		 &(struct ibv_rwq_ind_table_init_attr){
			.log_ind_tbl_size = 0,
			.ind_tbl = &fdq->wq,
			.comp_mask = 0,
		 });
	if (!fdq->ind_table) {
		DRV_LOG(WARNING,
			"port %u cannot allocate indirection table for drop"
			" queue",
			dev->data->port_id);
		rte_errno = errno;
		goto error;
	}
	fdq->qp = mlx5_glue->create_qp_ex
		(priv->ctx,
		 &(struct ibv_qp_init_attr_ex){
			.qp_type = IBV_QPT_RAW_PACKET,
			.comp_mask =
				IBV_QP_INIT_ATTR_PD |
				IBV_QP_INIT_ATTR_IND_TABLE |
				IBV_QP_INIT_ATTR_RX_HASH,
			.rx_hash_conf = (struct ibv_rx_hash_conf){
				.rx_hash_function =
					IBV_RX_HASH_FUNC_TOEPLITZ,
				.rx_hash_key_len = rss_hash_default_key_len,
				.rx_hash_key = rss_hash_default_key,
				.rx_hash_fields_mask = 0,
			},
			.rwq_ind_tbl = fdq->ind_table,
			.pd = priv->pd,
		 });
	if (!fdq->qp) {
		DRV_LOG(WARNING, "port %u cannot allocate QP for drop queue",
			dev->data->port_id);
		rte_errno = errno;
		goto error;
	}
	priv->flow_drop_queue = fdq;
	return 0;
error:
	if (fdq->qp)
		claim_zero(mlx5_glue->destroy_qp(fdq->qp));
	if (fdq->ind_table)
		claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
	if (fdq->wq)
		claim_zero(mlx5_glue->destroy_wq(fdq->wq));
	if (fdq->cq)
		claim_zero(mlx5_glue->destroy_cq(fdq->cq));
	rte_free(fdq);
	priv->flow_drop_queue = NULL;
	return -rte_errno;
}
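
/*
 * Note: the drop queue built above is a chain of otherwise unused Verbs
 * objects (CQ -> WQ -> indirection table -> hashed QP); flows attached to
 * this QP are never polled, so matching packets are effectively dropped.
 */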

/**
 * Delete drop queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
void
mlx5_flow_delete_drop_queue(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;

	if (!fdq)
		return;
	if (fdq->qp)
		claim_zero(mlx5_glue->destroy_qp(fdq->qp));
	if (fdq->ind_table)
		claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
	if (fdq->wq)
		claim_zero(mlx5_glue->destroy_wq(fdq->wq));
	if (fdq->cq)
		claim_zero(mlx5_glue->destroy_cq(fdq->cq));
	rte_free(fdq);
	priv->flow_drop_queue = NULL;
}

/**
 * Remove all flows.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param list
 *   Pointer to a TAILQ flow list.
 */
void
mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
{
	struct priv *priv = dev->data->dev_private;
	struct rte_flow *flow;
	unsigned int i;

	TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
		struct mlx5_ind_table_ibv *ind_tbl = NULL;

		if (flow->drop) {
			if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
				continue;
			claim_zero(mlx5_glue->destroy_flow
				   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
			flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
			DRV_LOG(DEBUG, "port %u flow %p removed",
				dev->data->port_id, (void *)flow);
			/* Next flow. */
			continue;
		}
		/* Verify the flow has not already been cleaned. */
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (!flow->frxq[i].ibv_flow)
				continue;
			/*
			 * Indirection table may be necessary to remove the
			 * flags in the Rx queues.
			 * This helps to speed-up the process by avoiding
			 * another loop.
			 */
			ind_tbl = flow->frxq[i].hrxq->ind_table;
			break;
		}
		if (i == hash_rxq_init_n)
			return;
		if (flow->mark) {
			assert(ind_tbl);
			for (i = 0; i != ind_tbl->queues_n; ++i)
				(*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
		}
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (!flow->frxq[i].ibv_flow)
				continue;
			claim_zero(mlx5_glue->destroy_flow
				   (flow->frxq[i].ibv_flow));
			flow->frxq[i].ibv_flow = NULL;
			mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
			flow->frxq[i].hrxq = NULL;
		}
		DRV_LOG(DEBUG, "port %u flow %p removed", dev->data->port_id,
			(void *)flow);
	}
	/* Cleanup Rx queue tunnel info. */
	for (i = 0; i != priv->rxqs_n; ++i) {
		struct mlx5_rxq_data *q = (*priv->rxqs)[i];
		struct mlx5_rxq_ctrl *rxq_ctrl =
			container_of(q, struct mlx5_rxq_ctrl, rxq);

		if (!q)
			continue;
		memset((void *)rxq_ctrl->tunnel_types, 0,
		       sizeof(rxq_ctrl->tunnel_types));
		q->tunnel = 0;
	}
}

/**
 * Add all flows.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param list
 *   Pointer to a TAILQ flow list.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
{
	struct priv *priv = dev->data->dev_private;
	struct rte_flow *flow;

	TAILQ_FOREACH(flow, list, next) {
		unsigned int i;

		if (flow->drop) {
			flow->frxq[HASH_RXQ_ETH].ibv_flow =
				mlx5_glue->create_flow
				(priv->flow_drop_queue->qp,
				 flow->frxq[HASH_RXQ_ETH].ibv_attr);
			if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
				DRV_LOG(DEBUG,
					"port %u flow %p cannot be applied",
					dev->data->port_id, (void *)flow);
				rte_errno = EINVAL;
				return -rte_errno;
			}
			DRV_LOG(DEBUG, "port %u flow %p applied",
				dev->data->port_id, (void *)flow);
			/* Next flow. */
			continue;
		}
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (!flow->frxq[i].ibv_attr)
				continue;
			flow->frxq[i].hrxq =
				mlx5_hrxq_get(dev, flow->rss_conf.key,
					      flow->rss_conf.key_len,
					      flow->frxq[i].hash_fields,
					      flow->rss_conf.queue,
					      flow->rss_conf.queue_num,
					      flow->tunnel,
					      flow->rss_conf.level);
			if (flow->frxq[i].hrxq)
				goto flow_create;
			flow->frxq[i].hrxq =
				mlx5_hrxq_new(dev, flow->rss_conf.key,
					      flow->rss_conf.key_len,
					      flow->frxq[i].hash_fields,
					      flow->rss_conf.queue,
					      flow->rss_conf.queue_num,
					      flow->tunnel,
					      flow->rss_conf.level);
			if (!flow->frxq[i].hrxq) {
				DRV_LOG(DEBUG,
					"port %u flow %p cannot create hash"
					" rxq",
					dev->data->port_id, (void *)flow);
				rte_errno = EINVAL;
				return -rte_errno;
			}
flow_create:
			mlx5_flow_dump(dev, flow, i);
			flow->frxq[i].ibv_flow =
				mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
						       flow->frxq[i].ibv_attr);
			if (!flow->frxq[i].ibv_flow) {
				DRV_LOG(DEBUG,
					"port %u flow %p type %u cannot be"
					" applied",
					dev->data->port_id, (void *)flow, i);
				rte_errno = EINVAL;
				return -rte_errno;
			}
		}
		mlx5_flow_create_update_rxqs(dev, flow);
	}
	return 0;
}

/**
 * Verify the flow list is empty.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return the number of flows not released.
 */
int
mlx5_flow_verify(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	struct rte_flow *flow;
	int ret = 0;

	TAILQ_FOREACH(flow, &priv->flows, next) {
		DRV_LOG(DEBUG, "port %u flow %p still referenced",
			dev->data->port_id, (void *)flow);
		++ret;
	}
	return ret;
}

/**
 * Enable a control flow configured from the control plane.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param eth_spec
 *   An Ethernet flow spec to apply.
 * @param eth_mask
 *   An Ethernet flow mask to apply.
 * @param vlan_spec
 *   A VLAN flow spec to apply.
 * @param vlan_mask
 *   A VLAN flow mask to apply.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
		    struct rte_flow_item_eth *eth_spec,
		    struct rte_flow_item_eth *eth_mask,
		    struct rte_flow_item_vlan *vlan_spec,
		    struct rte_flow_item_vlan *vlan_mask)
{
	struct priv *priv = dev->data->dev_private;
	const struct rte_flow_attr attr = {
		.ingress = 1,
		.priority = MLX5_CTRL_FLOW_PRIORITY,
	};
	struct rte_flow_item items[] = {
		{
			.type = RTE_FLOW_ITEM_TYPE_ETH,
			.spec = eth_spec,
			.last = NULL,
			.mask = eth_mask,
		},
		{
			.type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
					      RTE_FLOW_ITEM_TYPE_END,
			.spec = vlan_spec,
			.last = NULL,
			.mask = vlan_mask,
		},
		{
			.type = RTE_FLOW_ITEM_TYPE_END,
		},
	};
	uint16_t queue[priv->reta_idx_n];
	struct rte_flow_action_rss action_rss = {
		.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
		.level = 0,
		.types = priv->rss_conf.rss_hf,
		.key_len = priv->rss_conf.rss_key_len,
		.queue_num = priv->reta_idx_n,
		.key = priv->rss_conf.rss_key,
		.queue = queue,
	};
	struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_RSS,
			.conf = &action_rss,
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		},
	};
	struct rte_flow *flow;
	struct rte_flow_error error;
	unsigned int i;

	if (!priv->reta_idx_n) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	for (i = 0; i != priv->reta_idx_n; ++i)
		queue[i] = (*priv->reta_idx)[i];
	flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
				     actions, &error);
	if (!flow)
		return -rte_errno;
	return 0;
}

/**
 * Enable a control flow configured from the control plane.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param eth_spec
 *   An Ethernet flow spec to apply.
 * @param eth_mask
 *   An Ethernet flow mask to apply.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_ctrl_flow(struct rte_eth_dev *dev,
	       struct rte_flow_item_eth *eth_spec,
	       struct rte_flow_item_eth *eth_mask)
{
	return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
}
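
/*
 * Illustrative sketch (kept out of the build): this helper is typically
 * invoked from the driver's traffic-enable path, e.g. to accept broadcast:
 */
#if 0
	struct rte_flow_item_eth bcast = {
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
	};

	mlx5_ctrl_flow(dev, &bcast, &bcast);
#endif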

/**
 * Destroy a flow.
 *
 * @see rte_flow_destroy()
 * @see rte_flow_ops
 */
int
mlx5_flow_destroy(struct rte_eth_dev *dev,
		  struct rte_flow *flow,
		  struct rte_flow_error *error __rte_unused)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_destroy(dev, &priv->flows, flow);
	return 0;
}

/**
 * Destroy all flows.
 *
 * @see rte_flow_flush()
 * @see rte_flow_ops
 */
int
mlx5_flow_flush(struct rte_eth_dev *dev,
		struct rte_flow_error *error __rte_unused)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_flush(dev, &priv->flows);
	return 0;
}

#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
/**
 * Query flow counter.
 *
 * @param cs
 *   The Verbs counter set to query.
 * @param counter_stats
 *   Last counter values, used to compute deltas and for reset.
 * @param[out] query_count
 *   Returned data from the counter.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_query_count(struct ibv_counter_set *cs,
		      struct mlx5_flow_counter_stats *counter_stats,
		      struct rte_flow_query_count *query_count,
		      struct rte_flow_error *error)
{
	uint64_t counters[2];
	struct ibv_query_counter_set_attr query_cs_attr = {
		.cs = cs,
		.query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
	};
	struct ibv_counter_set_data query_out = {
		.out = counters,
		.outlen = 2 * sizeof(uint64_t),
	};
	int err = mlx5_glue->query_counter_set(&query_cs_attr, &query_out);

	if (err)
		return rte_flow_error_set(error, err,
					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
					  NULL,
					  "cannot read counter");
	query_count->hits_set = 1;
	query_count->bytes_set = 1;
	query_count->hits = counters[0] - counter_stats->hits;
	query_count->bytes = counters[1] - counter_stats->bytes;
	if (query_count->reset) {
		counter_stats->hits = counters[0];
		counter_stats->bytes = counters[1];
	}
	return 0;
}

/**
 * Query a flow.
 *
 * @see rte_flow_query()
 * @see rte_flow_ops
 */
int
mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
		struct rte_flow *flow,
		const struct rte_flow_action *action __rte_unused,
		void *data,
		struct rte_flow_error *error)
{
	if (flow->cs) {
		int ret;

		ret = mlx5_flow_query_count(flow->cs,
					    &flow->counter_stats,
					    (struct rte_flow_query_count *)data,
					    error);
		if (ret)
			return ret;
	} else {
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
					  NULL,
					  "no counter found for flow");
	}
	return 0;
}
#endif

/**
 * Isolated mode.
 *
 * @see rte_flow_isolate()
 * @see rte_flow_ops
 */
int
mlx5_flow_isolate(struct rte_eth_dev *dev,
		  int enable,
		  struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	if (dev->data->dev_started) {
		rte_flow_error_set(error, EBUSY,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "port must be stopped first");
		return -rte_errno;
	}
	priv->isolated = !!enable;
	if (enable)
		dev->dev_ops = &mlx5_dev_ops_isolate;
	else
		dev->dev_ops = &mlx5_dev_ops;
	return 0;
}
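
/*
 * Illustrative, application-side sketch (kept out of the build): isolated
 * mode must be selected while the port is stopped.
 */
#if 0
	struct rte_flow_error error;

	if (rte_flow_isolate(port_id, 1, &error) == 0)
		rte_eth_dev_start(port_id);
#endif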

/**
 * Convert a flow director filter to a generic flow.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Flow director filter to add.
 * @param attributes
 *   Generic flow parameters structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
			 const struct rte_eth_fdir_filter *fdir_filter,
			 struct mlx5_fdir *attributes)
{
	struct priv *priv = dev->data->dev_private;
	const struct rte_eth_fdir_input *input = &fdir_filter->input;
	const struct rte_eth_fdir_masks *mask =
		&dev->data->dev_conf.fdir_conf.mask;

	/* Validate queue number. */
	if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
		DRV_LOG(ERR, "port %u invalid queue number %d",
			dev->data->port_id, fdir_filter->action.rx_queue);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	attributes->attr.ingress = 1;
	attributes->items[0] = (struct rte_flow_item) {
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.spec = &attributes->l2,
		.mask = &attributes->l2_mask,
	};
	switch (fdir_filter->action.behavior) {
	case RTE_ETH_FDIR_ACCEPT:
		attributes->actions[0] = (struct rte_flow_action){
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &attributes->queue,
		};
		break;
	case RTE_ETH_FDIR_REJECT:
		attributes->actions[0] = (struct rte_flow_action){
			.type = RTE_FLOW_ACTION_TYPE_DROP,
		};
		break;
	default:
		DRV_LOG(ERR, "port %u invalid behavior %d",
			dev->data->port_id,
			fdir_filter->action.behavior);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	attributes->queue.index = fdir_filter->action.rx_queue;
	/* Handle L3. */
	switch (fdir_filter->input.flow_type) {
	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = input->flow.ip4_flow.src_ip,
			.dst_addr = input->flow.ip4_flow.dst_ip,
			.time_to_live = input->flow.ip4_flow.ttl,
			.type_of_service = input->flow.ip4_flow.tos,
			.next_proto_id = input->flow.ip4_flow.proto,
		};
		attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = mask->ipv4_mask.src_ip,
			.dst_addr = mask->ipv4_mask.dst_ip,
			.time_to_live = mask->ipv4_mask.ttl,
			.type_of_service = mask->ipv4_mask.tos,
			.next_proto_id = mask->ipv4_mask.proto,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV4,
			.spec = &attributes->l3,
			.mask = &attributes->l3_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
			.hop_limits = input->flow.ipv6_flow.hop_limits,
			.proto = input->flow.ipv6_flow.proto,
		};

		memcpy(attributes->l3.ipv6.hdr.src_addr,
		       input->flow.ipv6_flow.src_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
		memcpy(attributes->l3.ipv6.hdr.dst_addr,
		       input->flow.ipv6_flow.dst_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
		memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
		       mask->ipv6_mask.src_ip,
		       RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
		memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
		       mask->ipv6_mask.dst_ip,
		       RTE_DIM(attributes->l3_mask.ipv6.hdr.dst_addr));
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV6,
			.spec = &attributes->l3,
			.mask = &attributes->l3_mask,
		};
		break;
	default:
		DRV_LOG(ERR, "port %u invalid flow type %d",
			dev->data->port_id, fdir_filter->input.flow_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	/* Handle L4. */
	switch (fdir_filter->input.flow_type) {
	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
		attributes->l4.udp.hdr = (struct udp_hdr){
			.src_port = input->flow.udp4_flow.src_port,
			.dst_port = input->flow.udp4_flow.dst_port,
		};
		attributes->l4_mask.udp.hdr = (struct udp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_UDP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
		attributes->l4.tcp.hdr = (struct tcp_hdr){
			.src_port = input->flow.tcp4_flow.src_port,
			.dst_port = input->flow.tcp4_flow.dst_port,
		};
		attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_TCP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
		attributes->l4.udp.hdr = (struct udp_hdr){
			.src_port = input->flow.udp6_flow.src_port,
			.dst_port = input->flow.udp6_flow.dst_port,
		};
		attributes->l4_mask.udp.hdr = (struct udp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_UDP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
		attributes->l4.tcp.hdr = (struct tcp_hdr){
			.src_port = input->flow.tcp6_flow.src_port,
			.dst_port = input->flow.tcp6_flow.dst_port,
		};
		attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_TCP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
		break;
	default:
		DRV_LOG(ERR, "port %u invalid flow type %d",
			dev->data->port_id, fdir_filter->input.flow_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	return 0;
}
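
/*
 * Illustrative sketch (kept out of the build): a flow director filter, as
 * converted above, steering IPv4/UDP traffic from 192.168.1.1:5000 to Rx
 * queue 1.  All values are arbitrary examples.
 */
#if 0
	struct rte_eth_fdir_filter fdir = {
		.input = {
			.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
			.flow.udp4_flow = {
				.ip.src_ip = rte_cpu_to_be_32(0xc0a80101),
				.src_port = rte_cpu_to_be_16(5000),
			},
		},
		.action = {
			.rx_queue = 1,
			.behavior = RTE_ETH_FDIR_ACCEPT,
		},
	};
#endif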

/**
 * Add new flow director filter and store it in list.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Flow director filter to add.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_add(struct rte_eth_dev *dev,
		     const struct rte_eth_fdir_filter *fdir_filter)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_fdir attributes = {
		.attr.group = 0,
		.l2_mask = {
			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.type = 0,
		},
	};
	struct mlx5_flow_parse parser = {
		.layer = HASH_RXQ_ETH,
	};
	struct rte_flow_error error;
	struct rte_flow *flow;
	int ret;

	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
	if (ret)
		return ret;
	ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
				attributes.actions, &error, &parser);
	if (ret)
		return ret;
	flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
				     attributes.items, attributes.actions,
				     &error);
	if (flow) {
		DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
			(void *)flow);
		return 0;
	}
	return -rte_errno;
}

/**
 * Delete specific filter.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Filter to be deleted.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_delete(struct rte_eth_dev *dev,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_fdir attributes = {
		.attr.group = 0,
	};
	struct mlx5_flow_parse parser = {
		.create = 1,
		.layer = HASH_RXQ_ETH,
	};
	struct rte_flow_error error;
	struct rte_flow *flow;
	unsigned int i;
	int ret;

	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
	if (ret)
		return ret;
	ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
				attributes.actions, &error, &parser);
	if (ret)
		goto exit;
	/*
	 * Special case for drop action which is only set in the
	 * specifications when the flow is created.  In this situation the
	 * drop specification is missing.
	 */
	if (parser.drop) {
		struct ibv_flow_spec_action_drop *drop;

		drop = (void *)
			((uintptr_t)parser.queue[HASH_RXQ_ETH].ibv_attr +
			 parser.queue[HASH_RXQ_ETH].offset);
		*drop = (struct ibv_flow_spec_action_drop){
			.type = IBV_FLOW_SPEC_ACTION_DROP,
			.size = sizeof(struct ibv_flow_spec_action_drop),
		};
		parser.queue[HASH_RXQ_ETH].ibv_attr->num_of_specs++;
	}
	TAILQ_FOREACH(flow, &priv->flows, next) {
		struct ibv_flow_attr *attr;
		struct ibv_spec_header *attr_h;
		void *spec;
		struct ibv_flow_attr *flow_attr;
		struct ibv_spec_header *flow_h;
		void *flow_spec;
		unsigned int specs_n;
		unsigned int queue_id = parser.drop ? HASH_RXQ_ETH :
						      parser.layer;

		attr = parser.queue[queue_id].ibv_attr;
		flow_attr = flow->frxq[queue_id].ibv_attr;
		/* Compare first the attributes. */
		if (!flow_attr ||
		    memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
			continue;
		if (attr->num_of_specs == 0)
			continue;
		spec = (void *)((uintptr_t)attr +
				sizeof(struct ibv_flow_attr));
		flow_spec = (void *)((uintptr_t)flow_attr +
				     sizeof(struct ibv_flow_attr));
		specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
		for (i = 0; i != specs_n; ++i) {
			attr_h = spec;
			flow_h = flow_spec;
			if (memcmp(spec, flow_spec,
				   RTE_MIN(attr_h->size, flow_h->size)))
				goto wrong_flow;
			spec = (void *)((uintptr_t)spec + attr_h->size);
			flow_spec = (void *)((uintptr_t)flow_spec +
					     flow_h->size);
		}
		/* At this point, the flows match. */
		break;
wrong_flow:
		/* The flow does not match. */
		continue;
	}
	ret = rte_errno; /* Save rte_errno before cleanup. */
	if (flow)
		mlx5_flow_list_destroy(dev, &priv->flows, flow);
exit:
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser.queue[i].ibv_attr)
			rte_free(parser.queue[i].ibv_attr);
	}
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}

/**
 * Update queue for specific filter.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Filter to be updated.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_update(struct rte_eth_dev *dev,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	int ret;

	ret = mlx5_fdir_filter_delete(dev, fdir_filter);
	if (ret)
		return ret;
	return mlx5_fdir_filter_add(dev, fdir_filter);
}

/**
 * Flush all filters.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
static void
mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_flush(dev, &priv->flows);
}

/**
 * Get flow director information.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[out] fdir_info
 *   Resulting flow director information.
 */
static void
mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
{
	struct rte_eth_fdir_masks *mask =
		&dev->data->dev_conf.fdir_conf.mask;

	fdir_info->mode = dev->data->dev_conf.fdir_conf.mode;
	fdir_info->guarant_spc = 0;
	rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
	fdir_info->max_flexpayload = 0;
	fdir_info->flow_types_mask[0] = 0;
	fdir_info->flex_payload_unit = 0;
	fdir_info->max_flex_payload_segment_num = 0;
	fdir_info->flex_payload_limit = 0;
	memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
}

/**
 * Deal with flow director operations.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
		    void *arg)
{
	enum rte_fdir_mode fdir_mode =
		dev->data->dev_conf.fdir_conf.mode;

	if (filter_op == RTE_ETH_FILTER_NOP)
		return 0;
	if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
	    fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
		DRV_LOG(ERR, "port %u flow director mode %d not supported",
			dev->data->port_id, fdir_mode);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	switch (filter_op) {
	case RTE_ETH_FILTER_ADD:
		return mlx5_fdir_filter_add(dev, arg);
	case RTE_ETH_FILTER_UPDATE:
		return mlx5_fdir_filter_update(dev, arg);
	case RTE_ETH_FILTER_DELETE:
		return mlx5_fdir_filter_delete(dev, arg);
	case RTE_ETH_FILTER_FLUSH:
		mlx5_fdir_filter_flush(dev);
		break;
	case RTE_ETH_FILTER_INFO:
		mlx5_fdir_info_get(dev, arg);
		break;
	default:
		DRV_LOG(DEBUG, "port %u unknown operation %u",
			dev->data->port_id, filter_op);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	return 0;
}

/**
 * Manage filter operations.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param filter_type
 *   Filter type.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
		     enum rte_filter_type filter_type,
		     enum rte_filter_op filter_op,
		     void *arg)
{
	switch (filter_type) {
	case RTE_ETH_FILTER_GENERIC:
		if (filter_op != RTE_ETH_FILTER_GET) {
			rte_errno = EINVAL;
			return -rte_errno;
		}
		*(const void **)arg = &mlx5_flow_ops;
		return 0;
	case RTE_ETH_FILTER_FDIR:
		return mlx5_fdir_ctrl_func(dev, filter_op, arg);
	default:
		DRV_LOG(ERR, "port %u filter type (%d) not supported",
			dev->data->port_id, filter_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	return 0;
}
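
/*
 * Illustrative, application-side sketch (kept out of the build): this entry
 * point is how rte_flow reaches the PMD; the generic filter query returns
 * the mlx5_flow_ops table defined in this file.
 */
#if 0
	const struct rte_flow_ops *ops = NULL;

	if (rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_GENERIC,
				    RTE_ETH_FILTER_GET, &ops) == 0)
		/* ops now points to mlx5_flow_ops. */
		(void)ops;
#endif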

/**
 * Detect number of Verbs flow priorities supported.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   Number of supported Verbs flow priorities.
 */
unsigned int
mlx5_get_max_verbs_prio(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	unsigned int verb_priorities = MLX5_VERBS_FLOW_PRIO_8;
	struct {
		struct ibv_flow_attr attr;
		struct ibv_flow_spec_eth eth;
		struct ibv_flow_spec_action_drop drop;
	} flow_attr = {
		.attr = {
			.num_of_specs = 2,
		},
		.eth = {
			.type = IBV_FLOW_SPEC_ETH,
			.size = sizeof(struct ibv_flow_spec_eth),
		},
		.drop = {
			.size = sizeof(struct ibv_flow_spec_action_drop),
			.type = IBV_FLOW_SPEC_ACTION_DROP,
		},
	};
	struct ibv_flow *flow;

	do {
		flow_attr.attr.priority = verb_priorities - 1;
		flow = mlx5_glue->create_flow(priv->flow_drop_queue->qp,
					      &flow_attr.attr);
		if (flow) {
			claim_zero(mlx5_glue->destroy_flow(flow));
			/* Try more priorities. */
			verb_priorities *= 2;
		} else {
			/* Failed, restore last right number. */
			verb_priorities /= 2;
			break;
		}
	} while (1);
	DRV_LOG(DEBUG, "port %u Verbs flow priorities: %d,"
		" user flow priorities: %d",
		dev->data->port_id, verb_priorities, MLX5_CTRL_FLOW_PRIORITY);
	return verb_priorities;
}
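
/*
 * Worked example for the probe above: starting from 8, creation succeeds at
 * priorities 7 and 15 (doubling to 16, then 32) and fails at 31, so the
 * count is halved back and 16 is returned.
 */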