1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2016 6WIND S.A.
3 * Copyright 2016 Mellanox Technologies, Ltd
11 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
13 #pragma GCC diagnostic ignored "-Wpedantic"
15 #include <infiniband/verbs.h>
17 #pragma GCC diagnostic error "-Wpedantic"
20 #include <rte_common.h>
21 #include <rte_ether.h>
22 #include <rte_eth_ctrl.h>
23 #include <rte_ethdev_driver.h>
25 #include <rte_flow_driver.h>
26 #include <rte_malloc.h>
30 #include "mlx5_defs.h"
32 #include "mlx5_glue.h"
34 /* Flow priority for control plane flows. */
35 #define MLX5_CTRL_FLOW_PRIORITY 1
37 /* Internet Protocol versions. */
42 #ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
43 struct ibv_flow_spec_counter_action {
48 /* Dev ops structure defined in mlx5.c */
49 extern const struct eth_dev_ops mlx5_dev_ops;
50 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
52 /** Structure given to the conversion functions. */
53 struct mlx5_flow_data {
54 struct rte_eth_dev *dev; /**< Ethernet device. */
55 struct mlx5_flow_parse *parser; /**< Parser context. */
56 struct rte_flow_error *error; /**< Error context. */
60 mlx5_flow_create_eth(const struct rte_flow_item *item,
61 const void *default_mask,
62 struct mlx5_flow_data *data);
65 mlx5_flow_create_vlan(const struct rte_flow_item *item,
66 const void *default_mask,
67 struct mlx5_flow_data *data);
70 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
71 const void *default_mask,
72 struct mlx5_flow_data *data);
75 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
76 const void *default_mask,
77 struct mlx5_flow_data *data);
80 mlx5_flow_create_udp(const struct rte_flow_item *item,
81 const void *default_mask,
82 struct mlx5_flow_data *data);
85 mlx5_flow_create_tcp(const struct rte_flow_item *item,
86 const void *default_mask,
87 struct mlx5_flow_data *data);
90 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
91 const void *default_mask,
92 struct mlx5_flow_data *data);
95 mlx5_flow_create_vxlan_gpe(const struct rte_flow_item *item,
96 const void *default_mask,
97 struct mlx5_flow_data *data);
100 mlx5_flow_create_gre(const struct rte_flow_item *item,
101 const void *default_mask,
102 struct mlx5_flow_data *data);
105 mlx5_flow_create_mpls(const struct rte_flow_item *item,
106 const void *default_mask,
107 struct mlx5_flow_data *data);
109 struct mlx5_flow_parse;
112 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
116 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);
119 mlx5_flow_create_count(struct rte_eth_dev *dev, struct mlx5_flow_parse *parser);
121 /* Hash RX queue types. */
133 /* Initialization data for hash RX queue. */
134 struct hash_rxq_init {
135 uint64_t hash_fields; /* Fields that participate in the hash. */
136 uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
137 unsigned int flow_priority; /* Flow priority to use. */
138 unsigned int ip_version; /* Internet protocol. */
141 /* Initialization data for hash RX queues. */
142 const struct hash_rxq_init hash_rxq_init[] = {
144 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
145 IBV_RX_HASH_DST_IPV4 |
146 IBV_RX_HASH_SRC_PORT_TCP |
147 IBV_RX_HASH_DST_PORT_TCP),
148 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
150 .ip_version = MLX5_IPV4,
153 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
154 IBV_RX_HASH_DST_IPV4 |
155 IBV_RX_HASH_SRC_PORT_UDP |
156 IBV_RX_HASH_DST_PORT_UDP),
157 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
159 .ip_version = MLX5_IPV4,
162 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
163 IBV_RX_HASH_DST_IPV4),
164 .dpdk_rss_hf = (ETH_RSS_IPV4 |
167 .ip_version = MLX5_IPV4,
170 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
171 IBV_RX_HASH_DST_IPV6 |
172 IBV_RX_HASH_SRC_PORT_TCP |
173 IBV_RX_HASH_DST_PORT_TCP),
174 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
176 .ip_version = MLX5_IPV6,
179 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
180 IBV_RX_HASH_DST_IPV6 |
181 IBV_RX_HASH_SRC_PORT_UDP |
182 IBV_RX_HASH_DST_PORT_UDP),
183 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
185 .ip_version = MLX5_IPV6,
188 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
189 IBV_RX_HASH_DST_IPV6),
190 .dpdk_rss_hf = (ETH_RSS_IPV6 |
193 .ip_version = MLX5_IPV6,
202 /* Number of entries in hash_rxq_init[]. */
203 const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
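/*
 * Example (informational): an RSS action requesting ETH_RSS_NONFRAG_IPV4_UDP
 * selects the IPv4/UDP entry above; a sketch of the scan later performed by
 * the parser, assuming rss_types holds the requested RSS hash types:
 *
 *	uint64_t rss_types = ETH_RSS_NONFRAG_IPV4_UDP;
 *	unsigned int i;
 *
 *	for (i = 0; i != hash_rxq_init_n; ++i)
 *		if (hash_rxq_init[i].dpdk_rss_hf & rss_types)
 *			break;
 *
 * hash_rxq_init[i].hash_fields then holds the Verbs hash fields to program.
 */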
205 /** Structure for holding counter stats. */
206 struct mlx5_flow_counter_stats {
207 uint64_t hits; /**< Number of packets matched by the rule. */
208 uint64_t bytes; /**< Number of bytes matched by the rule. */
211 /** Structure for Drop queue. */
212 struct mlx5_hrxq_drop {
213 struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
214 struct ibv_qp *qp; /**< Verbs queue pair. */
215 struct ibv_wq *wq; /**< Verbs work queue. */
216 struct ibv_cq *cq; /**< Verbs completion queue. */
219 /* Flow structures. */
221 uint64_t hash_fields; /**< Fields that participate in the hash. */
222 struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
223 struct ibv_flow *ibv_flow; /**< Verbs flow. */
224 struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
227 /* Drop flow structures. */
228 struct mlx5_flow_drop {
229 struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
230 struct ibv_flow *ibv_flow; /**< Verbs flow. */
234 TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
235 uint32_t mark:1; /**< Set if the flow is marked. */
236 uint32_t drop:1; /**< Drop queue. */
237 struct rte_flow_action_rss rss_conf; /**< RSS configuration. */
238 uint16_t (*queues)[]; /**< Queue indexes to use. */
239 uint8_t rss_key[40]; /**< Copy of the RSS key. */
240 uint32_t tunnel; /**< Tunnel type of RTE_PTYPE_TUNNEL_XXX. */
241 struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
242 struct mlx5_flow_counter_stats counter_stats; /**< The counter stats. */
243 struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
244 /**< Flow with Rx queue. */
247 /** Static initializer for items. */
249 (const enum rte_flow_item_type []){ \
250 __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
253 #define IS_TUNNEL(type) ( \
254 (type) == RTE_FLOW_ITEM_TYPE_VXLAN || \
255 (type) == RTE_FLOW_ITEM_TYPE_VXLAN_GPE || \
256 (type) == RTE_FLOW_ITEM_TYPE_GRE || \
257 (type) == RTE_FLOW_ITEM_TYPE_MPLS)
259 const uint32_t flow_ptype[] = {
260 [RTE_FLOW_ITEM_TYPE_VXLAN] = RTE_PTYPE_TUNNEL_VXLAN,
261 [RTE_FLOW_ITEM_TYPE_VXLAN_GPE] = RTE_PTYPE_TUNNEL_VXLAN_GPE,
262 [RTE_FLOW_ITEM_TYPE_GRE] = RTE_PTYPE_TUNNEL_GRE,
263 [RTE_FLOW_ITEM_TYPE_MPLS] = RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
266 #define PTYPE_IDX(t) ((RTE_PTYPE_TUNNEL_MASK & (t)) >> 12)
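/*
 * Example (informational): tunnel packet types occupy bits 12-15 of the mbuf
 * packet type, so PTYPE_IDX() turns them into a small array index, e.g.
 * PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN) == (0xf000 & 0x3000) >> 12 == 3 with the
 * current rte_mbuf_ptype.h encoding.
 */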
268 const uint32_t ptype_ext[] = {
269 [PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] = RTE_PTYPE_TUNNEL_VXLAN |
271 [PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN_GPE)] = RTE_PTYPE_TUNNEL_VXLAN_GPE |
273 [PTYPE_IDX(RTE_PTYPE_TUNNEL_GRE)] = RTE_PTYPE_TUNNEL_GRE,
274 [PTYPE_IDX(RTE_PTYPE_TUNNEL_MPLS_IN_GRE)] =
275 RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
276 [PTYPE_IDX(RTE_PTYPE_TUNNEL_MPLS_IN_UDP)] =
277 RTE_PTYPE_TUNNEL_MPLS_IN_GRE | RTE_PTYPE_L4_UDP,
280 /** Structure to generate a simple graph of layers supported by the NIC. */
281 struct mlx5_flow_items {
282 /** List of possible actions for these items. */
283 const enum rte_flow_action_type *const actions;
284 /** Bit-masks corresponding to the possibilities for the item. */
287 * Default bit-masks to use when item->mask is not provided. When
288 * \default_mask is also NULL, the full supported bit-mask (\mask) is
291 const void *default_mask;
292 /** Bit-masks size in bytes. */
293 const unsigned int mask_sz;
295 * Conversion function from rte_flow to NIC specific flow.
298 * rte_flow item to convert.
299 * @param default_mask
300 * Default bit-masks to use when item->mask is not provided.
302 * Internal structure to store the conversion.
305 * 0 on success, a negative errno value otherwise and rte_errno is
308 int (*convert)(const struct rte_flow_item *item,
309 const void *default_mask,
310 struct mlx5_flow_data *data);
311 /** Size in bytes of the destination structure. */
312 const unsigned int dst_sz;
313 /** List of possible following items. */
314 const enum rte_flow_item_type *const items;
317 /** Valid actions for this PMD. */
318 static const enum rte_flow_action_type valid_actions[] = {
319 RTE_FLOW_ACTION_TYPE_DROP,
320 RTE_FLOW_ACTION_TYPE_QUEUE,
321 RTE_FLOW_ACTION_TYPE_MARK,
322 RTE_FLOW_ACTION_TYPE_FLAG,
323 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
324 RTE_FLOW_ACTION_TYPE_COUNT,
326 RTE_FLOW_ACTION_TYPE_END,
329 /** Graph of supported items and associated actions. */
330 static const struct mlx5_flow_items mlx5_flow_items[] = {
331 [RTE_FLOW_ITEM_TYPE_END] = {
332 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
333 RTE_FLOW_ITEM_TYPE_VXLAN,
334 RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
335 RTE_FLOW_ITEM_TYPE_GRE),
337 [RTE_FLOW_ITEM_TYPE_ETH] = {
338 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
339 RTE_FLOW_ITEM_TYPE_IPV4,
340 RTE_FLOW_ITEM_TYPE_IPV6),
341 .actions = valid_actions,
342 .mask = &(const struct rte_flow_item_eth){
343 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
344 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
347 .default_mask = &rte_flow_item_eth_mask,
348 .mask_sz = sizeof(struct rte_flow_item_eth),
349 .convert = mlx5_flow_create_eth,
350 .dst_sz = sizeof(struct ibv_flow_spec_eth),
352 [RTE_FLOW_ITEM_TYPE_VLAN] = {
353 .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
354 RTE_FLOW_ITEM_TYPE_IPV6),
355 .actions = valid_actions,
356 .mask = &(const struct rte_flow_item_vlan){
360 .default_mask = &rte_flow_item_vlan_mask,
361 .mask_sz = sizeof(struct rte_flow_item_vlan),
362 .convert = mlx5_flow_create_vlan,
365 [RTE_FLOW_ITEM_TYPE_IPV4] = {
366 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
367 RTE_FLOW_ITEM_TYPE_TCP,
368 RTE_FLOW_ITEM_TYPE_GRE),
369 .actions = valid_actions,
370 .mask = &(const struct rte_flow_item_ipv4){
374 .type_of_service = -1,
378 .default_mask = &rte_flow_item_ipv4_mask,
379 .mask_sz = sizeof(struct rte_flow_item_ipv4),
380 .convert = mlx5_flow_create_ipv4,
381 .dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
383 [RTE_FLOW_ITEM_TYPE_IPV6] = {
384 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
385 RTE_FLOW_ITEM_TYPE_TCP,
386 RTE_FLOW_ITEM_TYPE_GRE),
387 .actions = valid_actions,
388 .mask = &(const struct rte_flow_item_ipv6){
391 0xff, 0xff, 0xff, 0xff,
392 0xff, 0xff, 0xff, 0xff,
393 0xff, 0xff, 0xff, 0xff,
394 0xff, 0xff, 0xff, 0xff,
397 0xff, 0xff, 0xff, 0xff,
398 0xff, 0xff, 0xff, 0xff,
399 0xff, 0xff, 0xff, 0xff,
400 0xff, 0xff, 0xff, 0xff,
407 .default_mask = &rte_flow_item_ipv6_mask,
408 .mask_sz = sizeof(struct rte_flow_item_ipv6),
409 .convert = mlx5_flow_create_ipv6,
410 .dst_sz = sizeof(struct ibv_flow_spec_ipv6),
412 [RTE_FLOW_ITEM_TYPE_UDP] = {
413 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN,
414 RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
415 RTE_FLOW_ITEM_TYPE_MPLS),
416 .actions = valid_actions,
417 .mask = &(const struct rte_flow_item_udp){
423 .default_mask = &rte_flow_item_udp_mask,
424 .mask_sz = sizeof(struct rte_flow_item_udp),
425 .convert = mlx5_flow_create_udp,
426 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
428 [RTE_FLOW_ITEM_TYPE_TCP] = {
429 .actions = valid_actions,
430 .mask = &(const struct rte_flow_item_tcp){
436 .default_mask = &rte_flow_item_tcp_mask,
437 .mask_sz = sizeof(struct rte_flow_item_tcp),
438 .convert = mlx5_flow_create_tcp,
439 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
441 [RTE_FLOW_ITEM_TYPE_GRE] = {
442 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
443 RTE_FLOW_ITEM_TYPE_IPV4,
444 RTE_FLOW_ITEM_TYPE_IPV6,
445 RTE_FLOW_ITEM_TYPE_MPLS),
446 .actions = valid_actions,
447 .mask = &(const struct rte_flow_item_gre){
450 .default_mask = &rte_flow_item_gre_mask,
451 .mask_sz = sizeof(struct rte_flow_item_gre),
452 .convert = mlx5_flow_create_gre,
453 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
454 .dst_sz = sizeof(struct ibv_flow_spec_gre),
456 .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
459 [RTE_FLOW_ITEM_TYPE_MPLS] = {
460 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
461 RTE_FLOW_ITEM_TYPE_IPV4,
462 RTE_FLOW_ITEM_TYPE_IPV6),
463 .actions = valid_actions,
464 .mask = &(const struct rte_flow_item_mpls){
465 .label_tc_s = "\xff\xff\xf0",
467 .default_mask = &rte_flow_item_mpls_mask,
468 .mask_sz = sizeof(struct rte_flow_item_mpls),
469 .convert = mlx5_flow_create_mpls,
470 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
471 .dst_sz = sizeof(struct ibv_flow_spec_mpls),
474 [RTE_FLOW_ITEM_TYPE_VXLAN] = {
475 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
476 RTE_FLOW_ITEM_TYPE_IPV4, /* For L3 VXLAN. */
477 RTE_FLOW_ITEM_TYPE_IPV6), /* For L3 VXLAN. */
478 .actions = valid_actions,
479 .mask = &(const struct rte_flow_item_vxlan){
480 .vni = "\xff\xff\xff",
482 .default_mask = &rte_flow_item_vxlan_mask,
483 .mask_sz = sizeof(struct rte_flow_item_vxlan),
484 .convert = mlx5_flow_create_vxlan,
485 .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
487 [RTE_FLOW_ITEM_TYPE_VXLAN_GPE] = {
488 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
489 RTE_FLOW_ITEM_TYPE_IPV4,
490 RTE_FLOW_ITEM_TYPE_IPV6),
491 .actions = valid_actions,
492 .mask = &(const struct rte_flow_item_vxlan_gpe){
493 .vni = "\xff\xff\xff",
495 .default_mask = &rte_flow_item_vxlan_gpe_mask,
496 .mask_sz = sizeof(struct rte_flow_item_vxlan_gpe),
497 .convert = mlx5_flow_create_vxlan_gpe,
498 .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
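/*
 * Example (informational): the graph above accepts the pattern
 * eth / ipv4 / udp / vxlan (END -> ETH -> IPV4 -> UDP -> VXLAN).  A
 * hypothetical application request for it would look like:
 *
 *	struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_UDP },
 *		{ .type = RTE_FLOW_ITEM_TYPE_VXLAN },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 *
 * When an item carries no mask, the default_mask of its entry is used.
 */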
502 /** Structure to pass to the conversion function. */
503 struct mlx5_flow_parse {
504 uint32_t inner; /**< Verbs value, set once tunnel is encountered. */
506 /**< Whether resources should remain after a validate. */
507 uint32_t drop:1; /**< Target is a drop queue. */
508 uint32_t mark:1; /**< Mark is present in the flow. */
509 uint32_t count:1; /**< Count is present in the flow. */
510 uint32_t mark_id; /**< Mark identifier. */
511 struct rte_flow_action_rss rss_conf; /**< RSS configuration. */
512 uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indexes to use. */
513 uint8_t rss_key[40]; /**< Copy of the RSS key. */
514 enum hash_rxq_type layer; /**< Last pattern layer detected. */
515 enum hash_rxq_type out_layer; /**< Last outer pattern layer detected. */
516 uint32_t tunnel; /**< Tunnel type of RTE_PTYPE_TUNNEL_XXX. */
517 struct ibv_counter_set *cs; /**< Holds the counter set for the rule */
519 struct ibv_flow_attr *ibv_attr;
520 /**< Pointer to Verbs attributes. */
522 /**< Current position or total size of the attribute. */
523 uint64_t hash_fields; /**< Verbs hash fields. */
524 } queue[RTE_DIM(hash_rxq_init)];
527 static const struct rte_flow_ops mlx5_flow_ops = {
528 .validate = mlx5_flow_validate,
529 .create = mlx5_flow_create,
530 .destroy = mlx5_flow_destroy,
531 .flush = mlx5_flow_flush,
532 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
533 .query = mlx5_flow_query,
537 .isolate = mlx5_flow_isolate,
540 /* Convert an FDIR request to a generic flow. */
542 struct rte_flow_attr attr;
543 struct rte_flow_action actions[2];
544 struct rte_flow_item items[4];
545 struct rte_flow_item_eth l2;
546 struct rte_flow_item_eth l2_mask;
548 struct rte_flow_item_ipv4 ipv4;
549 struct rte_flow_item_ipv6 ipv6;
552 struct rte_flow_item_ipv4 ipv4;
553 struct rte_flow_item_ipv6 ipv6;
556 struct rte_flow_item_udp udp;
557 struct rte_flow_item_tcp tcp;
560 struct rte_flow_item_udp udp;
561 struct rte_flow_item_tcp tcp;
563 struct rte_flow_action_queue queue;
566 /* Verbs specification header. */
567 struct ibv_spec_header {
568 enum ibv_flow_spec_type type;
573 * Check that an item is fully supported by the NIC matching capability.
576 * Item specification.
578 * Bit-masks covering supported fields to compare with spec, last and mask in
581 * Bit-mask size in bytes.
584 * 0 on success, a negative errno value otherwise and rte_errno is set.
587 mlx5_flow_item_validate(const struct rte_flow_item *item,
588 const uint8_t *mask, unsigned int size)
591 const uint8_t *spec = item->spec;
592 const uint8_t *last = item->last;
593 const uint8_t *m = item->mask ? item->mask : mask;
595 if (!spec && (item->mask || last))
600 * Single-pass check to make sure that:
601 * - item->mask is supported and no bits are set outside the mask.
602 * - Both masked item->spec and item->last are equal (no range
605 for (i = 0; i < size; i++) {
608 if ((m[i] | mask[i]) != mask[i])
610 if (last && ((spec[i] & m[i]) != (last[i] & m[i])))
620 * Validate flow rule attributes.
623 * Flow rule attributes.
625 * Perform verbose error reporting if not NULL.
628 * 0 on success, a negative errno value otherwise and rte_errno is set.
631 mlx5_flow_convert_attributes(const struct rte_flow_attr *attr,
632 struct rte_flow_error *error)
635 rte_flow_error_set(error, ENOTSUP,
636 RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
638 "groups are not supported");
641 if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
642 rte_flow_error_set(error, ENOTSUP,
643 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
645 "priorities are not supported");
649 rte_flow_error_set(error, ENOTSUP,
650 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
652 "egress is not supported");
655 if (attr->transfer) {
656 rte_flow_error_set(error, ENOTSUP,
657 RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
659 "transfer is not supported");
662 if (!attr->ingress) {
663 rte_flow_error_set(error, ENOTSUP,
664 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
666 "only ingress is supported");
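/*
 * Example (informational): attributes accepted by this function (group 0,
 * priority 0 or MLX5_CTRL_FLOW_PRIORITY, ingress only), as a hypothetical
 * application-side initializer:
 *
 *	struct rte_flow_attr attr = {
 *		.group = 0,
 *		.priority = 0,
 *		.ingress = 1,
 *	};
 */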
673 * Extract the requested actions into the parser.
676 * Pointer to Ethernet device.
678 * Associated actions (list terminated by the END action).
680 * Perform verbose error reporting if not NULL.
681 * @param[in, out] parser
682 * Internal parser structure.
685 * 0 on success, a negative errno value otherwise and rte_errno is set.
688 mlx5_flow_convert_actions(struct rte_eth_dev *dev,
689 const struct rte_flow_action actions[],
690 struct rte_flow_error *error,
691 struct mlx5_flow_parse *parser)
693 enum { FATE = 1, MARK = 2, COUNT = 4, };
694 uint32_t overlap = 0;
695 struct priv *priv = dev->data->dev_private;
697 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
698 if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
700 } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
702 goto exit_action_overlap;
705 } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
706 const struct rte_flow_action_queue *queue =
707 (const struct rte_flow_action_queue *)
711 goto exit_action_overlap;
713 if (!queue || (queue->index > (priv->rxqs_n - 1)))
714 goto exit_action_not_supported;
715 parser->queues[0] = queue->index;
716 parser->rss_conf = (struct rte_flow_action_rss){
718 .queue = parser->queues,
720 } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
721 const struct rte_flow_action_rss *rss =
722 (const struct rte_flow_action_rss *)
724 const uint8_t *rss_key;
725 uint32_t rss_key_len;
729 goto exit_action_overlap;
732 rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ) {
733 rte_flow_error_set(error, EINVAL,
734 RTE_FLOW_ERROR_TYPE_ACTION,
736 "the only supported RSS hash"
737 " function is Toeplitz");
740 #ifndef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
741 if (parser->rss_conf.level > 1) {
742 rte_flow_error_set(error, EINVAL,
743 RTE_FLOW_ERROR_TYPE_ACTION,
745 "a nonzero RSS encapsulation"
746 " level is not supported");
750 if (parser->rss_conf.level > 2) {
751 rte_flow_error_set(error, EINVAL,
752 RTE_FLOW_ERROR_TYPE_ACTION,
754 "RSS encapsulation level"
755 " > 1 is not supported");
758 if (rss->types & MLX5_RSS_HF_MASK) {
759 rte_flow_error_set(error, EINVAL,
760 RTE_FLOW_ERROR_TYPE_ACTION,
762 "unsupported RSS type"
767 rss_key_len = rss->key_len;
770 rss_key_len = rss_hash_default_key_len;
771 rss_key = rss_hash_default_key;
773 if (rss_key_len != RTE_DIM(parser->rss_key)) {
774 rte_flow_error_set(error, EINVAL,
775 RTE_FLOW_ERROR_TYPE_ACTION,
777 "RSS hash key must be"
778 " exactly 40 bytes long");
781 if (!rss->queue_num) {
782 rte_flow_error_set(error, EINVAL,
783 RTE_FLOW_ERROR_TYPE_ACTION,
788 if (rss->queue_num > RTE_DIM(parser->queues)) {
789 rte_flow_error_set(error, EINVAL,
790 RTE_FLOW_ERROR_TYPE_ACTION,
792 "too many queues for RSS"
796 for (n = 0; n < rss->queue_num; ++n) {
797 if (rss->queue[n] >= priv->rxqs_n) {
798 rte_flow_error_set(error, EINVAL,
799 RTE_FLOW_ERROR_TYPE_ACTION,
801 "queue id > number of"
806 parser->rss_conf = (struct rte_flow_action_rss){
807 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
808 .level = rss->level ? rss->level : 1,
810 .key_len = rss_key_len,
811 .queue_num = rss->queue_num,
812 .key = memcpy(parser->rss_key, rss_key,
813 sizeof(*rss_key) * rss_key_len),
814 .queue = memcpy(parser->queues, rss->queue,
815 sizeof(*rss->queue) *
818 } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
819 const struct rte_flow_action_mark *mark =
820 (const struct rte_flow_action_mark *)
824 goto exit_action_overlap;
827 rte_flow_error_set(error, EINVAL,
828 RTE_FLOW_ERROR_TYPE_ACTION,
830 "mark must be defined");
832 } else if (mark->id >= MLX5_FLOW_MARK_MAX) {
833 rte_flow_error_set(error, ENOTSUP,
834 RTE_FLOW_ERROR_TYPE_ACTION,
836 "mark must be between 0"
841 parser->mark_id = mark->id;
842 } else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
844 goto exit_action_overlap;
847 } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
848 priv->config.flow_counter_en) {
850 goto exit_action_overlap;
854 goto exit_action_not_supported;
857 /* When fate is unknown, drop traffic. */
858 if (!(overlap & FATE))
860 if (parser->drop && parser->mark)
862 if (!parser->rss_conf.queue_num && !parser->drop) {
863 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
864 NULL, "no valid action");
868 exit_action_not_supported:
869 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
870 actions, "action not supported");
873 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
874 actions, "overlapping actions are not supported");
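/*
 * Example (informational): a valid action list combines at most one fate
 * action (DROP, QUEUE or RSS) with optional MARK/FLAG and COUNT, e.g. a
 * hypothetical application-side request:
 *
 *	struct rte_flow_action actions[] = {
 *		{
 *			.type = RTE_FLOW_ACTION_TYPE_MARK,
 *			.conf = &(struct rte_flow_action_mark){ .id = 42 },
 *		},
 *		{
 *			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
 *			.conf = &(struct rte_flow_action_queue){ .index = 0 },
 *		},
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 *
 * Two fate actions in the same list trip the overlap check above.
 */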
882 * Pattern specification (list terminated by the END pattern item).
884 * Perform verbose error reporting if not NULL.
885 * @param[in, out] parser
886 * Internal parser structure.
889 * 0 on success, a negative errno value otherwise and rte_errno is set.
892 mlx5_flow_convert_items_validate(struct rte_eth_dev *dev,
893 const struct rte_flow_item items[],
894 struct rte_flow_error *error,
895 struct mlx5_flow_parse *parser)
897 struct priv *priv = dev->data->dev_private;
898 const struct mlx5_flow_items *cur_item = mlx5_flow_items;
900 unsigned int last_voids = 0;
903 /* Initialise the offsets to start after verbs attribute. */
904 for (i = 0; i != hash_rxq_init_n; ++i)
905 parser->queue[i].offset = sizeof(struct ibv_flow_attr);
906 for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
907 const struct mlx5_flow_items *token = NULL;
910 if (items->type == RTE_FLOW_ITEM_TYPE_VOID) {
916 cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
918 if (cur_item->items[i] == items->type) {
919 token = &mlx5_flow_items[items->type];
925 goto exit_item_not_supported;
928 ret = mlx5_flow_item_validate(items,
929 (const uint8_t *)cur_item->mask,
932 goto exit_item_not_supported;
933 if (IS_TUNNEL(items->type)) {
934 if (parser->tunnel &&
935 !((items - last_voids - 1)->type ==
936 RTE_FLOW_ITEM_TYPE_GRE && items->type ==
937 RTE_FLOW_ITEM_TYPE_MPLS)) {
938 rte_flow_error_set(error, ENOTSUP,
939 RTE_FLOW_ERROR_TYPE_ITEM,
941 "Cannot recognize multiple"
942 " tunnel encapsulations.");
945 if (items->type == RTE_FLOW_ITEM_TYPE_MPLS &&
946 !priv->config.mpls_en) {
947 rte_flow_error_set(error, ENOTSUP,
948 RTE_FLOW_ERROR_TYPE_ITEM,
950 "MPLS not supported or"
951 " disabled in firmware"
955 if (!priv->config.tunnel_en &&
956 parser->rss_conf.level > 1) {
957 rte_flow_error_set(error, ENOTSUP,
958 RTE_FLOW_ERROR_TYPE_ITEM,
960 "RSS on tunnel is not supported");
963 parser->inner = IBV_FLOW_SPEC_INNER;
964 parser->tunnel = flow_ptype[items->type];
967 parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
969 for (n = 0; n != hash_rxq_init_n; ++n)
970 parser->queue[n].offset += cur_item->dst_sz;
975 parser->queue[HASH_RXQ_ETH].offset +=
976 sizeof(struct ibv_flow_spec_action_drop);
979 for (i = 0; i != hash_rxq_init_n; ++i)
980 parser->queue[i].offset +=
981 sizeof(struct ibv_flow_spec_action_tag);
984 unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
986 for (i = 0; i != hash_rxq_init_n; ++i)
987 parser->queue[i].offset += size;
990 exit_item_not_supported:
991 return rte_flow_error_set(error, -ret, RTE_FLOW_ERROR_TYPE_ITEM,
992 items, "item not supported");
996 * Allocate memory space to store verbs flow attributes.
999 * Amount of bytes to allocate.
1001 * Perform verbose error reporting if not NULL.
1004 * A verbs flow attribute on success, NULL otherwise and rte_errno is set.
1006 static struct ibv_flow_attr *
1007 mlx5_flow_convert_allocate(unsigned int size, struct rte_flow_error *error)
1009 struct ibv_flow_attr *ibv_attr;
1011 ibv_attr = rte_calloc(__func__, 1, size, 0);
1013 rte_flow_error_set(error, ENOMEM,
1014 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1016 "cannot allocate verbs spec attributes");
1023 * Give inner packet matching a higher priority than non-inner matching.
1027 * Pointer to Ethernet device.
1028 * @param[in, out] parser
1029 * Internal parser structure.
1031 * User flow attribute.
1034 mlx5_flow_update_priority(struct rte_eth_dev *dev,
1035 struct mlx5_flow_parse *parser,
1036 const struct rte_flow_attr *attr)
1038 struct priv *priv = dev->data->dev_private;
1042 /*                      8 priorities    >= 16 priorities
1043 * Control flow:         4-7             8-15
1044 * User normal flow:     1-3             4-7
1045 * User tunnel flow:     0-2             0-3
1047 priority = attr->priority * MLX5_VERBS_FLOW_PRIO_8;
1048 if (priv->config.max_verbs_prio == MLX5_VERBS_FLOW_PRIO_8)
1051 * Lower the Verbs priority of non-tunnel flows by 1 when only 8 Verbs
1052 * priorities are supported, by 4 otherwise.
1054 if (!parser->inner) {
1055 if (priv->config.max_verbs_prio == MLX5_VERBS_FLOW_PRIO_8)
1058 priority += MLX5_VERBS_FLOW_PRIO_8 / 2;
1061 parser->queue[HASH_RXQ_ETH].ibv_attr->priority = priority +
1062 hash_rxq_init[HASH_RXQ_ETH].flow_priority;
1065 for (i = 0; i != hash_rxq_init_n; ++i) {
1066 if (!parser->queue[i].ibv_attr)
1068 parser->queue[i].ibv_attr->priority = priority +
1069 hash_rxq_init[i].flow_priority;
1074 * Finalise verbs flow attributes.
1076 * @param[in, out] parser
1077 * Internal parser structure.
1080 mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
1083 uint32_t inner = parser->inner;
1085 /* Don't create extra flows for outer RSS. */
1086 if (parser->tunnel && parser->rss_conf.level < 2)
1089 * Fill missing layers in verbs specifications, or compute the correct
1090 * offset to allocate the memory space for the attributes and
1093 for (i = 0; i != hash_rxq_init_n - 1; ++i) {
1095 struct ibv_flow_spec_ipv4_ext ipv4;
1096 struct ibv_flow_spec_ipv6 ipv6;
1097 struct ibv_flow_spec_tcp_udp udp_tcp;
1098 struct ibv_flow_spec_eth eth;
1103 if (i == parser->layer)
1105 if (parser->layer == HASH_RXQ_ETH ||
1106 parser->layer == HASH_RXQ_TUNNEL) {
1107 if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
1108 size = sizeof(struct ibv_flow_spec_ipv4_ext);
1109 specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
1110 .type = inner | IBV_FLOW_SPEC_IPV4_EXT,
1114 size = sizeof(struct ibv_flow_spec_ipv6);
1115 specs.ipv6 = (struct ibv_flow_spec_ipv6){
1116 .type = inner | IBV_FLOW_SPEC_IPV6,
1120 if (parser->queue[i].ibv_attr) {
1121 dst = (void *)((uintptr_t)
1122 parser->queue[i].ibv_attr +
1123 parser->queue[i].offset);
1124 memcpy(dst, &specs, size);
1125 ++parser->queue[i].ibv_attr->num_of_specs;
1127 parser->queue[i].offset += size;
1129 if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
1130 (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
1131 size = sizeof(struct ibv_flow_spec_tcp_udp);
1132 specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
1133 .type = inner | ((i == HASH_RXQ_UDPV4 ||
1134 i == HASH_RXQ_UDPV6) ?
1139 if (parser->queue[i].ibv_attr) {
1140 dst = (void *)((uintptr_t)
1141 parser->queue[i].ibv_attr +
1142 parser->queue[i].offset);
1143 memcpy(dst, &specs, size);
1144 ++parser->queue[i].ibv_attr->num_of_specs;
1146 parser->queue[i].offset += size;
1152 * Update flows according to pattern and RSS hash fields.
1154 * @param[in, out] parser
1155 * Internal parser structure.
1158 * 0 on success, a negative errno value otherwise and rte_errno is set.
1161 mlx5_flow_convert_rss(struct mlx5_flow_parse *parser)
1164 enum hash_rxq_type start;
1165 enum hash_rxq_type layer;
1166 int outer = parser->tunnel && parser->rss_conf.level < 2;
1167 uint64_t rss = parser->rss_conf.types;
1169 layer = outer ? parser->out_layer : parser->layer;
1170 if (layer == HASH_RXQ_TUNNEL)
1171 layer = HASH_RXQ_ETH;
1173 /* Only one hash type for outer RSS. */
1174 if (rss && layer == HASH_RXQ_ETH) {
1175 start = HASH_RXQ_TCPV4;
1176 } else if (rss && layer != HASH_RXQ_ETH &&
1177 !(rss & hash_rxq_init[layer].dpdk_rss_hf)) {
1178 /* If RSS does not match the L4 pattern, try L3 RSS. */
1179 if (layer < HASH_RXQ_IPV4)
1180 layer = HASH_RXQ_IPV4;
1181 else if (layer > HASH_RXQ_IPV4 && layer < HASH_RXQ_IPV6)
1182 layer = HASH_RXQ_IPV6;
1187 /* Scan for the first valid hash type. */
1188 for (i = start; rss && i <= layer; ++i) {
1189 if (!parser->queue[i].ibv_attr)
1191 if (hash_rxq_init[i].dpdk_rss_hf & rss)
1194 if (rss && i <= layer)
1195 parser->queue[layer].hash_fields =
1196 hash_rxq_init[i].hash_fields;
1197 /* Trim unused hash types. */
1198 for (i = 0; i != hash_rxq_init_n; ++i) {
1199 if (parser->queue[i].ibv_attr && i != layer) {
1200 rte_free(parser->queue[i].ibv_attr);
1201 parser->queue[i].ibv_attr = NULL;
1205 /* Expand for inner or normal RSS. */
1206 if (rss && (layer == HASH_RXQ_ETH || layer == HASH_RXQ_IPV4))
1207 start = HASH_RXQ_TCPV4;
1208 else if (rss && layer == HASH_RXQ_IPV6)
1209 start = HASH_RXQ_TCPV6;
1212 /* For L4 pattern, try L3 RSS if no L4 RSS. */
1213 /* Trim unused hash types. */
1214 for (i = 0; i != hash_rxq_init_n; ++i) {
1215 if (!parser->queue[i].ibv_attr)
1217 if (i < start || i > layer) {
1218 rte_free(parser->queue[i].ibv_attr);
1219 parser->queue[i].ibv_attr = NULL;
1224 if (hash_rxq_init[i].dpdk_rss_hf & rss) {
1225 parser->queue[i].hash_fields =
1226 hash_rxq_init[i].hash_fields;
1227 } else if (i != layer) {
1228 /* Remove unused RSS expansion. */
1229 rte_free(parser->queue[i].ibv_attr);
1230 parser->queue[i].ibv_attr = NULL;
1231 } else if (layer < HASH_RXQ_IPV4 &&
1232 (hash_rxq_init[HASH_RXQ_IPV4].dpdk_rss_hf &
1234 /* Allow IPv4 RSS on L4 pattern. */
1235 parser->queue[i].hash_fields =
1236 hash_rxq_init[HASH_RXQ_IPV4]
1238 } else if (i > HASH_RXQ_IPV4 && i < HASH_RXQ_IPV6 &&
1239 (hash_rxq_init[HASH_RXQ_IPV6].dpdk_rss_hf &
1241 /* Allow IPv6 RSS on L4 pattern. */
1242 parser->queue[i].hash_fields =
1243 hash_rxq_init[HASH_RXQ_IPV6]
1252 * Validate and convert a flow supported by the NIC.
1255 * Pointer to Ethernet device.
1257 * Flow rule attributes.
1258 * @param[in] pattern
1259 * Pattern specification (list terminated by the END pattern item).
1260 * @param[in] actions
1261 * Associated actions (list terminated by the END action).
1263 * Perform verbose error reporting if not NULL.
1264 * @param[in, out] parser
1265 * Internal parser structure.
1268 * 0 on success, a negative errno value otherwise and rte_errno is set.
1271 mlx5_flow_convert(struct rte_eth_dev *dev,
1272 const struct rte_flow_attr *attr,
1273 const struct rte_flow_item items[],
1274 const struct rte_flow_action actions[],
1275 struct rte_flow_error *error,
1276 struct mlx5_flow_parse *parser)
1278 const struct mlx5_flow_items *cur_item = mlx5_flow_items;
1282 /* First step. Validate the attributes, items and actions. */
1283 *parser = (struct mlx5_flow_parse){
1284 .create = parser->create,
1285 .layer = HASH_RXQ_ETH,
1286 .mark_id = MLX5_FLOW_MARK_DEFAULT,
1288 ret = mlx5_flow_convert_attributes(attr, error);
1291 ret = mlx5_flow_convert_actions(dev, actions, error, parser);
1294 ret = mlx5_flow_convert_items_validate(dev, items, error, parser);
1297 mlx5_flow_convert_finalise(parser);
1300 * Allocate the memory space to store verbs specifications.
1303 unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;
1305 parser->queue[HASH_RXQ_ETH].ibv_attr =
1306 mlx5_flow_convert_allocate(offset, error);
1307 if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
1309 parser->queue[HASH_RXQ_ETH].offset =
1310 sizeof(struct ibv_flow_attr);
1312 for (i = 0; i != hash_rxq_init_n; ++i) {
1313 unsigned int offset;
1315 offset = parser->queue[i].offset;
1316 parser->queue[i].ibv_attr =
1317 mlx5_flow_convert_allocate(offset, error);
1318 if (!parser->queue[i].ibv_attr)
1320 parser->queue[i].offset = sizeof(struct ibv_flow_attr);
1323 /* Third step. Conversion parse, fill the specifications. */
1326 parser->layer = HASH_RXQ_ETH;
1327 for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
1328 struct mlx5_flow_data data = {
1334 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
1336 cur_item = &mlx5_flow_items[items->type];
1337 ret = cur_item->convert(items,
1338 (cur_item->default_mask ?
1339 cur_item->default_mask :
1345 if (!parser->drop) {
1346 /* RSS check, remove unused hash types. */
1347 ret = mlx5_flow_convert_rss(parser);
1350 /* Complete missing specification. */
1351 mlx5_flow_convert_finalise(parser);
1353 mlx5_flow_update_priority(dev, parser, attr);
1355 mlx5_flow_create_flag_mark(parser, parser->mark_id);
1356 if (parser->count && parser->create) {
1357 mlx5_flow_create_count(dev, parser);
1359 goto exit_count_error;
1362 /* Only verification is expected, all resources should be released. */
1363 if (!parser->create) {
1364 for (i = 0; i != hash_rxq_init_n; ++i) {
1365 if (parser->queue[i].ibv_attr) {
1366 rte_free(parser->queue[i].ibv_attr);
1367 parser->queue[i].ibv_attr = NULL;
1373 for (i = 0; i != hash_rxq_init_n; ++i) {
1374 if (parser->queue[i].ibv_attr) {
1375 rte_free(parser->queue[i].ibv_attr);
1376 parser->queue[i].ibv_attr = NULL;
1379 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1380 NULL, "cannot allocate verbs spec attributes");
1383 rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1384 NULL, "cannot create counter");
1389 * Copy the specification created into the flow.
1392 * Internal parser structure.
1394 * Create specification.
1396 * Size in bytes of the specification to copy.
1399 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
1405 for (i = 0; i != hash_rxq_init_n; ++i) {
1406 if (!parser->queue[i].ibv_attr)
1408 dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1409 parser->queue[i].offset);
1410 memcpy(dst, src, size);
1411 ++parser->queue[i].ibv_attr->num_of_specs;
1412 parser->queue[i].offset += size;
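/*
 * Example (informational): the item converters below build a Verbs spec on
 * the stack and append it through this helper, along the lines of:
 *
 *	struct ibv_flow_spec_eth eth = {
 *		.type = IBV_FLOW_SPEC_ETH,
 *		.size = sizeof(eth),
 *	};
 *
 *	mlx5_flow_create_copy(parser, &eth, sizeof(eth));
 *
 * (the real converters also OR parser->inner into the type for tunnels).
 */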
1417 * Convert Ethernet item to Verbs specification.
1420 * Item specification.
1421 * @param default_mask[in]
1422 * Default bit-masks to use when item->mask is not provided.
1423 * @param data[in, out]
1427 * 0 on success, a negative errno value otherwise and rte_errno is set.
1430 mlx5_flow_create_eth(const struct rte_flow_item *item,
1431 const void *default_mask,
1432 struct mlx5_flow_data *data)
1434 const struct rte_flow_item_eth *spec = item->spec;
1435 const struct rte_flow_item_eth *mask = item->mask;
1436 struct mlx5_flow_parse *parser = data->parser;
1437 const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1438 struct ibv_flow_spec_eth eth = {
1439 .type = parser->inner | IBV_FLOW_SPEC_ETH,
1443 parser->layer = HASH_RXQ_ETH;
1448 mask = default_mask;
1449 memcpy(ð.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
1450 memcpy(ð.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
1451 eth.val.ether_type = spec->type;
1452 memcpy(ð.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
1453 memcpy(ð.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
1454 eth.mask.ether_type = mask->type;
1455 /* Remove unwanted bits from values. */
1456 for (i = 0; i < ETHER_ADDR_LEN; ++i) {
1457 eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
1458 eth.val.src_mac[i] &= eth.mask.src_mac[i];
1460 eth.val.ether_type &= eth.mask.ether_type;
1462 mlx5_flow_create_copy(parser, ð, eth_size);
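/*
 * Example (informational): an Ethernet item matching a single destination
 * MAC address, with illustrative values, as an application could pass it:
 *
 *	struct rte_flow_item_eth eth_spec = {
 *		.dst.addr_bytes = "\x00\x11\x22\x33\x44\x55",
 *	};
 *	struct rte_flow_item_eth eth_mask = {
 *		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *	};
 *	struct rte_flow_item item = {
 *		.type = RTE_FLOW_ITEM_TYPE_ETH,
 *		.spec = &eth_spec,
 *		.mask = &eth_mask,
 *	};
 */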
1467 * Convert VLAN item to Verbs specification.
1470 * Item specification.
1471 * @param default_mask[in]
1472 * Default bit-masks to use when item->mask is not provided.
1473 * @param data[in, out]
1477 * 0 on success, a negative errno value otherwise and rte_errno is set.
1480 mlx5_flow_create_vlan(const struct rte_flow_item *item,
1481 const void *default_mask,
1482 struct mlx5_flow_data *data)
1484 const struct rte_flow_item_vlan *spec = item->spec;
1485 const struct rte_flow_item_vlan *mask = item->mask;
1486 struct mlx5_flow_parse *parser = data->parser;
1487 struct ibv_flow_spec_eth *eth;
1488 const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1489 const char *msg = "VLAN cannot be empty";
1494 mask = default_mask;
1496 for (i = 0; i != hash_rxq_init_n; ++i) {
1497 if (!parser->queue[i].ibv_attr)
1500 eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1501 parser->queue[i].offset - eth_size);
1502 eth->val.vlan_tag = spec->tci;
1503 eth->mask.vlan_tag = mask->tci;
1504 eth->val.vlan_tag &= eth->mask.vlan_tag;
1506 * From the Verbs perspective, an empty VLAN is equivalent
1507 * to a packet without a VLAN layer.
1509 if (!eth->mask.vlan_tag)
1511 /* Outer TPID cannot be matched. */
1512 if (eth->mask.ether_type) {
1513 msg = "VLAN TPID matching is not supported";
1516 eth->val.ether_type = spec->inner_type;
1517 eth->mask.ether_type = mask->inner_type;
1518 eth->val.ether_type &= eth->mask.ether_type;
1523 return rte_flow_error_set(data->error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM,
1528 * Convert IPv4 item to Verbs specification.
1531 * Item specification.
1532 * @param default_mask[in]
1533 * Default bit-masks to use when item->mask is not provided.
1534 * @param data[in, out]
1538 * 0 on success, a negative errno value otherwise and rte_errno is set.
1541 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
1542 const void *default_mask,
1543 struct mlx5_flow_data *data)
1545 struct priv *priv = data->dev->data->dev_private;
1546 const struct rte_flow_item_ipv4 *spec = item->spec;
1547 const struct rte_flow_item_ipv4 *mask = item->mask;
1548 struct mlx5_flow_parse *parser = data->parser;
1549 unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
1550 struct ibv_flow_spec_ipv4_ext ipv4 = {
1551 .type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
1555 if (parser->layer == HASH_RXQ_TUNNEL &&
1556 parser->tunnel == ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] &&
1557 !priv->config.l3_vxlan_en)
1558 return rte_flow_error_set(data->error, EINVAL,
1559 RTE_FLOW_ERROR_TYPE_ITEM,
1561 "L3 VXLAN not enabled by device"
1562 " parameter and/or not configured"
1564 parser->layer = HASH_RXQ_IPV4;
1567 mask = default_mask;
1568 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
1569 .src_ip = spec->hdr.src_addr,
1570 .dst_ip = spec->hdr.dst_addr,
1571 .proto = spec->hdr.next_proto_id,
1572 .tos = spec->hdr.type_of_service,
1574 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
1575 .src_ip = mask->hdr.src_addr,
1576 .dst_ip = mask->hdr.dst_addr,
1577 .proto = mask->hdr.next_proto_id,
1578 .tos = mask->hdr.type_of_service,
1580 /* Remove unwanted bits from values. */
1581 ipv4.val.src_ip &= ipv4.mask.src_ip;
1582 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
1583 ipv4.val.proto &= ipv4.mask.proto;
1584 ipv4.val.tos &= ipv4.mask.tos;
1586 mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
1591 * Convert IPv6 item to Verbs specification.
1594 * Item specification.
1595 * @param default_mask[in]
1596 * Default bit-masks to use when item->mask is not provided.
1597 * @param data[in, out]
1601 * 0 on success, a negative errno value otherwise and rte_errno is set.
1604 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
1605 const void *default_mask,
1606 struct mlx5_flow_data *data)
1608 struct priv *priv = data->dev->data->dev_private;
1609 const struct rte_flow_item_ipv6 *spec = item->spec;
1610 const struct rte_flow_item_ipv6 *mask = item->mask;
1611 struct mlx5_flow_parse *parser = data->parser;
1612 unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
1613 struct ibv_flow_spec_ipv6 ipv6 = {
1614 .type = parser->inner | IBV_FLOW_SPEC_IPV6,
1618 if (parser->layer == HASH_RXQ_TUNNEL &&
1619 parser->tunnel == ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] &&
1620 !priv->config.l3_vxlan_en)
1621 return rte_flow_error_set(data->error, EINVAL,
1622 RTE_FLOW_ERROR_TYPE_ITEM,
1624 "L3 VXLAN not enabled by device"
1625 " parameter and/or not configured"
1627 parser->layer = HASH_RXQ_IPV6;
1630 uint32_t vtc_flow_val;
1631 uint32_t vtc_flow_mask;
1634 mask = default_mask;
1635 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
1636 RTE_DIM(ipv6.val.src_ip));
1637 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
1638 RTE_DIM(ipv6.val.dst_ip));
1639 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
1640 RTE_DIM(ipv6.mask.src_ip));
1641 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
1642 RTE_DIM(ipv6.mask.dst_ip));
1643 vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
1644 vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
1645 ipv6.val.flow_label =
1646 rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
1648 ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
1650 ipv6.val.next_hdr = spec->hdr.proto;
1651 ipv6.val.hop_limit = spec->hdr.hop_limits;
1652 ipv6.mask.flow_label =
1653 rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
1655 ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
1657 ipv6.mask.next_hdr = mask->hdr.proto;
1658 ipv6.mask.hop_limit = mask->hdr.hop_limits;
1659 /* Remove unwanted bits from values. */
1660 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
1661 ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
1662 ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
1664 ipv6.val.flow_label &= ipv6.mask.flow_label;
1665 ipv6.val.traffic_class &= ipv6.mask.traffic_class;
1666 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
1667 ipv6.val.hop_limit &= ipv6.mask.hop_limit;
1669 mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
1674 * Convert UDP item to Verbs specification.
1677 * Item specification.
1678 * @param default_mask[in]
1679 * Default bit-masks to use when item->mask is not provided.
1680 * @param data[in, out]
1684 * 0 on success, a negative errno value otherwise and rte_errno is set.
1687 mlx5_flow_create_udp(const struct rte_flow_item *item,
1688 const void *default_mask,
1689 struct mlx5_flow_data *data)
1691 const struct rte_flow_item_udp *spec = item->spec;
1692 const struct rte_flow_item_udp *mask = item->mask;
1693 struct mlx5_flow_parse *parser = data->parser;
1694 unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1695 struct ibv_flow_spec_tcp_udp udp = {
1696 .type = parser->inner | IBV_FLOW_SPEC_UDP,
1700 if (parser->layer == HASH_RXQ_IPV4)
1701 parser->layer = HASH_RXQ_UDPV4;
1703 parser->layer = HASH_RXQ_UDPV6;
1706 mask = default_mask;
1707 udp.val.dst_port = spec->hdr.dst_port;
1708 udp.val.src_port = spec->hdr.src_port;
1709 udp.mask.dst_port = mask->hdr.dst_port;
1710 udp.mask.src_port = mask->hdr.src_port;
1711 /* Remove unwanted bits from values. */
1712 udp.val.src_port &= udp.mask.src_port;
1713 udp.val.dst_port &= udp.mask.dst_port;
1715 mlx5_flow_create_copy(parser, &udp, udp_size);
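/*
 * Example (informational): a UDP item matching only the IANA VXLAN
 * destination port (illustrative values):
 *
 *	struct rte_flow_item_udp udp_spec = {
 *		.hdr = { .dst_port = rte_cpu_to_be_16(4789) },
 *	};
 *	struct rte_flow_item_udp udp_mask = {
 *		.hdr = { .dst_port = rte_cpu_to_be_16(0xffff) },
 *	};
 */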
1720 * Convert TCP item to Verbs specification.
1723 * Item specification.
1724 * @param default_mask[in]
1725 * Default bit-masks to use when item->mask is not provided.
1726 * @param data[in, out]
1730 * 0 on success, a negative errno value otherwise and rte_errno is set.
1733 mlx5_flow_create_tcp(const struct rte_flow_item *item,
1734 const void *default_mask,
1735 struct mlx5_flow_data *data)
1737 const struct rte_flow_item_tcp *spec = item->spec;
1738 const struct rte_flow_item_tcp *mask = item->mask;
1739 struct mlx5_flow_parse *parser = data->parser;
1740 unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1741 struct ibv_flow_spec_tcp_udp tcp = {
1742 .type = parser->inner | IBV_FLOW_SPEC_TCP,
1746 if (parser->layer == HASH_RXQ_IPV4)
1747 parser->layer = HASH_RXQ_TCPV4;
1749 parser->layer = HASH_RXQ_TCPV6;
1752 mask = default_mask;
1753 tcp.val.dst_port = spec->hdr.dst_port;
1754 tcp.val.src_port = spec->hdr.src_port;
1755 tcp.mask.dst_port = mask->hdr.dst_port;
1756 tcp.mask.src_port = mask->hdr.src_port;
1757 /* Remove unwanted bits from values. */
1758 tcp.val.src_port &= tcp.mask.src_port;
1759 tcp.val.dst_port &= tcp.mask.dst_port;
1761 mlx5_flow_create_copy(parser, &tcp, tcp_size);
1766 * Convert VXLAN item to Verbs specification.
1769 * Item specification.
1770 * @param default_mask[in]
1771 * Default bit-masks to use when item->mask is not provided.
1772 * @param data[in, out]
1776 * 0 on success, a negative errno value otherwise and rte_errno is set.
1779 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
1780 const void *default_mask,
1781 struct mlx5_flow_data *data)
1783 const struct rte_flow_item_vxlan *spec = item->spec;
1784 const struct rte_flow_item_vxlan *mask = item->mask;
1785 struct mlx5_flow_parse *parser = data->parser;
1786 unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1787 struct ibv_flow_spec_tunnel vxlan = {
1788 .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1797 parser->inner = IBV_FLOW_SPEC_INNER;
1798 parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)];
1799 parser->out_layer = parser->layer;
1800 parser->layer = HASH_RXQ_TUNNEL;
1803 mask = default_mask;
1804 memcpy(&id.vni[1], spec->vni, 3);
1805 vxlan.val.tunnel_id = id.vlan_id;
1806 memcpy(&id.vni[1], mask->vni, 3);
1807 vxlan.mask.tunnel_id = id.vlan_id;
1808 /* Remove unwanted bits from values. */
1809 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1812 * Tunnel id 0 is equivalent to not adding a VXLAN layer: if only this
1813 * layer is defined in the Verbs specification, it is interpreted as a
1814 * wildcard and all packets will match this rule; if it follows a full
1815 * stack layer (e.g. eth / ipv4 / udp), all packets matching the
1816 * preceding layers will also match this rule.
1817 * To avoid such a situation, VNI 0 is currently refused.
1819 /* Only allow tunnel w/o tunnel id pattern after proper outer spec. */
1820 if (parser->out_layer == HASH_RXQ_ETH && !vxlan.val.tunnel_id)
1821 return rte_flow_error_set(data->error, EINVAL,
1822 RTE_FLOW_ERROR_TYPE_ITEM,
1824 "VxLAN vni cannot be 0");
1825 mlx5_flow_create_copy(parser, &vxlan, size);
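/*
 * Example (informational): the VNI is carried as three network-order bytes,
 * so VNI 123 (0x00007b) is expressed as:
 *
 *	struct rte_flow_item_vxlan vxlan_spec = {
 *		.vni = "\x00\x00\x7b",
 *	};
 *
 * A zero VNI following a bare outer Ethernet layer is rejected above.
 */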
1830 * Convert VXLAN-GPE item to Verbs specification.
1833 * Item specification.
1834 * @param default_mask[in]
1835 * Default bit-masks to use when item->mask is not provided.
1836 * @param data[in, out]
1840 * 0 on success, a negative errno value otherwise and rte_errno is set.
1843 mlx5_flow_create_vxlan_gpe(const struct rte_flow_item *item,
1844 const void *default_mask,
1845 struct mlx5_flow_data *data)
1847 struct priv *priv = data->dev->data->dev_private;
1848 const struct rte_flow_item_vxlan_gpe *spec = item->spec;
1849 const struct rte_flow_item_vxlan_gpe *mask = item->mask;
1850 struct mlx5_flow_parse *parser = data->parser;
1851 unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1852 struct ibv_flow_spec_tunnel vxlan = {
1853 .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1861 if (!priv->config.l3_vxlan_en)
1862 return rte_flow_error_set(data->error, EINVAL,
1863 RTE_FLOW_ERROR_TYPE_ITEM,
1865 "L3 VXLAN not enabled by device"
1866 " parameter and/or not configured"
1869 parser->inner = IBV_FLOW_SPEC_INNER;
1870 parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN_GPE)];
1871 parser->out_layer = parser->layer;
1872 parser->layer = HASH_RXQ_TUNNEL;
1875 mask = default_mask;
1876 memcpy(&id.vni[1], spec->vni, 3);
1877 vxlan.val.tunnel_id = id.vlan_id;
1878 memcpy(&id.vni[1], mask->vni, 3);
1879 vxlan.mask.tunnel_id = id.vlan_id;
1881 return rte_flow_error_set(data->error, EINVAL,
1882 RTE_FLOW_ERROR_TYPE_ITEM,
1884 "VxLAN-GPE protocol not"
1886 /* Remove unwanted bits from values. */
1887 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1890 * Tunnel id 0 is equivalent to not adding a VXLAN layer: if only this
1891 * layer is defined in the Verbs specification, it is interpreted as a
1892 * wildcard and all packets will match this rule; if it follows a full
1893 * stack layer (e.g. eth / ipv4 / udp), all packets matching the
1894 * preceding layers will also match this rule.
1895 * To avoid such a situation, VNI 0 is currently refused.
1897 /* Only allow tunnel w/o tunnel id pattern after proper outer spec. */
1898 if (parser->out_layer == HASH_RXQ_ETH && !vxlan.val.tunnel_id)
1899 return rte_flow_error_set(data->error, EINVAL,
1900 RTE_FLOW_ERROR_TYPE_ITEM,
1902 "VxLAN-GPE vni cannot be 0");
1903 mlx5_flow_create_copy(parser, &vxlan, size);
1908 * Convert GRE item to Verbs specification.
1911 * Item specification.
1912 * @param default_mask[in]
1913 * Default bit-masks to use when item->mask is not provided.
1914 * @param data[in, out]
1918 * 0 on success, a negative errno value otherwise and rte_errno is set.
1921 mlx5_flow_create_gre(const struct rte_flow_item *item,
1922 const void *default_mask,
1923 struct mlx5_flow_data *data)
1925 struct mlx5_flow_parse *parser = data->parser;
1926 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
1928 unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1929 struct ibv_flow_spec_tunnel tunnel = {
1930 .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1934 const struct rte_flow_item_gre *spec = item->spec;
1935 const struct rte_flow_item_gre *mask = item->mask;
1936 unsigned int size = sizeof(struct ibv_flow_spec_gre);
1937 struct ibv_flow_spec_gre tunnel = {
1938 .type = parser->inner | IBV_FLOW_SPEC_GRE,
1942 struct ibv_flow_spec_ipv4_ext *ipv4;
1943 struct ibv_flow_spec_ipv6 *ipv6;
1946 parser->inner = IBV_FLOW_SPEC_INNER;
1947 parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_GRE)];
1948 parser->out_layer = parser->layer;
1949 parser->layer = HASH_RXQ_TUNNEL;
1950 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
1953 mask = default_mask;
1954 tunnel.val.c_ks_res0_ver = spec->c_rsvd0_ver;
1955 tunnel.val.protocol = spec->protocol;
1956 tunnel.mask.c_ks_res0_ver = mask->c_rsvd0_ver;
1957 tunnel.mask.protocol = mask->protocol;
1958 /* Remove unwanted bits from values. */
1959 tunnel.val.c_ks_res0_ver &= tunnel.mask.c_ks_res0_ver;
1960 tunnel.val.protocol &= tunnel.mask.protocol;
1961 tunnel.val.key &= tunnel.mask.key;
1964 /* Update encapsulation IP layer protocol. */
1965 for (i = 0; i != hash_rxq_init_n; ++i) {
1966 if (!parser->queue[i].ibv_attr)
1968 if (parser->out_layer == HASH_RXQ_IPV4) {
1969 ipv4 = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1970 parser->queue[i].offset -
1971 sizeof(struct ibv_flow_spec_ipv4_ext));
1972 if (ipv4->mask.proto && ipv4->val.proto != MLX5_GRE)
1974 ipv4->val.proto = MLX5_GRE;
1975 ipv4->mask.proto = 0xff;
1976 } else if (parser->out_layer == HASH_RXQ_IPV6) {
1977 ipv6 = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1978 parser->queue[i].offset -
1979 sizeof(struct ibv_flow_spec_ipv6));
1980 if (ipv6->mask.next_hdr &&
1981 ipv6->val.next_hdr != MLX5_GRE)
1983 ipv6->val.next_hdr = MLX5_GRE;
1984 ipv6->mask.next_hdr = 0xff;
1987 if (i != hash_rxq_init_n)
1988 return rte_flow_error_set(data->error, EINVAL,
1989 RTE_FLOW_ERROR_TYPE_ITEM,
1991 "IP protocol of GRE must be 47");
1992 mlx5_flow_create_copy(parser, &tunnel, size);
1997 * Convert MPLS item to Verbs specification.
1998 * MPLS tunnel types currently supported are MPLS-in-GRE and MPLS-in-UDP.
2001 * Item specification.
2002 * @param default_mask[in]
2003 * Default bit-masks to use when item->mask is not provided.
2004 * @param data[in, out]
2008 * 0 on success, a negative errno value otherwise and rte_errno is set.
2011 mlx5_flow_create_mpls(const struct rte_flow_item *item,
2012 const void *default_mask,
2013 struct mlx5_flow_data *data)
2015 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
2017 return rte_flow_error_set(data->error, ENOTSUP,
2018 RTE_FLOW_ERROR_TYPE_ITEM,
2020 "MPLS is not supported by driver");
2022 const struct rte_flow_item_mpls *spec = item->spec;
2023 const struct rte_flow_item_mpls *mask = item->mask;
2024 struct mlx5_flow_parse *parser = data->parser;
2025 unsigned int size = sizeof(struct ibv_flow_spec_mpls);
2026 struct ibv_flow_spec_mpls mpls = {
2027 .type = IBV_FLOW_SPEC_MPLS,
2031 parser->inner = IBV_FLOW_SPEC_INNER;
2032 if (parser->layer == HASH_RXQ_UDPV4 ||
2033 parser->layer == HASH_RXQ_UDPV6) {
2035 ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_MPLS_IN_UDP)];
2036 parser->out_layer = parser->layer;
2039 ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_MPLS_IN_GRE)];
2040 /* parser->out_layer stays as in GRE out_layer. */
2042 parser->layer = HASH_RXQ_TUNNEL;
2045 mask = default_mask;
2047 * The verbs label field includes the entire MPLS header:
2048 * bits 0:19 - label value field.
2049 * bits 20:22 - traffic class field.
2050 * bit 23 - bottom of stack bit.
2051 * bits 24:31 - ttl field.
2053 mpls.val.label = *(const uint32_t *)spec;
2054 mpls.mask.label = *(const uint32_t *)mask;
2055 /* Remove unwanted bits from values. */
2056 mpls.val.label &= mpls.mask.label;
2058 mlx5_flow_create_copy(parser, &mpls, size);
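/*
 * Example (informational): rte_flow_item_mpls stores label(20) | TC(3) | S(1)
 * in label_tc_s, so label 0x12345 with TC 0 and S 0 is "\x12\x34\x50"; the
 * default mask "\xff\xff\xf0" compares only the label bits.
 */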
2064 * Convert mark/flag action to Verbs specification.
2067 * Internal parser structure.
2072 * 0 on success, a negative errno value otherwise and rte_errno is set.
2075 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
2077 unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
2078 struct ibv_flow_spec_action_tag tag = {
2079 .type = IBV_FLOW_SPEC_ACTION_TAG,
2081 .tag_id = mlx5_flow_mark_set(mark_id),
2084 assert(parser->mark);
2085 mlx5_flow_create_copy(parser, &tag, size);
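/*
 * Example (informational): with a MARK action the value typically reaches the
 * application in the received mbuf as:
 *
 *	if (mbuf->ol_flags & PKT_RX_FDIR_ID)
 *		mark = mbuf->hash.fdir.hi;
 *
 * mlx5_flow_mark_set() above translates the user value into the tag
 * programmed in hardware, and the Rx path performs the reverse mapping.
 */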
2090 * Convert count action to Verbs specification.
2093 * Pointer to Ethernet device.
2095 * Pointer to MLX5 flow parser structure.
2098 * 0 on success, a negative errno value otherwise and rte_errno is set.
2101 mlx5_flow_create_count(struct rte_eth_dev *dev __rte_unused,
2102 struct mlx5_flow_parse *parser __rte_unused)
2104 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
2105 struct priv *priv = dev->data->dev_private;
2106 unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
2107 struct ibv_counter_set_init_attr init_attr = {0};
2108 struct ibv_flow_spec_counter_action counter = {
2109 .type = IBV_FLOW_SPEC_ACTION_COUNT,
2111 .counter_set_handle = 0,
2114 init_attr.counter_set_id = 0;
2115 parser->cs = mlx5_glue->create_counter_set(priv->ctx, &init_attr);
2120 counter.counter_set_handle = parser->cs->handle;
2121 mlx5_flow_create_copy(parser, &counter, size);
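/*
 * Example (informational): applications read this counter back with
 * rte_flow_query() and the COUNT action, passing a
 *
 *	struct rte_flow_query_count query = { .reset = 1 };
 *
 * as the data argument; hits and bytes are returned in query.hits and
 * query.bytes (handled by mlx5_flow_query()).
 */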
2127 * Complete flow rule creation with a drop queue.
2130 * Pointer to Ethernet device.
2132 * Internal parser structure.
2134 * Pointer to the rte_flow.
2136 * Perform verbose error reporting if not NULL.
2139 * 0 on success, a negative errno value otherwise and rte_errno is set.
2142 mlx5_flow_create_action_queue_drop(struct rte_eth_dev *dev,
2143 struct mlx5_flow_parse *parser,
2144 struct rte_flow *flow,
2145 struct rte_flow_error *error)
2147 struct priv *priv = dev->data->dev_private;
2148 struct ibv_flow_spec_action_drop *drop;
2149 unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
2154 drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
2155 parser->queue[HASH_RXQ_ETH].offset);
2156 *drop = (struct ibv_flow_spec_action_drop){
2157 .type = IBV_FLOW_SPEC_ACTION_DROP,
2160 ++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
2161 parser->queue[HASH_RXQ_ETH].offset += size;
2162 flow->frxq[HASH_RXQ_ETH].ibv_attr =
2163 parser->queue[HASH_RXQ_ETH].ibv_attr;
2165 flow->cs = parser->cs;
2166 if (!dev->data->dev_started)
2168 parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
2169 flow->frxq[HASH_RXQ_ETH].ibv_flow =
2170 mlx5_glue->create_flow(priv->flow_drop_queue->qp,
2171 flow->frxq[HASH_RXQ_ETH].ibv_attr);
2172 if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2173 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
2174 NULL, "flow rule creation failure");
2180 if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2181 claim_zero(mlx5_glue->destroy_flow
2182 (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2183 flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
2185 if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
2186 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
2187 flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
2190 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2198 * Create hash Rx queues when RSS is enabled.
2201 * Pointer to Ethernet device.
2203 * Internal parser structure.
2205 * Pointer to the rte_flow.
2207 * Perform verbose error reporting if not NULL.
2210 * 0 on success, a negative errno value otherwise and rte_errno is set.
2213 mlx5_flow_create_action_queue_rss(struct rte_eth_dev *dev,
2214 struct mlx5_flow_parse *parser,
2215 struct rte_flow *flow,
2216 struct rte_flow_error *error)
2220 for (i = 0; i != hash_rxq_init_n; ++i) {
2221 if (!parser->queue[i].ibv_attr)
2223 flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
2224 parser->queue[i].ibv_attr = NULL;
2225 flow->frxq[i].hash_fields = parser->queue[i].hash_fields;
2226 if (!dev->data->dev_started)
2228 flow->frxq[i].hrxq =
2230 parser->rss_conf.key,
2231 parser->rss_conf.key_len,
2232 flow->frxq[i].hash_fields,
2233 parser->rss_conf.queue,
2234 parser->rss_conf.queue_num,
2236 parser->rss_conf.level);
2237 if (flow->frxq[i].hrxq)
2239 flow->frxq[i].hrxq =
2241 parser->rss_conf.key,
2242 parser->rss_conf.key_len,
2243 flow->frxq[i].hash_fields,
2244 parser->rss_conf.queue,
2245 parser->rss_conf.queue_num,
2247 parser->rss_conf.level);
2248 if (!flow->frxq[i].hrxq) {
2249 return rte_flow_error_set(error, ENOMEM,
2250 RTE_FLOW_ERROR_TYPE_HANDLE,
2252 "cannot create hash rxq");
2259 * RXQ update after flow rule creation.
2262 * Pointer to Ethernet device.
2264 * Pointer to the flow rule.
2267 mlx5_flow_create_update_rxqs(struct rte_eth_dev *dev, struct rte_flow *flow)
2269 struct priv *priv = dev->data->dev_private;
2273 if (!dev->data->dev_started)
2275 for (i = 0; i != flow->rss_conf.queue_num; ++i) {
2276 struct mlx5_rxq_data *rxq_data = (*priv->rxqs)
2277 [(*flow->queues)[i]];
2278 struct mlx5_rxq_ctrl *rxq_ctrl =
2279 container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
2280 uint8_t tunnel = PTYPE_IDX(flow->tunnel);
2282 rxq_data->mark |= flow->mark;
2285 rxq_ctrl->tunnel_types[tunnel] += 1;
2286 /* Clear the tunnel type if more than one tunnel type is set. */
2287 for (j = 0; j != RTE_DIM(rxq_ctrl->tunnel_types); ++j) {
2290 if (rxq_ctrl->tunnel_types[j] > 0) {
2291 rxq_data->tunnel = 0;
2295 if (j == RTE_DIM(rxq_ctrl->tunnel_types))
2296 rxq_data->tunnel = flow->tunnel;
2301 * Dump flow hash RX queue details.
2304 * Pointer to Ethernet device.
2306 * Pointer to the rte_flow.
2308 * Hash RX queue index.
2311 mlx5_flow_dump(struct rte_eth_dev *dev __rte_unused,
2312 struct rte_flow *flow __rte_unused,
2313 unsigned int hrxq_idx __rte_unused)
2320 uint64_t extra_hash_fields = 0;
2322 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
2323 if (flow->tunnel && flow->rss_conf.level > 1)
2324 extra_hash_fields = (uint32_t)IBV_RX_HASH_INNER;
2326 spec_ptr = (uintptr_t)(flow->frxq[hrxq_idx].ibv_attr + 1);
2327 for (j = 0, off = 0; j < flow->frxq[hrxq_idx].ibv_attr->num_of_specs;
2329 struct ibv_flow_spec *spec = (void *)spec_ptr;
2330 off += sprintf(buf + off, " %x(%hu)", spec->hdr.type,
2332 spec_ptr += spec->hdr.size;
2335 "port %u Verbs flow %p type %u: hrxq:%p qp:%p ind:%p,"
2336 " hash:%" PRIx64 "/%u specs:%hhu(%hu), priority:%hu, type:%d,"
2337 " flags:%x, comp_mask:%x specs:%s",
2338 dev->data->port_id, (void *)flow, hrxq_idx,
2339 (void *)flow->frxq[hrxq_idx].hrxq,
2340 (void *)flow->frxq[hrxq_idx].hrxq->qp,
2341 (void *)flow->frxq[hrxq_idx].hrxq->ind_table,
2342 (flow->frxq[hrxq_idx].hash_fields | extra_hash_fields),
2343 flow->rss_conf.queue_num,
2344 flow->frxq[hrxq_idx].ibv_attr->num_of_specs,
2345 flow->frxq[hrxq_idx].ibv_attr->size,
2346 flow->frxq[hrxq_idx].ibv_attr->priority,
2347 flow->frxq[hrxq_idx].ibv_attr->type,
2348 flow->frxq[hrxq_idx].ibv_attr->flags,
2349 flow->frxq[hrxq_idx].ibv_attr->comp_mask,
2355 * Complete flow rule creation.
2358 * Pointer to Ethernet device.
2360 * Internal parser structure.
2362 * Pointer to the rte_flow.
2364 * Perform verbose error reporting if not NULL.
2367 * 0 on success, a negative errno value otherwise and rte_errno is set.
2370 mlx5_flow_create_action_queue(struct rte_eth_dev *dev,
2371 struct mlx5_flow_parse *parser,
2372 struct rte_flow *flow,
2373 struct rte_flow_error *error)
2375 struct priv *priv __rte_unused = dev->data->dev_private;
2378 unsigned int flows_n = 0;
2382 assert(!parser->drop);
2383 ret = mlx5_flow_create_action_queue_rss(dev, parser, flow, error);
2387 flow->cs = parser->cs;
2388 if (!dev->data->dev_started)
2390 for (i = 0; i != hash_rxq_init_n; ++i) {
2391 if (!flow->frxq[i].hrxq)
2393 flow->frxq[i].ibv_flow =
2394 mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
2395 flow->frxq[i].ibv_attr);
2396 mlx5_flow_dump(dev, flow, i);
2397 if (!flow->frxq[i].ibv_flow) {
2398 rte_flow_error_set(error, ENOMEM,
2399 RTE_FLOW_ERROR_TYPE_HANDLE,
2400 NULL, "flow rule creation failure");
2406 rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE,
2407 NULL, "internal error in flow creation");
2410 mlx5_flow_create_update_rxqs(dev, flow);
2413 ret = rte_errno; /* Save rte_errno before cleanup. */
2415 for (i = 0; i != hash_rxq_init_n; ++i) {
2416 if (flow->frxq[i].ibv_flow) {
2417 struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;
2419 claim_zero(mlx5_glue->destroy_flow(ibv_flow));
2421 if (flow->frxq[i].hrxq)
2422 mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
2423 if (flow->frxq[i].ibv_attr)
2424 rte_free(flow->frxq[i].ibv_attr);
2427 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2431 rte_errno = ret; /* Restore rte_errno. */
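/*
 * The error path above follows the usual DPDK convention: rte_errno is
 * saved before releasing resources (the release helpers may overwrite it)
 * and restored afterwards so the caller still sees the original cause.
 * Minimal sketch of the pattern:
 *
 *   err = rte_errno;         <- save the original cause
 *   ... release flows, hash Rx queues, attributes, counter set ...
 *   rte_errno = err;         <- restore it for the caller
 */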
2439 * Pointer to Ethernet device.
2441 * Pointer to a TAILQ flow list.
2443 * Flow rule attributes.
2444 * @param[in] pattern
2445 * Pattern specification (list terminated by the END pattern item).
2446 * @param[in] actions
2447 * Associated actions (list terminated by the END action).
2449 * Perform verbose error reporting if not NULL.
2452 * A flow on success, NULL otherwise and rte_errno is set.
2454 static struct rte_flow *
2455 mlx5_flow_list_create(struct rte_eth_dev *dev,
2456 struct mlx5_flows *list,
2457 const struct rte_flow_attr *attr,
2458 const struct rte_flow_item items[],
2459 const struct rte_flow_action actions[],
2460 struct rte_flow_error *error)
2462 struct mlx5_flow_parse parser = { .create = 1, };
2463 struct rte_flow *flow = NULL;
2467 ret = mlx5_flow_convert(dev, attr, items, actions, error, &parser);
2470 flow = rte_calloc(__func__, 1,
2472 parser.rss_conf.queue_num * sizeof(uint16_t),
2475 rte_flow_error_set(error, ENOMEM,
2476 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2478 "cannot allocate flow memory");
2481 /* Copy configuration. */
2482 flow->queues = (uint16_t (*)[])(flow + 1);
2483 flow->tunnel = parser.tunnel;
2484 flow->rss_conf = (struct rte_flow_action_rss){
2485 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
2486 .level = parser.rss_conf.level,
2487 .types = parser.rss_conf.types,
2488 .key_len = parser.rss_conf.key_len,
2489 .queue_num = parser.rss_conf.queue_num,
2490 .key = memcpy(flow->rss_key, parser.rss_conf.key,
2491 sizeof(*parser.rss_conf.key) *
2492 parser.rss_conf.key_len),
2493 .queue = memcpy(flow->queues, parser.rss_conf.queue,
2494 sizeof(*parser.rss_conf.queue) *
2495 parser.rss_conf.queue_num),
2497 flow->mark = parser.mark;
2498 /* Finalise the flow. */
2500 ret = mlx5_flow_create_action_queue_drop(dev, &parser, flow,
2503 ret = mlx5_flow_create_action_queue(dev, &parser, flow, error);
2506 TAILQ_INSERT_TAIL(list, flow, next);
2507 DRV_LOG(DEBUG, "port %u flow created %p", dev->data->port_id,
2511 DRV_LOG(ERR, "port %u flow creation error: %s", dev->data->port_id,
2513 for (i = 0; i != hash_rxq_init_n; ++i) {
2514 if (parser.queue[i].ibv_attr)
2515 rte_free(parser.queue[i].ibv_attr);
2522 * Validate a flow supported by the NIC.
2524 * @see rte_flow_validate()
2528 mlx5_flow_validate(struct rte_eth_dev *dev,
2529 const struct rte_flow_attr *attr,
2530 const struct rte_flow_item items[],
2531 const struct rte_flow_action actions[],
2532 struct rte_flow_error *error)
2534 struct mlx5_flow_parse parser = { .create = 0, };
2536 return mlx5_flow_convert(dev, attr, items, actions, error, &parser);
2542 * @see rte_flow_create()
2546 mlx5_flow_create(struct rte_eth_dev *dev,
2547 const struct rte_flow_attr *attr,
2548 const struct rte_flow_item items[],
2549 const struct rte_flow_action actions[],
2550 struct rte_flow_error *error)
2552 struct priv *priv = dev->data->dev_private;
2554 return mlx5_flow_list_create(dev, &priv->flows, attr, items, actions,
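/*
 * Typical application-side sequence on top of these entry points, as a
 * sketch ("port_id", "attr", "pattern" and "actions" are hypothetical and
 * must describe a rule this PMD accepts):
 *
 *   struct rte_flow_error err;
 *   struct rte_flow *f = NULL;
 *
 *   if (rte_flow_validate(port_id, &attr, pattern, actions, &err) == 0)
 *       f = rte_flow_create(port_id, &attr, pattern, actions, &err);
 *   if (f == NULL)
 *       printf("flow rejected: %s\n", err.message ? err.message : "unknown");
 */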
2559 * Destroy a flow in a list.
2562 * Pointer to Ethernet device.
2564 * Pointer to a TAILQ flow list.
2569 mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
2570 struct rte_flow *flow)
2572 struct priv *priv = dev->data->dev_private;
2575 if (flow->drop || !dev->data->dev_started)
2577 for (i = 0; flow->tunnel && i != flow->rss_conf.queue_num; ++i) {
2578 /* Update queue tunnel type. */
2579 struct mlx5_rxq_data *rxq_data = (*priv->rxqs)
2580 [(*flow->queues)[i]];
2581 struct mlx5_rxq_ctrl *rxq_ctrl =
2582 container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
2583 uint8_t tunnel = PTYPE_IDX(flow->tunnel);
2585 assert(rxq_ctrl->tunnel_types[tunnel] > 0);
2586 rxq_ctrl->tunnel_types[tunnel] -= 1;
2587 if (!rxq_ctrl->tunnel_types[tunnel]) {
2588 /* Update tunnel type. */
2593 for (j = 0; j < RTE_DIM(rxq_ctrl->tunnel_types); j++)
2594 if (rxq_ctrl->tunnel_types[j]) {
2598 /* Keep the same type if more than one tunnel type remains. */
2600 rxq_data->tunnel = ptype_ext[last];
2601 else if (types == 0)
2602 /* No tunnel type left. */
2603 rxq_data->tunnel = 0;
2606 for (i = 0; flow->mark && i != flow->rss_conf.queue_num; ++i) {
2607 struct rte_flow *tmp;
2611 * To remove the mark from the queue, the queue must not be
2612 * present in any other marked flow (RSS or not).
2614 TAILQ_FOREACH(tmp, list, next) {
2616 uint16_t *tqs = NULL;
2621 for (j = 0; j != hash_rxq_init_n; ++j) {
2622 if (!tmp->frxq[j].hrxq)
2624 tqs = tmp->frxq[j].hrxq->ind_table->queues;
2625 tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
2629 for (j = 0; (j != tq_n) && !mark; j++)
2630 if (tqs[j] == (*flow->queues)[i])
2633 (*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
2637 if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
2638 claim_zero(mlx5_glue->destroy_flow
2639 (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2640 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
2642 for (i = 0; i != hash_rxq_init_n; ++i) {
2643 struct mlx5_flow *frxq = &flow->frxq[i];
2646 claim_zero(mlx5_glue->destroy_flow
2649 mlx5_hrxq_release(dev, frxq->hrxq);
2651 rte_free(frxq->ibv_attr);
2655 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2658 TAILQ_REMOVE(list, flow, next);
2659 DRV_LOG(DEBUG, "port %u flow destroyed %p", dev->data->port_id,
2665 * Destroy all flows.
2668 * Pointer to Ethernet device.
2670 * Pointer to a TAILQ flow list.
2673 mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
2675 while (!TAILQ_EMPTY(list)) {
2676 struct rte_flow *flow;
2678 flow = TAILQ_FIRST(list);
2679 mlx5_flow_list_destroy(dev, list, flow);
2684 * Create drop queue.
2687 * Pointer to Ethernet device.
2690 * 0 on success, a negative errno value otherwise and rte_errno is set.
2693 mlx5_flow_create_drop_queue(struct rte_eth_dev *dev)
2695 struct priv *priv = dev->data->dev_private;
2696 struct mlx5_hrxq_drop *fdq = NULL;
2700 fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
2703 "port %u cannot allocate memory for drop queue",
2704 dev->data->port_id);
2708 fdq->cq = mlx5_glue->create_cq(priv->ctx, 1, NULL, NULL, 0);
2710 DRV_LOG(WARNING, "port %u cannot allocate CQ for drop queue",
2711 dev->data->port_id);
2715 fdq->wq = mlx5_glue->create_wq
2717 &(struct ibv_wq_init_attr){
2718 .wq_type = IBV_WQT_RQ,
2725 DRV_LOG(WARNING, "port %u cannot allocate WQ for drop queue",
2726 dev->data->port_id);
2730 fdq->ind_table = mlx5_glue->create_rwq_ind_table
2732 &(struct ibv_rwq_ind_table_init_attr){
2733 .log_ind_tbl_size = 0,
2734 .ind_tbl = &fdq->wq,
2737 if (!fdq->ind_table) {
2739 "port %u cannot allocate indirection table for drop"
2741 dev->data->port_id);
2745 fdq->qp = mlx5_glue->create_qp_ex
2747 &(struct ibv_qp_init_attr_ex){
2748 .qp_type = IBV_QPT_RAW_PACKET,
2750 IBV_QP_INIT_ATTR_PD |
2751 IBV_QP_INIT_ATTR_IND_TABLE |
2752 IBV_QP_INIT_ATTR_RX_HASH,
2753 .rx_hash_conf = (struct ibv_rx_hash_conf){
2755 IBV_RX_HASH_FUNC_TOEPLITZ,
2756 .rx_hash_key_len = rss_hash_default_key_len,
2757 .rx_hash_key = rss_hash_default_key,
2758 .rx_hash_fields_mask = 0,
2760 .rwq_ind_tbl = fdq->ind_table,
2764 DRV_LOG(WARNING, "port %u cannot allocate QP for drop queue",
2765 dev->data->port_id);
2769 priv->flow_drop_queue = fdq;
2773 claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2775 claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2777 claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2779 claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2782 priv->flow_drop_queue = NULL;
2787 * Delete drop queue.
2790 * Pointer to Ethernet device.
2793 mlx5_flow_delete_drop_queue(struct rte_eth_dev *dev)
2795 struct priv *priv = dev->data->dev_private;
2796 struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
2801 claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2803 claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2805 claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2807 claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2809 priv->flow_drop_queue = NULL;
2816 * Pointer to Ethernet device.
2818 * Pointer to a TAILQ flow list.
2821 mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
2823 struct priv *priv = dev->data->dev_private;
2824 struct rte_flow *flow;
2827 TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
2828 struct mlx5_ind_table_ibv *ind_tbl = NULL;
2831 if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
2833 claim_zero(mlx5_glue->destroy_flow
2834 (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2835 flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
2836 DRV_LOG(DEBUG, "port %u flow %p removed",
2837 dev->data->port_id, (void *)flow);
2841 /* Verify the flow has not already been cleaned. */
2842 for (i = 0; i != hash_rxq_init_n; ++i) {
2843 if (!flow->frxq[i].ibv_flow)
2846 * The indirection table is kept so the mark flag can later be
2847 * cleared on the Rx queues it references; this avoids another
2848 * loop over all Rx queues.
2851 ind_tbl = flow->frxq[i].hrxq->ind_table;
2854 if (i == hash_rxq_init_n)
2858 for (i = 0; i != ind_tbl->queues_n; ++i)
2859 (*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
2861 for (i = 0; i != hash_rxq_init_n; ++i) {
2862 if (!flow->frxq[i].ibv_flow)
2864 claim_zero(mlx5_glue->destroy_flow
2865 (flow->frxq[i].ibv_flow));
2866 flow->frxq[i].ibv_flow = NULL;
2867 mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
2868 flow->frxq[i].hrxq = NULL;
2870 DRV_LOG(DEBUG, "port %u flow %p removed", dev->data->port_id,
2873 /* Cleanup Rx queue tunnel info. */
2874 for (i = 0; i != priv->rxqs_n; ++i) {
2875 struct mlx5_rxq_data *q = (*priv->rxqs)[i];
2876 struct mlx5_rxq_ctrl *rxq_ctrl =
2877 container_of(q, struct mlx5_rxq_ctrl, rxq);
2881 memset((void *)rxq_ctrl->tunnel_types, 0,
2882 sizeof(rxq_ctrl->tunnel_types));
2891 * Pointer to Ethernet device.
2893 * Pointer to a TAILQ flow list.
2896 * 0 on success, a negative errno value otherwise and rte_errno is set.
2899 mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
2901 struct priv *priv = dev->data->dev_private;
2902 struct rte_flow *flow;
2904 TAILQ_FOREACH(flow, list, next) {
2908 flow->frxq[HASH_RXQ_ETH].ibv_flow =
2909 mlx5_glue->create_flow
2910 (priv->flow_drop_queue->qp,
2911 flow->frxq[HASH_RXQ_ETH].ibv_attr);
2912 if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2914 "port %u flow %p cannot be applied",
2915 dev->data->port_id, (void *)flow);
2919 DRV_LOG(DEBUG, "port %u flow %p applied",
2920 dev->data->port_id, (void *)flow);
2924 for (i = 0; i != hash_rxq_init_n; ++i) {
2925 if (!flow->frxq[i].ibv_attr)
2927 flow->frxq[i].hrxq =
2928 mlx5_hrxq_get(dev, flow->rss_conf.key,
2929 flow->rss_conf.key_len,
2930 flow->frxq[i].hash_fields,
2931 flow->rss_conf.queue,
2932 flow->rss_conf.queue_num,
2934 flow->rss_conf.level);
2935 if (flow->frxq[i].hrxq)
2937 flow->frxq[i].hrxq =
2938 mlx5_hrxq_new(dev, flow->rss_conf.key,
2939 flow->rss_conf.key_len,
2940 flow->frxq[i].hash_fields,
2941 flow->rss_conf.queue,
2942 flow->rss_conf.queue_num,
2944 flow->rss_conf.level);
2945 if (!flow->frxq[i].hrxq) {
2947 "port %u flow %p cannot create hash"
2949 dev->data->port_id, (void *)flow);
2954 mlx5_flow_dump(dev, flow, i);
2955 flow->frxq[i].ibv_flow =
2956 mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
2957 flow->frxq[i].ibv_attr);
2958 if (!flow->frxq[i].ibv_flow) {
2960 "port %u flow %p type %u cannot be"
2962 dev->data->port_id, (void *)flow, i);
2967 mlx5_flow_create_update_rxqs(dev, flow);
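/*
 * mlx5_flow_stop() and mlx5_flow_start() are meant to be paired around a
 * port restart: stop removes the Verbs flows and hash Rx queues while
 * keeping the rte_flow objects on the list, start re-creates them from the
 * saved attributes. Sketch of the expected call sequence:
 *
 *   mlx5_flow_stop(dev, &priv->flows);
 *   ... reconfigure / restart the Rx queues ...
 *   if (mlx5_flow_start(dev, &priv->flows))
 *       ... handle error, some flows could not be re-applied ...
 */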
2973 * Verify the flow list is empty.
2976 * Pointer to Ethernet device.
2978 * @return the number of flows not released.
2981 mlx5_flow_verify(struct rte_eth_dev *dev)
2983 struct priv *priv = dev->data->dev_private;
2984 struct rte_flow *flow;
2987 TAILQ_FOREACH(flow, &priv->flows, next) {
2988 DRV_LOG(DEBUG, "port %u flow %p still referenced",
2989 dev->data->port_id, (void *)flow);
2996 * Enable a control flow configured from the control plane.
2999 * Pointer to Ethernet device.
3001 * An Ethernet flow spec to apply.
3003 * An Ethernet flow mask to apply.
3005 * A VLAN flow spec to apply.
3007 * A VLAN flow mask to apply.
3010 * 0 on success, a negative errno value otherwise and rte_errno is set.
3013 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
3014 struct rte_flow_item_eth *eth_spec,
3015 struct rte_flow_item_eth *eth_mask,
3016 struct rte_flow_item_vlan *vlan_spec,
3017 struct rte_flow_item_vlan *vlan_mask)
3019 struct priv *priv = dev->data->dev_private;
3020 const struct rte_flow_attr attr = {
3022 .priority = MLX5_CTRL_FLOW_PRIORITY,
3024 struct rte_flow_item items[] = {
3026 .type = RTE_FLOW_ITEM_TYPE_ETH,
3032 .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
3033 RTE_FLOW_ITEM_TYPE_END,
3039 .type = RTE_FLOW_ITEM_TYPE_END,
3042 uint16_t queue[priv->reta_idx_n];
3043 struct rte_flow_action_rss action_rss = {
3044 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
3046 .types = priv->rss_conf.rss_hf,
3047 .key_len = priv->rss_conf.rss_key_len,
3048 .queue_num = priv->reta_idx_n,
3049 .key = priv->rss_conf.rss_key,
3052 struct rte_flow_action actions[] = {
3054 .type = RTE_FLOW_ACTION_TYPE_RSS,
3055 .conf = &action_rss,
3058 .type = RTE_FLOW_ACTION_TYPE_END,
3061 struct rte_flow *flow;
3062 struct rte_flow_error error;
3065 if (!priv->reta_idx_n) {
3069 for (i = 0; i != priv->reta_idx_n; ++i)
3070 queue[i] = (*priv->reta_idx)[i];
3071 flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
3079 * Enable a control flow configured from the control plane.
3082 * Pointer to Ethernet device.
3084 * An Ethernet flow spec to apply.
3086 * An Ethernet flow mask to apply.
3089 * 0 on success, a negative errno value otherwise and rte_errno is set.
3092 mlx5_ctrl_flow(struct rte_eth_dev *dev,
3093 struct rte_flow_item_eth *eth_spec,
3094 struct rte_flow_item_eth *eth_mask)
3096 return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
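/*
 * Example of how a control path can use this helper, e.g. to accept all
 * broadcast frames (a sketch; the actual callers are outside this file):
 *
 *   struct rte_flow_item_eth bcast = {
 *       .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *   };
 *
 *   if (mlx5_ctrl_flow(dev, &bcast, &bcast))
 *       ... rte_errno is set, the broadcast rule was not applied ...
 */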
3102 * @see rte_flow_destroy()
3106 mlx5_flow_destroy(struct rte_eth_dev *dev,
3107 struct rte_flow *flow,
3108 struct rte_flow_error *error __rte_unused)
3110 struct priv *priv = dev->data->dev_private;
3112 mlx5_flow_list_destroy(dev, &priv->flows, flow);
3117 * Destroy all flows.
3119 * @see rte_flow_flush()
3123 mlx5_flow_flush(struct rte_eth_dev *dev,
3124 struct rte_flow_error *error __rte_unused)
3126 struct priv *priv = dev->data->dev_private;
3128 mlx5_flow_list_flush(dev, &priv->flows);
3132 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
3134 * Query flow counter.
3138 * @param[out] query_count
3139 *   Returned counter data read from the counter set.
3142 * 0 on success, a negative errno value otherwise and rte_errno is set.
3145 mlx5_flow_query_count(struct ibv_counter_set *cs,
3146 struct mlx5_flow_counter_stats *counter_stats,
3147 struct rte_flow_query_count *query_count,
3148 struct rte_flow_error *error)
3150 uint64_t counters[2];
3151 struct ibv_query_counter_set_attr query_cs_attr = {
3153 .query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
3155 struct ibv_counter_set_data query_out = {
3157 .outlen = 2 * sizeof(uint64_t),
3159 int err = mlx5_glue->query_counter_set(&query_cs_attr, &query_out);
3162 return rte_flow_error_set(error, err,
3163 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3165 "cannot read counter");
3166 query_count->hits_set = 1;
3167 query_count->bytes_set = 1;
3168 query_count->hits = counters[0] - counter_stats->hits;
3169 query_count->bytes = counters[1] - counter_stats->bytes;
3170 if (query_count->reset) {
3171 counter_stats->hits = counters[0];
3172 counter_stats->bytes = counters[1];
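/*
 * Application-side sketch of reading such a counter back, assuming the
 * flow was created with a COUNT action on a hypothetical "port_id":
 *
 *   struct rte_flow_query_count qc = { .reset = 1 };
 *   struct rte_flow_action count = { .type = RTE_FLOW_ACTION_TYPE_COUNT };
 *   struct rte_flow_error err;
 *
 *   if (rte_flow_query(port_id, flow, &count, &qc, &err) == 0 &&
 *       qc.hits_set && qc.bytes_set)
 *       ... qc.hits and qc.bytes hold the packet and byte counts
 *           accumulated since the last reset ...
 */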
3180 * @see rte_flow_query()
3184 mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
3185 struct rte_flow *flow,
3186 const struct rte_flow_action *action __rte_unused,
3188 struct rte_flow_error *error)
3193 ret = mlx5_flow_query_count(flow->cs,
3194 &flow->counter_stats,
3195 (struct rte_flow_query_count *)data,
3200 return rte_flow_error_set(error, EINVAL,
3201 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3203 "no counter found for flow");
3212 * @see rte_flow_isolate()
3216 mlx5_flow_isolate(struct rte_eth_dev *dev,
3218 struct rte_flow_error *error)
3220 struct priv *priv = dev->data->dev_private;
3222 if (dev->data->dev_started) {
3223 rte_flow_error_set(error, EBUSY,
3224 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3226 "port must be stopped first");
3229 priv->isolated = !!enable;
3231 dev->dev_ops = &mlx5_dev_ops_isolate;
3233 dev->dev_ops = &mlx5_dev_ops;
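/*
 * Since the check above refuses to toggle the mode on a started port,
 * applications are expected to select isolated mode right after
 * configuration, e.g. (hypothetical "port_id"):
 *
 *   struct rte_flow_error err;
 *
 *   if (rte_flow_isolate(port_id, 1, &err))
 *       ... isolation refused, err.message explains why ...
 *   if (rte_eth_dev_start(port_id) < 0)
 *       ... handle start failure ...
 */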
3238 * Convert a flow director filter to a generic flow.
3241 * Pointer to Ethernet device.
3242 * @param fdir_filter
3243 * Flow director filter to add.
3245 * Generic flow parameters structure.
3248 * 0 on success, a negative errno value otherwise and rte_errno is set.
3251 mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
3252 const struct rte_eth_fdir_filter *fdir_filter,
3253 struct mlx5_fdir *attributes)
3255 struct priv *priv = dev->data->dev_private;
3256 const struct rte_eth_fdir_input *input = &fdir_filter->input;
3257 const struct rte_eth_fdir_masks *mask =
3258 &dev->data->dev_conf.fdir_conf.mask;
3260 /* Validate queue number. */
3261 if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
3262 DRV_LOG(ERR, "port %u invalid queue number %d",
3263 dev->data->port_id, fdir_filter->action.rx_queue);
3267 attributes->attr.ingress = 1;
3268 attributes->items[0] = (struct rte_flow_item) {
3269 .type = RTE_FLOW_ITEM_TYPE_ETH,
3270 .spec = &attributes->l2,
3271 .mask = &attributes->l2_mask,
3273 switch (fdir_filter->action.behavior) {
3274 case RTE_ETH_FDIR_ACCEPT:
3275 attributes->actions[0] = (struct rte_flow_action){
3276 .type = RTE_FLOW_ACTION_TYPE_QUEUE,
3277 .conf = &attributes->queue,
3280 case RTE_ETH_FDIR_REJECT:
3281 attributes->actions[0] = (struct rte_flow_action){
3282 .type = RTE_FLOW_ACTION_TYPE_DROP,
3286 DRV_LOG(ERR, "port %u invalid behavior %d",
3288 fdir_filter->action.behavior);
3289 rte_errno = ENOTSUP;
3292 attributes->queue.index = fdir_filter->action.rx_queue;
3294 switch (fdir_filter->input.flow_type) {
3295 case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
3296 case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
3297 case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
3298 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
3299 .src_addr = input->flow.ip4_flow.src_ip,
3300 .dst_addr = input->flow.ip4_flow.dst_ip,
3301 .time_to_live = input->flow.ip4_flow.ttl,
3302 .type_of_service = input->flow.ip4_flow.tos,
3303 .next_proto_id = input->flow.ip4_flow.proto,
3305 attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){
3306 .src_addr = mask->ipv4_mask.src_ip,
3307 .dst_addr = mask->ipv4_mask.dst_ip,
3308 .time_to_live = mask->ipv4_mask.ttl,
3309 .type_of_service = mask->ipv4_mask.tos,
3310 .next_proto_id = mask->ipv4_mask.proto,
3312 attributes->items[1] = (struct rte_flow_item){
3313 .type = RTE_FLOW_ITEM_TYPE_IPV4,
3314 .spec = &attributes->l3,
3315 .mask = &attributes->l3_mask,
3318 case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
3319 case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
3320 case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
3321 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
3322 .hop_limits = input->flow.ipv6_flow.hop_limits,
3323 .proto = input->flow.ipv6_flow.proto,
3326 memcpy(attributes->l3.ipv6.hdr.src_addr,
3327 input->flow.ipv6_flow.src_ip,
3328 RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
3329 memcpy(attributes->l3.ipv6.hdr.dst_addr,
3330 input->flow.ipv6_flow.dst_ip,
3331 RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
3332 memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
3333 mask->ipv6_mask.src_ip,
3334 RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
3335 memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
3336 mask->ipv6_mask.dst_ip,
3337 RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
3338 attributes->items[1] = (struct rte_flow_item){
3339 .type = RTE_FLOW_ITEM_TYPE_IPV6,
3340 .spec = &attributes->l3,
3341 .mask = &attributes->l3_mask,
3345 DRV_LOG(ERR, "port %u invalid flow type %d",
3346 dev->data->port_id, fdir_filter->input.flow_type);
3347 rte_errno = ENOTSUP;
3351 switch (fdir_filter->input.flow_type) {
3352 case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
3353 attributes->l4.udp.hdr = (struct udp_hdr){
3354 .src_port = input->flow.udp4_flow.src_port,
3355 .dst_port = input->flow.udp4_flow.dst_port,
3357 attributes->l4_mask.udp.hdr = (struct udp_hdr){
3358 .src_port = mask->src_port_mask,
3359 .dst_port = mask->dst_port_mask,
3361 attributes->items[2] = (struct rte_flow_item){
3362 .type = RTE_FLOW_ITEM_TYPE_UDP,
3363 .spec = &attributes->l4,
3364 .mask = &attributes->l4_mask,
3367 case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
3368 attributes->l4.tcp.hdr = (struct tcp_hdr){
3369 .src_port = input->flow.tcp4_flow.src_port,
3370 .dst_port = input->flow.tcp4_flow.dst_port,
3372 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
3373 .src_port = mask->src_port_mask,
3374 .dst_port = mask->dst_port_mask,
3376 attributes->items[2] = (struct rte_flow_item){
3377 .type = RTE_FLOW_ITEM_TYPE_TCP,
3378 .spec = &attributes->l4,
3379 .mask = &attributes->l4_mask,
3382 case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
3383 attributes->l4.udp.hdr = (struct udp_hdr){
3384 .src_port = input->flow.udp6_flow.src_port,
3385 .dst_port = input->flow.udp6_flow.dst_port,
3387 attributes->l4_mask.udp.hdr = (struct udp_hdr){
3388 .src_port = mask->src_port_mask,
3389 .dst_port = mask->dst_port_mask,
3391 attributes->items[2] = (struct rte_flow_item){
3392 .type = RTE_FLOW_ITEM_TYPE_UDP,
3393 .spec = &attributes->l4,
3394 .mask = &attributes->l4_mask,
3397 case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
3398 attributes->l4.tcp.hdr = (struct tcp_hdr){
3399 .src_port = input->flow.tcp6_flow.src_port,
3400 .dst_port = input->flow.tcp6_flow.dst_port,
3402 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
3403 .src_port = mask->src_port_mask,
3404 .dst_port = mask->dst_port_mask,
3406 attributes->items[2] = (struct rte_flow_item){
3407 .type = RTE_FLOW_ITEM_TYPE_TCP,
3408 .spec = &attributes->l4,
3409 .mask = &attributes->l4_mask,
3412 case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
3413 case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
3416 DRV_LOG(ERR, "port %u invalid flow type %d",
3417 dev->data->port_id, fdir_filter->input.flow_type);
3418 rte_errno = ENOTSUP;
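/*
 * Illustration of the conversion: a perfect-match IPv4/UDP flow director
 * filter steering matching packets to queue 3 becomes, roughly, the
 * following generic flow description:
 *
 *   attr    : ingress
 *   pattern : ETH / IPV4 (src, dst, tos, ttl, proto) / UDP (src, dst ports)
 *   actions : QUEUE (index = 3) / END
 */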
3425 * Add new flow director filter and store it in list.
3428 * Pointer to Ethernet device.
3429 * @param fdir_filter
3430 * Flow director filter to add.
3433 * 0 on success, a negative errno value otherwise and rte_errno is set.
3436 mlx5_fdir_filter_add(struct rte_eth_dev *dev,
3437 const struct rte_eth_fdir_filter *fdir_filter)
3439 struct priv *priv = dev->data->dev_private;
3440 struct mlx5_fdir attributes = {
3443 .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
3444 .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
3448 struct mlx5_flow_parse parser = {
3449 .layer = HASH_RXQ_ETH,
3451 struct rte_flow_error error;
3452 struct rte_flow *flow;
3455 ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
3458 ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
3459 attributes.actions, &error, &parser);
3462 flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
3463 attributes.items, attributes.actions,
3466 DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
3474 * Delete specific filter.
3477 * Pointer to Ethernet device.
3478 * @param fdir_filter
3479 * Filter to be deleted.
3482 * 0 on success, a negative errno value otherwise and rte_errno is set.
3485 mlx5_fdir_filter_delete(struct rte_eth_dev *dev,
3486 const struct rte_eth_fdir_filter *fdir_filter)
3488 struct priv *priv = dev->data->dev_private;
3489 struct mlx5_fdir attributes = {
3492 struct mlx5_flow_parse parser = {
3494 .layer = HASH_RXQ_ETH,
3496 struct rte_flow_error error;
3497 struct rte_flow *flow;
3501 ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
3504 ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
3505 attributes.actions, &error, &parser);
3509 * Special case for the drop action, which is only added to the
3510 * specifications when a flow is actually created; append it here so
3511 * the comparison below sees the complete specification.
3514 struct ibv_flow_spec_action_drop *drop;
3516 drop = (void *)((uintptr_t)parser.queue[HASH_RXQ_ETH].ibv_attr +
3517 parser.queue[HASH_RXQ_ETH].offset);
3518 *drop = (struct ibv_flow_spec_action_drop){
3519 .type = IBV_FLOW_SPEC_ACTION_DROP,
3520 .size = sizeof(struct ibv_flow_spec_action_drop),
3522 parser.queue[HASH_RXQ_ETH].ibv_attr->num_of_specs++;
3524 TAILQ_FOREACH(flow, &priv->flows, next) {
3525 struct ibv_flow_attr *attr;
3526 struct ibv_spec_header *attr_h;
3528 struct ibv_flow_attr *flow_attr;
3529 struct ibv_spec_header *flow_h;
3531 unsigned int specs_n;
3532 unsigned int queue_id = parser.drop ? HASH_RXQ_ETH :
3535 attr = parser.queue[queue_id].ibv_attr;
3536 flow_attr = flow->frxq[queue_id].ibv_attr;
3537 /* Compare first the attributes. */
3539 memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
3541 if (attr->num_of_specs == 0)
3543 spec = (void *)((uintptr_t)attr +
3544 sizeof(struct ibv_flow_attr));
3545 flow_spec = (void *)((uintptr_t)flow_attr +
3546 sizeof(struct ibv_flow_attr));
3547 specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
3548 for (i = 0; i != specs_n; ++i) {
3551 if (memcmp(spec, flow_spec,
3552 RTE_MIN(attr_h->size, flow_h->size)))
3554 spec = (void *)((uintptr_t)spec + attr_h->size);
3555 flow_spec = (void *)((uintptr_t)flow_spec +
3558 /* At this point, the flows match. */
3561 /* The flow does not match. */
3565 mlx5_flow_list_destroy(dev, &priv->flows, flow);
3568 ret = rte_errno; /* Save rte_errno before cleanup. */
3569 for (i = 0; i != hash_rxq_init_n; ++i) {
3570 if (parser.queue[i].ibv_attr)
3571 rte_free(parser.queue[i].ibv_attr);
3574 rte_errno = ret; /* Restore rte_errno. */
3579 * Update queue for specific filter.
3582 * Pointer to Ethernet device.
3583 * @param fdir_filter
3584 * Filter to be updated.
3587 * 0 on success, a negative errno value otherwise and rte_errno is set.
3590 mlx5_fdir_filter_update(struct rte_eth_dev *dev,
3591 const struct rte_eth_fdir_filter *fdir_filter)
3595 ret = mlx5_fdir_filter_delete(dev, fdir_filter);
3598 return mlx5_fdir_filter_add(dev, fdir_filter);
3602 * Flush all filters.
3605 * Pointer to Ethernet device.
3608 mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
3610 struct priv *priv = dev->data->dev_private;
3612 mlx5_flow_list_flush(dev, &priv->flows);
3616 * Get flow director information.
3619 * Pointer to Ethernet device.
3620 * @param[out] fdir_info
3621 * Resulting flow director information.
3624 mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
3626 struct rte_eth_fdir_masks *mask =
3627 &dev->data->dev_conf.fdir_conf.mask;
3629 fdir_info->mode = dev->data->dev_conf.fdir_conf.mode;
3630 fdir_info->guarant_spc = 0;
3631 rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
3632 fdir_info->max_flexpayload = 0;
3633 fdir_info->flow_types_mask[0] = 0;
3634 fdir_info->flex_payload_unit = 0;
3635 fdir_info->max_flex_payload_segment_num = 0;
3636 fdir_info->flex_payload_limit = 0;
3637 memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
3641 * Deal with flow director operations.
3644 * Pointer to Ethernet device.
3646 * Operation to perform.
3648 * Pointer to operation-specific structure.
3651 * 0 on success, a negative errno value otherwise and rte_errno is set.
3654 mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
3657 enum rte_fdir_mode fdir_mode =
3658 dev->data->dev_conf.fdir_conf.mode;
3660 if (filter_op == RTE_ETH_FILTER_NOP)
3662 if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
3663 fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
3664 DRV_LOG(ERR, "port %u flow director mode %d not supported",
3665 dev->data->port_id, fdir_mode);
3669 switch (filter_op) {
3670 case RTE_ETH_FILTER_ADD:
3671 return mlx5_fdir_filter_add(dev, arg);
3672 case RTE_ETH_FILTER_UPDATE:
3673 return mlx5_fdir_filter_update(dev, arg);
3674 case RTE_ETH_FILTER_DELETE:
3675 return mlx5_fdir_filter_delete(dev, arg);
3676 case RTE_ETH_FILTER_FLUSH:
3677 mlx5_fdir_filter_flush(dev);
3679 case RTE_ETH_FILTER_INFO:
3680 mlx5_fdir_info_get(dev, arg);
3683 DRV_LOG(DEBUG, "port %u unknown operation %u",
3684 dev->data->port_id, filter_op);
3692 * Manage filter operations.
3695 * Pointer to Ethernet device structure.
3696 * @param filter_type
3699 * Operation to perform.
3701 * Pointer to operation-specific structure.
3704 * 0 on success, a negative errno value otherwise and rte_errno is set.
3707 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
3708 enum rte_filter_type filter_type,
3709 enum rte_filter_op filter_op,
3712 switch (filter_type) {
3713 case RTE_ETH_FILTER_GENERIC:
3714 if (filter_op != RTE_ETH_FILTER_GET) {
3718 *(const void **)arg = &mlx5_flow_ops;
3720 case RTE_ETH_FILTER_FDIR:
3721 return mlx5_fdir_ctrl_func(dev, filter_op, arg);
3723 DRV_LOG(ERR, "port %u filter type (%d) not supported",
3724 dev->data->port_id, filter_type);
3725 rte_errno = ENOTSUP;
3732 * Detect number of Verbs flow priorities supported.
3735 * Pointer to Ethernet device.
3738 * Number of supported Verbs flow priorities.
3741 mlx5_get_max_verbs_prio(struct rte_eth_dev *dev)
3743 struct priv *priv = dev->data->dev_private;
3744 unsigned int verb_priorities = MLX5_VERBS_FLOW_PRIO_8;
3746 struct ibv_flow_attr attr;
3747 struct ibv_flow_spec_eth eth;
3748 struct ibv_flow_spec_action_drop drop;
3754 .type = IBV_FLOW_SPEC_ETH,
3755 .size = sizeof(struct ibv_flow_spec_eth),
3758 .size = sizeof(struct ibv_flow_spec_action_drop),
3759 .type = IBV_FLOW_SPEC_ACTION_DROP,
3762 struct ibv_flow *flow;
3765 flow_attr.attr.priority = verb_priorities - 1;
3766 flow = mlx5_glue->create_flow(priv->flow_drop_queue->qp,
3769 claim_zero(mlx5_glue->destroy_flow(flow));
3770 /* Try more priorities. */
3771 verb_priorities *= 2;
3773 /* Creation failed, fall back to the last working value. */
3774 verb_priorities /= 2;
3778 DRV_LOG(DEBUG, "port %u Verbs flow priorities: %d,"
3779 " user flow priorities: %d",
3780 dev->data->port_id, verb_priorities, MLX5_CTRL_FLOW_PRIORITY);
3781 return verb_priorities;
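/*
 * Example of the probing above: on a device exposing 16 flow priorities the
 * loop creates throwaway drop rules at priorities 7 and 15, fails at 31,
 * halves verb_priorities back to 16 and reports that value.
 */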