/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2016 6WIND S.A.
 * Copyright 2016 Mellanox Technologies, Ltd
 */

#include <sys/queue.h>
#include <string.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif
#include <rte_common.h>
#include <rte_ether.h>
#include <rte_eth_ctrl.h>
#include <rte_ethdev_driver.h>
#include <rte_flow.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>
#include <rte_ip.h>

#include "mlx5.h"
#include "mlx5_defs.h"
#include "mlx5_prm.h"
#include "mlx5_glue.h"
/* Flow priority for control plane flows. */
#define MLX5_CTRL_FLOW_PRIORITY 1

/* Internet Protocol versions. */
#define MLX5_IPV4 4
#define MLX5_IPV6 6
#define MLX5_GRE 47

#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
struct ibv_flow_spec_counter_action {
    int dummy;
};
#endif

/* Dev ops structure defined in mlx5.c */
extern const struct eth_dev_ops mlx5_dev_ops;
extern const struct eth_dev_ops mlx5_dev_ops_isolate;
/** Structure given to the conversion functions. */
struct mlx5_flow_data {
    struct rte_eth_dev *dev; /**< Ethernet device. */
    struct mlx5_flow_parse *parser; /**< Parser context. */
    struct rte_flow_error *error; /**< Error context. */
};
static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
                     const void *default_mask,
                     struct mlx5_flow_data *data);

static int
mlx5_flow_create_vlan(const struct rte_flow_item *item,
                      const void *default_mask,
                      struct mlx5_flow_data *data);

static int
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
                      const void *default_mask,
                      struct mlx5_flow_data *data);

static int
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
                      const void *default_mask,
                      struct mlx5_flow_data *data);

static int
mlx5_flow_create_udp(const struct rte_flow_item *item,
                     const void *default_mask,
                     struct mlx5_flow_data *data);

static int
mlx5_flow_create_tcp(const struct rte_flow_item *item,
                     const void *default_mask,
                     struct mlx5_flow_data *data);

static int
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
                       const void *default_mask,
                       struct mlx5_flow_data *data);

static int
mlx5_flow_create_vxlan_gpe(const struct rte_flow_item *item,
                           const void *default_mask,
                           struct mlx5_flow_data *data);

static int
mlx5_flow_create_gre(const struct rte_flow_item *item,
                     const void *default_mask,
                     struct mlx5_flow_data *data);

struct mlx5_flow_parse;

static void
mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
                      unsigned int size);

static int
mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);

static int
mlx5_flow_create_count(struct rte_eth_dev *dev, struct mlx5_flow_parse *parser);
/* Hash RX queue types. */
enum hash_rxq_type {
    HASH_RXQ_TCPV4,
    HASH_RXQ_UDPV4,
    HASH_RXQ_IPV4,
    HASH_RXQ_TCPV6,
    HASH_RXQ_UDPV6,
    HASH_RXQ_IPV6,
    HASH_RXQ_ETH,
    HASH_RXQ_TUNNEL,
};

/* Initialization data for hash RX queue. */
struct hash_rxq_init {
    uint64_t hash_fields; /* Fields that participate in the hash. */
    uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
    unsigned int flow_priority; /* Flow priority to use. */
    unsigned int ip_version; /* Internet protocol. */
};
/* Initialization data for hash RX queues. */
const struct hash_rxq_init hash_rxq_init[] = {
    [HASH_RXQ_TCPV4] = {
        .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
                        IBV_RX_HASH_DST_IPV4 |
                        IBV_RX_HASH_SRC_PORT_TCP |
                        IBV_RX_HASH_DST_PORT_TCP),
        .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
        .flow_priority = 0,
        .ip_version = MLX5_IPV4,
    },
    [HASH_RXQ_UDPV4] = {
        .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
                        IBV_RX_HASH_DST_IPV4 |
                        IBV_RX_HASH_SRC_PORT_UDP |
                        IBV_RX_HASH_DST_PORT_UDP),
        .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
        .flow_priority = 0,
        .ip_version = MLX5_IPV4,
    },
    [HASH_RXQ_IPV4] = {
        .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
                        IBV_RX_HASH_DST_IPV4),
        .dpdk_rss_hf = (ETH_RSS_IPV4 |
                        ETH_RSS_FRAG_IPV4),
        .flow_priority = 1,
        .ip_version = MLX5_IPV4,
    },
    [HASH_RXQ_TCPV6] = {
        .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
                        IBV_RX_HASH_DST_IPV6 |
                        IBV_RX_HASH_SRC_PORT_TCP |
                        IBV_RX_HASH_DST_PORT_TCP),
        .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
        .flow_priority = 0,
        .ip_version = MLX5_IPV6,
    },
    [HASH_RXQ_UDPV6] = {
        .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
                        IBV_RX_HASH_DST_IPV6 |
                        IBV_RX_HASH_SRC_PORT_UDP |
                        IBV_RX_HASH_DST_PORT_UDP),
        .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
        .flow_priority = 0,
        .ip_version = MLX5_IPV6,
    },
    [HASH_RXQ_IPV6] = {
        .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
                        IBV_RX_HASH_DST_IPV6),
        .dpdk_rss_hf = (ETH_RSS_IPV6 |
                        ETH_RSS_FRAG_IPV6),
        .flow_priority = 1,
        .ip_version = MLX5_IPV6,
    },
    [HASH_RXQ_ETH] = {
        .hash_fields = 0,
        .dpdk_rss_hf = 0,
        .flow_priority = 2,
    },
};

/* Number of entries in hash_rxq_init[]. */
const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
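
/*
 * Example: HASH_RXQ_TCPV4 (ETH_RSS_NONFRAG_IPV4_TCP in DPDK terms) hashes
 * the full 4-tuple through the Verbs IBV_RX_HASH_SRC/DST_IPV4 and
 * IBV_RX_HASH_SRC/DST_PORT_TCP fields, while the plain L3 types hash the
 * address pair only and use a less specific (numerically higher) flow
 * priority.
 */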
/** Structure for holding counter stats. */
struct mlx5_flow_counter_stats {
    uint64_t hits; /**< Number of packets matched by the rule. */
    uint64_t bytes; /**< Number of bytes matched by the rule. */
};

/** Structure for Drop queue. */
struct mlx5_hrxq_drop {
    struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
    struct ibv_qp *qp; /**< Verbs queue pair. */
    struct ibv_wq *wq; /**< Verbs work queue. */
    struct ibv_cq *cq; /**< Verbs completion queue. */
};
/* Flows structures. */
struct mlx5_flow {
    uint64_t hash_fields; /**< Fields that participate in the hash. */
    struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
    struct ibv_flow *ibv_flow; /**< Verbs flow. */
    struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
};

/* Drop flows structures. */
struct mlx5_flow_drop {
    struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
    struct ibv_flow *ibv_flow; /**< Verbs flow. */
};

struct rte_flow {
    TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
    uint32_t mark:1; /**< Set if the flow is marked. */
    uint32_t drop:1; /**< Drop queue. */
    struct rte_flow_action_rss rss_conf; /**< RSS configuration. */
    uint16_t (*queues)[]; /**< Queue indexes to use. */
    uint8_t rss_key[40]; /**< Copy of the RSS key. */
    uint32_t tunnel; /**< Tunnel type of RTE_PTYPE_TUNNEL_XXX. */
    struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
    struct mlx5_flow_counter_stats counter_stats; /**< The counter stats. */
    struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
    /**< Flow with Rx queue. */
};
/** Static initializer for items. */
#define ITEMS(...) \
    (const enum rte_flow_item_type []){ \
        __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
    }

#define IS_TUNNEL(type) ( \
    (type) == RTE_FLOW_ITEM_TYPE_VXLAN || \
    (type) == RTE_FLOW_ITEM_TYPE_VXLAN_GPE || \
    (type) == RTE_FLOW_ITEM_TYPE_GRE)

const uint32_t flow_ptype[] = {
    [RTE_FLOW_ITEM_TYPE_VXLAN] = RTE_PTYPE_TUNNEL_VXLAN,
    [RTE_FLOW_ITEM_TYPE_VXLAN_GPE] = RTE_PTYPE_TUNNEL_VXLAN_GPE,
    [RTE_FLOW_ITEM_TYPE_GRE] = RTE_PTYPE_TUNNEL_GRE,
};

#define PTYPE_IDX(t) ((RTE_PTYPE_TUNNEL_MASK & (t)) >> 12)
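
/*
 * Example: with the standard rte_mbuf_ptype.h encoding, tunnel types
 * occupy bits 12-15 of the packet type and RTE_PTYPE_TUNNEL_MASK is
 * 0x0000f000, so PTYPE_IDX(RTE_PTYPE_TUNNEL_GRE) == (0x0000f000 &
 * 0x00002000) >> 12 == 2. Each RTE_PTYPE_TUNNEL_* value therefore maps
 * to a small, unique index usable for the lookup tables here.
 */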
const uint32_t ptype_ext[] = {
    [PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] = RTE_PTYPE_TUNNEL_VXLAN |
                                          RTE_PTYPE_L4_UDP,
    [PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN_GPE)] = RTE_PTYPE_TUNNEL_VXLAN_GPE |
                                              RTE_PTYPE_L4_UDP,
    [PTYPE_IDX(RTE_PTYPE_TUNNEL_GRE)] = RTE_PTYPE_TUNNEL_GRE,
};
/** Structure to generate a simple graph of layers supported by the NIC. */
struct mlx5_flow_items {
    /** List of possible actions for these items. */
    const enum rte_flow_action_type *const actions;
    /** Bit-masks corresponding to the possibilities for the item. */
    const void *mask;
    /**
     * Default bit-masks to use when item->mask is not provided. When
     * \default_mask is also NULL, the full supported bit-mask (\mask) is
     * used instead.
     */
    const void *default_mask;
    /** Bit-masks size in bytes. */
    const unsigned int mask_sz;
    /**
     * Conversion function from rte_flow to NIC specific flow.
     *
     * @param item
     *   rte_flow item to convert.
     * @param default_mask
     *   Default bit-masks to use when item->mask is not provided.
     * @param data
     *   Internal structure to store the conversion.
     *
     * @return
     *   0 on success, a negative errno value otherwise and rte_errno is
     *   set.
     */
    int (*convert)(const struct rte_flow_item *item,
                   const void *default_mask,
                   struct mlx5_flow_data *data);
    /** Size in bytes of the destination structure. */
    const unsigned int dst_sz;
    /** List of possible following items. */
    const enum rte_flow_item_type *const items;
};
/** Valid actions for this PMD. */
static const enum rte_flow_action_type valid_actions[] = {
    RTE_FLOW_ACTION_TYPE_DROP,
    RTE_FLOW_ACTION_TYPE_QUEUE,
    RTE_FLOW_ACTION_TYPE_MARK,
    RTE_FLOW_ACTION_TYPE_FLAG,
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
    RTE_FLOW_ACTION_TYPE_COUNT,
#endif
    RTE_FLOW_ACTION_TYPE_END,
};
/** Graph of supported items and associated actions. */
static const struct mlx5_flow_items mlx5_flow_items[] = {
    [RTE_FLOW_ITEM_TYPE_END] = {
        .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
                       RTE_FLOW_ITEM_TYPE_VXLAN,
                       RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
                       RTE_FLOW_ITEM_TYPE_GRE),
    },
    [RTE_FLOW_ITEM_TYPE_ETH] = {
        .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
                       RTE_FLOW_ITEM_TYPE_IPV4,
                       RTE_FLOW_ITEM_TYPE_IPV6),
        .actions = valid_actions,
        .mask = &(const struct rte_flow_item_eth){
            .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
            .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
            .type = -1,
        },
        .default_mask = &rte_flow_item_eth_mask,
        .mask_sz = sizeof(struct rte_flow_item_eth),
        .convert = mlx5_flow_create_eth,
        .dst_sz = sizeof(struct ibv_flow_spec_eth),
    },
    [RTE_FLOW_ITEM_TYPE_VLAN] = {
        .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
                       RTE_FLOW_ITEM_TYPE_IPV6),
        .actions = valid_actions,
        .mask = &(const struct rte_flow_item_vlan){
            .tci = -1,
            .inner_type = -1,
        },
        .default_mask = &rte_flow_item_vlan_mask,
        .mask_sz = sizeof(struct rte_flow_item_vlan),
        .convert = mlx5_flow_create_vlan,
        .dst_sz = 0,
    },
    [RTE_FLOW_ITEM_TYPE_IPV4] = {
        .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
                       RTE_FLOW_ITEM_TYPE_TCP,
                       RTE_FLOW_ITEM_TYPE_GRE),
        .actions = valid_actions,
        .mask = &(const struct rte_flow_item_ipv4){
            .hdr = {
                .src_addr = -1,
                .dst_addr = -1,
                .type_of_service = -1,
                .next_proto_id = -1,
            },
        },
        .default_mask = &rte_flow_item_ipv4_mask,
        .mask_sz = sizeof(struct rte_flow_item_ipv4),
        .convert = mlx5_flow_create_ipv4,
        .dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
    },
    [RTE_FLOW_ITEM_TYPE_IPV6] = {
        .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
                       RTE_FLOW_ITEM_TYPE_TCP,
                       RTE_FLOW_ITEM_TYPE_GRE),
        .actions = valid_actions,
        .mask = &(const struct rte_flow_item_ipv6){
            .hdr = {
                .src_addr = {
                    0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff, 0xff,
                },
                .dst_addr = {
                    0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff, 0xff,
                },
                .vtc_flow = -1,
                .proto = -1,
                .hop_limits = -1,
            },
        },
        .default_mask = &rte_flow_item_ipv6_mask,
        .mask_sz = sizeof(struct rte_flow_item_ipv6),
        .convert = mlx5_flow_create_ipv6,
        .dst_sz = sizeof(struct ibv_flow_spec_ipv6),
    },
    [RTE_FLOW_ITEM_TYPE_UDP] = {
        .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN,
                       RTE_FLOW_ITEM_TYPE_VXLAN_GPE),
        .actions = valid_actions,
        .mask = &(const struct rte_flow_item_udp){
            .hdr = {
                .src_port = -1,
                .dst_port = -1,
            },
        },
        .default_mask = &rte_flow_item_udp_mask,
        .mask_sz = sizeof(struct rte_flow_item_udp),
        .convert = mlx5_flow_create_udp,
        .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
    },
    [RTE_FLOW_ITEM_TYPE_TCP] = {
        .actions = valid_actions,
        .mask = &(const struct rte_flow_item_tcp){
            .hdr = {
                .src_port = -1,
                .dst_port = -1,
            },
        },
        .default_mask = &rte_flow_item_tcp_mask,
        .mask_sz = sizeof(struct rte_flow_item_tcp),
        .convert = mlx5_flow_create_tcp,
        .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
    },
    [RTE_FLOW_ITEM_TYPE_GRE] = {
        .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
                       RTE_FLOW_ITEM_TYPE_IPV4,
                       RTE_FLOW_ITEM_TYPE_IPV6),
        .actions = valid_actions,
        .mask = &(const struct rte_flow_item_gre){
            .protocol = -1,
        },
        .default_mask = &rte_flow_item_gre_mask,
        .mask_sz = sizeof(struct rte_flow_item_gre),
        .convert = mlx5_flow_create_gre,
        .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
    },
    [RTE_FLOW_ITEM_TYPE_VXLAN] = {
        .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
                       RTE_FLOW_ITEM_TYPE_IPV4, /* For L3 VXLAN. */
                       RTE_FLOW_ITEM_TYPE_IPV6), /* For L3 VXLAN. */
        .actions = valid_actions,
        .mask = &(const struct rte_flow_item_vxlan){
            .vni = "\xff\xff\xff",
        },
        .default_mask = &rte_flow_item_vxlan_mask,
        .mask_sz = sizeof(struct rte_flow_item_vxlan),
        .convert = mlx5_flow_create_vxlan,
        .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
    },
    [RTE_FLOW_ITEM_TYPE_VXLAN_GPE] = {
        .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
                       RTE_FLOW_ITEM_TYPE_IPV4,
                       RTE_FLOW_ITEM_TYPE_IPV6),
        .actions = valid_actions,
        .mask = &(const struct rte_flow_item_vxlan_gpe){
            .vni = "\xff\xff\xff",
        },
        .default_mask = &rte_flow_item_vxlan_gpe_mask,
        .mask_sz = sizeof(struct rte_flow_item_vxlan_gpe),
        .convert = mlx5_flow_create_vxlan_gpe,
        .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
    },
};
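
/*
 * Usage sketch: a pattern such as eth / ipv4 / udp / vxlan is accepted
 * because every item appears in the .items list of its predecessor,
 * starting from RTE_FLOW_ITEM_TYPE_END; each accepted item contributes
 * .dst_sz bytes to the Verbs attribute being sized or filled and is
 * converted by its .convert callback.
 */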
/** Structure to pass to the conversion function. */
struct mlx5_flow_parse {
    uint32_t inner; /**< Verbs value, set once tunnel is encountered. */
    uint32_t create:1;
    /**< Whether resources should remain after a validate. */
    uint32_t drop:1; /**< Target is a drop queue. */
    uint32_t mark:1; /**< Mark is present in the flow. */
    uint32_t count:1; /**< Count is present in the flow. */
    uint32_t mark_id; /**< Mark identifier. */
    struct rte_flow_action_rss rss_conf; /**< RSS configuration. */
    uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use. */
    uint8_t rss_key[40]; /**< Copy of the RSS key. */
    enum hash_rxq_type layer; /**< Last pattern layer detected. */
    enum hash_rxq_type out_layer; /**< Last outer pattern layer detected. */
    uint32_t tunnel; /**< Tunnel type of RTE_PTYPE_TUNNEL_XXX. */
    struct ibv_counter_set *cs; /**< Holds the counter set for the rule. */
    struct {
        struct ibv_flow_attr *ibv_attr;
        /**< Pointer to Verbs attributes. */
        unsigned int offset;
        /**< Current position or total size of the attribute. */
        uint64_t hash_fields; /**< Verbs hash fields. */
    } queue[RTE_DIM(hash_rxq_init)];
};
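
/*
 * Note: queue[] holds one Verbs attribute per hash_rxq_type, so a single
 * rte_flow with RSS can expand into several Verbs flows (one per hash
 * type); entries that end up unused are trimmed by
 * mlx5_flow_convert_rss().
 */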
static const struct rte_flow_ops mlx5_flow_ops = {
    .validate = mlx5_flow_validate,
    .create = mlx5_flow_create,
    .destroy = mlx5_flow_destroy,
    .flush = mlx5_flow_flush,
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
    .query = mlx5_flow_query,
#else
    .query = NULL,
#endif
    .isolate = mlx5_flow_isolate,
};
/* Convert FDIR request to Generic flow. */
struct mlx5_fdir {
    struct rte_flow_attr attr;
    struct rte_flow_action actions[2];
    struct rte_flow_item items[4];
    struct rte_flow_item_eth l2;
    struct rte_flow_item_eth l2_mask;
    union {
        struct rte_flow_item_ipv4 ipv4;
        struct rte_flow_item_ipv6 ipv6;
    } l3;
    union {
        struct rte_flow_item_ipv4 ipv4;
        struct rte_flow_item_ipv6 ipv6;
    } l3_mask;
    union {
        struct rte_flow_item_udp udp;
        struct rte_flow_item_tcp tcp;
    } l4;
    union {
        struct rte_flow_item_udp udp;
        struct rte_flow_item_tcp tcp;
    } l4_mask;
    struct rte_flow_action_queue queue;
};

/* Verbs specification header. */
struct ibv_spec_header {
    enum ibv_flow_spec_type type;
    uint16_t size;
};
/**
 * Check support for a given item.
 *
 * @param item[in]
 *   Item specification.
 * @param mask[in]
 *   Bit-masks covering supported fields to compare with spec, last and mask in
 *   \item.
 * @param size
 *   Bit-Mask size in bytes.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_item_validate(const struct rte_flow_item *item,
                        const uint8_t *mask, unsigned int size)
{
    if (!item->spec && (item->mask || item->last)) {
        rte_errno = EINVAL;
        return -rte_errno;
    }
    if (item->spec && !item->mask) {
        unsigned int i;
        const uint8_t *spec = item->spec;

        for (i = 0; i < size; ++i)
            if ((spec[i] | mask[i]) != mask[i]) {
                rte_errno = EINVAL;
                return -rte_errno;
            }
    }
    if (item->last && !item->mask) {
        unsigned int i;
        const uint8_t *spec = item->last;

        for (i = 0; i < size; ++i)
            if ((spec[i] | mask[i]) != mask[i]) {
                rte_errno = EINVAL;
                return -rte_errno;
            }
    }
    if (item->mask) {
        unsigned int i;
        const uint8_t *spec = item->spec;

        for (i = 0; i < size; ++i)
            if ((spec[i] | mask[i]) != mask[i]) {
                rte_errno = EINVAL;
                return -rte_errno;
            }
    }
    if (item->spec && item->last) {
        uint8_t spec[size];
        uint8_t last[size];
        const uint8_t *apply = mask;
        unsigned int i;
        int ret;

        if (item->mask)
            apply = item->mask;
        for (i = 0; i < size; ++i) {
            spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
            last[i] = ((const uint8_t *)item->last)[i] & apply[i];
        }
        ret = memcmp(spec, last, size);
        if (ret) {
            rte_errno = EINVAL;
            return -rte_errno;
        }
    }
    return 0;
}
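
/*
 * Example of the subset test used above: with a supported mask byte of
 * 0xf0, a spec byte of 0x1f is rejected because (0x1f | 0xf0) == 0xff !=
 * 0xf0, i.e. the item requests matching on bits the device cannot match.
 */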
/**
 * Extract attribute to the parser.
 *
 * @param[in] attr
 *   Flow rule attributes.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_convert_attributes(const struct rte_flow_attr *attr,
                             struct rte_flow_error *error)
{
    if (attr->group) {
        rte_flow_error_set(error, ENOTSUP,
                           RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
                           NULL,
                           "groups are not supported");
        return -rte_errno;
    }
    if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
        rte_flow_error_set(error, ENOTSUP,
                           RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
                           NULL,
                           "priorities are not supported");
        return -rte_errno;
    }
    if (attr->egress) {
        rte_flow_error_set(error, ENOTSUP,
                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
                           NULL,
                           "egress is not supported");
        return -rte_errno;
    }
    if (attr->transfer) {
        rte_flow_error_set(error, ENOTSUP,
                           RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
                           NULL,
                           "transfer is not supported");
        return -rte_errno;
    }
    if (!attr->ingress) {
        rte_flow_error_set(error, ENOTSUP,
                           RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
                           NULL,
                           "only ingress is supported");
        return -rte_errno;
    }
    return 0;
}
/**
 * Extract actions request to the parser.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_convert_actions(struct rte_eth_dev *dev,
                          const struct rte_flow_action actions[],
                          struct rte_flow_error *error,
                          struct mlx5_flow_parse *parser)
{
    enum { FATE = 1, MARK = 2, COUNT = 4, };
    uint32_t overlap = 0;
    struct priv *priv = dev->data->dev_private;
    for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
        if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
            continue;
        } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
            if (overlap & FATE)
                goto exit_action_overlap;
            overlap |= FATE;
            parser->drop = 1;
        } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
            const struct rte_flow_action_queue *queue =
                (const struct rte_flow_action_queue *)
                actions->conf;

            if (overlap & FATE)
                goto exit_action_overlap;
            overlap |= FATE;
            if (!queue || (queue->index > (priv->rxqs_n - 1)))
                goto exit_action_not_supported;
            parser->queues[0] = queue->index;
            parser->rss_conf = (struct rte_flow_action_rss){
                .queue_num = 1,
                .queue = parser->queues,
            };
        } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
            const struct rte_flow_action_rss *rss =
                (const struct rte_flow_action_rss *)
                actions->conf;
            const uint8_t *rss_key;
            uint32_t rss_key_len;
            uint32_t n;

            if (overlap & FATE)
                goto exit_action_overlap;
            overlap |= FATE;
            if (rss->func &&
                rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ) {
                rte_flow_error_set(error, EINVAL,
                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                   actions,
                                   "the only supported RSS hash"
                                   " function is Toeplitz");
                return -rte_errno;
            }
#ifndef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
            if (parser->rss_conf.level > 1) {
                rte_flow_error_set(error, EINVAL,
                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                   actions,
                                   "a nonzero RSS encapsulation"
                                   " level is not supported");
                return -rte_errno;
            }
#endif
            if (parser->rss_conf.level > 2) {
                rte_flow_error_set(error, EINVAL,
                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                   actions,
                                   "RSS encapsulation level"
                                   " > 1 is not supported");
                return -rte_errno;
            }
            if (rss->types & MLX5_RSS_HF_MASK) {
                rte_flow_error_set(error, EINVAL,
                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                   actions,
                                   "unsupported RSS type"
                                   " requested");
                return -rte_errno;
            }
            if (rss->key_len) {
                rss_key_len = rss->key_len;
                rss_key = rss->key;
            } else {
                rss_key_len = rss_hash_default_key_len;
                rss_key = rss_hash_default_key;
            }
            if (rss_key_len != RTE_DIM(parser->rss_key)) {
                rte_flow_error_set(error, EINVAL,
                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                   actions,
                                   "RSS hash key must be"
                                   " exactly 40 bytes long");
                return -rte_errno;
            }
            if (!rss->queue_num) {
                rte_flow_error_set(error, EINVAL,
                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                   actions,
                                   "no valid queues");
                return -rte_errno;
            }
            if (rss->queue_num > RTE_DIM(parser->queues)) {
                rte_flow_error_set(error, EINVAL,
                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                   actions,
                                   "too many queues for RSS"
                                   " context");
                return -rte_errno;
            }
            for (n = 0; n < rss->queue_num; ++n) {
                if (rss->queue[n] >= priv->rxqs_n) {
                    rte_flow_error_set(error, EINVAL,
                                       RTE_FLOW_ERROR_TYPE_ACTION,
                                       actions,
                                       "queue id > number of"
                                       " queues");
                    return -rte_errno;
                }
            }
            parser->rss_conf = (struct rte_flow_action_rss){
                .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
                .level = rss->level,
                .types = rss->types,
                .key_len = rss_key_len,
                .queue_num = rss->queue_num,
                .key = memcpy(parser->rss_key, rss_key,
                              sizeof(*rss_key) * rss_key_len),
                .queue = memcpy(parser->queues, rss->queue,
                                sizeof(*rss->queue) *
                                rss->queue_num),
            };
        } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
            const struct rte_flow_action_mark *mark =
                (const struct rte_flow_action_mark *)
                actions->conf;

            if (overlap & MARK)
                goto exit_action_overlap;
            overlap |= MARK;
            if (!mark) {
                rte_flow_error_set(error, EINVAL,
                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                   actions,
                                   "mark must be defined");
                return -rte_errno;
            } else if (mark->id >= MLX5_FLOW_MARK_MAX) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                   actions,
                                   "mark must be between 0"
                                   " and 16777199");
                return -rte_errno;
            }
            parser->mark = 1;
            parser->mark_id = mark->id;
        } else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
            if (overlap & MARK)
                goto exit_action_overlap;
            overlap |= MARK;
            parser->mark = 1;
        } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
                   priv->config.flow_counter_en) {
            if (overlap & COUNT)
                goto exit_action_overlap;
            overlap |= COUNT;
            parser->count = 1;
        } else {
            goto exit_action_not_supported;
        }
    }
    /* When fate is unknown, drop traffic. */
    if (!(overlap & FATE))
        parser->drop = 1;
    if (parser->drop && parser->mark)
        parser->mark = 0;
    if (!parser->rss_conf.queue_num && !parser->drop) {
        rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
                           NULL, "no valid action");
        return -rte_errno;
    }
    return 0;
exit_action_not_supported:
    rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
                       actions, "action not supported");
    return -rte_errno;
exit_action_overlap:
    rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
                       actions, "overlapping actions are not supported");
    return -rte_errno;
}
/**
 * Validate items.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[in] items
 *   Pattern specification (list terminated by the END pattern item).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_convert_items_validate(struct rte_eth_dev *dev,
                                 const struct rte_flow_item items[],
                                 struct rte_flow_error *error,
                                 struct mlx5_flow_parse *parser)
{
    struct priv *priv = dev->data->dev_private;
    const struct mlx5_flow_items *cur_item = mlx5_flow_items;
    unsigned int i;
    int ret = 0;

    /* Initialise the offsets to start after verbs attribute. */
    for (i = 0; i != hash_rxq_init_n; ++i)
        parser->queue[i].offset = sizeof(struct ibv_flow_attr);
    for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
        const struct mlx5_flow_items *token = NULL;
        unsigned int n;

        if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
            continue;
        for (i = 0;
             cur_item->items &&
             cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
             ++i) {
            if (cur_item->items[i] == items->type) {
                token = &mlx5_flow_items[items->type];
                break;
            }
        }
        if (!token) {
            ret = -ENOTSUP;
            goto exit_item_not_supported;
        }
        cur_item = token;
        ret = mlx5_flow_item_validate(items,
                                      (const uint8_t *)cur_item->mask,
                                      cur_item->mask_sz);
        if (ret)
            goto exit_item_not_supported;
        if (IS_TUNNEL(items->type)) {
            if (parser->tunnel) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ITEM,
                                   items,
                                   "Cannot recognize multiple"
                                   " tunnel encapsulations.");
                return -rte_errno;
            }
            if (!priv->config.tunnel_en &&
                parser->rss_conf.level > 1) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ITEM,
                                   items,
                                   "RSS on tunnel is not supported");
                return -rte_errno;
            }
            parser->inner = IBV_FLOW_SPEC_INNER;
            parser->tunnel = flow_ptype[items->type];
        }
        if (parser->drop) {
            parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
        } else {
            for (n = 0; n != hash_rxq_init_n; ++n)
                parser->queue[n].offset += cur_item->dst_sz;
        }
    }
    if (parser->drop) {
        parser->queue[HASH_RXQ_ETH].offset +=
            sizeof(struct ibv_flow_spec_action_drop);
    }
    if (parser->mark)
        for (i = 0; i != hash_rxq_init_n; ++i)
            parser->queue[i].offset +=
                sizeof(struct ibv_flow_spec_action_tag);
    if (parser->count) {
        unsigned int size = sizeof(struct ibv_flow_spec_counter_action);

        for (i = 0; i != hash_rxq_init_n; ++i)
            parser->queue[i].offset += size;
    }
    return 0;
exit_item_not_supported:
    return rte_flow_error_set(error, -ret, RTE_FLOW_ERROR_TYPE_ITEM,
                              items, "item not supported");
}
/**
 * Allocate memory space to store verbs flow attributes.
 *
 * @param[in] size
 *   Amount of bytes to allocate.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A verbs flow attribute on success, NULL otherwise and rte_errno is set.
 */
static struct ibv_flow_attr *
mlx5_flow_convert_allocate(unsigned int size, struct rte_flow_error *error)
{
    struct ibv_flow_attr *ibv_attr;

    ibv_attr = rte_calloc(__func__, 1, size, 0);
    if (!ibv_attr) {
        rte_flow_error_set(error, ENOMEM,
                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
                           NULL,
                           "cannot allocate verbs spec attributes");
        return NULL;
    }
    return ibv_attr;
}
/**
 * Give inner packet matching a higher priority than non-inner matching.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[in, out] parser
 *   Internal parser structure.
 * @param attr
 *   User flow attribute.
 */
static void
mlx5_flow_update_priority(struct rte_eth_dev *dev,
                          struct mlx5_flow_parse *parser,
                          const struct rte_flow_attr *attr)
{
    struct priv *priv = dev->data->dev_private;
    unsigned int i;
    uint16_t priority;

    /*                      8 priorities    >= 16 priorities
     * Control flow:        4-7             8-15
     * User normal flow:    1-3             4-7
     * User tunnel flow:    0-2             0-3
     */
    priority = attr->priority * MLX5_VERBS_FLOW_PRIO_8;
    if (priv->config.max_verbs_prio == MLX5_VERBS_FLOW_PRIO_8)
        priority /= 2;
    /*
     * Lower non-tunnel flow Verbs priority by 1 if only 8 Verbs
     * priorities are supported, by 4 otherwise.
     */
    if (!parser->inner) {
        if (priv->config.max_verbs_prio == MLX5_VERBS_FLOW_PRIO_8)
            priority += 1;
        else
            priority += MLX5_VERBS_FLOW_PRIO_8 / 2;
    }
    if (parser->drop) {
        parser->queue[HASH_RXQ_ETH].ibv_attr->priority = priority +
                hash_rxq_init[HASH_RXQ_ETH].flow_priority;
        return;
    }
    for (i = 0; i != hash_rxq_init_n; ++i) {
        if (!parser->queue[i].ibv_attr)
            continue;
        parser->queue[i].ibv_attr->priority = priority +
                hash_rxq_init[i].flow_priority;
    }
}
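
/*
 * Worked example of the table above: a non-tunnel user flow
 * (attr->priority == 0) on a device with 16 Verbs priorities gets
 * 0 * MLX5_VERBS_FLOW_PRIO_8 = 0, plus 4 for not being inner, plus
 * hash_rxq_init[].flow_priority (0 for L4, 1 for L3, 2 for L2), i.e. the
 * documented 4-7 range; on an 8-priority device the same flow lands in
 * 1-3.
 */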
/**
 * Finalise verbs flow attributes.
 *
 * @param[in, out] parser
 *   Internal parser structure.
 */
static void
mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
{
    unsigned int i;
    uint32_t inner = parser->inner;

    /* Don't create extra flows for outer RSS. */
    if (parser->tunnel && parser->rss_conf.level < 2)
        return;
    /*
     * Fill missing layers in verbs specifications, or compute the correct
     * offset to allocate the memory space for the attributes and
     * specifications.
     */
    for (i = 0; i != hash_rxq_init_n - 1; ++i) {
        union {
            struct ibv_flow_spec_ipv4_ext ipv4;
            struct ibv_flow_spec_ipv6 ipv6;
            struct ibv_flow_spec_tcp_udp udp_tcp;
            struct ibv_flow_spec_eth eth;
        } specs;
        void *dst;
        uint16_t size;

        if (i == parser->layer)
            continue;
        if (parser->layer == HASH_RXQ_ETH ||
            parser->layer == HASH_RXQ_TUNNEL) {
            if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
                size = sizeof(struct ibv_flow_spec_ipv4_ext);
                specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
                    .type = inner | IBV_FLOW_SPEC_IPV4_EXT,
                    .size = size,
                };
            } else {
                size = sizeof(struct ibv_flow_spec_ipv6);
                specs.ipv6 = (struct ibv_flow_spec_ipv6){
                    .type = inner | IBV_FLOW_SPEC_IPV6,
                    .size = size,
                };
            }
            if (parser->queue[i].ibv_attr) {
                dst = (void *)((uintptr_t)
                               parser->queue[i].ibv_attr +
                               parser->queue[i].offset);
                memcpy(dst, &specs, size);
                ++parser->queue[i].ibv_attr->num_of_specs;
            }
            parser->queue[i].offset += size;
        }
        if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
            (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
            size = sizeof(struct ibv_flow_spec_tcp_udp);
            specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
                .type = inner | ((i == HASH_RXQ_UDPV4 ||
                                  i == HASH_RXQ_UDPV6) ?
                                 IBV_FLOW_SPEC_UDP :
                                 IBV_FLOW_SPEC_TCP),
                .size = size,
            };
            if (parser->queue[i].ibv_attr) {
                dst = (void *)((uintptr_t)
                               parser->queue[i].ibv_attr +
                               parser->queue[i].offset);
                memcpy(dst, &specs, size);
                ++parser->queue[i].ibv_attr->num_of_specs;
            }
            parser->queue[i].offset += size;
        }
    }
}
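
/*
 * Example: for the bare pattern "eth" (parser->layer == HASH_RXQ_ETH),
 * the loop above appends the missing IPv4/IPv6 and TCP/UDP specs to
 * every non-L2 hash type so that RSS expansion still yields complete
 * Verbs specifications for each queue[] entry.
 */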
/**
 * Update flows according to pattern and RSS hash fields.
 *
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_convert_rss(struct mlx5_flow_parse *parser)
{
    unsigned int i;
    enum hash_rxq_type start;
    enum hash_rxq_type layer;
    int outer = parser->tunnel && parser->rss_conf.level < 2;
    uint64_t rss = parser->rss_conf.types;

    /* Default to outer RSS. */
    if (!parser->rss_conf.level)
        parser->rss_conf.level = 1;
    layer = outer ? parser->out_layer : parser->layer;
    if (layer == HASH_RXQ_TUNNEL)
        layer = HASH_RXQ_ETH;
    if (outer) {
        /* Only one hash type for outer RSS. */
        if (rss && layer == HASH_RXQ_ETH) {
            start = HASH_RXQ_TCPV4;
        } else if (rss && layer != HASH_RXQ_ETH &&
                   !(rss & hash_rxq_init[layer].dpdk_rss_hf)) {
            /* If RSS types do not match the L4 pattern, try L3 RSS. */
            if (layer < HASH_RXQ_IPV4)
                layer = HASH_RXQ_IPV4;
            else if (layer > HASH_RXQ_IPV4 && layer < HASH_RXQ_IPV6)
                layer = HASH_RXQ_IPV6;
            start = layer;
        } else {
            start = layer;
        }
        /* Scan first valid hash type. */
        for (i = start; rss && i <= layer; ++i) {
            if (!parser->queue[i].ibv_attr)
                continue;
            if (hash_rxq_init[i].dpdk_rss_hf & rss)
                break;
        }
        if (rss && i <= layer)
            parser->queue[layer].hash_fields =
                hash_rxq_init[i].hash_fields;
        /* Trim unused hash types. */
        for (i = 0; i != hash_rxq_init_n; ++i) {
            if (parser->queue[i].ibv_attr && i != layer) {
                rte_free(parser->queue[i].ibv_attr);
                parser->queue[i].ibv_attr = NULL;
            }
        }
    } else {
        /* Expand for inner or normal RSS. */
        if (rss && (layer == HASH_RXQ_ETH || layer == HASH_RXQ_IPV4))
            start = HASH_RXQ_TCPV4;
        else if (rss && layer == HASH_RXQ_IPV6)
            start = HASH_RXQ_TCPV6;
        else
            start = layer;
        /* For L4 pattern, try L3 RSS if no L4 RSS. */
        /* Trim unused hash types. */
        for (i = 0; i != hash_rxq_init_n; ++i) {
            if (!parser->queue[i].ibv_attr)
                continue;
            if (i < start || i > layer) {
                rte_free(parser->queue[i].ibv_attr);
                parser->queue[i].ibv_attr = NULL;
                continue;
            }
            if (hash_rxq_init[i].dpdk_rss_hf & rss) {
                parser->queue[i].hash_fields =
                    hash_rxq_init[i].hash_fields;
            } else if (i != layer) {
                /* Remove unused RSS expansion. */
                rte_free(parser->queue[i].ibv_attr);
                parser->queue[i].ibv_attr = NULL;
            } else if (layer < HASH_RXQ_IPV4 &&
                       (hash_rxq_init[HASH_RXQ_IPV4].dpdk_rss_hf &
                        rss)) {
                /* Allow IPv4 RSS on L4 pattern. */
                parser->queue[i].hash_fields =
                    hash_rxq_init[HASH_RXQ_IPV4]
                    .hash_fields;
            } else if (i > HASH_RXQ_IPV4 && i < HASH_RXQ_IPV6 &&
                       (hash_rxq_init[HASH_RXQ_IPV6].dpdk_rss_hf &
                        rss)) {
                /* Allow IPv6 RSS on L4 pattern. */
                parser->queue[i].hash_fields =
                    hash_rxq_init[HASH_RXQ_IPV6]
                    .hash_fields;
            }
        }
    }
    return 0;
}
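
/*
 * Example: pattern eth / ipv4 / udp with RSS types == ETH_RSS_IPV4. The
 * L4 layer (HASH_RXQ_UDPV4) has no matching dpdk_rss_hf bit, so the code
 * above falls back to the IPv4 hash fields on that queue and packets are
 * hashed on the address 2-tuple instead of the 4-tuple.
 */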
/**
 * Validate and convert a flow supported by the NIC.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_convert(struct rte_eth_dev *dev,
                  const struct rte_flow_attr *attr,
                  const struct rte_flow_item items[],
                  const struct rte_flow_action actions[],
                  struct rte_flow_error *error,
                  struct mlx5_flow_parse *parser)
{
    const struct mlx5_flow_items *cur_item = mlx5_flow_items;
    unsigned int i;
    int ret;

    /* First step. Validate the attributes, items and actions. */
    *parser = (struct mlx5_flow_parse){
        .create = parser->create,
        .layer = HASH_RXQ_ETH,
        .mark_id = MLX5_FLOW_MARK_DEFAULT,
    };
    ret = mlx5_flow_convert_attributes(attr, error);
    if (ret)
        return ret;
    ret = mlx5_flow_convert_actions(dev, actions, error, parser);
    if (ret)
        return ret;
    ret = mlx5_flow_convert_items_validate(dev, items, error, parser);
    if (ret)
        return ret;
    mlx5_flow_convert_finalise(parser);
    /*
     * Second step.
     * Allocate the memory space to store verbs specifications.
     */
    if (parser->drop) {
        unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;

        parser->queue[HASH_RXQ_ETH].ibv_attr =
            mlx5_flow_convert_allocate(offset, error);
        if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
            return -rte_errno;
        parser->queue[HASH_RXQ_ETH].offset =
            sizeof(struct ibv_flow_attr);
    } else {
        for (i = 0; i != hash_rxq_init_n; ++i) {
            unsigned int offset;

            offset = parser->queue[i].offset;
            parser->queue[i].ibv_attr =
                mlx5_flow_convert_allocate(offset, error);
            if (!parser->queue[i].ibv_attr)
                goto exit_enomem;
            parser->queue[i].offset = sizeof(struct ibv_flow_attr);
        }
    }
    /* Third step. Conversion parse, fill the specifications. */
    parser->inner = 0;
    parser->tunnel = 0;
    parser->layer = HASH_RXQ_ETH;
    for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
        struct mlx5_flow_data data = {
            .dev = dev,
            .parser = parser,
            .error = error,
        };

        if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
            continue;
        cur_item = &mlx5_flow_items[items->type];
        ret = cur_item->convert(items,
                                (cur_item->default_mask ?
                                 cur_item->default_mask :
                                 cur_item->mask),
                                &data);
        if (ret)
            goto exit_free;
    }
    if (!parser->drop) {
        /* RSS check, remove unused hash types. */
        ret = mlx5_flow_convert_rss(parser);
        if (ret)
            goto exit_free;
        /* Complete missing specification. */
        mlx5_flow_convert_finalise(parser);
    }
    mlx5_flow_update_priority(dev, parser, attr);
    if (parser->mark)
        mlx5_flow_create_flag_mark(parser, parser->mark_id);
    if (parser->count && parser->create) {
        mlx5_flow_create_count(dev, parser);
        if (!parser->cs)
            goto exit_count_error;
    }
exit_free:
    /* Only verification is expected, all resources should be released. */
    if (!parser->create) {
        for (i = 0; i != hash_rxq_init_n; ++i) {
            if (parser->queue[i].ibv_attr) {
                rte_free(parser->queue[i].ibv_attr);
                parser->queue[i].ibv_attr = NULL;
            }
        }
    }
    return ret;
exit_enomem:
    for (i = 0; i != hash_rxq_init_n; ++i) {
        if (parser->queue[i].ibv_attr) {
            rte_free(parser->queue[i].ibv_attr);
            parser->queue[i].ibv_attr = NULL;
        }
    }
    rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
                       NULL, "cannot allocate verbs spec attributes");
    return -rte_errno;
exit_count_error:
    rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
                       NULL, "cannot create counter");
    return -rte_errno;
}
/**
 * Copy the specification created into the flow.
 *
 * @param parser
 *   Internal parser structure.
 * @param src
 *   Create specification.
 * @param size
 *   Size in bytes of the specification to copy.
 */
static void
mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
                      unsigned int size)
{
    unsigned int i;
    void *dst;

    for (i = 0; i != hash_rxq_init_n; ++i) {
        if (!parser->queue[i].ibv_attr)
            continue;
        dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
                       parser->queue[i].offset);
        memcpy(dst, src, size);
        ++parser->queue[i].ibv_attr->num_of_specs;
        parser->queue[i].offset += size;
    }
}
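
/*
 * Each mlx5_flow_create_*() conversion callback below builds one Verbs
 * spec on its stack and uses this helper to append it to every
 * per-hash-type attribute still present in parser->queue[].
 */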
/**
 * Convert Ethernet item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
                     const void *default_mask,
                     struct mlx5_flow_data *data)
{
    const struct rte_flow_item_eth *spec = item->spec;
    const struct rte_flow_item_eth *mask = item->mask;
    struct mlx5_flow_parse *parser = data->parser;
    const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
    struct ibv_flow_spec_eth eth = {
        .type = parser->inner | IBV_FLOW_SPEC_ETH,
        .size = eth_size,
    };

    parser->layer = HASH_RXQ_ETH;
    if (spec) {
        unsigned int i;

        if (!mask)
            mask = default_mask;
        memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
        memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
        eth.val.ether_type = spec->type;
        memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
        memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
        eth.mask.ether_type = mask->type;
        /* Remove unwanted bits from values. */
        for (i = 0; i < ETHER_ADDR_LEN; ++i) {
            eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
            eth.val.src_mac[i] &= eth.mask.src_mac[i];
        }
        eth.val.ether_type &= eth.mask.ether_type;
    }
    mlx5_flow_create_copy(parser, &eth, eth_size);
    return 0;
}
/**
 * Convert VLAN item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_vlan(const struct rte_flow_item *item,
                      const void *default_mask,
                      struct mlx5_flow_data *data)
{
    const struct rte_flow_item_vlan *spec = item->spec;
    const struct rte_flow_item_vlan *mask = item->mask;
    struct mlx5_flow_parse *parser = data->parser;
    struct ibv_flow_spec_eth *eth;
    const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
    const char *msg = "VLAN cannot be empty";

    if (spec) {
        unsigned int i;

        if (!mask)
            mask = default_mask;
        for (i = 0; i != hash_rxq_init_n; ++i) {
            if (!parser->queue[i].ibv_attr)
                continue;

            eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
                           parser->queue[i].offset - eth_size);
            eth->val.vlan_tag = spec->tci;
            eth->mask.vlan_tag = mask->tci;
            eth->val.vlan_tag &= eth->mask.vlan_tag;
            /*
             * From verbs perspective an empty VLAN is equivalent
             * to a packet without VLAN layer.
             */
            if (!eth->mask.vlan_tag)
                goto error;
            /* Outer TPID cannot be matched. */
            if (eth->mask.ether_type) {
                msg = "VLAN TPID matching is not supported";
                goto error;
            }
            eth->val.ether_type = spec->inner_type;
            eth->mask.ether_type = mask->inner_type;
            eth->val.ether_type &= eth->mask.ether_type;
        }
        return 0;
    }
error:
    return rte_flow_error_set(data->error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM,
                              item, msg);
}
/**
 * Convert IPv4 item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
                      const void *default_mask,
                      struct mlx5_flow_data *data)
{
    struct priv *priv = data->dev->data->dev_private;
    const struct rte_flow_item_ipv4 *spec = item->spec;
    const struct rte_flow_item_ipv4 *mask = item->mask;
    struct mlx5_flow_parse *parser = data->parser;
    unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
    struct ibv_flow_spec_ipv4_ext ipv4 = {
        .type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
        .size = ipv4_size,
    };

    if (parser->layer == HASH_RXQ_TUNNEL &&
        parser->tunnel == ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] &&
        !priv->config.l3_vxlan_en)
        return rte_flow_error_set(data->error, EINVAL,
                                  RTE_FLOW_ERROR_TYPE_ITEM,
                                  item,
                                  "L3 VXLAN not enabled by device"
                                  " parameter and/or not configured"
                                  " in firmware");
    parser->layer = HASH_RXQ_IPV4;
    if (spec) {
        if (!mask)
            mask = default_mask;
        ipv4.val = (struct ibv_flow_ipv4_ext_filter){
            .src_ip = spec->hdr.src_addr,
            .dst_ip = spec->hdr.dst_addr,
            .proto = spec->hdr.next_proto_id,
            .tos = spec->hdr.type_of_service,
        };
        ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
            .src_ip = mask->hdr.src_addr,
            .dst_ip = mask->hdr.dst_addr,
            .proto = mask->hdr.next_proto_id,
            .tos = mask->hdr.type_of_service,
        };
        /* Remove unwanted bits from values. */
        ipv4.val.src_ip &= ipv4.mask.src_ip;
        ipv4.val.dst_ip &= ipv4.mask.dst_ip;
        ipv4.val.proto &= ipv4.mask.proto;
        ipv4.val.tos &= ipv4.mask.tos;
    }
    mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
    return 0;
}
/**
 * Convert IPv6 item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
                      const void *default_mask,
                      struct mlx5_flow_data *data)
{
    struct priv *priv = data->dev->data->dev_private;
    const struct rte_flow_item_ipv6 *spec = item->spec;
    const struct rte_flow_item_ipv6 *mask = item->mask;
    struct mlx5_flow_parse *parser = data->parser;
    unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
    struct ibv_flow_spec_ipv6 ipv6 = {
        .type = parser->inner | IBV_FLOW_SPEC_IPV6,
        .size = ipv6_size,
    };

    if (parser->layer == HASH_RXQ_TUNNEL &&
        parser->tunnel == ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] &&
        !priv->config.l3_vxlan_en)
        return rte_flow_error_set(data->error, EINVAL,
                                  RTE_FLOW_ERROR_TYPE_ITEM,
                                  item,
                                  "L3 VXLAN not enabled by device"
                                  " parameter and/or not configured"
                                  " in firmware");
    parser->layer = HASH_RXQ_IPV6;
    if (spec) {
        unsigned int i;
        uint32_t vtc_flow_val;
        uint32_t vtc_flow_mask;

        if (!mask)
            mask = default_mask;
        memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
               RTE_DIM(ipv6.val.src_ip));
        memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
               RTE_DIM(ipv6.val.dst_ip));
        memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
               RTE_DIM(ipv6.mask.src_ip));
        memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
               RTE_DIM(ipv6.mask.dst_ip));
        vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
        vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
        ipv6.val.flow_label =
            rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
                             IPV6_HDR_FL_SHIFT);
        ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
                                 IPV6_HDR_TC_SHIFT;
        ipv6.val.next_hdr = spec->hdr.proto;
        ipv6.val.hop_limit = spec->hdr.hop_limits;
        ipv6.mask.flow_label =
            rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
                             IPV6_HDR_FL_SHIFT);
        ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
                                  IPV6_HDR_TC_SHIFT;
        ipv6.mask.next_hdr = mask->hdr.proto;
        ipv6.mask.hop_limit = mask->hdr.hop_limits;
        /* Remove unwanted bits from values. */
        for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
            ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
            ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
        }
        ipv6.val.flow_label &= ipv6.mask.flow_label;
        ipv6.val.traffic_class &= ipv6.mask.traffic_class;
        ipv6.val.next_hdr &= ipv6.mask.next_hdr;
        ipv6.val.hop_limit &= ipv6.mask.hop_limit;
    }
    mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
    return 0;
}
/**
 * Convert UDP item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_udp(const struct rte_flow_item *item,
                     const void *default_mask,
                     struct mlx5_flow_data *data)
{
    const struct rte_flow_item_udp *spec = item->spec;
    const struct rte_flow_item_udp *mask = item->mask;
    struct mlx5_flow_parse *parser = data->parser;
    unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
    struct ibv_flow_spec_tcp_udp udp = {
        .type = parser->inner | IBV_FLOW_SPEC_UDP,
        .size = udp_size,
    };

    if (parser->layer == HASH_RXQ_IPV4)
        parser->layer = HASH_RXQ_UDPV4;
    else
        parser->layer = HASH_RXQ_UDPV6;
    if (spec) {
        if (!mask)
            mask = default_mask;
        udp.val.dst_port = spec->hdr.dst_port;
        udp.val.src_port = spec->hdr.src_port;
        udp.mask.dst_port = mask->hdr.dst_port;
        udp.mask.src_port = mask->hdr.src_port;
        /* Remove unwanted bits from values. */
        udp.val.src_port &= udp.mask.src_port;
        udp.val.dst_port &= udp.mask.dst_port;
    }
    mlx5_flow_create_copy(parser, &udp, udp_size);
    return 0;
}
/**
 * Convert TCP item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_tcp(const struct rte_flow_item *item,
                     const void *default_mask,
                     struct mlx5_flow_data *data)
{
    const struct rte_flow_item_tcp *spec = item->spec;
    const struct rte_flow_item_tcp *mask = item->mask;
    struct mlx5_flow_parse *parser = data->parser;
    unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
    struct ibv_flow_spec_tcp_udp tcp = {
        .type = parser->inner | IBV_FLOW_SPEC_TCP,
        .size = tcp_size,
    };

    if (parser->layer == HASH_RXQ_IPV4)
        parser->layer = HASH_RXQ_TCPV4;
    else
        parser->layer = HASH_RXQ_TCPV6;
    if (spec) {
        if (!mask)
            mask = default_mask;
        tcp.val.dst_port = spec->hdr.dst_port;
        tcp.val.src_port = spec->hdr.src_port;
        tcp.mask.dst_port = mask->hdr.dst_port;
        tcp.mask.src_port = mask->hdr.src_port;
        /* Remove unwanted bits from values. */
        tcp.val.src_port &= tcp.mask.src_port;
        tcp.val.dst_port &= tcp.mask.dst_port;
    }
    mlx5_flow_create_copy(parser, &tcp, tcp_size);
    return 0;
}
/**
 * Convert VXLAN item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
                       const void *default_mask,
                       struct mlx5_flow_data *data)
{
    const struct rte_flow_item_vxlan *spec = item->spec;
    const struct rte_flow_item_vxlan *mask = item->mask;
    struct mlx5_flow_parse *parser = data->parser;
    unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
    struct ibv_flow_spec_tunnel vxlan = {
        .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
        .size = size,
    };
    union vni {
        uint32_t vlan_id;
        uint8_t vni[4];
    } id;

    id.vni[0] = 0;
    parser->inner = IBV_FLOW_SPEC_INNER;
    parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)];
    parser->out_layer = parser->layer;
    parser->layer = HASH_RXQ_TUNNEL;
    /* Default VXLAN to outer RSS. */
    if (!parser->rss_conf.level)
        parser->rss_conf.level = 1;
    if (spec) {
        if (!mask)
            mask = default_mask;
        memcpy(&id.vni[1], spec->vni, 3);
        vxlan.val.tunnel_id = id.vlan_id;
        memcpy(&id.vni[1], mask->vni, 3);
        vxlan.mask.tunnel_id = id.vlan_id;
        /* Remove unwanted bits from values. */
        vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
    }
    /*
     * Tunnel id 0 is equivalent to not adding a VXLAN layer: if only this
     * layer is defined in the Verbs specification, it is interpreted as a
     * wildcard and all packets will match this rule; if it follows a full
     * stack layer (ex: eth / ipv4 / udp), all packets matching the layers
     * before will also match this rule.
     * To avoid such a situation, VNI 0 is currently refused.
     */
    /* Only allow tunnel w/o tunnel id pattern after proper outer spec. */
    if (parser->out_layer == HASH_RXQ_ETH && !vxlan.val.tunnel_id)
        return rte_flow_error_set(data->error, EINVAL,
                                  RTE_FLOW_ERROR_TYPE_ITEM,
                                  item,
                                  "VxLAN vni cannot be 0");
    mlx5_flow_create_copy(parser, &vxlan, size);
    return 0;
}
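
/*
 * VNI layout example: id.vni[0] is zeroed and the three network-order
 * VNI bytes are copied into id.vni[1..3], so a VNI of 9 produces the
 * byte pattern 00 00 00 09, i.e. the VNI in the low 24 bits of a
 * big-endian 32-bit tunnel id as expected by the Verbs spec.
 */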
/**
 * Convert VXLAN-GPE item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_vxlan_gpe(const struct rte_flow_item *item,
                           const void *default_mask,
                           struct mlx5_flow_data *data)
{
    struct priv *priv = data->dev->data->dev_private;
    const struct rte_flow_item_vxlan_gpe *spec = item->spec;
    const struct rte_flow_item_vxlan_gpe *mask = item->mask;
    struct mlx5_flow_parse *parser = data->parser;
    unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
    struct ibv_flow_spec_tunnel vxlan = {
        .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
        .size = size,
    };
    union vni {
        uint32_t vlan_id;
        uint8_t vni[4];
    } id;

    if (!priv->config.l3_vxlan_en)
        return rte_flow_error_set(data->error, EINVAL,
                                  RTE_FLOW_ERROR_TYPE_ITEM,
                                  item,
                                  "L3 VXLAN not enabled by device"
                                  " parameter and/or not configured"
                                  " in firmware");
    id.vni[0] = 0;
    parser->inner = IBV_FLOW_SPEC_INNER;
    parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN_GPE)];
    parser->out_layer = parser->layer;
    parser->layer = HASH_RXQ_TUNNEL;
    /* Default VXLAN-GPE to outer RSS. */
    if (!parser->rss_conf.level)
        parser->rss_conf.level = 1;
    if (spec) {
        if (!mask)
            mask = default_mask;
        memcpy(&id.vni[1], spec->vni, 3);
        vxlan.val.tunnel_id = id.vlan_id;
        memcpy(&id.vni[1], mask->vni, 3);
        vxlan.mask.tunnel_id = id.vlan_id;
        if (spec->protocol)
            return rte_flow_error_set(data->error, EINVAL,
                                      RTE_FLOW_ERROR_TYPE_ITEM,
                                      item,
                                      "VxLAN-GPE protocol not"
                                      " supported");
        /* Remove unwanted bits from values. */
        vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
    }
    /*
     * Tunnel id 0 is equivalent to not adding a VXLAN layer: if only this
     * layer is defined in the Verbs specification, it is interpreted as a
     * wildcard and all packets will match this rule; if it follows a full
     * stack layer (ex: eth / ipv4 / udp), all packets matching the layers
     * before will also match this rule.
     * To avoid such a situation, VNI 0 is currently refused.
     */
    /* Only allow tunnel w/o tunnel id pattern after proper outer spec. */
    if (parser->out_layer == HASH_RXQ_ETH && !vxlan.val.tunnel_id)
        return rte_flow_error_set(data->error, EINVAL,
                                  RTE_FLOW_ERROR_TYPE_ITEM,
                                  item,
                                  "VxLAN-GPE vni cannot be 0");
    mlx5_flow_create_copy(parser, &vxlan, size);
    return 0;
}
/**
 * Convert GRE item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_gre(const struct rte_flow_item *item __rte_unused,
                     const void *default_mask __rte_unused,
                     struct mlx5_flow_data *data)
{
    struct mlx5_flow_parse *parser = data->parser;
    unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
    struct ibv_flow_spec_tunnel tunnel = {
        .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
        .size = size,
    };
    struct ibv_flow_spec_ipv4_ext *ipv4;
    struct ibv_flow_spec_ipv6 *ipv6;
    unsigned int i;

    parser->inner = IBV_FLOW_SPEC_INNER;
    parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_GRE)];
    parser->out_layer = parser->layer;
    parser->layer = HASH_RXQ_TUNNEL;
    /* Default GRE to inner RSS. */
    if (!parser->rss_conf.level)
        parser->rss_conf.level = 2;
    /* Update encapsulation IP layer protocol. */
    for (i = 0; i != hash_rxq_init_n; ++i) {
        if (!parser->queue[i].ibv_attr)
            continue;
        if (parser->out_layer == HASH_RXQ_IPV4) {
            ipv4 = (void *)((uintptr_t)parser->queue[i].ibv_attr +
                            parser->queue[i].offset -
                            sizeof(struct ibv_flow_spec_ipv4_ext));
            if (ipv4->mask.proto && ipv4->val.proto != MLX5_GRE)
                break;
            ipv4->val.proto = MLX5_GRE;
            ipv4->mask.proto = 0xff;
        } else if (parser->out_layer == HASH_RXQ_IPV6) {
            ipv6 = (void *)((uintptr_t)parser->queue[i].ibv_attr +
                            parser->queue[i].offset -
                            sizeof(struct ibv_flow_spec_ipv6));
            if (ipv6->mask.next_hdr &&
                ipv6->val.next_hdr != MLX5_GRE)
                break;
            ipv6->val.next_hdr = MLX5_GRE;
            ipv6->mask.next_hdr = 0xff;
        }
    }
    if (i != hash_rxq_init_n)
        return rte_flow_error_set(data->error, EINVAL,
                                  RTE_FLOW_ERROR_TYPE_ITEM,
                                  item,
                                  "IP protocol of GRE must be 47");
    mlx5_flow_create_copy(parser, &tunnel, size);
    return 0;
}
/**
 * Convert mark/flag action to Verbs specification.
 *
 * @param parser
 *   Internal parser structure.
 * @param mark_id
 *   Mark identifier.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
{
    unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
    struct ibv_flow_spec_action_tag tag = {
        .type = IBV_FLOW_SPEC_ACTION_TAG,
        .size = size,
        .tag_id = mlx5_flow_mark_set(mark_id),
    };

    assert(parser->mark);
    mlx5_flow_create_copy(parser, &tag, size);
    return 0;
}
/**
 * Convert count action to Verbs specification.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param parser
 *   Pointer to MLX5 flow parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_count(struct rte_eth_dev *dev __rte_unused,
                       struct mlx5_flow_parse *parser __rte_unused)
{
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
    struct priv *priv = dev->data->dev_private;
    unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
    struct ibv_counter_set_init_attr init_attr = {0};
    struct ibv_flow_spec_counter_action counter = {
        .type = IBV_FLOW_SPEC_ACTION_COUNT,
        .size = size,
        .counter_set_handle = 0,
    };

    init_attr.counter_set_id = 0;
    parser->cs = mlx5_glue->create_counter_set(priv->ctx, &init_attr);
    if (!parser->cs) {
        rte_errno = EINVAL;
        return -rte_errno;
    }
    counter.counter_set_handle = parser->cs->handle;
    mlx5_flow_create_copy(parser, &counter, size);
#endif
    return 0;
}
/**
 * Complete flow rule creation with a drop queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param parser
 *   Internal parser structure.
 * @param flow
 *   Pointer to the rte_flow.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_action_queue_drop(struct rte_eth_dev *dev,
                                   struct mlx5_flow_parse *parser,
                                   struct rte_flow *flow,
                                   struct rte_flow_error *error)
{
    struct priv *priv = dev->data->dev_private;
    struct ibv_flow_spec_action_drop *drop;
    unsigned int size = sizeof(struct ibv_flow_spec_action_drop);

    assert(priv->pd);
    assert(priv->ctx);
    flow->drop = 1;
    drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
                    parser->queue[HASH_RXQ_ETH].offset);
    *drop = (struct ibv_flow_spec_action_drop){
        .type = IBV_FLOW_SPEC_ACTION_DROP,
        .size = size,
    };
    ++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
    parser->queue[HASH_RXQ_ETH].offset += size;
    flow->frxq[HASH_RXQ_ETH].ibv_attr =
        parser->queue[HASH_RXQ_ETH].ibv_attr;
    if (parser->count)
        flow->cs = parser->cs;
    if (!priv->dev->data->dev_started)
        return 0;
    parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
    flow->frxq[HASH_RXQ_ETH].ibv_flow =
        mlx5_glue->create_flow(priv->flow_drop_queue->qp,
                               flow->frxq[HASH_RXQ_ETH].ibv_attr);
    if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
        rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
                           NULL, "flow rule creation failure");
        goto error;
    }
    return 0;
error:
    assert(flow);
    if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
        claim_zero(mlx5_glue->destroy_flow
                   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
        flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
    }
    if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
        rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
        flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
    }
    if (flow->cs) {
        claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
        flow->cs = NULL;
        parser->cs = NULL;
    }
    return -rte_errno;
}
/**
 * Create hash Rx queues when RSS is enabled.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param parser
 *   Internal parser structure.
 * @param flow
 *   Pointer to the rte_flow.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_action_queue_rss(struct rte_eth_dev *dev,
                                  struct mlx5_flow_parse *parser,
                                  struct rte_flow *flow,
                                  struct rte_flow_error *error)
{
    struct priv *priv = dev->data->dev_private;
    unsigned int i;

    for (i = 0; i != hash_rxq_init_n; ++i) {
        if (!parser->queue[i].ibv_attr)
            continue;
        flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
        parser->queue[i].ibv_attr = NULL;
        flow->frxq[i].hash_fields = parser->queue[i].hash_fields;
        if (!priv->dev->data->dev_started)
            continue;
        flow->frxq[i].hrxq =
            mlx5_hrxq_get(dev,
                          parser->rss_conf.key,
                          parser->rss_conf.key_len,
                          flow->frxq[i].hash_fields,
                          parser->rss_conf.queue,
                          parser->rss_conf.queue_num,
                          parser->tunnel,
                          parser->rss_conf.level);
        if (flow->frxq[i].hrxq)
            continue;
        flow->frxq[i].hrxq =
            mlx5_hrxq_new(dev,
                          parser->rss_conf.key,
                          parser->rss_conf.key_len,
                          flow->frxq[i].hash_fields,
                          parser->rss_conf.queue,
                          parser->rss_conf.queue_num,
                          parser->tunnel,
                          parser->rss_conf.level);
        if (!flow->frxq[i].hrxq) {
            return rte_flow_error_set(error, ENOMEM,
                                      RTE_FLOW_ERROR_TYPE_HANDLE,
                                      NULL,
                                      "cannot create hash rxq");
        }
    }
    return 0;
}
/**
 * RXQ update after flow rule creation.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param flow
 *   Pointer to the flow rule.
 */
static void
mlx5_flow_create_update_rxqs(struct rte_eth_dev *dev, struct rte_flow *flow)
{
    struct priv *priv = dev->data->dev_private;
    unsigned int i;
    unsigned int j;

    if (!dev->data->dev_started)
        return;
    for (i = 0; i != flow->rss_conf.queue_num; ++i) {
        struct mlx5_rxq_data *rxq_data = (*priv->rxqs)
                                         [(*flow->queues)[i]];
        struct mlx5_rxq_ctrl *rxq_ctrl =
            container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
        uint8_t tunnel = PTYPE_IDX(flow->tunnel);

        rxq_data->mark |= flow->mark;
        if (!tunnel)
            continue;
        rxq_ctrl->tunnel_types[tunnel] += 1;
        /* Clear tunnel type if more than one tunnel type is set. */
        for (j = 0; j != RTE_DIM(rxq_ctrl->tunnel_types); ++j) {
            if (j == tunnel)
                continue;
            if (rxq_ctrl->tunnel_types[j] > 0) {
                rxq_data->tunnel = 0;
                break;
            }
        }
        if (j == RTE_DIM(rxq_ctrl->tunnel_types))
            rxq_data->tunnel = flow->tunnel;
    }
}
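
/*
 * Example: if both a VXLAN flow and a GRE flow are directed to the same
 * Rx queue, two tunnel_types[] counters become nonzero and
 * rxq_data->tunnel is cleared, since the queue can no longer report a
 * single tunnel packet type.
 */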
/**
 * Dump flow hash RX queue detail.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param flow
 *   Pointer to the rte_flow.
 * @param hrxq_idx
 *   Hash RX queue index.
 */
static void
mlx5_flow_dump(struct rte_eth_dev *dev __rte_unused,
               struct rte_flow *flow __rte_unused,
               unsigned int hrxq_idx __rte_unused)
{
#ifndef NDEBUG
    uintptr_t spec_ptr;
    uint16_t j;
    char buf[256];
    uint8_t off;

    spec_ptr = (uintptr_t)(flow->frxq[hrxq_idx].ibv_attr + 1);
    for (j = 0, off = 0; j < flow->frxq[hrxq_idx].ibv_attr->num_of_specs;
         j++) {
        struct ibv_flow_spec *spec = (void *)spec_ptr;

        off += sprintf(buf + off, " %x(%hu)", spec->hdr.type,
                       spec->hdr.size);
        spec_ptr += spec->hdr.size;
    }
    DRV_LOG(DEBUG,
            "port %u Verbs flow %p type %u: hrxq:%p qp:%p ind:%p,"
            " hash:%" PRIx64 "/%u specs:%hhu(%hu), priority:%hu, type:%d,"
            " flags:%x, comp_mask:%x specs:%s",
            dev->data->port_id, (void *)flow, hrxq_idx,
            (void *)flow->frxq[hrxq_idx].hrxq,
            (void *)flow->frxq[hrxq_idx].hrxq->qp,
            (void *)flow->frxq[hrxq_idx].hrxq->ind_table,
            flow->frxq[hrxq_idx].hash_fields |
            (flow->tunnel &&
             flow->rss_conf.level > 1 ? (uint32_t)IBV_RX_HASH_INNER : 0),
            flow->rss_conf.queue_num,
            flow->frxq[hrxq_idx].ibv_attr->num_of_specs,
            flow->frxq[hrxq_idx].ibv_attr->size,
            flow->frxq[hrxq_idx].ibv_attr->priority,
            flow->frxq[hrxq_idx].ibv_attr->type,
            flow->frxq[hrxq_idx].ibv_attr->flags,
            flow->frxq[hrxq_idx].ibv_attr->comp_mask,
            buf);
#endif
}
/**
 * Complete flow rule creation.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param parser
 *   Internal parser structure.
 * @param flow
 *   Pointer to the rte_flow.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_action_queue(struct rte_eth_dev *dev,
			      struct mlx5_flow_parse *parser,
			      struct rte_flow *flow,
			      struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;
	int ret;
	unsigned int i;
	unsigned int flows_n = 0;

	assert(priv->pd);
	assert(priv->ctx);
	assert(!parser->drop);
	ret = mlx5_flow_create_action_queue_rss(dev, parser, flow, error);
	if (ret)
		goto error;
	if (parser->count)
		flow->cs = parser->cs;
	if (!priv->dev->data->dev_started)
		return 0;
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (!flow->frxq[i].hrxq)
			continue;
		flow->frxq[i].ibv_flow =
			mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
					       flow->frxq[i].ibv_attr);
		mlx5_flow_dump(dev, flow, i);
		if (!flow->frxq[i].ibv_flow) {
			rte_flow_error_set(error, ENOMEM,
					   RTE_FLOW_ERROR_TYPE_HANDLE,
					   NULL, "flow rule creation failure");
			goto error;
		}
		++flows_n;
	}
	if (!flows_n) {
		rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "internal error in flow creation");
		goto error;
	}
	mlx5_flow_create_update_rxqs(dev, flow);
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	assert(flow);
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (flow->frxq[i].ibv_flow) {
			struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;

			claim_zero(mlx5_glue->destroy_flow(ibv_flow));
		}
		if (flow->frxq[i].hrxq)
			mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
		if (flow->frxq[i].ibv_attr)
			rte_free(flow->frxq[i].ibv_attr);
	}
	if (flow->cs) {
		claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
		flow->cs = NULL;
		parser->cs = NULL;
	}
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}
/**
 * Convert a flow.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param list
 *   Pointer to a TAILQ flow list.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A flow on success, NULL otherwise and rte_errno is set.
 */
static struct rte_flow *
mlx5_flow_list_create(struct rte_eth_dev *dev,
		      struct mlx5_flows *list,
		      const struct rte_flow_attr *attr,
		      const struct rte_flow_item items[],
		      const struct rte_flow_action actions[],
		      struct rte_flow_error *error)
{
	struct mlx5_flow_parse parser = { .create = 1, };
	struct rte_flow *flow = NULL;
	unsigned int i;
	int ret;

	ret = mlx5_flow_convert(dev, attr, items, actions, error, &parser);
	if (ret)
		goto exit;
	flow = rte_calloc(__func__, 1,
			  sizeof(*flow) +
			  parser.rss_conf.queue_num * sizeof(uint16_t),
			  0);
	if (!flow) {
		rte_flow_error_set(error, ENOMEM,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "cannot allocate flow memory");
		return NULL;
	}
	/* Copy configuration. */
	flow->queues = (uint16_t (*)[])(flow + 1);
	flow->tunnel = parser.tunnel;
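	/*
	 * memcpy() returns its destination pointer, which allows the RSS
	 * key and queue arrays to be copied and assigned in one initializer.
	 */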
	flow->rss_conf = (struct rte_flow_action_rss){
		.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
		.level = 0,
		.types = parser.rss_conf.types,
		.key_len = parser.rss_conf.key_len,
		.queue_num = parser.rss_conf.queue_num,
		.key = memcpy(flow->rss_key, parser.rss_conf.key,
			      sizeof(*parser.rss_conf.key) *
			      parser.rss_conf.key_len),
		.queue = memcpy(flow->queues, parser.rss_conf.queue,
				sizeof(*parser.rss_conf.queue) *
				parser.rss_conf.queue_num),
	};
	flow->mark = parser.mark;
	/* Finalise the flow. */
	if (parser.drop)
		ret = mlx5_flow_create_action_queue_drop(dev, &parser, flow,
							 error);
	else
		ret = mlx5_flow_create_action_queue(dev, &parser, flow, error);
	if (ret)
		goto exit;
	TAILQ_INSERT_TAIL(list, flow, next);
	DRV_LOG(DEBUG, "port %u flow created %p", dev->data->port_id,
		(void *)flow);
	return flow;
exit:
	DRV_LOG(ERR, "port %u flow creation error: %s", dev->data->port_id,
		error->message);
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser.queue[i].ibv_attr)
			rte_free(parser.queue[i].ibv_attr);
	}
	rte_free(flow);
	return NULL;
}
/**
 * Validate a flow supported by the NIC.
 *
 * @see rte_flow_validate()
 * @see rte_flow_ops
 */
int
mlx5_flow_validate(struct rte_eth_dev *dev,
		   const struct rte_flow_attr *attr,
		   const struct rte_flow_item items[],
		   const struct rte_flow_action actions[],
		   struct rte_flow_error *error)
{
	struct mlx5_flow_parse parser = { .create = 0, };

	return mlx5_flow_convert(dev, attr, items, actions, error, &parser);
}
/**
 * Create a flow.
 *
 * @see rte_flow_create()
 * @see rte_flow_ops
 */
struct rte_flow *
mlx5_flow_create(struct rte_eth_dev *dev,
		 const struct rte_flow_attr *attr,
		 const struct rte_flow_item items[],
		 const struct rte_flow_action actions[],
		 struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	return mlx5_flow_list_create(dev, &priv->flows, attr, items, actions,
				     error);
}
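/*
 * Usage note: applications do not call mlx5_flow_create() directly; it is
 * reached through the generic API, e.g. (hypothetical application code):
 *
 *	struct rte_flow_error err;
 *	struct rte_flow *f;
 *
 *	f = rte_flow_create(port_id, &attr, items, actions, &err);
 *
 * rte_flow_create() dispatches here through the mlx5_flow_ops table.
 */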
/**
 * Destroy a flow in a list.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param list
 *   Pointer to a TAILQ flow list.
 * @param[in] flow
 *   Flow to destroy.
 */
static void
mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
		       struct rte_flow *flow)
{
	struct priv *priv = dev->data->dev_private;
	unsigned int i;

	if (flow->drop || !dev->data->dev_started)
		goto free;
	for (i = 0; flow->tunnel && i != flow->rss_conf.queue_num; ++i) {
		/* Update queue tunnel type. */
		struct mlx5_rxq_data *rxq_data = (*priv->rxqs)
						 [(*flow->queues)[i]];
		struct mlx5_rxq_ctrl *rxq_ctrl =
			container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
		uint8_t tunnel = PTYPE_IDX(flow->tunnel);

		assert(rxq_ctrl->tunnel_types[tunnel] > 0);
		rxq_ctrl->tunnel_types[tunnel] -= 1;
		if (!rxq_ctrl->tunnel_types[tunnel]) {
			/* Update tunnel type. */
			uint8_t j;
			uint8_t types = 0;
			uint8_t last;

			for (j = 0; j < RTE_DIM(rxq_ctrl->tunnel_types); j++)
				if (rxq_ctrl->tunnel_types[j]) {
					types += 1;
					last = j;
				}
			/* Keep the same type if more than one is left. */
			if (types == 1)
				rxq_data->tunnel = ptype_ext[last];
			else if (types == 0)
				/* No tunnel type left. */
				rxq_data->tunnel = 0;
		}
	}
	for (i = 0; flow->mark && i != flow->rss_conf.queue_num; ++i) {
		struct rte_flow *tmp;
		int mark = 0;

		/*
		 * To remove the mark from the queue, the queue must not be
		 * present in any other marked flow (RSS or not).
		 */
		TAILQ_FOREACH(tmp, list, next) {
			unsigned int j;
			uint16_t *tqs = NULL;
			uint16_t tq_n = 0;

			if (!tmp->mark)
				continue;
			for (j = 0; j != hash_rxq_init_n; ++j) {
				if (!tmp->frxq[j].hrxq)
					continue;
				tqs = tmp->frxq[j].hrxq->ind_table->queues;
				tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
			}
			if (!tq_n)
				continue;
			for (j = 0; (j != tq_n) && !mark; j++)
				if (tqs[j] == (*flow->queues)[i])
					mark = 1;
		}
		(*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
	}
free:
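	/* Release the Verbs flows, hash Rx queues and attribute memory. */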
	if (flow->drop) {
		if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
			claim_zero(mlx5_glue->destroy_flow
				   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
		rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
	} else {
		for (i = 0; i != hash_rxq_init_n; ++i) {
			struct mlx5_flow *frxq = &flow->frxq[i];

			if (frxq->ibv_flow)
				claim_zero(mlx5_glue->destroy_flow
					   (frxq->ibv_flow));
			if (frxq->hrxq)
				mlx5_hrxq_release(dev, frxq->hrxq);
			if (frxq->ibv_attr)
				rte_free(frxq->ibv_attr);
		}
	}
	if (flow->cs) {
		claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
		flow->cs = NULL;
	}
	TAILQ_REMOVE(list, flow, next);
	DRV_LOG(DEBUG, "port %u flow destroyed %p", dev->data->port_id,
		(void *)flow);
	rte_free(flow);
}
/**
 * Destroy all flows.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param list
 *   Pointer to a TAILQ flow list.
 */
void
mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
{
	while (!TAILQ_EMPTY(list)) {
		struct rte_flow *flow;

		flow = TAILQ_FIRST(list);
		mlx5_flow_list_destroy(dev, list, flow);
	}
}
/**
 * Create drop queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_create_drop_queue(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_hrxq_drop *fdq = NULL;

	assert(priv->pd);
	assert(priv->ctx);
	fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
	if (!fdq) {
		DRV_LOG(WARNING,
			"port %u cannot allocate memory for drop queue",
			dev->data->port_id);
		rte_errno = ENOMEM;
		return -rte_errno;
	}
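	/*
	 * The drop queue is a plain Verbs hash QP whose single-entry
	 * indirection table points at a dummy WQ; packets steered to it are
	 * never delivered anywhere since the WQ is never given Rx buffers.
	 */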
	fdq->cq = mlx5_glue->create_cq(priv->ctx, 1, NULL, NULL, 0);
	if (!fdq->cq) {
		DRV_LOG(WARNING, "port %u cannot allocate CQ for drop queue",
			dev->data->port_id);
		rte_errno = errno;
		goto error;
	}
	fdq->wq = mlx5_glue->create_wq
		(priv->ctx,
		 &(struct ibv_wq_init_attr){
			.wq_type = IBV_WQT_RQ,
			.max_wr = 1,
			.max_sge = 1,
			.pd = priv->pd,
			.cq = fdq->cq,
		 });
	if (!fdq->wq) {
		DRV_LOG(WARNING, "port %u cannot allocate WQ for drop queue",
			dev->data->port_id);
		rte_errno = errno;
		goto error;
	}
	fdq->ind_table = mlx5_glue->create_rwq_ind_table
		(priv->ctx,
		 &(struct ibv_rwq_ind_table_init_attr){
			.log_ind_tbl_size = 0,
			.ind_tbl = &fdq->wq,
			.comp_mask = 0,
		 });
	if (!fdq->ind_table) {
		DRV_LOG(WARNING,
			"port %u cannot allocate indirection table for drop"
			" queue",
			dev->data->port_id);
		rte_errno = errno;
		goto error;
	}
	fdq->qp = mlx5_glue->create_qp_ex
		(priv->ctx,
		 &(struct ibv_qp_init_attr_ex){
			.qp_type = IBV_QPT_RAW_PACKET,
			.comp_mask =
				IBV_QP_INIT_ATTR_PD |
				IBV_QP_INIT_ATTR_IND_TABLE |
				IBV_QP_INIT_ATTR_RX_HASH,
			.rx_hash_conf = (struct ibv_rx_hash_conf){
				.rx_hash_function =
					IBV_RX_HASH_FUNC_TOEPLITZ,
				.rx_hash_key_len = rss_hash_default_key_len,
				.rx_hash_key = rss_hash_default_key,
				.rx_hash_fields_mask = 0,
				},
			.rwq_ind_tbl = fdq->ind_table,
			.pd = priv->pd,
		 });
	if (!fdq->qp) {
		DRV_LOG(WARNING, "port %u cannot allocate QP for drop queue",
			dev->data->port_id);
		rte_errno = errno;
		goto error;
	}
	priv->flow_drop_queue = fdq;
	return 0;
error:
	if (fdq->qp)
		claim_zero(mlx5_glue->destroy_qp(fdq->qp));
	if (fdq->ind_table)
		claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
	if (fdq->wq)
		claim_zero(mlx5_glue->destroy_wq(fdq->wq));
	if (fdq->cq)
		claim_zero(mlx5_glue->destroy_cq(fdq->cq));
	rte_free(fdq);
	priv->flow_drop_queue = NULL;
	return -rte_errno;
}
/**
 * Delete drop queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
void
mlx5_flow_delete_drop_queue(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;

	if (!fdq)
		return;
	if (fdq->qp)
		claim_zero(mlx5_glue->destroy_qp(fdq->qp));
	if (fdq->ind_table)
		claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
	if (fdq->wq)
		claim_zero(mlx5_glue->destroy_wq(fdq->wq));
	if (fdq->cq)
		claim_zero(mlx5_glue->destroy_cq(fdq->cq));
	rte_free(fdq);
	priv->flow_drop_queue = NULL;
}
/**
 * Remove all flows.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param list
 *   Pointer to a TAILQ flow list.
 */
void
mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
{
	struct priv *priv = dev->data->dev_private;
	struct rte_flow *flow;
	unsigned int i;

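	/*
	 * Verbs flows are destroyed and hash Rx queues released, but each
	 * flow keeps its ibv_attr specification so that mlx5_flow_start()
	 * can re-apply the same rules later.
	 */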
	TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
		struct mlx5_ind_table_ibv *ind_tbl = NULL;

		if (flow->drop) {
			if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
				continue;
			claim_zero(mlx5_glue->destroy_flow
				   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
			flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
			DRV_LOG(DEBUG, "port %u flow %p removed",
				dev->data->port_id, (void *)flow);
			/* Next flow. */
			continue;
		}
		/* Verify the flow has not already been cleaned. */
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (!flow->frxq[i].ibv_flow)
				continue;
			/*
			 * Indirection table may be necessary to remove the
			 * flags in the Rx queues.
			 * This helps to speed-up the process by avoiding
			 * another loop.
			 */
			ind_tbl = flow->frxq[i].hrxq->ind_table;
			break;
		}
		if (i == hash_rxq_init_n)
			return;
		if (flow->mark) {
			assert(ind_tbl);
			for (i = 0; i != ind_tbl->queues_n; ++i)
				(*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
		}
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (!flow->frxq[i].ibv_flow)
				continue;
			claim_zero(mlx5_glue->destroy_flow
				   (flow->frxq[i].ibv_flow));
			flow->frxq[i].ibv_flow = NULL;
			mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
			flow->frxq[i].hrxq = NULL;
		}
		DRV_LOG(DEBUG, "port %u flow %p removed", dev->data->port_id,
			(void *)flow);
	}
	/* Cleanup Rx queue tunnel info. */
	for (i = 0; i != priv->rxqs_n; ++i) {
		struct mlx5_rxq_data *q = (*priv->rxqs)[i];
		struct mlx5_rxq_ctrl *rxq_ctrl =
			container_of(q, struct mlx5_rxq_ctrl, rxq);

		if (!q)
			continue;
		memset((void *)rxq_ctrl->tunnel_types, 0,
		       sizeof(rxq_ctrl->tunnel_types));
		q->tunnel = 0;
	}
}
/**
 * Add all flows.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param list
 *   Pointer to a TAILQ flow list.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
{
	struct priv *priv = dev->data->dev_private;
	struct rte_flow *flow;

	TAILQ_FOREACH(flow, list, next) {
		unsigned int i;

		if (flow->drop) {
			flow->frxq[HASH_RXQ_ETH].ibv_flow =
				mlx5_glue->create_flow
				(priv->flow_drop_queue->qp,
				 flow->frxq[HASH_RXQ_ETH].ibv_attr);
			if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
				DRV_LOG(DEBUG,
					"port %u flow %p cannot be applied",
					dev->data->port_id, (void *)flow);
				rte_errno = EINVAL;
				return -rte_errno;
			}
			DRV_LOG(DEBUG, "port %u flow %p applied",
				dev->data->port_id, (void *)flow);
			/* Next flow. */
			continue;
		}
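		/* Non-drop flows: recreate one hash Rx queue per layer. */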
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (!flow->frxq[i].ibv_attr)
				continue;
			flow->frxq[i].hrxq =
				mlx5_hrxq_get(dev, flow->rss_conf.key,
					      flow->rss_conf.key_len,
					      flow->frxq[i].hash_fields,
					      flow->rss_conf.queue,
					      flow->rss_conf.queue_num,
					      flow->tunnel,
					      flow->rss_conf.level);
			if (flow->frxq[i].hrxq)
				goto flow_create;
			flow->frxq[i].hrxq =
				mlx5_hrxq_new(dev, flow->rss_conf.key,
					      flow->rss_conf.key_len,
					      flow->frxq[i].hash_fields,
					      flow->rss_conf.queue,
					      flow->rss_conf.queue_num,
					      flow->tunnel,
					      flow->rss_conf.level);
			if (!flow->frxq[i].hrxq) {
				DRV_LOG(DEBUG,
					"port %u flow %p cannot create hash"
					" rxq",
					dev->data->port_id, (void *)flow);
				rte_errno = EINVAL;
				return -rte_errno;
			}
flow_create:
			mlx5_flow_dump(dev, flow, i);
			flow->frxq[i].ibv_flow =
				mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
						       flow->frxq[i].ibv_attr);
			if (!flow->frxq[i].ibv_flow) {
				DRV_LOG(DEBUG,
					"port %u flow %p type %u cannot be"
					" applied",
					dev->data->port_id, (void *)flow, i);
				rte_errno = EINVAL;
				return -rte_errno;
			}
		}
		mlx5_flow_create_update_rxqs(dev, flow);
	}
	return 0;
}
/**
 * Verify the flow list is empty.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   The number of flows not released.
 */
int
mlx5_flow_verify(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	struct rte_flow *flow;
	int ret = 0;

	TAILQ_FOREACH(flow, &priv->flows, next) {
		DRV_LOG(DEBUG, "port %u flow %p still referenced",
			dev->data->port_id, (void *)flow);
		++ret;
	}
	return ret;
}
/**
 * Enable a control flow configured from the control plane.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param eth_spec
 *   An Ethernet flow spec to apply.
 * @param eth_mask
 *   An Ethernet flow mask to apply.
 * @param vlan_spec
 *   A VLAN flow spec to apply.
 * @param vlan_mask
 *   A VLAN flow mask to apply.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
		    struct rte_flow_item_eth *eth_spec,
		    struct rte_flow_item_eth *eth_mask,
		    struct rte_flow_item_vlan *vlan_spec,
		    struct rte_flow_item_vlan *vlan_mask)
{
	struct priv *priv = dev->data->dev_private;
	const struct rte_flow_attr attr = {
		.ingress = 1,
		.priority = MLX5_CTRL_FLOW_PRIORITY,
	};
	struct rte_flow_item items[] = {
		{
			.type = RTE_FLOW_ITEM_TYPE_ETH,
			.spec = eth_spec,
			.mask = eth_mask,
		},
		{
			.type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
					      RTE_FLOW_ITEM_TYPE_END,
			.spec = vlan_spec,
			.mask = vlan_mask,
		},
		{
			.type = RTE_FLOW_ITEM_TYPE_END,
		},
	};
	uint16_t queue[priv->reta_idx_n];
	struct rte_flow_action_rss action_rss = {
		.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
		.level = 0,
		.types = priv->rss_conf.rss_hf,
		.key_len = priv->rss_conf.rss_key_len,
		.queue_num = priv->reta_idx_n,
		.key = priv->rss_conf.rss_key,
		.queue = queue,
	};
	struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_RSS,
			.conf = &action_rss,
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		},
	};
	struct rte_flow *flow;
	struct rte_flow_error error;
	unsigned int i;

	if (!priv->reta_idx_n) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	for (i = 0; i != priv->reta_idx_n; ++i)
		queue[i] = (*priv->reta_idx)[i];
	flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
				     actions, &error);
	if (!flow)
		return -rte_errno;
	return 0;
}
/**
 * Enable a control flow configured from the control plane.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param eth_spec
 *   An Ethernet flow spec to apply.
 * @param eth_mask
 *   An Ethernet flow mask to apply.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_ctrl_flow(struct rte_eth_dev *dev,
	       struct rte_flow_item_eth *eth_spec,
	       struct rte_flow_item_eth *eth_mask)
{
	return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
}
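/*
 * A minimal usage sketch (modelled on the driver's traffic enabling path;
 * the broadcast spec below is illustrative). The same structure serves as
 * both spec and mask, matching the broadcast destination address exactly:
 *
 *	struct rte_flow_item_eth bcast = {
 *		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *	};
 *
 *	mlx5_ctrl_flow(dev, &bcast, &bcast);
 */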
/**
 * Destroy a flow.
 *
 * @see rte_flow_destroy()
 * @see rte_flow_ops
 */
int
mlx5_flow_destroy(struct rte_eth_dev *dev,
		  struct rte_flow *flow,
		  struct rte_flow_error *error __rte_unused)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_destroy(dev, &priv->flows, flow);
	return 0;
}
/**
 * Destroy all flows.
 *
 * @see rte_flow_flush()
 * @see rte_flow_ops
 */
int
mlx5_flow_flush(struct rte_eth_dev *dev,
		struct rte_flow_error *error __rte_unused)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_flush(dev, &priv->flows);
	return 0;
}
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
/**
 * Query flow counter.
 *
 * @param cs
 *   The counter set.
 * @param counter_value
 *   Returned data from the counter.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_query_count(struct ibv_counter_set *cs,
		      struct mlx5_flow_counter_stats *counter_stats,
		      struct rte_flow_query_count *query_count,
		      struct rte_flow_error *error)
{
	uint64_t counters[2];
	struct ibv_query_counter_set_attr query_cs_attr = {
		.cs = cs,
		.query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
	};
	struct ibv_counter_set_data query_out = {
		.out = counters,
		.outlen = 2 * sizeof(uint64_t),
	};
	int err = mlx5_glue->query_counter_set(&query_cs_attr, &query_out);

	if (err)
		return rte_flow_error_set(error, err,
					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
					  NULL,
					  "cannot read counter");
	query_count->hits_set = 1;
	query_count->bytes_set = 1;
	query_count->hits = counters[0] - counter_stats->hits;
	query_count->bytes = counters[1] - counter_stats->bytes;
	if (query_count->reset) {
		counter_stats->hits = counters[0];
		counter_stats->bytes = counters[1];
	}
	return 0;
}
/**
 * Query a flow.
 *
 * @see rte_flow_query()
 * @see rte_flow_ops
 */
int
mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
		struct rte_flow *flow,
		const struct rte_flow_action *action __rte_unused,
		void *data,
		struct rte_flow_error *error)
{
	if (flow->cs) {
		int ret;

		ret = mlx5_flow_query_count(flow->cs,
					    &flow->counter_stats,
					    (struct rte_flow_query_count *)data,
					    error);
		if (ret)
			return ret;
	} else {
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
					  NULL,
					  "no counter found for flow");
	}
	return 0;
}
#endif
/**
 * Isolated mode.
 *
 * @see rte_flow_isolate()
 * @see rte_flow_ops
 */
int
mlx5_flow_isolate(struct rte_eth_dev *dev,
		  int enable,
		  struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	if (dev->data->dev_started) {
		rte_flow_error_set(error, EBUSY,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "port must be stopped first");
		return -rte_errno;
	}
	priv->isolated = !!enable;
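	/* Isolated mode swaps the whole eth_dev_ops table. */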
	if (enable)
		priv->dev->dev_ops = &mlx5_dev_ops_isolate;
	else
		priv->dev->dev_ops = &mlx5_dev_ops;
	return 0;
}
/**
 * Convert a flow director filter to a generic flow.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Flow director filter to add.
 * @param attributes
 *   Generic flow parameters structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
			 const struct rte_eth_fdir_filter *fdir_filter,
			 struct mlx5_fdir *attributes)
{
	struct priv *priv = dev->data->dev_private;
	const struct rte_eth_fdir_input *input = &fdir_filter->input;
	const struct rte_eth_fdir_masks *mask =
		&dev->data->dev_conf.fdir_conf.mask;

	/* Validate queue number. */
	if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
		DRV_LOG(ERR, "port %u invalid queue number %d",
			dev->data->port_id, fdir_filter->action.rx_queue);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	attributes->attr.ingress = 1;
	attributes->items[0] = (struct rte_flow_item) {
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.spec = &attributes->l2,
		.mask = &attributes->l2_mask,
	};
	switch (fdir_filter->action.behavior) {
	case RTE_ETH_FDIR_ACCEPT:
		attributes->actions[0] = (struct rte_flow_action){
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &attributes->queue,
		};
		break;
	case RTE_ETH_FDIR_REJECT:
		attributes->actions[0] = (struct rte_flow_action){
			.type = RTE_FLOW_ACTION_TYPE_DROP,
		};
		break;
	default:
		DRV_LOG(ERR, "port %u invalid behavior %d",
			dev->data->port_id,
			fdir_filter->action.behavior);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	attributes->queue.index = fdir_filter->action.rx_queue;
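	/* The first switch fills the L3 item, the second one the L4 item. */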
	switch (fdir_filter->input.flow_type) {
	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = input->flow.ip4_flow.src_ip,
			.dst_addr = input->flow.ip4_flow.dst_ip,
			.time_to_live = input->flow.ip4_flow.ttl,
			.type_of_service = input->flow.ip4_flow.tos,
			.next_proto_id = input->flow.ip4_flow.proto,
		};
		attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = mask->ipv4_mask.src_ip,
			.dst_addr = mask->ipv4_mask.dst_ip,
			.time_to_live = mask->ipv4_mask.ttl,
			.type_of_service = mask->ipv4_mask.tos,
			.next_proto_id = mask->ipv4_mask.proto,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV4,
			.spec = &attributes->l3,
			.mask = &attributes->l3_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
			.hop_limits = input->flow.ipv6_flow.hop_limits,
			.proto = input->flow.ipv6_flow.proto,
		};
		memcpy(attributes->l3.ipv6.hdr.src_addr,
		       input->flow.ipv6_flow.src_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
		memcpy(attributes->l3.ipv6.hdr.dst_addr,
		       input->flow.ipv6_flow.dst_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
		memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
		       mask->ipv6_mask.src_ip,
		       RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
		memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
		       mask->ipv6_mask.dst_ip,
		       RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV6,
			.spec = &attributes->l3,
			.mask = &attributes->l3_mask,
		};
		break;
	default:
		DRV_LOG(ERR, "port %u invalid flow type %d",
			dev->data->port_id, fdir_filter->input.flow_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	switch (fdir_filter->input.flow_type) {
	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
		attributes->l4.udp.hdr = (struct udp_hdr){
			.src_port = input->flow.udp4_flow.src_port,
			.dst_port = input->flow.udp4_flow.dst_port,
		};
		attributes->l4_mask.udp.hdr = (struct udp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_UDP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
		attributes->l4.tcp.hdr = (struct tcp_hdr){
			.src_port = input->flow.tcp4_flow.src_port,
			.dst_port = input->flow.tcp4_flow.dst_port,
		};
		attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_TCP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
		attributes->l4.udp.hdr = (struct udp_hdr){
			.src_port = input->flow.udp6_flow.src_port,
			.dst_port = input->flow.udp6_flow.dst_port,
		};
		attributes->l4_mask.udp.hdr = (struct udp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_UDP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
		attributes->l4.tcp.hdr = (struct tcp_hdr){
			.src_port = input->flow.tcp6_flow.src_port,
			.dst_port = input->flow.tcp6_flow.dst_port,
		};
		attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_TCP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
		break;
	default:
		DRV_LOG(ERR, "port %u invalid flow type %d",
			dev->data->port_id, fdir_filter->input.flow_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	return 0;
}
/**
 * Add new flow director filter and store it in list.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Flow director filter to add.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_add(struct rte_eth_dev *dev,
		     const struct rte_eth_fdir_filter *fdir_filter)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_fdir attributes = {
		.attr.group = 0,
		.l2_mask = {
			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.type = 0,
		},
	};
	struct mlx5_flow_parse parser = {
		.layer = HASH_RXQ_ETH,
	};
	struct rte_flow_error error;
	struct rte_flow *flow;
	int ret;

	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
	if (ret)
		return ret;
	ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
				attributes.actions, &error, &parser);
	if (ret)
		return ret;
	flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
				     attributes.items, attributes.actions,
				     &error);
	if (flow) {
		DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
			(void *)flow);
		return 0;
	}
	return -rte_errno;
}
/**
 * Delete specific filter.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Filter to be deleted.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_delete(struct rte_eth_dev *dev,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_fdir attributes = {
		.attr.group = 0,
	};
	struct mlx5_flow_parse parser = {
		.create = 1,
		.layer = HASH_RXQ_ETH,
	};
	struct rte_flow_error error;
	struct rte_flow *flow;
	unsigned int i;
	int ret;

	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
	if (ret)
		return ret;
	ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
				attributes.actions, &error, &parser);
	if (ret)
		goto exit;
	/*
	 * Special case for drop action which is only set in the
	 * specifications when the flow is created. In this situation the
	 * drop specification is missing.
	 */
	if (parser.drop) {
		struct ibv_flow_spec_action_drop *drop;

		drop = (void *)((uintptr_t)parser.queue[parser.layer].ibv_attr +
				parser.queue[parser.layer].offset);
		*drop = (struct ibv_flow_spec_action_drop){
			.type = IBV_FLOW_SPEC_ACTION_DROP,
			.size = sizeof(struct ibv_flow_spec_action_drop),
		};
		parser.queue[parser.layer].ibv_attr->num_of_specs++;
	}
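	/* Scan the flow list for an exact attribute and spec match. */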
	TAILQ_FOREACH(flow, &priv->flows, next) {
		struct ibv_flow_attr *attr;
		struct ibv_spec_header *attr_h;
		void *spec;
		struct ibv_flow_attr *flow_attr;
		struct ibv_spec_header *flow_h;
		void *flow_spec;
		unsigned int specs_n;

		attr = parser.queue[parser.layer].ibv_attr;
		flow_attr = flow->frxq[parser.layer].ibv_attr;
		/* Compare first the attributes. */
		if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
			continue;
		if (attr->num_of_specs == 0)
			continue;
		spec = (void *)((uintptr_t)attr +
				sizeof(struct ibv_flow_attr));
		flow_spec = (void *)((uintptr_t)flow_attr +
				     sizeof(struct ibv_flow_attr));
		specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
		for (i = 0; i != specs_n; ++i) {
			attr_h = spec;
			flow_h = flow_spec;
			if (memcmp(spec, flow_spec,
				   RTE_MIN(attr_h->size, flow_h->size)))
				goto wrong_flow;
			spec = (void *)((uintptr_t)spec + attr_h->size);
			flow_spec = (void *)((uintptr_t)flow_spec +
					     flow_h->size);
		}
		/* At this point, the flows match. */
		break;
wrong_flow:
		/* The flow does not match. */
		continue;
	}
	ret = rte_errno; /* Save rte_errno before cleanup. */
	if (flow)
		mlx5_flow_list_destroy(dev, &priv->flows, flow);
exit:
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser.queue[i].ibv_attr)
			rte_free(parser.queue[i].ibv_attr);
	}
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}
/**
 * Update queue for specific filter.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Filter to be updated.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_update(struct rte_eth_dev *dev,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	int ret;

	ret = mlx5_fdir_filter_delete(dev, fdir_filter);
	if (ret)
		return ret;
	return mlx5_fdir_filter_add(dev, fdir_filter);
}
/**
 * Flush all filters.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
static void
mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_flush(dev, &priv->flows);
}
/**
 * Get flow director information.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[out] fdir_info
 *   Resulting flow director information.
 */
static void
mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
{
	struct priv *priv = dev->data->dev_private;
	struct rte_eth_fdir_masks *mask =
		&priv->dev->data->dev_conf.fdir_conf.mask;

	fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
	fdir_info->guarant_spc = 0;
	rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
	fdir_info->max_flexpayload = 0;
	fdir_info->flow_types_mask[0] = 0;
	fdir_info->flex_payload_unit = 0;
	fdir_info->max_flex_payload_segment_num = 0;
	fdir_info->flex_payload_limit = 0;
	memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
}
/**
 * Deal with flow director operations.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
		    void *arg)
{
	struct priv *priv = dev->data->dev_private;
	enum rte_fdir_mode fdir_mode =
		priv->dev->data->dev_conf.fdir_conf.mode;

	if (filter_op == RTE_ETH_FILTER_NOP)
		return 0;
	if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
	    fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
		DRV_LOG(ERR, "port %u flow director mode %d not supported",
			dev->data->port_id, fdir_mode);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	switch (filter_op) {
	case RTE_ETH_FILTER_ADD:
		return mlx5_fdir_filter_add(dev, arg);
	case RTE_ETH_FILTER_UPDATE:
		return mlx5_fdir_filter_update(dev, arg);
	case RTE_ETH_FILTER_DELETE:
		return mlx5_fdir_filter_delete(dev, arg);
	case RTE_ETH_FILTER_FLUSH:
		mlx5_fdir_filter_flush(dev);
		break;
	case RTE_ETH_FILTER_INFO:
		mlx5_fdir_info_get(dev, arg);
		break;
	default:
		DRV_LOG(DEBUG, "port %u unknown operation %u",
			dev->data->port_id, filter_op);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	return 0;
}
/**
 * Manage filter operations.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param filter_type
 *   Filter type.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
		     enum rte_filter_type filter_type,
		     enum rte_filter_op filter_op,
		     void *arg)
{
	switch (filter_type) {
	case RTE_ETH_FILTER_GENERIC:
		if (filter_op != RTE_ETH_FILTER_GET) {
			rte_errno = EINVAL;
			return -rte_errno;
		}
		*(const void **)arg = &mlx5_flow_ops;
		break;
	case RTE_ETH_FILTER_FDIR:
		return mlx5_fdir_ctrl_func(dev, filter_op, arg);
	default:
		DRV_LOG(ERR, "port %u filter type (%d) not supported",
			dev->data->port_id, filter_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	return 0;
}
/**
 * Detect the number of Verbs flow priorities supported.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   Number of supported Verbs flow priorities.
 */
unsigned int
mlx5_get_max_verbs_prio(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	unsigned int verb_priorities = MLX5_VERBS_FLOW_PRIO_8;
	struct {
		struct ibv_flow_attr attr;
		struct ibv_flow_spec_eth eth;
		struct ibv_flow_spec_action_drop drop;
	} flow_attr = {
		.attr = {
			.num_of_specs = 2,
		},
		.eth = {
			.type = IBV_FLOW_SPEC_ETH,
			.size = sizeof(struct ibv_flow_spec_eth),
		},
		.drop = {
			.size = sizeof(struct ibv_flow_spec_action_drop),
			.type = IBV_FLOW_SPEC_ACTION_DROP,
		},
	};
	struct ibv_flow *flow;
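
	/*
	 * Probe by doubling: create a dummy drop flow at the highest
	 * candidate priority and keep doubling until creation fails.
	 */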
	do {
		flow_attr.attr.priority = verb_priorities - 1;
		flow = mlx5_glue->create_flow(priv->flow_drop_queue->qp,
					      &flow_attr.attr);
		if (flow) {
			claim_zero(mlx5_glue->destroy_flow(flow));
			/* Try more priorities. */
			verb_priorities *= 2;
		} else {
			/* Failed, restore the last working value. */
			verb_priorities /= 2;
			break;
		}
	} while (1);
	DRV_LOG(DEBUG, "port %u Verbs flow priorities: %d,"
		" user flow priorities: %d",
		dev->data->port_id, verb_priorities, MLX5_CTRL_FLOW_PRIORITY);
	return verb_priorities;
}