/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2016 6WIND S.A.
 * Copyright 2016 Mellanox Technologies, Ltd
 */

/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#pragma GCC diagnostic ignored "-Wpedantic"
#include <infiniband/verbs.h>
#pragma GCC diagnostic error "-Wpedantic"

#include <rte_common.h>
#include <rte_ether.h>
#include <rte_eth_ctrl.h>
#include <rte_ethdev_driver.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>

#include "mlx5_defs.h"
#include "mlx5_glue.h"

/* Flow priority for control plane flows. */
#define MLX5_CTRL_FLOW_PRIORITY 1

/* Internet Protocol versions. */

#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
struct ibv_flow_spec_counter_action {

/* Dev ops structure defined in mlx5.c. */
extern const struct eth_dev_ops mlx5_dev_ops;
extern const struct eth_dev_ops mlx5_dev_ops_isolate;
/** Structure given to the conversion functions. */
struct mlx5_flow_data {
	struct rte_eth_dev *dev; /**< Ethernet device. */
	struct mlx5_flow_parse *parser; /**< Parser context. */
	struct rte_flow_error *error; /**< Error context. */
};
static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
		     const void *default_mask,
		     struct mlx5_flow_data *data);

static int
mlx5_flow_create_vlan(const struct rte_flow_item *item,
		      const void *default_mask,
		      struct mlx5_flow_data *data);

static int
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
		      const void *default_mask,
		      struct mlx5_flow_data *data);

static int
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
		      const void *default_mask,
		      struct mlx5_flow_data *data);

static int
mlx5_flow_create_udp(const struct rte_flow_item *item,
		     const void *default_mask,
		     struct mlx5_flow_data *data);

static int
mlx5_flow_create_tcp(const struct rte_flow_item *item,
		     const void *default_mask,
		     struct mlx5_flow_data *data);

static int
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
		       const void *default_mask,
		       struct mlx5_flow_data *data);

static int
mlx5_flow_create_vxlan_gpe(const struct rte_flow_item *item,
			   const void *default_mask,
			   struct mlx5_flow_data *data);

static int
mlx5_flow_create_gre(const struct rte_flow_item *item,
		     const void *default_mask,
		     struct mlx5_flow_data *data);

struct mlx5_flow_parse;

static void
mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
		      unsigned int size);

static int
mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);

static int
mlx5_flow_create_count(struct rte_eth_dev *dev, struct mlx5_flow_parse *parser);
/* Hash RX queue types. */

/* Initialization data for hash RX queue. */
struct hash_rxq_init {
	uint64_t hash_fields; /* Fields that participate in the hash. */
	uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
	unsigned int flow_priority; /* Flow priority to use. */
	unsigned int ip_version; /* Internet protocol. */
};

/* Initialization data for hash RX queues. */
const struct hash_rxq_init hash_rxq_init[] = {
	{
		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
				IBV_RX_HASH_DST_IPV4 |
				IBV_RX_HASH_SRC_PORT_TCP |
				IBV_RX_HASH_DST_PORT_TCP),
		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
		.ip_version = MLX5_IPV4,
	},
	{
		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
				IBV_RX_HASH_DST_IPV4 |
				IBV_RX_HASH_SRC_PORT_UDP |
				IBV_RX_HASH_DST_PORT_UDP),
		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
		.ip_version = MLX5_IPV4,
	},
	{
		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
				IBV_RX_HASH_DST_IPV4),
		.dpdk_rss_hf = (ETH_RSS_IPV4 |
		.ip_version = MLX5_IPV4,
	},
	{
		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
				IBV_RX_HASH_DST_IPV6 |
				IBV_RX_HASH_SRC_PORT_TCP |
				IBV_RX_HASH_DST_PORT_TCP),
		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
		.ip_version = MLX5_IPV6,
	},
	{
		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
				IBV_RX_HASH_DST_IPV6 |
				IBV_RX_HASH_SRC_PORT_UDP |
				IBV_RX_HASH_DST_PORT_UDP),
		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
		.ip_version = MLX5_IPV6,
	},
	{
		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
				IBV_RX_HASH_DST_IPV6),
		.dpdk_rss_hf = (ETH_RSS_IPV6 |
		.ip_version = MLX5_IPV6,
	},
};

/* Number of entries in hash_rxq_init[]. */
const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
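
/*
 * Illustration (not part of the driver): the table above is what maps a
 * DPDK RSS request onto Verbs hash fields. Given an RSS action with
 * .types = ETH_RSS_NONFRAG_IPV4_TCP, the parser matches the entry whose
 * dpdk_rss_hf contains that type and ends up hashing on
 * IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4 |
 * IBV_RX_HASH_SRC_PORT_TCP | IBV_RX_HASH_DST_PORT_TCP.
 */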
/** Structure for holding counter stats. */
struct mlx5_flow_counter_stats {
	uint64_t hits; /**< Number of packets matched by the rule. */
	uint64_t bytes; /**< Number of bytes matched by the rule. */
};

/** Structure for the drop queue. */
struct mlx5_hrxq_drop {
	struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
	struct ibv_qp *qp; /**< Verbs queue pair. */
	struct ibv_wq *wq; /**< Verbs work queue. */
	struct ibv_cq *cq; /**< Verbs completion queue. */
};
/* Flow structures. */
struct mlx5_flow {
	uint64_t hash_fields; /**< Fields that participate in the hash. */
	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
	struct ibv_flow *ibv_flow; /**< Verbs flow. */
	struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
};

/* Drop flow structures. */
struct mlx5_flow_drop {
	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
	struct ibv_flow *ibv_flow; /**< Verbs flow. */
};

struct rte_flow {
	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
	uint32_t mark:1; /**< Set if the flow is marked. */
	uint32_t drop:1; /**< Drop queue. */
	struct rte_flow_action_rss rss_conf; /**< RSS configuration. */
	uint16_t (*queues)[]; /**< Queue indexes to use. */
	uint8_t rss_key[40]; /**< Copy of the RSS key. */
	uint32_t tunnel; /**< Tunnel type of RTE_PTYPE_TUNNEL_XXX. */
	struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
	struct mlx5_flow_counter_stats counter_stats; /**< The counter stats. */
	struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
	/**< Flow with Rx queue. */
};
/** Static initializer for items. */
#define ITEMS(...) \
	(const enum rte_flow_item_type []){ \
		__VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
	}
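
/*
 * For instance, ITEMS(RTE_FLOW_ITEM_TYPE_IPV4, RTE_FLOW_ITEM_TYPE_IPV6)
 * expands to the compound literal
 *
 *	(const enum rte_flow_item_type []){
 *		RTE_FLOW_ITEM_TYPE_IPV4,
 *		RTE_FLOW_ITEM_TYPE_IPV6,
 *		RTE_FLOW_ITEM_TYPE_END,
 *	}
 *
 * i.e. an anonymous array always terminated by the END item, which is
 * what the graph walker below relies on.
 */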
#define IS_TUNNEL(type) ( \
	(type) == RTE_FLOW_ITEM_TYPE_VXLAN || \
	(type) == RTE_FLOW_ITEM_TYPE_VXLAN_GPE || \
	(type) == RTE_FLOW_ITEM_TYPE_GRE)

const uint32_t flow_ptype[] = {
	[RTE_FLOW_ITEM_TYPE_VXLAN] = RTE_PTYPE_TUNNEL_VXLAN,
	[RTE_FLOW_ITEM_TYPE_VXLAN_GPE] = RTE_PTYPE_TUNNEL_VXLAN_GPE,
	[RTE_FLOW_ITEM_TYPE_GRE] = RTE_PTYPE_TUNNEL_GRE,
};

#define PTYPE_IDX(t) ((RTE_PTYPE_TUNNEL_MASK & (t)) >> 12)
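
/*
 * PTYPE_IDX() turns the tunnel bits of a packet type into a small array
 * index. Assuming RTE_PTYPE_TUNNEL_MASK is 0x0000f000 and
 * RTE_PTYPE_TUNNEL_VXLAN is 0x00003000 (their values in
 * rte_mbuf_ptype.h), PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN) evaluates to
 * (0x3000 & 0xf000) >> 12 == 3.
 */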
const uint32_t ptype_ext[] = {
	[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] = RTE_PTYPE_TUNNEL_VXLAN |
	[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN_GPE)] = RTE_PTYPE_TUNNEL_VXLAN_GPE |
	[PTYPE_IDX(RTE_PTYPE_TUNNEL_GRE)] = RTE_PTYPE_TUNNEL_GRE,
};
/** Structure to generate a simple graph of layers supported by the NIC. */
struct mlx5_flow_items {
	/** List of possible actions for these items. */
	const enum rte_flow_action_type *const actions;
	/** Bit-masks corresponding to the possibilities for the item. */
	const void *mask;
	/**
	 * Default bit-masks to use when item->mask is not provided. When
	 * \default_mask is also NULL, the full supported bit-mask (\mask) is
	 * used instead.
	 */
	const void *default_mask;
	/** Bit-masks size in bytes. */
	const unsigned int mask_sz;
	/**
	 * Conversion function from rte_flow to NIC specific flow.
	 *
	 * @param item
	 *   rte_flow item to convert.
	 * @param default_mask
	 *   Default bit-masks to use when item->mask is not provided.
	 * @param data
	 *   Internal structure to store the conversion.
	 *
	 * @return
	 *   0 on success, a negative errno value otherwise and rte_errno is
	 *   set.
	 */
	int (*convert)(const struct rte_flow_item *item,
		       const void *default_mask,
		       struct mlx5_flow_data *data);
	/** Size in bytes of the destination structure. */
	const unsigned int dst_sz;
	/** List of possible following items. */
	const enum rte_flow_item_type *const items;
};
/** Valid actions for this PMD. */
static const enum rte_flow_action_type valid_actions[] = {
	RTE_FLOW_ACTION_TYPE_DROP,
	RTE_FLOW_ACTION_TYPE_QUEUE,
	RTE_FLOW_ACTION_TYPE_MARK,
	RTE_FLOW_ACTION_TYPE_FLAG,
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
	RTE_FLOW_ACTION_TYPE_COUNT,
#endif
	RTE_FLOW_ACTION_TYPE_END,
};
/** Graph of supported items and associated actions. */
static const struct mlx5_flow_items mlx5_flow_items[] = {
	[RTE_FLOW_ITEM_TYPE_END] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
			       RTE_FLOW_ITEM_TYPE_VXLAN,
			       RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
			       RTE_FLOW_ITEM_TYPE_GRE),
	},
	[RTE_FLOW_ITEM_TYPE_ETH] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
			       RTE_FLOW_ITEM_TYPE_IPV4,
			       RTE_FLOW_ITEM_TYPE_IPV6),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_eth){
			.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
			.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
		},
		.default_mask = &rte_flow_item_eth_mask,
		.mask_sz = sizeof(struct rte_flow_item_eth),
		.convert = mlx5_flow_create_eth,
		.dst_sz = sizeof(struct ibv_flow_spec_eth),
	},
	[RTE_FLOW_ITEM_TYPE_VLAN] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
			       RTE_FLOW_ITEM_TYPE_IPV6),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_vlan){
		},
		.default_mask = &rte_flow_item_vlan_mask,
		.mask_sz = sizeof(struct rte_flow_item_vlan),
		.convert = mlx5_flow_create_vlan,
	},
	[RTE_FLOW_ITEM_TYPE_IPV4] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
			       RTE_FLOW_ITEM_TYPE_TCP,
			       RTE_FLOW_ITEM_TYPE_GRE),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_ipv4){
			.hdr = {
				.type_of_service = -1,
			},
		},
		.default_mask = &rte_flow_item_ipv4_mask,
		.mask_sz = sizeof(struct rte_flow_item_ipv4),
		.convert = mlx5_flow_create_ipv4,
		.dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
	},
	[RTE_FLOW_ITEM_TYPE_IPV6] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
			       RTE_FLOW_ITEM_TYPE_TCP,
			       RTE_FLOW_ITEM_TYPE_GRE),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_ipv6){
			.hdr = {
				.src_addr = {
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
				},
				.dst_addr = {
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
				},
			},
		},
		.default_mask = &rte_flow_item_ipv6_mask,
		.mask_sz = sizeof(struct rte_flow_item_ipv6),
		.convert = mlx5_flow_create_ipv6,
		.dst_sz = sizeof(struct ibv_flow_spec_ipv6),
	},
	[RTE_FLOW_ITEM_TYPE_UDP] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN,
			       RTE_FLOW_ITEM_TYPE_VXLAN_GPE),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_udp){
		},
		.default_mask = &rte_flow_item_udp_mask,
		.mask_sz = sizeof(struct rte_flow_item_udp),
		.convert = mlx5_flow_create_udp,
		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
	},
	[RTE_FLOW_ITEM_TYPE_TCP] = {
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_tcp){
		},
		.default_mask = &rte_flow_item_tcp_mask,
		.mask_sz = sizeof(struct rte_flow_item_tcp),
		.convert = mlx5_flow_create_tcp,
		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
	},
	[RTE_FLOW_ITEM_TYPE_GRE] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
			       RTE_FLOW_ITEM_TYPE_IPV4,
			       RTE_FLOW_ITEM_TYPE_IPV6),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_gre){
		},
		.default_mask = &rte_flow_item_gre_mask,
		.mask_sz = sizeof(struct rte_flow_item_gre),
		.convert = mlx5_flow_create_gre,
		.dst_sz = sizeof(struct ibv_flow_spec_tunnel),
	},
	[RTE_FLOW_ITEM_TYPE_VXLAN] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
			       RTE_FLOW_ITEM_TYPE_IPV4, /* For L3 VXLAN. */
			       RTE_FLOW_ITEM_TYPE_IPV6), /* For L3 VXLAN. */
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_vxlan){
			.vni = "\xff\xff\xff",
		},
		.default_mask = &rte_flow_item_vxlan_mask,
		.mask_sz = sizeof(struct rte_flow_item_vxlan),
		.convert = mlx5_flow_create_vxlan,
		.dst_sz = sizeof(struct ibv_flow_spec_tunnel),
	},
	[RTE_FLOW_ITEM_TYPE_VXLAN_GPE] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
			       RTE_FLOW_ITEM_TYPE_IPV4,
			       RTE_FLOW_ITEM_TYPE_IPV6),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_vxlan_gpe){
			.vni = "\xff\xff\xff",
		},
		.default_mask = &rte_flow_item_vxlan_gpe_mask,
		.mask_sz = sizeof(struct rte_flow_item_vxlan_gpe),
		.convert = mlx5_flow_create_vxlan_gpe,
		.dst_sz = sizeof(struct ibv_flow_spec_tunnel),
	},
};
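
/*
 * Reading the graph above: a pattern is accepted only if each item type
 * appears in the .items list of its predecessor, starting from
 * RTE_FLOW_ITEM_TYPE_END. For example, eth / ipv4 / udp / vxlan is
 * valid because ETH follows END, IPV4 follows ETH, UDP follows IPV4
 * and VXLAN follows UDP, while eth / udp is rejected since UDP is not
 * listed under ETH.
 */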
/** Structure to pass to the conversion function. */
struct mlx5_flow_parse {
	uint32_t inner; /**< Verbs value, set once tunnel is encountered. */
	uint32_t create:1;
	/**< Whether resources should remain after a validate. */
	uint32_t drop:1; /**< Target is a drop queue. */
	uint32_t mark:1; /**< Mark is present in the flow. */
	uint32_t count:1; /**< Count is present in the flow. */
	uint32_t mark_id; /**< Mark identifier. */
	struct rte_flow_action_rss rss_conf; /**< RSS configuration. */
	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indexes to use. */
	uint8_t rss_key[40]; /**< Copy of the RSS key. */
	enum hash_rxq_type layer; /**< Last pattern layer detected. */
	enum hash_rxq_type out_layer; /**< Last outer pattern layer detected. */
	uint32_t tunnel; /**< Tunnel type of RTE_PTYPE_TUNNEL_XXX. */
	struct ibv_counter_set *cs; /**< Holds the counter set for the rule. */
	struct {
		struct ibv_flow_attr *ibv_attr;
		/**< Pointer to Verbs attributes. */
		unsigned int offset;
		/**< Current position or total size of the attribute. */
		uint64_t hash_fields; /**< Verbs hash fields. */
	} queue[RTE_DIM(hash_rxq_init)];
};
static const struct rte_flow_ops mlx5_flow_ops = {
	.validate = mlx5_flow_validate,
	.create = mlx5_flow_create,
	.destroy = mlx5_flow_destroy,
	.flush = mlx5_flow_flush,
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
	.query = mlx5_flow_query,
#endif
	.isolate = mlx5_flow_isolate,
};
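
/*
 * Usage sketch (illustrative only, not part of this file): applications
 * reach these callbacks through the generic rte_flow API, e.g.:
 *
 *	struct rte_flow_attr attr = { .ingress = 1 };
 *	struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 *	struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_DROP },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 *	struct rte_flow_error err;
 *	struct rte_flow *f =
 *		rte_flow_create(port_id, &attr, pattern, actions, &err);
 *
 * rte_flow_create() dispatches to mlx5_flow_create() below through this
 * mlx5_flow_ops structure.
 */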
/* Convert FDIR request to Generic flow. */
struct mlx5_fdir {
	struct rte_flow_attr attr;
	struct rte_flow_action actions[2];
	struct rte_flow_item items[4];
	struct rte_flow_item_eth l2;
	struct rte_flow_item_eth l2_mask;
	union {
		struct rte_flow_item_ipv4 ipv4;
		struct rte_flow_item_ipv6 ipv6;
	} l3;
	union {
		struct rte_flow_item_ipv4 ipv4;
		struct rte_flow_item_ipv6 ipv6;
	} l3_mask;
	union {
		struct rte_flow_item_udp udp;
		struct rte_flow_item_tcp tcp;
	} l4;
	union {
		struct rte_flow_item_udp udp;
		struct rte_flow_item_tcp tcp;
	} l4_mask;
	struct rte_flow_action_queue queue;
};

/* Verbs specification header. */
struct ibv_spec_header {
	enum ibv_flow_spec_type type;
	uint16_t size;
};
/**
 * Check support for a given item.
 *
 * @param item[in]
 *   Item specification.
 * @param mask[in]
 *   Bit-masks covering supported fields to compare with spec, last and mask.
 * @param size
 *   Bit-mask size in bytes.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_item_validate(const struct rte_flow_item *item,
			const uint8_t *mask, unsigned int size)
{
	if (!item->spec && (item->mask || item->last)) {

	if (item->spec && !item->mask) {

		const uint8_t *spec = item->spec;

		for (i = 0; i < size; ++i)
			if ((spec[i] | mask[i]) != mask[i]) {

	if (item->last && !item->mask) {

		const uint8_t *spec = item->last;

		for (i = 0; i < size; ++i)
			if ((spec[i] | mask[i]) != mask[i]) {

		const uint8_t *spec = item->spec;

		for (i = 0; i < size; ++i)
			if ((spec[i] | mask[i]) != mask[i]) {

	if (item->spec && item->last) {

		const uint8_t *apply = mask;

		for (i = 0; i < size; ++i) {
			spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
			last[i] = ((const uint8_t *)item->last)[i] & apply[i];
		}
		ret = memcmp(spec, last, size);
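		/*
		 * Note: this memcmp() means a spec/last range is only
		 * accepted when both ends are identical under the applied
		 * mask. For example, src 10.0.0.1 .. 10.0.0.9 with mask
		 * 255.255.255.0 passes (both ends mask to 10.0.0.0), while
		 * mask 255.255.255.255 does not; true ranges are not
		 * supported by this parser.
		 */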
/**
 * Extract attributes for the parser.
 *
 * @param[in] attr
 *   Flow rule attributes.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_convert_attributes(const struct rte_flow_attr *attr,
			     struct rte_flow_error *error)
{
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
				   "groups are not supported");

	if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
				   "priorities are not supported");

		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
				   "egress is not supported");

	if (attr->transfer) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
				   "transfer is not supported");

	if (!attr->ingress) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
				   "only ingress is supported");
/**
 * Extract the actions request for the parser.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_convert_actions(struct rte_eth_dev *dev,
			  const struct rte_flow_action actions[],
			  struct rte_flow_error *error,
			  struct mlx5_flow_parse *parser)
{
	enum { FATE = 1, MARK = 2, COUNT = 4, };
	uint32_t overlap = 0;
	struct priv *priv = dev->data->dev_private;
	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {

		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
				goto exit_action_overlap;

		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
			const struct rte_flow_action_queue *queue =
				(const struct rte_flow_action_queue *)

				goto exit_action_overlap;

			if (!queue || (queue->index > (priv->rxqs_n - 1)))
				goto exit_action_not_supported;
			parser->queues[0] = queue->index;
			parser->rss_conf = (struct rte_flow_action_rss){
				.queue = parser->queues,

		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
			const struct rte_flow_action_rss *rss =
				(const struct rte_flow_action_rss *)

			const uint8_t *rss_key;
			uint32_t rss_key_len;

				goto exit_action_overlap;

			    rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   "the only supported RSS hash"
						   " function is Toeplitz");

#ifndef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
			if (parser->rss_conf.level > 1) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   "a nonzero RSS encapsulation"
						   " level is not supported");

			if (parser->rss_conf.level > 2) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   "RSS encapsulation level"
						   " > 1 is not supported");

			if (rss->types & MLX5_RSS_HF_MASK) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   "unsupported RSS type"

				rss_key_len = rss->key_len;

				rss_key_len = rss_hash_default_key_len;
				rss_key = rss_hash_default_key;

			if (rss_key_len != RTE_DIM(parser->rss_key)) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   "RSS hash key must be"
						   " exactly 40 bytes long");

			if (!rss->queue_num) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,

			if (rss->queue_num > RTE_DIM(parser->queues)) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   "too many queues for RSS"

			for (n = 0; n < rss->queue_num; ++n) {
				if (rss->queue[n] >= priv->rxqs_n) {
					rte_flow_error_set(error, EINVAL,
						RTE_FLOW_ERROR_TYPE_ACTION,
						"queue id > number of"

			parser->rss_conf = (struct rte_flow_action_rss){
				.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
				.key_len = rss_key_len,
				.queue_num = rss->queue_num,
				.key = memcpy(parser->rss_key, rss_key,
					      sizeof(*rss_key) * rss_key_len),
				.queue = memcpy(parser->queues, rss->queue,
						sizeof(*rss->queue) *

		} else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
			const struct rte_flow_action_mark *mark =
				(const struct rte_flow_action_mark *)

				goto exit_action_overlap;

				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   "mark must be defined");
			} else if (mark->id >= MLX5_FLOW_MARK_MAX) {
				rte_flow_error_set(error, ENOTSUP,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   "mark must be between 0"

			parser->mark_id = mark->id;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
				goto exit_action_overlap;

		} else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
			   priv->config.flow_counter_en) {
				goto exit_action_overlap;

			goto exit_action_not_supported;
		}
	}
	/* When fate is unknown, drop traffic. */
	if (!(overlap & FATE))

	if (parser->drop && parser->mark)

	if (!parser->rss_conf.queue_num && !parser->drop) {
		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "no valid action");

exit_action_not_supported:
	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
			   actions, "action not supported");
exit_action_overlap:
	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
			   actions, "overlapping actions are not supported");
/**
 * Validate items.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[in] items
 *   Pattern specification (list terminated by the END pattern item).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_convert_items_validate(struct rte_eth_dev *dev,
				 const struct rte_flow_item items[],
				 struct rte_flow_error *error,
				 struct mlx5_flow_parse *parser)
{
	struct priv *priv = dev->data->dev_private;
	const struct mlx5_flow_items *cur_item = mlx5_flow_items;

	/* Initialise the offsets to start after verbs attribute. */
	for (i = 0; i != hash_rxq_init_n; ++i)
		parser->queue[i].offset = sizeof(struct ibv_flow_attr);
	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
		const struct mlx5_flow_items *token = NULL;

		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)

		     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;

			if (cur_item->items[i] == items->type) {
				token = &mlx5_flow_items[items->type];

			goto exit_item_not_supported;

		ret = mlx5_flow_item_validate(items,
					      (const uint8_t *)cur_item->mask,

			goto exit_item_not_supported;
		if (IS_TUNNEL(items->type)) {
			if (parser->tunnel) {
				rte_flow_error_set(error, ENOTSUP,
						   RTE_FLOW_ERROR_TYPE_ITEM,
						   "Cannot recognize multiple"
						   " tunnel encapsulations.");

			if (!priv->config.tunnel_en &&
			    parser->rss_conf.level > 1) {
				rte_flow_error_set(error, ENOTSUP,
						   RTE_FLOW_ERROR_TYPE_ITEM,
						   "RSS on tunnel is not supported");

			parser->inner = IBV_FLOW_SPEC_INNER;
			parser->tunnel = flow_ptype[items->type];

			parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;

			for (n = 0; n != hash_rxq_init_n; ++n)
				parser->queue[n].offset += cur_item->dst_sz;

		parser->queue[HASH_RXQ_ETH].offset +=
			sizeof(struct ibv_flow_spec_action_drop);

		for (i = 0; i != hash_rxq_init_n; ++i)
			parser->queue[i].offset +=
				sizeof(struct ibv_flow_spec_action_tag);

		unsigned int size = sizeof(struct ibv_flow_spec_counter_action);

		for (i = 0; i != hash_rxq_init_n; ++i)
			parser->queue[i].offset += size;

exit_item_not_supported:
	return rte_flow_error_set(error, -ret, RTE_FLOW_ERROR_TYPE_ITEM,
				  items, "item not supported");
}
/**
 * Allocate memory space to store verbs flow attributes.
 *
 * @param size
 *   Amount of bytes to allocate.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A verbs flow attribute on success, NULL otherwise and rte_errno is set.
 */
static struct ibv_flow_attr *
mlx5_flow_convert_allocate(unsigned int size, struct rte_flow_error *error)
{
	struct ibv_flow_attr *ibv_attr;

	ibv_attr = rte_calloc(__func__, 1, size, 0);
	if (!ibv_attr) {
		rte_flow_error_set(error, ENOMEM,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   "cannot allocate verbs spec attributes");
/**
 * Give inner packet matching a higher priority than non-inner matching.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[in, out] parser
 *   Internal parser structure.
 * @param attr
 *   User flow attribute.
 */
static void
mlx5_flow_update_priority(struct rte_eth_dev *dev,
			  struct mlx5_flow_parse *parser,
			  const struct rte_flow_attr *attr)
{
	struct priv *priv = dev->data->dev_private;

	/* 8 priorities	   >= 16 priorities
	 * Control flow:	4-7	8-15
	 * User normal flow:	1-3	4-7
	 * User tunnel flow:	0-2	0-3
	 */
	priority = attr->priority * MLX5_VERBS_FLOW_PRIO_8;
	if (priv->config.max_verbs_prio == MLX5_VERBS_FLOW_PRIO_8)

	/*
	 * Lower non-tunnel flow Verbs priority by 1 if only 8 Verbs
	 * priorities are supported, by 4 otherwise.
	 */
	if (!parser->inner) {
		if (priv->config.max_verbs_prio == MLX5_VERBS_FLOW_PRIO_8)

			priority += MLX5_VERBS_FLOW_PRIO_8 / 2;

	parser->queue[HASH_RXQ_ETH].ibv_attr->priority = priority +
		hash_rxq_init[HASH_RXQ_ETH].flow_priority;

	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (!parser->queue[i].ibv_attr)

		parser->queue[i].ibv_attr->priority = priority +
			hash_rxq_init[i].flow_priority;
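
	/*
	 * Worked example (consistent with the table above; the halving and
	 * "+1" adjustments live in branches elided here): on a device with
	 * only 8 Verbs priorities, a non-tunnel control flow
	 * (attr->priority == 1) gets 1 * MLX5_VERBS_FLOW_PRIO_8 / 2 + 1 == 5,
	 * inside the 4-7 control range, while a tunnel user flow
	 * (attr->priority == 0, parser->inner set) keeps 0 and stays in the
	 * 0-2 range, plus the per-layer flow_priority offset from
	 * hash_rxq_init[].
	 */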
/**
 * Finalise verbs flow attributes.
 *
 * @param[in, out] parser
 *   Internal parser structure.
 */
static void
mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
{
	unsigned int i;
	uint32_t inner = parser->inner;

	/* Don't create extra flows for outer RSS. */
	if (parser->tunnel && parser->rss_conf.level < 2)

	/*
	 * Fill missing layers in verbs specifications, or compute the correct
	 * offset to allocate the memory space for the attributes and
	 * specifications.
	 */
	for (i = 0; i != hash_rxq_init_n - 1; ++i) {
		union {
			struct ibv_flow_spec_ipv4_ext ipv4;
			struct ibv_flow_spec_ipv6 ipv6;
			struct ibv_flow_spec_tcp_udp udp_tcp;
			struct ibv_flow_spec_eth eth;
		} specs;

		if (i == parser->layer)

		if (parser->layer == HASH_RXQ_ETH ||
		    parser->layer == HASH_RXQ_TUNNEL) {
			if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
				size = sizeof(struct ibv_flow_spec_ipv4_ext);
				specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
					.type = inner | IBV_FLOW_SPEC_IPV4_EXT,

				size = sizeof(struct ibv_flow_spec_ipv6);
				specs.ipv6 = (struct ibv_flow_spec_ipv6){
					.type = inner | IBV_FLOW_SPEC_IPV6,

			if (parser->queue[i].ibv_attr) {
				dst = (void *)((uintptr_t)
					       parser->queue[i].ibv_attr +
					       parser->queue[i].offset);
				memcpy(dst, &specs, size);
				++parser->queue[i].ibv_attr->num_of_specs;

			parser->queue[i].offset += size;

		if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
		    (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
			size = sizeof(struct ibv_flow_spec_tcp_udp);
			specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
				.type = inner | ((i == HASH_RXQ_UDPV4 ||
						  i == HASH_RXQ_UDPV6) ?

			if (parser->queue[i].ibv_attr) {
				dst = (void *)((uintptr_t)
					       parser->queue[i].ibv_attr +
					       parser->queue[i].offset);
				memcpy(dst, &specs, size);
				++parser->queue[i].ibv_attr->num_of_specs;

			parser->queue[i].offset += size;
/**
 * Update flows according to pattern and RSS hash fields.
 *
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_convert_rss(struct mlx5_flow_parse *parser)
{
	unsigned int i;
	enum hash_rxq_type start;
	enum hash_rxq_type layer;
	int outer = parser->tunnel && parser->rss_conf.level < 2;
	uint64_t rss = parser->rss_conf.types;

	/* Default to outer RSS. */
	if (!parser->rss_conf.level)
		parser->rss_conf.level = 1;
	layer = outer ? parser->out_layer : parser->layer;
	if (layer == HASH_RXQ_TUNNEL)
		layer = HASH_RXQ_ETH;

	/* Only one hash type for outer RSS. */
	if (rss && layer == HASH_RXQ_ETH) {
		start = HASH_RXQ_TCPV4;
	} else if (rss && layer != HASH_RXQ_ETH &&
		   !(rss & hash_rxq_init[layer].dpdk_rss_hf)) {
		/* If the RSS types do not match the L4 pattern, try L3 RSS. */
		if (layer < HASH_RXQ_IPV4)
			layer = HASH_RXQ_IPV4;
		else if (layer > HASH_RXQ_IPV4 && layer < HASH_RXQ_IPV6)
			layer = HASH_RXQ_IPV6;

	/* Scan first valid hash type. */
	for (i = start; rss && i <= layer; ++i) {
		if (!parser->queue[i].ibv_attr)

		if (hash_rxq_init[i].dpdk_rss_hf & rss)

	if (rss && i <= layer)
		parser->queue[layer].hash_fields =
			hash_rxq_init[i].hash_fields;
	/* Trim unused hash types. */
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser->queue[i].ibv_attr && i != layer) {
			rte_free(parser->queue[i].ibv_attr);
			parser->queue[i].ibv_attr = NULL;

	/* Expand for inner or normal RSS. */
	if (rss && (layer == HASH_RXQ_ETH || layer == HASH_RXQ_IPV4))
		start = HASH_RXQ_TCPV4;
	else if (rss && layer == HASH_RXQ_IPV6)
		start = HASH_RXQ_TCPV6;

	/* For L4 pattern, try L3 RSS if no L4 RSS. */
	/* Trim unused hash types. */
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (!parser->queue[i].ibv_attr)

		if (i < start || i > layer) {
			rte_free(parser->queue[i].ibv_attr);
			parser->queue[i].ibv_attr = NULL;

		if (hash_rxq_init[i].dpdk_rss_hf & rss) {
			parser->queue[i].hash_fields =
				hash_rxq_init[i].hash_fields;
		} else if (i != layer) {
			/* Remove unused RSS expansion. */
			rte_free(parser->queue[i].ibv_attr);
			parser->queue[i].ibv_attr = NULL;
		} else if (layer < HASH_RXQ_IPV4 &&
			   (hash_rxq_init[HASH_RXQ_IPV4].dpdk_rss_hf &

			/* Allow IPv4 RSS on L4 pattern. */
			parser->queue[i].hash_fields =
				hash_rxq_init[HASH_RXQ_IPV4]

		} else if (i > HASH_RXQ_IPV4 && i < HASH_RXQ_IPV6 &&
			   (hash_rxq_init[HASH_RXQ_IPV6].dpdk_rss_hf &

			/* Allow IPv6 RSS on L4 pattern. */
			parser->queue[i].hash_fields =
				hash_rxq_init[HASH_RXQ_IPV6]

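	/*
	 * Worked example: for pattern eth / ipv4 / udp with RSS types
	 * ETH_RSS_NONFRAG_IPV4_UDP, the layer is HASH_RXQ_UDPV4 and its
	 * dpdk_rss_hf matches, so the UDPv4 hash fields are kept and every
	 * other queue[] entry is freed. With RSS types ETH_RSS_IPV4 only,
	 * the L4 pattern does not match, and the rule falls back to the
	 * HASH_RXQ_IPV4 hash fields instead.
	 */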
/**
 * Validate and convert a flow supported by the NIC.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_convert(struct rte_eth_dev *dev,
		  const struct rte_flow_attr *attr,
		  const struct rte_flow_item items[],
		  const struct rte_flow_action actions[],
		  struct rte_flow_error *error,
		  struct mlx5_flow_parse *parser)
{
	const struct mlx5_flow_items *cur_item = mlx5_flow_items;

	/* First step. Validate the attributes, items and actions. */
	*parser = (struct mlx5_flow_parse){
		.create = parser->create,
		.layer = HASH_RXQ_ETH,
		.mark_id = MLX5_FLOW_MARK_DEFAULT,
	};
	ret = mlx5_flow_convert_attributes(attr, error);

	ret = mlx5_flow_convert_actions(dev, actions, error, parser);

	ret = mlx5_flow_convert_items_validate(dev, items, error, parser);

	mlx5_flow_convert_finalise(parser);

	/*
	 * Second step.
	 * Allocate the memory space to store verbs specifications.
	 */
	unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;

	parser->queue[HASH_RXQ_ETH].ibv_attr =
		mlx5_flow_convert_allocate(offset, error);
	if (!parser->queue[HASH_RXQ_ETH].ibv_attr)

	parser->queue[HASH_RXQ_ETH].offset =
		sizeof(struct ibv_flow_attr);

	for (i = 0; i != hash_rxq_init_n; ++i) {
		unsigned int offset;

		offset = parser->queue[i].offset;
		parser->queue[i].ibv_attr =
			mlx5_flow_convert_allocate(offset, error);
		if (!parser->queue[i].ibv_attr)

		parser->queue[i].offset = sizeof(struct ibv_flow_attr);
	/* Third step. Parse and convert, filling in the specifications. */

	parser->layer = HASH_RXQ_ETH;
	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
		struct mlx5_flow_data data = {

		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)

		cur_item = &mlx5_flow_items[items->type];
		ret = cur_item->convert(items,
					(cur_item->default_mask ?
					 cur_item->default_mask :

	if (!parser->drop) {
		/* RSS check, remove unused hash types. */
		ret = mlx5_flow_convert_rss(parser);

		/* Complete missing specification. */
		mlx5_flow_convert_finalise(parser);
	}
	mlx5_flow_update_priority(dev, parser, attr);

		mlx5_flow_create_flag_mark(parser, parser->mark_id);
	if (parser->count && parser->create) {
		mlx5_flow_create_count(dev, parser);

			goto exit_count_error;
	}

	/* Only verification is expected, all resources should be released. */
	if (!parser->create) {
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (parser->queue[i].ibv_attr) {
				rte_free(parser->queue[i].ibv_attr);
				parser->queue[i].ibv_attr = NULL;

exit_enomem:
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser->queue[i].ibv_attr) {
			rte_free(parser->queue[i].ibv_attr);
			parser->queue[i].ibv_attr = NULL;

	rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
			   NULL, "cannot allocate verbs spec attributes");
exit_count_error:
	rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
			   NULL, "cannot create counter");
/**
 * Copy the specification created into the flow.
 *
 * @param parser
 *   Internal parser structure.
 * @param src
 *   Created specification.
 * @param size
 *   Size in bytes of the specification to copy.
 */
static void
mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
		      unsigned int size)
{
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (!parser->queue[i].ibv_attr)

		dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
			       parser->queue[i].offset);
		memcpy(dst, src, size);
		++parser->queue[i].ibv_attr->num_of_specs;
		parser->queue[i].offset += size;
	}
}
/**
 * Convert Ethernet item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
		     const void *default_mask,
		     struct mlx5_flow_data *data)
{
	const struct rte_flow_item_eth *spec = item->spec;
	const struct rte_flow_item_eth *mask = item->mask;
	struct mlx5_flow_parse *parser = data->parser;
	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
	struct ibv_flow_spec_eth eth = {
		.type = parser->inner | IBV_FLOW_SPEC_ETH,

		parser->layer = HASH_RXQ_ETH;

			mask = default_mask;
		memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
		memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
		eth.val.ether_type = spec->type;
		memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
		memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
		eth.mask.ether_type = mask->type;
		/* Remove unwanted bits from values. */
		for (i = 0; i < ETHER_ADDR_LEN; ++i) {
			eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
			eth.val.src_mac[i] &= eth.mask.src_mac[i];

		eth.val.ether_type &= eth.mask.ether_type;

	mlx5_flow_create_copy(parser, &eth, eth_size);
/**
 * Convert VLAN item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_vlan(const struct rte_flow_item *item,
		      const void *default_mask,
		      struct mlx5_flow_data *data)
{
	const struct rte_flow_item_vlan *spec = item->spec;
	const struct rte_flow_item_vlan *mask = item->mask;
	struct mlx5_flow_parse *parser = data->parser;
	struct ibv_flow_spec_eth *eth;
	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
	const char *msg = "VLAN cannot be empty";

			mask = default_mask;
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (!parser->queue[i].ibv_attr)

			eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
				       parser->queue[i].offset - eth_size);
			eth->val.vlan_tag = spec->tci;
			eth->mask.vlan_tag = mask->tci;
			eth->val.vlan_tag &= eth->mask.vlan_tag;
			/*
			 * From the verbs perspective an empty VLAN is
			 * equivalent to a packet without a VLAN layer.
			 */
			if (!eth->mask.vlan_tag)

			/* Outer TPID cannot be matched. */
			if (eth->mask.ether_type) {
				msg = "VLAN TPID matching is not supported";

			eth->val.ether_type = spec->inner_type;
			eth->mask.ether_type = mask->inner_type;
			eth->val.ether_type &= eth->mask.ether_type;

	return rte_flow_error_set(data->error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM,
				  item, msg);
/**
 * Convert IPv4 item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
		      const void *default_mask,
		      struct mlx5_flow_data *data)
{
	struct priv *priv = data->dev->data->dev_private;
	const struct rte_flow_item_ipv4 *spec = item->spec;
	const struct rte_flow_item_ipv4 *mask = item->mask;
	struct mlx5_flow_parse *parser = data->parser;
	unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
	struct ibv_flow_spec_ipv4_ext ipv4 = {
		.type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,

	if (parser->layer == HASH_RXQ_TUNNEL &&
	    parser->tunnel == ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] &&
	    !priv->config.l3_vxlan_en)
		return rte_flow_error_set(data->error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  "L3 VXLAN not enabled by device"
					  " parameter and/or not configured"

	parser->layer = HASH_RXQ_IPV4;

			mask = default_mask;
		ipv4.val = (struct ibv_flow_ipv4_ext_filter){
			.src_ip = spec->hdr.src_addr,
			.dst_ip = spec->hdr.dst_addr,
			.proto = spec->hdr.next_proto_id,
			.tos = spec->hdr.type_of_service,
		};
		ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
			.src_ip = mask->hdr.src_addr,
			.dst_ip = mask->hdr.dst_addr,
			.proto = mask->hdr.next_proto_id,
			.tos = mask->hdr.type_of_service,
		};
		/* Remove unwanted bits from values. */
		ipv4.val.src_ip &= ipv4.mask.src_ip;
		ipv4.val.dst_ip &= ipv4.mask.dst_ip;
		ipv4.val.proto &= ipv4.mask.proto;
		ipv4.val.tos &= ipv4.mask.tos;

	mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
/**
 * Convert IPv6 item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
		      const void *default_mask,
		      struct mlx5_flow_data *data)
{
	struct priv *priv = data->dev->data->dev_private;
	const struct rte_flow_item_ipv6 *spec = item->spec;
	const struct rte_flow_item_ipv6 *mask = item->mask;
	struct mlx5_flow_parse *parser = data->parser;
	unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
	struct ibv_flow_spec_ipv6 ipv6 = {
		.type = parser->inner | IBV_FLOW_SPEC_IPV6,

	if (parser->layer == HASH_RXQ_TUNNEL &&
	    parser->tunnel == ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] &&
	    !priv->config.l3_vxlan_en)
		return rte_flow_error_set(data->error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  "L3 VXLAN not enabled by device"
					  " parameter and/or not configured"

	parser->layer = HASH_RXQ_IPV6;

		uint32_t vtc_flow_val;
		uint32_t vtc_flow_mask;

			mask = default_mask;
		memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
		       RTE_DIM(ipv6.val.src_ip));
		memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
		       RTE_DIM(ipv6.val.dst_ip));
		memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
		       RTE_DIM(ipv6.mask.src_ip));
		memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
		       RTE_DIM(ipv6.mask.dst_ip));
		vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
		vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
		ipv6.val.flow_label =
			rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>

		ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>

		ipv6.val.next_hdr = spec->hdr.proto;
		ipv6.val.hop_limit = spec->hdr.hop_limits;
		ipv6.mask.flow_label =
			rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>

		ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>

		ipv6.mask.next_hdr = mask->hdr.proto;
		ipv6.mask.hop_limit = mask->hdr.hop_limits;
		/* Remove unwanted bits from values. */
		for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
			ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
			ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];

		ipv6.val.flow_label &= ipv6.mask.flow_label;
		ipv6.val.traffic_class &= ipv6.mask.traffic_class;
		ipv6.val.next_hdr &= ipv6.mask.next_hdr;
		ipv6.val.hop_limit &= ipv6.mask.hop_limit;

	mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
/**
 * Convert UDP item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_udp(const struct rte_flow_item *item,
		     const void *default_mask,
		     struct mlx5_flow_data *data)
{
	const struct rte_flow_item_udp *spec = item->spec;
	const struct rte_flow_item_udp *mask = item->mask;
	struct mlx5_flow_parse *parser = data->parser;
	unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
	struct ibv_flow_spec_tcp_udp udp = {
		.type = parser->inner | IBV_FLOW_SPEC_UDP,

	if (parser->layer == HASH_RXQ_IPV4)
		parser->layer = HASH_RXQ_UDPV4;
	else
		parser->layer = HASH_RXQ_UDPV6;

			mask = default_mask;
		udp.val.dst_port = spec->hdr.dst_port;
		udp.val.src_port = spec->hdr.src_port;
		udp.mask.dst_port = mask->hdr.dst_port;
		udp.mask.src_port = mask->hdr.src_port;
		/* Remove unwanted bits from values. */
		udp.val.src_port &= udp.mask.src_port;
		udp.val.dst_port &= udp.mask.dst_port;

	mlx5_flow_create_copy(parser, &udp, udp_size);
/**
 * Convert TCP item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_tcp(const struct rte_flow_item *item,
		     const void *default_mask,
		     struct mlx5_flow_data *data)
{
	const struct rte_flow_item_tcp *spec = item->spec;
	const struct rte_flow_item_tcp *mask = item->mask;
	struct mlx5_flow_parse *parser = data->parser;
	unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
	struct ibv_flow_spec_tcp_udp tcp = {
		.type = parser->inner | IBV_FLOW_SPEC_TCP,

	if (parser->layer == HASH_RXQ_IPV4)
		parser->layer = HASH_RXQ_TCPV4;
	else
		parser->layer = HASH_RXQ_TCPV6;

			mask = default_mask;
		tcp.val.dst_port = spec->hdr.dst_port;
		tcp.val.src_port = spec->hdr.src_port;
		tcp.mask.dst_port = mask->hdr.dst_port;
		tcp.mask.src_port = mask->hdr.src_port;
		/* Remove unwanted bits from values. */
		tcp.val.src_port &= tcp.mask.src_port;
		tcp.val.dst_port &= tcp.mask.dst_port;

	mlx5_flow_create_copy(parser, &tcp, tcp_size);
/**
 * Convert VXLAN item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
		       const void *default_mask,
		       struct mlx5_flow_data *data)
{
	const struct rte_flow_item_vxlan *spec = item->spec;
	const struct rte_flow_item_vxlan *mask = item->mask;
	struct mlx5_flow_parse *parser = data->parser;
	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
	struct ibv_flow_spec_tunnel vxlan = {
		.type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,

	parser->inner = IBV_FLOW_SPEC_INNER;
	parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)];
	parser->out_layer = parser->layer;
	parser->layer = HASH_RXQ_TUNNEL;
	/* Default VXLAN to outer RSS. */
	if (!parser->rss_conf.level)
		parser->rss_conf.level = 1;

			mask = default_mask;
		memcpy(&id.vni[1], spec->vni, 3);
		vxlan.val.tunnel_id = id.vlan_id;
		memcpy(&id.vni[1], mask->vni, 3);
		vxlan.mask.tunnel_id = id.vlan_id;
		/* Remove unwanted bits from values. */
		vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;

	/*
	 * Tunnel ID 0 is equivalent to not adding a VXLAN layer: if only this
	 * layer is defined in the Verbs specification, it is interpreted as a
	 * wildcard and all packets will match this rule; if it follows a full
	 * stack layer (e.g. eth / ipv4 / udp), all packets matching the
	 * preceding layers will also match this rule.
	 * To avoid such a situation, VNI 0 is currently refused.
	 */
	if (!vxlan.val.tunnel_id)
		return rte_flow_error_set(data->error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  "VxLAN vni cannot be 0");
	mlx5_flow_create_copy(parser, &vxlan, size);
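
	/*
	 * Example rule exercising this path (testpmd-style sketch,
	 * assuming port 0 is an mlx5 device):
	 *
	 *	flow create 0 ingress
	 *		pattern eth / ipv4 / udp / vxlan vni is 42 / end
	 *		actions queue index 0 / end
	 *
	 * A "vni is 0" variant would be rejected by the check above.
	 */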
/**
 * Convert VXLAN-GPE item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_vxlan_gpe(const struct rte_flow_item *item,
			   const void *default_mask,
			   struct mlx5_flow_data *data)
{
	struct priv *priv = data->dev->data->dev_private;
	const struct rte_flow_item_vxlan_gpe *spec = item->spec;
	const struct rte_flow_item_vxlan_gpe *mask = item->mask;
	struct mlx5_flow_parse *parser = data->parser;
	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
	struct ibv_flow_spec_tunnel vxlan = {
		.type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,

	if (!priv->config.l3_vxlan_en)
		return rte_flow_error_set(data->error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  "L3 VXLAN not enabled by device"
					  " parameter and/or not configured"

	parser->inner = IBV_FLOW_SPEC_INNER;
	parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN_GPE)];
	parser->out_layer = parser->layer;
	parser->layer = HASH_RXQ_TUNNEL;
	/* Default VXLAN-GPE to outer RSS. */
	if (!parser->rss_conf.level)
		parser->rss_conf.level = 1;

			mask = default_mask;
		memcpy(&id.vni[1], spec->vni, 3);
		vxlan.val.tunnel_id = id.vlan_id;
		memcpy(&id.vni[1], mask->vni, 3);
		vxlan.mask.tunnel_id = id.vlan_id;

			return rte_flow_error_set(data->error, EINVAL,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  "VxLAN-GPE protocol not"

		/* Remove unwanted bits from values. */
		vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;

	/*
	 * Tunnel ID 0 is equivalent to not adding a VXLAN layer: if only this
	 * layer is defined in the Verbs specification, it is interpreted as a
	 * wildcard and all packets will match this rule; if it follows a full
	 * stack layer (e.g. eth / ipv4 / udp), all packets matching the
	 * preceding layers will also match this rule.
	 * To avoid such a situation, VNI 0 is currently refused.
	 */
	/* Only allow a tunnel without a tunnel id pattern after a proper
	 * outer spec. */
	if (parser->out_layer == HASH_RXQ_ETH && !vxlan.val.tunnel_id)
		return rte_flow_error_set(data->error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  "VxLAN-GPE vni cannot be 0");
	mlx5_flow_create_copy(parser, &vxlan, size);
/**
 * Convert GRE item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_gre(const struct rte_flow_item *item __rte_unused,
		     const void *default_mask __rte_unused,
		     struct mlx5_flow_data *data)
{
	struct mlx5_flow_parse *parser = data->parser;
	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
	struct ibv_flow_spec_tunnel tunnel = {
		.type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,

	struct ibv_flow_spec_ipv4_ext *ipv4;
	struct ibv_flow_spec_ipv6 *ipv6;

	parser->inner = IBV_FLOW_SPEC_INNER;
	parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_GRE)];
	parser->out_layer = parser->layer;
	parser->layer = HASH_RXQ_TUNNEL;
	/* Default GRE to inner RSS. */
	if (!parser->rss_conf.level)
		parser->rss_conf.level = 2;
	/* Update encapsulation IP layer protocol. */
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (!parser->queue[i].ibv_attr)

		if (parser->out_layer == HASH_RXQ_IPV4) {
			ipv4 = (void *)((uintptr_t)parser->queue[i].ibv_attr +
					parser->queue[i].offset -
					sizeof(struct ibv_flow_spec_ipv4_ext));
			if (ipv4->mask.proto && ipv4->val.proto != MLX5_GRE)

			ipv4->val.proto = MLX5_GRE;
			ipv4->mask.proto = 0xff;
		} else if (parser->out_layer == HASH_RXQ_IPV6) {
			ipv6 = (void *)((uintptr_t)parser->queue[i].ibv_attr +
					parser->queue[i].offset -
					sizeof(struct ibv_flow_spec_ipv6));
			if (ipv6->mask.next_hdr &&
			    ipv6->val.next_hdr != MLX5_GRE)

			ipv6->val.next_hdr = MLX5_GRE;
			ipv6->mask.next_hdr = 0xff;
		}
	}
	if (i != hash_rxq_init_n)
		return rte_flow_error_set(data->error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  "IP protocol of GRE must be 47");
	mlx5_flow_create_copy(parser, &tunnel, size);
/**
 * Convert mark/flag action to Verbs specification.
 *
 * @param parser
 *   Internal parser structure.
 * @param mark_id
 *   Mark identifier.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
{
	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
	struct ibv_flow_spec_action_tag tag = {
		.type = IBV_FLOW_SPEC_ACTION_TAG,
		.tag_id = mlx5_flow_mark_set(mark_id),
	};

	assert(parser->mark);
	mlx5_flow_create_copy(parser, &tag, size);
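
	/*
	 * On the receive side (sketch, assuming the standard DPDK mbuf
	 * flags): a matched FLAG action sets PKT_RX_FDIR in ol_flags,
	 * while a MARK action additionally sets PKT_RX_FDIR_ID and makes
	 * the mark value available to the application, e.g.:
	 *
	 *	if (mbuf->ol_flags & PKT_RX_FDIR_ID)
	 *		mark = mbuf->hash.fdir.hi;
	 */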
/**
 * Convert count action to Verbs specification.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param parser
 *   Pointer to MLX5 flow parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_count(struct rte_eth_dev *dev __rte_unused,
		       struct mlx5_flow_parse *parser __rte_unused)
{
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
	struct priv *priv = dev->data->dev_private;
	unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
	struct ibv_counter_set_init_attr init_attr = {0};
	struct ibv_flow_spec_counter_action counter = {
		.type = IBV_FLOW_SPEC_ACTION_COUNT,
		.counter_set_handle = 0,
	};

	init_attr.counter_set_id = 0;
	parser->cs = mlx5_glue->create_counter_set(priv->ctx, &init_attr);

	counter.counter_set_handle = parser->cs->handle;
	mlx5_flow_create_copy(parser, &counter, size);
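
	/*
	 * The counter set attached here is what a later rte_flow_query()
	 * call reads back, e.g. (sketch; the exact query prototype depends
	 * on the rte_flow API version in use):
	 *
	 *	struct rte_flow_query_count qc = { .reset = 1 };
	 *	struct rte_flow_action count = {
	 *		.type = RTE_FLOW_ACTION_TYPE_COUNT,
	 *	};
	 *	rte_flow_query(port_id, flow, &count, &qc, &err);
	 *
	 * which ends up in mlx5_flow_query() filling qc.hits / qc.bytes
	 * from the ibv_counter_set.
	 */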
/**
 * Complete flow rule creation with a drop queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param parser
 *   Internal parser structure.
 * @param flow
 *   Pointer to the rte_flow.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_action_queue_drop(struct rte_eth_dev *dev,
				   struct mlx5_flow_parse *parser,
				   struct rte_flow *flow,
				   struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;
	struct ibv_flow_spec_action_drop *drop;
	unsigned int size = sizeof(struct ibv_flow_spec_action_drop);

	drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
			parser->queue[HASH_RXQ_ETH].offset);
	*drop = (struct ibv_flow_spec_action_drop){
		.type = IBV_FLOW_SPEC_ACTION_DROP,

	++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
	parser->queue[HASH_RXQ_ETH].offset += size;
	flow->frxq[HASH_RXQ_ETH].ibv_attr =
		parser->queue[HASH_RXQ_ETH].ibv_attr;

		flow->cs = parser->cs;
	if (!priv->dev->data->dev_started)

	parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
	flow->frxq[HASH_RXQ_ETH].ibv_flow =
		mlx5_glue->create_flow(priv->flow_drop_queue->qp,
				       flow->frxq[HASH_RXQ_ETH].ibv_attr);
	if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "flow rule creation failure");

error:
	if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
		claim_zero(mlx5_glue->destroy_flow
			   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
		flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
	}
	if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
		rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
		flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
	}

		claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
/**
 * Create hash Rx queues when RSS is enabled.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param parser
 *   Internal parser structure.
 * @param flow
 *   Pointer to the rte_flow.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_action_queue_rss(struct rte_eth_dev *dev,
				  struct mlx5_flow_parse *parser,
				  struct rte_flow *flow,
				  struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (!parser->queue[i].ibv_attr)

		flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
		parser->queue[i].ibv_attr = NULL;
		flow->frxq[i].hash_fields = parser->queue[i].hash_fields;
		if (!priv->dev->data->dev_started)

		flow->frxq[i].hrxq =
			mlx5_hrxq_get(dev,
				      parser->rss_conf.key,
				      parser->rss_conf.key_len,
				      flow->frxq[i].hash_fields,
				      parser->rss_conf.queue,
				      parser->rss_conf.queue_num,

				      parser->rss_conf.level);
		if (flow->frxq[i].hrxq)

		flow->frxq[i].hrxq =
			mlx5_hrxq_new(dev,
				      parser->rss_conf.key,
				      parser->rss_conf.key_len,
				      flow->frxq[i].hash_fields,
				      parser->rss_conf.queue,
				      parser->rss_conf.queue_num,

				      parser->rss_conf.level);
		if (!flow->frxq[i].hrxq) {
			return rte_flow_error_set(error, ENOMEM,
						  RTE_FLOW_ERROR_TYPE_HANDLE,
						  "cannot create hash rxq");
/**
 * RXQ update after flow rule creation.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param flow
 *   Pointer to the flow rule.
 */
static void
mlx5_flow_create_update_rxqs(struct rte_eth_dev *dev, struct rte_flow *flow)
{
	struct priv *priv = dev->data->dev_private;

	if (!dev->data->dev_started)

	for (i = 0; i != flow->rss_conf.queue_num; ++i) {
		struct mlx5_rxq_data *rxq_data = (*priv->rxqs)
						 [(*flow->queues)[i]];
		struct mlx5_rxq_ctrl *rxq_ctrl =
			container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
		uint8_t tunnel = PTYPE_IDX(flow->tunnel);

		rxq_data->mark |= flow->mark;

		rxq_ctrl->tunnel_types[tunnel] += 1;
		/* Clear the tunnel type if more than one tunnel type is set. */
		for (j = 0; j != RTE_DIM(rxq_ctrl->tunnel_types); ++j) {

			if (rxq_ctrl->tunnel_types[j] > 0) {
				rxq_data->tunnel = 0;

		if (j == RTE_DIM(rxq_ctrl->tunnel_types))
			rxq_data->tunnel = flow->tunnel;
	}
}
/**
 * Dump flow hash RX queue detail.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param flow
 *   Pointer to the rte_flow.
 * @param hrxq_idx
 *   Hash RX queue index.
 */
static void
mlx5_flow_dump(struct rte_eth_dev *dev __rte_unused,
	       struct rte_flow *flow __rte_unused,
	       unsigned int hrxq_idx __rte_unused)
{
	spec_ptr = (uintptr_t)(flow->frxq[hrxq_idx].ibv_attr + 1);
	for (j = 0, off = 0; j < flow->frxq[hrxq_idx].ibv_attr->num_of_specs;

		struct ibv_flow_spec *spec = (void *)spec_ptr;
		off += sprintf(buf + off, " %x(%hu)", spec->hdr.type,

		spec_ptr += spec->hdr.size;

		"port %u Verbs flow %p type %u: hrxq:%p qp:%p ind:%p,"
		" hash:%" PRIx64 "/%u specs:%hhu(%hu), priority:%hu, type:%d,"
		" flags:%x, comp_mask:%x specs:%s",
		dev->data->port_id, (void *)flow, hrxq_idx,
		(void *)flow->frxq[hrxq_idx].hrxq,
		(void *)flow->frxq[hrxq_idx].hrxq->qp,
		(void *)flow->frxq[hrxq_idx].hrxq->ind_table,
		flow->frxq[hrxq_idx].hash_fields |
		(flow->rss_conf.level > 1 ? (uint32_t)IBV_RX_HASH_INNER : 0),
		flow->rss_conf.queue_num,
		flow->frxq[hrxq_idx].ibv_attr->num_of_specs,
		flow->frxq[hrxq_idx].ibv_attr->size,
		flow->frxq[hrxq_idx].ibv_attr->priority,
		flow->frxq[hrxq_idx].ibv_attr->type,
		flow->frxq[hrxq_idx].ibv_attr->flags,
		flow->frxq[hrxq_idx].ibv_attr->comp_mask,
/**
 * Complete flow rule creation.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param parser
 *   Internal parser structure.
 * @param flow
 *   Pointer to the rte_flow.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_action_queue(struct rte_eth_dev *dev,
			      struct mlx5_flow_parse *parser,
			      struct rte_flow *flow,
			      struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	unsigned int flows_n = 0;

	assert(!parser->drop);
	ret = mlx5_flow_create_action_queue_rss(dev, parser, flow, error);

		flow->cs = parser->cs;
	if (!priv->dev->data->dev_started)

	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (!flow->frxq[i].hrxq)

		flow->frxq[i].ibv_flow =
			mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
					       flow->frxq[i].ibv_attr);
		mlx5_flow_dump(dev, flow, i);
		if (!flow->frxq[i].ibv_flow) {
			rte_flow_error_set(error, ENOMEM,
					   RTE_FLOW_ERROR_TYPE_HANDLE,
					   NULL, "flow rule creation failure");

		rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "internal error in flow creation");

	mlx5_flow_create_update_rxqs(dev, flow);
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */

	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (flow->frxq[i].ibv_flow) {
			struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;

			claim_zero(mlx5_glue->destroy_flow(ibv_flow));
		}
		if (flow->frxq[i].hrxq)
			mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
		if (flow->frxq[i].ibv_attr)
			rte_free(flow->frxq[i].ibv_attr);
	}

		claim_zero(mlx5_glue->destroy_counter_set(flow->cs));

	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}
/**
 * Create a flow and add it to the flow list.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param list
 *   Pointer to a TAILQ flow list.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] items
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A flow on success, NULL otherwise and rte_errno is set.
 */
static struct rte_flow *
mlx5_flow_list_create(struct rte_eth_dev *dev,
		      struct mlx5_flows *list,
		      const struct rte_flow_attr *attr,
		      const struct rte_flow_item items[],
		      const struct rte_flow_action actions[],
		      struct rte_flow_error *error)
{
	struct mlx5_flow_parse parser = { .create = 1, };
	struct rte_flow *flow = NULL;
	unsigned int i;
	int ret;

	ret = mlx5_flow_convert(dev, attr, items, actions, error, &parser);
	if (ret)
		goto exit;
	flow = rte_calloc(__func__, 1,
			  sizeof(*flow) +
			  parser.rss_conf.queue_num * sizeof(uint16_t),
			  0);
	if (!flow) {
		rte_flow_error_set(error, ENOMEM,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "cannot allocate flow memory");
		return NULL;
	}
	/* Copy configuration. */
	flow->queues = (uint16_t (*)[])(flow + 1);
	flow->tunnel = parser.tunnel;
	flow->rss_conf = (struct rte_flow_action_rss){
		.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
		.level = parser.rss_conf.level,
		.types = parser.rss_conf.types,
		.key_len = parser.rss_conf.key_len,
		.queue_num = parser.rss_conf.queue_num,
		.key = memcpy(flow->rss_key, parser.rss_conf.key,
			      sizeof(*parser.rss_conf.key) *
			      parser.rss_conf.key_len),
		.queue = memcpy(flow->queues, parser.rss_conf.queue,
				sizeof(*parser.rss_conf.queue) *
				parser.rss_conf.queue_num),
	};
	flow->mark = parser.mark;
	/* Finalise the flow. */
	if (parser.drop)
		ret = mlx5_flow_create_action_queue_drop(dev, &parser, flow,
							 error);
	else
		ret = mlx5_flow_create_action_queue(dev, &parser, flow, error);
	if (ret)
		goto exit;
	TAILQ_INSERT_TAIL(list, flow, next);
	DRV_LOG(DEBUG, "port %u flow created %p", dev->data->port_id,
		(void *)flow);
	return flow;
exit:
	DRV_LOG(ERR, "port %u flow creation error: %s", dev->data->port_id,
		error->message);
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser.queue[i].ibv_attr)
			rte_free(parser.queue[i].ibv_attr);
	}
	rte_free(flow);
	return NULL;
}
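/*
 * Usage sketch (illustrative only, not part of the driver): the list
 * variant above backs the public rte_flow_create() entry point. An
 * application-side equivalent, assuming a configured port "port_id" with
 * at least one Rx queue, would look like:
 *
 *	struct rte_flow_attr attr = { .ingress = 1 };
 *	struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 *	struct rte_flow_action_queue queue = { .index = 0 };
 *	struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 *	struct rte_flow_error err;
 *	struct rte_flow *flow = rte_flow_create(port_id, &attr, pattern,
 *						actions, &err);
 */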
/**
 * Validate a flow supported by the NIC.
 *
 * @see rte_flow_validate()
 * @see rte_flow_ops
 */
int
mlx5_flow_validate(struct rte_eth_dev *dev,
		   const struct rte_flow_attr *attr,
		   const struct rte_flow_item items[],
		   const struct rte_flow_action actions[],
		   struct rte_flow_error *error)
{
	struct mlx5_flow_parse parser = { .create = 0, };

	return mlx5_flow_convert(dev, attr, items, actions, error, &parser);
}

/**
 * Create a flow.
 *
 * @see rte_flow_create()
 * @see rte_flow_ops
 */
struct rte_flow *
mlx5_flow_create(struct rte_eth_dev *dev,
		 const struct rte_flow_attr *attr,
		 const struct rte_flow_item items[],
		 const struct rte_flow_action actions[],
		 struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	return mlx5_flow_list_create(dev, &priv->flows, attr, items, actions,
				     error);
}
/**
 * Destroy a flow in a list.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param list
 *   Pointer to a TAILQ flow list.
 * @param[in] flow
 *   Flow to destroy.
 */
static void
mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
		       struct rte_flow *flow)
{
	struct priv *priv = dev->data->dev_private;
	unsigned int i;

	if (flow->drop || !dev->data->dev_started)
		goto free;
	for (i = 0; flow->tunnel && i != flow->rss_conf.queue_num; ++i) {
		/* Update queue tunnel type. */
		struct mlx5_rxq_data *rxq_data = (*priv->rxqs)
						 [(*flow->queues)[i]];
		struct mlx5_rxq_ctrl *rxq_ctrl =
			container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
		uint8_t tunnel = PTYPE_IDX(flow->tunnel);

		assert(rxq_ctrl->tunnel_types[tunnel] > 0);
		rxq_ctrl->tunnel_types[tunnel] -= 1;
		if (!rxq_ctrl->tunnel_types[tunnel]) {
			/* Update tunnel type. */
			uint8_t j;
			uint8_t types = 0;
			uint8_t last;

			for (j = 0; j < RTE_DIM(rxq_ctrl->tunnel_types); j++)
				if (rxq_ctrl->tunnel_types[j]) {
					types += 1;
					last = j;
				}
			/* Keep same if more than one tunnel type is left. */
			if (types == 1)
				rxq_data->tunnel = ptype_ext[last];
			else if (types == 0)
				/* No tunnel type left. */
				rxq_data->tunnel = 0;
		}
	}
	for (i = 0; flow->mark && i != flow->rss_conf.queue_num; ++i) {
		struct rte_flow *tmp;
		int mark = 0;

		/*
		 * To remove the mark from the queue, the queue must not be
		 * present in any other marked flow (RSS or not).
		 */
		TAILQ_FOREACH(tmp, list, next) {
			unsigned int j;
			uint16_t *tqs = NULL;
			uint16_t tq_n = 0;

			if (!tmp->mark)
				continue;
			for (j = 0; j != hash_rxq_init_n; ++j) {
				if (!tmp->frxq[j].hrxq)
					continue;
				tqs = tmp->frxq[j].hrxq->ind_table->queues;
				tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
			}
			if (!tq_n)
				continue;
			for (j = 0; (j != tq_n) && !mark; j++)
				if (tqs[j] == (*flow->queues)[i])
					mark = 1;
		}
		(*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
	}
free:
	if (flow->drop) {
		if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
			claim_zero(mlx5_glue->destroy_flow
				   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
		rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
	} else {
		for (i = 0; i != hash_rxq_init_n; ++i) {
			struct mlx5_flow *frxq = &flow->frxq[i];

			if (frxq->ibv_flow)
				claim_zero(mlx5_glue->destroy_flow
					   (frxq->ibv_flow));
			if (frxq->hrxq)
				mlx5_hrxq_release(dev, frxq->hrxq);
			if (frxq->ibv_attr)
				rte_free(frxq->ibv_attr);
		}
	}
	if (flow->cs) {
		claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
		flow->cs = NULL;
	}
	TAILQ_REMOVE(list, flow, next);
	DRV_LOG(DEBUG, "port %u flow destroyed %p", dev->data->port_id,
		(void *)flow);
	rte_free(flow);
}
/**
 * Destroy all flows.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param list
 *   Pointer to a TAILQ flow list.
 */
void
mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
{
	while (!TAILQ_EMPTY(list)) {
		struct rte_flow *flow;

		flow = TAILQ_FIRST(list);
		mlx5_flow_list_destroy(dev, list, flow);
	}
}
/**
 * Create drop queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_create_drop_queue(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_hrxq_drop *fdq = NULL;

	assert(priv->pd);
	assert(priv->ctx);
	fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
	if (!fdq) {
		DRV_LOG(WARNING,
			"port %u cannot allocate memory for drop queue",
			dev->data->port_id);
		rte_errno = ENOMEM;
		return -rte_errno;
	}
	fdq->cq = mlx5_glue->create_cq(priv->ctx, 1, NULL, NULL, 0);
	if (!fdq->cq) {
		DRV_LOG(WARNING, "port %u cannot allocate CQ for drop queue",
			dev->data->port_id);
		rte_errno = errno;
		goto error;
	}
	fdq->wq = mlx5_glue->create_wq
		(priv->ctx,
		 &(struct ibv_wq_init_attr){
			.wq_type = IBV_WQT_RQ,
			.max_wr = 1,
			.max_sge = 1,
			.pd = priv->pd,
			.cq = fdq->cq,
		 });
	if (!fdq->wq) {
		DRV_LOG(WARNING, "port %u cannot allocate WQ for drop queue",
			dev->data->port_id);
		rte_errno = errno;
		goto error;
	}
	fdq->ind_table = mlx5_glue->create_rwq_ind_table
		(priv->ctx,
		 &(struct ibv_rwq_ind_table_init_attr){
			.log_ind_tbl_size = 0,
			.ind_tbl = &fdq->wq,
			.comp_mask = 0,
		 });
	if (!fdq->ind_table) {
		DRV_LOG(WARNING,
			"port %u cannot allocate indirection table for drop"
			" queue",
			dev->data->port_id);
		rte_errno = errno;
		goto error;
	}
	fdq->qp = mlx5_glue->create_qp_ex
		(priv->ctx,
		 &(struct ibv_qp_init_attr_ex){
			.qp_type = IBV_QPT_RAW_PACKET,
			.comp_mask =
				IBV_QP_INIT_ATTR_PD |
				IBV_QP_INIT_ATTR_IND_TABLE |
				IBV_QP_INIT_ATTR_RX_HASH,
			.rx_hash_conf = (struct ibv_rx_hash_conf){
				.rx_hash_function =
					IBV_RX_HASH_FUNC_TOEPLITZ,
				.rx_hash_key_len = rss_hash_default_key_len,
				.rx_hash_key = rss_hash_default_key,
				.rx_hash_fields_mask = 0,
				},
			.rwq_ind_tbl = fdq->ind_table,
			.pd = priv->pd,
		 });
	if (!fdq->qp) {
		DRV_LOG(WARNING, "port %u cannot allocate QP for drop queue",
			dev->data->port_id);
		rte_errno = errno;
		goto error;
	}
	priv->flow_drop_queue = fdq;
	return 0;
error:
	if (fdq->qp)
		claim_zero(mlx5_glue->destroy_qp(fdq->qp));
	if (fdq->ind_table)
		claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
	if (fdq->wq)
		claim_zero(mlx5_glue->destroy_wq(fdq->wq));
	if (fdq->cq)
		claim_zero(mlx5_glue->destroy_cq(fdq->cq));
	rte_free(fdq);
	priv->flow_drop_queue = NULL;
	return -rte_errno;
}
/**
 * Delete drop queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
void
mlx5_flow_delete_drop_queue(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;

	if (!fdq)
		return;
	if (fdq->qp)
		claim_zero(mlx5_glue->destroy_qp(fdq->qp));
	if (fdq->ind_table)
		claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
	if (fdq->wq)
		claim_zero(mlx5_glue->destroy_wq(fdq->wq));
	if (fdq->cq)
		claim_zero(mlx5_glue->destroy_cq(fdq->cq));
	rte_free(fdq);
	priv->flow_drop_queue = NULL;
}
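/*
 * Design note (editorial, not from the original source): the drop queue is
 * a chain of Verbs objects created in dependency order, CQ -> WQ ->
 * indirection table -> hash QP. Both the error path of
 * mlx5_flow_create_drop_queue() and mlx5_flow_delete_drop_queue() release
 * them in the reverse order (QP first, CQ last) so that no object is ever
 * destroyed while another still references it.
 */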
/**
 * Remove all flows.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param list
 *   Pointer to a TAILQ flow list.
 */
void
mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
{
	struct priv *priv = dev->data->dev_private;
	struct rte_flow *flow;
	unsigned int i;

	TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
		struct mlx5_ind_table_ibv *ind_tbl = NULL;

		if (flow->drop) {
			if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
				continue;
			claim_zero(mlx5_glue->destroy_flow
				   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
			flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
			DRV_LOG(DEBUG, "port %u flow %p removed",
				dev->data->port_id, (void *)flow);
			/* Next flow. */
			continue;
		}
		/* Verify the flow has not already been cleaned. */
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (!flow->frxq[i].ibv_flow)
				continue;
			/*
			 * Indirection table may be necessary to remove the
			 * flags in the Rx queues.
			 * This helps to speed-up the process by avoiding
			 * another loop.
			 */
			ind_tbl = flow->frxq[i].hrxq->ind_table;
			break;
		}
		if (i == hash_rxq_init_n)
			return;
		if (flow->mark) {
			assert(ind_tbl);
			for (i = 0; i != ind_tbl->queues_n; ++i)
				(*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
		}
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (!flow->frxq[i].ibv_flow)
				continue;
			claim_zero(mlx5_glue->destroy_flow
				   (flow->frxq[i].ibv_flow));
			flow->frxq[i].ibv_flow = NULL;
			mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
			flow->frxq[i].hrxq = NULL;
		}
		DRV_LOG(DEBUG, "port %u flow %p removed", dev->data->port_id,
			(void *)flow);
	}
	/* Cleanup Rx queue tunnel info. */
	for (i = 0; i != priv->rxqs_n; ++i) {
		struct mlx5_rxq_data *q = (*priv->rxqs)[i];
		struct mlx5_rxq_ctrl *rxq_ctrl =
			container_of(q, struct mlx5_rxq_ctrl, rxq);

		if (!q)
			continue;
		memset((void *)rxq_ctrl->tunnel_types, 0,
		       sizeof(rxq_ctrl->tunnel_types));
		q->tunnel = 0;
	}
}
/**
 * Add all flows.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param list
 *   Pointer to a TAILQ flow list.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
{
	struct priv *priv = dev->data->dev_private;
	struct rte_flow *flow;

	TAILQ_FOREACH(flow, list, next) {
		unsigned int i;

		if (flow->drop) {
			flow->frxq[HASH_RXQ_ETH].ibv_flow =
				mlx5_glue->create_flow
				(priv->flow_drop_queue->qp,
				 flow->frxq[HASH_RXQ_ETH].ibv_attr);
			if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
				DRV_LOG(DEBUG,
					"port %u flow %p cannot be applied",
					dev->data->port_id, (void *)flow);
				rte_errno = EINVAL;
				return -rte_errno;
			}
			DRV_LOG(DEBUG, "port %u flow %p applied",
				dev->data->port_id, (void *)flow);
			/* Next flow. */
			continue;
		}
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (!flow->frxq[i].ibv_attr)
				continue;
			flow->frxq[i].hrxq =
				mlx5_hrxq_get(dev, flow->rss_conf.key,
					      flow->rss_conf.key_len,
					      flow->frxq[i].hash_fields,
					      flow->rss_conf.queue,
					      flow->rss_conf.queue_num,
					      flow->tunnel,
					      flow->rss_conf.level);
			if (flow->frxq[i].hrxq)
				goto flow_create;
			flow->frxq[i].hrxq =
				mlx5_hrxq_new(dev, flow->rss_conf.key,
					      flow->rss_conf.key_len,
					      flow->frxq[i].hash_fields,
					      flow->rss_conf.queue,
					      flow->rss_conf.queue_num,
					      flow->tunnel,
					      flow->rss_conf.level);
			if (!flow->frxq[i].hrxq) {
				DRV_LOG(DEBUG,
					"port %u flow %p cannot create hash"
					" rxq",
					dev->data->port_id, (void *)flow);
				rte_errno = EINVAL;
				return -rte_errno;
			}
flow_create:
			mlx5_flow_dump(dev, flow, i);
			flow->frxq[i].ibv_flow =
				mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
						       flow->frxq[i].ibv_attr);
			if (!flow->frxq[i].ibv_flow) {
				DRV_LOG(DEBUG,
					"port %u flow %p type %u cannot be"
					" applied",
					dev->data->port_id, (void *)flow, i);
				rte_errno = EINVAL;
				return -rte_errno;
			}
		}
		mlx5_flow_create_update_rxqs(dev, flow);
	}
	return 0;
}
/**
 * Verify the flow list is empty.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return the number of flows not released.
 */
int
mlx5_flow_verify(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	struct rte_flow *flow;
	int ret = 0;

	TAILQ_FOREACH(flow, &priv->flows, next) {
		DRV_LOG(DEBUG, "port %u flow %p still referenced",
			dev->data->port_id, (void *)flow);
		++ret;
	}
	return ret;
}
/**
 * Enable a control flow configured from the control plane.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param eth_spec
 *   An Ethernet flow spec to apply.
 * @param eth_mask
 *   An Ethernet flow mask to apply.
 * @param vlan_spec
 *   A VLAN flow spec to apply.
 * @param vlan_mask
 *   A VLAN flow mask to apply.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
		    struct rte_flow_item_eth *eth_spec,
		    struct rte_flow_item_eth *eth_mask,
		    struct rte_flow_item_vlan *vlan_spec,
		    struct rte_flow_item_vlan *vlan_mask)
{
	struct priv *priv = dev->data->dev_private;
	const struct rte_flow_attr attr = {
		.ingress = 1,
		.priority = MLX5_CTRL_FLOW_PRIORITY,
	};
	struct rte_flow_item items[] = {
		{
			.type = RTE_FLOW_ITEM_TYPE_ETH,
			.spec = eth_spec,
			.last = NULL,
			.mask = eth_mask,
		},
		{
			.type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
					      RTE_FLOW_ITEM_TYPE_END,
			.spec = vlan_spec,
			.last = NULL,
			.mask = vlan_mask,
		},
		{
			.type = RTE_FLOW_ITEM_TYPE_END,
		},
	};
	uint16_t queue[priv->reta_idx_n];
	struct rte_flow_action_rss action_rss = {
		.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
		.level = 0,
		.types = priv->rss_conf.rss_hf,
		.key_len = priv->rss_conf.rss_key_len,
		.queue_num = priv->reta_idx_n,
		.key = priv->rss_conf.rss_key,
		.queue = queue,
	};
	struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_RSS,
			.conf = &action_rss,
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		},
	};
	struct rte_flow *flow;
	struct rte_flow_error error;
	unsigned int i;

	if (!priv->reta_idx_n) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	for (i = 0; i != priv->reta_idx_n; ++i)
		queue[i] = (*priv->reta_idx)[i];
	flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
				     actions, &error);
	if (!flow)
		return -rte_errno;
	return 0;
}
/**
 * Enable a control flow configured from the control plane.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param eth_spec
 *   An Ethernet flow spec to apply.
 * @param eth_mask
 *   An Ethernet flow mask to apply.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_ctrl_flow(struct rte_eth_dev *dev,
	       struct rte_flow_item_eth *eth_spec,
	       struct rte_flow_item_eth *eth_mask)
{
	return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
}
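/*
 * Usage sketch (illustrative only, not part of the driver): the traffic
 * enabling path uses helpers like this one; accepting broadcast frames,
 * for instance, boils down to roughly:
 *
 *	struct rte_flow_item_eth bcast = {
 *		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *	};
 *
 *	if (mlx5_ctrl_flow(dev, &bcast, &bcast))
 *		return -rte_errno;
 */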
/**
 * Destroy a flow.
 *
 * @see rte_flow_destroy()
 * @see rte_flow_ops
 */
int
mlx5_flow_destroy(struct rte_eth_dev *dev,
		  struct rte_flow *flow,
		  struct rte_flow_error *error __rte_unused)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_destroy(dev, &priv->flows, flow);
	return 0;
}
/**
 * Destroy all flows.
 *
 * @see rte_flow_flush()
 * @see rte_flow_ops
 */
int
mlx5_flow_flush(struct rte_eth_dev *dev,
		struct rte_flow_error *error __rte_unused)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_flush(dev, &priv->flows);
	return 0;
}
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
/**
 * Query flow counter.
 *
 * @param cs
 *   The counter set to query.
 * @param counter_stats
 *   Last counter snapshot to compute deltas against.
 * @param[out] query_count
 *   Returned data from the counter.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_query_count(struct ibv_counter_set *cs,
		      struct mlx5_flow_counter_stats *counter_stats,
		      struct rte_flow_query_count *query_count,
		      struct rte_flow_error *error)
{
	uint64_t counters[2];
	struct ibv_query_counter_set_attr query_cs_attr = {
		.cs = cs,
		.query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
	};
	struct ibv_counter_set_data query_out = {
		.out = counters,
		.outlen = 2 * sizeof(uint64_t),
	};
	int err = mlx5_glue->query_counter_set(&query_cs_attr, &query_out);

	if (err)
		return rte_flow_error_set(error, err,
					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
					  NULL,
					  "cannot read counter");
	query_count->hits_set = 1;
	query_count->bytes_set = 1;
	query_count->hits = counters[0] - counter_stats->hits;
	query_count->bytes = counters[1] - counter_stats->bytes;
	if (query_count->reset) {
		counter_stats->hits = counters[0];
		counter_stats->bytes = counters[1];
	}
	return 0;
}
/**
 * Query a flow.
 *
 * @see rte_flow_query()
 * @see rte_flow_ops
 */
int
mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
		struct rte_flow *flow,
		enum rte_flow_action_type action __rte_unused,
		void *data,
		struct rte_flow_error *error)
{
	if (flow->cs) {
		int ret;

		ret = mlx5_flow_query_count(flow->cs,
					    &flow->counter_stats,
					    (struct rte_flow_query_count *)data,
					    error);
		if (ret)
			return ret;
	} else {
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
					  NULL,
					  "no counter found for flow");
	}
	return 0;
}
#endif
/**
 * Isolated mode.
 *
 * @see rte_flow_isolate()
 * @see rte_flow_ops
 */
int
mlx5_flow_isolate(struct rte_eth_dev *dev,
		  int enable,
		  struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	if (dev->data->dev_started) {
		rte_flow_error_set(error, EBUSY,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "port must be stopped first");
		return -rte_errno;
	}
	priv->isolated = !!enable;
	if (enable)
		priv->dev->dev_ops = &mlx5_dev_ops_isolate;
	else
		priv->dev->dev_ops = &mlx5_dev_ops;
	return 0;
}
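/*
 * Usage sketch (illustrative only, not part of the driver): isolated mode
 * must be requested while the port is stopped, typically right after
 * configuration and before the first start:
 *
 *	struct rte_flow_error err;
 *
 *	rte_eth_dev_configure(port_id, 1, 1, &conf);
 *	if (rte_flow_isolate(port_id, 1, &err))
 *		rte_exit(EXIT_FAILURE, "cannot enter isolated mode: %s\n",
 *			 err.message);
 *	rte_eth_dev_start(port_id);
 */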
/**
 * Convert a flow director filter to a generic flow.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Flow director filter to add.
 * @param attributes
 *   Generic flow parameters structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
			 const struct rte_eth_fdir_filter *fdir_filter,
			 struct mlx5_fdir *attributes)
{
	struct priv *priv = dev->data->dev_private;
	const struct rte_eth_fdir_input *input = &fdir_filter->input;
	const struct rte_eth_fdir_masks *mask =
		&dev->data->dev_conf.fdir_conf.mask;

	/* Validate queue number. */
	if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
		DRV_LOG(ERR, "port %u invalid queue number %d",
			dev->data->port_id, fdir_filter->action.rx_queue);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	attributes->attr.ingress = 1;
	attributes->items[0] = (struct rte_flow_item) {
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.spec = &attributes->l2,
		.mask = &attributes->l2_mask,
	};
	switch (fdir_filter->action.behavior) {
	case RTE_ETH_FDIR_ACCEPT:
		attributes->actions[0] = (struct rte_flow_action){
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &attributes->queue,
		};
		break;
	case RTE_ETH_FDIR_REJECT:
		attributes->actions[0] = (struct rte_flow_action){
			.type = RTE_FLOW_ACTION_TYPE_DROP,
		};
		break;
	default:
		DRV_LOG(ERR, "port %u invalid behavior %d",
			dev->data->port_id,
			fdir_filter->action.behavior);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	attributes->queue.index = fdir_filter->action.rx_queue;
	/* Handle L3. */
	switch (fdir_filter->input.flow_type) {
	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = input->flow.ip4_flow.src_ip,
			.dst_addr = input->flow.ip4_flow.dst_ip,
			.time_to_live = input->flow.ip4_flow.ttl,
			.type_of_service = input->flow.ip4_flow.tos,
			.next_proto_id = input->flow.ip4_flow.proto,
		};
		attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = mask->ipv4_mask.src_ip,
			.dst_addr = mask->ipv4_mask.dst_ip,
			.time_to_live = mask->ipv4_mask.ttl,
			.type_of_service = mask->ipv4_mask.tos,
			.next_proto_id = mask->ipv4_mask.proto,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV4,
			.spec = &attributes->l3,
			.mask = &attributes->l3_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
			.hop_limits = input->flow.ipv6_flow.hop_limits,
			.proto = input->flow.ipv6_flow.proto,
		};

		memcpy(attributes->l3.ipv6.hdr.src_addr,
		       input->flow.ipv6_flow.src_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
		memcpy(attributes->l3.ipv6.hdr.dst_addr,
		       input->flow.ipv6_flow.dst_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
		memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
		       mask->ipv6_mask.src_ip,
		       RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
		memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
		       mask->ipv6_mask.dst_ip,
		       RTE_DIM(attributes->l3_mask.ipv6.hdr.dst_addr));
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV6,
			.spec = &attributes->l3,
			.mask = &attributes->l3_mask,
		};
		break;
	default:
		DRV_LOG(ERR, "port %u invalid flow type %d",
			dev->data->port_id, fdir_filter->input.flow_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	/* Handle L4. */
	switch (fdir_filter->input.flow_type) {
	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
		attributes->l4.udp.hdr = (struct udp_hdr){
			.src_port = input->flow.udp4_flow.src_port,
			.dst_port = input->flow.udp4_flow.dst_port,
		};
		attributes->l4_mask.udp.hdr = (struct udp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_UDP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
		attributes->l4.tcp.hdr = (struct tcp_hdr){
			.src_port = input->flow.tcp4_flow.src_port,
			.dst_port = input->flow.tcp4_flow.dst_port,
		};
		attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_TCP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
		attributes->l4.udp.hdr = (struct udp_hdr){
			.src_port = input->flow.udp6_flow.src_port,
			.dst_port = input->flow.udp6_flow.dst_port,
		};
		attributes->l4_mask.udp.hdr = (struct udp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_UDP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
		attributes->l4.tcp.hdr = (struct tcp_hdr){
			.src_port = input->flow.tcp6_flow.src_port,
			.dst_port = input->flow.tcp6_flow.dst_port,
		};
		attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_TCP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
		break;
	default:
		DRV_LOG(ERR, "port %u invalid flow type %d",
			dev->data->port_id, fdir_filter->input.flow_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	return 0;
}
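/*
 * Illustrative example (editorial, not from the original source): a flow
 * director filter steering IPv4/UDP packets with destination port 4789 to
 * queue 3 would be converted by the function above into the equivalent
 * generic pattern and action lists:
 *
 *	ETH -> IPV4 (address spec and mask) -> UDP (port spec and mask) -> END
 *	QUEUE { .index = 3 } -> END
 */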
/**
 * Add new flow director filter and store it in list.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Flow director filter to add.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_add(struct rte_eth_dev *dev,
		     const struct rte_eth_fdir_filter *fdir_filter)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_fdir attributes = {
		.attr.group = 0,
		.l2_mask = {
			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.type = 0,
		},
	};
	struct mlx5_flow_parse parser = {
		.layer = HASH_RXQ_ETH,
	};
	struct rte_flow_error error;
	struct rte_flow *flow;
	int ret;

	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
	if (ret)
		return ret;
	ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
				attributes.actions, &error, &parser);
	if (ret)
		return ret;
	flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
				     attributes.items, attributes.actions,
				     &error);
	if (flow) {
		DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
			(void *)flow);
		return 0;
	}
	return -rte_errno;
}
/**
 * Delete specific filter.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Filter to be deleted.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_delete(struct rte_eth_dev *dev,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_fdir attributes = {
		.attr.group = 0,
	};
	struct mlx5_flow_parse parser = {
		.create = 1,
		.layer = HASH_RXQ_ETH,
	};
	struct rte_flow_error error;
	struct rte_flow *flow;
	unsigned int i;
	int ret;

	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
	if (ret)
		return ret;
	ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
				attributes.actions, &error, &parser);
	if (ret)
		goto exit;
	/*
	 * Special case for drop action which is only set in the
	 * specifications when the flow is created. In this situation the
	 * drop specification is missing.
	 */
	if (parser.drop) {
		struct ibv_flow_spec_action_drop *drop;

		drop = (void *)((uintptr_t)parser.queue[HASH_RXQ_ETH].ibv_attr +
				parser.queue[HASH_RXQ_ETH].offset);
		*drop = (struct ibv_flow_spec_action_drop){
			.type = IBV_FLOW_SPEC_ACTION_DROP,
			.size = sizeof(struct ibv_flow_spec_action_drop),
		};
		parser.queue[HASH_RXQ_ETH].ibv_attr->num_of_specs++;
	}
	TAILQ_FOREACH(flow, &priv->flows, next) {
		struct ibv_flow_attr *attr;
		struct ibv_spec_header *attr_h;
		void *spec;
		struct ibv_flow_attr *flow_attr;
		struct ibv_spec_header *flow_h;
		void *flow_spec;
		unsigned int specs_n;

		attr = parser.queue[HASH_RXQ_ETH].ibv_attr;
		flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr;
		/* Compare first the attributes. */
		if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
			continue;
		if (attr->num_of_specs == 0)
			continue;
		spec = (void *)((uintptr_t)attr +
				sizeof(struct ibv_flow_attr));
		flow_spec = (void *)((uintptr_t)flow_attr +
				     sizeof(struct ibv_flow_attr));
		specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
		for (i = 0; i != specs_n; ++i) {
			attr_h = spec;
			flow_h = flow_spec;
			if (memcmp(spec, flow_spec,
				   RTE_MIN(attr_h->size, flow_h->size)))
				goto wrong_flow;
			spec = (void *)((uintptr_t)spec + attr_h->size);
			flow_spec = (void *)((uintptr_t)flow_spec +
					     flow_h->size);
		}
		/* At this point, the flow matches. */
		break;
wrong_flow:
		/* The flow does not match. */
		continue;
	}
	ret = rte_errno; /* Save rte_errno before cleanup. */
	if (flow)
		mlx5_flow_list_destroy(dev, &priv->flows, flow);
exit:
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser.queue[i].ibv_attr)
			rte_free(parser.queue[i].ibv_attr);
	}
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}
/**
 * Update queue for specific filter.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Filter to be updated.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_update(struct rte_eth_dev *dev,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	int ret;

	ret = mlx5_fdir_filter_delete(dev, fdir_filter);
	if (ret)
		return ret;
	return mlx5_fdir_filter_add(dev, fdir_filter);
}

/**
 * Flush all filters.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
static void
mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_flush(dev, &priv->flows);
}
/**
 * Get flow director information.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[out] fdir_info
 *   Resulting flow director information.
 */
static void
mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
{
	struct priv *priv = dev->data->dev_private;
	struct rte_eth_fdir_masks *mask =
		&priv->dev->data->dev_conf.fdir_conf.mask;

	fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
	fdir_info->guarant_spc = 0;
	rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
	fdir_info->max_flexpayload = 0;
	fdir_info->flow_types_mask[0] = 0;
	fdir_info->flex_payload_unit = 0;
	fdir_info->max_flex_payload_segment_num = 0;
	fdir_info->flex_payload_limit = 0;
	memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
}
/**
 * Deal with flow director operations.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
		    void *arg)
{
	struct priv *priv = dev->data->dev_private;
	enum rte_fdir_mode fdir_mode =
		priv->dev->data->dev_conf.fdir_conf.mode;

	if (filter_op == RTE_ETH_FILTER_NOP)
		return 0;
	if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
	    fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
		DRV_LOG(ERR, "port %u flow director mode %d not supported",
			dev->data->port_id, fdir_mode);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	switch (filter_op) {
	case RTE_ETH_FILTER_ADD:
		return mlx5_fdir_filter_add(dev, arg);
	case RTE_ETH_FILTER_UPDATE:
		return mlx5_fdir_filter_update(dev, arg);
	case RTE_ETH_FILTER_DELETE:
		return mlx5_fdir_filter_delete(dev, arg);
	case RTE_ETH_FILTER_FLUSH:
		mlx5_fdir_filter_flush(dev);
		break;
	case RTE_ETH_FILTER_INFO:
		mlx5_fdir_info_get(dev, arg);
		break;
	default:
		DRV_LOG(DEBUG, "port %u unknown operation %u",
			dev->data->port_id, filter_op);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	return 0;
}
/**
 * Manage filter operations.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param filter_type
 *   Filter type.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
		     enum rte_filter_type filter_type,
		     enum rte_filter_op filter_op,
		     void *arg)
{
	switch (filter_type) {
	case RTE_ETH_FILTER_GENERIC:
		if (filter_op != RTE_ETH_FILTER_GET) {
			rte_errno = EINVAL;
			return -rte_errno;
		}
		*(const void **)arg = &mlx5_flow_ops;
		return 0;
	case RTE_ETH_FILTER_FDIR:
		return mlx5_fdir_ctrl_func(dev, filter_op, arg);
	default:
		DRV_LOG(ERR, "port %u filter type (%d) not supported",
			dev->data->port_id, filter_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	return 0;
}
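/*
 * Usage sketch (illustrative only, not part of the driver): the GENERIC
 * filter type is how the rte_flow layer discovers this driver's flow ops;
 * the equivalent direct call would be:
 *
 *	const struct rte_flow_ops *ops;
 *
 *	mlx5_dev_filter_ctrl(dev, RTE_ETH_FILTER_GENERIC,
 *			     RTE_ETH_FILTER_GET, &ops);
 */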
/**
 * Detect number of Verbs flow priorities supported.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   Number of supported Verbs flow priorities.
 */
unsigned int
mlx5_get_max_verbs_prio(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	unsigned int verb_priorities = MLX5_VERBS_FLOW_PRIO_8;
	struct {
		struct ibv_flow_attr attr;
		struct ibv_flow_spec_eth eth;
		struct ibv_flow_spec_action_drop drop;
	} flow_attr = {
		.attr = {
			.num_of_specs = 2,
		},
		.eth = {
			.type = IBV_FLOW_SPEC_ETH,
			.size = sizeof(struct ibv_flow_spec_eth),
		},
		.drop = {
			.size = sizeof(struct ibv_flow_spec_action_drop),
			.type = IBV_FLOW_SPEC_ACTION_DROP,
		},
	};
	struct ibv_flow *flow;

	do {
		flow_attr.attr.priority = verb_priorities - 1;
		flow = mlx5_glue->create_flow(priv->flow_drop_queue->qp,
					      &flow_attr.attr);
		if (flow) {
			claim_zero(mlx5_glue->destroy_flow(flow));
			/* Try more priorities. */
			verb_priorities *= 2;
		} else {
			/* Failed, restore last right number. */
			verb_priorities /= 2;
			break;
		}
	} while (1);
	DRV_LOG(DEBUG, "port %u Verbs flow priorities: %d,"
		" user flow priorities: %d",
		dev->data->port_id, verb_priorities, MLX5_CTRL_FLOW_PRIORITY);
	return verb_priorities;
}
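/*
 * Illustrative note (not part of the driver): the probe above doubles the
 * priority count until flow creation fails. On a device exposing 16
 * priorities the attempts are priority 7 (ok), 15 (ok), 31 (fails), after
 * which verb_priorities is halved back to 16 and returned.
 */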