1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2016 6WIND S.A.
3 * Copyright 2016 Mellanox Technologies, Ltd
/* ISO C doesn't support unnamed structs/unions; disable -Wpedantic around the Verbs header. */
13 #pragma GCC diagnostic ignored "-Wpedantic"
15 #include <infiniband/verbs.h>
17 #pragma GCC diagnostic error "-Wpedantic"
20 #include <rte_common.h>
21 #include <rte_ethdev_driver.h>
23 #include <rte_flow_driver.h>
24 #include <rte_malloc.h>
28 #include "mlx5_defs.h"
30 #include "mlx5_glue.h"
32 /* Define minimal priority for control plane flows. */
33 #define MLX5_CTRL_FLOW_PRIORITY 4
35 /* Internet Protocol versions. */
#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
struct ibv_flow_spec_counter_action {
	int dummy;
};
#endif
45 /* Dev ops structure defined in mlx5.c */
46 extern const struct eth_dev_ops mlx5_dev_ops;
47 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
/** Structure given to the conversion functions. */
struct mlx5_flow_data {
	struct mlx5_flow_parse *parser; /**< Parser context. */
	struct rte_flow_error *error; /**< Error context. */
};
static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
		     const void *default_mask,
		     struct mlx5_flow_data *data);

static int
mlx5_flow_create_vlan(const struct rte_flow_item *item,
		      const void *default_mask,
		      struct mlx5_flow_data *data);

static int
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
		      const void *default_mask,
		      struct mlx5_flow_data *data);

static int
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
		      const void *default_mask,
		      struct mlx5_flow_data *data);

static int
mlx5_flow_create_udp(const struct rte_flow_item *item,
		     const void *default_mask,
		     struct mlx5_flow_data *data);

static int
mlx5_flow_create_tcp(const struct rte_flow_item *item,
		     const void *default_mask,
		     struct mlx5_flow_data *data);

static int
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
		       const void *default_mask,
		       struct mlx5_flow_data *data);
struct mlx5_flow_parse;

static void
mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
		      unsigned int size);

static int
mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);

static int
mlx5_flow_create_count(struct rte_eth_dev *dev, struct mlx5_flow_parse *parser);
/* Hash RX queue types. */
enum hash_rxq_type {
	HASH_RXQ_TCPV4,
	HASH_RXQ_UDPV4,
	HASH_RXQ_IPV4,
	HASH_RXQ_TCPV6,
	HASH_RXQ_UDPV6,
	HASH_RXQ_IPV6,
	HASH_RXQ_ETH,
};
113 /* Initialization data for hash RX queue. */
114 struct hash_rxq_init {
115 uint64_t hash_fields; /* Fields that participate in the hash. */
116 uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
117 unsigned int flow_priority; /* Flow priority to use. */
118 unsigned int ip_version; /* Internet protocol. */
121 /* Initialization data for hash RX queues. */
122 const struct hash_rxq_init hash_rxq_init[] = {
124 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
125 IBV_RX_HASH_DST_IPV4 |
126 IBV_RX_HASH_SRC_PORT_TCP |
127 IBV_RX_HASH_DST_PORT_TCP),
128 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
130 .ip_version = MLX5_IPV4,
133 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
134 IBV_RX_HASH_DST_IPV4 |
135 IBV_RX_HASH_SRC_PORT_UDP |
136 IBV_RX_HASH_DST_PORT_UDP),
137 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
139 .ip_version = MLX5_IPV4,
142 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
143 IBV_RX_HASH_DST_IPV4),
144 .dpdk_rss_hf = (ETH_RSS_IPV4 |
147 .ip_version = MLX5_IPV4,
150 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
151 IBV_RX_HASH_DST_IPV6 |
152 IBV_RX_HASH_SRC_PORT_TCP |
153 IBV_RX_HASH_DST_PORT_TCP),
154 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
156 .ip_version = MLX5_IPV6,
159 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
160 IBV_RX_HASH_DST_IPV6 |
161 IBV_RX_HASH_SRC_PORT_UDP |
162 IBV_RX_HASH_DST_PORT_UDP),
163 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
165 .ip_version = MLX5_IPV6,
168 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
169 IBV_RX_HASH_DST_IPV6),
170 .dpdk_rss_hf = (ETH_RSS_IPV6 |
173 .ip_version = MLX5_IPV6,
182 /* Number of entries in hash_rxq_init[]. */
183 const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
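/*
 * Illustrative sketch (not part of the driver): given an application RSS
 * request, the matching hash_rxq_init[] entry supplies the Verbs hash
 * fields. Requesting ETH_RSS_NONFRAG_IPV4_TCP, for instance, selects the
 * TCPv4 entry and therefore hashing on IPv4 addresses plus TCP ports:
 *
 *	uint64_t requested = ETH_RSS_NONFRAG_IPV4_TCP;
 *	uint64_t fields = 0;
 *	unsigned int i;
 *
 *	for (i = 0; i != hash_rxq_init_n; ++i)
 *		if (hash_rxq_init[i].dpdk_rss_hf & requested)
 *			fields |= hash_rxq_init[i].hash_fields;
 *	// fields == IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4 |
 *	//           IBV_RX_HASH_SRC_PORT_TCP | IBV_RX_HASH_DST_PORT_TCP
 */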
185 /** Structure for holding counter stats. */
186 struct mlx5_flow_counter_stats {
187 uint64_t hits; /**< Number of packets matched by the rule. */
188 uint64_t bytes; /**< Number of bytes matched by the rule. */
191 /** Structure for Drop queue. */
192 struct mlx5_hrxq_drop {
193 struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
194 struct ibv_qp *qp; /**< Verbs queue pair. */
195 struct ibv_wq *wq; /**< Verbs work queue. */
196 struct ibv_cq *cq; /**< Verbs completion queue. */
199 /* Flows structures. */
201 uint64_t hash_fields; /**< Fields that participate in the hash. */
202 struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
203 struct ibv_flow *ibv_flow; /**< Verbs flow. */
204 struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
207 /* Drop flows structures. */
208 struct mlx5_flow_drop {
209 struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
210 struct ibv_flow *ibv_flow; /**< Verbs flow. */
214 TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
215 uint32_t mark:1; /**< Set if the flow is marked. */
216 uint32_t drop:1; /**< Drop queue. */
	struct rte_flow_action_rss rss_conf; /**< RSS configuration. */
	uint16_t (*queues)[]; /**< Queue indexes to use. */
	uint8_t rss_key[40]; /**< Copy of the RSS key. */
	struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
	struct mlx5_flow_counter_stats counter_stats; /**< The counter stats. */
222 struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
223 /**< Flow with Rx queue. */
/** Static initializer for items. */
#define ITEMS(...) \
	(const enum rte_flow_item_type []){ \
		__VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
	}
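/*
 * For example (illustrative), ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
 * RTE_FLOW_ITEM_TYPE_IPV6) expands to the compound literal:
 *
 *	(const enum rte_flow_item_type []){
 *		RTE_FLOW_ITEM_TYPE_IPV4, RTE_FLOW_ITEM_TYPE_IPV6,
 *		RTE_FLOW_ITEM_TYPE_END,
 *	}
 *
 * so every .items list below is implicitly END-terminated.
 */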
232 /** Structure to generate a simple graph of layers supported by the NIC. */
233 struct mlx5_flow_items {
234 /** List of possible actions for these items. */
235 const enum rte_flow_action_type *const actions;
	/** Bit-masks corresponding to the possibilities for the item. */
	const void *mask;
	/**
	 * Default bit-masks to use when item->mask is not provided. When
	 * \default_mask is also NULL, the full supported bit-mask (\mask) is
	 * used instead.
	 */
	const void *default_mask;
244 /** Bit-masks size in bytes. */
245 const unsigned int mask_sz;
	/**
	 * Conversion function from rte_flow to NIC specific flow.
	 *
	 * @param item
	 *   rte_flow item to convert.
	 * @param default_mask
	 *   Default bit-masks to use when item->mask is not provided.
	 * @param data
	 *   Internal structure to store the conversion.
	 *
	 * @return
	 *   0 on success, a negative errno value otherwise and rte_errno is
	 *   set.
	 */
260 int (*convert)(const struct rte_flow_item *item,
261 const void *default_mask,
262 struct mlx5_flow_data *data);
263 /** Size in bytes of the destination structure. */
264 const unsigned int dst_sz;
265 /** List of possible following items. */
266 const enum rte_flow_item_type *const items;
/** Valid actions for this PMD. */
270 static const enum rte_flow_action_type valid_actions[] = {
271 RTE_FLOW_ACTION_TYPE_DROP,
272 RTE_FLOW_ACTION_TYPE_QUEUE,
273 RTE_FLOW_ACTION_TYPE_MARK,
274 RTE_FLOW_ACTION_TYPE_FLAG,
275 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
276 RTE_FLOW_ACTION_TYPE_COUNT,
#endif
	RTE_FLOW_ACTION_TYPE_END,
};
281 /** Graph of supported items and associated actions. */
282 static const struct mlx5_flow_items mlx5_flow_items[] = {
283 [RTE_FLOW_ITEM_TYPE_END] = {
284 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
285 RTE_FLOW_ITEM_TYPE_VXLAN),
287 [RTE_FLOW_ITEM_TYPE_ETH] = {
288 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
289 RTE_FLOW_ITEM_TYPE_IPV4,
290 RTE_FLOW_ITEM_TYPE_IPV6),
291 .actions = valid_actions,
292 .mask = &(const struct rte_flow_item_eth){
293 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
294 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
297 .default_mask = &rte_flow_item_eth_mask,
298 .mask_sz = sizeof(struct rte_flow_item_eth),
299 .convert = mlx5_flow_create_eth,
300 .dst_sz = sizeof(struct ibv_flow_spec_eth),
302 [RTE_FLOW_ITEM_TYPE_VLAN] = {
303 .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
304 RTE_FLOW_ITEM_TYPE_IPV6),
305 .actions = valid_actions,
306 .mask = &(const struct rte_flow_item_vlan){
309 .default_mask = &rte_flow_item_vlan_mask,
310 .mask_sz = sizeof(struct rte_flow_item_vlan),
311 .convert = mlx5_flow_create_vlan,
314 [RTE_FLOW_ITEM_TYPE_IPV4] = {
315 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
316 RTE_FLOW_ITEM_TYPE_TCP),
317 .actions = valid_actions,
318 .mask = &(const struct rte_flow_item_ipv4){
322 .type_of_service = -1,
326 .default_mask = &rte_flow_item_ipv4_mask,
327 .mask_sz = sizeof(struct rte_flow_item_ipv4),
328 .convert = mlx5_flow_create_ipv4,
329 .dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
331 [RTE_FLOW_ITEM_TYPE_IPV6] = {
332 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
333 RTE_FLOW_ITEM_TYPE_TCP),
334 .actions = valid_actions,
335 .mask = &(const struct rte_flow_item_ipv6){
338 0xff, 0xff, 0xff, 0xff,
339 0xff, 0xff, 0xff, 0xff,
340 0xff, 0xff, 0xff, 0xff,
341 0xff, 0xff, 0xff, 0xff,
344 0xff, 0xff, 0xff, 0xff,
345 0xff, 0xff, 0xff, 0xff,
346 0xff, 0xff, 0xff, 0xff,
347 0xff, 0xff, 0xff, 0xff,
354 .default_mask = &rte_flow_item_ipv6_mask,
355 .mask_sz = sizeof(struct rte_flow_item_ipv6),
356 .convert = mlx5_flow_create_ipv6,
357 .dst_sz = sizeof(struct ibv_flow_spec_ipv6),
359 [RTE_FLOW_ITEM_TYPE_UDP] = {
360 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
361 .actions = valid_actions,
362 .mask = &(const struct rte_flow_item_udp){
368 .default_mask = &rte_flow_item_udp_mask,
369 .mask_sz = sizeof(struct rte_flow_item_udp),
370 .convert = mlx5_flow_create_udp,
371 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
373 [RTE_FLOW_ITEM_TYPE_TCP] = {
374 .actions = valid_actions,
375 .mask = &(const struct rte_flow_item_tcp){
381 .default_mask = &rte_flow_item_tcp_mask,
382 .mask_sz = sizeof(struct rte_flow_item_tcp),
383 .convert = mlx5_flow_create_tcp,
384 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
386 [RTE_FLOW_ITEM_TYPE_VXLAN] = {
387 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
388 .actions = valid_actions,
389 .mask = &(const struct rte_flow_item_vxlan){
390 .vni = "\xff\xff\xff",
392 .default_mask = &rte_flow_item_vxlan_mask,
393 .mask_sz = sizeof(struct rte_flow_item_vxlan),
394 .convert = mlx5_flow_create_vxlan,
		.dst_sz = sizeof(struct ibv_flow_spec_tunnel),
	},
};
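/*
 * Illustrative example: validating the pattern below walks this graph from
 * RTE_FLOW_ITEM_TYPE_END through ETH -> IPV4 -> UDP; each step is accepted
 * only if it appears in the previous node's .items list:
 *
 *	const struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_UDP },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 */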
399 /** Structure to pass to the conversion function. */
400 struct mlx5_flow_parse {
401 uint32_t inner; /**< Set once VXLAN is encountered. */
	uint32_t create:1;
	/**< Whether resources should remain after a validate. */
404 uint32_t drop:1; /**< Target is a drop queue. */
405 uint32_t mark:1; /**< Mark is present in the flow. */
406 uint32_t count:1; /**< Count is present in the flow. */
407 uint32_t mark_id; /**< Mark identifier. */
	struct rte_flow_action_rss rss_conf; /**< RSS configuration. */
	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indexes to use. */
	uint8_t rss_key[40]; /**< Copy of the RSS key. */
411 enum hash_rxq_type layer; /**< Last pattern layer detected. */
	struct ibv_counter_set *cs; /**< Holds the counter set for the rule. */
	struct {
		struct ibv_flow_attr *ibv_attr;
		/**< Pointer to Verbs attributes. */
		unsigned int offset;
		/**< Current position or total size of the attribute. */
	} queue[RTE_DIM(hash_rxq_init)];
};
421 static const struct rte_flow_ops mlx5_flow_ops = {
422 .validate = mlx5_flow_validate,
423 .create = mlx5_flow_create,
424 .destroy = mlx5_flow_destroy,
425 .flush = mlx5_flow_flush,
426 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
427 .query = mlx5_flow_query,
#else
	.query = NULL,
#endif
	.isolate = mlx5_flow_isolate,
};
/* Convert FDIR request to Generic flow. */
struct mlx5_fdir {
	struct rte_flow_attr attr;
	struct rte_flow_action actions[2];
	struct rte_flow_item items[4];
	struct rte_flow_item_eth l2;
	struct rte_flow_item_eth l2_mask;
	union {
		struct rte_flow_item_ipv4 ipv4;
		struct rte_flow_item_ipv6 ipv6;
	} l3;
	union {
		struct rte_flow_item_ipv4 ipv4;
		struct rte_flow_item_ipv6 ipv6;
	} l3_mask;
	union {
		struct rte_flow_item_udp udp;
		struct rte_flow_item_tcp tcp;
	} l4;
	union {
		struct rte_flow_item_udp udp;
		struct rte_flow_item_tcp tcp;
	} l4_mask;
	struct rte_flow_action_queue queue;
};
/* Verbs specification header. */
struct ibv_spec_header {
	enum ibv_flow_spec_type type;
	uint16_t size;
};
467 * Check support for a given item.
470 * Item specification.
472 * Bit-masks covering supported fields to compare with spec, last and mask in
475 * Bit-Mask size in bytes.
478 * 0 on success, a negative errno value otherwise and rte_errno is set.
481 mlx5_flow_item_validate(const struct rte_flow_item *item,
482 const uint8_t *mask, unsigned int size)
	if (!item->spec && (item->mask || item->last)) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
488 if (item->spec && !item->mask) {
490 const uint8_t *spec = item->spec;
492 for (i = 0; i < size; ++i)
493 if ((spec[i] | mask[i]) != mask[i]) {
498 if (item->last && !item->mask) {
500 const uint8_t *spec = item->last;
502 for (i = 0; i < size; ++i)
503 if ((spec[i] | mask[i]) != mask[i]) {
510 const uint8_t *spec = item->spec;
512 for (i = 0; i < size; ++i)
513 if ((spec[i] | mask[i]) != mask[i]) {
518 if (item->spec && item->last) {
521 const uint8_t *apply = mask;
527 for (i = 0; i < size; ++i) {
528 spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
529 last[i] = ((const uint8_t *)item->last)[i] & apply[i];
		ret = memcmp(spec, last, size);
		if (ret != 0) {
			rte_errno = EINVAL;
			return -rte_errno;
		}
	}
	return 0;
}
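/*
 * Illustrative example: with the ETH supported mask (all-ones MAC
 * addresses), a spec that only sets MAC bytes passes, while a spec setting
 * bits outside the supported mask fails the
 * (spec[i] | mask[i]) != mask[i] test above:
 *
 *	struct rte_flow_item_eth spec = {
 *		.dst.addr_bytes = "\x00\x0c\x29\x01\x02\x03",
 *	};
 *	struct rte_flow_item item = {
 *		.type = RTE_FLOW_ITEM_TYPE_ETH,
 *		.spec = &spec, // checked byte by byte against the mask
 *	};
 */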
/**
 * Extract attribute to the parser.
544 * Flow rule attributes.
546 * Perform verbose error reporting if not NULL.
549 * 0 on success, a negative errno value otherwise and rte_errno is set.
552 mlx5_flow_convert_attributes(const struct rte_flow_attr *attr,
553 struct rte_flow_error *error)
556 rte_flow_error_set(error, ENOTSUP,
557 RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
559 "groups are not supported");
562 if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
563 rte_flow_error_set(error, ENOTSUP,
564 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
566 "priorities are not supported");
570 rte_flow_error_set(error, ENOTSUP,
571 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
573 "egress is not supported");
576 if (!attr->ingress) {
577 rte_flow_error_set(error, ENOTSUP,
578 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
580 "only ingress is supported");
/**
 * Extract actions request to the parser.
590 * Pointer to Ethernet device.
592 * Associated actions (list terminated by the END action).
594 * Perform verbose error reporting if not NULL.
595 * @param[in, out] parser
596 * Internal parser structure.
599 * 0 on success, a negative errno value otherwise and rte_errno is set.
602 mlx5_flow_convert_actions(struct rte_eth_dev *dev,
603 const struct rte_flow_action actions[],
604 struct rte_flow_error *error,
605 struct mlx5_flow_parse *parser)
607 enum { FATE = 1, MARK = 2, COUNT = 4, };
608 uint32_t overlap = 0;
609 struct priv *priv = dev->data->dev_private;
611 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
612 if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
614 } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
616 goto exit_action_overlap;
619 } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
620 const struct rte_flow_action_queue *queue =
621 (const struct rte_flow_action_queue *)
625 goto exit_action_overlap;
627 if (!queue || (queue->index > (priv->rxqs_n - 1)))
628 goto exit_action_not_supported;
629 parser->queues[0] = queue->index;
			parser->rss_conf = (struct rte_flow_action_rss){
				.queue_num = 1,
				.queue = parser->queues,
			};
634 } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
635 const struct rte_flow_action_rss *rss =
636 (const struct rte_flow_action_rss *)
638 const uint8_t *rss_key;
639 uint32_t rss_key_len;
643 goto exit_action_overlap;
645 if (rss->types & MLX5_RSS_HF_MASK) {
646 rte_flow_error_set(error, EINVAL,
647 RTE_FLOW_ERROR_TYPE_ACTION,
649 "unsupported RSS type"
			if (rss->key_len) {
				rss_key_len = rss->key_len;
				rss_key = rss->key;
			} else {
				rss_key_len = rss_hash_default_key_len;
				rss_key = rss_hash_default_key;
			}
660 if (rss_key_len != RTE_DIM(parser->rss_key)) {
661 rte_flow_error_set(error, EINVAL,
662 RTE_FLOW_ERROR_TYPE_ACTION,
664 "RSS hash key must be"
665 " exactly 40 bytes long");
668 if (!rss->queue_num) {
669 rte_flow_error_set(error, EINVAL,
670 RTE_FLOW_ERROR_TYPE_ACTION,
675 if (rss->queue_num > RTE_DIM(parser->queues)) {
676 rte_flow_error_set(error, EINVAL,
677 RTE_FLOW_ERROR_TYPE_ACTION,
679 "too many queues for RSS"
683 for (n = 0; n < rss->queue_num; ++n) {
684 if (rss->queue[n] >= priv->rxqs_n) {
685 rte_flow_error_set(error, EINVAL,
686 RTE_FLOW_ERROR_TYPE_ACTION,
688 "queue id > number of"
			parser->rss_conf = (struct rte_flow_action_rss){
				.types = rss->types,
				.key_len = rss_key_len,
				.queue_num = rss->queue_num,
				.key = memcpy(parser->rss_key, rss_key,
					      sizeof(*rss_key) * rss_key_len),
				.queue = memcpy(parser->queues, rss->queue,
						sizeof(*rss->queue) *
						rss->queue_num),
			};
703 } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
704 const struct rte_flow_action_mark *mark =
705 (const struct rte_flow_action_mark *)
709 goto exit_action_overlap;
712 rte_flow_error_set(error, EINVAL,
713 RTE_FLOW_ERROR_TYPE_ACTION,
715 "mark must be defined");
717 } else if (mark->id >= MLX5_FLOW_MARK_MAX) {
718 rte_flow_error_set(error, ENOTSUP,
719 RTE_FLOW_ERROR_TYPE_ACTION,
721 "mark must be between 0"
726 parser->mark_id = mark->id;
727 } else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
729 goto exit_action_overlap;
732 } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
733 priv->config.flow_counter_en) {
735 goto exit_action_overlap;
739 goto exit_action_not_supported;
742 /* When fate is unknown, drop traffic. */
	if (!(overlap & FATE))
		parser->drop = 1;
	if (parser->drop && parser->mark)
		parser->mark = 0;
747 if (!parser->rss_conf.queue_num && !parser->drop) {
748 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
749 NULL, "no valid action");
753 exit_action_not_supported:
754 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
755 actions, "action not supported");
758 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
759 actions, "overlapping actions are not supported");
767 * Pattern specification (list terminated by the END pattern item).
769 * Perform verbose error reporting if not NULL.
770 * @param[in, out] parser
771 * Internal parser structure.
774 * 0 on success, a negative errno value otherwise and rte_errno is set.
777 mlx5_flow_convert_items_validate(const struct rte_flow_item items[],
778 struct rte_flow_error *error,
779 struct mlx5_flow_parse *parser)
781 const struct mlx5_flow_items *cur_item = mlx5_flow_items;
785 /* Initialise the offsets to start after verbs attribute. */
786 for (i = 0; i != hash_rxq_init_n; ++i)
787 parser->queue[i].offset = sizeof(struct ibv_flow_attr);
788 for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
789 const struct mlx5_flow_items *token = NULL;
792 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
796 cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
798 if (cur_item->items[i] == items->type) {
799 token = &mlx5_flow_items[items->type];
805 goto exit_item_not_supported;
808 ret = mlx5_flow_item_validate(items,
809 (const uint8_t *)cur_item->mask,
812 goto exit_item_not_supported;
813 if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
815 rte_flow_error_set(error, ENOTSUP,
816 RTE_FLOW_ERROR_TYPE_ITEM,
818 "cannot recognize multiple"
819 " VXLAN encapsulations");
822 parser->inner = IBV_FLOW_SPEC_INNER;
825 parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
827 for (n = 0; n != hash_rxq_init_n; ++n)
828 parser->queue[n].offset += cur_item->dst_sz;
832 parser->queue[HASH_RXQ_ETH].offset +=
833 sizeof(struct ibv_flow_spec_action_drop);
836 for (i = 0; i != hash_rxq_init_n; ++i)
837 parser->queue[i].offset +=
838 sizeof(struct ibv_flow_spec_action_tag);
841 unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
843 for (i = 0; i != hash_rxq_init_n; ++i)
844 parser->queue[i].offset += size;
847 exit_item_not_supported:
848 return rte_flow_error_set(error, -ret, RTE_FLOW_ERROR_TYPE_ITEM,
849 items, "item not supported");
853 * Allocate memory space to store verbs flow attributes.
 *   Amount of bytes to allocate.
858 * Perform verbose error reporting if not NULL.
861 * A verbs flow attribute on success, NULL otherwise and rte_errno is set.
863 static struct ibv_flow_attr *
864 mlx5_flow_convert_allocate(unsigned int size, struct rte_flow_error *error)
866 struct ibv_flow_attr *ibv_attr;
868 ibv_attr = rte_calloc(__func__, 1, size, 0);
870 rte_flow_error_set(error, ENOMEM,
871 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
873 "cannot allocate verbs spec attributes");
/**
 * Make inner packet matching with a higher priority than the non-inner
 * matching.
883 * @param[in, out] parser
884 * Internal parser structure.
886 * User flow attribute.
889 mlx5_flow_update_priority(struct mlx5_flow_parse *parser,
890 const struct rte_flow_attr *attr)
{
	unsigned int i;

	if (parser->drop) {
		parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
			attr->priority +
			hash_rxq_init[HASH_RXQ_ETH].flow_priority;
		return;
	}
900 for (i = 0; i != hash_rxq_init_n; ++i) {
901 if (parser->queue[i].ibv_attr) {
			parser->queue[i].ibv_attr->priority =
				attr->priority +
				hash_rxq_init[i].flow_priority -
				(parser->inner ? 1 : 0);
		}
	}
}
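/*
 * Worked example (using the formula above): for a non-drop rule, queue i
 * gets Verbs priority attr->priority + hash_rxq_init[i].flow_priority,
 * minus one when the pattern is tunneled (parser->inner set). Since lower
 * values match first, inner (more specific) matching always takes
 * precedence over its outer counterpart.
 */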
/**
 * Finalise verbs flow attributes.
913 * @param[in, out] parser
914 * Internal parser structure.
917 mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
919 const unsigned int ipv4 =
920 hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
921 const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 : HASH_RXQ_TCPV6;
922 const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
923 const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 : HASH_RXQ_TCPV4;
924 const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 : HASH_RXQ_IPV4;
925 const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
928 /* Remove any other flow not matching the pattern. */
929 if (parser->rss_conf.queue_num == 1 && !parser->rss_conf.types) {
930 for (i = 0; i != hash_rxq_init_n; ++i) {
931 if (i == HASH_RXQ_ETH)
933 rte_free(parser->queue[i].ibv_attr);
934 parser->queue[i].ibv_attr = NULL;
938 if (parser->layer == HASH_RXQ_ETH) {
		 * This layer becomes useless as the pattern defines deeper
		 * layers.
945 rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
946 parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
948 /* Remove opposite kind of layer e.g. IPv6 if the pattern is IPv4. */
949 for (i = ohmin; i != (ohmax + 1); ++i) {
950 if (!parser->queue[i].ibv_attr)
952 rte_free(parser->queue[i].ibv_attr);
953 parser->queue[i].ibv_attr = NULL;
955 /* Remove impossible flow according to the RSS configuration. */
956 if (hash_rxq_init[parser->layer].dpdk_rss_hf &
957 parser->rss_conf.types) {
958 /* Remove any other flow. */
959 for (i = hmin; i != (hmax + 1); ++i) {
960 if ((i == parser->layer) ||
961 (!parser->queue[i].ibv_attr))
963 rte_free(parser->queue[i].ibv_attr);
964 parser->queue[i].ibv_attr = NULL;
966 } else if (!parser->queue[ip].ibv_attr) {
967 /* no RSS possible with the current configuration. */
968 parser->rss_conf.queue_num = 1;
973 * Fill missing layers in verbs specifications, or compute the correct
974 * offset to allocate the memory space for the attributes and
977 for (i = 0; i != hash_rxq_init_n - 1; ++i) {
		union {
			struct ibv_flow_spec_ipv4_ext ipv4;
			struct ibv_flow_spec_ipv6 ipv6;
			struct ibv_flow_spec_tcp_udp udp_tcp;
		} specs;
		void *dst;
		uint16_t size;

		if (i == parser->layer)
			continue;
988 if (parser->layer == HASH_RXQ_ETH) {
989 if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
990 size = sizeof(struct ibv_flow_spec_ipv4_ext);
991 specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
992 .type = IBV_FLOW_SPEC_IPV4_EXT,
996 size = sizeof(struct ibv_flow_spec_ipv6);
997 specs.ipv6 = (struct ibv_flow_spec_ipv6){
998 .type = IBV_FLOW_SPEC_IPV6,
1002 if (parser->queue[i].ibv_attr) {
1003 dst = (void *)((uintptr_t)
1004 parser->queue[i].ibv_attr +
1005 parser->queue[i].offset);
1006 memcpy(dst, &specs, size);
1007 ++parser->queue[i].ibv_attr->num_of_specs;
1009 parser->queue[i].offset += size;
1011 if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
1012 (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
1013 size = sizeof(struct ibv_flow_spec_tcp_udp);
1014 specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
				.type = ((i == HASH_RXQ_UDPV4 ||
					  i == HASH_RXQ_UDPV6) ?
					 IBV_FLOW_SPEC_UDP :
					 IBV_FLOW_SPEC_TCP),
				.size = size,
			};
1021 if (parser->queue[i].ibv_attr) {
1022 dst = (void *)((uintptr_t)
1023 parser->queue[i].ibv_attr +
1024 parser->queue[i].offset);
1025 memcpy(dst, &specs, size);
1026 ++parser->queue[i].ibv_attr->num_of_specs;
1028 parser->queue[i].offset += size;
1034 * Validate and convert a flow supported by the NIC.
1037 * Pointer to Ethernet device.
1039 * Flow rule attributes.
1040 * @param[in] pattern
1041 * Pattern specification (list terminated by the END pattern item).
1042 * @param[in] actions
1043 * Associated actions (list terminated by the END action).
1045 * Perform verbose error reporting if not NULL.
1046 * @param[in, out] parser
1047 * Internal parser structure.
1050 * 0 on success, a negative errno value otherwise and rte_errno is set.
1053 mlx5_flow_convert(struct rte_eth_dev *dev,
1054 const struct rte_flow_attr *attr,
1055 const struct rte_flow_item items[],
1056 const struct rte_flow_action actions[],
1057 struct rte_flow_error *error,
1058 struct mlx5_flow_parse *parser)
1060 const struct mlx5_flow_items *cur_item = mlx5_flow_items;
1064 /* First step. Validate the attributes, items and actions. */
1065 *parser = (struct mlx5_flow_parse){
1066 .create = parser->create,
1067 .layer = HASH_RXQ_ETH,
		.mark_id = MLX5_FLOW_MARK_DEFAULT,
	};
	ret = mlx5_flow_convert_attributes(attr, error);
	if (ret)
		return ret;
	ret = mlx5_flow_convert_actions(dev, actions, error, parser);
	if (ret)
		return ret;
	ret = mlx5_flow_convert_items_validate(items, error, parser);
	if (ret)
		return ret;
	mlx5_flow_convert_finalise(parser);
1082 * Allocate the memory space to store verbs specifications.
1085 unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;
1087 parser->queue[HASH_RXQ_ETH].ibv_attr =
1088 mlx5_flow_convert_allocate(offset, error);
1089 if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
1091 parser->queue[HASH_RXQ_ETH].offset =
1092 sizeof(struct ibv_flow_attr);
1094 for (i = 0; i != hash_rxq_init_n; ++i) {
1095 unsigned int offset;
1097 if (!(parser->rss_conf.types &
1098 hash_rxq_init[i].dpdk_rss_hf) &&
1099 (i != HASH_RXQ_ETH))
1101 offset = parser->queue[i].offset;
1102 parser->queue[i].ibv_attr =
1103 mlx5_flow_convert_allocate(offset, error);
1104 if (!parser->queue[i].ibv_attr)
1106 parser->queue[i].offset = sizeof(struct ibv_flow_attr);
1109 /* Third step. Conversion parse, fill the specifications. */
1111 for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
		struct mlx5_flow_data data = {
			.parser = parser,
			.error = error,
		};

		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
			continue;
1119 cur_item = &mlx5_flow_items[items->type];
		ret = cur_item->convert(items,
					(cur_item->default_mask ?
					 cur_item->default_mask :
					 cur_item->mask),
					&data);
		if (ret)
			goto exit_free;
	if (parser->mark)
		mlx5_flow_create_flag_mark(parser, parser->mark_id);
1130 if (parser->count && parser->create) {
		mlx5_flow_create_count(dev, parser);
		if (!parser->cs)
			goto exit_count_error;
	}
1136 * Last step. Complete missing specification to reach the RSS
	 * configuration.
	 */
	if (!parser->drop)
		mlx5_flow_convert_finalise(parser);
	mlx5_flow_update_priority(parser, attr);
exit_free:
1143 /* Only verification is expected, all resources should be released. */
1144 if (!parser->create) {
1145 for (i = 0; i != hash_rxq_init_n; ++i) {
1146 if (parser->queue[i].ibv_attr) {
1147 rte_free(parser->queue[i].ibv_attr);
1148 parser->queue[i].ibv_attr = NULL;
1154 for (i = 0; i != hash_rxq_init_n; ++i) {
1155 if (parser->queue[i].ibv_attr) {
1156 rte_free(parser->queue[i].ibv_attr);
1157 parser->queue[i].ibv_attr = NULL;
1160 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1161 NULL, "cannot allocate verbs spec attributes");
	return -rte_errno;
exit_count_error:
	rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
			   NULL, "cannot create counter");
	return -rte_errno;
}
1170 * Copy the specification created into the flow.
1173 * Internal parser structure.
1175 * Create specification.
1177 * Size in bytes of the specification to copy.
 */
static void
mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
		      unsigned int size)
{
	unsigned int i;
	void *dst;

1186 for (i = 0; i != hash_rxq_init_n; ++i) {
		if (!parser->queue[i].ibv_attr)
			continue;
1189 /* Specification must be the same l3 type or none. */
1190 if (parser->layer == HASH_RXQ_ETH ||
1191 (hash_rxq_init[parser->layer].ip_version ==
1192 hash_rxq_init[i].ip_version) ||
1193 (hash_rxq_init[i].ip_version == 0)) {
1194 dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1195 parser->queue[i].offset);
1196 memcpy(dst, src, size);
1197 ++parser->queue[i].ibv_attr->num_of_specs;
			parser->queue[i].offset += size;
		}
	}
}
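/*
 * Layout sketch: each queue's ibv_attr grows as specs are copied in, e.g.
 * after an eth then an ipv4 item (offsets are cumulative):
 *
 *	struct ibv_flow_attr          <- parser->queue[i].ibv_attr
 *	struct ibv_flow_spec_eth      <- offset == sizeof(struct ibv_flow_attr)
 *	struct ibv_flow_spec_ipv4_ext <- offset += sizeof(struct ibv_flow_spec_eth)
 */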
/**
 * Convert Ethernet item to Verbs specification.
1207 * Item specification.
1208 * @param default_mask[in]
1209 * Default bit-masks to use when item->mask is not provided.
1210 * @param data[in, out]
1214 * 0 on success, a negative errno value otherwise and rte_errno is set.
1217 mlx5_flow_create_eth(const struct rte_flow_item *item,
1218 const void *default_mask,
1219 struct mlx5_flow_data *data)
1221 const struct rte_flow_item_eth *spec = item->spec;
1222 const struct rte_flow_item_eth *mask = item->mask;
1223 struct mlx5_flow_parse *parser = data->parser;
1224 const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1225 struct ibv_flow_spec_eth eth = {
1226 .type = parser->inner | IBV_FLOW_SPEC_ETH,
		.size = eth_size,
	};

	/* Don't update layer for the inner pattern. */
	if (!parser->inner)
		parser->layer = HASH_RXQ_ETH;
	if (spec) {
		unsigned int i;

		if (!mask)
			mask = default_mask;
1238 memcpy(ð.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
1239 memcpy(ð.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
1240 eth.val.ether_type = spec->type;
1241 memcpy(ð.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
1242 memcpy(ð.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
1243 eth.mask.ether_type = mask->type;
1244 /* Remove unwanted bits from values. */
1245 for (i = 0; i < ETHER_ADDR_LEN; ++i) {
1246 eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
1247 eth.val.src_mac[i] &= eth.mask.src_mac[i];
1249 eth.val.ether_type &= eth.mask.ether_type;
1251 mlx5_flow_create_copy(parser, ð, eth_size);
/**
 * Convert VLAN item to Verbs specification.
1259 * Item specification.
1260 * @param default_mask[in]
1261 * Default bit-masks to use when item->mask is not provided.
1262 * @param data[in, out]
1266 * 0 on success, a negative errno value otherwise and rte_errno is set.
1269 mlx5_flow_create_vlan(const struct rte_flow_item *item,
1270 const void *default_mask,
1271 struct mlx5_flow_data *data)
1273 const struct rte_flow_item_vlan *spec = item->spec;
1274 const struct rte_flow_item_vlan *mask = item->mask;
1275 struct mlx5_flow_parse *parser = data->parser;
1276 struct ibv_flow_spec_eth *eth;
1277 const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
	if (spec) {
		unsigned int i;

		if (!mask)
			mask = default_mask;
1284 for (i = 0; i != hash_rxq_init_n; ++i) {
1285 if (!parser->queue[i].ibv_attr)
1288 eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1289 parser->queue[i].offset - eth_size);
1290 eth->val.vlan_tag = spec->tci;
1291 eth->mask.vlan_tag = mask->tci;
1292 eth->val.vlan_tag &= eth->mask.vlan_tag;
1294 * From verbs perspective an empty VLAN is equivalent
1295 * to a packet without VLAN layer.
1297 if (!eth->mask.vlan_tag)
1303 return rte_flow_error_set(data->error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM,
1304 item, "VLAN cannot be empty");
1308 * Convert IPv4 item to Verbs specification.
1311 * Item specification.
1312 * @param default_mask[in]
1313 * Default bit-masks to use when item->mask is not provided.
1314 * @param data[in, out]
1318 * 0 on success, a negative errno value otherwise and rte_errno is set.
1321 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
1322 const void *default_mask,
1323 struct mlx5_flow_data *data)
1325 const struct rte_flow_item_ipv4 *spec = item->spec;
1326 const struct rte_flow_item_ipv4 *mask = item->mask;
1327 struct mlx5_flow_parse *parser = data->parser;
1328 unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
1329 struct ibv_flow_spec_ipv4_ext ipv4 = {
1330 .type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
		.size = ipv4_size,
	};

	/* Don't update layer for the inner pattern. */
	if (!parser->inner)
		parser->layer = HASH_RXQ_IPV4;
	if (spec) {
		if (!mask)
			mask = default_mask;
1340 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
1341 .src_ip = spec->hdr.src_addr,
1342 .dst_ip = spec->hdr.dst_addr,
1343 .proto = spec->hdr.next_proto_id,
		.tos = spec->hdr.type_of_service,
	};
1346 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
1347 .src_ip = mask->hdr.src_addr,
1348 .dst_ip = mask->hdr.dst_addr,
1349 .proto = mask->hdr.next_proto_id,
		.tos = mask->hdr.type_of_service,
	};
1352 /* Remove unwanted bits from values. */
1353 ipv4.val.src_ip &= ipv4.mask.src_ip;
1354 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
1355 ipv4.val.proto &= ipv4.mask.proto;
1356 ipv4.val.tos &= ipv4.mask.tos;
	}
	mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
	return 0;
}
1363 * Convert IPv6 item to Verbs specification.
1366 * Item specification.
1367 * @param default_mask[in]
1368 * Default bit-masks to use when item->mask is not provided.
1369 * @param data[in, out]
1373 * 0 on success, a negative errno value otherwise and rte_errno is set.
1376 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
1377 const void *default_mask,
1378 struct mlx5_flow_data *data)
1380 const struct rte_flow_item_ipv6 *spec = item->spec;
1381 const struct rte_flow_item_ipv6 *mask = item->mask;
1382 struct mlx5_flow_parse *parser = data->parser;
1383 unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
1384 struct ibv_flow_spec_ipv6 ipv6 = {
1385 .type = parser->inner | IBV_FLOW_SPEC_IPV6,
		.size = ipv6_size,
	};

	/* Don't update layer for the inner pattern. */
	if (!parser->inner)
		parser->layer = HASH_RXQ_IPV6;
	if (spec) {
		unsigned int i;
		uint32_t vtc_flow_val;
		uint32_t vtc_flow_mask;

		if (!mask)
			mask = default_mask;
1399 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
1400 RTE_DIM(ipv6.val.src_ip));
1401 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
1402 RTE_DIM(ipv6.val.dst_ip));
1403 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
1404 RTE_DIM(ipv6.mask.src_ip));
1405 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
1406 RTE_DIM(ipv6.mask.dst_ip));
1407 vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
1408 vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
1409 ipv6.val.flow_label =
1410 rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
1412 ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
1414 ipv6.val.next_hdr = spec->hdr.proto;
1415 ipv6.val.hop_limit = spec->hdr.hop_limits;
1416 ipv6.mask.flow_label =
1417 rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
1419 ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
1421 ipv6.mask.next_hdr = mask->hdr.proto;
1422 ipv6.mask.hop_limit = mask->hdr.hop_limits;
1423 /* Remove unwanted bits from values. */
1424 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
1425 ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
1426 ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
1428 ipv6.val.flow_label &= ipv6.mask.flow_label;
1429 ipv6.val.traffic_class &= ipv6.mask.traffic_class;
1430 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
1431 ipv6.val.hop_limit &= ipv6.mask.hop_limit;
	}
	mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
	return 0;
}
1438 * Convert UDP item to Verbs specification.
1441 * Item specification.
1442 * @param default_mask[in]
1443 * Default bit-masks to use when item->mask is not provided.
1444 * @param data[in, out]
1448 * 0 on success, a negative errno value otherwise and rte_errno is set.
1451 mlx5_flow_create_udp(const struct rte_flow_item *item,
1452 const void *default_mask,
1453 struct mlx5_flow_data *data)
1455 const struct rte_flow_item_udp *spec = item->spec;
1456 const struct rte_flow_item_udp *mask = item->mask;
1457 struct mlx5_flow_parse *parser = data->parser;
1458 unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1459 struct ibv_flow_spec_tcp_udp udp = {
1460 .type = parser->inner | IBV_FLOW_SPEC_UDP,
		.size = udp_size,
	};

	/* Don't update layer for the inner pattern. */
	if (!parser->inner) {
		if (parser->layer == HASH_RXQ_IPV4)
			parser->layer = HASH_RXQ_UDPV4;
		else
			parser->layer = HASH_RXQ_UDPV6;
	}
	if (spec) {
		if (!mask)
			mask = default_mask;
1474 udp.val.dst_port = spec->hdr.dst_port;
1475 udp.val.src_port = spec->hdr.src_port;
1476 udp.mask.dst_port = mask->hdr.dst_port;
1477 udp.mask.src_port = mask->hdr.src_port;
1478 /* Remove unwanted bits from values. */
1479 udp.val.src_port &= udp.mask.src_port;
1480 udp.val.dst_port &= udp.mask.dst_port;
	}
	mlx5_flow_create_copy(parser, &udp, udp_size);
	return 0;
}
1487 * Convert TCP item to Verbs specification.
1490 * Item specification.
1491 * @param default_mask[in]
1492 * Default bit-masks to use when item->mask is not provided.
1493 * @param data[in, out]
1497 * 0 on success, a negative errno value otherwise and rte_errno is set.
1500 mlx5_flow_create_tcp(const struct rte_flow_item *item,
1501 const void *default_mask,
1502 struct mlx5_flow_data *data)
1504 const struct rte_flow_item_tcp *spec = item->spec;
1505 const struct rte_flow_item_tcp *mask = item->mask;
1506 struct mlx5_flow_parse *parser = data->parser;
1507 unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1508 struct ibv_flow_spec_tcp_udp tcp = {
1509 .type = parser->inner | IBV_FLOW_SPEC_TCP,
		.size = tcp_size,
	};

	/* Don't update layer for the inner pattern. */
	if (!parser->inner) {
		if (parser->layer == HASH_RXQ_IPV4)
			parser->layer = HASH_RXQ_TCPV4;
		else
			parser->layer = HASH_RXQ_TCPV6;
	}
	if (spec) {
		if (!mask)
			mask = default_mask;
1523 tcp.val.dst_port = spec->hdr.dst_port;
1524 tcp.val.src_port = spec->hdr.src_port;
1525 tcp.mask.dst_port = mask->hdr.dst_port;
1526 tcp.mask.src_port = mask->hdr.src_port;
1527 /* Remove unwanted bits from values. */
1528 tcp.val.src_port &= tcp.mask.src_port;
1529 tcp.val.dst_port &= tcp.mask.dst_port;
	}
	mlx5_flow_create_copy(parser, &tcp, tcp_size);
	return 0;
}
1536 * Convert VXLAN item to Verbs specification.
1539 * Item specification.
1540 * @param default_mask[in]
1541 * Default bit-masks to use when item->mask is not provided.
1542 * @param data[in, out]
1546 * 0 on success, a negative errno value otherwise and rte_errno is set.
1549 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
1550 const void *default_mask,
1551 struct mlx5_flow_data *data)
1553 const struct rte_flow_item_vxlan *spec = item->spec;
1554 const struct rte_flow_item_vxlan *mask = item->mask;
1555 struct mlx5_flow_parse *parser = data->parser;
1556 unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1557 struct ibv_flow_spec_tunnel vxlan = {
1558 .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
		.size = size,
	};
	union vni {
		uint32_t vlan_id;
		uint8_t vni[4];
	} id;

	id.vni[0] = 0;
	parser->inner = IBV_FLOW_SPEC_INNER;
	if (spec) {
		if (!mask)
			mask = default_mask;
1571 memcpy(&id.vni[1], spec->vni, 3);
1572 vxlan.val.tunnel_id = id.vlan_id;
1573 memcpy(&id.vni[1], mask->vni, 3);
1574 vxlan.mask.tunnel_id = id.vlan_id;
1575 /* Remove unwanted bits from values. */
1576 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
	}
	/*
	 * Tunnel id 0 is equivalent to not adding a VXLAN layer: if only this
	 * layer is defined in the Verbs specification it is interpreted as a
	 * wildcard and all packets will match this rule; if it follows a full
	 * stack layer (e.g. eth / ipv4 / udp), all packets matching the layers
	 * before will also match this rule.
	 * To avoid such a situation, VNI 0 is currently refused.
	 */
1586 if (!vxlan.val.tunnel_id)
1587 return rte_flow_error_set(data->error, EINVAL,
1588 RTE_FLOW_ERROR_TYPE_ITEM,
1590 "VxLAN vni cannot be 0");
	mlx5_flow_create_copy(parser, &vxlan, size);
	return 0;
}
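/*
 * Illustrative example: VNI 100. The three VNI bytes land in id.vni[1..3]
 * (id.vni[0] stays zero), so tunnel_id keeps the value in network order;
 * an all-zero VNI would be rejected by the check above:
 *
 *	struct rte_flow_item_vxlan spec = {
 *		.vni = "\x00\x00\x64", // 100 in big endian
 *	};
 */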
/**
 * Convert mark/flag action to Verbs specification.
1599 * Internal parser structure.
1604 * 0 on success, a negative errno value otherwise and rte_errno is set.
1607 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
1609 unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1610 struct ibv_flow_spec_action_tag tag = {
1611 .type = IBV_FLOW_SPEC_ACTION_TAG,
		.size = size,
		.tag_id = mlx5_flow_mark_set(mark_id),
	};
1616 assert(parser->mark);
	mlx5_flow_create_copy(parser, &tag, size);
	return 0;
}
1622 * Convert count action to Verbs specification.
1625 * Pointer to Ethernet device.
1627 * Pointer to MLX5 flow parser structure.
1630 * 0 on success, a negative errno value otherwise and rte_errno is set.
1633 mlx5_flow_create_count(struct rte_eth_dev *dev __rte_unused,
1634 struct mlx5_flow_parse *parser __rte_unused)
1636 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
1637 struct priv *priv = dev->data->dev_private;
1638 unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
1639 struct ibv_counter_set_init_attr init_attr = {0};
1640 struct ibv_flow_spec_counter_action counter = {
1641 .type = IBV_FLOW_SPEC_ACTION_COUNT,
		.size = size,
		.counter_set_handle = 0,
	};
1646 init_attr.counter_set_id = 0;
1647 parser->cs = mlx5_glue->create_counter_set(priv->ctx, &init_attr);
	if (!parser->cs) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	counter.counter_set_handle = parser->cs->handle;
	mlx5_flow_create_copy(parser, &counter, size);
#endif
	return 0;
}
1659 * Complete flow rule creation with a drop queue.
1662 * Pointer to Ethernet device.
1664 * Internal parser structure.
1666 * Pointer to the rte_flow.
1668 * Perform verbose error reporting if not NULL.
1671 * 0 on success, a negative errno value otherwise and rte_errno is set.
1674 mlx5_flow_create_action_queue_drop(struct rte_eth_dev *dev,
1675 struct mlx5_flow_parse *parser,
1676 struct rte_flow *flow,
1677 struct rte_flow_error *error)
1679 struct priv *priv = dev->data->dev_private;
1680 struct ibv_flow_spec_action_drop *drop;
1681 unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
1686 drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
1687 parser->queue[HASH_RXQ_ETH].offset);
1688 *drop = (struct ibv_flow_spec_action_drop){
		.type = IBV_FLOW_SPEC_ACTION_DROP,
		.size = size,
	};
1692 ++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
1693 parser->queue[HASH_RXQ_ETH].offset += size;
1694 flow->frxq[HASH_RXQ_ETH].ibv_attr =
1695 parser->queue[HASH_RXQ_ETH].ibv_attr;
1697 flow->cs = parser->cs;
	if (!priv->dev->data->dev_started)
		return 0;
1700 parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
1701 flow->frxq[HASH_RXQ_ETH].ibv_flow =
1702 mlx5_glue->create_flow(priv->flow_drop_queue->qp,
1703 flow->frxq[HASH_RXQ_ETH].ibv_attr);
1704 if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1705 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1706 NULL, "flow rule creation failure");
1712 if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1713 claim_zero(mlx5_glue->destroy_flow
1714 (flow->frxq[HASH_RXQ_ETH].ibv_flow));
1715 flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
1717 if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
1718 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
1719 flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
1722 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
1730 * Create hash Rx queues when RSS is enabled.
1733 * Pointer to Ethernet device.
1735 * Internal parser structure.
1737 * Pointer to the rte_flow.
1739 * Perform verbose error reporting if not NULL.
1742 * 0 on success, a negative errno value otherwise and rte_errno is set.
1745 mlx5_flow_create_action_queue_rss(struct rte_eth_dev *dev,
1746 struct mlx5_flow_parse *parser,
1747 struct rte_flow *flow,
1748 struct rte_flow_error *error)
1750 struct priv *priv = dev->data->dev_private;
1753 for (i = 0; i != hash_rxq_init_n; ++i) {
1754 uint64_t hash_fields;
1756 if (!parser->queue[i].ibv_attr)
1758 flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
1759 parser->queue[i].ibv_attr = NULL;
1760 hash_fields = hash_rxq_init[i].hash_fields;
		if (!priv->dev->data->dev_started)
			continue;
		flow->frxq[i].hrxq =
			mlx5_hrxq_get(dev,
				      parser->rss_conf.key,
				      parser->rss_conf.key_len,
				      hash_fields,
				      parser->rss_conf.queue,
				      parser->rss_conf.queue_num);
		if (flow->frxq[i].hrxq)
			continue;
		flow->frxq[i].hrxq =
			mlx5_hrxq_new(dev,
				      parser->rss_conf.key,
				      parser->rss_conf.key_len,
				      hash_fields,
				      parser->rss_conf.queue,
				      parser->rss_conf.queue_num);
1779 if (!flow->frxq[i].hrxq) {
1780 return rte_flow_error_set(error, ENOMEM,
1781 RTE_FLOW_ERROR_TYPE_HANDLE,
1783 "cannot create hash rxq");
1790 * Complete flow rule creation.
1793 * Pointer to Ethernet device.
1795 * Internal parser structure.
1797 * Pointer to the rte_flow.
1799 * Perform verbose error reporting if not NULL.
1802 * 0 on success, a negative errno value otherwise and rte_errno is set.
1805 mlx5_flow_create_action_queue(struct rte_eth_dev *dev,
1806 struct mlx5_flow_parse *parser,
1807 struct rte_flow *flow,
1808 struct rte_flow_error *error)
1810 struct priv *priv = dev->data->dev_private;
1813 unsigned int flows_n = 0;
1817 assert(!parser->drop);
1818 ret = mlx5_flow_create_action_queue_rss(dev, parser, flow, error);
1822 flow->cs = parser->cs;
1823 if (!priv->dev->data->dev_started)
1825 for (i = 0; i != hash_rxq_init_n; ++i) {
1826 if (!flow->frxq[i].hrxq)
1828 flow->frxq[i].ibv_flow =
1829 mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
1830 flow->frxq[i].ibv_attr);
1831 if (!flow->frxq[i].ibv_flow) {
1832 rte_flow_error_set(error, ENOMEM,
1833 RTE_FLOW_ERROR_TYPE_HANDLE,
1834 NULL, "flow rule creation failure");
1838 DRV_LOG(DEBUG, "port %u %p type %d QP %p ibv_flow %p",
1841 (void *)flow->frxq[i].hrxq,
1842 (void *)flow->frxq[i].ibv_flow);
1845 rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE,
1846 NULL, "internal error in flow creation");
1849 for (i = 0; i != parser->rss_conf.queue_num; ++i) {
1850 struct mlx5_rxq_data *q =
1851 (*priv->rxqs)[parser->rss_conf.queue[i]];
1853 q->mark |= parser->mark;
1857 ret = rte_errno; /* Save rte_errno before cleanup. */
1859 for (i = 0; i != hash_rxq_init_n; ++i) {
1860 if (flow->frxq[i].ibv_flow) {
1861 struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;
1863 claim_zero(mlx5_glue->destroy_flow(ibv_flow));
1865 if (flow->frxq[i].hrxq)
1866 mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
1867 if (flow->frxq[i].ibv_attr)
1868 rte_free(flow->frxq[i].ibv_attr);
1871 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
1875 rte_errno = ret; /* Restore rte_errno. */
1883 * Pointer to Ethernet device.
1885 * Pointer to a TAILQ flow list.
1887 * Flow rule attributes.
1888 * @param[in] pattern
1889 * Pattern specification (list terminated by the END pattern item).
1890 * @param[in] actions
1891 * Associated actions (list terminated by the END action).
1893 * Perform verbose error reporting if not NULL.
1896 * A flow on success, NULL otherwise and rte_errno is set.
1898 static struct rte_flow *
1899 mlx5_flow_list_create(struct rte_eth_dev *dev,
1900 struct mlx5_flows *list,
1901 const struct rte_flow_attr *attr,
1902 const struct rte_flow_item items[],
1903 const struct rte_flow_action actions[],
1904 struct rte_flow_error *error)
1906 struct mlx5_flow_parse parser = { .create = 1, };
1907 struct rte_flow *flow = NULL;
1911 ret = mlx5_flow_convert(dev, attr, items, actions, error, &parser);
	flow = rte_calloc(__func__, 1,
			  sizeof(struct rte_flow) +
			  parser.rss_conf.queue_num * sizeof(uint16_t),
			  0);
	if (!flow) {
1919 rte_flow_error_set(error, ENOMEM,
1920 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1922 "cannot allocate flow memory");
1925 /* Copy configuration. */
1926 flow->queues = (uint16_t (*)[])(flow + 1);
1927 flow->rss_conf = (struct rte_flow_action_rss){
1928 .types = parser.rss_conf.types,
1929 .key_len = parser.rss_conf.key_len,
1930 .queue_num = parser.rss_conf.queue_num,
1931 .key = memcpy(flow->rss_key, parser.rss_conf.key,
1932 sizeof(*parser.rss_conf.key) *
1933 parser.rss_conf.key_len),
1934 .queue = memcpy(flow->queues, parser.rss_conf.queue,
1935 sizeof(*parser.rss_conf.queue) *
1936 parser.rss_conf.queue_num),
1938 flow->mark = parser.mark;
1939 /* finalise the flow. */
	if (parser.drop)
		ret = mlx5_flow_create_action_queue_drop(dev, &parser, flow,
							 error);
	else
		ret = mlx5_flow_create_action_queue(dev, &parser, flow, error);
1947 TAILQ_INSERT_TAIL(list, flow, next);
1948 DRV_LOG(DEBUG, "port %u flow created %p", dev->data->port_id,
1952 DRV_LOG(ERR, "port %u flow creation error: %s", dev->data->port_id,
1954 for (i = 0; i != hash_rxq_init_n; ++i) {
1955 if (parser.queue[i].ibv_attr)
1956 rte_free(parser.queue[i].ibv_attr);
1963 * Validate a flow supported by the NIC.
1965 * @see rte_flow_validate()
1969 mlx5_flow_validate(struct rte_eth_dev *dev,
1970 const struct rte_flow_attr *attr,
1971 const struct rte_flow_item items[],
1972 const struct rte_flow_action actions[],
1973 struct rte_flow_error *error)
1975 struct mlx5_flow_parse parser = { .create = 0, };
1977 return mlx5_flow_convert(dev, attr, items, actions, error, &parser);
1983 * @see rte_flow_create()
1987 mlx5_flow_create(struct rte_eth_dev *dev,
1988 const struct rte_flow_attr *attr,
1989 const struct rte_flow_item items[],
1990 const struct rte_flow_action actions[],
1991 struct rte_flow_error *error)
1993 struct priv *priv = dev->data->dev_private;
1995 return mlx5_flow_list_create(dev, &priv->flows, attr, items, actions,
2000 * Destroy a flow in a list.
2003 * Pointer to Ethernet device.
2005 * Pointer to a TAILQ flow list.
2010 mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
2011 struct rte_flow *flow)
2013 struct priv *priv = dev->data->dev_private;
2016 if (flow->drop || !flow->mark)
2018 for (i = 0; i != flow->rss_conf.queue_num; ++i) {
2019 struct rte_flow *tmp;
2023 * To remove the mark from the queue, the queue must not be
2024 * present in any other marked flow (RSS or not).
2026 TAILQ_FOREACH(tmp, list, next) {
2028 uint16_t *tqs = NULL;
2033 for (j = 0; j != hash_rxq_init_n; ++j) {
2034 if (!tmp->frxq[j].hrxq)
2036 tqs = tmp->frxq[j].hrxq->ind_table->queues;
2037 tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
2041 for (j = 0; (j != tq_n) && !mark; j++)
2042 if (tqs[j] == (*flow->queues)[i])
2045 (*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
2049 if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
2050 claim_zero(mlx5_glue->destroy_flow
2051 (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2052 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
2054 for (i = 0; i != hash_rxq_init_n; ++i) {
2055 struct mlx5_flow *frxq = &flow->frxq[i];
2058 claim_zero(mlx5_glue->destroy_flow
2061 mlx5_hrxq_release(dev, frxq->hrxq);
2063 rte_free(frxq->ibv_attr);
2067 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2070 TAILQ_REMOVE(list, flow, next);
2071 DRV_LOG(DEBUG, "port %u flow destroyed %p", dev->data->port_id,
2077 * Destroy all flows.
2080 * Pointer to Ethernet device.
2082 * Pointer to a TAILQ flow list.
2085 mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
2087 while (!TAILQ_EMPTY(list)) {
2088 struct rte_flow *flow;
2090 flow = TAILQ_FIRST(list);
2091 mlx5_flow_list_destroy(dev, list, flow);
2096 * Create drop queue.
2099 * Pointer to Ethernet device.
2102 * 0 on success, a negative errno value otherwise and rte_errno is set.
2105 mlx5_flow_create_drop_queue(struct rte_eth_dev *dev)
2107 struct priv *priv = dev->data->dev_private;
2108 struct mlx5_hrxq_drop *fdq = NULL;
2112 fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
2115 "port %u cannot allocate memory for drop queue",
2116 dev->data->port_id);
2120 fdq->cq = mlx5_glue->create_cq(priv->ctx, 1, NULL, NULL, 0);
2122 DRV_LOG(WARNING, "port %u cannot allocate CQ for drop queue",
2123 dev->data->port_id);
	fdq->wq = mlx5_glue->create_wq
		(priv->ctx,
		 &(struct ibv_wq_init_attr){
			.wq_type = IBV_WQT_RQ,
			.max_wr = 1,
			.max_sge = 1,
			.pd = priv->pd,
			.cq = fdq->cq,
		 });
	if (!fdq->wq) {
2137 DRV_LOG(WARNING, "port %u cannot allocate WQ for drop queue",
2138 dev->data->port_id);
	fdq->ind_table = mlx5_glue->create_rwq_ind_table
		(priv->ctx,
		 &(struct ibv_rwq_ind_table_init_attr){
2145 .log_ind_tbl_size = 0,
2146 .ind_tbl = &fdq->wq,
2149 if (!fdq->ind_table) {
2151 "port %u cannot allocate indirection table for drop"
2153 dev->data->port_id);
	fdq->qp = mlx5_glue->create_qp_ex
		(priv->ctx,
		 &(struct ibv_qp_init_attr_ex){
2160 .qp_type = IBV_QPT_RAW_PACKET,
			.comp_mask =
				IBV_QP_INIT_ATTR_PD |
2163 IBV_QP_INIT_ATTR_IND_TABLE |
2164 IBV_QP_INIT_ATTR_RX_HASH,
2165 .rx_hash_conf = (struct ibv_rx_hash_conf){
				.rx_hash_function =
					IBV_RX_HASH_FUNC_TOEPLITZ,
2168 .rx_hash_key_len = rss_hash_default_key_len,
2169 .rx_hash_key = rss_hash_default_key,
2170 .rx_hash_fields_mask = 0,
			 .rwq_ind_tbl = fdq->ind_table,
			 .pd = priv->pd,
		 });
	if (!fdq->qp) {
2176 DRV_LOG(WARNING, "port %u cannot allocate QP for drop queue",
2177 dev->data->port_id);
2181 priv->flow_drop_queue = fdq;
2185 claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2187 claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2189 claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2191 claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2194 priv->flow_drop_queue = NULL;
2199 * Delete drop queue.
2202 * Pointer to Ethernet device.
2205 mlx5_flow_delete_drop_queue(struct rte_eth_dev *dev)
2207 struct priv *priv = dev->data->dev_private;
2208 struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
2213 claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2215 claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2217 claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2219 claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2221 priv->flow_drop_queue = NULL;
2228 * Pointer to Ethernet device.
2230 * Pointer to a TAILQ flow list.
2233 mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
2235 struct priv *priv = dev->data->dev_private;
2236 struct rte_flow *flow;
2238 TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
2240 struct mlx5_ind_table_ibv *ind_tbl = NULL;
2243 if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
2245 claim_zero(mlx5_glue->destroy_flow
2246 (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2247 flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
2248 DRV_LOG(DEBUG, "port %u flow %p removed",
2249 dev->data->port_id, (void *)flow);
2253 /* Verify the flow has not already been cleaned. */
2254 for (i = 0; i != hash_rxq_init_n; ++i) {
2255 if (!flow->frxq[i].ibv_flow)
2258 * Indirection table may be necessary to remove the
2259 * flags in the Rx queues.
2260 * This helps to speed-up the process by avoiding
2263 ind_tbl = flow->frxq[i].hrxq->ind_table;
2266 if (i == hash_rxq_init_n)
2270 for (i = 0; i != ind_tbl->queues_n; ++i)
2271 (*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
2273 for (i = 0; i != hash_rxq_init_n; ++i) {
2274 if (!flow->frxq[i].ibv_flow)
2276 claim_zero(mlx5_glue->destroy_flow
2277 (flow->frxq[i].ibv_flow));
2278 flow->frxq[i].ibv_flow = NULL;
2279 mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
2280 flow->frxq[i].hrxq = NULL;
2282 DRV_LOG(DEBUG, "port %u flow %p removed", dev->data->port_id,
2291 * Pointer to Ethernet device.
2293 * Pointer to a TAILQ flow list.
2296 * 0 on success, a negative errno value otherwise and rte_errno is set.
2299 mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
2301 struct priv *priv = dev->data->dev_private;
2302 struct rte_flow *flow;
2304 TAILQ_FOREACH(flow, list, next) {
2308 flow->frxq[HASH_RXQ_ETH].ibv_flow =
2309 mlx5_glue->create_flow
2310 (priv->flow_drop_queue->qp,
2311 flow->frxq[HASH_RXQ_ETH].ibv_attr);
2312 if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2314 "port %u flow %p cannot be applied",
2315 dev->data->port_id, (void *)flow);
2319 DRV_LOG(DEBUG, "port %u flow %p applied",
2320 dev->data->port_id, (void *)flow);
2324 for (i = 0; i != hash_rxq_init_n; ++i) {
2325 if (!flow->frxq[i].ibv_attr)
2327 flow->frxq[i].hrxq =
2328 mlx5_hrxq_get(dev, flow->rss_conf.key,
2329 flow->rss_conf.key_len,
2330 hash_rxq_init[i].hash_fields,
2331 flow->rss_conf.queue,
2332 flow->rss_conf.queue_num);
2333 if (flow->frxq[i].hrxq)
2335 flow->frxq[i].hrxq =
2336 mlx5_hrxq_new(dev, flow->rss_conf.key,
2337 flow->rss_conf.key_len,
2338 hash_rxq_init[i].hash_fields,
2339 flow->rss_conf.queue,
2340 flow->rss_conf.queue_num);
2341 if (!flow->frxq[i].hrxq) {
2343 "port %u flow %p cannot be applied",
2344 dev->data->port_id, (void *)flow);
2349 flow->frxq[i].ibv_flow =
2350 mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
2351 flow->frxq[i].ibv_attr);
2352 if (!flow->frxq[i].ibv_flow) {
2354 "port %u flow %p cannot be applied",
2355 dev->data->port_id, (void *)flow);
2359 DRV_LOG(DEBUG, "port %u flow %p applied",
2360 dev->data->port_id, (void *)flow);
2364 for (i = 0; i != flow->rss_conf.queue_num; ++i)
2365 (*priv->rxqs)[flow->rss_conf.queue[i]]->mark = 1;
2371 * Verify the flow list is empty
2374 * Pointer to Ethernet device.
2376 * @return the number of flows not released.
2379 mlx5_flow_verify(struct rte_eth_dev *dev)
2381 struct priv *priv = dev->data->dev_private;
2382 struct rte_flow *flow;
2385 TAILQ_FOREACH(flow, &priv->flows, next) {
2386 DRV_LOG(DEBUG, "port %u flow %p still referenced",
2387 dev->data->port_id, (void *)flow);
2394 * Enable a control flow configured from the control plane.
2397 * Pointer to Ethernet device.
2399 * An Ethernet flow spec to apply.
2401 * An Ethernet flow mask to apply.
2403 * A VLAN flow spec to apply.
2405 * A VLAN flow mask to apply.
2408 * 0 on success, a negative errno value otherwise and rte_errno is set.
2411 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
2412 struct rte_flow_item_eth *eth_spec,
2413 struct rte_flow_item_eth *eth_mask,
2414 struct rte_flow_item_vlan *vlan_spec,
2415 struct rte_flow_item_vlan *vlan_mask)
2417 struct priv *priv = dev->data->dev_private;
2418 const struct rte_flow_attr attr = {
2420 .priority = MLX5_CTRL_FLOW_PRIORITY,
2422 struct rte_flow_item items[] = {
2424 .type = RTE_FLOW_ITEM_TYPE_ETH,
2430 .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
2431 RTE_FLOW_ITEM_TYPE_END,
2437 .type = RTE_FLOW_ITEM_TYPE_END,
2440 uint16_t queue[priv->reta_idx_n];
2441 struct rte_flow_action_rss action_rss = {
2442 .types = priv->rss_conf.rss_hf,
2443 .key_len = priv->rss_conf.rss_key_len,
2444 .queue_num = priv->reta_idx_n,
2445 .key = priv->rss_conf.rss_key,
2448 struct rte_flow_action actions[] = {
2450 .type = RTE_FLOW_ACTION_TYPE_RSS,
2451 .conf = &action_rss,
2454 .type = RTE_FLOW_ACTION_TYPE_END,
2457 struct rte_flow *flow;
2458 struct rte_flow_error error;
	if (!priv->reta_idx_n) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
2465 for (i = 0; i != priv->reta_idx_n; ++i)
2466 queue[i] = (*priv->reta_idx)[i];
2467 flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,

/**
 * Enable a control flow configured from the control plane.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param eth_spec
 *   An Ethernet flow spec to apply.
 * @param eth_mask
 *   An Ethernet flow mask to apply.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_ctrl_flow(struct rte_eth_dev *dev,
	       struct rte_flow_item_eth *eth_spec,
	       struct rte_flow_item_eth *eth_mask)
{
	return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
}

/**
 * Destroy a flow.
 *
 * @see rte_flow_destroy()
 * @see rte_flow_ops
 */
int
mlx5_flow_destroy(struct rte_eth_dev *dev,
		  struct rte_flow *flow,
		  struct rte_flow_error *error __rte_unused)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_destroy(dev, &priv->flows, flow);
	return 0;
}

/**
 * Destroy all flows.
 *
 * @see rte_flow_flush()
 * @see rte_flow_ops
 */
int
mlx5_flow_flush(struct rte_eth_dev *dev,
		struct rte_flow_error *error __rte_unused)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_flush(dev, &priv->flows);
	return 0;
}

#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
/**
 * Query flow counter.
 *
 * @param cs
 *   The counter set.
 * @param counter_stats
 *   Last counter snapshot, updated on reset.
 * @param[out] query_count
 *   Returned data from the counter.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_query_count(struct ibv_counter_set *cs,
		      struct mlx5_flow_counter_stats *counter_stats,
		      struct rte_flow_query_count *query_count,
		      struct rte_flow_error *error)
{
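	/*
	 * The counter set exposes two 64-bit values: packet hits and bytes.
	 * Reported statistics are deltas since the last reset.
	 */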
	uint64_t counters[2];
	struct ibv_query_counter_set_attr query_cs_attr = {
		.cs = cs,
		.query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
	};
	struct ibv_counter_set_data query_out = {
		.out = counters,
		.outlen = 2 * sizeof(uint64_t),
	};
	int err = mlx5_glue->query_counter_set(&query_cs_attr, &query_out);

	if (err)
		return rte_flow_error_set(error, err,
					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
					  NULL,
					  "cannot read counter");
	query_count->hits_set = 1;
	query_count->bytes_set = 1;
	query_count->hits = counters[0] - counter_stats->hits;
	query_count->bytes = counters[1] - counter_stats->bytes;
	if (query_count->reset) {
		counter_stats->hits = counters[0];
		counter_stats->bytes = counters[1];
	}
	return 0;
}

/**
 * Query a flow.
 *
 * @see rte_flow_query()
 * @see rte_flow_ops
 */
int
mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
		struct rte_flow *flow,
		enum rte_flow_action_type action __rte_unused,
		void *data,
		struct rte_flow_error *error)
{
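	/* Only flows with a counter set (COUNT action) can be queried. */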
	if (flow->cs) {
		int ret;

		ret = mlx5_flow_query_count(flow->cs,
					    &flow->counter_stats,
					    (struct rte_flow_query_count *)data,
					    error);
		if (ret)
			return ret;
	} else {
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
					  NULL,
					  "no counter found for flow");
	}
	return 0;
}
#endif

/**
 * Isolated mode.
 *
 * @see rte_flow_isolate()
 * @see rte_flow_ops
 */
int
mlx5_flow_isolate(struct rte_eth_dev *dev,
		  int enable,
		  struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	if (dev->data->dev_started) {
		rte_flow_error_set(error, EBUSY,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "port must be stopped first");
		return -rte_errno;
	}
	priv->isolated = !!enable;
	if (enable)
		priv->dev->dev_ops = &mlx5_dev_ops_isolate;
	else
		priv->dev->dev_ops = &mlx5_dev_ops;
	return 0;
}

/**
 * Convert a flow director filter to a generic flow.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Flow director filter to add.
 * @param attributes
 *   Generic flow parameters structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
			 const struct rte_eth_fdir_filter *fdir_filter,
			 struct mlx5_fdir *attributes)
{
	struct priv *priv = dev->data->dev_private;
	const struct rte_eth_fdir_input *input = &fdir_filter->input;
	const struct rte_eth_fdir_masks *mask =
		&dev->data->dev_conf.fdir_conf.mask;

	/* Validate queue number. */
	if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
		DRV_LOG(ERR, "port %u invalid queue number %d",
			dev->data->port_id, fdir_filter->action.rx_queue);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	attributes->attr.ingress = 1;
	attributes->items[0] = (struct rte_flow_item) {
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.spec = &attributes->l2,
		.mask = &attributes->l2_mask,
	};
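	/* Translate the flow director behavior into a terminating action. */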
	switch (fdir_filter->action.behavior) {
	case RTE_ETH_FDIR_ACCEPT:
		attributes->actions[0] = (struct rte_flow_action){
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &attributes->queue,
		};
		break;
	case RTE_ETH_FDIR_REJECT:
		attributes->actions[0] = (struct rte_flow_action){
			.type = RTE_FLOW_ACTION_TYPE_DROP,
		};
		break;
	default:
		DRV_LOG(ERR, "port %u invalid behavior %d",
			dev->data->port_id,
			fdir_filter->action.behavior);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	attributes->queue.index = fdir_filter->action.rx_queue;
	/* Handle L3. */
	switch (fdir_filter->input.flow_type) {
	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = input->flow.ip4_flow.src_ip,
			.dst_addr = input->flow.ip4_flow.dst_ip,
			.time_to_live = input->flow.ip4_flow.ttl,
			.type_of_service = input->flow.ip4_flow.tos,
			.next_proto_id = input->flow.ip4_flow.proto,
		};
		attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = mask->ipv4_mask.src_ip,
			.dst_addr = mask->ipv4_mask.dst_ip,
			.time_to_live = mask->ipv4_mask.ttl,
			.type_of_service = mask->ipv4_mask.tos,
			.next_proto_id = mask->ipv4_mask.proto,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV4,
			.spec = &attributes->l3,
			.mask = &attributes->l3_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
			.hop_limits = input->flow.ipv6_flow.hop_limits,
			.proto = input->flow.ipv6_flow.proto,
		};

		memcpy(attributes->l3.ipv6.hdr.src_addr,
		       input->flow.ipv6_flow.src_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
		memcpy(attributes->l3.ipv6.hdr.dst_addr,
		       input->flow.ipv6_flow.dst_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
		memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
		       mask->ipv6_mask.src_ip,
		       RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
		memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
		       mask->ipv6_mask.dst_ip,
		       RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV6,
			.spec = &attributes->l3,
			.mask = &attributes->l3_mask,
		};
		break;
	default:
		DRV_LOG(ERR, "port %u invalid flow type %d",
			dev->data->port_id, fdir_filter->input.flow_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	/* Handle L4. */
	switch (fdir_filter->input.flow_type) {
	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
		attributes->l4.udp.hdr = (struct udp_hdr){
			.src_port = input->flow.udp4_flow.src_port,
			.dst_port = input->flow.udp4_flow.dst_port,
		};
		attributes->l4_mask.udp.hdr = (struct udp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_UDP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
		attributes->l4.tcp.hdr = (struct tcp_hdr){
			.src_port = input->flow.tcp4_flow.src_port,
			.dst_port = input->flow.tcp4_flow.dst_port,
		};
		attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_TCP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
		attributes->l4.udp.hdr = (struct udp_hdr){
			.src_port = input->flow.udp6_flow.src_port,
			.dst_port = input->flow.udp6_flow.dst_port,
		};
		attributes->l4_mask.udp.hdr = (struct udp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_UDP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
		attributes->l4.tcp.hdr = (struct tcp_hdr){
			.src_port = input->flow.tcp6_flow.src_port,
			.dst_port = input->flow.tcp6_flow.dst_port,
		};
		attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_TCP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
		break;
	default:
		DRV_LOG(ERR, "port %u invalid flow type %d",
			dev->data->port_id, fdir_filter->input.flow_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	return 0;
}

/**
 * Add new flow director filter and store it in list.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Flow director filter to add.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_add(struct rte_eth_dev *dev,
		     const struct rte_eth_fdir_filter *fdir_filter)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_fdir attributes = {
		.attr.group = 0,
		.l2_mask = {
			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.type = 0,
		},
	};
	struct mlx5_flow_parse parser = {
		.layer = HASH_RXQ_ETH,
	};
	struct rte_flow_error error;
	struct rte_flow *flow;
	int ret;

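	/* Convert to rte_flow, validate through the parser, then create. */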
	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
	if (ret)
		return ret;
	ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
				attributes.actions, &error, &parser);
	if (ret)
		return ret;
	flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
				     attributes.items, attributes.actions,
				     &error);
	if (flow) {
		DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
			(void *)flow);
		return 0;
	}
	return -rte_errno;
}

/**
 * Delete specific filter.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Filter to be deleted.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_delete(struct rte_eth_dev *dev,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_fdir attributes = {
		.attr.group = 0,
	};
	struct mlx5_flow_parse parser = {
		.create = 1,
		.layer = HASH_RXQ_ETH,
	};
	struct rte_flow_error error;
	struct rte_flow *flow;
	unsigned int i;
	int ret;

	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
	if (ret)
		return ret;
	ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
				attributes.actions, &error, &parser);
	if (ret)
		goto exit;
	/*
	 * Special case for drop action which is only set in the
	 * specifications when the flow is created. In this situation the
	 * drop specification is missing.
	 */
	if (parser.drop) {
		struct ibv_flow_spec_action_drop *drop;

		drop = (void *)((uintptr_t)parser.queue[HASH_RXQ_ETH].ibv_attr +
				parser.queue[HASH_RXQ_ETH].offset);
		*drop = (struct ibv_flow_spec_action_drop){
			.type = IBV_FLOW_SPEC_ACTION_DROP,
			.size = sizeof(struct ibv_flow_spec_action_drop),
		};
		parser.queue[HASH_RXQ_ETH].ibv_attr->num_of_specs++;
	}
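	/*
	 * Look for an existing flow whose verbs attributes and individual
	 * specifications all match those built by the parser.
	 */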
	TAILQ_FOREACH(flow, &priv->flows, next) {
		struct ibv_flow_attr *attr;
		struct ibv_spec_header *attr_h;
		void *spec;
		struct ibv_flow_attr *flow_attr;
		struct ibv_spec_header *flow_h;
		void *flow_spec;
		unsigned int specs_n;

		attr = parser.queue[HASH_RXQ_ETH].ibv_attr;
		flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr;
		/* Compare first the attributes. */
		if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
			continue;
		if (attr->num_of_specs == 0)
			continue;
		spec = (void *)((uintptr_t)attr +
				sizeof(struct ibv_flow_attr));
		flow_spec = (void *)((uintptr_t)flow_attr +
				     sizeof(struct ibv_flow_attr));
		specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
		for (i = 0; i != specs_n; ++i) {
			attr_h = spec;
			flow_h = flow_spec;
			if (memcmp(spec, flow_spec,
				   RTE_MIN(attr_h->size, flow_h->size)))
				goto wrong_flow;
			spec = (void *)((uintptr_t)spec + attr_h->size);
			flow_spec = (void *)((uintptr_t)flow_spec +
					     flow_h->size);
		}
		/* At this point, the flows match. */
		break;
wrong_flow:
		/* The flow does not match. */
		continue;
	}
	ret = rte_errno; /* Save rte_errno before cleanup. */
	if (flow)
		mlx5_flow_list_destroy(dev, &priv->flows, flow);
exit:
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser.queue[i].ibv_attr)
			rte_free(parser.queue[i].ibv_attr);
	}
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}

/**
 * Update queue for specific filter.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Filter to be updated.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_update(struct rte_eth_dev *dev,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	int ret;

	ret = mlx5_fdir_filter_delete(dev, fdir_filter);
	if (ret)
		return ret;
	return mlx5_fdir_filter_add(dev, fdir_filter);
}

/**
 * Flush all filters.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
static void
mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_flush(dev, &priv->flows);
}

/**
 * Get flow director information.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[out] fdir_info
 *   Resulting flow director information.
 */
static void
mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
{
	struct priv *priv = dev->data->dev_private;
	struct rte_eth_fdir_masks *mask =
		&priv->dev->data->dev_conf.fdir_conf.mask;

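	/* Flexible payload is not supported; report mode and mask only. */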
	fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
	fdir_info->guarant_spc = 0;
	rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
	fdir_info->max_flexpayload = 0;
	fdir_info->flow_types_mask[0] = 0;
	fdir_info->flex_payload_unit = 0;
	fdir_info->max_flex_payload_segment_num = 0;
	fdir_info->flex_payload_limit = 0;
	memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
}

/**
 * Deal with flow director operations.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
		    void *arg)
{
	struct priv *priv = dev->data->dev_private;
	enum rte_fdir_mode fdir_mode =
		priv->dev->data->dev_conf.fdir_conf.mode;

	if (filter_op == RTE_ETH_FILTER_NOP)
		return 0;
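	/* Only flow director perfect match modes are supported. */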
	if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
	    fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
		DRV_LOG(ERR, "port %u flow director mode %d not supported",
			dev->data->port_id, fdir_mode);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	switch (filter_op) {
	case RTE_ETH_FILTER_ADD:
		return mlx5_fdir_filter_add(dev, arg);
	case RTE_ETH_FILTER_UPDATE:
		return mlx5_fdir_filter_update(dev, arg);
	case RTE_ETH_FILTER_DELETE:
		return mlx5_fdir_filter_delete(dev, arg);
	case RTE_ETH_FILTER_FLUSH:
		mlx5_fdir_filter_flush(dev);
		break;
	case RTE_ETH_FILTER_INFO:
		mlx5_fdir_info_get(dev, arg);
		break;
	default:
		DRV_LOG(DEBUG, "port %u unknown operation %u",
			dev->data->port_id, filter_op);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	return 0;
}

/**
 * Manage filter operations.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param filter_type
 *   Filter type.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
		     enum rte_filter_type filter_type,
		     enum rte_filter_op filter_op,
		     void *arg)
{
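	/*
	 * Generic filter queries return the rte_flow operations table while
	 * flow director requests are translated to rte_flow internally.
	 */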
	switch (filter_type) {
	case RTE_ETH_FILTER_GENERIC:
		if (filter_op != RTE_ETH_FILTER_GET) {
			rte_errno = EINVAL;
			return -rte_errno;
		}
		*(const void **)arg = &mlx5_flow_ops;
		return 0;
	case RTE_ETH_FILTER_FDIR:
		return mlx5_fdir_ctrl_func(dev, filter_op, arg);
	default:
		DRV_LOG(ERR, "port %u filter type (%d) not supported",
			dev->data->port_id, filter_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
}