1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2016 6WIND S.A.
3 * Copyright 2016 Mellanox.
10 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
12 #pragma GCC diagnostic ignored "-Wpedantic"
14 #include <infiniband/verbs.h>
16 #pragma GCC diagnostic error "-Wpedantic"
19 #include <rte_ethdev_driver.h>
21 #include <rte_flow_driver.h>
22 #include <rte_malloc.h>
26 #include "mlx5_defs.h"
28 #include "mlx5_glue.h"
30 /* Define minimal priority for control plane flows. */
31 #define MLX5_CTRL_FLOW_PRIORITY 4
33 /* Internet Protocol versions. */
/*
 * Fallback layout used when the installed Verbs headers predate
 * counter-set support (HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT unset);
 * keeps sizeof() computations below compiling either way.
 */
37 #ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
38 struct ibv_flow_spec_counter_action {
43 /* Dev ops structure defined in mlx5.c */
44 extern const struct eth_dev_ops mlx5_dev_ops;
45 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
/*
 * Forward declarations of the per-item rte_flow -> Verbs specification
 * conversion callbacks; they are wired into the mlx5_flow_items[] graph
 * below through the .convert member.
 */
48 mlx5_flow_create_eth(const struct rte_flow_item *item,
49 const void *default_mask,
53 mlx5_flow_create_vlan(const struct rte_flow_item *item,
54 const void *default_mask,
58 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
59 const void *default_mask,
63 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
64 const void *default_mask,
68 mlx5_flow_create_udp(const struct rte_flow_item *item,
69 const void *default_mask,
73 mlx5_flow_create_tcp(const struct rte_flow_item *item,
74 const void *default_mask,
78 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
79 const void *default_mask,
82 struct mlx5_flow_parse;
85 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
89 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);
92 mlx5_flow_create_count(struct rte_eth_dev *dev, struct mlx5_flow_parse *parser);
94 /* Hash RX queue types. */
/*
 * Static description of one hash Rx queue flavor: which Verbs hash
 * fields it uses, which DPDK rte_eth RSS bits map to it, and the flow
 * priority/IP version associated with that layer.
 */
105 /* Initialization data for hash RX queue. */
106 struct hash_rxq_init {
107 uint64_t hash_fields; /* Fields that participate in the hash. */
108 uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
109 unsigned int flow_priority; /* Flow priority to use. */
110 unsigned int ip_version; /* Internet protocol. */
113 /* Initialization data for hash RX queues. */
/*
 * One entry per supported hash Rx queue type, ordered TCP/UDP/other
 * for IPv4 then IPv6. Each entry pairs the Verbs RX hash field set
 * with the matching DPDK rss_hf bits so the parser can translate a
 * user RSS configuration into Verbs hash queues.
 */
114 const struct hash_rxq_init hash_rxq_init[] = {
116 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
117 IBV_RX_HASH_DST_IPV4 |
118 IBV_RX_HASH_SRC_PORT_TCP |
119 IBV_RX_HASH_DST_PORT_TCP),
120 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
122 .ip_version = MLX5_IPV4,
125 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
126 IBV_RX_HASH_DST_IPV4 |
127 IBV_RX_HASH_SRC_PORT_UDP |
128 IBV_RX_HASH_DST_PORT_UDP),
129 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
131 .ip_version = MLX5_IPV4,
134 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
135 IBV_RX_HASH_DST_IPV4),
136 .dpdk_rss_hf = (ETH_RSS_IPV4 |
139 .ip_version = MLX5_IPV4,
142 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
143 IBV_RX_HASH_DST_IPV6 |
144 IBV_RX_HASH_SRC_PORT_TCP |
145 IBV_RX_HASH_DST_PORT_TCP),
146 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
148 .ip_version = MLX5_IPV6,
151 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
152 IBV_RX_HASH_DST_IPV6 |
153 IBV_RX_HASH_SRC_PORT_UDP |
154 IBV_RX_HASH_DST_PORT_UDP),
155 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
157 .ip_version = MLX5_IPV6,
160 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
161 IBV_RX_HASH_DST_IPV6),
162 .dpdk_rss_hf = (ETH_RSS_IPV6 |
165 .ip_version = MLX5_IPV6,
174 /* Number of entries in hash_rxq_init[]. */
175 const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
177 /** Structure for holding counter stats. */
/* Cached hit counters read back from the Verbs counter set of a rule. */
178 struct mlx5_flow_counter_stats {
179 uint64_t hits; /**< Number of packets matched by the rule. */
180 uint64_t bytes; /**< Number of bytes matched by the rule. */
183 /** Structure for Drop queue. */
/*
 * Verbs objects backing the drop queue: a QP fed by a single WQ/CQ
 * through an indirection table; packets steered here are discarded.
 */
184 struct mlx5_hrxq_drop {
185 struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
186 struct ibv_qp *qp; /**< Verbs queue pair. */
187 struct ibv_wq *wq; /**< Verbs work queue. */
188 struct ibv_cq *cq; /**< Verbs completion queue. */
191 /* Flows structures. */
/*
 * NOTE(review): these are the members of the per-hash-queue flow
 * structure (presumably struct mlx5_flow, referenced by rte_flow's
 * frxq[] below) — opening line not visible here, confirm in file.
 */
193 uint64_t hash_fields; /**< Fields that participate in the hash. */
194 struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
195 struct ibv_flow *ibv_flow; /**< Verbs flow. */
196 struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
199 /* Drop flows structures. */
/* Verbs attribute/flow pair for a rule whose fate is the drop queue. */
200 struct mlx5_flow_drop {
201 struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
202 struct ibv_flow *ibv_flow; /**< Verbs flow. */
/*
 * NOTE(review): members of the PMD's struct rte_flow (opening line not
 * visible in this chunk). One mlx5_flow per hash_rxq_init[] entry is
 * kept in frxq[], plus the RSS configuration and optional counter set.
 */
206 TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
207 uint32_t mark:1; /**< Set if the flow is marked. */
208 uint32_t drop:1; /**< Drop queue. */
209 uint16_t queues_n; /**< Number of entries in queue[]. */
210 uint16_t (*queues)[]; /**< Queues indexes to use. */
211 struct rte_eth_rss_conf rss_conf; /**< RSS configuration */
212 uint8_t rss_key[40]; /**< copy of the RSS key. */
213 struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
214 struct mlx5_flow_counter_stats counter_stats;/**<The counter stats. */
215 struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
216 /**< Flow with Rx queue. */
219 /** Static initializer for items. */
/* Builds an END-terminated anonymous array of item types. */
221 (const enum rte_flow_item_type []){ \
222 __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
225 /** Structure to generate a simple graph of layers supported by the NIC. */
226 struct mlx5_flow_items {
227 /** List of possible actions for these items. */
228 const enum rte_flow_action_type *const actions;
229 /** Bit-masks corresponding to the possibilities for the item. */
232 * Default bit-masks to use when item->mask is not provided. When
233 * \default_mask is also NULL, the full supported bit-mask (\mask) is
236 const void *default_mask;
237 /** Bit-masks size in bytes. */
238 const unsigned int mask_sz;
240 * Conversion function from rte_flow to NIC specific flow.
243 * rte_flow item to convert.
244 * @param default_mask
245 * Default bit-masks to use when item->mask is not provided.
247 * Internal structure to store the conversion.
250 * 0 on success, a negative errno value otherwise and rte_errno is
253 int (*convert)(const struct rte_flow_item *item,
254 const void *default_mask,
256 /** Size in bytes of the destination structure. */
257 const unsigned int dst_sz;
258 /** List of possible following items. */
259 const enum rte_flow_item_type *const items;
262 /** Valid action for this PMD. */
/* COUNT is only advertised when the Verbs counter-set API is present. */
263 static const enum rte_flow_action_type valid_actions[] = {
264 RTE_FLOW_ACTION_TYPE_DROP,
265 RTE_FLOW_ACTION_TYPE_QUEUE,
266 RTE_FLOW_ACTION_TYPE_MARK,
267 RTE_FLOW_ACTION_TYPE_FLAG,
268 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
269 RTE_FLOW_ACTION_TYPE_COUNT,
271 RTE_FLOW_ACTION_TYPE_END,
274 /** Graph of supported items and associated actions. */
/*
 * Indexed by rte_flow item type. Each node lists which item types may
 * legally follow it (.items), the widest supported match mask (.mask),
 * the mask applied when the user supplies none (.default_mask), the
 * converter callback and the size of the resulting Verbs spec
 * (.dst_sz). END -> ETH/VXLAN roots the graph; VXLAN -> ETH re-enters
 * it for the inner (encapsulated) headers.
 */
275 static const struct mlx5_flow_items mlx5_flow_items[] = {
276 [RTE_FLOW_ITEM_TYPE_END] = {
277 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
278 RTE_FLOW_ITEM_TYPE_VXLAN),
280 [RTE_FLOW_ITEM_TYPE_ETH] = {
281 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
282 RTE_FLOW_ITEM_TYPE_IPV4,
283 RTE_FLOW_ITEM_TYPE_IPV6),
284 .actions = valid_actions,
285 .mask = &(const struct rte_flow_item_eth){
286 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
287 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
290 .default_mask = &rte_flow_item_eth_mask,
291 .mask_sz = sizeof(struct rte_flow_item_eth),
292 .convert = mlx5_flow_create_eth,
293 .dst_sz = sizeof(struct ibv_flow_spec_eth),
295 [RTE_FLOW_ITEM_TYPE_VLAN] = {
296 .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
297 RTE_FLOW_ITEM_TYPE_IPV6),
298 .actions = valid_actions,
299 .mask = &(const struct rte_flow_item_vlan){
302 .default_mask = &rte_flow_item_vlan_mask,
303 .mask_sz = sizeof(struct rte_flow_item_vlan),
304 .convert = mlx5_flow_create_vlan,
/* VLAN matching is folded into the ETH spec, hence no own dst_sz. */
307 [RTE_FLOW_ITEM_TYPE_IPV4] = {
308 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
309 RTE_FLOW_ITEM_TYPE_TCP),
310 .actions = valid_actions,
311 .mask = &(const struct rte_flow_item_ipv4){
315 .type_of_service = -1,
319 .default_mask = &rte_flow_item_ipv4_mask,
320 .mask_sz = sizeof(struct rte_flow_item_ipv4),
321 .convert = mlx5_flow_create_ipv4,
322 .dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
324 [RTE_FLOW_ITEM_TYPE_IPV6] = {
325 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
326 RTE_FLOW_ITEM_TYPE_TCP),
327 .actions = valid_actions,
328 .mask = &(const struct rte_flow_item_ipv6){
331 0xff, 0xff, 0xff, 0xff,
332 0xff, 0xff, 0xff, 0xff,
333 0xff, 0xff, 0xff, 0xff,
334 0xff, 0xff, 0xff, 0xff,
337 0xff, 0xff, 0xff, 0xff,
338 0xff, 0xff, 0xff, 0xff,
339 0xff, 0xff, 0xff, 0xff,
340 0xff, 0xff, 0xff, 0xff,
347 .default_mask = &rte_flow_item_ipv6_mask,
348 .mask_sz = sizeof(struct rte_flow_item_ipv6),
349 .convert = mlx5_flow_create_ipv6,
350 .dst_sz = sizeof(struct ibv_flow_spec_ipv6),
352 [RTE_FLOW_ITEM_TYPE_UDP] = {
353 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
354 .actions = valid_actions,
355 .mask = &(const struct rte_flow_item_udp){
361 .default_mask = &rte_flow_item_udp_mask,
362 .mask_sz = sizeof(struct rte_flow_item_udp),
363 .convert = mlx5_flow_create_udp,
364 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
366 [RTE_FLOW_ITEM_TYPE_TCP] = {
367 .actions = valid_actions,
368 .mask = &(const struct rte_flow_item_tcp){
374 .default_mask = &rte_flow_item_tcp_mask,
375 .mask_sz = sizeof(struct rte_flow_item_tcp),
376 .convert = mlx5_flow_create_tcp,
377 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
379 [RTE_FLOW_ITEM_TYPE_VXLAN] = {
380 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
381 .actions = valid_actions,
382 .mask = &(const struct rte_flow_item_vxlan){
383 .vni = "\xff\xff\xff",
385 .default_mask = &rte_flow_item_vxlan_mask,
386 .mask_sz = sizeof(struct rte_flow_item_vxlan),
387 .convert = mlx5_flow_create_vxlan,
388 .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
392 /** Structure to pass to the conversion function. */
/*
 * Scratch state shared by validation/creation: action flags, the
 * target queues, the RSS config, the last L3/L4 layer seen, and one
 * Verbs attribute buffer per hash Rx queue type being built.
 */
393 struct mlx5_flow_parse {
394 uint32_t inner; /**< Set once VXLAN is encountered. */
396 /**< Whether resources should remain after a validate. */
397 uint32_t drop:1; /**< Target is a drop queue. */
398 uint32_t mark:1; /**< Mark is present in the flow. */
399 uint32_t count:1; /**< Count is present in the flow. */
400 uint32_t mark_id; /**< Mark identifier. */
401 uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use. */
402 uint16_t queues_n; /**< Number of entries in queue[]. */
403 struct rte_eth_rss_conf rss_conf; /**< RSS configuration */
404 uint8_t rss_key[40]; /**< copy of the RSS key. */
405 enum hash_rxq_type layer; /**< Last pattern layer detected. */
406 struct ibv_counter_set *cs; /**< Holds the counter set for the rule */
408 struct ibv_flow_attr *ibv_attr;
409 /**< Pointer to Verbs attributes. */
411 /**< Current position or total size of the attribute. */
412 } queue[RTE_DIM(hash_rxq_init)];
/* rte_flow driver callbacks exported through rte_flow_ops_get(). */
415 static const struct rte_flow_ops mlx5_flow_ops = {
416 .validate = mlx5_flow_validate,
417 .create = mlx5_flow_create,
418 .destroy = mlx5_flow_destroy,
419 .flush = mlx5_flow_flush,
/* Flow query needs counter sets; compiled out without Verbs support. */
420 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
421 .query = mlx5_flow_query,
425 .isolate = mlx5_flow_isolate,
428 /* Convert FDIR request to Generic flow. */
/*
 * NOTE(review): scratch structure holding a legacy flow-director
 * request re-expressed as rte_flow attr/items/actions plus the L2-L4
 * header spec storage the items point into (struct opener not visible
 * in this chunk — confirm its name in the file).
 */
430 struct rte_flow_attr attr;
431 struct rte_flow_action actions[2];
432 struct rte_flow_item items[4];
433 struct rte_flow_item_eth l2;
434 struct rte_flow_item_eth l2_mask;
436 struct rte_flow_item_ipv4 ipv4;
437 struct rte_flow_item_ipv6 ipv6;
440 struct rte_flow_item_udp udp;
441 struct rte_flow_item_tcp tcp;
443 struct rte_flow_action_queue queue;
446 /* Verbs specification header. */
/* Common prefix of every Verbs flow spec: type tag (size follows). */
447 struct ibv_spec_header {
448 enum ibv_flow_spec_type type;
453 * Check support for a given item.
456 * Item specification.
458 * Bit-masks covering supported fields to compare with spec, last and mask in
461 * Bit-Mask size in bytes.
464 * 0 on success, a negative errno value otherwise and rte_errno is set.
467 mlx5_flow_item_validate(const struct rte_flow_item *item,
468 const uint8_t *mask, unsigned int size)
/* mask/last without a spec to apply them to is meaningless. */
470 if (!item->spec && (item->mask || item->last)) {
/* Every byte set in spec/last must be representable in the mask. */
474 if (item->spec && !item->mask) {
476 const uint8_t *spec = item->spec;
478 for (i = 0; i < size; ++i)
479 if ((spec[i] | mask[i]) != mask[i]) {
484 if (item->last && !item->mask) {
486 const uint8_t *spec = item->last;
488 for (i = 0; i < size; ++i)
489 if ((spec[i] | mask[i]) != mask[i]) {
496 const uint8_t *spec = item->spec;
498 for (i = 0; i < size; ++i)
499 if ((spec[i] | mask[i]) != mask[i]) {
/*
 * Range matching is not supported: after masking, spec and last
 * must describe the same value (memcmp below must return 0).
 */
504 if (item->spec && item->last) {
507 const uint8_t *apply = mask;
513 for (i = 0; i < size; ++i) {
514 spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
515 last[i] = ((const uint8_t *)item->last)[i] & apply[i];
517 ret = memcmp(spec, last, size);
527 * Copy the RSS configuration from the user ones, of the rss_conf is null,
528 * uses the driver one.
531 * Internal parser structure.
533 * User RSS configuration to save.
536 * 0 on success, a negative errno value otherwise and rte_errno is set.
539 mlx5_flow_convert_rss_conf(struct mlx5_flow_parse *parser,
540 const struct rte_eth_rss_conf *rss_conf)
543 * This function is also called at the beginning of
544 * mlx5_flow_convert_actions() to initialize the parser with the
545 * device default RSS configuration.
/* Reject hash-function bits this PMD cannot offload. */
548 if (rss_conf->rss_hf & MLX5_RSS_HF_MASK) {
/* The device only accepts a 40-byte Toeplitz key. */
552 if (rss_conf->rss_key_len != 40) {
/* Keep a private copy of the key; rss_conf may not outlive us. */
556 if (rss_conf->rss_key_len && rss_conf->rss_key) {
557 parser->rss_conf.rss_key_len = rss_conf->rss_key_len;
558 memcpy(parser->rss_key, rss_conf->rss_key,
559 rss_conf->rss_key_len);
560 parser->rss_conf.rss_key = parser->rss_key;
562 parser->rss_conf.rss_hf = rss_conf->rss_hf;
568 * Extract attribute to the parser.
571 * Flow rule attributes.
573 * Perform verbose error reporting if not NULL.
576 * 0 on success, a negative errno value otherwise and rte_errno is set.
579 mlx5_flow_convert_attributes(const struct rte_flow_attr *attr,
580 struct rte_flow_error *error)
/* Only the default group (0) is supported. */
583 rte_flow_error_set(error, ENOTSUP,
584 RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
586 "groups are not supported");
/* Only 0 or the reserved control priority are accepted. */
589 if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
590 rte_flow_error_set(error, ENOTSUP,
591 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
593 "priorities are not supported");
597 rte_flow_error_set(error, ENOTSUP,
598 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
600 "egress is not supported");
/* Ingress direction is mandatory for this PMD. */
603 if (!attr->ingress) {
604 rte_flow_error_set(error, ENOTSUP,
605 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
607 "only ingress is supported");
614 * Extract actions request to the parser.
617 * Pointer to Ethernet device.
619 * Associated actions (list terminated by the END action).
621 * Perform verbose error reporting if not NULL.
622 * @param[in, out] parser
623 * Internal parser structure.
626 * 0 on success, a negative errno value otherwise and rte_errno is set.
629 mlx5_flow_convert_actions(struct rte_eth_dev *dev,
630 const struct rte_flow_action actions[],
631 struct rte_flow_error *error,
632 struct mlx5_flow_parse *parser)
634 struct priv *priv = dev->data->dev_private;
638 * Add default RSS configuration necessary for Verbs to create QP even
639 * if no RSS is necessary.
641 ret = mlx5_flow_convert_rss_conf(parser,
642 (const struct rte_eth_rss_conf *)
/* Walk the END-terminated action list, recording each into parser. */
646 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
647 if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
649 } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
651 } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
652 const struct rte_flow_action_queue *queue =
653 (const struct rte_flow_action_queue *)
/* Queue index must exist on the device. */
658 if (!queue || (queue->index > (priv->rxqs_n - 1)))
659 goto exit_action_not_supported;
/* A QUEUE action combined with RSS must target an RSS queue. */
660 for (n = 0; n < parser->queues_n; ++n) {
661 if (parser->queues[n] == queue->index) {
666 if (parser->queues_n > 1 && !found) {
667 rte_flow_error_set(error, ENOTSUP,
668 RTE_FLOW_ERROR_TYPE_ACTION,
670 "queue action not in RSS queues");
674 parser->queues_n = 1;
675 parser->queues[0] = queue->index;
677 } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
678 const struct rte_flow_action_rss *rss =
679 (const struct rte_flow_action_rss *)
683 if (!rss || !rss->num) {
684 rte_flow_error_set(error, EINVAL,
685 RTE_FLOW_ERROR_TYPE_ACTION,
/* A prior QUEUE action must be a member of the RSS set. */
690 if (parser->queues_n == 1) {
693 assert(parser->queues_n);
694 for (n = 0; n < rss->num; ++n) {
695 if (parser->queues[0] ==
702 rte_flow_error_set(error, ENOTSUP,
703 RTE_FLOW_ERROR_TYPE_ACTION,
705 "queue action not in RSS"
/* Every RSS queue index must be valid for the port. */
710 for (n = 0; n < rss->num; ++n) {
711 if (rss->queue[n] >= priv->rxqs_n) {
712 rte_flow_error_set(error, EINVAL,
713 RTE_FLOW_ERROR_TYPE_ACTION,
715 "queue id > number of"
720 for (n = 0; n < rss->num; ++n)
721 parser->queues[n] = rss->queue[n];
722 parser->queues_n = rss->num;
723 if (mlx5_flow_convert_rss_conf(parser, rss->rss_conf)) {
724 rte_flow_error_set(error, EINVAL,
725 RTE_FLOW_ERROR_TYPE_ACTION,
727 "wrong RSS configuration");
730 } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
731 const struct rte_flow_action_mark *mark =
732 (const struct rte_flow_action_mark *)
736 rte_flow_error_set(error, EINVAL,
737 RTE_FLOW_ERROR_TYPE_ACTION,
739 "mark must be defined");
741 } else if (mark->id >= MLX5_FLOW_MARK_MAX) {
742 rte_flow_error_set(error, ENOTSUP,
743 RTE_FLOW_ERROR_TYPE_ACTION,
745 "mark must be between 0"
750 parser->mark_id = mark->id;
751 } else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
/* COUNT is only honored when the device exposes flow counters. */
753 } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
754 priv->config.flow_counter_en) {
757 goto exit_action_not_supported;
/* A flow must end up somewhere: either queues or the drop queue. */
760 if (parser->drop && parser->mark)
762 if (!parser->queues_n && !parser->drop) {
763 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
764 NULL, "no valid action");
768 exit_action_not_supported:
769 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
770 actions, "action not supported");
778 * Pattern specification (list terminated by the END pattern item).
780 * Perform verbose error reporting if not NULL.
781 * @param[in, out] parser
782 * Internal parser structure.
785 * 0 on success, a negative errno value otherwise and rte_errno is set.
788 mlx5_flow_convert_items_validate(const struct rte_flow_item items[],
789 struct rte_flow_error *error,
790 struct mlx5_flow_parse *parser)
792 const struct mlx5_flow_items *cur_item = mlx5_flow_items;
796 /* Initialise the offsets to start after verbs attribute. */
797 for (i = 0; i != hash_rxq_init_n; ++i)
798 parser->queue[i].offset = sizeof(struct ibv_flow_attr);
799 for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
800 const struct mlx5_flow_items *token = NULL;
803 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
/* The item must be a legal successor of the previous one. */
807 cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
809 if (cur_item->items[i] == items->type) {
810 token = &mlx5_flow_items[items->type];
815 goto exit_item_not_supported;
817 ret = mlx5_flow_item_validate(items,
818 (const uint8_t *)cur_item->mask,
821 goto exit_item_not_supported;
/* Only a single level of VXLAN encapsulation is supported. */
822 if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
824 rte_flow_error_set(error, ENOTSUP,
825 RTE_FLOW_ERROR_TYPE_ITEM,
827 "cannot recognize multiple"
828 " VXLAN encapsulations");
831 parser->inner = IBV_FLOW_SPEC_INNER;
/* Grow the per-hash-queue spec size by this item's Verbs spec. */
834 parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
836 for (n = 0; n != hash_rxq_init_n; ++n)
837 parser->queue[n].offset += cur_item->dst_sz;
/* Reserve room for the action specs appended after the items. */
841 parser->queue[HASH_RXQ_ETH].offset +=
842 sizeof(struct ibv_flow_spec_action_drop);
845 for (i = 0; i != hash_rxq_init_n; ++i)
846 parser->queue[i].offset +=
847 sizeof(struct ibv_flow_spec_action_tag);
850 unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
852 for (i = 0; i != hash_rxq_init_n; ++i)
853 parser->queue[i].offset += size;
856 exit_item_not_supported:
857 return rte_flow_error_set(error, -ret, RTE_FLOW_ERROR_TYPE_ITEM,
858 items, "item not supported");
862 * Allocate memory space to store verbs flow attributes.
864 * @param[in] priority
867 * Amount of byte to allocate.
869 * Perform verbose error reporting if not NULL.
872 * A verbs flow attribute on success, NULL otherwise and rte_errno is set.
874 static struct ibv_flow_attr *
875 mlx5_flow_convert_allocate(unsigned int priority,
877 struct rte_flow_error *error)
879 struct ibv_flow_attr *ibv_attr;
/* Zeroed allocation: specs are appended into the trailing space. */
881 ibv_attr = rte_calloc(__func__, 1, size, 0);
883 rte_flow_error_set(error, ENOMEM,
884 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
886 "cannot allocate verbs spec attributes");
889 ibv_attr->priority = priority;
894 * Finalise verbs flow attributes.
896 * @param[in, out] parser
897 * Internal parser structure.
900 mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
/*
 * h{min,max} bound the hash queue types of the detected IP family,
 * oh{min,max} bound the opposite family (to be discarded), and ip is
 * the family's "no L4" fallback queue type.
 */
902 const unsigned int ipv4 =
903 hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
904 const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 : HASH_RXQ_TCPV6;
905 const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
906 const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 : HASH_RXQ_TCPV4;
907 const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 : HASH_RXQ_IPV4;
908 const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
911 /* Remove any other flow not matching the pattern. */
/* Single queue: RSS is pointless, keep only the ETH flavor. */
912 if (parser->queues_n == 1) {
913 for (i = 0; i != hash_rxq_init_n; ++i) {
914 if (i == HASH_RXQ_ETH)
916 rte_free(parser->queue[i].ibv_attr);
917 parser->queue[i].ibv_attr = NULL;
921 if (parser->layer == HASH_RXQ_ETH) {
925 * This layer becomes useless as the pattern define under
928 rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
929 parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
931 /* Remove opposite kind of layer e.g. IPv6 if the pattern is IPv4. */
932 for (i = ohmin; i != (ohmax + 1); ++i) {
933 if (!parser->queue[i].ibv_attr)
935 rte_free(parser->queue[i].ibv_attr);
936 parser->queue[i].ibv_attr = NULL;
938 /* Remove impossible flow according to the RSS configuration. */
939 if (hash_rxq_init[parser->layer].dpdk_rss_hf &
940 parser->rss_conf.rss_hf) {
941 /* Remove any other flow. */
942 for (i = hmin; i != (hmax + 1); ++i) {
943 if ((i == parser->layer) ||
944 (!parser->queue[i].ibv_attr))
946 rte_free(parser->queue[i].ibv_attr);
947 parser->queue[i].ibv_attr = NULL;
949 } else if (!parser->queue[ip].ibv_attr) {
950 /* no RSS possible with the current configuration. */
951 parser->queues_n = 1;
956 * Fill missing layers in verbs specifications, or compute the correct
957 * offset to allocate the memory space for the attributes and
960 for (i = 0; i != hash_rxq_init_n - 1; ++i) {
/* Union: only the spec matching this queue type's layer is set. */
962 struct ibv_flow_spec_ipv4_ext ipv4;
963 struct ibv_flow_spec_ipv6 ipv6;
964 struct ibv_flow_spec_tcp_udp udp_tcp;
969 if (i == parser->layer)
/* ETH-only pattern: synthesize the missing L3 spec. */
971 if (parser->layer == HASH_RXQ_ETH) {
972 if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
973 size = sizeof(struct ibv_flow_spec_ipv4_ext);
974 specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
975 .type = IBV_FLOW_SPEC_IPV4_EXT,
979 size = sizeof(struct ibv_flow_spec_ipv6);
980 specs.ipv6 = (struct ibv_flow_spec_ipv6){
981 .type = IBV_FLOW_SPEC_IPV6,
985 if (parser->queue[i].ibv_attr) {
986 dst = (void *)((uintptr_t)
987 parser->queue[i].ibv_attr +
988 parser->queue[i].offset);
989 memcpy(dst, &specs, size);
990 ++parser->queue[i].ibv_attr->num_of_specs;
992 parser->queue[i].offset += size;
/* L4 hash queue types additionally need an empty TCP/UDP spec. */
994 if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
995 (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
996 size = sizeof(struct ibv_flow_spec_tcp_udp);
997 specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
998 .type = ((i == HASH_RXQ_UDPV4 ||
999 i == HASH_RXQ_UDPV6) ?
1004 if (parser->queue[i].ibv_attr) {
1005 dst = (void *)((uintptr_t)
1006 parser->queue[i].ibv_attr +
1007 parser->queue[i].offset);
1008 memcpy(dst, &specs, size);
1009 ++parser->queue[i].ibv_attr->num_of_specs;
1011 parser->queue[i].offset += size;
1017 * Validate and convert a flow supported by the NIC.
1020 * Pointer to Ethernet device.
1022 * Flow rule attributes.
1023 * @param[in] pattern
1024 * Pattern specification (list terminated by the END pattern item).
1025 * @param[in] actions
1026 * Associated actions (list terminated by the END action).
1028 * Perform verbose error reporting if not NULL.
1029 * @param[in, out] parser
1030 * Internal parser structure.
1033 * 0 on success, a negative errno value otherwise and rte_errno is set.
1036 mlx5_flow_convert(struct rte_eth_dev *dev,
1037 const struct rte_flow_attr *attr,
1038 const struct rte_flow_item items[],
1039 const struct rte_flow_action actions[],
1040 struct rte_flow_error *error,
1041 struct mlx5_flow_parse *parser)
1043 const struct mlx5_flow_items *cur_item = mlx5_flow_items;
1047 /* First step. Validate the attributes, items and actions. */
/* Reset the parser, preserving only the create/validate flag. */
1048 *parser = (struct mlx5_flow_parse){
1049 .create = parser->create,
1050 .layer = HASH_RXQ_ETH,
1051 .mark_id = MLX5_FLOW_MARK_DEFAULT,
1053 ret = mlx5_flow_convert_attributes(attr, error);
1056 ret = mlx5_flow_convert_actions(dev, actions, error, parser);
1059 ret = mlx5_flow_convert_items_validate(items, error, parser);
1062 mlx5_flow_convert_finalise(parser);
1065 * Allocate the memory space to store verbs specifications.
/* Drop flows only need the single ETH attribute buffer. */
1068 unsigned int priority =
1070 hash_rxq_init[HASH_RXQ_ETH].flow_priority;
1071 unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;
1073 parser->queue[HASH_RXQ_ETH].ibv_attr =
1074 mlx5_flow_convert_allocate(priority, offset, error);
1075 if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
/* Offset now tracks the write position for the conversion pass. */
1077 parser->queue[HASH_RXQ_ETH].offset =
1078 sizeof(struct ibv_flow_attr);
1080 for (i = 0; i != hash_rxq_init_n; ++i) {
1081 unsigned int priority =
1083 hash_rxq_init[i].flow_priority;
1084 unsigned int offset;
/* Skip flavors not requested by the RSS configuration. */
1086 if (!(parser->rss_conf.rss_hf &
1087 hash_rxq_init[i].dpdk_rss_hf) &&
1088 (i != HASH_RXQ_ETH))
1090 offset = parser->queue[i].offset;
1091 parser->queue[i].ibv_attr =
1092 mlx5_flow_convert_allocate(priority,
1094 if (!parser->queue[i].ibv_attr)
1096 parser->queue[i].offset = sizeof(struct ibv_flow_attr);
1099 /* Third step. Conversion parse, fill the specifications. */
1101 for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
1102 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
1104 cur_item = &mlx5_flow_items[items->type];
1105 ret = cur_item->convert(items,
1106 (cur_item->default_mask ?
1107 cur_item->default_mask :
1111 rte_flow_error_set(error, rte_errno,
1112 RTE_FLOW_ERROR_TYPE_ITEM,
1113 items, "item not supported");
/* Append mark/count action specs detected earlier. */
1118 mlx5_flow_create_flag_mark(parser, parser->mark_id);
1119 if (parser->count && parser->create) {
1120 mlx5_flow_create_count(dev, parser);
1122 goto exit_count_error;
1125 * Last step. Complete missing specification to reach the RSS
1128 if (!parser->drop) {
1129 mlx5_flow_convert_finalise(parser);
1131 parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
1133 hash_rxq_init[parser->layer].flow_priority;
1136 /* Only verification is expected, all resources should be released. */
1137 if (!parser->create) {
1138 for (i = 0; i != hash_rxq_init_n; ++i) {
1139 if (parser->queue[i].ibv_attr) {
1140 rte_free(parser->queue[i].ibv_attr);
1141 parser->queue[i].ibv_attr = NULL;
/* Error paths: free every attribute buffer allocated so far. */
1147 for (i = 0; i != hash_rxq_init_n; ++i) {
1148 if (parser->queue[i].ibv_attr) {
1149 rte_free(parser->queue[i].ibv_attr);
1150 parser->queue[i].ibv_attr = NULL;
1153 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1154 NULL, "cannot allocate verbs spec attributes");
1157 rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1158 NULL, "cannot create counter");
1163 * Copy the specification created into the flow.
1166 * Internal parser structure.
1168 * Create specification.
1170 * Size in bytes of the specification to copy.
1173 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
/* Append src to every live attribute buffer of a compatible L3 type. */
1179 for (i = 0; i != hash_rxq_init_n; ++i) {
1180 if (!parser->queue[i].ibv_attr)
1182 /* Specification must be the same l3 type or none. */
1183 if (parser->layer == HASH_RXQ_ETH ||
1184 (hash_rxq_init[parser->layer].ip_version ==
1185 hash_rxq_init[i].ip_version) ||
1186 (hash_rxq_init[i].ip_version == 0)) {
1187 dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1188 parser->queue[i].offset);
1189 memcpy(dst, src, size);
1190 ++parser->queue[i].ibv_attr->num_of_specs;
1191 parser->queue[i].offset += size;
1197 * Convert Ethernet item to Verbs specification.
1200 * Item specification.
1201 * @param default_mask[in]
1202 * Default bit-masks to use when item->mask is not provided.
1203 * @param data[in, out]
1207 * 0 on success, a negative errno value otherwise and rte_errno is set.
1210 mlx5_flow_create_eth(const struct rte_flow_item *item,
1211 const void *default_mask,
1214 const struct rte_flow_item_eth *spec = item->spec;
1215 const struct rte_flow_item_eth *mask = item->mask;
1216 struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1217 const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1218 struct ibv_flow_spec_eth eth = {
/* INNER flag tags this spec as part of the encapsulated headers. */
1219 .type = parser->inner | IBV_FLOW_SPEC_ETH,
1223 /* Don't update layer for the inner pattern. */
1225 parser->layer = HASH_RXQ_ETH;
1230 mask = default_mask;
1231 memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
1232 memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
1233 eth.val.ether_type = spec->type;
1234 memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
1235 memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
1236 eth.mask.ether_type = mask->type;
1237 /* Remove unwanted bits from values. */
1238 for (i = 0; i < ETHER_ADDR_LEN; ++i) {
1239 eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
1240 eth.val.src_mac[i] &= eth.mask.src_mac[i];
1242 eth.val.ether_type &= eth.mask.ether_type;
1244 mlx5_flow_create_copy(parser, &eth, eth_size);
1249 * Convert VLAN item to Verbs specification.
1252 * Item specification.
1253 * @param default_mask[in]
1254 * Default bit-masks to use when item->mask is not provided.
1255 * @param data[in, out]
1259 * 0 on success, a negative errno value otherwise and rte_errno is set.
1262 mlx5_flow_create_vlan(const struct rte_flow_item *item,
1263 const void *default_mask,
1266 const struct rte_flow_item_vlan *spec = item->spec;
1267 const struct rte_flow_item_vlan *mask = item->mask;
1268 struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1269 struct ibv_flow_spec_eth *eth;
1270 const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1275 mask = default_mask;
/*
 * No spec of its own: the VLAN TCI is patched into the ETH spec
 * previously emitted at (offset - eth_size) in each attribute buffer.
 */
1277 for (i = 0; i != hash_rxq_init_n; ++i) {
1278 if (!parser->queue[i].ibv_attr)
1281 eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1282 parser->queue[i].offset - eth_size);
1283 eth->val.vlan_tag = spec->tci;
1284 eth->mask.vlan_tag = mask->tci;
1285 eth->val.vlan_tag &= eth->mask.vlan_tag;
1292 * Convert IPv4 item to Verbs specification.
1295 * Item specification.
1296 * @param default_mask[in]
1297 * Default bit-masks to use when item->mask is not provided.
1298 * @param data[in, out]
1302 * 0 on success, a negative errno value otherwise and rte_errno is set.
1305 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
1306 const void *default_mask,
1309 const struct rte_flow_item_ipv4 *spec = item->spec;
1310 const struct rte_flow_item_ipv4 *mask = item->mask;
1311 struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1312 unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
1313 struct ibv_flow_spec_ipv4_ext ipv4 = {
1314 .type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
1318 /* Don't update layer for the inner pattern. */
1320 parser->layer = HASH_RXQ_IPV4;
1323 mask = default_mask;
/* Translate addresses/proto/TOS from the rte_flow header layout. */
1324 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
1325 .src_ip = spec->hdr.src_addr,
1326 .dst_ip = spec->hdr.dst_addr,
1327 .proto = spec->hdr.next_proto_id,
1328 .tos = spec->hdr.type_of_service,
1330 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
1331 .src_ip = mask->hdr.src_addr,
1332 .dst_ip = mask->hdr.dst_addr,
1333 .proto = mask->hdr.next_proto_id,
1334 .tos = mask->hdr.type_of_service,
1336 /* Remove unwanted bits from values. */
1337 ipv4.val.src_ip &= ipv4.mask.src_ip;
1338 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
1339 ipv4.val.proto &= ipv4.mask.proto;
1340 ipv4.val.tos &= ipv4.mask.tos;
1342 mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
1347 * Convert IPv6 item to Verbs specification.
1350 * Item specification.
1351 * @param default_mask[in]
1352 * Default bit-masks to use when item->mask is not provided.
1353 * @param data[in, out]
1357 * 0 on success, a negative errno value otherwise and rte_errno is set.
1360 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
1361 const void *default_mask,
1364 const struct rte_flow_item_ipv6 *spec = item->spec;
1365 const struct rte_flow_item_ipv6 *mask = item->mask;
1366 struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1367 unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
1368 struct ibv_flow_spec_ipv6 ipv6 = {
1369 .type = parser->inner | IBV_FLOW_SPEC_IPV6,
1373 /* Don't update layer for the inner pattern. */
1375 parser->layer = HASH_RXQ_IPV6;
1378 uint32_t vtc_flow_val;
1379 uint32_t vtc_flow_mask;
1382 mask = default_mask;
1383 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
1384 RTE_DIM(ipv6.val.src_ip));
1385 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
1386 RTE_DIM(ipv6.val.dst_ip));
1387 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
1388 RTE_DIM(ipv6.mask.src_ip));
1389 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
1390 RTE_DIM(ipv6.mask.dst_ip));
/*
 * vtc_flow packs version/TC/flow-label; unpack it in CPU order,
 * then re-encode the flow label as big-endian for Verbs.
 */
1391 vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
1392 vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
1393 ipv6.val.flow_label =
1394 rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
1396 ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
1398 ipv6.val.next_hdr = spec->hdr.proto;
1399 ipv6.val.hop_limit = spec->hdr.hop_limits;
1400 ipv6.mask.flow_label =
1401 rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
1403 ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
1405 ipv6.mask.next_hdr = mask->hdr.proto;
1406 ipv6.mask.hop_limit = mask->hdr.hop_limits;
1407 /* Remove unwanted bits from values. */
1408 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
1409 ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
1410 ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
1412 ipv6.val.flow_label &= ipv6.mask.flow_label;
1413 ipv6.val.traffic_class &= ipv6.mask.traffic_class;
1414 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
1415 ipv6.val.hop_limit &= ipv6.mask.hop_limit;
1417 mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
1422 * Convert UDP item to Verbs specification.
1425 * Item specification.
1426 * @param default_mask[in]
1427 * Default bit-masks to use when item->mask is not provided.
1428 * @param data[in, out]
1432 * 0 on success, a negative errno value otherwise and rte_errno is set.
/* Build a Verbs UDP spec (ports only) and append it to the parser buffer.
 * Promotes the outer hash layer from IPv4/IPv6 to the matching UDP variant.
 * NOTE(review): fragmented listing — the "else" of the layer selection and
 * the spec/mask guards are not visible here. */
1435 mlx5_flow_create_udp(const struct rte_flow_item *item,
1436 const void *default_mask,
1439 const struct rte_flow_item_udp *spec = item->spec;
1440 const struct rte_flow_item_udp *mask = item->mask;
1441 struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1442 unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1443 struct ibv_flow_spec_tcp_udp udp = {
1444 .type = parser->inner | IBV_FLOW_SPEC_UDP,
1448 /* Don't update layer for the inner pattern. */
1449 if (!parser->inner) {
1450 if (parser->layer == HASH_RXQ_IPV4)
1451 parser->layer = HASH_RXQ_UDPV4;
/* presumably the "else" branch for the non-IPv4 case (line missing). */
1453 parser->layer = HASH_RXQ_UDPV6;
1457 mask = default_mask;
/* Ports are kept in network byte order; Verbs expects them as-is. */
1458 udp.val.dst_port = spec->hdr.dst_port;
1459 udp.val.src_port = spec->hdr.src_port;
1460 udp.mask.dst_port = mask->hdr.dst_port;
1461 udp.mask.src_port = mask->hdr.src_port;
1462 /* Remove unwanted bits from values. */
1463 udp.val.src_port &= udp.mask.src_port;
1464 udp.val.dst_port &= udp.mask.dst_port;
1466 mlx5_flow_create_copy(parser, &udp, udp_size);
1471 * Convert TCP item to Verbs specification.
1474 * Item specification.
1475 * @param default_mask[in]
1476 * Default bit-masks to use when item->mask is not provided.
1477 * @param data[in, out]
1481 * 0 on success, a negative errno value otherwise and rte_errno is set.
/* Build a Verbs TCP spec (ports only); mirror of mlx5_flow_create_udp but
 * selects the TCP hash layers. NOTE(review): fragmented listing — the "else"
 * of the layer selection and the spec/mask guards are not visible here. */
1484 mlx5_flow_create_tcp(const struct rte_flow_item *item,
1485 const void *default_mask,
1488 const struct rte_flow_item_tcp *spec = item->spec;
1489 const struct rte_flow_item_tcp *mask = item->mask;
1490 struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1491 unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1492 struct ibv_flow_spec_tcp_udp tcp = {
1493 .type = parser->inner | IBV_FLOW_SPEC_TCP,
1497 /* Don't update layer for the inner pattern. */
1498 if (!parser->inner) {
1499 if (parser->layer == HASH_RXQ_IPV4)
1500 parser->layer = HASH_RXQ_TCPV4;
/* presumably the "else" branch for the non-IPv4 case (line missing). */
1502 parser->layer = HASH_RXQ_TCPV6;
1506 mask = default_mask;
1507 tcp.val.dst_port = spec->hdr.dst_port;
1508 tcp.val.src_port = spec->hdr.src_port;
1509 tcp.mask.dst_port = mask->hdr.dst_port;
1510 tcp.mask.src_port = mask->hdr.src_port;
1511 /* Remove unwanted bits from values. */
1512 tcp.val.src_port &= tcp.mask.src_port;
1513 tcp.val.dst_port &= tcp.mask.dst_port;
1515 mlx5_flow_create_copy(parser, &tcp, tcp_size);
1520 * Convert VXLAN item to Verbs specification.
1523 * Item specification.
1524 * @param default_mask[in]
1525 * Default bit-masks to use when item->mask is not provided.
1526 * @param data[in, out]
1530 * 0 on success, a negative errno value otherwise and rte_errno is set.
/* Build a Verbs tunnel spec from a VXLAN item, switch the parser to inner
 * mode for subsequent items, and refuse VNI 0 (it would act as a wildcard).
 * NOTE(review): fragmented listing — the "id" union declaration (vni bytes
 * overlaid on a 32-bit vlan_id) and the error return of the VNI-0 branch are
 * not visible here; the union punning is inferred from its use below. */
1533 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
1534 const void *default_mask,
1537 const struct rte_flow_item_vxlan *spec = item->spec;
1538 const struct rte_flow_item_vxlan *mask = item->mask;
1539 struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1540 unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1541 struct ibv_flow_spec_tunnel vxlan = {
1542 .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
/* All following items describe the inner packet. */
1551 parser->inner = IBV_FLOW_SPEC_INNER;
1554 mask = default_mask;
/* The 24-bit VNI lands in bytes 1..3, leaving byte 0 clear. */
1555 memcpy(&id.vni[1], spec->vni, 3);
1556 vxlan.val.tunnel_id = id.vlan_id;
1557 memcpy(&id.vni[1], mask->vni, 3);
1558 vxlan.mask.tunnel_id = id.vlan_id;
1559 /* Remove unwanted bits from values. */
1560 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1563 * Tunnel id 0 is equivalent as not adding a VXLAN layer, if only this
1564 * layer is defined in the Verbs specification it is interpreted as
1565 * wildcard and all packets will match this rule, if it follows a full
1566 * stack layer (ex: eth / ipv4 / udp), all packets matching the layers
1567 * before will also match this rule.
1568 * To avoid such situation, VNI 0 is currently refused.
1570 if (!vxlan.val.tunnel_id) {
1574 mlx5_flow_create_copy(parser, &vxlan, size);
1579 * Convert mark/flag action to Verbs specification.
1582 * Internal parser structure.
1587 * 0 on success, a negative errno value otherwise and rte_errno is set.
/* Append an ACTION_TAG spec carrying the (encoded) mark id to the parser
 * buffer; caller must have set parser->mark beforehand (asserted below). */
1590 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
1592 unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1593 struct ibv_flow_spec_action_tag tag = {
1594 .type = IBV_FLOW_SPEC_ACTION_TAG,
/* mlx5_flow_mark_set() encodes the user mark into the device tag format. */
1596 .tag_id = mlx5_flow_mark_set(mark_id),
1599 assert(parser->mark);
1600 mlx5_flow_create_copy(parser, &tag, size);
1605 * Convert count action to Verbs specification.
1608 * Pointer to Ethernet device.
1610 * Pointer to MLX5 flow parser structure.
1613 * 0 on success, a negative errno value otherwise and rte_errno is set.
/* Create a Verbs counter set and append an ACTION_COUNT spec referencing it.
 * Compiled out (both params unused) when counter-set support is absent.
 * NOTE(review): fragmented listing — the error check after
 * create_counter_set() (before dereferencing parser->cs) is not visible. */
1616 mlx5_flow_create_count(struct rte_eth_dev *dev __rte_unused,
1617 struct mlx5_flow_parse *parser __rte_unused)
1619 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
1620 struct priv *priv = dev->data->dev_private;
1621 unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
1622 struct ibv_counter_set_init_attr init_attr = {0};
1623 struct ibv_flow_spec_counter_action counter = {
1624 .type = IBV_FLOW_SPEC_ACTION_COUNT,
1626 .counter_set_handle = 0,
1629 init_attr.counter_set_id = 0;
1630 parser->cs = mlx5_glue->create_counter_set(priv->ctx, &init_attr);
1635 counter.counter_set_handle = parser->cs->handle;
1636 mlx5_flow_create_copy(parser, &counter, size);
1642 * Complete flow rule creation with a drop queue.
1645 * Pointer to Ethernet device.
1647 * Internal parser structure.
1649 * Pointer to the rte_flow.
1651 * Perform verbose error reporting if not NULL.
1654 * 0 on success, a negative errno value otherwise and rte_errno is set.
/* Finish a drop rule: append an ACTION_DROP spec to the HASH_RXQ_ETH
 * attribute, hand the attribute over to the flow, and (if the port is
 * started) instantiate the Verbs flow on the drop queue's QP. On error the
 * visible cleanup below destroys the flow/attr and the counter set.
 * NOTE(review): fragmented listing — the "error:" label, the early return
 * when the port is stopped, and the counter-set guard are not visible. */
1657 mlx5_flow_create_action_queue_drop(struct rte_eth_dev *dev,
1658 struct mlx5_flow_parse *parser,
1659 struct rte_flow *flow,
1660 struct rte_flow_error *error)
1662 struct priv *priv = dev->data->dev_private;
1663 struct ibv_flow_spec_action_drop *drop;
1664 unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
/* Write the drop spec directly at the current offset of the attribute. */
1669 drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
1670 parser->queue[HASH_RXQ_ETH].offset);
1671 *drop = (struct ibv_flow_spec_action_drop){
1672 .type = IBV_FLOW_SPEC_ACTION_DROP,
1675 ++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
1676 parser->queue[HASH_RXQ_ETH].offset += size;
/* Ownership of the attribute moves from parser to flow. */
1677 flow->frxq[HASH_RXQ_ETH].ibv_attr =
1678 parser->queue[HASH_RXQ_ETH].ibv_attr;
1680 flow->cs = parser->cs;
/* presumably returns early here when the port is not started (lines missing). */
1681 if (!priv->dev->data->dev_started)
1683 parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
1684 flow->frxq[HASH_RXQ_ETH].ibv_flow =
1685 mlx5_glue->create_flow(priv->flow_drop_queue->qp,
1686 flow->frxq[HASH_RXQ_ETH].ibv_attr);
1687 if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1688 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1689 NULL, "flow rule creation failure");
/* Error path: undo the partially-created flow. */
1695 if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1696 claim_zero(mlx5_glue->destroy_flow
1697 (flow->frxq[HASH_RXQ_ETH].ibv_flow));
1698 flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
1700 if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
1701 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
1702 flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
1705 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
1713 * Create hash Rx queues when RSS is enabled.
1716 * Pointer to Ethernet device.
1718 * Internal parser structure.
1720 * Pointer to the rte_flow.
1722 * Perform verbose error reporting if not NULL.
1725 * 0 on success, a negative errno value otherwise and rte_errno is set.
/* For every hash Rx queue type the parser produced an attribute for, move
 * the attribute into the flow and get (or create) the matching hash Rx
 * queue. Skipped while the port is stopped.
 * NOTE(review): fragmented listing — the mlx5_hrxq_get/mlx5_hrxq_new call
 * names and their trailing arguments (hash fields, queues) are on lines not
 * visible here; inferred from the surviving rss_conf argument lines. */
1728 mlx5_flow_create_action_queue_rss(struct rte_eth_dev *dev,
1729 struct mlx5_flow_parse *parser,
1730 struct rte_flow *flow,
1731 struct rte_flow_error *error)
1733 struct priv *priv = dev->data->dev_private;
1736 for (i = 0; i != hash_rxq_init_n; ++i) {
1737 uint64_t hash_fields;
1739 if (!parser->queue[i].ibv_attr)
/* Transfer attribute ownership from parser to flow. */
1741 flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
1742 parser->queue[i].ibv_attr = NULL;
1743 hash_fields = hash_rxq_init[i].hash_fields;
1744 if (!priv->dev->data->dev_started)
/* Reuse an existing hash Rx queue if one matches... */
1746 flow->frxq[i].hrxq =
1748 parser->rss_conf.rss_key,
1749 parser->rss_conf.rss_key_len,
1753 if (flow->frxq[i].hrxq)
/* ...otherwise create a new one. */
1755 flow->frxq[i].hrxq =
1757 parser->rss_conf.rss_key,
1758 parser->rss_conf.rss_key_len,
1762 if (!flow->frxq[i].hrxq) {
1763 return rte_flow_error_set(error, ENOMEM,
1764 RTE_FLOW_ERROR_TYPE_HANDLE,
1766 "cannot create hash rxq");
1773 * Complete flow rule creation.
1776 * Pointer to Ethernet device.
1778 * Internal parser structure.
1780 * Pointer to the rte_flow.
1782 * Perform verbose error reporting if not NULL.
1785 * 0 on success, a negative errno value otherwise and rte_errno is set.
/* Finish a non-drop rule: build the hash Rx queues (RSS helper above), then
 * instantiate one Verbs flow per populated hash queue and propagate the mark
 * flag to the involved Rx queues. On error, tear down all flows, release the
 * hash queues, free the attributes and restore rte_errno.
 * NOTE(review): fragmented listing — early returns, the "error:" label, the
 * flows_n accounting and the counter-set guard are not visible here. */
1788 mlx5_flow_create_action_queue(struct rte_eth_dev *dev,
1789 struct mlx5_flow_parse *parser,
1790 struct rte_flow *flow,
1791 struct rte_flow_error *error)
1793 struct priv *priv = dev->data->dev_private;
1796 unsigned int flows_n = 0;
1800 assert(!parser->drop);
1801 ret = mlx5_flow_create_action_queue_rss(dev, parser, flow, error);
1805 flow->cs = parser->cs;
/* presumably returns early here when the port is stopped (lines missing). */
1806 if (!priv->dev->data->dev_started)
1808 for (i = 0; i != hash_rxq_init_n; ++i) {
1809 if (!flow->frxq[i].hrxq)
1811 flow->frxq[i].ibv_flow =
1812 mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
1813 flow->frxq[i].ibv_attr);
1814 if (!flow->frxq[i].ibv_flow) {
1815 rte_flow_error_set(error, ENOMEM,
1816 RTE_FLOW_ERROR_TYPE_HANDLE,
1817 NULL, "flow rule creation failure");
1821 DRV_LOG(DEBUG, "port %u %p type %d QP %p ibv_flow %p",
1824 (void *)flow->frxq[i].hrxq,
1825 (void *)flow->frxq[i].ibv_flow);
/* No flow was instantiated at all: report an internal error. */
1828 rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE,
1829 NULL, "internal error in flow creation");
/* Propagate the mark flag to every Rx queue used by this rule. */
1832 for (i = 0; i != parser->queues_n; ++i) {
1833 struct mlx5_rxq_data *q =
1834 (*priv->rxqs)[parser->queues[i]];
1836 q->mark |= parser->mark;
/* Error path: keep rte_errno across the Verbs cleanup calls. */
1840 ret = rte_errno; /* Save rte_errno before cleanup. */
1842 for (i = 0; i != hash_rxq_init_n; ++i) {
1843 if (flow->frxq[i].ibv_flow) {
1844 struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;
1846 claim_zero(mlx5_glue->destroy_flow(ibv_flow));
1848 if (flow->frxq[i].hrxq)
1849 mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
1850 if (flow->frxq[i].ibv_attr)
1851 rte_free(flow->frxq[i].ibv_attr);
1854 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
1858 rte_errno = ret; /* Restore rte_errno. */
1866 * Pointer to Ethernet device.
1868 * Pointer to a TAILQ flow list.
1870 * Flow rule attributes.
1871 * @param[in] pattern
1872 * Pattern specification (list terminated by the END pattern item).
1873 * @param[in] actions
1874 * Associated actions (list terminated by the END action).
1876 * Perform verbose error reporting if not NULL.
1879 * A flow on success, NULL otherwise and rte_errno is set.
/* Parse + validate the rule (mlx5_flow_convert), allocate the rte_flow with
 * its trailing queue array, copy queue/RSS/mark configuration, instantiate
 * it as either a drop or a queue/RSS rule, and insert it into "list".
 * NOTE(review): fragmented listing — the allocation-failure return, the
 * drop/non-drop branch condition and the error label/return are missing. */
1881 static struct rte_flow *
1882 mlx5_flow_list_create(struct rte_eth_dev *dev,
1883 struct mlx5_flows *list,
1884 const struct rte_flow_attr *attr,
1885 const struct rte_flow_item items[],
1886 const struct rte_flow_action actions[],
1887 struct rte_flow_error *error)
1889 struct mlx5_flow_parse parser = { .create = 1, };
1890 struct rte_flow *flow = NULL;
1894 ret = mlx5_flow_convert(dev, attr, items, actions, error, &parser);
/* Queue indices are stored inline after the rte_flow structure. */
1897 flow = rte_calloc(__func__, 1,
1898 sizeof(*flow) + parser.queues_n * sizeof(uint16_t),
1901 rte_flow_error_set(error, ENOMEM,
1902 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1904 "cannot allocate flow memory");
1907 /* Copy queues configuration. */
1908 flow->queues = (uint16_t (*)[])(flow + 1);
1909 memcpy(flow->queues, parser.queues, parser.queues_n * sizeof(uint16_t));
1910 flow->queues_n = parser.queues_n;
1911 flow->mark = parser.mark;
1912 /* Copy RSS configuration. */
1913 flow->rss_conf = parser.rss_conf;
1914 flow->rss_conf.rss_key = flow->rss_key;
1915 memcpy(flow->rss_key, parser.rss_key, parser.rss_conf.rss_key_len);
1916 /* finalise the flow. */
/* Instantiate either the drop rule or the queue/RSS rule (the selecting
 * condition line is missing from this fragment). */
1918 ret = mlx5_flow_create_action_queue_drop(dev, &parser, flow,
1921 ret = mlx5_flow_create_action_queue(dev, &parser, flow, error);
1924 TAILQ_INSERT_TAIL(list, flow, next);
1925 DRV_LOG(DEBUG, "port %u flow created %p", dev->data->port_id,
/* Error path: free any attributes still owned by the parser. */
1929 DRV_LOG(ERR, "port %u flow creation error: %s", dev->data->port_id,
1931 for (i = 0; i != hash_rxq_init_n; ++i) {
1932 if (parser.queue[i].ibv_attr)
1933 rte_free(parser.queue[i].ibv_attr);
1940 * Validate a flow supported by the NIC.
1942 * @see rte_flow_validate()
/* rte_flow validate callback: run the converter in dry-run mode
 * (.create = 0) so nothing is allocated or programmed. */
1946 mlx5_flow_validate(struct rte_eth_dev *dev,
1947 const struct rte_flow_attr *attr,
1948 const struct rte_flow_item items[],
1949 const struct rte_flow_action actions[],
1950 struct rte_flow_error *error)
1952 struct mlx5_flow_parse parser = { .create = 0, };
1954 return mlx5_flow_convert(dev, attr, items, actions, error, &parser);
1960 * @see rte_flow_create()
/* rte_flow create callback: thin wrapper placing the new rule on the
 * device's regular (non-control) flow list. */
1964 mlx5_flow_create(struct rte_eth_dev *dev,
1965 const struct rte_flow_attr *attr,
1966 const struct rte_flow_item items[],
1967 const struct rte_flow_action actions[],
1968 struct rte_flow_error *error)
1970 struct priv *priv = dev->data->dev_private;
1972 return mlx5_flow_list_create(dev, &priv->flows, attr, items, actions,
1977 * Destroy a flow in a list.
1980 * Pointer to Ethernet device.
1982 * Pointer to a TAILQ flow list.
/* Remove one flow from "list" and release everything it owns. Before
 * freeing, recompute the mark flag of each Rx queue the flow used: the flag
 * stays set only if another marked flow in the list still covers that queue.
 * NOTE(review): fragmented listing — loop-control lines (continue/break),
 * the drop/non-drop cleanup branch condition and the "mark" local's
 * declaration are not visible here. */
1987 mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
1988 struct rte_flow *flow)
1990 struct priv *priv = dev->data->dev_private;
/* Mark bookkeeping is only needed for marked, non-drop flows. */
1993 if (flow->drop || !flow->mark)
1995 for (i = 0; i != flow->queues_n; ++i) {
1996 struct rte_flow *tmp;
2000 * To remove the mark from the queue, the queue must not be
2001 * present in any other marked flow (RSS or not).
2003 TAILQ_FOREACH(tmp, list, next) {
2005 uint16_t *tqs = NULL;
/* Find the other flow's queue list through its first hash Rx queue. */
2010 for (j = 0; j != hash_rxq_init_n; ++j) {
2011 if (!tmp->frxq[j].hrxq)
2013 tqs = tmp->frxq[j].hrxq->ind_table->queues;
2014 tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
2018 for (j = 0; (j != tq_n) && !mark; j++)
2019 if (tqs[j] == (*flow->queues)[i])
2022 (*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
/* Release Verbs objects: drop flows use the single ETH entry... */
2026 if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
2027 claim_zero(mlx5_glue->destroy_flow
2028 (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2029 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
/* ...queue/RSS flows release every hash Rx queue type. */
2031 for (i = 0; i != hash_rxq_init_n; ++i) {
2032 struct mlx5_flow *frxq = &flow->frxq[i];
2035 claim_zero(mlx5_glue->destroy_flow
2038 mlx5_hrxq_release(dev, frxq->hrxq);
2040 rte_free(frxq->ibv_attr);
2044 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2047 TAILQ_REMOVE(list, flow, next);
2048 DRV_LOG(DEBUG, "port %u flow destroyed %p", dev->data->port_id,
2054 * Destroy all flows.
2057 * Pointer to Ethernet device.
2059 * Pointer to a TAILQ flow list.
/* Destroy every flow on "list", always taking the head so the TAILQ removal
 * done by mlx5_flow_list_destroy() keeps the iteration safe. */
2062 mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
2064 while (!TAILQ_EMPTY(list)) {
2065 struct rte_flow *flow;
2067 flow = TAILQ_FIRST(list);
2068 mlx5_flow_list_destroy(dev, list, flow);
2073 * Create drop queue.
2076 * Pointer to Ethernet device.
2079 * 0 on success, a negative errno value otherwise and rte_errno is set.
/* Allocate the Verbs objects backing the drop queue, in dependency order:
 * CQ -> WQ -> indirection table -> hash QP, then publish it in
 * priv->flow_drop_queue. The unwind at the bottom releases whatever was
 * created, in reverse order.
 * NOTE(review): fragmented listing — the "goto error" jumps, the WQ/CQ
 * attribute lines referencing fdq->cq, and the final returns are missing. */
2082 mlx5_flow_create_drop_queue(struct rte_eth_dev *dev)
2084 struct priv *priv = dev->data->dev_private;
2085 struct mlx5_hrxq_drop *fdq = NULL;
2089 fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
2092 "port %u cannot allocate memory for drop queue",
2093 dev->data->port_id);
2097 fdq->cq = mlx5_glue->create_cq(priv->ctx, 1, NULL, NULL, 0);
2099 DRV_LOG(WARNING, "port %u cannot allocate CQ for drop queue",
2100 dev->data->port_id);
2104 fdq->wq = mlx5_glue->create_wq
2106 &(struct ibv_wq_init_attr){
2107 .wq_type = IBV_WQT_RQ,
2114 DRV_LOG(WARNING, "port %u cannot allocate WQ for drop queue",
2115 dev->data->port_id);
/* Single-entry indirection table pointing at the drop WQ. */
2119 fdq->ind_table = mlx5_glue->create_rwq_ind_table
2121 &(struct ibv_rwq_ind_table_init_attr){
2122 .log_ind_tbl_size = 0,
2123 .ind_tbl = &fdq->wq,
2126 if (!fdq->ind_table) {
2128 "port %u cannot allocate indirection table for drop"
2130 dev->data->port_id);
/* RSS-hash QP over the single-entry table; hash fields are all zero. */
2134 fdq->qp = mlx5_glue->create_qp_ex
2136 &(struct ibv_qp_init_attr_ex){
2137 .qp_type = IBV_QPT_RAW_PACKET,
2139 IBV_QP_INIT_ATTR_PD |
2140 IBV_QP_INIT_ATTR_IND_TABLE |
2141 IBV_QP_INIT_ATTR_RX_HASH,
2142 .rx_hash_conf = (struct ibv_rx_hash_conf){
2144 IBV_RX_HASH_FUNC_TOEPLITZ,
2145 .rx_hash_key_len = rss_hash_default_key_len,
2146 .rx_hash_key = rss_hash_default_key,
2147 .rx_hash_fields_mask = 0,
2149 .rwq_ind_tbl = fdq->ind_table,
2153 DRV_LOG(WARNING, "port %u cannot allocate QP for drop queue",
2154 dev->data->port_id);
2158 priv->flow_drop_queue = fdq;
/* Error unwind: destroy in reverse creation order; each claim_zero is
 * presumably guarded by a NULL check on a line not visible here. */
2162 claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2164 claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2166 claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2168 claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2171 priv->flow_drop_queue = NULL;
2176 * Delete drop queue.
2179 * Pointer to Ethernet device.
/* Tear down the drop queue created above, in reverse creation order, and
 * clear priv->flow_drop_queue. NOTE(review): the NULL guards before each
 * claim_zero (and the rte_free of fdq) are on lines not visible here. */
2182 mlx5_flow_delete_drop_queue(struct rte_eth_dev *dev)
2184 struct priv *priv = dev->data->dev_private;
2185 struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
2190 claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2192 claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2194 claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2196 claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2198 priv->flow_drop_queue = NULL;
2205 * Pointer to Ethernet device.
2207 * Pointer to a TAILQ flow list.
/* Remove every flow on "list" from the device (Verbs objects destroyed,
 * hash Rx queues released, mark flags cleared) while keeping the rte_flow
 * structures and attributes so mlx5_flow_start() can re-apply them.
 * Iterates in reverse so dependent rules go first.
 * NOTE(review): fragmented listing — the drop/non-drop branch condition,
 * "continue" statements and the mark-handling guard are not visible. */
2210 mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
2212 struct priv *priv = dev->data->dev_private;
2213 struct rte_flow *flow;
2215 TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
2217 struct mlx5_ind_table_ibv *ind_tbl = NULL;
/* Drop flows: only the single ETH Verbs flow to remove. */
2220 if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
2222 claim_zero(mlx5_glue->destroy_flow
2223 (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2224 flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
2225 DRV_LOG(DEBUG, "port %u flow %p removed",
2226 dev->data->port_id, (void *)flow);
2230 /* Verify the flow has not already been cleaned. */
2231 for (i = 0; i != hash_rxq_init_n; ++i) {
2232 if (!flow->frxq[i].ibv_flow)
2235 * Indirection table may be necessary to remove the
2236 * flags in the Rx queues.
2237 * This helps to speed-up the process by avoiding
2240 ind_tbl = flow->frxq[i].hrxq->ind_table;
2243 if (i == hash_rxq_init_n)
/* Clear the mark flag on every queue of the indirection table. */
2247 for (i = 0; i != ind_tbl->queues_n; ++i)
2248 (*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
2250 for (i = 0; i != hash_rxq_init_n; ++i) {
2251 if (!flow->frxq[i].ibv_flow)
2253 claim_zero(mlx5_glue->destroy_flow
2254 (flow->frxq[i].ibv_flow));
2255 flow->frxq[i].ibv_flow = NULL;
2256 mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
2257 flow->frxq[i].hrxq = NULL;
2259 DRV_LOG(DEBUG, "port %u flow %p removed", dev->data->port_id,
2268 * Pointer to Ethernet device.
2270 * Pointer to a TAILQ flow list.
2273 * 0 on success, a negative errno value otherwise and rte_errno is set.
/* Re-apply every flow on "list" to the device: drop flows go back on the
 * drop queue's QP; queue/RSS flows re-acquire (or re-create) their hash Rx
 * queues from the stored rss_conf, re-create the Verbs flows, and restore
 * the Rx-queue mark flags. Counterpart of mlx5_flow_stop().
 * NOTE(review): fragmented listing — the drop/non-drop branch, "continue"
 * statements, error returns and the trailing mlx5_hrxq_get/new argument
 * lines (queues list) are not visible here. */
2276 mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
2278 struct priv *priv = dev->data->dev_private;
2279 struct rte_flow *flow;
2281 TAILQ_FOREACH(flow, list, next) {
/* Drop flow: reattach the stored attribute to the drop QP. */
2285 flow->frxq[HASH_RXQ_ETH].ibv_flow =
2286 mlx5_glue->create_flow
2287 (priv->flow_drop_queue->qp,
2288 flow->frxq[HASH_RXQ_ETH].ibv_attr);
2289 if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2291 "port %u flow %p cannot be applied",
2292 dev->data->port_id, (void *)flow);
2296 DRV_LOG(DEBUG, "port %u flow %p applied",
2297 dev->data->port_id, (void *)flow);
/* Queue/RSS flow: rebuild each populated hash Rx queue entry. */
2301 for (i = 0; i != hash_rxq_init_n; ++i) {
2302 if (!flow->frxq[i].ibv_attr)
2304 flow->frxq[i].hrxq =
2305 mlx5_hrxq_get(dev, flow->rss_conf.rss_key,
2306 flow->rss_conf.rss_key_len,
2307 hash_rxq_init[i].hash_fields,
2310 if (flow->frxq[i].hrxq)
2312 flow->frxq[i].hrxq =
2313 mlx5_hrxq_new(dev, flow->rss_conf.rss_key,
2314 flow->rss_conf.rss_key_len,
2315 hash_rxq_init[i].hash_fields,
2318 if (!flow->frxq[i].hrxq) {
2320 "port %u flow %p cannot be applied",
2321 dev->data->port_id, (void *)flow);
2326 flow->frxq[i].ibv_flow =
2327 mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
2328 flow->frxq[i].ibv_attr);
2329 if (!flow->frxq[i].ibv_flow) {
2331 "port %u flow %p cannot be applied",
2332 dev->data->port_id, (void *)flow);
2336 DRV_LOG(DEBUG, "port %u flow %p applied",
2337 dev->data->port_id, (void *)flow);
/* Restore the mark flag on the flow's Rx queues (guard line missing). */
2341 for (i = 0; i != flow->queues_n; ++i)
2342 (*priv->rxqs)[(*flow->queues)[i]]->mark = 1;
2348 * Verify the flow list is empty
2351 * Pointer to Ethernet device.
2353 * @return the number of flows not released.
/* Debug helper: log any flow still on the device's list and (per the header
 * comment above) return how many remain; the counter's declaration and
 * increment are on lines not visible in this fragment. */
2356 mlx5_flow_verify(struct rte_eth_dev *dev)
2358 struct priv *priv = dev->data->dev_private;
2359 struct rte_flow *flow;
2362 TAILQ_FOREACH(flow, &priv->flows, next) {
2363 DRV_LOG(DEBUG, "port %u flow %p still referenced",
2364 dev->data->port_id, (void *)flow);
2371 * Enable a control flow configured from the control plane.
2374 * Pointer to Ethernet device.
2376 * An Ethernet flow spec to apply.
2378 * An Ethernet flow mask to apply.
2380 * A VLAN flow spec to apply.
2382 * A VLAN flow mask to apply.
2385 * 0 on success, a negative errno value otherwise and rte_errno is set.
/* Install a control-plane flow (ETH, optionally + VLAN) that RSS-spreads
 * matching traffic over the current RETA queues, on the ctrl_flows list.
 * A stack union ties the rte_flow_action_rss header to a local queue array
 * sized RTE_MAX_QUEUES_PER_PORT (union declaration partially missing here).
 * NOTE(review): fragmented listing — item spec/mask wiring, the empty-RETA
 * early return and the final return are not visible. */
2388 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
2389 struct rte_flow_item_eth *eth_spec,
2390 struct rte_flow_item_eth *eth_mask,
2391 struct rte_flow_item_vlan *vlan_spec,
2392 struct rte_flow_item_vlan *vlan_mask)
2394 struct priv *priv = dev->data->dev_private;
2395 const struct rte_flow_attr attr = {
2397 .priority = MLX5_CTRL_FLOW_PRIORITY,
2399 struct rte_flow_item items[] = {
2401 .type = RTE_FLOW_ITEM_TYPE_ETH,
/* VLAN item is only present when a VLAN spec was supplied. */
2407 .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
2408 RTE_FLOW_ITEM_TYPE_END,
2414 .type = RTE_FLOW_ITEM_TYPE_END,
2417 struct rte_flow_action actions[] = {
2419 .type = RTE_FLOW_ACTION_TYPE_RSS,
2422 .type = RTE_FLOW_ACTION_TYPE_END,
2425 struct rte_flow *flow;
2426 struct rte_flow_error error;
2429 struct rte_flow_action_rss rss;
2431 const struct rte_eth_rss_conf *rss_conf;
2433 uint16_t queue[RTE_MAX_QUEUES_PER_PORT];
/* Nothing to spread over if the redirection table is empty. */
2437 if (!priv->reta_idx_n) {
2441 for (i = 0; i != priv->reta_idx_n; ++i)
2442 action_rss.local.queue[i] = (*priv->reta_idx)[i];
2443 action_rss.local.rss_conf = &priv->rss_conf;
2444 action_rss.local.num = priv->reta_idx_n;
2445 actions[0].conf = (const void *)&action_rss.rss;
2446 flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
2454 * Enable a flow control configured from the control plane.
2457 * Pointer to Ethernet device.
2459 * An Ethernet flow spec to apply.
2461 * An Ethernet flow mask to apply.
2464 * 0 on success, a negative errno value otherwise and rte_errno is set.
/* Convenience wrapper: control flow matching on ETH only (no VLAN item). */
2467 mlx5_ctrl_flow(struct rte_eth_dev *dev,
2468 struct rte_flow_item_eth *eth_spec,
2469 struct rte_flow_item_eth *eth_mask)
2471 return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
2477 * @see rte_flow_destroy()
/* rte_flow destroy callback: remove the rule from the regular flow list;
 * the "error" out-parameter is never filled (destruction cannot fail here). */
2481 mlx5_flow_destroy(struct rte_eth_dev *dev,
2482 struct rte_flow *flow,
2483 struct rte_flow_error *error __rte_unused)
2485 struct priv *priv = dev->data->dev_private;
2487 mlx5_flow_list_destroy(dev, &priv->flows, flow);
2492 * Destroy all flows.
2494 * @see rte_flow_flush()
/* rte_flow flush callback: drop every rule on the regular flow list. */
2498 mlx5_flow_flush(struct rte_eth_dev *dev,
2499 struct rte_flow_error *error __rte_unused)
2501 struct priv *priv = dev->data->dev_private;
2503 mlx5_flow_list_flush(dev, &priv->flows);
2507 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
2509 * Query flow counter.
2513 * @param counter_value
2514 * returned data from the counter.
2517 * 0 on success, a negative errno value otherwise and rte_errno is set.
/* Read a Verbs counter set (hits in counters[0], bytes in counters[1]),
 * report the delta against the stats cached in counter_stats, and rebase
 * the cache when the caller requested a reset. Only compiled when
 * HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT is defined (see #ifdef above).
 * NOTE(review): fragmented listing — the query_cs_attr/query_out fields
 * binding "counters" to the output buffer are on lines not visible here. */
2520 mlx5_flow_query_count(struct ibv_counter_set *cs,
2521 struct mlx5_flow_counter_stats *counter_stats,
2522 struct rte_flow_query_count *query_count,
2523 struct rte_flow_error *error)
2525 uint64_t counters[2];
2526 struct ibv_query_counter_set_attr query_cs_attr = {
2528 .query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
2530 struct ibv_counter_set_data query_out = {
2532 .outlen = 2 * sizeof(uint64_t),
2534 int err = mlx5_glue->query_counter_set(&query_cs_attr, &query_out);
2537 return rte_flow_error_set(error, err,
2538 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2540 "cannot read counter");
2541 query_count->hits_set = 1;
2542 query_count->bytes_set = 1;
/* Report deltas relative to the last reset point. */
2543 query_count->hits = counters[0] - counter_stats->hits;
2544 query_count->bytes = counters[1] - counter_stats->bytes;
2545 if (query_count->reset) {
2546 counter_stats->hits = counters[0];
2547 counter_stats->bytes = counters[1];
2555 * @see rte_flow_query()
/* rte_flow query callback: only the COUNT action is supported; delegates to
 * mlx5_flow_query_count when the flow owns a counter set, errors otherwise.
 * NOTE(review): the guard selecting between the two paths (presumably
 * "if (flow->cs)") is on a line not visible in this fragment. */
2559 mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
2560 struct rte_flow *flow,
2561 enum rte_flow_action_type action __rte_unused,
2563 struct rte_flow_error *error)
2568 ret = mlx5_flow_query_count(flow->cs,
2569 &flow->counter_stats,
2570 (struct rte_flow_query_count *)data,
2575 return rte_flow_error_set(error, EINVAL,
2576 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2578 "no counter found for flow");
2587 * @see rte_flow_isolate()
/* rte_flow isolate callback: toggle isolated mode, refusing while the port
 * is started, and swap the device ops table so that only the isolate-safe
 * subset of callbacks is exposed. */
2591 mlx5_flow_isolate(struct rte_eth_dev *dev,
2593 struct rte_flow_error *error)
2595 struct priv *priv = dev->data->dev_private;
/* Mode can only change on a stopped port. */
2597 if (dev->data->dev_started) {
2598 rte_flow_error_set(error, EBUSY,
2599 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2601 "port must be stopped first");
2604 priv->isolated = !!enable;
/* presumably "if (enable)" selects between the two ops tables (line missing). */
2606 priv->dev->dev_ops = &mlx5_dev_ops_isolate;
2608 priv->dev->dev_ops = &mlx5_dev_ops;
2613 * Convert a flow director filter to a generic flow.
2616 * Pointer to Ethernet device.
2617 * @param fdir_filter
2618 * Flow director filter to add.
2620 * Generic flow parameters structure.
2623 * 0 on success, a negative errno value otherwise and rte_errno is set.
/* Translate a flow-director filter into generic rte_flow attributes, items
 * and actions stored inside "attributes": item[0] is always ETH, item[1]
 * the L3 header, item[2] (when applicable) the L4 header; the action is
 * QUEUE or DROP depending on the filter's behavior.
 * NOTE(review): fragmented listing — the rte_errno/return lines of the
 * error branches, "break" statements and closing braces are not visible.
 * Spec and mask of the L3/L4 items point at the same storage, i.e. the
 * filter values double as exact-match masks. */
2626 mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
2627 const struct rte_eth_fdir_filter *fdir_filter,
2628 struct mlx5_fdir *attributes)
2630 struct priv *priv = dev->data->dev_private;
2631 const struct rte_eth_fdir_input *input = &fdir_filter->input;
2633 /* Validate queue number. */
2634 if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
2635 DRV_LOG(ERR, "port %u invalid queue number %d",
2636 dev->data->port_id, fdir_filter->action.rx_queue);
2640 attributes->attr.ingress = 1;
2641 attributes->items[0] = (struct rte_flow_item) {
2642 .type = RTE_FLOW_ITEM_TYPE_ETH,
2643 .spec = &attributes->l2,
2644 .mask = &attributes->l2_mask,
/* Map FDIR behavior onto the equivalent rte_flow action. */
2646 switch (fdir_filter->action.behavior) {
2647 case RTE_ETH_FDIR_ACCEPT:
2648 attributes->actions[0] = (struct rte_flow_action){
2649 .type = RTE_FLOW_ACTION_TYPE_QUEUE,
2650 .conf = &attributes->queue,
2653 case RTE_ETH_FDIR_REJECT:
2654 attributes->actions[0] = (struct rte_flow_action){
2655 .type = RTE_FLOW_ACTION_TYPE_DROP,
2659 DRV_LOG(ERR, "port %u invalid behavior %d",
2661 fdir_filter->action.behavior);
2662 rte_errno = ENOTSUP;
2665 attributes->queue.index = fdir_filter->action.rx_queue;
/* Fill L3/L4 headers and items according to the FDIR flow type. */
2666 switch (fdir_filter->input.flow_type) {
2667 case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
2668 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2669 .src_addr = input->flow.udp4_flow.ip.src_ip,
2670 .dst_addr = input->flow.udp4_flow.ip.dst_ip,
2671 .time_to_live = input->flow.udp4_flow.ip.ttl,
2672 .type_of_service = input->flow.udp4_flow.ip.tos,
2673 .next_proto_id = input->flow.udp4_flow.ip.proto,
2675 attributes->l4.udp.hdr = (struct udp_hdr){
2676 .src_port = input->flow.udp4_flow.src_port,
2677 .dst_port = input->flow.udp4_flow.dst_port,
2679 attributes->items[1] = (struct rte_flow_item){
2680 .type = RTE_FLOW_ITEM_TYPE_IPV4,
2681 .spec = &attributes->l3,
2682 .mask = &attributes->l3,
2684 attributes->items[2] = (struct rte_flow_item){
2685 .type = RTE_FLOW_ITEM_TYPE_UDP,
2686 .spec = &attributes->l4,
2687 .mask = &attributes->l4,
2690 case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
2691 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2692 .src_addr = input->flow.tcp4_flow.ip.src_ip,
2693 .dst_addr = input->flow.tcp4_flow.ip.dst_ip,
2694 .time_to_live = input->flow.tcp4_flow.ip.ttl,
2695 .type_of_service = input->flow.tcp4_flow.ip.tos,
2696 .next_proto_id = input->flow.tcp4_flow.ip.proto,
2698 attributes->l4.tcp.hdr = (struct tcp_hdr){
2699 .src_port = input->flow.tcp4_flow.src_port,
2700 .dst_port = input->flow.tcp4_flow.dst_port,
2702 attributes->items[1] = (struct rte_flow_item){
2703 .type = RTE_FLOW_ITEM_TYPE_IPV4,
2704 .spec = &attributes->l3,
2705 .mask = &attributes->l3,
2707 attributes->items[2] = (struct rte_flow_item){
2708 .type = RTE_FLOW_ITEM_TYPE_TCP,
2709 .spec = &attributes->l4,
2710 .mask = &attributes->l4,
2713 case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
2714 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2715 .src_addr = input->flow.ip4_flow.src_ip,
2716 .dst_addr = input->flow.ip4_flow.dst_ip,
2717 .time_to_live = input->flow.ip4_flow.ttl,
2718 .type_of_service = input->flow.ip4_flow.tos,
2719 .next_proto_id = input->flow.ip4_flow.proto,
2721 attributes->items[1] = (struct rte_flow_item){
2722 .type = RTE_FLOW_ITEM_TYPE_IPV4,
2723 .spec = &attributes->l3,
2724 .mask = &attributes->l3,
2727 case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
2728 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2729 .hop_limits = input->flow.udp6_flow.ip.hop_limits,
2730 .proto = input->flow.udp6_flow.ip.proto,
/* NOTE(review): the dst_addr copies below reuse RTE_DIM(...src_addr);
 * both arrays are the same size, but the asymmetry is worth confirming. */
2732 memcpy(attributes->l3.ipv6.hdr.src_addr,
2733 input->flow.udp6_flow.ip.src_ip,
2734 RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2735 memcpy(attributes->l3.ipv6.hdr.dst_addr,
2736 input->flow.udp6_flow.ip.dst_ip,
2737 RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2738 attributes->l4.udp.hdr = (struct udp_hdr){
2739 .src_port = input->flow.udp6_flow.src_port,
2740 .dst_port = input->flow.udp6_flow.dst_port,
2742 attributes->items[1] = (struct rte_flow_item){
2743 .type = RTE_FLOW_ITEM_TYPE_IPV6,
2744 .spec = &attributes->l3,
2745 .mask = &attributes->l3,
2747 attributes->items[2] = (struct rte_flow_item){
2748 .type = RTE_FLOW_ITEM_TYPE_UDP,
2749 .spec = &attributes->l4,
2750 .mask = &attributes->l4,
2753 case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
2754 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2755 .hop_limits = input->flow.tcp6_flow.ip.hop_limits,
2756 .proto = input->flow.tcp6_flow.ip.proto,
2758 memcpy(attributes->l3.ipv6.hdr.src_addr,
2759 input->flow.tcp6_flow.ip.src_ip,
2760 RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2761 memcpy(attributes->l3.ipv6.hdr.dst_addr,
2762 input->flow.tcp6_flow.ip.dst_ip,
2763 RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2764 attributes->l4.tcp.hdr = (struct tcp_hdr){
2765 .src_port = input->flow.tcp6_flow.src_port,
2766 .dst_port = input->flow.tcp6_flow.dst_port,
2768 attributes->items[1] = (struct rte_flow_item){
2769 .type = RTE_FLOW_ITEM_TYPE_IPV6,
2770 .spec = &attributes->l3,
2771 .mask = &attributes->l3,
2773 attributes->items[2] = (struct rte_flow_item){
2774 .type = RTE_FLOW_ITEM_TYPE_TCP,
2775 .spec = &attributes->l4,
2776 .mask = &attributes->l4,
2779 case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
2780 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2781 .hop_limits = input->flow.ipv6_flow.hop_limits,
2782 .proto = input->flow.ipv6_flow.proto,
2784 memcpy(attributes->l3.ipv6.hdr.src_addr,
2785 input->flow.ipv6_flow.src_ip,
2786 RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2787 memcpy(attributes->l3.ipv6.hdr.dst_addr,
2788 input->flow.ipv6_flow.dst_ip,
2789 RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2790 attributes->items[1] = (struct rte_flow_item){
2791 .type = RTE_FLOW_ITEM_TYPE_IPV6,
2792 .spec = &attributes->l3,
2793 .mask = &attributes->l3,
2797 DRV_LOG(ERR, "port %u invalid flow type%d",
2798 dev->data->port_id, fdir_filter->input.flow_type);
2799 rte_errno = ENOTSUP;
2806 * Add new flow director filter and store it in list.
2809 * Pointer to Ethernet device.
2810 * @param fdir_filter
2811 * Flow director filter to add.
2814 * 0 on success, a negative errno value otherwise and rte_errno is set.
2817 mlx5_fdir_filter_add(struct rte_eth_dev *dev,
2818 const struct rte_eth_fdir_filter *fdir_filter)
2820 struct priv *priv = dev->data->dev_private;
2821 struct mlx5_fdir attributes = {
2824 .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
2825 .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
2829 struct mlx5_flow_parse parser = {
2830 .layer = HASH_RXQ_ETH,
2832 struct rte_flow_error error;
2833 struct rte_flow *flow;
2836 ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
2839 ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
2840 attributes.actions, &error, &parser);
2843 flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
2844 attributes.items, attributes.actions,
2847 DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
/**
 * Delete specific filter.
 *
 * Rebuilds the verbs flow attribute matching @p fdir_filter, then walks
 * the private flow list for a flow whose attribute and specifications
 * compare equal, and destroys it when found.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Filter to be deleted.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
2866 mlx5_fdir_filter_delete(struct rte_eth_dev *dev,
2867 const struct rte_eth_fdir_filter *fdir_filter)
2869 struct priv *priv = dev->data->dev_private;
2870 struct mlx5_fdir attributes = {
2873 struct mlx5_flow_parse parser = {
2875 .layer = HASH_RXQ_ETH,
2877 struct rte_flow_error error;
2878 struct rte_flow *flow;
/* Translate the FDIR filter into generic flow attributes/items. */
2882 ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
/* Build the verbs specifications from the generic description. */
2885 ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
2886 attributes.actions, &error, &parser);
/*
 * Special case for drop action which is only set in the
 * specifications when the flow is created. In this situation the
 * drop specification is missing.
 */
2895 struct ibv_flow_spec_action_drop *drop;
/* Append the drop spec at the current end of the ETH queue attribute. */
2897 drop = (void *)((uintptr_t)parser.queue[HASH_RXQ_ETH].ibv_attr +
2898 parser.queue[HASH_RXQ_ETH].offset);
2899 *drop = (struct ibv_flow_spec_action_drop){
2900 .type = IBV_FLOW_SPEC_ACTION_DROP,
2901 .size = sizeof(struct ibv_flow_spec_action_drop),
2903 parser.queue[HASH_RXQ_ETH].ibv_attr->num_of_specs++;
/* Linear scan: compare each created flow's verbs attribute and specs. */
2905 TAILQ_FOREACH(flow, &priv->flows, next) {
2906 struct ibv_flow_attr *attr;
2907 struct ibv_spec_header *attr_h;
2909 struct ibv_flow_attr *flow_attr;
2910 struct ibv_spec_header *flow_h;
2912 unsigned int specs_n;
2914 attr = parser.queue[HASH_RXQ_ETH].ibv_attr;
2915 flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr;
2916 /* Compare first the attributes. */
2917 if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
2919 if (attr->num_of_specs == 0)
/* Specifications immediately follow the ibv_flow_attr header. */
2921 spec = (void *)((uintptr_t)attr +
2922 sizeof(struct ibv_flow_attr));
2923 flow_spec = (void *)((uintptr_t)flow_attr +
2924 sizeof(struct ibv_flow_attr));
2925 specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
/* Compare specs pairwise; each is prefixed by an ibv_spec_header. */
2926 for (i = 0; i != specs_n; ++i) {
2929 if (memcmp(spec, flow_spec,
2930 RTE_MIN(attr_h->size, flow_h->size)))
/* Advance both cursors by their own spec sizes. */
2932 spec = (void *)((uintptr_t)spec + attr_h->size);
2933 flow_spec = (void *)((uintptr_t)flow_spec +
2936 /* At this point, the flow match. */
2939 /* The flow does not match. */
2942 ret = rte_errno; /* Save rte_errno before cleanup. */
2944 mlx5_flow_list_destroy(dev, &priv->flows, flow);
/* Release the verbs attributes allocated during conversion. */
2946 for (i = 0; i != hash_rxq_init_n; ++i) {
2947 if (parser.queue[i].ibv_attr)
2948 rte_free(parser.queue[i].ibv_attr);
2950 rte_errno = ret; /* Restore rte_errno. */
/**
 * Update queue for specific filter.
 *
 * Implemented as delete followed by add, so rule creation keeps a
 * single code path.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Filter to be updated.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_update(struct rte_eth_dev *dev,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	int ret;

	ret = mlx5_fdir_filter_delete(dev, fdir_filter);
	if (ret)
		return ret;
	return mlx5_fdir_filter_add(dev, fdir_filter);
}
2978 * Flush all filters.
2981 * Pointer to Ethernet device.
2984 mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
2986 struct priv *priv = dev->data->dev_private;
2988 mlx5_flow_list_flush(dev, &priv->flows);
2992 * Get flow director information.
2995 * Pointer to Ethernet device.
2996 * @param[out] fdir_info
2997 * Resulting flow director information.
3000 mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
3002 struct priv *priv = dev->data->dev_private;
3003 struct rte_eth_fdir_masks *mask =
3004 &priv->dev->data->dev_conf.fdir_conf.mask;
3006 fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
3007 fdir_info->guarant_spc = 0;
3008 rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
3009 fdir_info->max_flexpayload = 0;
3010 fdir_info->flow_types_mask[0] = 0;
3011 fdir_info->flex_payload_unit = 0;
3012 fdir_info->max_flex_payload_segment_num = 0;
3013 fdir_info->flex_payload_limit = 0;
3014 memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
3018 * Deal with flow director operations.
3021 * Pointer to Ethernet device.
3023 * Operation to perform.
3025 * Pointer to operation-specific structure.
3028 * 0 on success, a negative errno value otherwise and rte_errno is set.
3031 mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
3034 struct priv *priv = dev->data->dev_private;
3035 enum rte_fdir_mode fdir_mode =
3036 priv->dev->data->dev_conf.fdir_conf.mode;
3038 if (filter_op == RTE_ETH_FILTER_NOP)
3040 if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
3041 fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
3042 DRV_LOG(ERR, "port %u flow director mode %d not supported",
3043 dev->data->port_id, fdir_mode);
3047 switch (filter_op) {
3048 case RTE_ETH_FILTER_ADD:
3049 return mlx5_fdir_filter_add(dev, arg);
3050 case RTE_ETH_FILTER_UPDATE:
3051 return mlx5_fdir_filter_update(dev, arg);
3052 case RTE_ETH_FILTER_DELETE:
3053 return mlx5_fdir_filter_delete(dev, arg);
3054 case RTE_ETH_FILTER_FLUSH:
3055 mlx5_fdir_filter_flush(dev);
3057 case RTE_ETH_FILTER_INFO:
3058 mlx5_fdir_info_get(dev, arg);
3061 DRV_LOG(DEBUG, "port %u unknown operation %u",
3062 dev->data->port_id, filter_op);
3070 * Manage filter operations.
3073 * Pointer to Ethernet device structure.
3074 * @param filter_type
3077 * Operation to perform.
3079 * Pointer to operation-specific structure.
3082 * 0 on success, a negative errno value otherwise and rte_errno is set.
3085 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
3086 enum rte_filter_type filter_type,
3087 enum rte_filter_op filter_op,
3090 switch (filter_type) {
3091 case RTE_ETH_FILTER_GENERIC:
3092 if (filter_op != RTE_ETH_FILTER_GET) {
3096 *(const void **)arg = &mlx5_flow_ops;
3098 case RTE_ETH_FILTER_FDIR:
3099 return mlx5_fdir_ctrl_func(dev, filter_op, arg);
3101 DRV_LOG(ERR, "port %u filter type (%d) not supported",
3102 dev->data->port_id, filter_type);
3103 rte_errno = ENOTSUP;