/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2016 6WIND S.A.
 * Copyright 2016 Mellanox.
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#pragma GCC diagnostic ignored "-Wpedantic"
#include <infiniband/verbs.h>
#pragma GCC diagnostic error "-Wpedantic"

#include <rte_ethdev_driver.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>

#include "mlx5_defs.h"
#include "mlx5_glue.h"
/* Define minimal priority for control plane flows. */
#define MLX5_CTRL_FLOW_PRIORITY 4

/* Internet Protocol versions. */

#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
struct ibv_flow_spec_counter_action {

/* Dev ops structure defined in mlx5.c */
extern const struct eth_dev_ops mlx5_dev_ops;
extern const struct eth_dev_ops mlx5_dev_ops_isolate;
mlx5_flow_create_eth(const struct rte_flow_item *item,
                     const void *default_mask,

mlx5_flow_create_vlan(const struct rte_flow_item *item,
                      const void *default_mask,

mlx5_flow_create_ipv4(const struct rte_flow_item *item,
                      const void *default_mask,

mlx5_flow_create_ipv6(const struct rte_flow_item *item,
                      const void *default_mask,

mlx5_flow_create_udp(const struct rte_flow_item *item,
                     const void *default_mask,

mlx5_flow_create_tcp(const struct rte_flow_item *item,
                     const void *default_mask,

mlx5_flow_create_vxlan(const struct rte_flow_item *item,
                       const void *default_mask,

struct mlx5_flow_parse;

mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,

mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);

mlx5_flow_create_count(struct rte_eth_dev *dev, struct mlx5_flow_parse *parser);
/* Hash RX queue types. */

/* Initialization data for hash RX queue. */
struct hash_rxq_init {
        uint64_t hash_fields; /* Fields that participate in the hash. */
        uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
        unsigned int flow_priority; /* Flow priority to use. */
        unsigned int ip_version; /* Internet protocol. */
/* Initialization data for hash RX queues. */
const struct hash_rxq_init hash_rxq_init[] = {
                .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
                                IBV_RX_HASH_DST_IPV4 |
                                IBV_RX_HASH_SRC_PORT_TCP |
                                IBV_RX_HASH_DST_PORT_TCP),
                .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
                .ip_version = MLX5_IPV4,
                .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
                                IBV_RX_HASH_DST_IPV4 |
                                IBV_RX_HASH_SRC_PORT_UDP |
                                IBV_RX_HASH_DST_PORT_UDP),
                .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
                .ip_version = MLX5_IPV4,
                .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
                                IBV_RX_HASH_DST_IPV4),
                .dpdk_rss_hf = (ETH_RSS_IPV4 |
                .ip_version = MLX5_IPV4,
                .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
                                IBV_RX_HASH_DST_IPV6 |
                                IBV_RX_HASH_SRC_PORT_TCP |
                                IBV_RX_HASH_DST_PORT_TCP),
                .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
                .ip_version = MLX5_IPV6,
                .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
                                IBV_RX_HASH_DST_IPV6 |
                                IBV_RX_HASH_SRC_PORT_UDP |
                                IBV_RX_HASH_DST_PORT_UDP),
                .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
                .ip_version = MLX5_IPV6,
                .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
                                IBV_RX_HASH_DST_IPV6),
                .dpdk_rss_hf = (ETH_RSS_IPV6 |
                .ip_version = MLX5_IPV6,

/* Number of entries in hash_rxq_init[]. */
const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
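/*
 * Example: a flow whose RSS configuration includes
 * ETH_RSS_NONFRAG_IPV4_UDP selects the entry above whose .dpdk_rss_hf
 * matches it, i.e. hashing on the IPv4 source/destination addresses and
 * the UDP source/destination ports.
 */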
/** Structure for holding counter stats. */
struct mlx5_flow_counter_stats {
        uint64_t hits; /**< Number of packets matched by the rule. */
        uint64_t bytes; /**< Number of bytes matched by the rule. */

/** Structure for Drop queue. */
struct mlx5_hrxq_drop {
        struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
        struct ibv_qp *qp; /**< Verbs queue pair. */
        struct ibv_wq *wq; /**< Verbs work queue. */
        struct ibv_cq *cq; /**< Verbs completion queue. */

/* Flows structures. */
        uint64_t hash_fields; /**< Fields that participate in the hash. */
        struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
        struct ibv_flow *ibv_flow; /**< Verbs flow. */
        struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */

/* Drop flows structures. */
struct mlx5_flow_drop {
        struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
        struct ibv_flow *ibv_flow; /**< Verbs flow. */

        TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
        uint32_t mark:1; /**< Set if the flow is marked. */
        uint32_t drop:1; /**< Drop queue. */
        uint16_t queues_n; /**< Number of entries in queue[]. */
        uint16_t (*queues)[]; /**< Queues indexes to use. */
        struct rte_eth_rss_conf rss_conf; /**< RSS configuration. */
        uint8_t rss_key[40]; /**< Copy of the RSS key. */
        struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
        struct mlx5_flow_counter_stats counter_stats; /**< The counter stats. */
        struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
        /**< Flow with Rx queue. */

/** Static initializer for items. */
        (const enum rte_flow_item_type []){ \
                __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
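/*
 * Example use of the ITEMS() initializer above (pattern borrowed from the
 * graph below):
 *
 *   .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4, RTE_FLOW_ITEM_TYPE_IPV6),
 *
 * expands to a compound-literal array terminated by RTE_FLOW_ITEM_TYPE_END,
 * which is what the item walkers in this file rely on to stop.
 */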
/** Structure to generate a simple graph of layers supported by the NIC. */
struct mlx5_flow_items {
        /** List of possible actions for these items. */
        const enum rte_flow_action_type *const actions;
        /** Bit-masks corresponding to the possibilities for the item. */
         * Default bit-masks to use when item->mask is not provided. When
         * \default_mask is also NULL, the full supported bit-mask (\mask) is
         * used instead.
         */
        const void *default_mask;
        /** Bit-masks size in bytes. */
        const unsigned int mask_sz;
         * Conversion function from rte_flow to NIC specific flow.
         *
         *   rte_flow item to convert.
         * @param default_mask
         *   Default bit-masks to use when item->mask is not provided.
         *   Internal structure to store the conversion.
         *
         *   0 on success, a negative errno value otherwise and rte_errno is set.
         */
        int (*convert)(const struct rte_flow_item *item,
                       const void *default_mask,
        /** Size in bytes of the destination structure. */
        const unsigned int dst_sz;
        /** List of possible following items. */
        const enum rte_flow_item_type *const items;
/** Valid actions for this PMD. */
static const enum rte_flow_action_type valid_actions[] = {
        RTE_FLOW_ACTION_TYPE_DROP,
        RTE_FLOW_ACTION_TYPE_QUEUE,
        RTE_FLOW_ACTION_TYPE_MARK,
        RTE_FLOW_ACTION_TYPE_FLAG,
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
        RTE_FLOW_ACTION_TYPE_COUNT,
        RTE_FLOW_ACTION_TYPE_END,
275 static const struct mlx5_flow_items mlx5_flow_items[] = {
276 [RTE_FLOW_ITEM_TYPE_END] = {
277 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
278 RTE_FLOW_ITEM_TYPE_VXLAN),
280 [RTE_FLOW_ITEM_TYPE_ETH] = {
281 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
282 RTE_FLOW_ITEM_TYPE_IPV4,
283 RTE_FLOW_ITEM_TYPE_IPV6),
284 .actions = valid_actions,
285 .mask = &(const struct rte_flow_item_eth){
286 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
287 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
290 .default_mask = &rte_flow_item_eth_mask,
291 .mask_sz = sizeof(struct rte_flow_item_eth),
292 .convert = mlx5_flow_create_eth,
293 .dst_sz = sizeof(struct ibv_flow_spec_eth),
295 [RTE_FLOW_ITEM_TYPE_VLAN] = {
296 .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
297 RTE_FLOW_ITEM_TYPE_IPV6),
298 .actions = valid_actions,
299 .mask = &(const struct rte_flow_item_vlan){
302 .default_mask = &rte_flow_item_vlan_mask,
303 .mask_sz = sizeof(struct rte_flow_item_vlan),
304 .convert = mlx5_flow_create_vlan,
307 [RTE_FLOW_ITEM_TYPE_IPV4] = {
308 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
309 RTE_FLOW_ITEM_TYPE_TCP),
310 .actions = valid_actions,
311 .mask = &(const struct rte_flow_item_ipv4){
315 .type_of_service = -1,
319 .default_mask = &rte_flow_item_ipv4_mask,
320 .mask_sz = sizeof(struct rte_flow_item_ipv4),
321 .convert = mlx5_flow_create_ipv4,
322 .dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
324 [RTE_FLOW_ITEM_TYPE_IPV6] = {
325 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
326 RTE_FLOW_ITEM_TYPE_TCP),
327 .actions = valid_actions,
328 .mask = &(const struct rte_flow_item_ipv6){
331 0xff, 0xff, 0xff, 0xff,
332 0xff, 0xff, 0xff, 0xff,
333 0xff, 0xff, 0xff, 0xff,
334 0xff, 0xff, 0xff, 0xff,
337 0xff, 0xff, 0xff, 0xff,
338 0xff, 0xff, 0xff, 0xff,
339 0xff, 0xff, 0xff, 0xff,
340 0xff, 0xff, 0xff, 0xff,
347 .default_mask = &rte_flow_item_ipv6_mask,
348 .mask_sz = sizeof(struct rte_flow_item_ipv6),
349 .convert = mlx5_flow_create_ipv6,
350 .dst_sz = sizeof(struct ibv_flow_spec_ipv6),
352 [RTE_FLOW_ITEM_TYPE_UDP] = {
353 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
354 .actions = valid_actions,
355 .mask = &(const struct rte_flow_item_udp){
361 .default_mask = &rte_flow_item_udp_mask,
362 .mask_sz = sizeof(struct rte_flow_item_udp),
363 .convert = mlx5_flow_create_udp,
364 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
366 [RTE_FLOW_ITEM_TYPE_TCP] = {
367 .actions = valid_actions,
368 .mask = &(const struct rte_flow_item_tcp){
374 .default_mask = &rte_flow_item_tcp_mask,
375 .mask_sz = sizeof(struct rte_flow_item_tcp),
376 .convert = mlx5_flow_create_tcp,
377 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
379 [RTE_FLOW_ITEM_TYPE_VXLAN] = {
380 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
381 .actions = valid_actions,
382 .mask = &(const struct rte_flow_item_vxlan){
383 .vni = "\xff\xff\xff",
385 .default_mask = &rte_flow_item_vxlan_mask,
386 .mask_sz = sizeof(struct rte_flow_item_vxlan),
387 .convert = mlx5_flow_create_vxlan,
388 .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
/** Structure to pass to the conversion function. */
struct mlx5_flow_parse {
        uint32_t inner; /**< Set once VXLAN is encountered. */
        /**< Whether resources should remain after a validate. */
        uint32_t drop:1; /**< Target is a drop queue. */
        uint32_t mark:1; /**< Mark is present in the flow. */
        uint32_t count:1; /**< Count is present in the flow. */
        uint32_t mark_id; /**< Mark identifier. */
        uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use. */
        uint16_t queues_n; /**< Number of entries in queue[]. */
        struct rte_eth_rss_conf rss_conf; /**< RSS configuration. */
        uint8_t rss_key[40]; /**< Copy of the RSS key. */
        enum hash_rxq_type layer; /**< Last pattern layer detected. */
        struct ibv_counter_set *cs; /**< Holds the counter set for the rule. */
                struct ibv_flow_attr *ibv_attr;
                /**< Pointer to Verbs attributes. */
                /**< Current position or total size of the attribute. */
        } queue[RTE_DIM(hash_rxq_init)];
static const struct rte_flow_ops mlx5_flow_ops = {
        .validate = mlx5_flow_validate,
        .create = mlx5_flow_create,
        .destroy = mlx5_flow_destroy,
        .flush = mlx5_flow_flush,
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
        .query = mlx5_flow_query,
        .isolate = mlx5_flow_isolate,
/* Convert FDIR request to Generic flow. */
        struct rte_flow_attr attr;
        struct rte_flow_action actions[2];
        struct rte_flow_item items[4];
        struct rte_flow_item_eth l2;
        struct rte_flow_item_eth l2_mask;
        struct rte_flow_item_ipv4 ipv4;
        struct rte_flow_item_ipv6 ipv6;
        struct rte_flow_item_udp udp;
        struct rte_flow_item_tcp tcp;
        struct rte_flow_action_queue queue;

/* Verbs specification header. */
struct ibv_spec_header {
        enum ibv_flow_spec_type type;
 * Check support for a given item.
 *
 *   Item specification.
 *   Bit-masks covering supported fields to compare with spec, last and mask in
 *   Bit-mask size in bytes.
 *
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
mlx5_flow_item_validate(const struct rte_flow_item *item,
                        const uint8_t *mask, unsigned int size)
        if (!item->spec && (item->mask || item->last)) {
        if (item->spec && !item->mask) {
                const uint8_t *spec = item->spec;

                for (i = 0; i < size; ++i)
                        if ((spec[i] | mask[i]) != mask[i]) {
        if (item->last && !item->mask) {
                const uint8_t *spec = item->last;

                for (i = 0; i < size; ++i)
                        if ((spec[i] | mask[i]) != mask[i]) {
                const uint8_t *spec = item->spec;

                for (i = 0; i < size; ++i)
                        if ((spec[i] | mask[i]) != mask[i]) {
        if (item->spec && item->last) {
                const uint8_t *apply = mask;

                for (i = 0; i < size; ++i) {
                        spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
                        last[i] = ((const uint8_t *)item->last)[i] & apply[i];
                ret = memcmp(spec, last, size);
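/*
 * Example of the subset check above: with a supported mask byte of 0x03,
 * a spec byte of 0x02 passes since (0x02 | 0x03) == 0x03, while 0x0f is
 * rejected since (0x0f | 0x03) == 0x0f != 0x03; an item may only match on
 * fields the PMD supports.
 */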
 * Copy the RSS configuration from the user. If rss_conf is NULL, the
 * driver default is used instead.
 *
 *   Internal parser structure.
 *   User RSS configuration to save.
 *
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
mlx5_flow_convert_rss_conf(struct mlx5_flow_parse *parser,
                           const struct rte_eth_rss_conf *rss_conf)
         * This function is also called at the beginning of
         * mlx5_flow_convert_actions() to initialize the parser with the
         * device default RSS configuration.
         */
        if (rss_conf->rss_hf & MLX5_RSS_HF_MASK) {
        if (rss_conf->rss_key_len != 40) {
        if (rss_conf->rss_key_len && rss_conf->rss_key) {
                parser->rss_conf.rss_key_len = rss_conf->rss_key_len;
                memcpy(parser->rss_key, rss_conf->rss_key,
                       rss_conf->rss_key_len);
                parser->rss_conf.rss_key = parser->rss_key;
        parser->rss_conf.rss_hf = rss_conf->rss_hf;
 * Extract attributes to the parser.
 *
 *   Flow rule attributes.
 *   Perform verbose error reporting if not NULL.
 *
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
mlx5_flow_convert_attributes(const struct rte_flow_attr *attr,
                             struct rte_flow_error *error)
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
                                   "groups are not supported");
        if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
                                   "priorities are not supported");
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
                                   "egress is not supported");
        if (!attr->ingress) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
                                   "only ingress is supported");
 * Extract actions request to the parser.
 *
 *   Pointer to Ethernet device.
 *   Associated actions (list terminated by the END action).
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
mlx5_flow_convert_actions(struct rte_eth_dev *dev,
                          const struct rte_flow_action actions[],
                          struct rte_flow_error *error,
                          struct mlx5_flow_parse *parser)
        struct priv *priv = dev->data->dev_private;
         * Add the default RSS configuration necessary for Verbs to create a
         * QP even if no RSS is requested.
        ret = mlx5_flow_convert_rss_conf(parser,
                                         (const struct rte_eth_rss_conf *)
        for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
                if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
                        const struct rte_flow_action_queue *queue =
                                (const struct rte_flow_action_queue *)
                        if (!queue || (queue->index > (priv->rxqs_n - 1)))
                                goto exit_action_not_supported;
                        for (n = 0; n < parser->queues_n; ++n) {
                                if (parser->queues[n] == queue->index) {
                        if (parser->queues_n > 1 && !found) {
                                rte_flow_error_set(error, ENOTSUP,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   "queue action not in RSS queues");
                        parser->queues_n = 1;
                        parser->queues[0] = queue->index;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
                        const struct rte_flow_action_rss *rss =
                                (const struct rte_flow_action_rss *)
                        if (!rss || !rss->num) {
                                rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                        if (parser->queues_n == 1) {
                                assert(parser->queues_n);
                                for (n = 0; n < rss->num; ++n) {
                                        if (parser->queues[0] ==
                                        rte_flow_error_set(error, ENOTSUP,
                                                           RTE_FLOW_ERROR_TYPE_ACTION,
                                                           "queue action not in RSS"
                        for (n = 0; n < rss->num; ++n) {
                                if (rss->queue[n] >= priv->rxqs_n) {
                                        rte_flow_error_set(error, EINVAL,
                                                           RTE_FLOW_ERROR_TYPE_ACTION,
                                                           "queue id > number of"
                        for (n = 0; n < rss->num; ++n)
                                parser->queues[n] = rss->queue[n];
                        parser->queues_n = rss->num;
                        if (mlx5_flow_convert_rss_conf(parser, rss->rss_conf)) {
                                rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   "wrong RSS configuration");
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
                        const struct rte_flow_action_mark *mark =
                                (const struct rte_flow_action_mark *)
                                rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   "mark must be defined");
                        } else if (mark->id >= MLX5_FLOW_MARK_MAX) {
                                rte_flow_error_set(error, ENOTSUP,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   "mark must be between 0"
                        parser->mark_id = mark->id;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
                           priv->config.flow_counter_en) {
                        goto exit_action_not_supported;
        if (parser->drop && parser->mark)
        if (!parser->queues_n && !parser->drop) {
                rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "no valid action");
exit_action_not_supported:
        rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
                           actions, "action not supported");
 *   Pattern specification (list terminated by the END pattern item).
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
mlx5_flow_convert_items_validate(const struct rte_flow_item items[],
                                 struct rte_flow_error *error,
                                 struct mlx5_flow_parse *parser)
        const struct mlx5_flow_items *cur_item = mlx5_flow_items;

        /* Initialise the offsets to start after verbs attribute. */
        for (i = 0; i != hash_rxq_init_n; ++i)
                parser->queue[i].offset = sizeof(struct ibv_flow_attr);
        for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
                const struct mlx5_flow_items *token = NULL;

                if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
                     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
                        if (cur_item->items[i] == items->type) {
                                token = &mlx5_flow_items[items->type];
                        goto exit_item_not_supported;
                ret = mlx5_flow_item_validate(items,
                                              (const uint8_t *)cur_item->mask,
                        goto exit_item_not_supported;
                if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
                                rte_flow_error_set(error, ENOTSUP,
                                                   RTE_FLOW_ERROR_TYPE_ITEM,
                                                   "cannot recognize multiple"
                                                   " VXLAN encapsulations");
                        parser->inner = IBV_FLOW_SPEC_INNER;
                        parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
                        for (n = 0; n != hash_rxq_init_n; ++n)
                                parser->queue[n].offset += cur_item->dst_sz;
                parser->queue[HASH_RXQ_ETH].offset +=
                        sizeof(struct ibv_flow_spec_action_drop);
                for (i = 0; i != hash_rxq_init_n; ++i)
                        parser->queue[i].offset +=
                                sizeof(struct ibv_flow_spec_action_tag);
                unsigned int size = sizeof(struct ibv_flow_spec_counter_action);

                for (i = 0; i != hash_rxq_init_n; ++i)
                        parser->queue[i].offset += size;
exit_item_not_supported:
        return rte_flow_error_set(error, -ret, RTE_FLOW_ERROR_TYPE_ITEM,
                                  items, "item not supported");
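/*
 * Example of the offset accounting above: for a pattern such as
 * eth / ipv4 / udp, every per-queue offset starts at
 * sizeof(struct ibv_flow_attr) and grows by the dst_sz of each matched
 * item (plus the drop/tag/counter action sizes when present), yielding
 * the exact amount of memory later allocated for the Verbs attribute and
 * its specifications.
 */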
 * Allocate memory space to store verbs flow attributes.
 *
 *   Amount of bytes to allocate.
 *   Perform verbose error reporting if not NULL.
 *
 *   A verbs flow attribute on success, NULL otherwise and rte_errno is set.
 */
static struct ibv_flow_attr *
mlx5_flow_convert_allocate(unsigned int size, struct rte_flow_error *error)
        struct ibv_flow_attr *ibv_attr;

        ibv_attr = rte_calloc(__func__, 1, size, 0);
                rte_flow_error_set(error, ENOMEM,
                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
                                   "cannot allocate verbs spec attributes");
 * Give inner packet matching a higher priority than non-inner matching.
 *
 * @param[in, out] parser
 *   Internal parser structure.
 *   User flow attribute.
 */
mlx5_flow_update_priority(struct mlx5_flow_parse *parser,
                          const struct rte_flow_attr *attr)
        parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
                hash_rxq_init[HASH_RXQ_ETH].flow_priority;
        for (i = 0; i != hash_rxq_init_n; ++i) {
                if (parser->queue[i].ibv_attr) {
                        parser->queue[i].ibv_attr->priority =
                                hash_rxq_init[i].flow_priority -
                                (parser->inner ? 1 : 0);
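/*
 * Illustrative example of the priority adjustment above: assuming a
 * flow_priority of 4, the outer (non-inner) rule keeps priority 4 while
 * the same rule parsed under a VXLAN tunnel gets 4 - 1 = 3; since lower
 * values take precedence in Verbs, inner matches win over outer ones.
 */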
 * Finalise verbs flow attributes.
 *
 * @param[in, out] parser
 *   Internal parser structure.
 */
mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
        const unsigned int ipv4 =
                hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
        const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 : HASH_RXQ_TCPV6;
        const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
        const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 : HASH_RXQ_TCPV4;
        const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 : HASH_RXQ_IPV4;
        const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;

        /* Remove any other flow not matching the pattern. */
        if (parser->queues_n == 1) {
                for (i = 0; i != hash_rxq_init_n; ++i) {
                        if (i == HASH_RXQ_ETH)
                        rte_free(parser->queue[i].ibv_attr);
                        parser->queue[i].ibv_attr = NULL;
        if (parser->layer == HASH_RXQ_ETH) {
                 * This layer becomes useless as the pattern defines deeper
                 * layers.
                rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
                parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
        /* Remove opposite kind of layer e.g. IPv6 if the pattern is IPv4. */
        for (i = ohmin; i != (ohmax + 1); ++i) {
                if (!parser->queue[i].ibv_attr)
                rte_free(parser->queue[i].ibv_attr);
                parser->queue[i].ibv_attr = NULL;
        /* Remove impossible flow according to the RSS configuration. */
        if (hash_rxq_init[parser->layer].dpdk_rss_hf &
            parser->rss_conf.rss_hf) {
                /* Remove any other flow. */
                for (i = hmin; i != (hmax + 1); ++i) {
                        if ((i == parser->layer) ||
                            (!parser->queue[i].ibv_attr))
                        rte_free(parser->queue[i].ibv_attr);
                        parser->queue[i].ibv_attr = NULL;
        } else if (!parser->queue[ip].ibv_attr) {
                /* No RSS possible with the current configuration. */
                parser->queues_n = 1;
         * Fill missing layers in verbs specifications, or compute the correct
         * offset to allocate the memory space for the attributes and
         * specifications.
        for (i = 0; i != hash_rxq_init_n - 1; ++i) {
                        struct ibv_flow_spec_ipv4_ext ipv4;
                        struct ibv_flow_spec_ipv6 ipv6;
                        struct ibv_flow_spec_tcp_udp udp_tcp;

                if (i == parser->layer)
                if (parser->layer == HASH_RXQ_ETH) {
                        if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
                                size = sizeof(struct ibv_flow_spec_ipv4_ext);
                                specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
                                        .type = IBV_FLOW_SPEC_IPV4_EXT,
                                size = sizeof(struct ibv_flow_spec_ipv6);
                                specs.ipv6 = (struct ibv_flow_spec_ipv6){
                                        .type = IBV_FLOW_SPEC_IPV6,
                        if (parser->queue[i].ibv_attr) {
                                dst = (void *)((uintptr_t)
                                               parser->queue[i].ibv_attr +
                                               parser->queue[i].offset);
                                memcpy(dst, &specs, size);
                                ++parser->queue[i].ibv_attr->num_of_specs;
                        parser->queue[i].offset += size;
                if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
                    (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
                        size = sizeof(struct ibv_flow_spec_tcp_udp);
                        specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
                                .type = ((i == HASH_RXQ_UDPV4 ||
                                          i == HASH_RXQ_UDPV6) ?
                        if (parser->queue[i].ibv_attr) {
                                dst = (void *)((uintptr_t)
                                               parser->queue[i].ibv_attr +
                                               parser->queue[i].offset);
                                memcpy(dst, &specs, size);
                                ++parser->queue[i].ibv_attr->num_of_specs;
                        parser->queue[i].offset += size;
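/*
 * Illustrative example of the completion above: for a pattern ending at
 * the IPv4 layer (parser->layer == HASH_RXQ_IPV4), the HASH_RXQ_UDPV4 and
 * HASH_RXQ_TCPV4 queues still lack an L4 specification, so a wildcard
 * ibv_flow_spec_tcp_udp is appended to them here, which lets RSS spread
 * traffic over the L4 hash queue types requested by the configuration.
 */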
 * Validate and convert a flow supported by the NIC.
 *
 *   Pointer to Ethernet device.
 *   Flow rule attributes.
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
mlx5_flow_convert(struct rte_eth_dev *dev,
                  const struct rte_flow_attr *attr,
                  const struct rte_flow_item items[],
                  const struct rte_flow_action actions[],
                  struct rte_flow_error *error,
                  struct mlx5_flow_parse *parser)
        const struct mlx5_flow_items *cur_item = mlx5_flow_items;

        /* First step. Validate the attributes, items and actions. */
        *parser = (struct mlx5_flow_parse){
                .create = parser->create,
                .layer = HASH_RXQ_ETH,
                .mark_id = MLX5_FLOW_MARK_DEFAULT,
        ret = mlx5_flow_convert_attributes(attr, error);
        ret = mlx5_flow_convert_actions(dev, actions, error, parser);
        ret = mlx5_flow_convert_items_validate(items, error, parser);
        mlx5_flow_convert_finalise(parser);
         * Allocate the memory space to store verbs specifications.
                unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;

                parser->queue[HASH_RXQ_ETH].ibv_attr =
                        mlx5_flow_convert_allocate(offset, error);
                if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
                parser->queue[HASH_RXQ_ETH].offset =
                        sizeof(struct ibv_flow_attr);
                for (i = 0; i != hash_rxq_init_n; ++i) {
                        unsigned int offset;

                        if (!(parser->rss_conf.rss_hf &
                              hash_rxq_init[i].dpdk_rss_hf) &&
                            (i != HASH_RXQ_ETH))
                        offset = parser->queue[i].offset;
                        parser->queue[i].ibv_attr =
                                mlx5_flow_convert_allocate(offset, error);
                        if (!parser->queue[i].ibv_attr)
                        parser->queue[i].offset = sizeof(struct ibv_flow_attr);
        /* Third step. Conversion parse, fill the specifications. */
        for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
                if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
                cur_item = &mlx5_flow_items[items->type];
                ret = cur_item->convert(items,
                                        (cur_item->default_mask ?
                                         cur_item->default_mask :
                        rte_flow_error_set(error, rte_errno,
                                           RTE_FLOW_ERROR_TYPE_ITEM,
                                           items, "item not supported");
                mlx5_flow_create_flag_mark(parser, parser->mark_id);
        if (parser->count && parser->create) {
                mlx5_flow_create_count(dev, parser);
                        goto exit_count_error;
         * Last step. Complete missing specification to reach the RSS
         * configuration.
        mlx5_flow_convert_finalise(parser);
        mlx5_flow_update_priority(parser, attr);
        /* Only verification is expected, all resources should be released. */
        if (!parser->create) {
                for (i = 0; i != hash_rxq_init_n; ++i) {
                        if (parser->queue[i].ibv_attr) {
                                rte_free(parser->queue[i].ibv_attr);
                                parser->queue[i].ibv_attr = NULL;
        for (i = 0; i != hash_rxq_init_n; ++i) {
                if (parser->queue[i].ibv_attr) {
                        rte_free(parser->queue[i].ibv_attr);
                        parser->queue[i].ibv_attr = NULL;
        rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
                           NULL, "cannot allocate verbs spec attributes");
        rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
                           NULL, "cannot create counter");
 * Copy the specification created into the flow.
 *
 *   Internal parser structure.
 *   Created specification.
 *   Size in bytes of the specification to copy.
 */
mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
        for (i = 0; i != hash_rxq_init_n; ++i) {
                if (!parser->queue[i].ibv_attr)
                /* Specification must be the same L3 type or none. */
                if (parser->layer == HASH_RXQ_ETH ||
                    (hash_rxq_init[parser->layer].ip_version ==
                     hash_rxq_init[i].ip_version) ||
                    (hash_rxq_init[i].ip_version == 0)) {
                        dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
                                       parser->queue[i].offset);
                        memcpy(dst, src, size);
                        ++parser->queue[i].ibv_attr->num_of_specs;
                        parser->queue[i].offset += size;
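/*
 * Example: an ibv_flow_spec_ipv4_ext built by mlx5_flow_create_ipv4() is
 * copied into the HASH_RXQ_ETH queue and the IPv4-based hash queues only;
 * IPv6-based queues fail the L3 compatibility test above and keep their
 * specifications untouched.
 */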
 * Convert Ethernet item to Verbs specification.
 *
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
mlx5_flow_create_eth(const struct rte_flow_item *item,
                     const void *default_mask,
        const struct rte_flow_item_eth *spec = item->spec;
        const struct rte_flow_item_eth *mask = item->mask;
        struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
        const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
        struct ibv_flow_spec_eth eth = {
                .type = parser->inner | IBV_FLOW_SPEC_ETH,

        /* Don't update layer for the inner pattern. */
                parser->layer = HASH_RXQ_ETH;
                        mask = default_mask;
                memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
                memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
                eth.val.ether_type = spec->type;
                memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
                memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
                eth.mask.ether_type = mask->type;
                /* Remove unwanted bits from values. */
                for (i = 0; i < ETHER_ADDR_LEN; ++i) {
                        eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
                        eth.val.src_mac[i] &= eth.mask.src_mac[i];
                eth.val.ether_type &= eth.mask.ether_type;
        mlx5_flow_create_copy(parser, &eth, eth_size);
 * Convert VLAN item to Verbs specification.
 *
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
mlx5_flow_create_vlan(const struct rte_flow_item *item,
                      const void *default_mask,
        const struct rte_flow_item_vlan *spec = item->spec;
        const struct rte_flow_item_vlan *mask = item->mask;
        struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
        struct ibv_flow_spec_eth *eth;
        const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);

                        mask = default_mask;
                for (i = 0; i != hash_rxq_init_n; ++i) {
                        if (!parser->queue[i].ibv_attr)
                        eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
                                       parser->queue[i].offset - eth_size);
                        eth->val.vlan_tag = spec->tci;
                        eth->mask.vlan_tag = mask->tci;
                        eth->val.vlan_tag &= eth->mask.vlan_tag;
 * Convert IPv4 item to Verbs specification.
 *
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
                      const void *default_mask,
        const struct rte_flow_item_ipv4 *spec = item->spec;
        const struct rte_flow_item_ipv4 *mask = item->mask;
        struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
        unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
        struct ibv_flow_spec_ipv4_ext ipv4 = {
                .type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,

        /* Don't update layer for the inner pattern. */
                parser->layer = HASH_RXQ_IPV4;
                        mask = default_mask;
                ipv4.val = (struct ibv_flow_ipv4_ext_filter){
                        .src_ip = spec->hdr.src_addr,
                        .dst_ip = spec->hdr.dst_addr,
                        .proto = spec->hdr.next_proto_id,
                        .tos = spec->hdr.type_of_service,
                ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
                        .src_ip = mask->hdr.src_addr,
                        .dst_ip = mask->hdr.dst_addr,
                        .proto = mask->hdr.next_proto_id,
                        .tos = mask->hdr.type_of_service,
                /* Remove unwanted bits from values. */
                ipv4.val.src_ip &= ipv4.mask.src_ip;
                ipv4.val.dst_ip &= ipv4.mask.dst_ip;
                ipv4.val.proto &= ipv4.mask.proto;
                ipv4.val.tos &= ipv4.mask.tos;
        mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
 * Convert IPv6 item to Verbs specification.
 *
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
                      const void *default_mask,
        const struct rte_flow_item_ipv6 *spec = item->spec;
        const struct rte_flow_item_ipv6 *mask = item->mask;
        struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
        unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
        struct ibv_flow_spec_ipv6 ipv6 = {
                .type = parser->inner | IBV_FLOW_SPEC_IPV6,

        /* Don't update layer for the inner pattern. */
                parser->layer = HASH_RXQ_IPV6;
                uint32_t vtc_flow_val;
                uint32_t vtc_flow_mask;

                        mask = default_mask;
                memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
                       RTE_DIM(ipv6.val.src_ip));
                memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
                       RTE_DIM(ipv6.val.dst_ip));
                memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
                       RTE_DIM(ipv6.mask.src_ip));
                memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
                       RTE_DIM(ipv6.mask.dst_ip));
                vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
                vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
                ipv6.val.flow_label =
                        rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
                ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
                ipv6.val.next_hdr = spec->hdr.proto;
                ipv6.val.hop_limit = spec->hdr.hop_limits;
                ipv6.mask.flow_label =
                        rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
                ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
                ipv6.mask.next_hdr = mask->hdr.proto;
                ipv6.mask.hop_limit = mask->hdr.hop_limits;
                /* Remove unwanted bits from values. */
                for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
                        ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
                        ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
                ipv6.val.flow_label &= ipv6.mask.flow_label;
                ipv6.val.traffic_class &= ipv6.mask.traffic_class;
                ipv6.val.next_hdr &= ipv6.mask.next_hdr;
                ipv6.val.hop_limit &= ipv6.mask.hop_limit;
        mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
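/*
 * Note on the vtc_flow handling above: vtc_flow is the first 32 bits of
 * the IPv6 header, i.e. 4 bits of version, 8 bits of traffic class and
 * 20 bits of flow label. Example: vtc_flow 0x60123456 carries traffic
 * class 0x01 and flow label 0x23456 once the IPV6_HDR_TC_MASK and
 * IPV6_HDR_FL_MASK shifts are applied.
 */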
 * Convert UDP item to Verbs specification.
 *
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
mlx5_flow_create_udp(const struct rte_flow_item *item,
                     const void *default_mask,
        const struct rte_flow_item_udp *spec = item->spec;
        const struct rte_flow_item_udp *mask = item->mask;
        struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
        unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
        struct ibv_flow_spec_tcp_udp udp = {
                .type = parser->inner | IBV_FLOW_SPEC_UDP,

        /* Don't update layer for the inner pattern. */
        if (!parser->inner) {
                if (parser->layer == HASH_RXQ_IPV4)
                        parser->layer = HASH_RXQ_UDPV4;
                        parser->layer = HASH_RXQ_UDPV6;
                        mask = default_mask;
                udp.val.dst_port = spec->hdr.dst_port;
                udp.val.src_port = spec->hdr.src_port;
                udp.mask.dst_port = mask->hdr.dst_port;
                udp.mask.src_port = mask->hdr.src_port;
                /* Remove unwanted bits from values. */
                udp.val.src_port &= udp.mask.src_port;
                udp.val.dst_port &= udp.mask.dst_port;
        mlx5_flow_create_copy(parser, &udp, udp_size);
 * Convert TCP item to Verbs specification.
 *
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
mlx5_flow_create_tcp(const struct rte_flow_item *item,
                     const void *default_mask,
        const struct rte_flow_item_tcp *spec = item->spec;
        const struct rte_flow_item_tcp *mask = item->mask;
        struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
        unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
        struct ibv_flow_spec_tcp_udp tcp = {
                .type = parser->inner | IBV_FLOW_SPEC_TCP,

        /* Don't update layer for the inner pattern. */
        if (!parser->inner) {
                if (parser->layer == HASH_RXQ_IPV4)
                        parser->layer = HASH_RXQ_TCPV4;
                        parser->layer = HASH_RXQ_TCPV6;
                        mask = default_mask;
                tcp.val.dst_port = spec->hdr.dst_port;
                tcp.val.src_port = spec->hdr.src_port;
                tcp.mask.dst_port = mask->hdr.dst_port;
                tcp.mask.src_port = mask->hdr.src_port;
                /* Remove unwanted bits from values. */
                tcp.val.src_port &= tcp.mask.src_port;
                tcp.val.dst_port &= tcp.mask.dst_port;
        mlx5_flow_create_copy(parser, &tcp, tcp_size);
 * Convert VXLAN item to Verbs specification.
 *
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
                       const void *default_mask,
        const struct rte_flow_item_vxlan *spec = item->spec;
        const struct rte_flow_item_vxlan *mask = item->mask;
        struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
        unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
        struct ibv_flow_spec_tunnel vxlan = {
                .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,

        parser->inner = IBV_FLOW_SPEC_INNER;
                        mask = default_mask;
                memcpy(&id.vni[1], spec->vni, 3);
                vxlan.val.tunnel_id = id.vlan_id;
                memcpy(&id.vni[1], mask->vni, 3);
                vxlan.mask.tunnel_id = id.vlan_id;
                /* Remove unwanted bits from values. */
                vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
         * Tunnel ID 0 is equivalent to not adding a VXLAN layer: if only this
         * layer is defined in the Verbs specification, it is interpreted as a
         * wildcard and all packets will match this rule; if it follows a full
         * stack layer (e.g. eth / ipv4 / udp), all packets matching the
         * preceding layers will also match this rule.
         * To avoid such a situation, VNI 0 is currently refused.
        if (!vxlan.val.tunnel_id) {
        mlx5_flow_create_copy(parser, &vxlan, size);
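/*
 * Note on the VNI packing above: the 24-bit VNI from the item is copied
 * into the last three bytes of a four-byte union whose first byte stays
 * zero, yielding the 32-bit big-endian tunnel_id expected by Verbs.
 */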
 * Convert mark/flag action to Verbs specification.
 *
 *   Internal parser structure.
 *
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
        unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
        struct ibv_flow_spec_action_tag tag = {
                .type = IBV_FLOW_SPEC_ACTION_TAG,
                .tag_id = mlx5_flow_mark_set(mark_id),

        assert(parser->mark);
        mlx5_flow_create_copy(parser, &tag, size);

 * Convert count action to Verbs specification.
 *
 *   Pointer to Ethernet device.
 *   Pointer to MLX5 flow parser structure.
 *
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
mlx5_flow_create_count(struct rte_eth_dev *dev __rte_unused,
                       struct mlx5_flow_parse *parser __rte_unused)
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
        struct priv *priv = dev->data->dev_private;
        unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
        struct ibv_counter_set_init_attr init_attr = {0};
        struct ibv_flow_spec_counter_action counter = {
                .type = IBV_FLOW_SPEC_ACTION_COUNT,
                .counter_set_handle = 0,

        init_attr.counter_set_id = 0;
        parser->cs = mlx5_glue->create_counter_set(priv->ctx, &init_attr);
        counter.counter_set_handle = parser->cs->handle;
        mlx5_flow_create_copy(parser, &counter, size);
 * Complete flow rule creation with a drop queue.
 *
 *   Pointer to Ethernet device.
 *   Internal parser structure.
 *   Pointer to the rte_flow.
 *   Perform verbose error reporting if not NULL.
 *
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
mlx5_flow_create_action_queue_drop(struct rte_eth_dev *dev,
                                   struct mlx5_flow_parse *parser,
                                   struct rte_flow *flow,
                                   struct rte_flow_error *error)
        struct priv *priv = dev->data->dev_private;
        struct ibv_flow_spec_action_drop *drop;
        unsigned int size = sizeof(struct ibv_flow_spec_action_drop);

        drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
                        parser->queue[HASH_RXQ_ETH].offset);
        *drop = (struct ibv_flow_spec_action_drop){
                .type = IBV_FLOW_SPEC_ACTION_DROP,
        ++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
        parser->queue[HASH_RXQ_ETH].offset += size;
        flow->frxq[HASH_RXQ_ETH].ibv_attr =
                parser->queue[HASH_RXQ_ETH].ibv_attr;
        flow->cs = parser->cs;
        if (!priv->dev->data->dev_started)
        parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
        flow->frxq[HASH_RXQ_ETH].ibv_flow =
                mlx5_glue->create_flow(priv->flow_drop_queue->qp,
                                       flow->frxq[HASH_RXQ_ETH].ibv_attr);
        if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
                rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "flow rule creation failure");
        if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
                claim_zero(mlx5_glue->destroy_flow
                           (flow->frxq[HASH_RXQ_ETH].ibv_flow));
                flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
        if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
                rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
                flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
                claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
 * Create hash Rx queues when RSS is enabled.
 *
 *   Pointer to Ethernet device.
 *   Internal parser structure.
 *   Pointer to the rte_flow.
 *   Perform verbose error reporting if not NULL.
 *
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
mlx5_flow_create_action_queue_rss(struct rte_eth_dev *dev,
                                  struct mlx5_flow_parse *parser,
                                  struct rte_flow *flow,
                                  struct rte_flow_error *error)
        struct priv *priv = dev->data->dev_private;

        for (i = 0; i != hash_rxq_init_n; ++i) {
                uint64_t hash_fields;

                if (!parser->queue[i].ibv_attr)
                flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
                parser->queue[i].ibv_attr = NULL;
                hash_fields = hash_rxq_init[i].hash_fields;
                if (!priv->dev->data->dev_started)
                flow->frxq[i].hrxq =
                                      parser->rss_conf.rss_key,
                                      parser->rss_conf.rss_key_len,
                if (flow->frxq[i].hrxq)
                flow->frxq[i].hrxq =
                                      parser->rss_conf.rss_key,
                                      parser->rss_conf.rss_key_len,
                if (!flow->frxq[i].hrxq) {
                        return rte_flow_error_set(error, ENOMEM,
                                                  RTE_FLOW_ERROR_TYPE_HANDLE,
                                                  "cannot create hash rxq");
 * Complete flow rule creation.
 *
 *   Pointer to Ethernet device.
 *   Internal parser structure.
 *   Pointer to the rte_flow.
 *   Perform verbose error reporting if not NULL.
 *
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
mlx5_flow_create_action_queue(struct rte_eth_dev *dev,
                              struct mlx5_flow_parse *parser,
                              struct rte_flow *flow,
                              struct rte_flow_error *error)
        struct priv *priv = dev->data->dev_private;
        unsigned int flows_n = 0;

        assert(!parser->drop);
        ret = mlx5_flow_create_action_queue_rss(dev, parser, flow, error);
        flow->cs = parser->cs;
        if (!priv->dev->data->dev_started)
        for (i = 0; i != hash_rxq_init_n; ++i) {
                if (!flow->frxq[i].hrxq)
                flow->frxq[i].ibv_flow =
                        mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
                                               flow->frxq[i].ibv_attr);
                if (!flow->frxq[i].ibv_flow) {
                        rte_flow_error_set(error, ENOMEM,
                                           RTE_FLOW_ERROR_TYPE_HANDLE,
                                           NULL, "flow rule creation failure");
                DRV_LOG(DEBUG, "port %u %p type %d QP %p ibv_flow %p",
                        (void *)flow->frxq[i].hrxq,
                        (void *)flow->frxq[i].ibv_flow);
                rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "internal error in flow creation");
        for (i = 0; i != parser->queues_n; ++i) {
                struct mlx5_rxq_data *q =
                        (*priv->rxqs)[parser->queues[i]];

                q->mark |= parser->mark;
        ret = rte_errno; /* Save rte_errno before cleanup. */
        for (i = 0; i != hash_rxq_init_n; ++i) {
                if (flow->frxq[i].ibv_flow) {
                        struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;

                        claim_zero(mlx5_glue->destroy_flow(ibv_flow));
                if (flow->frxq[i].hrxq)
                        mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
                if (flow->frxq[i].ibv_attr)
                        rte_free(flow->frxq[i].ibv_attr);
                claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
        rte_errno = ret; /* Restore rte_errno. */
 *   Pointer to Ethernet device.
 *   Pointer to a TAILQ flow list.
 *   Flow rule attributes.
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 *   Perform verbose error reporting if not NULL.
 *
 *   A flow on success, NULL otherwise and rte_errno is set.
 */
static struct rte_flow *
mlx5_flow_list_create(struct rte_eth_dev *dev,
                      struct mlx5_flows *list,
                      const struct rte_flow_attr *attr,
                      const struct rte_flow_item items[],
                      const struct rte_flow_action actions[],
                      struct rte_flow_error *error)
        struct mlx5_flow_parse parser = { .create = 1, };
        struct rte_flow *flow = NULL;

        ret = mlx5_flow_convert(dev, attr, items, actions, error, &parser);
        flow = rte_calloc(__func__, 1,
                          sizeof(*flow) + parser.queues_n * sizeof(uint16_t),
                rte_flow_error_set(error, ENOMEM,
                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
                                   "cannot allocate flow memory");
        /* Copy queues configuration. */
        flow->queues = (uint16_t (*)[])(flow + 1);
        memcpy(flow->queues, parser.queues, parser.queues_n * sizeof(uint16_t));
        flow->queues_n = parser.queues_n;
        flow->mark = parser.mark;
        /* Copy RSS configuration. */
        flow->rss_conf = parser.rss_conf;
        flow->rss_conf.rss_key = flow->rss_key;
        memcpy(flow->rss_key, parser.rss_key, parser.rss_conf.rss_key_len);
        /* Finalise the flow. */
                ret = mlx5_flow_create_action_queue_drop(dev, &parser, flow,
                ret = mlx5_flow_create_action_queue(dev, &parser, flow, error);
        TAILQ_INSERT_TAIL(list, flow, next);
        DRV_LOG(DEBUG, "port %u flow created %p", dev->data->port_id,
        DRV_LOG(ERR, "port %u flow creation error: %s", dev->data->port_id,
        for (i = 0; i != hash_rxq_init_n; ++i) {
                if (parser.queue[i].ibv_attr)
                        rte_free(parser.queue[i].ibv_attr);
 * Validate a flow supported by the NIC.
 *
 * @see rte_flow_validate()
 */
mlx5_flow_validate(struct rte_eth_dev *dev,
                   const struct rte_flow_attr *attr,
                   const struct rte_flow_item items[],
                   const struct rte_flow_action actions[],
                   struct rte_flow_error *error)
        struct mlx5_flow_parse parser = { .create = 0, };

        return mlx5_flow_convert(dev, attr, items, actions, error, &parser);

 * @see rte_flow_create()
 */
mlx5_flow_create(struct rte_eth_dev *dev,
                 const struct rte_flow_attr *attr,
                 const struct rte_flow_item items[],
                 const struct rte_flow_action actions[],
                 struct rte_flow_error *error)
        struct priv *priv = dev->data->dev_private;

        return mlx5_flow_list_create(dev, &priv->flows, attr, items, actions,
 * Destroy a flow in a list.
 *
 *   Pointer to Ethernet device.
 *   Pointer to a TAILQ flow list.
 */
mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
                       struct rte_flow *flow)
        struct priv *priv = dev->data->dev_private;

        if (flow->drop || !flow->mark)
        for (i = 0; i != flow->queues_n; ++i) {
                struct rte_flow *tmp;

                 * To remove the mark from the queue, the queue must not be
                 * present in any other marked flow (RSS or not).
                TAILQ_FOREACH(tmp, list, next) {
                        uint16_t *tqs = NULL;

                        for (j = 0; j != hash_rxq_init_n; ++j) {
                                if (!tmp->frxq[j].hrxq)
                                tqs = tmp->frxq[j].hrxq->ind_table->queues;
                                tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
                        for (j = 0; (j != tq_n) && !mark; j++)
                                if (tqs[j] == (*flow->queues)[i])
                (*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
                if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
                        claim_zero(mlx5_glue->destroy_flow
                                   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
                rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
                for (i = 0; i != hash_rxq_init_n; ++i) {
                        struct mlx5_flow *frxq = &flow->frxq[i];

                                claim_zero(mlx5_glue->destroy_flow
                                mlx5_hrxq_release(dev, frxq->hrxq);
                                rte_free(frxq->ibv_attr);
                claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
        TAILQ_REMOVE(list, flow, next);
        DRV_LOG(DEBUG, "port %u flow destroyed %p", dev->data->port_id,
 * Destroy all flows.
 *
 *   Pointer to Ethernet device.
 *   Pointer to a TAILQ flow list.
 */
mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
        while (!TAILQ_EMPTY(list)) {
                struct rte_flow *flow;

                flow = TAILQ_FIRST(list);
                mlx5_flow_list_destroy(dev, list, flow);
 * Create drop queue.
 *
 *   Pointer to Ethernet device.
 *
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
mlx5_flow_create_drop_queue(struct rte_eth_dev *dev)
        struct priv *priv = dev->data->dev_private;
        struct mlx5_hrxq_drop *fdq = NULL;

        fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
                        "port %u cannot allocate memory for drop queue",
                        dev->data->port_id);
        fdq->cq = mlx5_glue->create_cq(priv->ctx, 1, NULL, NULL, 0);
                DRV_LOG(WARNING, "port %u cannot allocate CQ for drop queue",
                        dev->data->port_id);
        fdq->wq = mlx5_glue->create_wq
                 &(struct ibv_wq_init_attr){
                        .wq_type = IBV_WQT_RQ,
                DRV_LOG(WARNING, "port %u cannot allocate WQ for drop queue",
                        dev->data->port_id);
        fdq->ind_table = mlx5_glue->create_rwq_ind_table
                 &(struct ibv_rwq_ind_table_init_attr){
                        .log_ind_tbl_size = 0,
                        .ind_tbl = &fdq->wq,
        if (!fdq->ind_table) {
                        "port %u cannot allocate indirection table for drop"
                        dev->data->port_id);
        fdq->qp = mlx5_glue->create_qp_ex
                 &(struct ibv_qp_init_attr_ex){
                        .qp_type = IBV_QPT_RAW_PACKET,
                                IBV_QP_INIT_ATTR_PD |
                                IBV_QP_INIT_ATTR_IND_TABLE |
                                IBV_QP_INIT_ATTR_RX_HASH,
                        .rx_hash_conf = (struct ibv_rx_hash_conf){
                                        IBV_RX_HASH_FUNC_TOEPLITZ,
                                .rx_hash_key_len = rss_hash_default_key_len,
                                .rx_hash_key = rss_hash_default_key,
                                .rx_hash_fields_mask = 0,
                        .rwq_ind_tbl = fdq->ind_table,
                DRV_LOG(WARNING, "port %u cannot allocate QP for drop queue",
                        dev->data->port_id);
        priv->flow_drop_queue = fdq;
        claim_zero(mlx5_glue->destroy_qp(fdq->qp));
        claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
        claim_zero(mlx5_glue->destroy_wq(fdq->wq));
        claim_zero(mlx5_glue->destroy_cq(fdq->cq));
        priv->flow_drop_queue = NULL;
 * Delete drop queue.
 *
 *   Pointer to Ethernet device.
 */
mlx5_flow_delete_drop_queue(struct rte_eth_dev *dev)
        struct priv *priv = dev->data->dev_private;
        struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;

        claim_zero(mlx5_glue->destroy_qp(fdq->qp));
        claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
        claim_zero(mlx5_glue->destroy_wq(fdq->wq));
        claim_zero(mlx5_glue->destroy_cq(fdq->cq));
        priv->flow_drop_queue = NULL;
 *   Pointer to Ethernet device.
 *   Pointer to a TAILQ flow list.
 */
mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
        struct priv *priv = dev->data->dev_private;
        struct rte_flow *flow;

        TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
                struct mlx5_ind_table_ibv *ind_tbl = NULL;

                        if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
                        claim_zero(mlx5_glue->destroy_flow
                                   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
                        flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
                        DRV_LOG(DEBUG, "port %u flow %p removed",
                                dev->data->port_id, (void *)flow);
                /* Verify the flow has not already been cleaned. */
                for (i = 0; i != hash_rxq_init_n; ++i) {
                        if (!flow->frxq[i].ibv_flow)
                         * Indirection table may be necessary to remove the
                         * flags in the Rx queues.
                         * This helps to speed-up the process by avoiding
                        ind_tbl = flow->frxq[i].hrxq->ind_table;
                if (i == hash_rxq_init_n)
                for (i = 0; i != ind_tbl->queues_n; ++i)
                        (*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
                for (i = 0; i != hash_rxq_init_n; ++i) {
                        if (!flow->frxq[i].ibv_flow)
                        claim_zero(mlx5_glue->destroy_flow
                                   (flow->frxq[i].ibv_flow));
                        flow->frxq[i].ibv_flow = NULL;
                        mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
                        flow->frxq[i].hrxq = NULL;
                DRV_LOG(DEBUG, "port %u flow %p removed", dev->data->port_id,
 *   Pointer to Ethernet device.
 *   Pointer to a TAILQ flow list.
 *
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
        struct priv *priv = dev->data->dev_private;
        struct rte_flow *flow;

        TAILQ_FOREACH(flow, list, next) {
                        flow->frxq[HASH_RXQ_ETH].ibv_flow =
                                mlx5_glue->create_flow
                                (priv->flow_drop_queue->qp,
                                 flow->frxq[HASH_RXQ_ETH].ibv_attr);
                        if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
                                        "port %u flow %p cannot be applied",
                                        dev->data->port_id, (void *)flow);
                        DRV_LOG(DEBUG, "port %u flow %p applied",
                                dev->data->port_id, (void *)flow);
                for (i = 0; i != hash_rxq_init_n; ++i) {
                        if (!flow->frxq[i].ibv_attr)
                        flow->frxq[i].hrxq =
                                mlx5_hrxq_get(dev, flow->rss_conf.rss_key,
                                              flow->rss_conf.rss_key_len,
                                              hash_rxq_init[i].hash_fields,
                        if (flow->frxq[i].hrxq)
                        flow->frxq[i].hrxq =
                                mlx5_hrxq_new(dev, flow->rss_conf.rss_key,
                                              flow->rss_conf.rss_key_len,
                                              hash_rxq_init[i].hash_fields,
                        if (!flow->frxq[i].hrxq) {
                                        "port %u flow %p cannot be applied",
                                        dev->data->port_id, (void *)flow);
                        flow->frxq[i].ibv_flow =
                                mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
                                                       flow->frxq[i].ibv_attr);
                        if (!flow->frxq[i].ibv_flow) {
                                        "port %u flow %p cannot be applied",
                                        dev->data->port_id, (void *)flow);
                        DRV_LOG(DEBUG, "port %u flow %p applied",
                                dev->data->port_id, (void *)flow);
                for (i = 0; i != flow->queues_n; ++i)
                        (*priv->rxqs)[(*flow->queues)[i]]->mark = 1;
 * Verify the flow list is empty.
 *
 *   Pointer to Ethernet device.
 *
 * @return the number of flows not released.
 */
mlx5_flow_verify(struct rte_eth_dev *dev)
        struct priv *priv = dev->data->dev_private;
        struct rte_flow *flow;

        TAILQ_FOREACH(flow, &priv->flows, next) {
                DRV_LOG(DEBUG, "port %u flow %p still referenced",
                        dev->data->port_id, (void *)flow);
 * Enable a control flow configured from the control plane.
 *
 *   Pointer to Ethernet device.
 *   An Ethernet flow spec to apply.
 *   An Ethernet flow mask to apply.
 *   A VLAN flow spec to apply.
 *   A VLAN flow mask to apply.
 *
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
                    struct rte_flow_item_eth *eth_spec,
                    struct rte_flow_item_eth *eth_mask,
                    struct rte_flow_item_vlan *vlan_spec,
                    struct rte_flow_item_vlan *vlan_mask)
        struct priv *priv = dev->data->dev_private;
        const struct rte_flow_attr attr = {
                .priority = MLX5_CTRL_FLOW_PRIORITY,
        struct rte_flow_item items[] = {
                        .type = RTE_FLOW_ITEM_TYPE_ETH,
                        .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
                                              RTE_FLOW_ITEM_TYPE_END,
                        .type = RTE_FLOW_ITEM_TYPE_END,
        struct rte_flow_action actions[] = {
                        .type = RTE_FLOW_ACTION_TYPE_RSS,
                        .type = RTE_FLOW_ACTION_TYPE_END,
        struct rte_flow *flow;
        struct rte_flow_error error;
                struct rte_flow_action_rss rss;
                        const struct rte_eth_rss_conf *rss_conf;
                        uint16_t queue[RTE_MAX_QUEUES_PER_PORT];

        if (!priv->reta_idx_n) {
        for (i = 0; i != priv->reta_idx_n; ++i)
                action_rss.local.queue[i] = (*priv->reta_idx)[i];
        action_rss.local.rss_conf = &priv->rss_conf;
        action_rss.local.num = priv->reta_idx_n;
        actions[0].conf = (const void *)&action_rss.rss;
        flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
 * Enable a control flow configured from the control plane.
 *
 *   Pointer to Ethernet device.
 *   An Ethernet flow spec to apply.
 *   An Ethernet flow mask to apply.
 *
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
mlx5_ctrl_flow(struct rte_eth_dev *dev,
               struct rte_flow_item_eth *eth_spec,
               struct rte_flow_item_eth *eth_mask)
        return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
 * @see rte_flow_destroy()
 */
mlx5_flow_destroy(struct rte_eth_dev *dev,
                  struct rte_flow *flow,
                  struct rte_flow_error *error __rte_unused)
        struct priv *priv = dev->data->dev_private;

        mlx5_flow_list_destroy(dev, &priv->flows, flow);

 * Destroy all flows.
 *
 * @see rte_flow_flush()
 */
mlx5_flow_flush(struct rte_eth_dev *dev,
                struct rte_flow_error *error __rte_unused)
        struct priv *priv = dev->data->dev_private;

        mlx5_flow_list_flush(dev, &priv->flows);

#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
/**
 * Query flow counter.
 *
 * @param cs
 *   The counter set.
 * @param counter_stats
 *   Counter stats kept by the driver, used as the reset baseline.
 * @param[out] query_count
 *   Returned data from the counter.
 * @param error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_query_count(struct ibv_counter_set *cs,
		      struct mlx5_flow_counter_stats *counter_stats,
		      struct rte_flow_query_count *query_count,
		      struct rte_flow_error *error)
{
	uint64_t counters[2];
	struct ibv_query_counter_set_attr query_cs_attr = {
		.cs = cs,
		.query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
	};
	struct ibv_counter_set_data query_out = {
		.out = counters,
		.outlen = 2 * sizeof(uint64_t),
	};
	int err = mlx5_glue->query_counter_set(&query_cs_attr, &query_out);

	if (err)
		return rte_flow_error_set(error, err,
					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
					  NULL,
					  "cannot read counter");
	query_count->hits_set = 1;
	query_count->bytes_set = 1;
	query_count->hits = counters[0] - counter_stats->hits;
	query_count->bytes = counters[1] - counter_stats->bytes;
	/* Reset the driver baseline when requested. */
	if (query_count->reset) {
		counter_stats->hits = counters[0];
		counter_stats->bytes = counters[1];
	}
	return 0;
}
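
/*
 * Illustrative note (not part of the upstream file): the function above
 * reports deltas against the baseline stored with the flow, e.g. with the
 * hardware counter at { 100 hits, 6400 bytes } and a baseline of
 * { 40 hits, 2560 bytes }:
 *
 *	query_count->hits  = 100 - 40;	  // 60 hits since last reset
 *	query_count->bytes = 6400 - 2560; // 3840 bytes since last reset
 *	// with query_count->reset set, the baseline becomes { 100, 6400 }
 */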

/**
 * Query a flow.
 *
 * @see rte_flow_query()
 * @see rte_flow_ops
 */
int
mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
		struct rte_flow *flow,
		enum rte_flow_action_type action __rte_unused,
		void *data,
		struct rte_flow_error *error)
{
	if (flow->cs) {
		int ret;

		ret = mlx5_flow_query_count(flow->cs,
					    &flow->counter_stats,
					    (struct rte_flow_query_count *)data,
					    error);
		if (ret)
			return ret;
	} else {
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
					  NULL,
					  "no counter found for flow");
	}
	return 0;
}
#endif

/**
 * Isolated mode.
 *
 * @see rte_flow_isolate()
 * @see rte_flow_ops
 */
int
mlx5_flow_isolate(struct rte_eth_dev *dev,
		  int enable,
		  struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	if (dev->data->dev_started) {
		rte_flow_error_set(error, EBUSY,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "port must be stopped first");
		return -rte_errno;
	}
	priv->isolated = !!enable;
	if (enable)
		priv->dev->dev_ops = &mlx5_dev_ops_isolate;
	else
		priv->dev->dev_ops = &mlx5_dev_ops;
	return 0;
}
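
/*
 * Illustrative sketch (not part of the upstream file): isolated mode must
 * be requested while the port is stopped, typically right after
 * rte_eth_dev_configure() and before rte_eth_dev_start().
 *
 *	struct rte_flow_error error;
 *
 *	if (rte_flow_isolate(port_id, 1, &error))
 *		printf("cannot enter isolated mode: %s\n",
 *		       error.message ? error.message : "(no message)");
 */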

/**
 * Convert a flow director filter to a generic flow.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Flow director filter to add.
 * @param attributes
 *   Generic flow parameters structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
			 const struct rte_eth_fdir_filter *fdir_filter,
			 struct mlx5_fdir *attributes)
{
	struct priv *priv = dev->data->dev_private;
	const struct rte_eth_fdir_input *input = &fdir_filter->input;

	/* Validate queue number. */
	if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
		DRV_LOG(ERR, "port %u invalid queue number %d",
			dev->data->port_id, fdir_filter->action.rx_queue);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	attributes->attr.ingress = 1;
	attributes->items[0] = (struct rte_flow_item) {
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.spec = &attributes->l2,
		.mask = &attributes->l2_mask,
	};
	switch (fdir_filter->action.behavior) {
	case RTE_ETH_FDIR_ACCEPT:
		attributes->actions[0] = (struct rte_flow_action){
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &attributes->queue,
		};
		break;
	case RTE_ETH_FDIR_REJECT:
		attributes->actions[0] = (struct rte_flow_action){
			.type = RTE_FLOW_ACTION_TYPE_DROP,
		};
		break;
	default:
		DRV_LOG(ERR, "port %u invalid behavior %d",
			dev->data->port_id,
			fdir_filter->action.behavior);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	attributes->queue.index = fdir_filter->action.rx_queue;
	switch (fdir_filter->input.flow_type) {
	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = input->flow.udp4_flow.ip.src_ip,
			.dst_addr = input->flow.udp4_flow.ip.dst_ip,
			.time_to_live = input->flow.udp4_flow.ip.ttl,
			.type_of_service = input->flow.udp4_flow.ip.tos,
			.next_proto_id = input->flow.udp4_flow.ip.proto,
		};
		attributes->l4.udp.hdr = (struct udp_hdr){
			.src_port = input->flow.udp4_flow.src_port,
			.dst_port = input->flow.udp4_flow.dst_port,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV4,
			.spec = &attributes->l3,
			.mask = &attributes->l3,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_UDP,
			.spec = &attributes->l4,
			.mask = &attributes->l4,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = input->flow.tcp4_flow.ip.src_ip,
			.dst_addr = input->flow.tcp4_flow.ip.dst_ip,
			.time_to_live = input->flow.tcp4_flow.ip.ttl,
			.type_of_service = input->flow.tcp4_flow.ip.tos,
			.next_proto_id = input->flow.tcp4_flow.ip.proto,
		};
		attributes->l4.tcp.hdr = (struct tcp_hdr){
			.src_port = input->flow.tcp4_flow.src_port,
			.dst_port = input->flow.tcp4_flow.dst_port,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV4,
			.spec = &attributes->l3,
			.mask = &attributes->l3,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_TCP,
			.spec = &attributes->l4,
			.mask = &attributes->l4,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = input->flow.ip4_flow.src_ip,
			.dst_addr = input->flow.ip4_flow.dst_ip,
			.time_to_live = input->flow.ip4_flow.ttl,
			.type_of_service = input->flow.ip4_flow.tos,
			.next_proto_id = input->flow.ip4_flow.proto,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV4,
			.spec = &attributes->l3,
			.mask = &attributes->l3,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
			.hop_limits = input->flow.udp6_flow.ip.hop_limits,
			.proto = input->flow.udp6_flow.ip.proto,
		};
		memcpy(attributes->l3.ipv6.hdr.src_addr,
		       input->flow.udp6_flow.ip.src_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
		memcpy(attributes->l3.ipv6.hdr.dst_addr,
		       input->flow.udp6_flow.ip.dst_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
		attributes->l4.udp.hdr = (struct udp_hdr){
			.src_port = input->flow.udp6_flow.src_port,
			.dst_port = input->flow.udp6_flow.dst_port,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV6,
			.spec = &attributes->l3,
			.mask = &attributes->l3,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_UDP,
			.spec = &attributes->l4,
			.mask = &attributes->l4,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
			.hop_limits = input->flow.tcp6_flow.ip.hop_limits,
			.proto = input->flow.tcp6_flow.ip.proto,
		};
		memcpy(attributes->l3.ipv6.hdr.src_addr,
		       input->flow.tcp6_flow.ip.src_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
		memcpy(attributes->l3.ipv6.hdr.dst_addr,
		       input->flow.tcp6_flow.ip.dst_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
		attributes->l4.tcp.hdr = (struct tcp_hdr){
			.src_port = input->flow.tcp6_flow.src_port,
			.dst_port = input->flow.tcp6_flow.dst_port,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV6,
			.spec = &attributes->l3,
			.mask = &attributes->l3,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_TCP,
			.spec = &attributes->l4,
			.mask = &attributes->l4,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
			.hop_limits = input->flow.ipv6_flow.hop_limits,
			.proto = input->flow.ipv6_flow.proto,
		};
		memcpy(attributes->l3.ipv6.hdr.src_addr,
		       input->flow.ipv6_flow.src_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
		memcpy(attributes->l3.ipv6.hdr.dst_addr,
		       input->flow.ipv6_flow.dst_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV6,
			.spec = &attributes->l3,
			.mask = &attributes->l3,
		};
		break;
	default:
		DRV_LOG(ERR, "port %u invalid flow type %d",
			dev->data->port_id, fdir_filter->input.flow_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	return 0;
}
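
/*
 * Illustrative sketch (not part of the upstream file): a flow director
 * filter steering IPv4/UDP packets to queue 3, as accepted by the
 * conversion above; it becomes the item list ETH / IPV4 / UDP with a QUEUE
 * action.  The address and port are arbitrary examples and, per the flow
 * director API, in network byte order.
 *
 *	struct rte_eth_fdir_filter fdir = {
 *		.input = {
 *			.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
 *			.flow.udp4_flow = {
 *				.ip.dst_ip = RTE_BE32(IPv4(192, 168, 0, 1)),
 *				.dst_port = RTE_BE16(4789),
 *			},
 *		},
 *		.action = {
 *			.behavior = RTE_ETH_FDIR_ACCEPT,
 *			.rx_queue = 3,
 *		},
 *	};
 */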

/**
 * Add new flow director filter and store it in list.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Flow director filter to add.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_add(struct rte_eth_dev *dev,
		     const struct rte_eth_fdir_filter *fdir_filter)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_fdir attributes = {
		.attr.group = 0,
		.l2_mask = {
			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.type = 0,
		},
	};
	struct mlx5_flow_parse parser = {
		.layer = HASH_RXQ_ETH,
	};
	struct rte_flow_error error;
	struct rte_flow *flow;
	int ret;

	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
	if (ret)
		return ret;
	ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
				attributes.actions, &error, &parser);
	if (ret)
		return ret;
	flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
				     attributes.items, attributes.actions,
				     &error);
	if (flow) {
		DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
			(void *)flow);
		return 0;
	}
	return -rte_errno;
}

/**
 * Delete specific filter.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Filter to be deleted.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_delete(struct rte_eth_dev *dev,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_fdir attributes = {
		.attr.group = 0,
	};
	struct mlx5_flow_parse parser = {
		.create = 1,
		.layer = HASH_RXQ_ETH,
	};
	struct rte_flow_error error;
	struct rte_flow *flow;
	unsigned int i;
	int ret;

	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
	if (ret)
		return ret;
	ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
				attributes.actions, &error, &parser);
	if (ret)
		goto exit;
	/*
	 * Special case for drop action which is only set in the
	 * specifications when the flow is created.  In this situation the
	 * drop specification is missing.
	 */
	if (parser.drop) {
		struct ibv_flow_spec_action_drop *drop;

		drop = (void *)((uintptr_t)parser.queue[HASH_RXQ_ETH].ibv_attr +
				parser.queue[HASH_RXQ_ETH].offset);
		*drop = (struct ibv_flow_spec_action_drop){
			.type = IBV_FLOW_SPEC_ACTION_DROP,
			.size = sizeof(struct ibv_flow_spec_action_drop),
		};
		parser.queue[HASH_RXQ_ETH].ibv_attr->num_of_specs++;
	}
	TAILQ_FOREACH(flow, &priv->flows, next) {
		struct ibv_flow_attr *attr;
		struct ibv_spec_header *attr_h;
		void *spec;
		struct ibv_flow_attr *flow_attr;
		struct ibv_spec_header *flow_h;
		void *flow_spec;
		unsigned int specs_n;

		attr = parser.queue[HASH_RXQ_ETH].ibv_attr;
		flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr;
		/* Compare first the attributes. */
		if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
			continue;
		if (attr->num_of_specs == 0)
			continue;
		spec = (void *)((uintptr_t)attr +
				sizeof(struct ibv_flow_attr));
		flow_spec = (void *)((uintptr_t)flow_attr +
				     sizeof(struct ibv_flow_attr));
		specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
		for (i = 0; i != specs_n; ++i) {
			attr_h = spec;
			flow_h = flow_spec;
			if (memcmp(spec, flow_spec,
				   RTE_MIN(attr_h->size, flow_h->size)))
				goto wrong_flow;
			spec = (void *)((uintptr_t)spec + attr_h->size);
			flow_spec = (void *)((uintptr_t)flow_spec +
					     flow_h->size);
		}
		/* At this point, the flow matches. */
		break;
wrong_flow:
		/* The flow does not match. */
		continue;
	}
	ret = rte_errno; /* Save rte_errno before cleanup. */
	if (flow)
		mlx5_flow_list_destroy(dev, &priv->flows, flow);
exit:
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser.queue[i].ibv_attr)
			rte_free(parser.queue[i].ibv_attr);
	}
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}
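
/*
 * Illustrative sketch (not part of the upstream file): deletion matches by
 * content rather than by handle, so passing the same filter that was given
 * to mlx5_fdir_filter_add() removes the corresponding rule; fdir below
 * stands for that previously added rte_eth_fdir_filter.
 *
 *	if (mlx5_fdir_filter_delete(dev, &fdir))
 *		DRV_LOG(ERR, "port %u cannot delete FDIR filter",
 *			dev->data->port_id);
 */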

/**
 * Update queue for specific filter.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Filter to be updated.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_update(struct rte_eth_dev *dev,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	int ret;

	ret = mlx5_fdir_filter_delete(dev, fdir_filter);
	if (ret)
		return ret;
	return mlx5_fdir_filter_add(dev, fdir_filter);
}

/**
 * Flush all filters.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
static void
mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_flush(dev, &priv->flows);
}

/**
 * Get flow director information.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[out] fdir_info
 *   Resulting flow director information.
 */
static void
mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
{
	struct priv *priv = dev->data->dev_private;
	struct rte_eth_fdir_masks *mask =
		&priv->dev->data->dev_conf.fdir_conf.mask;

	fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
	fdir_info->guarant_spc = 0;
	rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
	fdir_info->max_flexpayload = 0;
	fdir_info->flow_types_mask[0] = 0;
	fdir_info->flex_payload_unit = 0;
	fdir_info->max_flex_payload_segment_num = 0;
	fdir_info->flex_payload_limit = 0;
	memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
}

/**
 * Deal with flow director operations.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
		    void *arg)
{
	struct priv *priv = dev->data->dev_private;
	enum rte_fdir_mode fdir_mode =
		priv->dev->data->dev_conf.fdir_conf.mode;

	if (filter_op == RTE_ETH_FILTER_NOP)
		return 0;
	if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
	    fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
		DRV_LOG(ERR, "port %u flow director mode %d not supported",
			dev->data->port_id, fdir_mode);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	switch (filter_op) {
	case RTE_ETH_FILTER_ADD:
		return mlx5_fdir_filter_add(dev, arg);
	case RTE_ETH_FILTER_UPDATE:
		return mlx5_fdir_filter_update(dev, arg);
	case RTE_ETH_FILTER_DELETE:
		return mlx5_fdir_filter_delete(dev, arg);
	case RTE_ETH_FILTER_FLUSH:
		mlx5_fdir_filter_flush(dev);
		break;
	case RTE_ETH_FILTER_INFO:
		mlx5_fdir_info_get(dev, arg);
		break;
	default:
		DRV_LOG(DEBUG, "port %u unknown operation %u",
			dev->data->port_id, filter_op);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	return 0;
}

/**
 * Manage filter operations.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param filter_type
 *   Filter type.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
		     enum rte_filter_type filter_type,
		     enum rte_filter_op filter_op,
		     void *arg)
{
	switch (filter_type) {
	case RTE_ETH_FILTER_GENERIC:
		if (filter_op != RTE_ETH_FILTER_GET) {
			rte_errno = EINVAL;
			return -rte_errno;
		}
		*(const void **)arg = &mlx5_flow_ops;
		return 0;
	case RTE_ETH_FILTER_FDIR:
		return mlx5_fdir_ctrl_func(dev, filter_op, arg);
	default:
		DRV_LOG(ERR, "port %u filter type (%d) not supported",
			dev->data->port_id, filter_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	return 0;
}
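
/*
 * Illustrative sketch (not part of the upstream file): applications reach
 * this entry point through rte_eth_dev_filter_ctrl(), e.g. to retrieve the
 * generic rte_flow operations exported by this file.
 *
 *	const struct rte_flow_ops *ops = NULL;
 *	int ret = rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_GENERIC,
 *					  RTE_ETH_FILTER_GET, &ops);
 *
 *	// on success (ret == 0), ops now points to mlx5_flow_ops
 */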