1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2016 6WIND S.A.
3 * Copyright 2016 Mellanox.
10 /* ISO C doesn't support unnamed structs/unions; -Wpedantic is disabled around the verbs header. */
12 #pragma GCC diagnostic ignored "-Wpedantic"
14 #include <infiniband/verbs.h>
16 #pragma GCC diagnostic error "-Wpedantic"
19 #include <rte_ethdev_driver.h>
21 #include <rte_flow_driver.h>
22 #include <rte_malloc.h>
26 #include "mlx5_defs.h"
28 #include "mlx5_glue.h"
30 /* Define minimal priority for control plane flows. */
31 #define MLX5_CTRL_FLOW_PRIORITY 4
33 /* Internet Protocol versions. */
37 #ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
38 struct ibv_flow_spec_counter_action {
39 int dummy;
40 };
41 #endif
43 /* Dev ops structures defined in mlx5.c. */
44 extern const struct eth_dev_ops mlx5_dev_ops;
45 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
48 mlx5_flow_create_eth(const struct rte_flow_item *item,
49 const void *default_mask,
53 mlx5_flow_create_vlan(const struct rte_flow_item *item,
54 const void *default_mask,
58 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
59 const void *default_mask,
63 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
64 const void *default_mask,
68 mlx5_flow_create_udp(const struct rte_flow_item *item,
69 const void *default_mask,
73 mlx5_flow_create_tcp(const struct rte_flow_item *item,
74 const void *default_mask,
78 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
79 const void *default_mask,
82 struct mlx5_flow_parse;
85 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
89 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);
92 mlx5_flow_create_count(struct rte_eth_dev *dev, struct mlx5_flow_parse *parser);
94 /* Hash RX queue types. */
105 /* Initialization data for hash RX queue. */
106 struct hash_rxq_init {
107 uint64_t hash_fields; /* Fields that participate in the hash. */
108 uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
109 unsigned int flow_priority; /* Flow priority to use. */
110 unsigned int ip_version; /* Internet protocol. */
113 /* Initialization data for hash RX queues. */
114 const struct hash_rxq_init hash_rxq_init[] = {
116 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
117 IBV_RX_HASH_DST_IPV4 |
118 IBV_RX_HASH_SRC_PORT_TCP |
119 IBV_RX_HASH_DST_PORT_TCP),
120 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
122 .ip_version = MLX5_IPV4,
125 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
126 IBV_RX_HASH_DST_IPV4 |
127 IBV_RX_HASH_SRC_PORT_UDP |
128 IBV_RX_HASH_DST_PORT_UDP),
129 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
131 .ip_version = MLX5_IPV4,
134 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
135 IBV_RX_HASH_DST_IPV4),
136 .dpdk_rss_hf = (ETH_RSS_IPV4 |
139 .ip_version = MLX5_IPV4,
142 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
143 IBV_RX_HASH_DST_IPV6 |
144 IBV_RX_HASH_SRC_PORT_TCP |
145 IBV_RX_HASH_DST_PORT_TCP),
146 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
148 .ip_version = MLX5_IPV6,
151 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
152 IBV_RX_HASH_DST_IPV6 |
153 IBV_RX_HASH_SRC_PORT_UDP |
154 IBV_RX_HASH_DST_PORT_UDP),
155 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
157 .ip_version = MLX5_IPV6,
160 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
161 IBV_RX_HASH_DST_IPV6),
162 .dpdk_rss_hf = (ETH_RSS_IPV6 |
165 .ip_version = MLX5_IPV6,
174 /* Number of entries in hash_rxq_init[]. */
175 const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
177 /** Structure for holding counter stats. */
178 struct mlx5_flow_counter_stats {
179 uint64_t hits; /**< Number of packets matched by the rule. */
180 uint64_t bytes; /**< Number of bytes matched by the rule. */
183 /** Structure for the drop queue. */
184 struct mlx5_hrxq_drop {
185 struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
186 struct ibv_qp *qp; /**< Verbs queue pair. */
187 struct ibv_wq *wq; /**< Verbs work queue. */
188 struct ibv_cq *cq; /**< Verbs completion queue. */
191 /* Flow structures. */
193 uint64_t hash_fields; /**< Fields that participate in the hash. */
194 struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
195 struct ibv_flow *ibv_flow; /**< Verbs flow. */
196 struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
199 /* Drop flow structures. */
200 struct mlx5_flow_drop {
201 struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
202 struct ibv_flow *ibv_flow; /**< Verbs flow. */
206 TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
207 uint32_t mark:1; /**< Set if the flow is marked. */
208 uint32_t drop:1; /**< Drop queue. */
209 uint16_t queues_n; /**< Number of entries in queues[]. */
210 uint16_t (*queues)[]; /**< Queue indexes to use. */
211 struct rte_eth_rss_conf rss_conf; /**< RSS configuration. */
212 uint8_t rss_key[40]; /**< Copy of the RSS key. */
213 struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
214 struct mlx5_flow_counter_stats counter_stats; /**< The counter stats. */
215 struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
216 /**< Flow with Rx queue. */
219 /** Static initializer for items. */
221 (const enum rte_flow_item_type []){ \
222 __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
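/*
 * Illustrative expansion of the ITEMS() macro above:
 *
 *   ITEMS(RTE_FLOW_ITEM_TYPE_ETH, RTE_FLOW_ITEM_TYPE_VXLAN)
 *
 * yields the anonymous, END-terminated array
 *
 *   (const enum rte_flow_item_type []){
 *           RTE_FLOW_ITEM_TYPE_ETH,
 *           RTE_FLOW_ITEM_TYPE_VXLAN,
 *           RTE_FLOW_ITEM_TYPE_END,
 *   }
 *
 * so callers can walk the list without carrying its length.
 */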
225 /** Structure to generate a simple graph of layers supported by the NIC. */
226 struct mlx5_flow_items {
227 /** List of possible actions for these items. */
228 const enum rte_flow_action_type *const actions;
229 /** Bit-masks corresponding to the possibilities for the item. */
232 * Default bit-masks to use when item->mask is not provided. When
233 * \default_mask is also NULL, the full supported bit-mask (\mask) is
234 * used instead.
235 */
236 const void *default_mask;
237 /** Bit-mask size in bytes. */
238 const unsigned int mask_sz;
240 * Conversion function from rte_flow to NIC specific flow.
243 * rte_flow item to convert.
244 * @param default_mask
245 * Default bit-masks to use when item->mask is not provided.
247 * Internal structure to store the conversion.
250 * 0 on success, a negative errno value otherwise and rte_errno is
251 * set.
252 */
253 int (*convert)(const struct rte_flow_item *item,
254 const void *default_mask,
256 /** Size in bytes of the destination structure. */
257 const unsigned int dst_sz;
258 /** List of possible following items. */
259 const enum rte_flow_item_type *const items;
262 /** Valid actions for this PMD. */
263 static const enum rte_flow_action_type valid_actions[] = {
264 RTE_FLOW_ACTION_TYPE_DROP,
265 RTE_FLOW_ACTION_TYPE_QUEUE,
266 RTE_FLOW_ACTION_TYPE_MARK,
267 RTE_FLOW_ACTION_TYPE_FLAG,
268 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
269 RTE_FLOW_ACTION_TYPE_COUNT,
271 RTE_FLOW_ACTION_TYPE_END,
274 /** Graph of supported items and associated actions. */
275 static const struct mlx5_flow_items mlx5_flow_items[] = {
276 [RTE_FLOW_ITEM_TYPE_END] = {
277 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
278 RTE_FLOW_ITEM_TYPE_VXLAN),
280 [RTE_FLOW_ITEM_TYPE_ETH] = {
281 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
282 RTE_FLOW_ITEM_TYPE_IPV4,
283 RTE_FLOW_ITEM_TYPE_IPV6),
284 .actions = valid_actions,
285 .mask = &(const struct rte_flow_item_eth){
286 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
287 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
290 .default_mask = &rte_flow_item_eth_mask,
291 .mask_sz = sizeof(struct rte_flow_item_eth),
292 .convert = mlx5_flow_create_eth,
293 .dst_sz = sizeof(struct ibv_flow_spec_eth),
295 [RTE_FLOW_ITEM_TYPE_VLAN] = {
296 .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
297 RTE_FLOW_ITEM_TYPE_IPV6),
298 .actions = valid_actions,
299 .mask = &(const struct rte_flow_item_vlan){
302 .default_mask = &rte_flow_item_vlan_mask,
303 .mask_sz = sizeof(struct rte_flow_item_vlan),
304 .convert = mlx5_flow_create_vlan,
307 [RTE_FLOW_ITEM_TYPE_IPV4] = {
308 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
309 RTE_FLOW_ITEM_TYPE_TCP),
310 .actions = valid_actions,
311 .mask = &(const struct rte_flow_item_ipv4){
315 .type_of_service = -1,
319 .default_mask = &rte_flow_item_ipv4_mask,
320 .mask_sz = sizeof(struct rte_flow_item_ipv4),
321 .convert = mlx5_flow_create_ipv4,
322 .dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
324 [RTE_FLOW_ITEM_TYPE_IPV6] = {
325 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
326 RTE_FLOW_ITEM_TYPE_TCP),
327 .actions = valid_actions,
328 .mask = &(const struct rte_flow_item_ipv6){
331 0xff, 0xff, 0xff, 0xff,
332 0xff, 0xff, 0xff, 0xff,
333 0xff, 0xff, 0xff, 0xff,
334 0xff, 0xff, 0xff, 0xff,
337 0xff, 0xff, 0xff, 0xff,
338 0xff, 0xff, 0xff, 0xff,
339 0xff, 0xff, 0xff, 0xff,
340 0xff, 0xff, 0xff, 0xff,
347 .default_mask = &rte_flow_item_ipv6_mask,
348 .mask_sz = sizeof(struct rte_flow_item_ipv6),
349 .convert = mlx5_flow_create_ipv6,
350 .dst_sz = sizeof(struct ibv_flow_spec_ipv6),
352 [RTE_FLOW_ITEM_TYPE_UDP] = {
353 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
354 .actions = valid_actions,
355 .mask = &(const struct rte_flow_item_udp){
361 .default_mask = &rte_flow_item_udp_mask,
362 .mask_sz = sizeof(struct rte_flow_item_udp),
363 .convert = mlx5_flow_create_udp,
364 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
366 [RTE_FLOW_ITEM_TYPE_TCP] = {
367 .actions = valid_actions,
368 .mask = &(const struct rte_flow_item_tcp){
374 .default_mask = &rte_flow_item_tcp_mask,
375 .mask_sz = sizeof(struct rte_flow_item_tcp),
376 .convert = mlx5_flow_create_tcp,
377 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
379 [RTE_FLOW_ITEM_TYPE_VXLAN] = {
380 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
381 .actions = valid_actions,
382 .mask = &(const struct rte_flow_item_vxlan){
383 .vni = "\xff\xff\xff",
385 .default_mask = &rte_flow_item_vxlan_mask,
386 .mask_sz = sizeof(struct rte_flow_item_vxlan),
387 .convert = mlx5_flow_create_vxlan,
388 .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
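/*
 * Illustrative sketch (assumed example, not driver code): a pattern the
 * graph above accepts, walking END -> ETH -> IPV4 -> UDP -> VXLAN.
 */
#if 0 /* example only */
static const struct rte_flow_item example_vxlan_pattern[] = {
	{ .type = RTE_FLOW_ITEM_TYPE_ETH },
	{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
	{ .type = RTE_FLOW_ITEM_TYPE_UDP },
	{
		.type = RTE_FLOW_ITEM_TYPE_VXLAN,
		.spec = &(const struct rte_flow_item_vxlan){
			.vni = "\x00\x00\x2a", /* VNI 42, as VNI 0 is refused. */
		},
	},
	{ .type = RTE_FLOW_ITEM_TYPE_END },
};
#endif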
392 /** Structure to pass to the conversion function. */
393 struct mlx5_flow_parse {
394 uint32_t inner; /**< Set once VXLAN is encountered. */
396 /**< Whether resources should remain after a validate. */
397 uint32_t drop:1; /**< Target is a drop queue. */
398 uint32_t mark:1; /**< Mark is present in the flow. */
399 uint32_t count:1; /**< Count is present in the flow. */
400 uint32_t mark_id; /**< Mark identifier. */
401 uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indexes to use. */
402 uint16_t queues_n; /**< Number of entries in queues[]. */
403 struct rte_eth_rss_conf rss_conf; /**< RSS configuration. */
404 uint8_t rss_key[40]; /**< Copy of the RSS key. */
405 enum hash_rxq_type layer; /**< Last pattern layer detected. */
406 struct ibv_counter_set *cs; /**< Holds the counter set for the rule. */
408 struct ibv_flow_attr *ibv_attr;
409 /**< Pointer to Verbs attributes. */
411 /**< Current position or total size of the attribute. */
412 } queue[RTE_DIM(hash_rxq_init)];
415 static const struct rte_flow_ops mlx5_flow_ops = {
416 .validate = mlx5_flow_validate,
417 .create = mlx5_flow_create,
418 .destroy = mlx5_flow_destroy,
419 .flush = mlx5_flow_flush,
420 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
421 .query = mlx5_flow_query,
425 .isolate = mlx5_flow_isolate,
428 /* Convert an FDIR request to a generic flow. */
430 struct rte_flow_attr attr;
431 struct rte_flow_action actions[2];
432 struct rte_flow_item items[4];
433 struct rte_flow_item_eth l2;
434 struct rte_flow_item_eth l2_mask;
436 struct rte_flow_item_ipv4 ipv4;
437 struct rte_flow_item_ipv6 ipv6;
440 struct rte_flow_item_udp udp;
441 struct rte_flow_item_tcp tcp;
443 struct rte_flow_action_queue queue;
446 /* Verbs specification header. */
447 struct ibv_spec_header {
448 enum ibv_flow_spec_type type;
453 * Check support for a given item.
456 * Item specification.
458 * Bit-masks covering supported fields to compare with spec, last and mask in
459 * \item.
460 * @param size
461 * Bit-mask size in bytes.
464 * 0 on success, a negative errno value otherwise and rte_errno is set.
467 mlx5_flow_item_validate(const struct rte_flow_item *item,
468 const uint8_t *mask, unsigned int size)
470 if (!item->spec && (item->mask || item->last)) {
474 if (item->spec && !item->mask) {
476 const uint8_t *spec = item->spec;
478 for (i = 0; i < size; ++i)
479 if ((spec[i] | mask[i]) != mask[i]) {
484 if (item->last && !item->mask) {
486 const uint8_t *spec = item->last;
488 for (i = 0; i < size; ++i)
489 if ((spec[i] | mask[i]) != mask[i]) {
496 const uint8_t *spec = item->spec;
498 for (i = 0; i < size; ++i)
499 if ((spec[i] | mask[i]) != mask[i]) {
504 if (item->spec && item->last) {
507 const uint8_t *apply = mask;
513 for (i = 0; i < size; ++i) {
514 spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
515 last[i] = ((const uint8_t *)item->last)[i] & apply[i];
517 ret = memcmp(spec, last, size);
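/*
 * Worked example for the range check above (illustrative): with mask 0xff,
 * spec 0x0a and last 0x14 remain different after masking, so the memcmp()
 * fails and the item is rejected. Only spec/last pairs that are equal once
 * the mask is applied, i.e. degenerate ranges, are accepted.
 */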
527 * Copy the RSS configuration from the user-provided one; if rss_conf is
528 * NULL, the driver default is used instead.
531 * Internal parser structure.
533 * User RSS configuration to save.
536 * 0 on success, a negative errno value otherwise and rte_errno is set.
539 mlx5_flow_convert_rss_conf(struct mlx5_flow_parse *parser,
540 const struct rte_eth_rss_conf *rss_conf)
543 * This function is also called at the beginning of
544 * mlx5_flow_convert_actions() to initialize the parser with the
545 * device default RSS configuration.
548 if (rss_conf->rss_hf & MLX5_RSS_HF_MASK) {
552 if (rss_conf->rss_key_len != 40) {
556 if (rss_conf->rss_key_len && rss_conf->rss_key) {
557 parser->rss_conf.rss_key_len = rss_conf->rss_key_len;
558 memcpy(parser->rss_key, rss_conf->rss_key,
559 rss_conf->rss_key_len);
560 parser->rss_conf.rss_key = parser->rss_key;
562 parser->rss_conf.rss_hf = rss_conf->rss_hf;
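/*
 * Minimal sketch (assumption, not driver code) of an RSS configuration
 * this function accepts: a 40-byte key and an rss_hf with no bit set in
 * MLX5_RSS_HF_MASK, i.e. only the supported IP/UDP/TCP hash types.
 */
#if 0 /* example only */
static uint8_t example_rss_key[40] = {
	0x2c, 0xc6, /* Remaining bytes are zero-filled for brevity. */
};
static struct rte_eth_rss_conf example_rss_conf = {
	.rss_key = example_rss_key,
	.rss_key_len = 40, /* Lengths other than 40 are rejected with EINVAL. */
	.rss_hf = ETH_RSS_NONFRAG_IPV4_TCP | ETH_RSS_NONFRAG_IPV4_UDP,
};
#endif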
568 * Extract attributes to the parser.
571 * Flow rule attributes.
573 * Perform verbose error reporting if not NULL.
576 * 0 on success, a negative errno value otherwise and rte_errno is set.
579 mlx5_flow_convert_attributes(const struct rte_flow_attr *attr,
580 struct rte_flow_error *error)
583 rte_flow_error_set(error, ENOTSUP,
584 RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
586 "groups are not supported");
589 if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
590 rte_flow_error_set(error, ENOTSUP,
591 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
593 "priorities are not supported");
597 rte_flow_error_set(error, ENOTSUP,
598 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
600 "egress is not supported");
603 if (!attr->ingress) {
604 rte_flow_error_set(error, ENOTSUP,
605 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
607 "only ingress is supported");
614 * Extract the requested actions to the parser.
617 * Pointer to Ethernet device.
619 * Associated actions (list terminated by the END action).
621 * Perform verbose error reporting if not NULL.
622 * @param[in, out] parser
623 * Internal parser structure.
626 * 0 on success, a negative errno value otherwise and rte_errno is set.
629 mlx5_flow_convert_actions(struct rte_eth_dev *dev,
630 const struct rte_flow_action actions[],
631 struct rte_flow_error *error,
632 struct mlx5_flow_parse *parser)
634 struct priv *priv = dev->data->dev_private;
638 * Add the default RSS configuration. Verbs requires it to create the QP
639 * even when no RSS action is requested.
641 ret = mlx5_flow_convert_rss_conf(parser,
642 (const struct rte_eth_rss_conf *)
646 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
647 if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
649 } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
651 } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
652 const struct rte_flow_action_queue *queue =
653 (const struct rte_flow_action_queue *)
658 if (!queue || (queue->index > (priv->rxqs_n - 1)))
659 goto exit_action_not_supported;
660 for (n = 0; n < parser->queues_n; ++n) {
661 if (parser->queues[n] == queue->index) {
666 if (parser->queues_n > 1 && !found) {
667 rte_flow_error_set(error, ENOTSUP,
668 RTE_FLOW_ERROR_TYPE_ACTION,
670 "queue action not in RSS queues");
674 parser->queues_n = 1;
675 parser->queues[0] = queue->index;
677 } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
678 const struct rte_flow_action_rss *rss =
679 (const struct rte_flow_action_rss *)
683 if (!rss || !rss->num) {
684 rte_flow_error_set(error, EINVAL,
685 RTE_FLOW_ERROR_TYPE_ACTION,
690 if (parser->queues_n == 1) {
693 assert(parser->queues_n);
694 for (n = 0; n < rss->num; ++n) {
695 if (parser->queues[0] ==
702 rte_flow_error_set(error, ENOTSUP,
703 RTE_FLOW_ERROR_TYPE_ACTION,
705 "queue action not in RSS"
710 for (n = 0; n < rss->num; ++n) {
711 if (rss->queue[n] >= priv->rxqs_n) {
712 rte_flow_error_set(error, EINVAL,
713 RTE_FLOW_ERROR_TYPE_ACTION,
715 "queue id > number of"
720 for (n = 0; n < rss->num; ++n)
721 parser->queues[n] = rss->queue[n];
722 parser->queues_n = rss->num;
723 if (mlx5_flow_convert_rss_conf(parser, rss->rss_conf)) {
724 rte_flow_error_set(error, EINVAL,
725 RTE_FLOW_ERROR_TYPE_ACTION,
727 "wrong RSS configuration");
730 } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
731 const struct rte_flow_action_mark *mark =
732 (const struct rte_flow_action_mark *)
736 rte_flow_error_set(error, EINVAL,
737 RTE_FLOW_ERROR_TYPE_ACTION,
739 "mark must be defined");
741 } else if (mark->id >= MLX5_FLOW_MARK_MAX) {
742 rte_flow_error_set(error, ENOTSUP,
743 RTE_FLOW_ERROR_TYPE_ACTION,
745 "mark must be between 0"
750 parser->mark_id = mark->id;
751 } else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
753 } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
754 priv->config.flow_counter_en) {
757 goto exit_action_not_supported;
760 if (parser->drop && parser->mark)
762 if (!parser->queues_n && !parser->drop) {
763 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
764 NULL, "no valid action");
768 exit_action_not_supported:
769 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
770 actions, "action not supported");
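/*
 * Illustrative sketch (assumed example): a terminal action list this
 * function accepts, steering matched packets to a single Rx queue.
 */
#if 0 /* example only */
static const struct rte_flow_action example_actions[] = {
	{
		.type = RTE_FLOW_ACTION_TYPE_QUEUE,
		.conf = &(const struct rte_flow_action_queue){ .index = 0 },
	},
	{ .type = RTE_FLOW_ACTION_TYPE_END },
};
#endif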
778 * Pattern specification (list terminated by the END pattern item).
780 * Perform verbose error reporting if not NULL.
781 * @param[in, out] parser
782 * Internal parser structure.
785 * 0 on success, a negative errno value otherwise and rte_errno is set.
788 mlx5_flow_convert_items_validate(const struct rte_flow_item items[],
789 struct rte_flow_error *error,
790 struct mlx5_flow_parse *parser)
792 const struct mlx5_flow_items *cur_item = mlx5_flow_items;
796 /* Initialise the offsets to start after the verbs attribute. */
797 for (i = 0; i != hash_rxq_init_n; ++i)
798 parser->queue[i].offset = sizeof(struct ibv_flow_attr);
799 for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
800 const struct mlx5_flow_items *token = NULL;
803 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
807 cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
809 if (cur_item->items[i] == items->type) {
810 token = &mlx5_flow_items[items->type];
815 goto exit_item_not_supported;
817 ret = mlx5_flow_item_validate(items,
818 (const uint8_t *)cur_item->mask,
821 goto exit_item_not_supported;
822 if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
824 rte_flow_error_set(error, ENOTSUP,
825 RTE_FLOW_ERROR_TYPE_ITEM,
827 "cannot recognize multiple"
828 " VXLAN encapsulations");
831 parser->inner = IBV_FLOW_SPEC_INNER;
834 parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
836 for (n = 0; n != hash_rxq_init_n; ++n)
837 parser->queue[n].offset += cur_item->dst_sz;
841 parser->queue[HASH_RXQ_ETH].offset +=
842 sizeof(struct ibv_flow_spec_action_drop);
845 for (i = 0; i != hash_rxq_init_n; ++i)
846 parser->queue[i].offset +=
847 sizeof(struct ibv_flow_spec_action_tag);
850 unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
852 for (i = 0; i != hash_rxq_init_n; ++i)
853 parser->queue[i].offset += size;
856 exit_item_not_supported:
857 return rte_flow_error_set(error, -ret, RTE_FLOW_ERROR_TYPE_ITEM,
858 items, "item not supported");
862 * Allocate memory space to store verbs flow attributes.
864 * @param[in] priority
867 * Amount of bytes to allocate.
869 * Perform verbose error reporting if not NULL.
872 * A verbs flow attribute on success, NULL otherwise and rte_errno is set.
874 static struct ibv_flow_attr *
875 mlx5_flow_convert_allocate(unsigned int priority,
877 struct rte_flow_error *error)
879 struct ibv_flow_attr *ibv_attr;
881 ibv_attr = rte_calloc(__func__, 1, size, 0);
883 rte_flow_error_set(error, ENOMEM,
884 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
886 "cannot allocate verbs spec attributes");
889 ibv_attr->priority = priority;
894 * Finalise verbs flow attributes.
896 * @param[in, out] parser
897 * Internal parser structure.
900 mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
902 const unsigned int ipv4 =
903 hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
904 const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 : HASH_RXQ_TCPV6;
905 const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
906 const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 : HASH_RXQ_TCPV4;
907 const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 : HASH_RXQ_IPV4;
908 const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
911 /* Remove any other flow not matching the pattern. */
912 if (parser->queues_n == 1) {
913 for (i = 0; i != hash_rxq_init_n; ++i) {
914 if (i == HASH_RXQ_ETH)
916 rte_free(parser->queue[i].ibv_attr);
917 parser->queue[i].ibv_attr = NULL;
921 if (parser->layer == HASH_RXQ_ETH) {
925 * This layer becomes useless as the pattern defines deeper
926 * layers.
927 */
928 rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
929 parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
931 /* Remove the opposite kind of layer, e.g. IPv6 if the pattern is IPv4. */
932 for (i = ohmin; i != (ohmax + 1); ++i) {
933 if (!parser->queue[i].ibv_attr)
935 rte_free(parser->queue[i].ibv_attr);
936 parser->queue[i].ibv_attr = NULL;
938 /* Remove impossible flows according to the RSS configuration. */
939 if (hash_rxq_init[parser->layer].dpdk_rss_hf &
940 parser->rss_conf.rss_hf) {
941 /* Remove any other flow. */
942 for (i = hmin; i != (hmax + 1); ++i) {
943 if ((i == parser->layer) ||
944 (!parser->queue[i].ibv_attr))
946 rte_free(parser->queue[i].ibv_attr);
947 parser->queue[i].ibv_attr = NULL;
949 } else if (!parser->queue[ip].ibv_attr) {
950 /* No RSS is possible with the current configuration. */
951 parser->queues_n = 1;
956 * Fill missing layers in verbs specifications, or compute the correct
957 * offset to allocate the memory space for the attributes and
958 * specifications.
959 */
960 for (i = 0; i != hash_rxq_init_n - 1; ++i) {
962 struct ibv_flow_spec_ipv4_ext ipv4;
963 struct ibv_flow_spec_ipv6 ipv6;
964 struct ibv_flow_spec_tcp_udp udp_tcp;
969 if (i == parser->layer)
971 if (parser->layer == HASH_RXQ_ETH) {
972 if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
973 size = sizeof(struct ibv_flow_spec_ipv4_ext);
974 specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
975 .type = IBV_FLOW_SPEC_IPV4_EXT,
979 size = sizeof(struct ibv_flow_spec_ipv6);
980 specs.ipv6 = (struct ibv_flow_spec_ipv6){
981 .type = IBV_FLOW_SPEC_IPV6,
985 if (parser->queue[i].ibv_attr) {
986 dst = (void *)((uintptr_t)
987 parser->queue[i].ibv_attr +
988 parser->queue[i].offset);
989 memcpy(dst, &specs, size);
990 ++parser->queue[i].ibv_attr->num_of_specs;
992 parser->queue[i].offset += size;
994 if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
995 (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
996 size = sizeof(struct ibv_flow_spec_tcp_udp);
997 specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
998 .type = ((i == HASH_RXQ_UDPV4 ||
999 i == HASH_RXQ_UDPV6) ?
1004 if (parser->queue[i].ibv_attr) {
1005 dst = (void *)((uintptr_t)
1006 parser->queue[i].ibv_attr +
1007 parser->queue[i].offset);
1008 memcpy(dst, &specs, size);
1009 ++parser->queue[i].ibv_attr->num_of_specs;
1011 parser->queue[i].offset += size;
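/*
 * Worked example (illustrative): for the pattern "eth / ipv4", the items
 * only fill specifications up to the IP layer, so for an L4 hash Rx queue
 * such as HASH_RXQ_UDPV4 the loop above appends a wildcard
 * IBV_FLOW_SPEC_UDP specification; the Ethernet and IPv4 specifications
 * were already copied there during item conversion.
 */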
1017 * Validate and convert a flow supported by the NIC.
1020 * Pointer to Ethernet device.
1022 * Flow rule attributes.
1023 * @param[in] pattern
1024 * Pattern specification (list terminated by the END pattern item).
1025 * @param[in] actions
1026 * Associated actions (list terminated by the END action).
1028 * Perform verbose error reporting if not NULL.
1029 * @param[in, out] parser
1030 * Internal parser structure.
1033 * 0 on success, a negative errno value otherwise and rte_errno is set.
1036 mlx5_flow_convert(struct rte_eth_dev *dev,
1037 const struct rte_flow_attr *attr,
1038 const struct rte_flow_item items[],
1039 const struct rte_flow_action actions[],
1040 struct rte_flow_error *error,
1041 struct mlx5_flow_parse *parser)
1043 const struct mlx5_flow_items *cur_item = mlx5_flow_items;
1047 /* First step. Validate the attributes, items and actions. */
1048 *parser = (struct mlx5_flow_parse){
1049 .create = parser->create,
1050 .layer = HASH_RXQ_ETH,
1051 .mark_id = MLX5_FLOW_MARK_DEFAULT,
1053 ret = mlx5_flow_convert_attributes(attr, error);
1056 ret = mlx5_flow_convert_actions(dev, actions, error, parser);
1059 ret = mlx5_flow_convert_items_validate(items, error, parser);
1062 mlx5_flow_convert_finalise(parser);
1065 * Allocate the memory space to store verbs specifications.
1068 unsigned int priority =
1070 hash_rxq_init[HASH_RXQ_ETH].flow_priority;
1071 unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;
1073 parser->queue[HASH_RXQ_ETH].ibv_attr =
1074 mlx5_flow_convert_allocate(priority, offset, error);
1075 if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
1077 parser->queue[HASH_RXQ_ETH].offset =
1078 sizeof(struct ibv_flow_attr);
1080 for (i = 0; i != hash_rxq_init_n; ++i) {
1081 unsigned int priority =
1083 hash_rxq_init[i].flow_priority;
1084 unsigned int offset;
1086 if (!(parser->rss_conf.rss_hf &
1087 hash_rxq_init[i].dpdk_rss_hf) &&
1088 (i != HASH_RXQ_ETH))
1090 offset = parser->queue[i].offset;
1091 parser->queue[i].ibv_attr =
1092 mlx5_flow_convert_allocate(priority,
1094 if (!parser->queue[i].ibv_attr)
1096 parser->queue[i].offset = sizeof(struct ibv_flow_attr);
1099 /* Third step. Parse and convert each item, filling the specifications. */
1101 for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
1102 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
1104 cur_item = &mlx5_flow_items[items->type];
1105 ret = cur_item->convert(items,
1106 (cur_item->default_mask ?
1107 cur_item->default_mask :
1111 rte_flow_error_set(error, rte_errno,
1112 RTE_FLOW_ERROR_TYPE_ITEM,
1113 items, "item not supported");
1118 mlx5_flow_create_flag_mark(parser, parser->mark_id);
1119 if (parser->count && parser->create) {
1120 mlx5_flow_create_count(dev, parser);
1122 goto exit_count_error;
1125 * Last step. Complete the missing specifications to reach the RSS
1126 * configuration.
1127 */
1128 if (!parser->drop) {
1129 mlx5_flow_convert_finalise(parser);
1131 parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
1133 hash_rxq_init[parser->layer].flow_priority;
1136 /* Only verification is expected, all resources should be released. */
1137 if (!parser->create) {
1138 for (i = 0; i != hash_rxq_init_n; ++i) {
1139 if (parser->queue[i].ibv_attr) {
1140 rte_free(parser->queue[i].ibv_attr);
1141 parser->queue[i].ibv_attr = NULL;
1147 for (i = 0; i != hash_rxq_init_n; ++i) {
1148 if (parser->queue[i].ibv_attr) {
1149 rte_free(parser->queue[i].ibv_attr);
1150 parser->queue[i].ibv_attr = NULL;
1153 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1154 NULL, "cannot allocate verbs spec attributes");
1157 rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1158 NULL, "cannot create counter");
1163 * Copy the created specification into the flow.
1166 * Internal parser structure.
1168 * Created specification to copy.
1170 * Size in bytes of the specification to copy.
1173 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
1179 for (i = 0; i != hash_rxq_init_n; ++i) {
1180 if (!parser->queue[i].ibv_attr)
1182 /* Specification must be the same L3 type or none. */
1183 if (parser->layer == HASH_RXQ_ETH ||
1184 (hash_rxq_init[parser->layer].ip_version ==
1185 hash_rxq_init[i].ip_version) ||
1186 (hash_rxq_init[i].ip_version == 0)) {
1187 dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1188 parser->queue[i].offset);
1189 memcpy(dst, src, size);
1190 ++parser->queue[i].ibv_attr->num_of_specs;
1191 parser->queue[i].offset += size;
1197 * Convert Ethernet item to Verbs specification.
1200 * Item specification.
1201 * @param default_mask[in]
1202 * Default bit-masks to use when item->mask is not provided.
1203 * @param data[in, out]
1207 * 0 on success, a negative errno value otherwise and rte_errno is set.
1210 mlx5_flow_create_eth(const struct rte_flow_item *item,
1211 const void *default_mask,
1214 const struct rte_flow_item_eth *spec = item->spec;
1215 const struct rte_flow_item_eth *mask = item->mask;
1216 struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1217 const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1218 struct ibv_flow_spec_eth eth = {
1219 .type = parser->inner | IBV_FLOW_SPEC_ETH,
1223 /* Don't update layer for the inner pattern. */
1225 parser->layer = HASH_RXQ_ETH;
1230 mask = default_mask;
1231 memcpy(ð.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
1232 memcpy(ð.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
1233 eth.val.ether_type = spec->type;
1234 memcpy(ð.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
1235 memcpy(ð.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
1236 eth.mask.ether_type = mask->type;
1237 /* Remove unwanted bits from values. */
1238 for (i = 0; i < ETHER_ADDR_LEN; ++i) {
1239 eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
1240 eth.val.src_mac[i] &= eth.mask.src_mac[i];
1242 eth.val.ether_type &= eth.mask.ether_type;
1244 mlx5_flow_create_copy(parser, ð, eth_size);
1249 * Convert VLAN item to Verbs specification.
1252 * Item specification.
1253 * @param default_mask[in]
1254 * Default bit-masks to use when item->mask is not provided.
1255 * @param data[in, out]
1259 * 0 on success, a negative errno value otherwise and rte_errno is set.
1262 mlx5_flow_create_vlan(const struct rte_flow_item *item,
1263 const void *default_mask,
1266 const struct rte_flow_item_vlan *spec = item->spec;
1267 const struct rte_flow_item_vlan *mask = item->mask;
1268 struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1269 struct ibv_flow_spec_eth *eth;
1270 const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1275 mask = default_mask;
1277 for (i = 0; i != hash_rxq_init_n; ++i) {
1278 if (!parser->queue[i].ibv_attr)
1281 eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1282 parser->queue[i].offset - eth_size);
1283 eth->val.vlan_tag = spec->tci;
1284 eth->mask.vlan_tag = mask->tci;
1285 eth->val.vlan_tag &= eth->mask.vlan_tag;
1292 * Convert IPv4 item to Verbs specification.
1295 * Item specification.
1296 * @param default_mask[in]
1297 * Default bit-masks to use when item->mask is not provided.
1298 * @param data[in, out]
1302 * 0 on success, a negative errno value otherwise and rte_errno is set.
1305 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
1306 const void *default_mask,
1309 const struct rte_flow_item_ipv4 *spec = item->spec;
1310 const struct rte_flow_item_ipv4 *mask = item->mask;
1311 struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1312 unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
1313 struct ibv_flow_spec_ipv4_ext ipv4 = {
1314 .type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
1318 /* Don't update layer for the inner pattern. */
1320 parser->layer = HASH_RXQ_IPV4;
1323 mask = default_mask;
1324 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
1325 .src_ip = spec->hdr.src_addr,
1326 .dst_ip = spec->hdr.dst_addr,
1327 .proto = spec->hdr.next_proto_id,
1328 .tos = spec->hdr.type_of_service,
1330 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
1331 .src_ip = mask->hdr.src_addr,
1332 .dst_ip = mask->hdr.dst_addr,
1333 .proto = mask->hdr.next_proto_id,
1334 .tos = mask->hdr.type_of_service,
1336 /* Remove unwanted bits from values. */
1337 ipv4.val.src_ip &= ipv4.mask.src_ip;
1338 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
1339 ipv4.val.proto &= ipv4.mask.proto;
1340 ipv4.val.tos &= ipv4.mask.tos;
1342 mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
1347 * Convert IPv6 item to Verbs specification.
1350 * Item specification.
1351 * @param default_mask[in]
1352 * Default bit-masks to use when item->mask is not provided.
1353 * @param data[in, out]
1357 * 0 on success, a negative errno value otherwise and rte_errno is set.
1360 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
1361 const void *default_mask,
1364 const struct rte_flow_item_ipv6 *spec = item->spec;
1365 const struct rte_flow_item_ipv6 *mask = item->mask;
1366 struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1367 unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
1368 struct ibv_flow_spec_ipv6 ipv6 = {
1369 .type = parser->inner | IBV_FLOW_SPEC_IPV6,
1373 /* Don't update layer for the inner pattern. */
1375 parser->layer = HASH_RXQ_IPV6;
1378 uint32_t vtc_flow_val;
1379 uint32_t vtc_flow_mask;
1382 mask = default_mask;
1383 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
1384 RTE_DIM(ipv6.val.src_ip));
1385 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
1386 RTE_DIM(ipv6.val.dst_ip));
1387 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
1388 RTE_DIM(ipv6.mask.src_ip));
1389 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
1390 RTE_DIM(ipv6.mask.dst_ip));
1391 vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
1392 vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
1393 ipv6.val.flow_label =
1394 rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
1396 ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
1398 ipv6.val.next_hdr = spec->hdr.proto;
1399 ipv6.val.hop_limit = spec->hdr.hop_limits;
1400 ipv6.mask.flow_label =
1401 rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
1403 ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
1405 ipv6.mask.next_hdr = mask->hdr.proto;
1406 ipv6.mask.hop_limit = mask->hdr.hop_limits;
1407 /* Remove unwanted bits from values. */
1408 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
1409 ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
1410 ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
1412 ipv6.val.flow_label &= ipv6.mask.flow_label;
1413 ipv6.val.traffic_class &= ipv6.mask.traffic_class;
1414 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
1415 ipv6.val.hop_limit &= ipv6.mask.hop_limit;
1417 mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
1422 * Convert UDP item to Verbs specification.
1425 * Item specification.
1426 * @param default_mask[in]
1427 * Default bit-masks to use when item->mask is not provided.
1428 * @param data[in, out]
1432 * 0 on success, a negative errno value otherwise and rte_errno is set.
1435 mlx5_flow_create_udp(const struct rte_flow_item *item,
1436 const void *default_mask,
1439 const struct rte_flow_item_udp *spec = item->spec;
1440 const struct rte_flow_item_udp *mask = item->mask;
1441 struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1442 unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1443 struct ibv_flow_spec_tcp_udp udp = {
1444 .type = parser->inner | IBV_FLOW_SPEC_UDP,
1448 /* Don't update layer for the inner pattern. */
1449 if (!parser->inner) {
1450 if (parser->layer == HASH_RXQ_IPV4)
1451 parser->layer = HASH_RXQ_UDPV4;
1453 parser->layer = HASH_RXQ_UDPV6;
1457 mask = default_mask;
1458 udp.val.dst_port = spec->hdr.dst_port;
1459 udp.val.src_port = spec->hdr.src_port;
1460 udp.mask.dst_port = mask->hdr.dst_port;
1461 udp.mask.src_port = mask->hdr.src_port;
1462 /* Remove unwanted bits from values. */
1463 udp.val.src_port &= udp.mask.src_port;
1464 udp.val.dst_port &= udp.mask.dst_port;
1466 mlx5_flow_create_copy(parser, &udp, udp_size);
1471 * Convert TCP item to Verbs specification.
1474 * Item specification.
1475 * @param default_mask[in]
1476 * Default bit-masks to use when item->mask is not provided.
1477 * @param data[in, out]
1481 * 0 on success, a negative errno value otherwise and rte_errno is set.
1484 mlx5_flow_create_tcp(const struct rte_flow_item *item,
1485 const void *default_mask,
1488 const struct rte_flow_item_tcp *spec = item->spec;
1489 const struct rte_flow_item_tcp *mask = item->mask;
1490 struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1491 unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1492 struct ibv_flow_spec_tcp_udp tcp = {
1493 .type = parser->inner | IBV_FLOW_SPEC_TCP,
1497 /* Don't update layer for the inner pattern. */
1498 if (!parser->inner) {
1499 if (parser->layer == HASH_RXQ_IPV4)
1500 parser->layer = HASH_RXQ_TCPV4;
1502 parser->layer = HASH_RXQ_TCPV6;
1506 mask = default_mask;
1507 tcp.val.dst_port = spec->hdr.dst_port;
1508 tcp.val.src_port = spec->hdr.src_port;
1509 tcp.mask.dst_port = mask->hdr.dst_port;
1510 tcp.mask.src_port = mask->hdr.src_port;
1511 /* Remove unwanted bits from values. */
1512 tcp.val.src_port &= tcp.mask.src_port;
1513 tcp.val.dst_port &= tcp.mask.dst_port;
1515 mlx5_flow_create_copy(parser, &tcp, tcp_size);
1520 * Convert VXLAN item to Verbs specification.
1523 * Item specification.
1524 * @param default_mask[in]
1525 * Default bit-masks to use when item->mask is not provided.
1526 * @param data[in, out]
1530 * 0 on success, a negative errno value otherwise and rte_errno is set.
1533 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
1534 const void *default_mask,
1537 const struct rte_flow_item_vxlan *spec = item->spec;
1538 const struct rte_flow_item_vxlan *mask = item->mask;
1539 struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1540 unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1541 struct ibv_flow_spec_tunnel vxlan = {
1542 .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1551 parser->inner = IBV_FLOW_SPEC_INNER;
1554 mask = default_mask;
1555 memcpy(&id.vni[1], spec->vni, 3);
1556 vxlan.val.tunnel_id = id.vlan_id;
1557 memcpy(&id.vni[1], mask->vni, 3);
1558 vxlan.mask.tunnel_id = id.vlan_id;
1559 /* Remove unwanted bits from values. */
1560 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1563 * Tunnel id 0 is equivalent to not adding a VXLAN layer: if only this
1564 * layer is defined in the Verbs specification, it is interpreted as a
1565 * wildcard and all packets will match this rule. If it follows a full
1566 * stack layer (e.g. eth / ipv4 / udp), all packets matching the layers
1567 * before will also match this rule.
1568 * To avoid such a situation, VNI 0 is currently refused.
1570 if (!vxlan.val.tunnel_id) {
1574 mlx5_flow_create_copy(parser, &vxlan, size);
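/*
 * Example of the pitfall described above (illustrative): a rule built from
 * "eth / ipv4 / udp / vxlan" with VNI 0 would program a wildcard tunnel
 * specification, so any VXLAN packet matching the outer layers would also
 * match the rule; such rules are therefore rejected with EINVAL.
 */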
1579 * Convert mark/flag action to Verbs specification.
1582 * Internal parser structure.
1587 * 0 on success, a negative errno value otherwise and rte_errno is set.
1590 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
1592 unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1593 struct ibv_flow_spec_action_tag tag = {
1594 .type = IBV_FLOW_SPEC_ACTION_TAG,
1596 .tag_id = mlx5_flow_mark_set(mark_id),
1599 assert(parser->mark);
1600 mlx5_flow_create_copy(parser, &tag, size);
1605 * Convert count action to Verbs specification.
1608 * Pointer to Ethernet device.
1610 * Pointer to MLX5 flow parser structure.
1613 * 0 on success, a negative errno value otherwise and rte_errno is set.
1616 mlx5_flow_create_count(struct rte_eth_dev *dev __rte_unused,
1617 struct mlx5_flow_parse *parser __rte_unused)
1619 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
1620 struct priv *priv = dev->data->dev_private;
1621 unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
1622 struct ibv_counter_set_init_attr init_attr = {0};
1623 struct ibv_flow_spec_counter_action counter = {
1624 .type = IBV_FLOW_SPEC_ACTION_COUNT,
1626 .counter_set_handle = 0,
1629 init_attr.counter_set_id = 0;
1630 parser->cs = mlx5_glue->create_counter_set(priv->ctx, &init_attr);
1635 counter.counter_set_handle = parser->cs->handle;
1636 mlx5_flow_create_copy(parser, &counter, size);
1642 * Complete flow rule creation with a drop queue.
1645 * Pointer to Ethernet device.
1647 * Internal parser structure.
1649 * Pointer to the rte_flow.
1651 * Perform verbose error reporting if not NULL.
1654 * 0 on success, a negative errno value otherwise and rte_errno is set.
1657 mlx5_flow_create_action_queue_drop(struct rte_eth_dev *dev,
1658 struct mlx5_flow_parse *parser,
1659 struct rte_flow *flow,
1660 struct rte_flow_error *error)
1662 struct priv *priv = dev->data->dev_private;
1663 struct ibv_flow_spec_action_drop *drop;
1664 unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
1669 drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
1670 parser->queue[HASH_RXQ_ETH].offset);
1671 *drop = (struct ibv_flow_spec_action_drop){
1672 .type = IBV_FLOW_SPEC_ACTION_DROP,
1675 ++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
1676 parser->queue[HASH_RXQ_ETH].offset += size;
1677 flow->frxq[HASH_RXQ_ETH].ibv_attr =
1678 parser->queue[HASH_RXQ_ETH].ibv_attr;
1680 flow->cs = parser->cs;
1681 if (!priv->dev->data->dev_started)
1683 parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
1684 flow->frxq[HASH_RXQ_ETH].ibv_flow =
1685 mlx5_glue->create_flow(priv->flow_drop_queue->qp,
1686 flow->frxq[HASH_RXQ_ETH].ibv_attr);
1687 if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1688 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1689 NULL, "flow rule creation failure");
1695 if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1696 claim_zero(mlx5_glue->destroy_flow
1697 (flow->frxq[HASH_RXQ_ETH].ibv_flow));
1698 flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
1700 if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
1701 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
1702 flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
1705 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
1713 * Create hash Rx queues when RSS is enabled.
1716 * Pointer to Ethernet device.
1718 * Internal parser structure.
1720 * Pointer to the rte_flow.
1722 * Perform verbose error reporting if not NULL.
1725 * 0 on success, a negative errno value otherwise and rte_errno is set.
1728 mlx5_flow_create_action_queue_rss(struct rte_eth_dev *dev,
1729 struct mlx5_flow_parse *parser,
1730 struct rte_flow *flow,
1731 struct rte_flow_error *error)
1733 struct priv *priv = dev->data->dev_private;
1736 for (i = 0; i != hash_rxq_init_n; ++i) {
1737 uint64_t hash_fields;
1739 if (!parser->queue[i].ibv_attr)
1741 flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
1742 parser->queue[i].ibv_attr = NULL;
1743 hash_fields = hash_rxq_init[i].hash_fields;
1744 if (!priv->dev->data->dev_started)
1746 flow->frxq[i].hrxq =
1748 parser->rss_conf.rss_key,
1749 parser->rss_conf.rss_key_len,
1753 if (flow->frxq[i].hrxq)
1755 flow->frxq[i].hrxq =
1757 parser->rss_conf.rss_key,
1758 parser->rss_conf.rss_key_len,
1762 if (!flow->frxq[i].hrxq) {
1763 return rte_flow_error_set(error, ENOMEM,
1764 RTE_FLOW_ERROR_TYPE_HANDLE,
1766 "cannot create hash rxq");
1773 * Complete flow rule creation.
1776 * Pointer to Ethernet device.
1778 * Internal parser structure.
1780 * Pointer to the rte_flow.
1782 * Perform verbose error reporting if not NULL.
1785 * 0 on success, a negative errno value otherwise and rte_errno is set.
1788 mlx5_flow_create_action_queue(struct rte_eth_dev *dev,
1789 struct mlx5_flow_parse *parser,
1790 struct rte_flow *flow,
1791 struct rte_flow_error *error)
1793 struct priv *priv = dev->data->dev_private;
1796 unsigned int flows_n = 0;
1800 assert(!parser->drop);
1801 ret = mlx5_flow_create_action_queue_rss(dev, parser, flow, error);
1805 flow->cs = parser->cs;
1806 if (!priv->dev->data->dev_started)
1808 for (i = 0; i != hash_rxq_init_n; ++i) {
1809 if (!flow->frxq[i].hrxq)
1811 flow->frxq[i].ibv_flow =
1812 mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
1813 flow->frxq[i].ibv_attr);
1814 if (!flow->frxq[i].ibv_flow) {
1815 rte_flow_error_set(error, ENOMEM,
1816 RTE_FLOW_ERROR_TYPE_HANDLE,
1817 NULL, "flow rule creation failure");
1821 DEBUG("port %u %p type %d QP %p ibv_flow %p",
1824 (void *)flow->frxq[i].hrxq,
1825 (void *)flow->frxq[i].ibv_flow);
1828 rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE,
1829 NULL, "internal error in flow creation");
1832 for (i = 0; i != parser->queues_n; ++i) {
1833 struct mlx5_rxq_data *q =
1834 (*priv->rxqs)[parser->queues[i]];
1836 q->mark |= parser->mark;
1840 ret = rte_errno; /* Save rte_errno before cleanup. */
1842 for (i = 0; i != hash_rxq_init_n; ++i) {
1843 if (flow->frxq[i].ibv_flow) {
1844 struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;
1846 claim_zero(mlx5_glue->destroy_flow(ibv_flow));
1848 if (flow->frxq[i].hrxq)
1849 mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
1850 if (flow->frxq[i].ibv_attr)
1851 rte_free(flow->frxq[i].ibv_attr);
1854 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
1858 rte_errno = ret; /* Restore rte_errno. */
1866 * Pointer to Ethernet device.
1868 * Pointer to a TAILQ flow list.
1870 * Flow rule attributes.
1871 * @param[in] pattern
1872 * Pattern specification (list terminated by the END pattern item).
1873 * @param[in] actions
1874 * Associated actions (list terminated by the END action).
1876 * Perform verbose error reporting if not NULL.
1879 * A flow on success, NULL otherwise and rte_errno is set.
1881 static struct rte_flow *
1882 mlx5_flow_list_create(struct rte_eth_dev *dev,
1883 struct mlx5_flows *list,
1884 const struct rte_flow_attr *attr,
1885 const struct rte_flow_item items[],
1886 const struct rte_flow_action actions[],
1887 struct rte_flow_error *error)
1889 struct mlx5_flow_parse parser = { .create = 1, };
1890 struct rte_flow *flow = NULL;
1894 ret = mlx5_flow_convert(dev, attr, items, actions, error, &parser);
1897 flow = rte_calloc(__func__, 1,
1898 sizeof(*flow) + parser.queues_n * sizeof(uint16_t),
1901 rte_flow_error_set(error, ENOMEM,
1902 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1904 "cannot allocate flow memory");
1907 /* Copy the queue configuration. */
1908 flow->queues = (uint16_t (*)[])(flow + 1);
1909 memcpy(flow->queues, parser.queues, parser.queues_n * sizeof(uint16_t));
1910 flow->queues_n = parser.queues_n;
1911 flow->mark = parser.mark;
1912 /* Copy RSS configuration. */
1913 flow->rss_conf = parser.rss_conf;
1914 flow->rss_conf.rss_key = flow->rss_key;
1915 memcpy(flow->rss_key, parser.rss_key, parser.rss_conf.rss_key_len);
1916 /* Finalise the flow. */
1918 ret = mlx5_flow_create_action_queue_drop(dev, &parser, flow,
1921 ret = mlx5_flow_create_action_queue(dev, &parser, flow, error);
1924 TAILQ_INSERT_TAIL(list, flow, next);
1925 DEBUG("port %u flow created %p", dev->data->port_id, (void *)flow);
1928 ERROR("port %u flow creation error: %s", dev->data->port_id,
1930 for (i = 0; i != hash_rxq_init_n; ++i) {
1931 if (parser.queue[i].ibv_attr)
1932 rte_free(parser.queue[i].ibv_attr);
1939 * Validate a flow supported by the NIC.
1941 * @see rte_flow_validate()
1945 mlx5_flow_validate(struct rte_eth_dev *dev,
1946 const struct rte_flow_attr *attr,
1947 const struct rte_flow_item items[],
1948 const struct rte_flow_action actions[],
1949 struct rte_flow_error *error)
1951 struct mlx5_flow_parse parser = { .create = 0, };
1953 return mlx5_flow_convert(dev, attr, items, actions, error, &parser);
1959 * @see rte_flow_create()
1963 mlx5_flow_create(struct rte_eth_dev *dev,
1964 const struct rte_flow_attr *attr,
1965 const struct rte_flow_item items[],
1966 const struct rte_flow_action actions[],
1967 struct rte_flow_error *error)
1969 struct priv *priv = dev->data->dev_private;
1971 return mlx5_flow_list_create(dev, &priv->flows, attr, items, actions,
1976 * Destroy a flow in a list.
1979 * Pointer to Ethernet device.
1981 * Pointer to a TAILQ flow list.
1986 mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
1987 struct rte_flow *flow)
1989 struct priv *priv = dev->data->dev_private;
1992 if (flow->drop || !flow->mark)
1994 for (i = 0; i != flow->queues_n; ++i) {
1995 struct rte_flow *tmp;
1999 * To remove the mark from the queue, the queue must not be
2000 * present in any other marked flow (RSS or not).
2002 TAILQ_FOREACH(tmp, list, next) {
2004 uint16_t *tqs = NULL;
2009 for (j = 0; j != hash_rxq_init_n; ++j) {
2010 if (!tmp->frxq[j].hrxq)
2012 tqs = tmp->frxq[j].hrxq->ind_table->queues;
2013 tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
2017 for (j = 0; (j != tq_n) && !mark; j++)
2018 if (tqs[j] == (*flow->queues)[i])
2021 (*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
2025 if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
2026 claim_zero(mlx5_glue->destroy_flow
2027 (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2028 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
2030 for (i = 0; i != hash_rxq_init_n; ++i) {
2031 struct mlx5_flow *frxq = &flow->frxq[i];
2034 claim_zero(mlx5_glue->destroy_flow
2037 mlx5_hrxq_release(dev, frxq->hrxq);
2039 rte_free(frxq->ibv_attr);
2043 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2046 TAILQ_REMOVE(list, flow, next);
2047 DEBUG("port %u flow destroyed %p", dev->data->port_id, (void *)flow);
2052 * Destroy all flows.
2055 * Pointer to Ethernet device.
2057 * Pointer to a TAILQ flow list.
2060 mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
2062 while (!TAILQ_EMPTY(list)) {
2063 struct rte_flow *flow;
2065 flow = TAILQ_FIRST(list);
2066 mlx5_flow_list_destroy(dev, list, flow);
2071 * Create drop queue.
2074 * Pointer to Ethernet device.
2077 * 0 on success, a negative errno value otherwise and rte_errno is set.
2080 mlx5_flow_create_drop_queue(struct rte_eth_dev *dev)
2082 struct priv *priv = dev->data->dev_private;
2083 struct mlx5_hrxq_drop *fdq = NULL;
2087 fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
2089 WARN("port %u cannot allocate memory for drop queue",
2090 dev->data->port_id);
2094 fdq->cq = mlx5_glue->create_cq(priv->ctx, 1, NULL, NULL, 0);
2096 WARN("port %u cannot allocate CQ for drop queue",
2097 dev->data->port_id);
2101 fdq->wq = mlx5_glue->create_wq
2103 &(struct ibv_wq_init_attr){
2104 .wq_type = IBV_WQT_RQ,
2111 WARN("port %u cannot allocate WQ for drop queue",
2112 dev->data->port_id);
2116 fdq->ind_table = mlx5_glue->create_rwq_ind_table
2118 &(struct ibv_rwq_ind_table_init_attr){
2119 .log_ind_tbl_size = 0,
2120 .ind_tbl = &fdq->wq,
2123 if (!fdq->ind_table) {
2124 WARN("port %u cannot allocate indirection table for drop"
2125 " queue", dev->data->port_id);
2129 fdq->qp = mlx5_glue->create_qp_ex
2131 &(struct ibv_qp_init_attr_ex){
2132 .qp_type = IBV_QPT_RAW_PACKET,
2134 IBV_QP_INIT_ATTR_PD |
2135 IBV_QP_INIT_ATTR_IND_TABLE |
2136 IBV_QP_INIT_ATTR_RX_HASH,
2137 .rx_hash_conf = (struct ibv_rx_hash_conf){
2139 IBV_RX_HASH_FUNC_TOEPLITZ,
2140 .rx_hash_key_len = rss_hash_default_key_len,
2141 .rx_hash_key = rss_hash_default_key,
2142 .rx_hash_fields_mask = 0,
2144 .rwq_ind_tbl = fdq->ind_table,
2148 WARN("port %u cannot allocate QP for drop queue",
2149 dev->data->port_id);
2153 priv->flow_drop_queue = fdq;
2157 claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2159 claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2161 claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2163 claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2166 priv->flow_drop_queue = NULL;
2171 * Delete drop queue.
2174 * Pointer to Ethernet device.
2177 mlx5_flow_delete_drop_queue(struct rte_eth_dev *dev)
2179 struct priv *priv = dev->data->dev_private;
2180 struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
2185 claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2187 claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2189 claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2191 claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2193 priv->flow_drop_queue = NULL;
2200 * Pointer to Ethernet device.
2202 * Pointer to a TAILQ flow list.
2205 mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
2207 struct priv *priv = dev->data->dev_private;
2208 struct rte_flow *flow;
2210 TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
2212 struct mlx5_ind_table_ibv *ind_tbl = NULL;
2215 if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
2217 claim_zero(mlx5_glue->destroy_flow
2218 (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2219 flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
2220 DEBUG("port %u flow %p removed", dev->data->port_id,
2225 /* Verify the flow has not already been cleaned. */
2226 for (i = 0; i != hash_rxq_init_n; ++i) {
2227 if (!flow->frxq[i].ibv_flow)
2230 * Indirection table may be necessary to remove the
2231 * flags in the Rx queues.
2232 * This helps to speed up the process by avoiding
2233 * another loop.
2234 */
2235 ind_tbl = flow->frxq[i].hrxq->ind_table;
2238 if (i == hash_rxq_init_n)
2242 for (i = 0; i != ind_tbl->queues_n; ++i)
2243 (*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
2245 for (i = 0; i != hash_rxq_init_n; ++i) {
2246 if (!flow->frxq[i].ibv_flow)
2248 claim_zero(mlx5_glue->destroy_flow
2249 (flow->frxq[i].ibv_flow));
2250 flow->frxq[i].ibv_flow = NULL;
2251 mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
2252 flow->frxq[i].hrxq = NULL;
2254 DEBUG("port %u flow %p removed", dev->data->port_id,
2263 * Pointer to Ethernet device.
2265 * Pointer to a TAILQ flow list.
2268 * 0 on success, a negative errno value otherwise and rte_errno is set.
2271 mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
2273 struct priv *priv = dev->data->dev_private;
2274 struct rte_flow *flow;
2276 TAILQ_FOREACH(flow, list, next) {
2280 flow->frxq[HASH_RXQ_ETH].ibv_flow =
2281 mlx5_glue->create_flow
2282 (priv->flow_drop_queue->qp,
2283 flow->frxq[HASH_RXQ_ETH].ibv_attr);
2284 if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2285 DEBUG("port %u flow %p cannot be applied",
2291 DEBUG("port %u flow %p applied", dev->data->port_id,
2296 for (i = 0; i != hash_rxq_init_n; ++i) {
2297 if (!flow->frxq[i].ibv_attr)
2299 flow->frxq[i].hrxq =
2300 mlx5_hrxq_get(dev, flow->rss_conf.rss_key,
2301 flow->rss_conf.rss_key_len,
2302 hash_rxq_init[i].hash_fields,
2305 if (flow->frxq[i].hrxq)
2307 flow->frxq[i].hrxq =
2308 mlx5_hrxq_new(dev, flow->rss_conf.rss_key,
2309 flow->rss_conf.rss_key_len,
2310 hash_rxq_init[i].hash_fields,
2313 if (!flow->frxq[i].hrxq) {
2314 DEBUG("port %u flow %p cannot be applied",
2315 dev->data->port_id, (void *)flow);
2320 flow->frxq[i].ibv_flow =
2321 mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
2322 flow->frxq[i].ibv_attr);
2323 if (!flow->frxq[i].ibv_flow) {
2324 DEBUG("port %u flow %p cannot be applied",
2325 dev->data->port_id, (void *)flow);
2329 DEBUG("port %u flow %p applied",
2330 dev->data->port_id, (void *)flow);
2334 for (i = 0; i != flow->queues_n; ++i)
2335 (*priv->rxqs)[(*flow->queues)[i]]->mark = 1;
2341 * Verify the flow list is empty.
2344 * Pointer to Ethernet device.
2346 * @return the number of flows not released.
2349 mlx5_flow_verify(struct rte_eth_dev *dev)
2351 struct priv *priv = dev->data->dev_private;
2352 struct rte_flow *flow;
2355 TAILQ_FOREACH(flow, &priv->flows, next) {
2356 DEBUG("port %u flow %p still referenced",
2357 dev->data->port_id, (void *)flow);
2364 * Enable a control flow configured from the control plane.
2367 * Pointer to Ethernet device.
2369 * An Ethernet flow spec to apply.
2371 * An Ethernet flow mask to apply.
2373 * A VLAN flow spec to apply.
2375 * A VLAN flow mask to apply.
2378 * 0 on success, a negative errno value otherwise and rte_errno is set.
2381 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
2382 struct rte_flow_item_eth *eth_spec,
2383 struct rte_flow_item_eth *eth_mask,
2384 struct rte_flow_item_vlan *vlan_spec,
2385 struct rte_flow_item_vlan *vlan_mask)
2387 struct priv *priv = dev->data->dev_private;
2388 const struct rte_flow_attr attr = {
2390 .priority = MLX5_CTRL_FLOW_PRIORITY,
2392 struct rte_flow_item items[] = {
2394 .type = RTE_FLOW_ITEM_TYPE_ETH,
2400 .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
2401 RTE_FLOW_ITEM_TYPE_END,
2407 .type = RTE_FLOW_ITEM_TYPE_END,
2410 struct rte_flow_action actions[] = {
2412 .type = RTE_FLOW_ACTION_TYPE_RSS,
2415 .type = RTE_FLOW_ACTION_TYPE_END,
2418 struct rte_flow *flow;
2419 struct rte_flow_error error;
2422 struct rte_flow_action_rss rss;
2424 const struct rte_eth_rss_conf *rss_conf;
2426 uint16_t queue[RTE_MAX_QUEUES_PER_PORT];
2430 if (!priv->reta_idx_n) {
2434 for (i = 0; i != priv->reta_idx_n; ++i)
2435 action_rss.local.queue[i] = (*priv->reta_idx)[i];
2436 action_rss.local.rss_conf = &priv->rss_conf;
2437 action_rss.local.num = priv->reta_idx_n;
2438 actions[0].conf = (const void *)&action_rss.rss;
2439 flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
2447 * Enable a control flow configured from the control plane.
2450 * Pointer to Ethernet device.
2452 * An Ethernet flow spec to apply.
2454 * An Ethernet flow mask to apply.
2457 * 0 on success, a negative errno value otherwise and rte_errno is set.
2460 mlx5_ctrl_flow(struct rte_eth_dev *dev,
2461 struct rte_flow_item_eth *eth_spec,
2462 struct rte_flow_item_eth *eth_mask)
2464 return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
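/*
 * Usage sketch (an assumption mirroring how the PMD enables control
 * traffic, e.g. broadcast): spec and mask may share the same fully-masked
 * destination address.
 */
#if 0 /* example only */
static int
example_enable_broadcast(struct rte_eth_dev *dev)
{
	struct rte_flow_item_eth bcast = {
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
	};

	return mlx5_ctrl_flow(dev, &bcast, &bcast);
}
#endif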
2470 * @see rte_flow_destroy()
2474 mlx5_flow_destroy(struct rte_eth_dev *dev,
2475 struct rte_flow *flow,
2476 struct rte_flow_error *error __rte_unused)
2478 struct priv *priv = dev->data->dev_private;
2480 mlx5_flow_list_destroy(dev, &priv->flows, flow);
2485 * Destroy all flows.
2487 * @see rte_flow_flush()
2491 mlx5_flow_flush(struct rte_eth_dev *dev,
2492 struct rte_flow_error *error __rte_unused)
2494 struct priv *priv = dev->data->dev_private;
2496 mlx5_flow_list_flush(dev, &priv->flows);
2500 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
2502 * Query flow counter.
2506 * @param query_count
2507 * Returned data from the counter.
2510 * 0 on success, a negative errno value otherwise and rte_errno is set.
2513 mlx5_flow_query_count(struct ibv_counter_set *cs,
2514 struct mlx5_flow_counter_stats *counter_stats,
2515 struct rte_flow_query_count *query_count,
2516 struct rte_flow_error *error)
2518 uint64_t counters[2];
2519 struct ibv_query_counter_set_attr query_cs_attr = {
2521 .query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
2523 struct ibv_counter_set_data query_out = {
2525 .outlen = 2 * sizeof(uint64_t),
2527 int err = mlx5_glue->query_counter_set(&query_cs_attr, &query_out);
2530 return rte_flow_error_set(error, err,
2531 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2533 "cannot read counter");
2534 query_count->hits_set = 1;
2535 query_count->bytes_set = 1;
2536 query_count->hits = counters[0] - counter_stats->hits;
2537 query_count->bytes = counters[1] - counter_stats->bytes;
2538 if (query_count->reset) {
2539 counter_stats->hits = counters[0];
2540 counter_stats->bytes = counters[1];
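/*
 * Worked example of the delta bookkeeping above: if the hardware counter
 * reads 100 hits while counter_stats->hits still holds 40 from the last
 * reset, the query reports 60. With query_count->reset set, 100 becomes
 * the new baseline and the next query counts from zero again.
 */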
/**
 * Query a flow counter.
 *
 * @see rte_flow_query()
 */
int
mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
		struct rte_flow *flow,
		enum rte_flow_action_type action __rte_unused,
		void *data,
		struct rte_flow_error *error)
{
	if (!flow->cs)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
					  NULL,
					  "no counter found for flow");
	return mlx5_flow_query_count(flow->cs,
				     &flow->counter_stats,
				     (struct rte_flow_query_count *)data,
				     error);
}
#endif /* HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT */
/**
 * Enable or disable flow isolation mode.
 *
 * @see rte_flow_isolate()
 */
int
mlx5_flow_isolate(struct rte_eth_dev *dev,
		  int enable,
		  struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	if (dev->data->dev_started) {
		rte_flow_error_set(error, EBUSY,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "port must be stopped first");
		return -rte_errno;
	}
	priv->isolated = !!enable;
	if (priv->isolated)
		priv->dev->dev_ops = &mlx5_dev_ops_isolate;
	else
		priv->dev->dev_ops = &mlx5_dev_ops;
	return 0;
}
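/*
 * In isolated mode the port receives only packets matching explicitly
 * created flow rules, which is why the alternate dev_ops table installed
 * above strips the MAC/VLAN control-flow management paths. Application
 * sketch (must run before rte_eth_dev_start()):
 *
 *   struct rte_flow_error error;
 *
 *   if (rte_flow_isolate(port_id, 1, &error))
 *           rte_exit(EXIT_FAILURE, "cannot enter isolated mode: %s\n",
 *                    error.message ? error.message : "(no message)");
 */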
/**
 * Convert a flow director filter to a generic flow.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Flow director filter to add.
 * @param attributes
 *   Generic flow parameters structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
			 const struct rte_eth_fdir_filter *fdir_filter,
			 struct mlx5_fdir *attributes)
{
	struct priv *priv = dev->data->dev_private;
	const struct rte_eth_fdir_input *input = &fdir_filter->input;

	/* Validate queue number. */
	if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
		ERROR("port %u invalid queue number %d",
		      dev->data->port_id, fdir_filter->action.rx_queue);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	attributes->attr.ingress = 1;
	attributes->items[0] = (struct rte_flow_item) {
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.spec = &attributes->l2,
		.mask = &attributes->l2_mask,
	};
	switch (fdir_filter->action.behavior) {
	case RTE_ETH_FDIR_ACCEPT:
		attributes->actions[0] = (struct rte_flow_action){
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &attributes->queue,
		};
		break;
	case RTE_ETH_FDIR_REJECT:
		attributes->actions[0] = (struct rte_flow_action){
			.type = RTE_FLOW_ACTION_TYPE_DROP,
		};
		break;
	default:
		ERROR("port %u invalid behavior %d",
		      dev->data->port_id,
		      fdir_filter->action.behavior);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	attributes->queue.index = fdir_filter->action.rx_queue;
	switch (fdir_filter->input.flow_type) {
	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = input->flow.udp4_flow.ip.src_ip,
			.dst_addr = input->flow.udp4_flow.ip.dst_ip,
			.time_to_live = input->flow.udp4_flow.ip.ttl,
			.type_of_service = input->flow.udp4_flow.ip.tos,
			.next_proto_id = input->flow.udp4_flow.ip.proto,
		};
		attributes->l4.udp.hdr = (struct udp_hdr){
			.src_port = input->flow.udp4_flow.src_port,
			.dst_port = input->flow.udp4_flow.dst_port,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV4,
			.spec = &attributes->l3,
			.mask = &attributes->l3,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_UDP,
			.spec = &attributes->l4,
			.mask = &attributes->l4,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = input->flow.tcp4_flow.ip.src_ip,
			.dst_addr = input->flow.tcp4_flow.ip.dst_ip,
			.time_to_live = input->flow.tcp4_flow.ip.ttl,
			.type_of_service = input->flow.tcp4_flow.ip.tos,
			.next_proto_id = input->flow.tcp4_flow.ip.proto,
		};
		attributes->l4.tcp.hdr = (struct tcp_hdr){
			.src_port = input->flow.tcp4_flow.src_port,
			.dst_port = input->flow.tcp4_flow.dst_port,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV4,
			.spec = &attributes->l3,
			.mask = &attributes->l3,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_TCP,
			.spec = &attributes->l4,
			.mask = &attributes->l4,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = input->flow.ip4_flow.src_ip,
			.dst_addr = input->flow.ip4_flow.dst_ip,
			.time_to_live = input->flow.ip4_flow.ttl,
			.type_of_service = input->flow.ip4_flow.tos,
			.next_proto_id = input->flow.ip4_flow.proto,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV4,
			.spec = &attributes->l3,
			.mask = &attributes->l3,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
			.hop_limits = input->flow.udp6_flow.ip.hop_limits,
			.proto = input->flow.udp6_flow.ip.proto,
		};
		memcpy(attributes->l3.ipv6.hdr.src_addr,
		       input->flow.udp6_flow.ip.src_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
		memcpy(attributes->l3.ipv6.hdr.dst_addr,
		       input->flow.udp6_flow.ip.dst_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
		attributes->l4.udp.hdr = (struct udp_hdr){
			.src_port = input->flow.udp6_flow.src_port,
			.dst_port = input->flow.udp6_flow.dst_port,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV6,
			.spec = &attributes->l3,
			.mask = &attributes->l3,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_UDP,
			.spec = &attributes->l4,
			.mask = &attributes->l4,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
			.hop_limits = input->flow.tcp6_flow.ip.hop_limits,
			.proto = input->flow.tcp6_flow.ip.proto,
		};
		memcpy(attributes->l3.ipv6.hdr.src_addr,
		       input->flow.tcp6_flow.ip.src_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
		memcpy(attributes->l3.ipv6.hdr.dst_addr,
		       input->flow.tcp6_flow.ip.dst_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
		attributes->l4.tcp.hdr = (struct tcp_hdr){
			.src_port = input->flow.tcp6_flow.src_port,
			.dst_port = input->flow.tcp6_flow.dst_port,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV6,
			.spec = &attributes->l3,
			.mask = &attributes->l3,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_TCP,
			.spec = &attributes->l4,
			.mask = &attributes->l4,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
			.hop_limits = input->flow.ipv6_flow.hop_limits,
			.proto = input->flow.ipv6_flow.proto,
		};
		memcpy(attributes->l3.ipv6.hdr.src_addr,
		       input->flow.ipv6_flow.src_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
		memcpy(attributes->l3.ipv6.hdr.dst_addr,
		       input->flow.ipv6_flow.dst_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV6,
			.spec = &attributes->l3,
			.mask = &attributes->l3,
		};
		break;
	default:
		ERROR("port %u invalid flow type %d",
		      dev->data->port_id, fdir_filter->input.flow_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	return 0;
}
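/*
 * Resulting item layout, for reference: items[0] is always ETH, items[1]
 * the IPv4/IPv6 header and items[2] the optional UDP/TCP header, the
 * rest of attributes->items[] staying END-terminated. Note that .spec
 * and .mask point to the same L3/L4 header, i.e. the spec doubles as its
 * own mask and only the bits actually set in the filter input take part
 * in the match.
 */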
/**
 * Add new flow director filter and store it in list.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Flow director filter to add.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_add(struct rte_eth_dev *dev,
		     const struct rte_eth_fdir_filter *fdir_filter)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_fdir attributes = {
		.attr.group = 0,
		.l2_mask = {
			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.type = 0,
		},
	};
	struct mlx5_flow_parse parser = {
		.layer = HASH_RXQ_ETH,
	};
	struct rte_flow_error error;
	struct rte_flow *flow;
	int ret;

	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
	if (ret)
		return ret;
	ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
				attributes.actions, &error, &parser);
	if (ret)
		return ret;
	flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
				     attributes.items, attributes.actions,
				     &error);
	if (flow) {
		DEBUG("port %u FDIR created %p", dev->data->port_id,
		      (void *)flow);
		return 0;
	}
	return -rte_errno;
}
/**
 * Delete specific filter.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Filter to be deleted.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_delete(struct rte_eth_dev *dev,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_fdir attributes = {
		.attr.group = 0,
	};
	struct mlx5_flow_parse parser = {
		.create = 1,
		.layer = HASH_RXQ_ETH,
	};
	struct rte_flow_error error;
	struct rte_flow *flow;
	unsigned int i;
	int ret;

	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
	if (ret)
		return ret;
	ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
				attributes.actions, &error, &parser);
	if (ret) {
		ret = rte_errno; /* Save rte_errno before cleanup. */
		goto exit;
	}
	/*
	 * Special case for the drop action, which is only written to the
	 * specifications when the flow is created; at this point the drop
	 * specification is missing and must be appended by hand.
	 */
	if (parser.drop) {
		struct ibv_flow_spec_action_drop *drop;

		drop = (void *)((uintptr_t)parser.queue[HASH_RXQ_ETH].ibv_attr +
				parser.queue[HASH_RXQ_ETH].offset);
		*drop = (struct ibv_flow_spec_action_drop){
			.type = IBV_FLOW_SPEC_ACTION_DROP,
			.size = sizeof(struct ibv_flow_spec_action_drop),
		};
		parser.queue[HASH_RXQ_ETH].ibv_attr->num_of_specs++;
	}
	TAILQ_FOREACH(flow, &priv->flows, next) {
		struct ibv_flow_attr *attr;
		struct ibv_spec_header *attr_h;
		void *spec;
		struct ibv_flow_attr *flow_attr;
		struct ibv_spec_header *flow_h;
		void *flow_spec;
		unsigned int specs_n;

		attr = parser.queue[HASH_RXQ_ETH].ibv_attr;
		flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr;
		/* Compare first the attributes. */
		if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
			continue;
		if (attr->num_of_specs == 0)
			continue;
		spec = (void *)((uintptr_t)attr +
				sizeof(struct ibv_flow_attr));
		flow_spec = (void *)((uintptr_t)flow_attr +
				     sizeof(struct ibv_flow_attr));
		specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
		for (i = 0; i != specs_n; ++i) {
			attr_h = spec;
			flow_h = flow_spec;
			if (memcmp(spec, flow_spec,
				   RTE_MIN(attr_h->size, flow_h->size)))
				goto wrong_flow;
			spec = (void *)((uintptr_t)spec + attr_h->size);
			flow_spec = (void *)((uintptr_t)flow_spec +
					     flow_h->size);
		}
		/* At this point, the flow matches. */
		break;
wrong_flow:
		/* The flow does not match. */
		continue;
	}
	ret = rte_errno; /* Save rte_errno before cleanup. */
	if (flow)
		mlx5_flow_list_destroy(dev, &priv->flows, flow);
exit:
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser.queue[i].ibv_attr)
			rte_free(parser.queue[i].ibv_attr);
	}
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}
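/*
 * Deletion matches on verbs specifications rather than on the original
 * rte_flow items: the candidate filter is re-converted above, then
 * compared byte-wise against every installed flow, so the cost grows
 * linearly with the number of flows times their specification sizes.
 */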
/**
 * Update queue for specific filter.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Filter to be updated.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_update(struct rte_eth_dev *dev,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	int ret;

	/* Update is implemented as delete followed by add. */
	ret = mlx5_fdir_filter_delete(dev, fdir_filter);
	if (ret)
		return ret;
	return mlx5_fdir_filter_add(dev, fdir_filter);
}
/**
 * Flush all filters.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
static void
mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_flush(dev, &priv->flows);
}
/**
 * Get flow director information.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[out] fdir_info
 *   Resulting flow director information.
 */
static void
mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
{
	struct priv *priv = dev->data->dev_private;
	struct rte_eth_fdir_masks *mask =
		&priv->dev->data->dev_conf.fdir_conf.mask;

	fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
	fdir_info->guarant_spc = 0;
	rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
	/* Flexible payload is not supported, report zeroed capabilities. */
	fdir_info->max_flexpayload = 0;
	fdir_info->flow_types_mask[0] = 0;
	fdir_info->flex_payload_unit = 0;
	fdir_info->max_flex_payload_segment_num = 0;
	fdir_info->flex_payload_limit = 0;
	memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
}
/**
 * Deal with flow director operations.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
		    void *arg)
{
	struct priv *priv = dev->data->dev_private;
	enum rte_fdir_mode fdir_mode =
		priv->dev->data->dev_conf.fdir_conf.mode;

	if (filter_op == RTE_ETH_FILTER_NOP)
		return 0;
	if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
	    fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
		ERROR("port %u flow director mode %d not supported",
		      dev->data->port_id, fdir_mode);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	switch (filter_op) {
	case RTE_ETH_FILTER_ADD:
		return mlx5_fdir_filter_add(dev, arg);
	case RTE_ETH_FILTER_UPDATE:
		return mlx5_fdir_filter_update(dev, arg);
	case RTE_ETH_FILTER_DELETE:
		return mlx5_fdir_filter_delete(dev, arg);
	case RTE_ETH_FILTER_FLUSH:
		mlx5_fdir_filter_flush(dev);
		break;
	case RTE_ETH_FILTER_INFO:
		mlx5_fdir_info_get(dev, arg);
		break;
	default:
		DEBUG("port %u unknown operation %u", dev->data->port_id,
		      filter_op);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	return 0;
}
/**
 * Manage filter operations.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param filter_type
 *   Filter type.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
		     enum rte_filter_type filter_type,
		     enum rte_filter_op filter_op,
		     void *arg)
{
	switch (filter_type) {
	case RTE_ETH_FILTER_GENERIC:
		if (filter_op != RTE_ETH_FILTER_GET) {
			rte_errno = EINVAL;
			return -rte_errno;
		}
		*(const void **)arg = &mlx5_flow_ops;
		return 0;
	case RTE_ETH_FILTER_FDIR:
		return mlx5_fdir_ctrl_func(dev, filter_op, arg);
	default:
		ERROR("port %u filter type (%d) not supported",
		      dev->data->port_id, filter_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
}
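/*
 * Sketch of how this entry point is reached (illustrative): the generic
 * rte_flow layer first fetches the PMD's flow ops through this function,
 * then dispatches to the callbacks defined above.
 *
 *   const struct rte_flow_ops *ops;
 *
 *   if (!dev->dev_ops->filter_ctrl(dev, RTE_ETH_FILTER_GENERIC,
 *                                  RTE_ETH_FILTER_GET, &ops))
 *           ops->create(dev, attr, items, actions, &error);
 */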