/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2016 6WIND S.A.
 * Copyright 2016 Mellanox Technologies, Ltd

/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#pragma GCC diagnostic ignored "-Wpedantic"
#include <infiniband/verbs.h>
#pragma GCC diagnostic error "-Wpedantic"

#include <rte_common.h>
#include <rte_ethdev_driver.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>

#include "mlx5_defs.h"
#include "mlx5_glue.h"

/* Define minimal priority for control plane flows. */
#define MLX5_CTRL_FLOW_PRIORITY 4

/* Internet Protocol versions. */

#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
struct ibv_flow_spec_counter_action {

/* Dev ops structure defined in mlx5.c. */
extern const struct eth_dev_ops mlx5_dev_ops;
extern const struct eth_dev_ops mlx5_dev_ops_isolate;
/** Structure given to the conversion functions. */
struct mlx5_flow_data {
	struct mlx5_flow_parse *parser; /**< Parser context. */
	struct rte_flow_error *error; /**< Error context. */
mlx5_flow_create_eth(const struct rte_flow_item *item,
		     const void *default_mask,
		     struct mlx5_flow_data *data);

mlx5_flow_create_vlan(const struct rte_flow_item *item,
		      const void *default_mask,
		      struct mlx5_flow_data *data);

mlx5_flow_create_ipv4(const struct rte_flow_item *item,
		      const void *default_mask,
		      struct mlx5_flow_data *data);

mlx5_flow_create_ipv6(const struct rte_flow_item *item,
		      const void *default_mask,
		      struct mlx5_flow_data *data);

mlx5_flow_create_udp(const struct rte_flow_item *item,
		     const void *default_mask,
		     struct mlx5_flow_data *data);

mlx5_flow_create_tcp(const struct rte_flow_item *item,
		     const void *default_mask,
		     struct mlx5_flow_data *data);

mlx5_flow_create_vxlan(const struct rte_flow_item *item,
		       const void *default_mask,
		       struct mlx5_flow_data *data);

struct mlx5_flow_parse;

mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,

mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);

mlx5_flow_create_count(struct rte_eth_dev *dev, struct mlx5_flow_parse *parser);
/* Hash RX queue types. */

/* Initialization data for hash RX queue. */
struct hash_rxq_init {
	uint64_t hash_fields; /* Fields that participate in the hash. */
	uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
	unsigned int flow_priority; /* Flow priority to use. */
	unsigned int ip_version; /* Internet protocol. */

/* Initialization data for hash RX queues. */
const struct hash_rxq_init hash_rxq_init[] = {
		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
				IBV_RX_HASH_DST_IPV4 |
				IBV_RX_HASH_SRC_PORT_TCP |
				IBV_RX_HASH_DST_PORT_TCP),
		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
		.ip_version = MLX5_IPV4,
		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
				IBV_RX_HASH_DST_IPV4 |
				IBV_RX_HASH_SRC_PORT_UDP |
				IBV_RX_HASH_DST_PORT_UDP),
		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
		.ip_version = MLX5_IPV4,
		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
				IBV_RX_HASH_DST_IPV4),
		.dpdk_rss_hf = (ETH_RSS_IPV4 |
		.ip_version = MLX5_IPV4,
		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
				IBV_RX_HASH_DST_IPV6 |
				IBV_RX_HASH_SRC_PORT_TCP |
				IBV_RX_HASH_DST_PORT_TCP),
		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
		.ip_version = MLX5_IPV6,
		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
				IBV_RX_HASH_DST_IPV6 |
				IBV_RX_HASH_SRC_PORT_UDP |
				IBV_RX_HASH_DST_PORT_UDP),
		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
		.ip_version = MLX5_IPV6,
		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
				IBV_RX_HASH_DST_IPV6),
		.dpdk_rss_hf = (ETH_RSS_IPV6 |
		.ip_version = MLX5_IPV6,

/* Number of entries in hash_rxq_init[]. */
const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
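
/*
 * Illustrative sketch (not part of the driver): hash_rxq_init[] maps DPDK
 * RSS hash fields to hash Rx queue types. The helper below is hypothetical
 * and only shows how dpdk_rss_hf is matched against a user rss_hf selection:
 *
 *	static unsigned int
 *	example_enabled_hash_rxqs(uint64_t rss_hf)
 *	{
 *		unsigned int i;
 *		unsigned int n = 0;
 *
 *		for (i = 0; i != hash_rxq_init_n; ++i)
 *			if (hash_rxq_init[i].dpdk_rss_hf & rss_hf)
 *				++n;
 *		return n;
 *	}
 *
 * E.g. rss_hf == ETH_RSS_NONFRAG_IPV4_TCP matches only the TCPv4 entry.
 */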
/** Structure for holding counter stats. */
struct mlx5_flow_counter_stats {
	uint64_t hits; /**< Number of packets matched by the rule. */
	uint64_t bytes; /**< Number of bytes matched by the rule. */

/** Structure for Drop queue. */
struct mlx5_hrxq_drop {
	struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
	struct ibv_qp *qp; /**< Verbs queue pair. */
	struct ibv_wq *wq; /**< Verbs work queue. */
	struct ibv_cq *cq; /**< Verbs completion queue. */

/* Flows structures. */
	uint64_t hash_fields; /**< Fields that participate in the hash. */
	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
	struct ibv_flow *ibv_flow; /**< Verbs flow. */
	struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */

/* Drop flows structures. */
struct mlx5_flow_drop {
	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
	struct ibv_flow *ibv_flow; /**< Verbs flow. */

	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
	uint32_t mark:1; /**< Set if the flow is marked. */
	uint32_t drop:1; /**< Drop queue. */
	uint16_t queues_n; /**< Number of entries in queue[]. */
	uint16_t (*queues)[]; /**< Queue indexes to use. */
	struct rte_eth_rss_conf rss_conf; /**< RSS configuration. */
	uint8_t rss_key[40]; /**< Copy of the RSS key. */
	struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
	struct mlx5_flow_counter_stats counter_stats; /**< The counter stats. */
	struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
	/**< Flow with Rx queue. */

/** Static initializer for items. */
	(const enum rte_flow_item_type []){ \
		__VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
/** Structure to generate a simple graph of layers supported by the NIC. */
struct mlx5_flow_items {
	/** List of possible actions for these items. */
	const enum rte_flow_action_type *const actions;
	/** Bit-masks corresponding to the possibilities for the item. */
	 * Default bit-masks to use when item->mask is not provided. When
	 * \default_mask is also NULL, the full supported bit-mask (\mask) is
	const void *default_mask;
	/** Bit-masks size in bytes. */
	const unsigned int mask_sz;
	 * Conversion function from rte_flow to NIC-specific flow.
	 *   rte_flow item to convert.
	 * @param default_mask
	 *   Default bit-masks to use when item->mask is not provided.
	 *   Internal structure to store the conversion.
	 *   0 on success, a negative errno value otherwise and rte_errno is
	int (*convert)(const struct rte_flow_item *item,
		       const void *default_mask,
		       struct mlx5_flow_data *data);
	/** Size in bytes of the destination structure. */
	const unsigned int dst_sz;
	/** List of possible following items. */
	const enum rte_flow_item_type *const items;
/** Valid actions for this PMD. */
static const enum rte_flow_action_type valid_actions[] = {
	RTE_FLOW_ACTION_TYPE_DROP,
	RTE_FLOW_ACTION_TYPE_QUEUE,
	RTE_FLOW_ACTION_TYPE_MARK,
	RTE_FLOW_ACTION_TYPE_FLAG,
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
	RTE_FLOW_ACTION_TYPE_COUNT,
	RTE_FLOW_ACTION_TYPE_END,
/** Graph of supported items and associated actions. */
static const struct mlx5_flow_items mlx5_flow_items[] = {
	[RTE_FLOW_ITEM_TYPE_END] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
			       RTE_FLOW_ITEM_TYPE_VXLAN),
	[RTE_FLOW_ITEM_TYPE_ETH] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
			       RTE_FLOW_ITEM_TYPE_IPV4,
			       RTE_FLOW_ITEM_TYPE_IPV6),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_eth){
			.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
			.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
		.default_mask = &rte_flow_item_eth_mask,
		.mask_sz = sizeof(struct rte_flow_item_eth),
		.convert = mlx5_flow_create_eth,
		.dst_sz = sizeof(struct ibv_flow_spec_eth),
	[RTE_FLOW_ITEM_TYPE_VLAN] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
			       RTE_FLOW_ITEM_TYPE_IPV6),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_vlan){
		.default_mask = &rte_flow_item_vlan_mask,
		.mask_sz = sizeof(struct rte_flow_item_vlan),
		.convert = mlx5_flow_create_vlan,
	[RTE_FLOW_ITEM_TYPE_IPV4] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
			       RTE_FLOW_ITEM_TYPE_TCP),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_ipv4){
				.type_of_service = -1,
		.default_mask = &rte_flow_item_ipv4_mask,
		.mask_sz = sizeof(struct rte_flow_item_ipv4),
		.convert = mlx5_flow_create_ipv4,
		.dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
	[RTE_FLOW_ITEM_TYPE_IPV6] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
			       RTE_FLOW_ITEM_TYPE_TCP),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_ipv6){
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
		.default_mask = &rte_flow_item_ipv6_mask,
		.mask_sz = sizeof(struct rte_flow_item_ipv6),
		.convert = mlx5_flow_create_ipv6,
		.dst_sz = sizeof(struct ibv_flow_spec_ipv6),
	[RTE_FLOW_ITEM_TYPE_UDP] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_udp){
		.default_mask = &rte_flow_item_udp_mask,
		.mask_sz = sizeof(struct rte_flow_item_udp),
		.convert = mlx5_flow_create_udp,
		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
	[RTE_FLOW_ITEM_TYPE_TCP] = {
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_tcp){
		.default_mask = &rte_flow_item_tcp_mask,
		.mask_sz = sizeof(struct rte_flow_item_tcp),
		.convert = mlx5_flow_create_tcp,
		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
	[RTE_FLOW_ITEM_TYPE_VXLAN] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_vxlan){
			.vni = "\xff\xff\xff",
		.default_mask = &rte_flow_item_vxlan_mask,
		.mask_sz = sizeof(struct rte_flow_item_vxlan),
		.convert = mlx5_flow_create_vxlan,
		.dst_sz = sizeof(struct ibv_flow_spec_tunnel),
/** Structure to pass to the conversion function. */
struct mlx5_flow_parse {
	uint32_t inner; /**< Set once VXLAN is encountered. */
	/**< Whether resources should remain after a validate. */
	uint32_t drop:1; /**< Target is a drop queue. */
	uint32_t mark:1; /**< Mark is present in the flow. */
	uint32_t count:1; /**< Count is present in the flow. */
	uint32_t mark_id; /**< Mark identifier. */
	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indexes to use. */
	uint16_t queues_n; /**< Number of entries in queue[]. */
	struct rte_eth_rss_conf rss_conf; /**< RSS configuration. */
	uint8_t rss_key[40]; /**< Copy of the RSS key. */
	enum hash_rxq_type layer; /**< Last pattern layer detected. */
	struct ibv_counter_set *cs; /**< Holds the counter set for the rule. */
		struct ibv_flow_attr *ibv_attr;
		/**< Pointer to Verbs attributes. */
		/**< Current position or total size of the attribute. */
	} queue[RTE_DIM(hash_rxq_init)];
static const struct rte_flow_ops mlx5_flow_ops = {
	.validate = mlx5_flow_validate,
	.create = mlx5_flow_create,
	.destroy = mlx5_flow_destroy,
	.flush = mlx5_flow_flush,
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
	.query = mlx5_flow_query,
	.isolate = mlx5_flow_isolate,
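
/*
 * Illustrative sketch (not part of the driver): applications never call
 * these ops directly; they use the public rte_flow API, which dispatches to
 * mlx5_flow_ops. Assuming port 0 is an mlx5 device with at least one Rx
 * queue configured:
 *
 *	struct rte_flow_attr attr = { .ingress = 1 };
 *	struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 *	struct rte_flow_action_queue queue = { .index = 0 };
 *	struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 *	struct rte_flow_error error;
 *	struct rte_flow *flow;
 *
 *	flow = rte_flow_create(0, &attr, pattern, actions, &error);
 *	if (!flow)
 *		printf("flow creation failed: %s\n",
 *		       error.message ? error.message : "(no message)");
 */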
/* Convert FDIR request to Generic flow. */
	struct rte_flow_attr attr;
	struct rte_flow_action actions[2];
	struct rte_flow_item items[4];
	struct rte_flow_item_eth l2;
	struct rte_flow_item_eth l2_mask;
	struct rte_flow_item_ipv4 ipv4;
	struct rte_flow_item_ipv6 ipv6;
	struct rte_flow_item_udp udp;
	struct rte_flow_item_tcp tcp;
	struct rte_flow_action_queue queue;

/* Verbs specification header. */
struct ibv_spec_header {
	enum ibv_flow_spec_type type;
 * Check support for a given item.
 *
 *   Item specification.
 *   Bit-masks covering supported fields to compare with spec, last and mask in
 *   Bit-mask size in bytes.
 *
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_item_validate(const struct rte_flow_item *item,
			const uint8_t *mask, unsigned int size)
	if (!item->spec && (item->mask || item->last)) {
	if (item->spec && !item->mask) {
		const uint8_t *spec = item->spec;

		for (i = 0; i < size; ++i)
			if ((spec[i] | mask[i]) != mask[i]) {
	if (item->last && !item->mask) {
		const uint8_t *spec = item->last;

		for (i = 0; i < size; ++i)
			if ((spec[i] | mask[i]) != mask[i]) {
		const uint8_t *spec = item->spec;

		for (i = 0; i < size; ++i)
			if ((spec[i] | mask[i]) != mask[i]) {
	if (item->spec && item->last) {
		const uint8_t *apply = mask;

		for (i = 0; i < size; ++i) {
			spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
			last[i] = ((const uint8_t *)item->last)[i] & apply[i];
		ret = memcmp(spec, last, size);
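
/*
 * Illustrative sketch (not part of the driver): mlx5_flow_item_validate()
 * rejects a spec/last byte that sets bits outside the supported mask. With
 * the Ethernet mask above (dst/src all-ones, type left zero), a pattern
 * matching on the EtherType would be refused:
 *
 *	const struct rte_flow_item_eth spec = {
 *		.type = RTE_BE16(0x0800), // Bits outside the supported mask.
 *	};
 *	struct rte_flow_item item = {
 *		.type = RTE_FLOW_ITEM_TYPE_ETH,
 *		.spec = &spec, // No item.mask: checked against driver mask.
 *	};
 *	// mlx5_flow_item_validate(&item,
 *	//			   (const uint8_t *)
 *	//			   mlx5_flow_items[RTE_FLOW_ITEM_TYPE_ETH].mask,
 *	//			   sizeof(struct rte_flow_item_eth)) < 0
 */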
 * Copy the RSS configuration from the user's one; if rss_conf is NULL,
 * use the driver's default.
 *
 *   Internal parser structure.
 *   User RSS configuration to save.
 *
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_convert_rss_conf(struct mlx5_flow_parse *parser,
			   const struct rte_eth_rss_conf *rss_conf)
	 * This function is also called at the beginning of
	 * mlx5_flow_convert_actions() to initialize the parser with the
	 * device default RSS configuration.
	if (rss_conf->rss_hf & MLX5_RSS_HF_MASK) {
	if (rss_conf->rss_key_len != 40) {
	if (rss_conf->rss_key_len && rss_conf->rss_key) {
		parser->rss_conf.rss_key_len = rss_conf->rss_key_len;
		memcpy(parser->rss_key, rss_conf->rss_key,
		       rss_conf->rss_key_len);
		parser->rss_conf.rss_key = parser->rss_key;
	parser->rss_conf.rss_hf = rss_conf->rss_hf;
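
/*
 * Illustrative sketch (not part of the driver): a user RSS configuration
 * accepted by mlx5_flow_convert_rss_conf() needs a 40-byte key, matching
 * the rss_key[40] storage above. The key bytes below are placeholders.
 *
 *	static uint8_t example_key[40] = { 0x2c, 0xc6, 0x81, 0xd1, };
 *	struct rte_eth_rss_conf example_conf = {
 *		.rss_key = example_key,
 *		.rss_key_len = 40, // Any other length is rejected.
 *		.rss_hf = ETH_RSS_NONFRAG_IPV4_TCP | ETH_RSS_NONFRAG_IPV4_UDP,
 *	};
 */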
 * Validate flow rule attributes.
 *
 *   Flow rule attributes.
 *   Perform verbose error reporting if not NULL.
 *
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_convert_attributes(const struct rte_flow_attr *attr,
			     struct rte_flow_error *error)
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
				   "groups are not supported");
	if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
				   "priorities are not supported");
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
				   "egress is not supported");
	if (!attr->ingress) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
				   "only ingress is supported");
 * Extract actions request to the parser.
 *
 *   Pointer to Ethernet device.
 *   Associated actions (list terminated by the END action).
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_convert_actions(struct rte_eth_dev *dev,
			  const struct rte_flow_action actions[],
			  struct rte_flow_error *error,
			  struct mlx5_flow_parse *parser)
	struct priv *priv = dev->data->dev_private;

	 * Add default RSS configuration necessary for Verbs to create QP even
	 * if no RSS is necessary.
	ret = mlx5_flow_convert_rss_conf(parser,
					 (const struct rte_eth_rss_conf *)
	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
			const struct rte_flow_action_queue *queue =
				(const struct rte_flow_action_queue *)
			if (!queue || (queue->index > (priv->rxqs_n - 1)))
				goto exit_action_not_supported;
			for (n = 0; n < parser->queues_n; ++n) {
				if (parser->queues[n] == queue->index) {
			if (parser->queues_n > 1 && !found) {
				rte_flow_error_set(error, ENOTSUP,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   "queue action not in RSS queues");
				parser->queues_n = 1;
				parser->queues[0] = queue->index;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
			const struct rte_flow_action_rss *rss =
				(const struct rte_flow_action_rss *)
			if (!rss || !rss->num) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
			if (parser->queues_n == 1) {
				assert(parser->queues_n);
				for (n = 0; n < rss->num; ++n) {
					if (parser->queues[0] ==
					rte_flow_error_set(error, ENOTSUP,
						RTE_FLOW_ERROR_TYPE_ACTION,
						"queue action not in RSS"
			if (rss->num > RTE_DIM(parser->queues)) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   "too many queues for RSS"
			for (n = 0; n < rss->num; ++n) {
				if (rss->queue[n] >= priv->rxqs_n) {
					rte_flow_error_set(error, EINVAL,
							   RTE_FLOW_ERROR_TYPE_ACTION,
							   "queue id > number of"
			for (n = 0; n < rss->num; ++n)
				parser->queues[n] = rss->queue[n];
			parser->queues_n = rss->num;
			if (mlx5_flow_convert_rss_conf(parser, rss->rss_conf)) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   "wrong RSS configuration");
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
			const struct rte_flow_action_mark *mark =
				(const struct rte_flow_action_mark *)
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   "mark must be defined");
			} else if (mark->id >= MLX5_FLOW_MARK_MAX) {
				rte_flow_error_set(error, ENOTSUP,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   "mark must be between 0"
			parser->mark_id = mark->id;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
			   priv->config.flow_counter_en) {
			goto exit_action_not_supported;
	if (parser->drop && parser->mark)
	if (!parser->queues_n && !parser->drop) {
		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "no valid action");
exit_action_not_supported:
	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
			   actions, "action not supported");
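
/*
 * Illustrative sketch (not part of the driver): a minimal action list that
 * passes mlx5_flow_convert_actions(), marking matched packets and steering
 * them to queue 0 (assuming the port has at least one Rx queue):
 *
 *	struct rte_flow_action_mark mark = { .id = 42 };
 *	struct rte_flow_action_queue queue = { .index = 0 };
 *	struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &mark },
 *		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 *
 * At least one of QUEUE, RSS or DROP is required, otherwise the list is
 * rejected with "no valid action".
 */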
 *   Pattern specification (list terminated by the END pattern item).
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_convert_items_validate(const struct rte_flow_item items[],
				 struct rte_flow_error *error,
				 struct mlx5_flow_parse *parser)
	const struct mlx5_flow_items *cur_item = mlx5_flow_items;

	/* Initialise the offsets to start after the verbs attribute. */
	for (i = 0; i != hash_rxq_init_n; ++i)
		parser->queue[i].offset = sizeof(struct ibv_flow_attr);
	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
		const struct mlx5_flow_items *token = NULL;

		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
		     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
			if (cur_item->items[i] == items->type) {
				token = &mlx5_flow_items[items->type];
			goto exit_item_not_supported;
		ret = mlx5_flow_item_validate(items,
					      (const uint8_t *)cur_item->mask,
			goto exit_item_not_supported;
		if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
				rte_flow_error_set(error, ENOTSUP,
						   RTE_FLOW_ERROR_TYPE_ITEM,
						   "cannot recognize multiple"
						   " VXLAN encapsulations");
			parser->inner = IBV_FLOW_SPEC_INNER;
			parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
			for (n = 0; n != hash_rxq_init_n; ++n)
				parser->queue[n].offset += cur_item->dst_sz;
		parser->queue[HASH_RXQ_ETH].offset +=
			sizeof(struct ibv_flow_spec_action_drop);
		for (i = 0; i != hash_rxq_init_n; ++i)
			parser->queue[i].offset +=
				sizeof(struct ibv_flow_spec_action_tag);
		unsigned int size = sizeof(struct ibv_flow_spec_counter_action);

		for (i = 0; i != hash_rxq_init_n; ++i)
			parser->queue[i].offset += size;
exit_item_not_supported:
	return rte_flow_error_set(error, -ret, RTE_FLOW_ERROR_TYPE_ITEM,
				  items, "item not supported");
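
/*
 * Illustrative sketch (not part of the driver): patterns must follow the
 * mlx5_flow_items graph above, e.g. ETH -> IPV4 -> UDP -> VXLAN. A UDP
 * item directly after ETH is rejected as "item not supported":
 *
 *	struct rte_flow_item ok[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_UDP },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 *	struct rte_flow_item bad[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_UDP }, // Not reachable from ETH.
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 */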
 * Allocate memory space to store verbs flow attributes.
 *
 *   Amount of bytes to allocate.
 *   Perform verbose error reporting if not NULL.
 *
 *   A verbs flow attribute on success, NULL otherwise and rte_errno is set.
static struct ibv_flow_attr *
mlx5_flow_convert_allocate(unsigned int size, struct rte_flow_error *error)
	struct ibv_flow_attr *ibv_attr;

	ibv_attr = rte_calloc(__func__, 1, size, 0);
		rte_flow_error_set(error, ENOMEM,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   "cannot allocate verbs spec attributes");
 * Give inner packet matching a higher priority than non-inner matching.
 *
 * @param[in, out] parser
 *   Internal parser structure.
 *   User flow attribute.
mlx5_flow_update_priority(struct mlx5_flow_parse *parser,
			  const struct rte_flow_attr *attr)
		parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
			hash_rxq_init[HASH_RXQ_ETH].flow_priority;
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser->queue[i].ibv_attr) {
			parser->queue[i].ibv_attr->priority =
				hash_rxq_init[i].flow_priority -
				(parser->inner ? 1 : 0);
 * Finalise verbs flow attributes.
 *
 * @param[in, out] parser
 *   Internal parser structure.
mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
	const unsigned int ipv4 =
		hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
	const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 : HASH_RXQ_TCPV6;
	const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
	const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 : HASH_RXQ_TCPV4;
	const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 : HASH_RXQ_IPV4;
	const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;

	/* Remove any other flow not matching the pattern. */
	if (parser->queues_n == 1 && !parser->rss_conf.rss_hf) {
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (i == HASH_RXQ_ETH)
			rte_free(parser->queue[i].ibv_attr);
			parser->queue[i].ibv_attr = NULL;
	if (parser->layer == HASH_RXQ_ETH) {
		 * This layer becomes useless as the pattern defines deeper layers.
		rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
		parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
	/* Remove the opposite kind of layer, e.g. IPv6 if the pattern is IPv4. */
	for (i = ohmin; i != (ohmax + 1); ++i) {
		if (!parser->queue[i].ibv_attr)
		rte_free(parser->queue[i].ibv_attr);
		parser->queue[i].ibv_attr = NULL;
	/* Remove impossible flows according to the RSS configuration. */
	if (hash_rxq_init[parser->layer].dpdk_rss_hf &
	    parser->rss_conf.rss_hf) {
		/* Remove any other flow. */
		for (i = hmin; i != (hmax + 1); ++i) {
			if ((i == parser->layer) ||
			    (!parser->queue[i].ibv_attr))
			rte_free(parser->queue[i].ibv_attr);
			parser->queue[i].ibv_attr = NULL;
	} else if (!parser->queue[ip].ibv_attr) {
		/* No RSS possible with the current configuration. */
		parser->queues_n = 1;
	 * Fill missing layers in verbs specifications, or compute the correct
	 * offset to allocate the memory space for the attributes and
	for (i = 0; i != hash_rxq_init_n - 1; ++i) {
			struct ibv_flow_spec_ipv4_ext ipv4;
			struct ibv_flow_spec_ipv6 ipv6;
			struct ibv_flow_spec_tcp_udp udp_tcp;
		if (i == parser->layer)
		if (parser->layer == HASH_RXQ_ETH) {
			if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
				size = sizeof(struct ibv_flow_spec_ipv4_ext);
				specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
					.type = IBV_FLOW_SPEC_IPV4_EXT,
				size = sizeof(struct ibv_flow_spec_ipv6);
				specs.ipv6 = (struct ibv_flow_spec_ipv6){
					.type = IBV_FLOW_SPEC_IPV6,
			if (parser->queue[i].ibv_attr) {
				dst = (void *)((uintptr_t)
					       parser->queue[i].ibv_attr +
					       parser->queue[i].offset);
				memcpy(dst, &specs, size);
				++parser->queue[i].ibv_attr->num_of_specs;
			parser->queue[i].offset += size;
		if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
		    (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
			size = sizeof(struct ibv_flow_spec_tcp_udp);
			specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
				.type = ((i == HASH_RXQ_UDPV4 ||
					  i == HASH_RXQ_UDPV6) ?
			if (parser->queue[i].ibv_attr) {
				dst = (void *)((uintptr_t)
					       parser->queue[i].ibv_attr +
					       parser->queue[i].offset);
				memcpy(dst, &specs, size);
				++parser->queue[i].ibv_attr->num_of_specs;
			parser->queue[i].offset += size;
 * Validate and convert a flow supported by the NIC.
 *
 *   Pointer to Ethernet device.
 *   Flow rule attributes.
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_convert(struct rte_eth_dev *dev,
		  const struct rte_flow_attr *attr,
		  const struct rte_flow_item items[],
		  const struct rte_flow_action actions[],
		  struct rte_flow_error *error,
		  struct mlx5_flow_parse *parser)
	const struct mlx5_flow_items *cur_item = mlx5_flow_items;

	/* First step. Validate the attributes, items and actions. */
	*parser = (struct mlx5_flow_parse){
		.create = parser->create,
		.layer = HASH_RXQ_ETH,
		.mark_id = MLX5_FLOW_MARK_DEFAULT,
	ret = mlx5_flow_convert_attributes(attr, error);
	ret = mlx5_flow_convert_actions(dev, actions, error, parser);
	ret = mlx5_flow_convert_items_validate(items, error, parser);
	mlx5_flow_convert_finalise(parser);

	 * Allocate the memory space to store verbs specifications.
		unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;

		parser->queue[HASH_RXQ_ETH].ibv_attr =
			mlx5_flow_convert_allocate(offset, error);
		if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
		parser->queue[HASH_RXQ_ETH].offset =
			sizeof(struct ibv_flow_attr);
		for (i = 0; i != hash_rxq_init_n; ++i) {
			unsigned int offset;

			if (!(parser->rss_conf.rss_hf &
			      hash_rxq_init[i].dpdk_rss_hf) &&
			    (i != HASH_RXQ_ETH))
			offset = parser->queue[i].offset;
			parser->queue[i].ibv_attr =
				mlx5_flow_convert_allocate(offset, error);
			if (!parser->queue[i].ibv_attr)
			parser->queue[i].offset = sizeof(struct ibv_flow_attr);
	/* Third step. Parse the items and fill the specifications. */
	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
		struct mlx5_flow_data data = {

		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
		cur_item = &mlx5_flow_items[items->type];
		ret = cur_item->convert(items,
					(cur_item->default_mask ?
					 cur_item->default_mask :
		mlx5_flow_create_flag_mark(parser, parser->mark_id);
	if (parser->count && parser->create) {
		mlx5_flow_create_count(dev, parser);
			goto exit_count_error;
	/*
	 * Last step. Complete missing specification to reach the RSS
	mlx5_flow_convert_finalise(parser);
	mlx5_flow_update_priority(parser, attr);

	/* Only verification is expected, all resources should be released. */
	if (!parser->create) {
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (parser->queue[i].ibv_attr) {
				rte_free(parser->queue[i].ibv_attr);
				parser->queue[i].ibv_attr = NULL;
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser->queue[i].ibv_attr) {
			rte_free(parser->queue[i].ibv_attr);
			parser->queue[i].ibv_attr = NULL;
	rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
			   NULL, "cannot allocate verbs spec attributes");
	rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
			   NULL, "cannot create counter");
 * Copy the specification created into the flow.
 *
 *   Internal parser structure.
 *   Specification to copy.
 *   Size in bytes of the specification to copy.
mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (!parser->queue[i].ibv_attr)
		/* Specification must be the same L3 type or none. */
		if (parser->layer == HASH_RXQ_ETH ||
		    (hash_rxq_init[parser->layer].ip_version ==
		     hash_rxq_init[i].ip_version) ||
		    (hash_rxq_init[i].ip_version == 0)) {
			dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
				       parser->queue[i].offset);
			memcpy(dst, src, size);
			++parser->queue[i].ibv_attr->num_of_specs;
			parser->queue[i].offset += size;
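
/*
 * Illustrative layout sketch (not part of the driver): each
 * parser->queue[i].ibv_attr is one allocation holding the Verbs attribute
 * header followed by the specs appended by mlx5_flow_create_copy();
 * queue[i].offset tracks the write position:
 *
 *	[struct ibv_flow_attr][spec 0][spec 1]...[spec n-1]
 *
 * offset starts at sizeof(struct ibv_flow_attr) and grows by the dst_sz of
 * every copied spec, while num_of_specs is incremented accordingly.
 */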
 * Convert Ethernet item to Verbs specification.
 *
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_create_eth(const struct rte_flow_item *item,
		     const void *default_mask,
		     struct mlx5_flow_data *data)
	const struct rte_flow_item_eth *spec = item->spec;
	const struct rte_flow_item_eth *mask = item->mask;
	struct mlx5_flow_parse *parser = data->parser;
	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
	struct ibv_flow_spec_eth eth = {
		.type = parser->inner | IBV_FLOW_SPEC_ETH,

	/* Don't update layer for the inner pattern. */
		parser->layer = HASH_RXQ_ETH;
			mask = default_mask;
		memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
		memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
		eth.val.ether_type = spec->type;
		memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
		memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
		eth.mask.ether_type = mask->type;
		/* Remove unwanted bits from values. */
		for (i = 0; i < ETHER_ADDR_LEN; ++i) {
			eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
			eth.val.src_mac[i] &= eth.mask.src_mac[i];
		eth.val.ether_type &= eth.mask.ether_type;
	mlx5_flow_create_copy(parser, &eth, eth_size);
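
/*
 * Illustrative sketch (not part of the driver): an Ethernet item matching
 * one destination MAC, relying on default_mask for the fields left unset:
 *
 *	const struct rte_flow_item_eth spec = {
 *		.dst.addr_bytes = "\x00\x11\x22\x33\x44\x55",
 *	};
 *	const struct rte_flow_item_eth mask = {
 *		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *	};
 *	struct rte_flow_item item = {
 *		.type = RTE_FLOW_ITEM_TYPE_ETH,
 *		.spec = &spec,
 *		.mask = &mask,
 *	};
 */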
 * Convert VLAN item to Verbs specification.
 *
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_create_vlan(const struct rte_flow_item *item,
		      const void *default_mask,
		      struct mlx5_flow_data *data)
	const struct rte_flow_item_vlan *spec = item->spec;
	const struct rte_flow_item_vlan *mask = item->mask;
	struct mlx5_flow_parse *parser = data->parser;
	struct ibv_flow_spec_eth *eth;
	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);

			mask = default_mask;
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (!parser->queue[i].ibv_attr)
			eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
				       parser->queue[i].offset - eth_size);
			eth->val.vlan_tag = spec->tci;
			eth->mask.vlan_tag = mask->tci;
			eth->val.vlan_tag &= eth->mask.vlan_tag;
			 * From the Verbs perspective an empty VLAN is
			 * equivalent to a packet without a VLAN layer.
			if (!eth->mask.vlan_tag)
	return rte_flow_error_set(data->error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM,
				  item, "VLAN cannot be empty");
 * Convert IPv4 item to Verbs specification.
 *
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
		      const void *default_mask,
		      struct mlx5_flow_data *data)
	const struct rte_flow_item_ipv4 *spec = item->spec;
	const struct rte_flow_item_ipv4 *mask = item->mask;
	struct mlx5_flow_parse *parser = data->parser;
	unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
	struct ibv_flow_spec_ipv4_ext ipv4 = {
		.type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,

	/* Don't update layer for the inner pattern. */
		parser->layer = HASH_RXQ_IPV4;
			mask = default_mask;
		ipv4.val = (struct ibv_flow_ipv4_ext_filter){
			.src_ip = spec->hdr.src_addr,
			.dst_ip = spec->hdr.dst_addr,
			.proto = spec->hdr.next_proto_id,
			.tos = spec->hdr.type_of_service,
		ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
			.src_ip = mask->hdr.src_addr,
			.dst_ip = mask->hdr.dst_addr,
			.proto = mask->hdr.next_proto_id,
			.tos = mask->hdr.type_of_service,
		/* Remove unwanted bits from values. */
		ipv4.val.src_ip &= ipv4.mask.src_ip;
		ipv4.val.dst_ip &= ipv4.mask.dst_ip;
		ipv4.val.proto &= ipv4.mask.proto;
		ipv4.val.tos &= ipv4.mask.tos;
	mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
 * Convert IPv6 item to Verbs specification.
 *
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
		      const void *default_mask,
		      struct mlx5_flow_data *data)
	const struct rte_flow_item_ipv6 *spec = item->spec;
	const struct rte_flow_item_ipv6 *mask = item->mask;
	struct mlx5_flow_parse *parser = data->parser;
	unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
	struct ibv_flow_spec_ipv6 ipv6 = {
		.type = parser->inner | IBV_FLOW_SPEC_IPV6,

	/* Don't update layer for the inner pattern. */
		parser->layer = HASH_RXQ_IPV6;
		uint32_t vtc_flow_val;
		uint32_t vtc_flow_mask;

			mask = default_mask;
		memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
		       RTE_DIM(ipv6.val.src_ip));
		memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
		       RTE_DIM(ipv6.val.dst_ip));
		memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
		       RTE_DIM(ipv6.mask.src_ip));
		memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
		       RTE_DIM(ipv6.mask.dst_ip));
		vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
		vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
		ipv6.val.flow_label =
			rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
		ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
		ipv6.val.next_hdr = spec->hdr.proto;
		ipv6.val.hop_limit = spec->hdr.hop_limits;
		ipv6.mask.flow_label =
			rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
		ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
		ipv6.mask.next_hdr = mask->hdr.proto;
		ipv6.mask.hop_limit = mask->hdr.hop_limits;
		/* Remove unwanted bits from values. */
		for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
			ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
			ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
		ipv6.val.flow_label &= ipv6.mask.flow_label;
		ipv6.val.traffic_class &= ipv6.mask.traffic_class;
		ipv6.val.next_hdr &= ipv6.mask.next_hdr;
		ipv6.val.hop_limit &= ipv6.mask.hop_limit;
	mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
 * Convert UDP item to Verbs specification.
 *
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_create_udp(const struct rte_flow_item *item,
		     const void *default_mask,
		     struct mlx5_flow_data *data)
	const struct rte_flow_item_udp *spec = item->spec;
	const struct rte_flow_item_udp *mask = item->mask;
	struct mlx5_flow_parse *parser = data->parser;
	unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
	struct ibv_flow_spec_tcp_udp udp = {
		.type = parser->inner | IBV_FLOW_SPEC_UDP,

	/* Don't update layer for the inner pattern. */
	if (!parser->inner) {
		if (parser->layer == HASH_RXQ_IPV4)
			parser->layer = HASH_RXQ_UDPV4;
			parser->layer = HASH_RXQ_UDPV6;
			mask = default_mask;
		udp.val.dst_port = spec->hdr.dst_port;
		udp.val.src_port = spec->hdr.src_port;
		udp.mask.dst_port = mask->hdr.dst_port;
		udp.mask.src_port = mask->hdr.src_port;
		/* Remove unwanted bits from values. */
		udp.val.src_port &= udp.mask.src_port;
		udp.val.dst_port &= udp.mask.dst_port;
	mlx5_flow_create_copy(parser, &udp, udp_size);
 * Convert TCP item to Verbs specification.
 *
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_create_tcp(const struct rte_flow_item *item,
		     const void *default_mask,
		     struct mlx5_flow_data *data)
	const struct rte_flow_item_tcp *spec = item->spec;
	const struct rte_flow_item_tcp *mask = item->mask;
	struct mlx5_flow_parse *parser = data->parser;
	unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
	struct ibv_flow_spec_tcp_udp tcp = {
		.type = parser->inner | IBV_FLOW_SPEC_TCP,

	/* Don't update layer for the inner pattern. */
	if (!parser->inner) {
		if (parser->layer == HASH_RXQ_IPV4)
			parser->layer = HASH_RXQ_TCPV4;
			parser->layer = HASH_RXQ_TCPV6;
			mask = default_mask;
		tcp.val.dst_port = spec->hdr.dst_port;
		tcp.val.src_port = spec->hdr.src_port;
		tcp.mask.dst_port = mask->hdr.dst_port;
		tcp.mask.src_port = mask->hdr.src_port;
		/* Remove unwanted bits from values. */
		tcp.val.src_port &= tcp.mask.src_port;
		tcp.val.dst_port &= tcp.mask.dst_port;
	mlx5_flow_create_copy(parser, &tcp, tcp_size);
 * Convert VXLAN item to Verbs specification.
 *
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
		       const void *default_mask,
		       struct mlx5_flow_data *data)
	const struct rte_flow_item_vxlan *spec = item->spec;
	const struct rte_flow_item_vxlan *mask = item->mask;
	struct mlx5_flow_parse *parser = data->parser;
	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
	struct ibv_flow_spec_tunnel vxlan = {
		.type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,

	parser->inner = IBV_FLOW_SPEC_INNER;
			mask = default_mask;
		memcpy(&id.vni[1], spec->vni, 3);
		vxlan.val.tunnel_id = id.vlan_id;
		memcpy(&id.vni[1], mask->vni, 3);
		vxlan.mask.tunnel_id = id.vlan_id;
		/* Remove unwanted bits from values. */
		vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
	 * Tunnel id 0 is equivalent to not adding a VXLAN layer: if only this
	 * layer is defined in the Verbs specification, it is interpreted as a
	 * wildcard and all packets will match this rule; if it follows a full
	 * stack layer (e.g. eth / ipv4 / udp), all packets matching the
	 * preceding layers will also match this rule.
	 * To avoid such a situation, VNI 0 is currently refused.
	if (!vxlan.val.tunnel_id)
		return rte_flow_error_set(data->error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  "VxLAN vni cannot be 0");
	mlx5_flow_create_copy(parser, &vxlan, size);
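
/*
 * Illustrative sketch (not part of the driver): a VXLAN item carrying a
 * non-zero VNI (network order, 24 bits), here VNI 0x000123:
 *
 *	const struct rte_flow_item_vxlan spec = {
 *		.vni = "\x00\x01\x23",
 *	};
 *	struct rte_flow_item item = {
 *		.type = RTE_FLOW_ITEM_TYPE_VXLAN,
 *		.spec = &spec, // default_mask covers all 24 VNI bits.
 *	};
 */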
 * Convert mark/flag action to Verbs specification.
 *
 *   Internal parser structure.
 *
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
	struct ibv_flow_spec_action_tag tag = {
		.type = IBV_FLOW_SPEC_ACTION_TAG,
		.tag_id = mlx5_flow_mark_set(mark_id),

	assert(parser->mark);
	mlx5_flow_create_copy(parser, &tag, size);
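
/*
 * Illustrative sketch (not part of the driver): packets hitting a flow with
 * a MARK action carry the identifier back to the application in the mbuf;
 * assuming the usual DPDK reporting convention:
 *
 *	if (mbuf->ol_flags & PKT_RX_FDIR_ID)
 *		printf("mark: %" PRIu32 "\n", mbuf->hash.fdir.hi);
 */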
 * Convert count action to Verbs specification.
 *
 *   Pointer to Ethernet device.
 *   Pointer to MLX5 flow parser structure.
 *
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_create_count(struct rte_eth_dev *dev __rte_unused,
		       struct mlx5_flow_parse *parser __rte_unused)
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
	struct priv *priv = dev->data->dev_private;
	unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
	struct ibv_counter_set_init_attr init_attr = {0};
	struct ibv_flow_spec_counter_action counter = {
		.type = IBV_FLOW_SPEC_ACTION_COUNT,
		.counter_set_handle = 0,

	init_attr.counter_set_id = 0;
	parser->cs = mlx5_glue->create_counter_set(priv->ctx, &init_attr);
	counter.counter_set_handle = parser->cs->handle;
	mlx5_flow_create_copy(parser, &counter, size);
 * Complete flow rule creation with a drop queue.
 *
 *   Pointer to Ethernet device.
 *   Internal parser structure.
 *   Pointer to the rte_flow.
 *   Perform verbose error reporting if not NULL.
 *
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_create_action_queue_drop(struct rte_eth_dev *dev,
				   struct mlx5_flow_parse *parser,
				   struct rte_flow *flow,
				   struct rte_flow_error *error)
	struct priv *priv = dev->data->dev_private;
	struct ibv_flow_spec_action_drop *drop;
	unsigned int size = sizeof(struct ibv_flow_spec_action_drop);

	drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
			parser->queue[HASH_RXQ_ETH].offset);
	*drop = (struct ibv_flow_spec_action_drop){
		.type = IBV_FLOW_SPEC_ACTION_DROP,
	++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
	parser->queue[HASH_RXQ_ETH].offset += size;
	flow->frxq[HASH_RXQ_ETH].ibv_attr =
		parser->queue[HASH_RXQ_ETH].ibv_attr;
		flow->cs = parser->cs;
	if (!priv->dev->data->dev_started)
	parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
	flow->frxq[HASH_RXQ_ETH].ibv_flow =
		mlx5_glue->create_flow(priv->flow_drop_queue->qp,
				       flow->frxq[HASH_RXQ_ETH].ibv_attr);
	if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "flow rule creation failure");
	if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
		claim_zero(mlx5_glue->destroy_flow
			   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
		flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
	if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
		rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
		flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
		claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
 * Create hash Rx queues when RSS is enabled.
 *
 *   Pointer to Ethernet device.
 *   Internal parser structure.
 *   Pointer to the rte_flow.
 *   Perform verbose error reporting if not NULL.
 *
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_create_action_queue_rss(struct rte_eth_dev *dev,
				  struct mlx5_flow_parse *parser,
				  struct rte_flow *flow,
				  struct rte_flow_error *error)
	struct priv *priv = dev->data->dev_private;

	for (i = 0; i != hash_rxq_init_n; ++i) {
		uint64_t hash_fields;

		if (!parser->queue[i].ibv_attr)
		flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
		parser->queue[i].ibv_attr = NULL;
		hash_fields = hash_rxq_init[i].hash_fields;
		if (!priv->dev->data->dev_started)
		flow->frxq[i].hrxq =
			      parser->rss_conf.rss_key,
			      parser->rss_conf.rss_key_len,
		if (flow->frxq[i].hrxq)
		flow->frxq[i].hrxq =
			      parser->rss_conf.rss_key,
			      parser->rss_conf.rss_key_len,
		if (!flow->frxq[i].hrxq) {
			return rte_flow_error_set(error, ENOMEM,
						  RTE_FLOW_ERROR_TYPE_HANDLE,
						  "cannot create hash rxq");
 * Complete flow rule creation.
 *
 *   Pointer to Ethernet device.
 *   Internal parser structure.
 *   Pointer to the rte_flow.
 *   Perform verbose error reporting if not NULL.
 *
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_create_action_queue(struct rte_eth_dev *dev,
			      struct mlx5_flow_parse *parser,
			      struct rte_flow *flow,
			      struct rte_flow_error *error)
	struct priv *priv = dev->data->dev_private;
	unsigned int flows_n = 0;

	assert(!parser->drop);
	ret = mlx5_flow_create_action_queue_rss(dev, parser, flow, error);
		flow->cs = parser->cs;
	if (!priv->dev->data->dev_started)
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (!flow->frxq[i].hrxq)
		flow->frxq[i].ibv_flow =
			mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
					       flow->frxq[i].ibv_attr);
		if (!flow->frxq[i].ibv_flow) {
			rte_flow_error_set(error, ENOMEM,
					   RTE_FLOW_ERROR_TYPE_HANDLE,
					   NULL, "flow rule creation failure");
		DRV_LOG(DEBUG, "port %u %p type %d QP %p ibv_flow %p",
			(void *)flow->frxq[i].hrxq,
			(void *)flow->frxq[i].ibv_flow);
	rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE,
			   NULL, "internal error in flow creation");
	for (i = 0; i != parser->queues_n; ++i) {
		struct mlx5_rxq_data *q =
			(*priv->rxqs)[parser->queues[i]];

		q->mark |= parser->mark;
	ret = rte_errno; /* Save rte_errno before cleanup. */
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (flow->frxq[i].ibv_flow) {
			struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;

			claim_zero(mlx5_glue->destroy_flow(ibv_flow));
		if (flow->frxq[i].hrxq)
			mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
		if (flow->frxq[i].ibv_attr)
			rte_free(flow->frxq[i].ibv_attr);
		claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
	rte_errno = ret; /* Restore rte_errno. */
 *   Pointer to Ethernet device.
 *   Pointer to a TAILQ flow list.
 *   Flow rule attributes.
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 *   Perform verbose error reporting if not NULL.
 *
 *   A flow on success, NULL otherwise and rte_errno is set.
static struct rte_flow *
mlx5_flow_list_create(struct rte_eth_dev *dev,
		      struct mlx5_flows *list,
		      const struct rte_flow_attr *attr,
		      const struct rte_flow_item items[],
		      const struct rte_flow_action actions[],
		      struct rte_flow_error *error)
	struct mlx5_flow_parse parser = { .create = 1, };
	struct rte_flow *flow = NULL;

	ret = mlx5_flow_convert(dev, attr, items, actions, error, &parser);
	flow = rte_calloc(__func__, 1,
			  sizeof(*flow) + parser.queues_n * sizeof(uint16_t),
		rte_flow_error_set(error, ENOMEM,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   "cannot allocate flow memory");
	/* Copy the queue configuration. */
	flow->queues = (uint16_t (*)[])(flow + 1);
	memcpy(flow->queues, parser.queues, parser.queues_n * sizeof(uint16_t));
	flow->queues_n = parser.queues_n;
	flow->mark = parser.mark;
	/* Copy RSS configuration. */
	flow->rss_conf = parser.rss_conf;
	flow->rss_conf.rss_key = flow->rss_key;
	memcpy(flow->rss_key, parser.rss_key, parser.rss_conf.rss_key_len);
	/* Finalise the flow. */
		ret = mlx5_flow_create_action_queue_drop(dev, &parser, flow,
		ret = mlx5_flow_create_action_queue(dev, &parser, flow, error);
	TAILQ_INSERT_TAIL(list, flow, next);
	DRV_LOG(DEBUG, "port %u flow created %p", dev->data->port_id,
	DRV_LOG(ERR, "port %u flow creation error: %s", dev->data->port_id,
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser.queue[i].ibv_attr)
			rte_free(parser.queue[i].ibv_attr);
 * Validate a flow supported by the NIC.
 *
 * @see rte_flow_validate()
mlx5_flow_validate(struct rte_eth_dev *dev,
		   const struct rte_flow_attr *attr,
		   const struct rte_flow_item items[],
		   const struct rte_flow_action actions[],
		   struct rte_flow_error *error)
	struct mlx5_flow_parse parser = { .create = 0, };

	return mlx5_flow_convert(dev, attr, items, actions, error, &parser);
 * @see rte_flow_create()
mlx5_flow_create(struct rte_eth_dev *dev,
		 const struct rte_flow_attr *attr,
		 const struct rte_flow_item items[],
		 const struct rte_flow_action actions[],
		 struct rte_flow_error *error)
	struct priv *priv = dev->data->dev_private;

	return mlx5_flow_list_create(dev, &priv->flows, attr, items, actions,
 * Destroy a flow in a list.
 *
 *   Pointer to Ethernet device.
 *   Pointer to a TAILQ flow list.
mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
		       struct rte_flow *flow)
	struct priv *priv = dev->data->dev_private;

	if (flow->drop || !flow->mark)
	for (i = 0; i != flow->queues_n; ++i) {
		struct rte_flow *tmp;

		 * To remove the mark from the queue, the queue must not be
		 * present in any other marked flow (RSS or not).
		TAILQ_FOREACH(tmp, list, next) {
			uint16_t *tqs = NULL;

			for (j = 0; j != hash_rxq_init_n; ++j) {
				if (!tmp->frxq[j].hrxq)
				tqs = tmp->frxq[j].hrxq->ind_table->queues;
				tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
			for (j = 0; (j != tq_n) && !mark; j++)
				if (tqs[j] == (*flow->queues)[i])
		(*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
	if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
		claim_zero(mlx5_glue->destroy_flow
			   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
	rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
	for (i = 0; i != hash_rxq_init_n; ++i) {
		struct mlx5_flow *frxq = &flow->frxq[i];

			claim_zero(mlx5_glue->destroy_flow
			mlx5_hrxq_release(dev, frxq->hrxq);
			rte_free(frxq->ibv_attr);
		claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
	TAILQ_REMOVE(list, flow, next);
	DRV_LOG(DEBUG, "port %u flow destroyed %p", dev->data->port_id,
 * Destroy all flows.
 *
 *   Pointer to Ethernet device.
 *   Pointer to a TAILQ flow list.
mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
	while (!TAILQ_EMPTY(list)) {
		struct rte_flow *flow;

		flow = TAILQ_FIRST(list);
		mlx5_flow_list_destroy(dev, list, flow);
 * Create drop queue.
 *
 *   Pointer to Ethernet device.
 *
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_create_drop_queue(struct rte_eth_dev *dev)
	struct priv *priv = dev->data->dev_private;
	struct mlx5_hrxq_drop *fdq = NULL;

	fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
			"port %u cannot allocate memory for drop queue",
			dev->data->port_id);
	fdq->cq = mlx5_glue->create_cq(priv->ctx, 1, NULL, NULL, 0);
		DRV_LOG(WARNING, "port %u cannot allocate CQ for drop queue",
			dev->data->port_id);
	fdq->wq = mlx5_glue->create_wq
		 &(struct ibv_wq_init_attr){
			.wq_type = IBV_WQT_RQ,
		DRV_LOG(WARNING, "port %u cannot allocate WQ for drop queue",
			dev->data->port_id);
	fdq->ind_table = mlx5_glue->create_rwq_ind_table
		 &(struct ibv_rwq_ind_table_init_attr){
			.log_ind_tbl_size = 0,
			.ind_tbl = &fdq->wq,
	if (!fdq->ind_table) {
		DRV_LOG(WARNING,
			"port %u cannot allocate indirection table for drop"
			dev->data->port_id);
	fdq->qp = mlx5_glue->create_qp_ex
		 &(struct ibv_qp_init_attr_ex){
			.qp_type = IBV_QPT_RAW_PACKET,
				IBV_QP_INIT_ATTR_PD |
				IBV_QP_INIT_ATTR_IND_TABLE |
				IBV_QP_INIT_ATTR_RX_HASH,
			.rx_hash_conf = (struct ibv_rx_hash_conf){
				IBV_RX_HASH_FUNC_TOEPLITZ,
				.rx_hash_key_len = rss_hash_default_key_len,
				.rx_hash_key = rss_hash_default_key,
				.rx_hash_fields_mask = 0,
			.rwq_ind_tbl = fdq->ind_table,
		DRV_LOG(WARNING, "port %u cannot allocate QP for drop queue",
			dev->data->port_id);
	priv->flow_drop_queue = fdq;
	claim_zero(mlx5_glue->destroy_qp(fdq->qp));
	claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
	claim_zero(mlx5_glue->destroy_wq(fdq->wq));
	claim_zero(mlx5_glue->destroy_cq(fdq->cq));
	priv->flow_drop_queue = NULL;
 * Delete drop queue.
 *
 *   Pointer to Ethernet device.
mlx5_flow_delete_drop_queue(struct rte_eth_dev *dev)
	struct priv *priv = dev->data->dev_private;
	struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;

		claim_zero(mlx5_glue->destroy_qp(fdq->qp));
		claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
		claim_zero(mlx5_glue->destroy_wq(fdq->wq));
		claim_zero(mlx5_glue->destroy_cq(fdq->cq));
	priv->flow_drop_queue = NULL;
 *   Pointer to Ethernet device.
 *   Pointer to a TAILQ flow list.
mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
	struct priv *priv = dev->data->dev_private;
	struct rte_flow *flow;

	TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
		struct mlx5_ind_table_ibv *ind_tbl = NULL;

			if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
			claim_zero(mlx5_glue->destroy_flow
				   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
			flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
			DRV_LOG(DEBUG, "port %u flow %p removed",
				dev->data->port_id, (void *)flow);
		/* Verify the flow has not already been cleaned. */
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (!flow->frxq[i].ibv_flow)
			 * Indirection table may be necessary to remove the
			 * flags in the Rx queues.
			 * This helps to speed up the process by avoiding
			ind_tbl = flow->frxq[i].hrxq->ind_table;
		if (i == hash_rxq_init_n)
		for (i = 0; i != ind_tbl->queues_n; ++i)
			(*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (!flow->frxq[i].ibv_flow)
			claim_zero(mlx5_glue->destroy_flow
				   (flow->frxq[i].ibv_flow));
			flow->frxq[i].ibv_flow = NULL;
			mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
			flow->frxq[i].hrxq = NULL;
		DRV_LOG(DEBUG, "port %u flow %p removed", dev->data->port_id,
 *   Pointer to Ethernet device.
 *   Pointer to a TAILQ flow list.
 *
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
	struct priv *priv = dev->data->dev_private;
	struct rte_flow *flow;

	TAILQ_FOREACH(flow, list, next) {
			flow->frxq[HASH_RXQ_ETH].ibv_flow =
				mlx5_glue->create_flow
				(priv->flow_drop_queue->qp,
				 flow->frxq[HASH_RXQ_ETH].ibv_attr);
			if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
					"port %u flow %p cannot be applied",
					dev->data->port_id, (void *)flow);
			DRV_LOG(DEBUG, "port %u flow %p applied",
				dev->data->port_id, (void *)flow);
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (!flow->frxq[i].ibv_attr)
			flow->frxq[i].hrxq =
				mlx5_hrxq_get(dev, flow->rss_conf.rss_key,
					      flow->rss_conf.rss_key_len,
					      hash_rxq_init[i].hash_fields,
			if (flow->frxq[i].hrxq)
			flow->frxq[i].hrxq =
				mlx5_hrxq_new(dev, flow->rss_conf.rss_key,
					      flow->rss_conf.rss_key_len,
					      hash_rxq_init[i].hash_fields,
			if (!flow->frxq[i].hrxq) {
					"port %u flow %p cannot be applied",
					dev->data->port_id, (void *)flow);
			flow->frxq[i].ibv_flow =
				mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
						       flow->frxq[i].ibv_attr);
			if (!flow->frxq[i].ibv_flow) {
					"port %u flow %p cannot be applied",
					dev->data->port_id, (void *)flow);
			DRV_LOG(DEBUG, "port %u flow %p applied",
				dev->data->port_id, (void *)flow);
		for (i = 0; i != flow->queues_n; ++i)
			(*priv->rxqs)[(*flow->queues)[i]]->mark = 1;
 * Verify the flow list is empty.
 *
 *   Pointer to Ethernet device.
 *
 * @return the number of flows not released.
mlx5_flow_verify(struct rte_eth_dev *dev)
	struct priv *priv = dev->data->dev_private;
	struct rte_flow *flow;

	TAILQ_FOREACH(flow, &priv->flows, next) {
		DRV_LOG(DEBUG, "port %u flow %p still referenced",
			dev->data->port_id, (void *)flow);
 * Enable a control flow configured from the control plane.
 *
 *   Pointer to Ethernet device.
 *   An Ethernet flow spec to apply.
 *   An Ethernet flow mask to apply.
 *   A VLAN flow spec to apply.
 *   A VLAN flow mask to apply.
 *
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
		    struct rte_flow_item_eth *eth_spec,
		    struct rte_flow_item_eth *eth_mask,
		    struct rte_flow_item_vlan *vlan_spec,
		    struct rte_flow_item_vlan *vlan_mask)
	struct priv *priv = dev->data->dev_private;
	const struct rte_flow_attr attr = {
		.priority = MLX5_CTRL_FLOW_PRIORITY,
	struct rte_flow_item items[] = {
			.type = RTE_FLOW_ITEM_TYPE_ETH,
			.type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
					      RTE_FLOW_ITEM_TYPE_END,
			.type = RTE_FLOW_ITEM_TYPE_END,
	struct rte_flow_action actions[] = {
			.type = RTE_FLOW_ACTION_TYPE_RSS,
			.type = RTE_FLOW_ACTION_TYPE_END,
	struct rte_flow *flow;
	struct rte_flow_error error;
		struct rte_flow_action_rss rss;
			const struct rte_eth_rss_conf *rss_conf;
			uint16_t queue[RTE_MAX_QUEUES_PER_PORT];

	if (!priv->reta_idx_n) {
	for (i = 0; i != priv->reta_idx_n; ++i)
		action_rss.local.queue[i] = (*priv->reta_idx)[i];
	action_rss.local.rss_conf = &priv->rss_conf;
	action_rss.local.num = priv->reta_idx_n;
	actions[0].conf = (const void *)&action_rss.rss;
	flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,

/**
 * Enable a control flow configured from the control plane.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param eth_spec
 *   An Ethernet flow spec to apply.
 * @param eth_mask
 *   An Ethernet flow mask to apply.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_ctrl_flow(struct rte_eth_dev *dev,
	       struct rte_flow_item_eth *eth_spec,
	       struct rte_flow_item_eth *eth_mask)
{
	return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
}
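
/*
 * Usage sketch (illustrative only): the traffic enable path builds its
 * control flows this way, for instance to accept broadcast frames:
 *
 *	struct rte_flow_item_eth bcast = {
 *		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *	};
 *
 *	if (mlx5_ctrl_flow(dev, &bcast, &bcast))
 *		... handle the error, rte_errno is set ...
 */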

/**
 * Destroy a flow.
 *
 * @see rte_flow_destroy()
 * @see rte_flow_ops
 */
int
mlx5_flow_destroy(struct rte_eth_dev *dev,
		  struct rte_flow *flow,
		  struct rte_flow_error *error __rte_unused)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_destroy(dev, &priv->flows, flow);
	return 0;
}

/**
 * Destroy all flows.
 *
 * @see rte_flow_flush()
 * @see rte_flow_ops
 */
int
mlx5_flow_flush(struct rte_eth_dev *dev,
		struct rte_flow_error *error __rte_unused)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_flush(dev, &priv->flows);
	return 0;
}

#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
/**
 * Query flow counter.
 *
 * @param cs
 *   The counter set.
 * @param counter_stats
 *   Counter values read at flow creation or last reset, used to compute
 *   deltas.
 * @param[out] query_count
 *   Returned data from the counter.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_query_count(struct ibv_counter_set *cs,
		      struct mlx5_flow_counter_stats *counter_stats,
		      struct rte_flow_query_count *query_count,
		      struct rte_flow_error *error)
{
	uint64_t counters[2];
	struct ibv_query_counter_set_attr query_cs_attr = {
		.cs = cs,
		.query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
	};
	struct ibv_counter_set_data query_out = {
		.out = counters,
		.outlen = 2 * sizeof(uint64_t),
	};
	int err = mlx5_glue->query_counter_set(&query_cs_attr, &query_out);

	if (err)
		return rte_flow_error_set(error, err,
					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
					  NULL,
					  "cannot read counter");
	query_count->hits_set = 1;
	query_count->bytes_set = 1;
	/* Report deltas relative to the last reset. */
	query_count->hits = counters[0] - counter_stats->hits;
	query_count->bytes = counters[1] - counter_stats->bytes;
	if (query_count->reset) {
		counter_stats->hits = counters[0];
		counter_stats->bytes = counters[1];
	}
	return 0;
}

/**
 * Query a flow.
 *
 * @see rte_flow_query()
 * @see rte_flow_ops
 */
int
mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
		struct rte_flow *flow,
		enum rte_flow_action_type action __rte_unused,
		void *data,
		struct rte_flow_error *error)
{
	if (flow->cs) {
		int ret;

		ret = mlx5_flow_query_count(flow->cs,
					    &flow->counter_stats,
					    (struct rte_flow_query_count *)data,
					    error);
		if (ret)
			return ret;
	} else {
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
					  NULL,
					  "no counter found for flow");
	}
	return 0;
}
#endif

/**
 * Isolated mode.
 *
 * @see rte_flow_isolate()
 * @see rte_flow_ops
 */
int
mlx5_flow_isolate(struct rte_eth_dev *dev,
		  int enable,
		  struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	if (dev->data->dev_started) {
		rte_flow_error_set(error, EBUSY,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "port must be stopped first");
		return -rte_errno;
	}
	priv->isolated = !!enable;
	if (enable)
		priv->dev->dev_ops = &mlx5_dev_ops_isolate;
	else
		priv->dev->dev_ops = &mlx5_dev_ops;
	return 0;
}
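
/*
 * Note: isolated mode can only be toggled on a stopped port, as enforced
 * by the EBUSY check above. A minimal sketch from the application side:
 *
 *	struct rte_flow_error err;
 *
 *	if (rte_flow_isolate(port_id, 1, &err))
 *		... port is started, stop it before retrying ...
 */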

/**
 * Convert a flow director filter to a generic flow.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Flow director filter to add.
 * @param attributes
 *   Generic flow parameters structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
			 const struct rte_eth_fdir_filter *fdir_filter,
			 struct mlx5_fdir *attributes)
{
	struct priv *priv = dev->data->dev_private;
	const struct rte_eth_fdir_input *input = &fdir_filter->input;

	/* Validate queue number. */
	if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
		DRV_LOG(ERR, "port %u invalid queue number %d",
			dev->data->port_id, fdir_filter->action.rx_queue);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	attributes->attr.ingress = 1;
	attributes->items[0] = (struct rte_flow_item) {
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.spec = &attributes->l2,
		.mask = &attributes->l2_mask,
	};
	switch (fdir_filter->action.behavior) {
	case RTE_ETH_FDIR_ACCEPT:
		attributes->actions[0] = (struct rte_flow_action){
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &attributes->queue,
		};
		break;
	case RTE_ETH_FDIR_REJECT:
		attributes->actions[0] = (struct rte_flow_action){
			.type = RTE_FLOW_ACTION_TYPE_DROP,
		};
		break;
	default:
		DRV_LOG(ERR, "port %u invalid behavior %d",
			dev->data->port_id,
			fdir_filter->action.behavior);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	attributes->queue.index = fdir_filter->action.rx_queue;
	/* Convert L3 according to the flow type. */
	switch (fdir_filter->input.flow_type) {
	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = input->flow.ip4_flow.src_ip,
			.dst_addr = input->flow.ip4_flow.dst_ip,
			.time_to_live = input->flow.ip4_flow.ttl,
			.type_of_service = input->flow.ip4_flow.tos,
			.next_proto_id = input->flow.ip4_flow.proto,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV4,
			.spec = &attributes->l3,
			.mask = &attributes->l3,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
			.hop_limits = input->flow.udp6_flow.ip.hop_limits,
			.proto = input->flow.udp6_flow.ip.proto,
		};
		memcpy(attributes->l3.ipv6.hdr.src_addr,
		       input->flow.ipv6_flow.src_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
		memcpy(attributes->l3.ipv6.hdr.dst_addr,
		       input->flow.ipv6_flow.dst_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV6,
			.spec = &attributes->l3,
			.mask = &attributes->l3,
		};
		break;
	default:
		DRV_LOG(ERR, "port %u invalid flow type %d",
			dev->data->port_id, fdir_filter->input.flow_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	/* Convert L4 according to the flow type. */
	switch (fdir_filter->input.flow_type) {
	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
		attributes->l4.udp.hdr = (struct udp_hdr){
			.src_port = input->flow.udp4_flow.src_port,
			.dst_port = input->flow.udp4_flow.dst_port,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_UDP,
			.spec = &attributes->l4,
			.mask = &attributes->l4,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
		attributes->l4.tcp.hdr = (struct tcp_hdr){
			.src_port = input->flow.tcp4_flow.src_port,
			.dst_port = input->flow.tcp4_flow.dst_port,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_TCP,
			.spec = &attributes->l4,
			.mask = &attributes->l4,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
		attributes->l4.udp.hdr = (struct udp_hdr){
			.src_port = input->flow.udp6_flow.src_port,
			.dst_port = input->flow.udp6_flow.dst_port,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_UDP,
			.spec = &attributes->l4,
			.mask = &attributes->l4,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
		attributes->l4.tcp.hdr = (struct tcp_hdr){
			.src_port = input->flow.tcp6_flow.src_port,
			.dst_port = input->flow.tcp6_flow.dst_port,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_TCP,
			.spec = &attributes->l4,
			.mask = &attributes->l4,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
		/* No L4 item for "other" flow types. */
		break;
	default:
		DRV_LOG(ERR, "port %u invalid flow type %d",
			dev->data->port_id, fdir_filter->input.flow_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	return 0;
}
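
/*
 * Worked example (derived from the conversion above): an
 * RTE_ETH_FLOW_NONFRAG_IPV4_UDP filter with behavior RTE_ETH_FDIR_ACCEPT
 * becomes the equivalent of the generic flow:
 *
 *	pattern: ETH / IPV4 (src, dst, ttl, tos, proto) / UDP (src, dst)
 *	action : QUEUE index = fdir_filter->action.rx_queue
 *
 * with each item's mask pointing at the same header copy as its spec.
 */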

/**
 * Add new flow director filter and store it in list.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Flow director filter to add.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_add(struct rte_eth_dev *dev,
		     const struct rte_eth_fdir_filter *fdir_filter)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_fdir attributes = {
		.attr.group = 0,
		.l2_mask = {
			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.type = 0,
		},
	};
	struct mlx5_flow_parse parser = {
		.layer = HASH_RXQ_ETH,
	};
	struct rte_flow_error error;
	struct rte_flow *flow;
	int ret;

	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
	if (ret)
		return ret;
	ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
				attributes.actions, &error, &parser);
	if (ret)
		return ret;
	flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
				     attributes.items, attributes.actions,
				     &error);
	if (flow) {
		DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
			(void *)flow);
		return 0;
	}
	return -rte_errno;
}

/**
 * Delete specific filter.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Filter to be deleted.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_delete(struct rte_eth_dev *dev,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_fdir attributes = {
		.attr.group = 0,
	};
	struct mlx5_flow_parse parser = {
		.create = 1,
		.layer = HASH_RXQ_ETH,
	};
	struct rte_flow_error error;
	struct rte_flow *flow;
	unsigned int i;
	int ret;

	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
	if (ret)
		return ret;
	ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
				attributes.actions, &error, &parser);
	if (ret)
		goto exit;
	/*
	 * Special case for drop action which is only set in the
	 * specifications when the flow is created. In this situation the
	 * drop specification is missing.
	 */
	if (parser.drop) {
		struct ibv_flow_spec_action_drop *drop;

		drop = (void *)((uintptr_t)parser.queue[HASH_RXQ_ETH].ibv_attr +
				parser.queue[HASH_RXQ_ETH].offset);
		*drop = (struct ibv_flow_spec_action_drop){
			.type = IBV_FLOW_SPEC_ACTION_DROP,
			.size = sizeof(struct ibv_flow_spec_action_drop),
		};
		parser.queue[HASH_RXQ_ETH].ibv_attr->num_of_specs++;
	}
	TAILQ_FOREACH(flow, &priv->flows, next) {
		struct ibv_flow_attr *attr;
		struct ibv_spec_header *attr_h;
		void *spec;
		struct ibv_flow_attr *flow_attr;
		struct ibv_spec_header *flow_h;
		void *flow_spec;
		unsigned int specs_n;

		attr = parser.queue[HASH_RXQ_ETH].ibv_attr;
		flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr;
		/* Compare first the attributes. */
		if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
			continue;
		if (attr->num_of_specs == 0)
			continue;
		spec = (void *)((uintptr_t)attr +
				sizeof(struct ibv_flow_attr));
		flow_spec = (void *)((uintptr_t)flow_attr +
				     sizeof(struct ibv_flow_attr));
		specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
		/* Then compare the specifications one by one. */
		for (i = 0; i != specs_n; ++i) {
			attr_h = spec;
			flow_h = flow_spec;
			if (memcmp(spec, flow_spec,
				   RTE_MIN(attr_h->size, flow_h->size)))
				goto wrong_flow;
			spec = (void *)((uintptr_t)spec + attr_h->size);
			flow_spec = (void *)((uintptr_t)flow_spec +
					     flow_h->size);
		}
		/* At this point, the flows match. */
		break;
wrong_flow:
		/* The flow does not match. */
		continue;
	}
	ret = rte_errno; /* Save rte_errno before cleanup. */
	if (flow)
		mlx5_flow_list_destroy(dev, &priv->flows, flow);
exit:
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser.queue[i].ibv_attr)
			rte_free(parser.queue[i].ibv_attr);
	}
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}
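
/*
 * Design note: flow director filters are not stored in a dedicated list,
 * so deletion re-converts the filter and compares the resulting verbs
 * attributes and specifications against every flow in priv->flows; the
 * first flow whose specifications match byte-for-byte is destroyed.
 */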

/**
 * Update queue for specific filter.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Filter to be updated.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_update(struct rte_eth_dev *dev,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	int ret;

	/* An update is a delete followed by an add. */
	ret = mlx5_fdir_filter_delete(dev, fdir_filter);
	if (ret)
		return ret;
	return mlx5_fdir_filter_add(dev, fdir_filter);
}

/**
 * Flush all filters.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
static void
mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_flush(dev, &priv->flows);
}

/**
 * Get flow director information.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[out] fdir_info
 *   Resulting flow director information.
 */
static void
mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
{
	struct priv *priv = dev->data->dev_private;
	struct rte_eth_fdir_masks *mask =
		&priv->dev->data->dev_conf.fdir_conf.mask;

	fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
	fdir_info->guarant_spc = 0;
	rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
	fdir_info->max_flexpayload = 0;
	fdir_info->flow_types_mask[0] = 0;
	fdir_info->flex_payload_unit = 0;
	fdir_info->max_flex_payload_segment_num = 0;
	fdir_info->flex_payload_limit = 0;
	memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
}

/**
 * Deal with flow director operations.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
		    void *arg)
{
	struct priv *priv = dev->data->dev_private;
	enum rte_fdir_mode fdir_mode =
		priv->dev->data->dev_conf.fdir_conf.mode;

	if (filter_op == RTE_ETH_FILTER_NOP)
		return 0;
	if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
	    fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
		DRV_LOG(ERR, "port %u flow director mode %d not supported",
			dev->data->port_id, fdir_mode);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	switch (filter_op) {
	case RTE_ETH_FILTER_ADD:
		return mlx5_fdir_filter_add(dev, arg);
	case RTE_ETH_FILTER_UPDATE:
		return mlx5_fdir_filter_update(dev, arg);
	case RTE_ETH_FILTER_DELETE:
		return mlx5_fdir_filter_delete(dev, arg);
	case RTE_ETH_FILTER_FLUSH:
		mlx5_fdir_filter_flush(dev);
		break;
	case RTE_ETH_FILTER_INFO:
		mlx5_fdir_info_get(dev, arg);
		break;
	default:
		DRV_LOG(DEBUG, "port %u unknown operation %u",
			dev->data->port_id, filter_op);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	return 0;
}
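
/*
 * Usage sketch (illustrative only, legacy filter API): flow director
 * requests reach this function through rte_eth_dev_filter_ctrl(), e.g.:
 *
 *	struct rte_eth_fdir_filter f = { ... };
 *
 *	if (rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
 *				    RTE_ETH_FILTER_ADD, &f))
 *		... unsupported mode or invalid filter, rte_errno is set ...
 */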

/**
 * Manage filter operations.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param filter_type
 *   Filter type.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
		     enum rte_filter_type filter_type,
		     enum rte_filter_op filter_op,
		     void *arg)
{
	switch (filter_type) {
	case RTE_ETH_FILTER_GENERIC:
		if (filter_op != RTE_ETH_FILTER_GET) {
			rte_errno = EINVAL;
			return -rte_errno;
		}
		*(const void **)arg = &mlx5_flow_ops;
		return 0;
	case RTE_ETH_FILTER_FDIR:
		return mlx5_fdir_ctrl_func(dev, filter_op, arg);
	default:
		DRV_LOG(ERR, "port %u filter type (%d) not supported",
			dev->data->port_id, filter_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	return 0;
}
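
/*
 * Note: the generic rte_flow entry points (rte_flow_create(),
 * rte_flow_destroy(), rte_flow_flush(), ...) obtain mlx5_flow_ops through
 * the RTE_ETH_FILTER_GENERIC/RTE_ETH_FILTER_GET branch above, making this
 * function the single dispatch point for both the legacy filter API and
 * the generic flow API.
 */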