1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2016 6WIND S.A.
3 * Copyright 2016 Mellanox Technologies, Ltd
10 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
12 #pragma GCC diagnostic ignored "-Wpedantic"
14 #include <infiniband/verbs.h>
16 #pragma GCC diagnostic error "-Wpedantic"
19 #include <rte_common.h>
20 #include <rte_ethdev_driver.h>
22 #include <rte_flow_driver.h>
23 #include <rte_malloc.h>
27 #include "mlx5_defs.h"
29 #include "mlx5_glue.h"
31 /* Define minimal priority for control plane flows. */
32 #define MLX5_CTRL_FLOW_PRIORITY 4
34 /* Internet Protocol versions. */
38 #ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
39 struct ibv_flow_spec_counter_action {
44 /* Dev ops structure defined in mlx5.c */
45 extern const struct eth_dev_ops mlx5_dev_ops;
46 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
48 /** Structure given to the conversion functions. */
49 struct mlx5_flow_data {
50 struct mlx5_flow_parse *parser; /**< Parser context. */
51 struct rte_flow_error *error; /**< Error context. */
55 mlx5_flow_create_eth(const struct rte_flow_item *item,
56 const void *default_mask,
57 struct mlx5_flow_data *data);
60 mlx5_flow_create_vlan(const struct rte_flow_item *item,
61 const void *default_mask,
62 struct mlx5_flow_data *data);
65 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
66 const void *default_mask,
67 struct mlx5_flow_data *data);
70 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
71 const void *default_mask,
72 struct mlx5_flow_data *data);
75 mlx5_flow_create_udp(const struct rte_flow_item *item,
76 const void *default_mask,
77 struct mlx5_flow_data *data);
80 mlx5_flow_create_tcp(const struct rte_flow_item *item,
81 const void *default_mask,
82 struct mlx5_flow_data *data);
85 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
86 const void *default_mask,
87 struct mlx5_flow_data *data);
89 struct mlx5_flow_parse;
92 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
96 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);
99 mlx5_flow_create_count(struct rte_eth_dev *dev, struct mlx5_flow_parse *parser);
101 /* Hash RX queue types. */
112 /* Initialization data for hash RX queue. */
113 struct hash_rxq_init {
114 uint64_t hash_fields; /* Fields that participate in the hash. */
115 uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
116 unsigned int flow_priority; /* Flow priority to use. */
117 unsigned int ip_version; /* Internet protocol. */
120 /* Initialization data for hash RX queues. */
121 const struct hash_rxq_init hash_rxq_init[] = {
123 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
124 IBV_RX_HASH_DST_IPV4 |
125 IBV_RX_HASH_SRC_PORT_TCP |
126 IBV_RX_HASH_DST_PORT_TCP),
127 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
129 .ip_version = MLX5_IPV4,
132 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
133 IBV_RX_HASH_DST_IPV4 |
134 IBV_RX_HASH_SRC_PORT_UDP |
135 IBV_RX_HASH_DST_PORT_UDP),
136 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
138 .ip_version = MLX5_IPV4,
141 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
142 IBV_RX_HASH_DST_IPV4),
143 .dpdk_rss_hf = (ETH_RSS_IPV4 |
146 .ip_version = MLX5_IPV4,
149 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
150 IBV_RX_HASH_DST_IPV6 |
151 IBV_RX_HASH_SRC_PORT_TCP |
152 IBV_RX_HASH_DST_PORT_TCP),
153 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
155 .ip_version = MLX5_IPV6,
158 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
159 IBV_RX_HASH_DST_IPV6 |
160 IBV_RX_HASH_SRC_PORT_UDP |
161 IBV_RX_HASH_DST_PORT_UDP),
162 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
164 .ip_version = MLX5_IPV6,
167 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
168 IBV_RX_HASH_DST_IPV6),
169 .dpdk_rss_hf = (ETH_RSS_IPV6 |
172 .ip_version = MLX5_IPV6,
181 /* Number of entries in hash_rxq_init[]. */
182 const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
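/*
 * Illustrative example (a sketch, not part of the driver logic): a pattern
 * such as "eth / ipv4 / udp" maps to the HASH_RXQ_UDPV4 entry above, so the
 * device hashes on the IPv4 addresses and UDP ports:
 *
 *   const struct hash_rxq_init *init = &hash_rxq_init[HASH_RXQ_UDPV4];
 *   // init->hash_fields == IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4 |
 *   //                      IBV_RX_HASH_SRC_PORT_UDP | IBV_RX_HASH_DST_PORT_UDP
 *   // init->dpdk_rss_hf  == ETH_RSS_NONFRAG_IPV4_UDP
 */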
184 /** Structure for holding counter stats. */
185 struct mlx5_flow_counter_stats {
186 uint64_t hits; /**< Number of packets matched by the rule. */
187 uint64_t bytes; /**< Number of bytes matched by the rule. */
190 /** Structure for Drop queue. */
191 struct mlx5_hrxq_drop {
192 struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
193 struct ibv_qp *qp; /**< Verbs queue pair. */
194 struct ibv_wq *wq; /**< Verbs work queue. */
195 struct ibv_cq *cq; /**< Verbs completion queue. */
198 /* Flow structures. */
200 uint64_t hash_fields; /**< Fields that participate in the hash. */
201 struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
202 struct ibv_flow *ibv_flow; /**< Verbs flow. */
203 struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
206 /* Drop flow structure. */
207 struct mlx5_flow_drop {
208 struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
209 struct ibv_flow *ibv_flow; /**< Verbs flow. */
213 TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
214 uint32_t mark:1; /**< Set if the flow is marked. */
215 uint32_t drop:1; /**< Drop queue. */
216 uint16_t queues_n; /**< Number of entries in queues[]. */
217 uint16_t (*queues)[]; /**< Queue indexes to use. */
218 struct rte_eth_rss_conf rss_conf; /**< RSS configuration. */
219 uint8_t rss_key[40]; /**< Copy of the RSS key. */
220 struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
221 struct mlx5_flow_counter_stats counter_stats; /**< The counter stats. */
222 struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
223 /**< Flow with Rx queue. */
226 /** Static initializer for items. */
228 (const enum rte_flow_item_type []){ \
229 __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
232 /** Structure to generate a simple graph of layers supported by the NIC. */
233 struct mlx5_flow_items {
234 /** List of possible actions for these items. */
235 const enum rte_flow_action_type *const actions;
236 /** Bit-masks corresponding to the possibilities for the item. */
239 * Default bit-masks to use when item->mask is not provided. When
240 * \default_mask is also NULL, the full supported bit-mask (\mask) is used instead.
243 const void *default_mask;
244 /** Bit-masks size in bytes. */
245 const unsigned int mask_sz;
247 * Conversion function from rte_flow to NIC specific flow.
250 * rte_flow item to convert.
251 * @param default_mask
252 * Default bit-masks to use when item->mask is not provided.
254 * Internal structure to store the conversion.
257 * 0 on success, a negative errno value otherwise and rte_errno is set.
260 int (*convert)(const struct rte_flow_item *item,
261 const void *default_mask,
262 struct mlx5_flow_data *data);
263 /** Size in bytes of the destination structure. */
264 const unsigned int dst_sz;
265 /** List of possible following items. */
266 const enum rte_flow_item_type *const items;
269 /** Valid actions for this PMD. */
270 static const enum rte_flow_action_type valid_actions[] = {
271 RTE_FLOW_ACTION_TYPE_DROP,
272 RTE_FLOW_ACTION_TYPE_QUEUE,
273 RTE_FLOW_ACTION_TYPE_MARK,
274 RTE_FLOW_ACTION_TYPE_FLAG,
275 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
276 RTE_FLOW_ACTION_TYPE_COUNT,
278 RTE_FLOW_ACTION_TYPE_END,
281 /** Graph of supported items and associated actions. */
282 static const struct mlx5_flow_items mlx5_flow_items[] = {
283 [RTE_FLOW_ITEM_TYPE_END] = {
284 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
285 RTE_FLOW_ITEM_TYPE_VXLAN),
287 [RTE_FLOW_ITEM_TYPE_ETH] = {
288 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
289 RTE_FLOW_ITEM_TYPE_IPV4,
290 RTE_FLOW_ITEM_TYPE_IPV6),
291 .actions = valid_actions,
292 .mask = &(const struct rte_flow_item_eth){
293 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
294 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
297 .default_mask = &rte_flow_item_eth_mask,
298 .mask_sz = sizeof(struct rte_flow_item_eth),
299 .convert = mlx5_flow_create_eth,
300 .dst_sz = sizeof(struct ibv_flow_spec_eth),
302 [RTE_FLOW_ITEM_TYPE_VLAN] = {
303 .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
304 RTE_FLOW_ITEM_TYPE_IPV6),
305 .actions = valid_actions,
306 .mask = &(const struct rte_flow_item_vlan){
309 .default_mask = &rte_flow_item_vlan_mask,
310 .mask_sz = sizeof(struct rte_flow_item_vlan),
311 .convert = mlx5_flow_create_vlan,
314 [RTE_FLOW_ITEM_TYPE_IPV4] = {
315 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
316 RTE_FLOW_ITEM_TYPE_TCP),
317 .actions = valid_actions,
318 .mask = &(const struct rte_flow_item_ipv4){
322 .type_of_service = -1,
326 .default_mask = &rte_flow_item_ipv4_mask,
327 .mask_sz = sizeof(struct rte_flow_item_ipv4),
328 .convert = mlx5_flow_create_ipv4,
329 .dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
331 [RTE_FLOW_ITEM_TYPE_IPV6] = {
332 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
333 RTE_FLOW_ITEM_TYPE_TCP),
334 .actions = valid_actions,
335 .mask = &(const struct rte_flow_item_ipv6){
338 0xff, 0xff, 0xff, 0xff,
339 0xff, 0xff, 0xff, 0xff,
340 0xff, 0xff, 0xff, 0xff,
341 0xff, 0xff, 0xff, 0xff,
344 0xff, 0xff, 0xff, 0xff,
345 0xff, 0xff, 0xff, 0xff,
346 0xff, 0xff, 0xff, 0xff,
347 0xff, 0xff, 0xff, 0xff,
354 .default_mask = &rte_flow_item_ipv6_mask,
355 .mask_sz = sizeof(struct rte_flow_item_ipv6),
356 .convert = mlx5_flow_create_ipv6,
357 .dst_sz = sizeof(struct ibv_flow_spec_ipv6),
359 [RTE_FLOW_ITEM_TYPE_UDP] = {
360 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
361 .actions = valid_actions,
362 .mask = &(const struct rte_flow_item_udp){
368 .default_mask = &rte_flow_item_udp_mask,
369 .mask_sz = sizeof(struct rte_flow_item_udp),
370 .convert = mlx5_flow_create_udp,
371 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
373 [RTE_FLOW_ITEM_TYPE_TCP] = {
374 .actions = valid_actions,
375 .mask = &(const struct rte_flow_item_tcp){
381 .default_mask = &rte_flow_item_tcp_mask,
382 .mask_sz = sizeof(struct rte_flow_item_tcp),
383 .convert = mlx5_flow_create_tcp,
384 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
386 [RTE_FLOW_ITEM_TYPE_VXLAN] = {
387 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
388 .actions = valid_actions,
389 .mask = &(const struct rte_flow_item_vxlan){
390 .vni = "\xff\xff\xff",
392 .default_mask = &rte_flow_item_vxlan_mask,
393 .mask_sz = sizeof(struct rte_flow_item_vxlan),
394 .convert = mlx5_flow_create_vxlan,
395 .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
399 /** Structure to pass to the conversion function. */
400 struct mlx5_flow_parse {
401 uint32_t inner; /**< Set once VXLAN is encountered. */
403 /**< Whether resources should remain after a validate. */
404 uint32_t drop:1; /**< Target is a drop queue. */
405 uint32_t mark:1; /**< Mark is present in the flow. */
406 uint32_t count:1; /**< Count is present in the flow. */
407 uint32_t mark_id; /**< Mark identifier. */
408 uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indexes to use. */
409 uint16_t queues_n; /**< Number of entries in queues[]. */
410 struct rte_eth_rss_conf rss_conf; /**< RSS configuration. */
411 uint8_t rss_key[40]; /**< Copy of the RSS key. */
412 enum hash_rxq_type layer; /**< Last pattern layer detected. */
413 struct ibv_counter_set *cs; /**< Holds the counter set for the rule. */
415 struct ibv_flow_attr *ibv_attr;
416 /**< Pointer to Verbs attributes. */
418 /**< Current position or total size of the attribute. */
419 } queue[RTE_DIM(hash_rxq_init)];
422 static const struct rte_flow_ops mlx5_flow_ops = {
423 .validate = mlx5_flow_validate,
424 .create = mlx5_flow_create,
425 .destroy = mlx5_flow_destroy,
426 .flush = mlx5_flow_flush,
427 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
428 .query = mlx5_flow_query,
432 .isolate = mlx5_flow_isolate,
435 /* Convert an FDIR request to a generic flow. */
437 struct rte_flow_attr attr;
438 struct rte_flow_action actions[2];
439 struct rte_flow_item items[4];
440 struct rte_flow_item_eth l2;
441 struct rte_flow_item_eth l2_mask;
443 struct rte_flow_item_ipv4 ipv4;
444 struct rte_flow_item_ipv6 ipv6;
447 struct rte_flow_item_udp udp;
448 struct rte_flow_item_tcp tcp;
450 struct rte_flow_action_queue queue;
453 /* Verbs specification header. */
454 struct ibv_spec_header {
455 enum ibv_flow_spec_type type;
460 * Check support for a given item.
463 * Item specification.
465 * Bit-masks covering supported fields to compare with spec, last and mask in the item.
468 * Bit-Mask size in bytes.
471 * 0 on success, a negative errno value otherwise and rte_errno is set.
474 mlx5_flow_item_validate(const struct rte_flow_item *item,
475 const uint8_t *mask, unsigned int size)
477 if (!item->spec && (item->mask || item->last)) {
481 if (item->spec && !item->mask) {
483 const uint8_t *spec = item->spec;
485 for (i = 0; i < size; ++i)
486 if ((spec[i] | mask[i]) != mask[i]) {
491 if (item->last && !item->mask) {
493 const uint8_t *spec = item->last;
495 for (i = 0; i < size; ++i)
496 if ((spec[i] | mask[i]) != mask[i]) {
503 const uint8_t *spec = item->spec;
505 for (i = 0; i < size; ++i)
506 if ((spec[i] | mask[i]) != mask[i]) {
511 if (item->spec && item->last) {
514 const uint8_t *apply = mask;
520 for (i = 0; i < size; ++i) {
521 spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
522 last[i] = ((const uint8_t *)item->last)[i] & apply[i];
524 ret = memcmp(spec, last, size);
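	/*
	 * Illustrative values for the range check above (an assumption for
	 * clarity, not taken from the original sources): a TCP item with
	 * spec->hdr.dst_port == last->hdr.dst_port == 80 yields identical
	 * masked copies and passes, while a genuine range such as spec = 80,
	 * last = 443 makes the masked copies differ (memcmp() != 0), which
	 * this parser does not support.
	 */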
534 * Copy the RSS configuration from the user's one; if rss_conf is null,
535 * use the driver's default one.
538 * Internal parser structure.
540 * User RSS configuration to save.
543 * 0 on success, a negative errno value otherwise and rte_errno is set.
546 mlx5_flow_convert_rss_conf(struct mlx5_flow_parse *parser,
547 const struct rte_eth_rss_conf *rss_conf)
550 * This function is also called at the beginning of
551 * mlx5_flow_convert_actions() to initialize the parser with the
552 * device default RSS configuration.
555 if (rss_conf->rss_hf & MLX5_RSS_HF_MASK) {
559 if (rss_conf->rss_key_len != 40) {
563 if (rss_conf->rss_key_len && rss_conf->rss_key) {
564 parser->rss_conf.rss_key_len = rss_conf->rss_key_len;
565 memcpy(parser->rss_key, rss_conf->rss_key,
566 rss_conf->rss_key_len);
567 parser->rss_conf.rss_key = parser->rss_key;
569 parser->rss_conf.rss_hf = rss_conf->rss_hf;
575 * Validate flow rule attributes.
578 * Flow rule attributes.
580 * Perform verbose error reporting if not NULL.
583 * 0 on success, a negative errno value otherwise and rte_errno is set.
586 mlx5_flow_convert_attributes(const struct rte_flow_attr *attr,
587 struct rte_flow_error *error)
590 rte_flow_error_set(error, ENOTSUP,
591 RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
593 "groups are not supported");
596 if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
597 rte_flow_error_set(error, ENOTSUP,
598 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
600 "priorities are not supported");
604 rte_flow_error_set(error, ENOTSUP,
605 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
607 "egress is not supported");
610 if (!attr->ingress) {
611 rte_flow_error_set(error, ENOTSUP,
612 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
614 "only ingress is supported");
621 * Extract the actions from the request into the parser.
624 * Pointer to Ethernet device.
626 * Associated actions (list terminated by the END action).
628 * Perform verbose error reporting if not NULL.
629 * @param[in, out] parser
630 * Internal parser structure.
633 * 0 on success, a negative errno value otherwise and rte_errno is set.
636 mlx5_flow_convert_actions(struct rte_eth_dev *dev,
637 const struct rte_flow_action actions[],
638 struct rte_flow_error *error,
639 struct mlx5_flow_parse *parser)
641 struct priv *priv = dev->data->dev_private;
645 * Add the default RSS configuration necessary for Verbs to create a
646 * QP even when no RSS action is requested.
648 ret = mlx5_flow_convert_rss_conf(parser,
649 (const struct rte_eth_rss_conf *)
653 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
654 if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
656 } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
658 } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
659 const struct rte_flow_action_queue *queue =
660 (const struct rte_flow_action_queue *)
665 if (!queue || (queue->index > (priv->rxqs_n - 1)))
666 goto exit_action_not_supported;
667 for (n = 0; n < parser->queues_n; ++n) {
668 if (parser->queues[n] == queue->index) {
673 if (parser->queues_n > 1 && !found) {
674 rte_flow_error_set(error, ENOTSUP,
675 RTE_FLOW_ERROR_TYPE_ACTION,
677 "queue action not in RSS queues");
681 parser->queues_n = 1;
682 parser->queues[0] = queue->index;
684 } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
685 const struct rte_flow_action_rss *rss =
686 (const struct rte_flow_action_rss *)
690 if (!rss || !rss->num) {
691 rte_flow_error_set(error, EINVAL,
692 RTE_FLOW_ERROR_TYPE_ACTION,
697 if (parser->queues_n == 1) {
700 assert(parser->queues_n);
701 for (n = 0; n < rss->num; ++n) {
702 if (parser->queues[0] ==
709 rte_flow_error_set(error, ENOTSUP,
710 RTE_FLOW_ERROR_TYPE_ACTION,
712 "queue action not in RSS"
717 if (rss->num > RTE_DIM(parser->queues)) {
718 rte_flow_error_set(error, EINVAL,
719 RTE_FLOW_ERROR_TYPE_ACTION,
721 "too many queues for RSS"
725 for (n = 0; n < rss->num; ++n) {
726 if (rss->queue[n] >= priv->rxqs_n) {
727 rte_flow_error_set(error, EINVAL,
728 RTE_FLOW_ERROR_TYPE_ACTION,
730 "queue id > number of"
735 for (n = 0; n < rss->num; ++n)
736 parser->queues[n] = rss->queue[n];
737 parser->queues_n = rss->num;
738 if (mlx5_flow_convert_rss_conf(parser, rss->rss_conf)) {
739 rte_flow_error_set(error, EINVAL,
740 RTE_FLOW_ERROR_TYPE_ACTION,
742 "wrong RSS configuration");
745 } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
746 const struct rte_flow_action_mark *mark =
747 (const struct rte_flow_action_mark *)
751 rte_flow_error_set(error, EINVAL,
752 RTE_FLOW_ERROR_TYPE_ACTION,
754 "mark must be defined");
756 } else if (mark->id >= MLX5_FLOW_MARK_MAX) {
757 rte_flow_error_set(error, ENOTSUP,
758 RTE_FLOW_ERROR_TYPE_ACTION,
760 "mark must be between 0"
765 parser->mark_id = mark->id;
766 } else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
768 } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
769 priv->config.flow_counter_en) {
772 goto exit_action_not_supported;
775 if (parser->drop && parser->mark)
777 if (!parser->queues_n && !parser->drop) {
778 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
779 NULL, "no valid action");
783 exit_action_not_supported:
784 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
785 actions, "action not supported");
793 * Pattern specification (list terminated by the END pattern item).
795 * Perform verbose error reporting if not NULL.
796 * @param[in, out] parser
797 * Internal parser structure.
800 * 0 on success, a negative errno value otherwise and rte_errno is set.
803 mlx5_flow_convert_items_validate(const struct rte_flow_item items[],
804 struct rte_flow_error *error,
805 struct mlx5_flow_parse *parser)
807 const struct mlx5_flow_items *cur_item = mlx5_flow_items;
811 /* Initialise the offsets to start after the verbs attribute. */
812 for (i = 0; i != hash_rxq_init_n; ++i)
813 parser->queue[i].offset = sizeof(struct ibv_flow_attr);
814 for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
815 const struct mlx5_flow_items *token = NULL;
818 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
822 cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
824 if (cur_item->items[i] == items->type) {
825 token = &mlx5_flow_items[items->type];
830 goto exit_item_not_supported;
832 ret = mlx5_flow_item_validate(items,
833 (const uint8_t *)cur_item->mask,
836 goto exit_item_not_supported;
837 if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
839 rte_flow_error_set(error, ENOTSUP,
840 RTE_FLOW_ERROR_TYPE_ITEM,
842 "cannot recognize multiple"
843 " VXLAN encapsulations");
846 parser->inner = IBV_FLOW_SPEC_INNER;
849 parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
851 for (n = 0; n != hash_rxq_init_n; ++n)
852 parser->queue[n].offset += cur_item->dst_sz;
856 parser->queue[HASH_RXQ_ETH].offset +=
857 sizeof(struct ibv_flow_spec_action_drop);
860 for (i = 0; i != hash_rxq_init_n; ++i)
861 parser->queue[i].offset +=
862 sizeof(struct ibv_flow_spec_action_tag);
865 unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
867 for (i = 0; i != hash_rxq_init_n; ++i)
868 parser->queue[i].offset += size;
871 exit_item_not_supported:
872 return rte_flow_error_set(error, -ret, RTE_FLOW_ERROR_TYPE_ITEM,
873 items, "item not supported");
877 * Allocate memory space to store verbs flow attributes.
880 * Amount of bytes to allocate.
882 * Perform verbose error reporting if not NULL.
885 * A verbs flow attribute on success, NULL otherwise and rte_errno is set.
887 static struct ibv_flow_attr *
888 mlx5_flow_convert_allocate(unsigned int size, struct rte_flow_error *error)
890 struct ibv_flow_attr *ibv_attr;
892 ibv_attr = rte_calloc(__func__, 1, size, 0);
894 rte_flow_error_set(error, ENOMEM,
895 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
897 "cannot allocate verbs spec attributes");
904 * Give inner packet matching a higher priority than non-inner matching.
907 * @param[in, out] parser
908 * Internal parser structure.
910 * User flow attribute.
913 mlx5_flow_update_priority(struct mlx5_flow_parse *parser,
914 const struct rte_flow_attr *attr)
919 parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
921 hash_rxq_init[HASH_RXQ_ETH].flow_priority;
924 for (i = 0; i != hash_rxq_init_n; ++i) {
925 if (parser->queue[i].ibv_attr) {
926 parser->queue[i].ibv_attr->priority =
928 hash_rxq_init[i].flow_priority -
929 (parser->inner ? 1 : 0);
935 * Finalise verbs flow attributes.
937 * @param[in, out] parser
938 * Internal parser structure.
941 mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
943 const unsigned int ipv4 =
944 hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
945 const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 : HASH_RXQ_TCPV6;
946 const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
947 const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 : HASH_RXQ_TCPV4;
948 const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 : HASH_RXQ_IPV4;
949 const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
952 /* Remove any other flow not matching the pattern. */
953 if (parser->queues_n == 1 && !parser->rss_conf.rss_hf) {
954 for (i = 0; i != hash_rxq_init_n; ++i) {
955 if (i == HASH_RXQ_ETH)
957 rte_free(parser->queue[i].ibv_attr);
958 parser->queue[i].ibv_attr = NULL;
962 if (parser->layer == HASH_RXQ_ETH) {
966 * This layer becomes useless as the pattern defines upper layers.
969 rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
970 parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
972 /* Remove opposite kind of layer e.g. IPv6 if the pattern is IPv4. */
973 for (i = ohmin; i != (ohmax + 1); ++i) {
974 if (!parser->queue[i].ibv_attr)
976 rte_free(parser->queue[i].ibv_attr);
977 parser->queue[i].ibv_attr = NULL;
979 /* Remove impossible flow according to the RSS configuration. */
980 if (hash_rxq_init[parser->layer].dpdk_rss_hf &
981 parser->rss_conf.rss_hf) {
982 /* Remove any other flow. */
983 for (i = hmin; i != (hmax + 1); ++i) {
984 if ((i == parser->layer) ||
985 (!parser->queue[i].ibv_attr))
987 rte_free(parser->queue[i].ibv_attr);
988 parser->queue[i].ibv_attr = NULL;
990 } else if (!parser->queue[ip].ibv_attr) {
991 /* No RSS possible with the current configuration. */
992 parser->queues_n = 1;
997 * Fill missing layers in verbs specifications, or compute the correct
998 * offset to allocate the memory space for the attributes and specifications.
1001 for (i = 0; i != hash_rxq_init_n - 1; ++i) {
1003 struct ibv_flow_spec_ipv4_ext ipv4;
1004 struct ibv_flow_spec_ipv6 ipv6;
1005 struct ibv_flow_spec_tcp_udp udp_tcp;
1010 if (i == parser->layer)
1012 if (parser->layer == HASH_RXQ_ETH) {
1013 if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
1014 size = sizeof(struct ibv_flow_spec_ipv4_ext);
1015 specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
1016 .type = IBV_FLOW_SPEC_IPV4_EXT,
1020 size = sizeof(struct ibv_flow_spec_ipv6);
1021 specs.ipv6 = (struct ibv_flow_spec_ipv6){
1022 .type = IBV_FLOW_SPEC_IPV6,
1026 if (parser->queue[i].ibv_attr) {
1027 dst = (void *)((uintptr_t)
1028 parser->queue[i].ibv_attr +
1029 parser->queue[i].offset);
1030 memcpy(dst, &specs, size);
1031 ++parser->queue[i].ibv_attr->num_of_specs;
1033 parser->queue[i].offset += size;
1035 if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
1036 (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
1037 size = sizeof(struct ibv_flow_spec_tcp_udp);
1038 specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
1039 .type = ((i == HASH_RXQ_UDPV4 ||
1040 i == HASH_RXQ_UDPV6) ?
1045 if (parser->queue[i].ibv_attr) {
1046 dst = (void *)((uintptr_t)
1047 parser->queue[i].ibv_attr +
1048 parser->queue[i].offset);
1049 memcpy(dst, &specs, size);
1050 ++parser->queue[i].ibv_attr->num_of_specs;
1052 parser->queue[i].offset += size;
1058 * Validate and convert a flow supported by the NIC.
1061 * Pointer to Ethernet device.
1063 * Flow rule attributes.
1064 * @param[in] pattern
1065 * Pattern specification (list terminated by the END pattern item).
1066 * @param[in] actions
1067 * Associated actions (list terminated by the END action).
1069 * Perform verbose error reporting if not NULL.
1070 * @param[in, out] parser
1071 * Internal parser structure.
1074 * 0 on success, a negative errno value otherwise and rte_errno is set.
1077 mlx5_flow_convert(struct rte_eth_dev *dev,
1078 const struct rte_flow_attr *attr,
1079 const struct rte_flow_item items[],
1080 const struct rte_flow_action actions[],
1081 struct rte_flow_error *error,
1082 struct mlx5_flow_parse *parser)
1084 const struct mlx5_flow_items *cur_item = mlx5_flow_items;
1088 /* First step. Validate the attributes, items and actions. */
1089 *parser = (struct mlx5_flow_parse){
1090 .create = parser->create,
1091 .layer = HASH_RXQ_ETH,
1092 .mark_id = MLX5_FLOW_MARK_DEFAULT,
1094 ret = mlx5_flow_convert_attributes(attr, error);
1097 ret = mlx5_flow_convert_actions(dev, actions, error, parser);
1100 ret = mlx5_flow_convert_items_validate(items, error, parser);
1103 mlx5_flow_convert_finalise(parser);
1106 * Allocate the memory space to store verbs specifications.
1109 unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;
1111 parser->queue[HASH_RXQ_ETH].ibv_attr =
1112 mlx5_flow_convert_allocate(offset, error);
1113 if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
1115 parser->queue[HASH_RXQ_ETH].offset =
1116 sizeof(struct ibv_flow_attr);
1118 for (i = 0; i != hash_rxq_init_n; ++i) {
1119 unsigned int offset;
1121 if (!(parser->rss_conf.rss_hf &
1122 hash_rxq_init[i].dpdk_rss_hf) &&
1123 (i != HASH_RXQ_ETH))
1125 offset = parser->queue[i].offset;
1126 parser->queue[i].ibv_attr =
1127 mlx5_flow_convert_allocate(offset, error);
1128 if (!parser->queue[i].ibv_attr)
1130 parser->queue[i].offset = sizeof(struct ibv_flow_attr);
1133 /* Third step. Parse the pattern and fill the specifications. */
1135 for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
1136 struct mlx5_flow_data data = {
1141 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
1143 cur_item = &mlx5_flow_items[items->type];
1144 ret = cur_item->convert(items,
1145 (cur_item->default_mask ?
1146 cur_item->default_mask :
1153 mlx5_flow_create_flag_mark(parser, parser->mark_id);
1154 if (parser->count && parser->create) {
1155 mlx5_flow_create_count(dev, parser);
1157 goto exit_count_error;
1160 * Last step. Complete missing specifications to reach the RSS configuration.
1164 mlx5_flow_convert_finalise(parser);
1165 mlx5_flow_update_priority(parser, attr);
1167 /* Only verification is expected, all resources should be released. */
1168 if (!parser->create) {
1169 for (i = 0; i != hash_rxq_init_n; ++i) {
1170 if (parser->queue[i].ibv_attr) {
1171 rte_free(parser->queue[i].ibv_attr);
1172 parser->queue[i].ibv_attr = NULL;
1178 for (i = 0; i != hash_rxq_init_n; ++i) {
1179 if (parser->queue[i].ibv_attr) {
1180 rte_free(parser->queue[i].ibv_attr);
1181 parser->queue[i].ibv_attr = NULL;
1184 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1185 NULL, "cannot allocate verbs spec attributes");
1188 rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1189 NULL, "cannot create counter");
1194 * Copy the created specification into each active hash Rx queue attribute.
1197 * Internal parser structure.
1199 * Specification to copy.
1201 * Size in bytes of the specification to copy.
1204 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
1210 for (i = 0; i != hash_rxq_init_n; ++i) {
1211 if (!parser->queue[i].ibv_attr)
1213 /* Specification must be the same L3 type or none. */
1214 if (parser->layer == HASH_RXQ_ETH ||
1215 (hash_rxq_init[parser->layer].ip_version ==
1216 hash_rxq_init[i].ip_version) ||
1217 (hash_rxq_init[i].ip_version == 0)) {
1218 dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1219 parser->queue[i].offset);
1220 memcpy(dst, src, size);
1221 ++parser->queue[i].ibv_attr->num_of_specs;
1222 parser->queue[i].offset += size;
1228 * Convert Ethernet item to Verbs specification.
1231 * Item specification.
1232 * @param[in] default_mask
1233 * Default bit-masks to use when item->mask is not provided.
1234 * @param[in, out] data
1238 * 0 on success, a negative errno value otherwise and rte_errno is set.
1241 mlx5_flow_create_eth(const struct rte_flow_item *item,
1242 const void *default_mask,
1243 struct mlx5_flow_data *data)
1245 const struct rte_flow_item_eth *spec = item->spec;
1246 const struct rte_flow_item_eth *mask = item->mask;
1247 struct mlx5_flow_parse *parser = data->parser;
1248 const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1249 struct ibv_flow_spec_eth eth = {
1250 .type = parser->inner | IBV_FLOW_SPEC_ETH,
1254 /* Don't update layer for the inner pattern. */
1256 parser->layer = HASH_RXQ_ETH;
1261 mask = default_mask;
1262 memcpy(ð.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
1263 memcpy(ð.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
1264 eth.val.ether_type = spec->type;
1265 memcpy(ð.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
1266 memcpy(ð.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
1267 eth.mask.ether_type = mask->type;
1268 /* Remove unwanted bits from values. */
1269 for (i = 0; i < ETHER_ADDR_LEN; ++i) {
1270 eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
1271 eth.val.src_mac[i] &= eth.mask.src_mac[i];
1273 eth.val.ether_type &= eth.mask.ether_type;
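	/*
	 * Illustrative values for the masking above (an assumption for
	 * clarity): with spec->dst = 01:00:5e:00:00:01 and
	 * mask->dst = ff:ff:ff:00:00:00, eth.val.dst_mac becomes
	 * 01:00:5e:00:00:00, keeping only the bits the mask actually compares.
	 */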
1275 mlx5_flow_create_copy(parser, ð, eth_size);
1280 * Convert VLAN item to Verbs specification.
1283 * Item specification.
1284 * @param[in] default_mask
1285 * Default bit-masks to use when item->mask is not provided.
1286 * @param[in, out] data
1290 * 0 on success, a negative errno value otherwise and rte_errno is set.
1293 mlx5_flow_create_vlan(const struct rte_flow_item *item,
1294 const void *default_mask,
1295 struct mlx5_flow_data *data)
1297 const struct rte_flow_item_vlan *spec = item->spec;
1298 const struct rte_flow_item_vlan *mask = item->mask;
1299 struct mlx5_flow_parse *parser = data->parser;
1300 struct ibv_flow_spec_eth *eth;
1301 const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1306 mask = default_mask;
1308 for (i = 0; i != hash_rxq_init_n; ++i) {
1309 if (!parser->queue[i].ibv_attr)
1312 eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1313 parser->queue[i].offset - eth_size);
1314 eth->val.vlan_tag = spec->tci;
1315 eth->mask.vlan_tag = mask->tci;
1316 eth->val.vlan_tag &= eth->mask.vlan_tag;
1318 * From a Verbs perspective, an empty VLAN is equivalent
1319 * to a packet without a VLAN layer.
1321 if (!eth->mask.vlan_tag)
1327 return rte_flow_error_set(data->error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM,
1328 item, "VLAN cannot be empty");
1332 * Convert IPv4 item to Verbs specification.
1335 * Item specification.
1336 * @param[in] default_mask
1337 * Default bit-masks to use when item->mask is not provided.
1338 * @param[in, out] data
1342 * 0 on success, a negative errno value otherwise and rte_errno is set.
1345 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
1346 const void *default_mask,
1347 struct mlx5_flow_data *data)
1349 const struct rte_flow_item_ipv4 *spec = item->spec;
1350 const struct rte_flow_item_ipv4 *mask = item->mask;
1351 struct mlx5_flow_parse *parser = data->parser;
1352 unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
1353 struct ibv_flow_spec_ipv4_ext ipv4 = {
1354 .type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
1358 /* Don't update layer for the inner pattern. */
1360 parser->layer = HASH_RXQ_IPV4;
1363 mask = default_mask;
1364 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
1365 .src_ip = spec->hdr.src_addr,
1366 .dst_ip = spec->hdr.dst_addr,
1367 .proto = spec->hdr.next_proto_id,
1368 .tos = spec->hdr.type_of_service,
1370 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
1371 .src_ip = mask->hdr.src_addr,
1372 .dst_ip = mask->hdr.dst_addr,
1373 .proto = mask->hdr.next_proto_id,
1374 .tos = mask->hdr.type_of_service,
1376 /* Remove unwanted bits from values. */
1377 ipv4.val.src_ip &= ipv4.mask.src_ip;
1378 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
1379 ipv4.val.proto &= ipv4.mask.proto;
1380 ipv4.val.tos &= ipv4.mask.tos;
1382 mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
1387 * Convert IPv6 item to Verbs specification.
1390 * Item specification.
1391 * @param[in] default_mask
1392 * Default bit-masks to use when item->mask is not provided.
1393 * @param[in, out] data
1397 * 0 on success, a negative errno value otherwise and rte_errno is set.
1400 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
1401 const void *default_mask,
1402 struct mlx5_flow_data *data)
1404 const struct rte_flow_item_ipv6 *spec = item->spec;
1405 const struct rte_flow_item_ipv6 *mask = item->mask;
1406 struct mlx5_flow_parse *parser = data->parser;
1407 unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
1408 struct ibv_flow_spec_ipv6 ipv6 = {
1409 .type = parser->inner | IBV_FLOW_SPEC_IPV6,
1413 /* Don't update layer for the inner pattern. */
1415 parser->layer = HASH_RXQ_IPV6;
1418 uint32_t vtc_flow_val;
1419 uint32_t vtc_flow_mask;
1422 mask = default_mask;
1423 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
1424 RTE_DIM(ipv6.val.src_ip));
1425 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
1426 RTE_DIM(ipv6.val.dst_ip));
1427 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
1428 RTE_DIM(ipv6.mask.src_ip));
1429 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
1430 RTE_DIM(ipv6.mask.dst_ip));
1431 vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
1432 vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
1433 ipv6.val.flow_label =
1434 rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
1436 ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
1438 ipv6.val.next_hdr = spec->hdr.proto;
1439 ipv6.val.hop_limit = spec->hdr.hop_limits;
1440 ipv6.mask.flow_label =
1441 rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
1443 ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
1445 ipv6.mask.next_hdr = mask->hdr.proto;
1446 ipv6.mask.hop_limit = mask->hdr.hop_limits;
1447 /* Remove unwanted bits from values. */
1448 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
1449 ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
1450 ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
1452 ipv6.val.flow_label &= ipv6.mask.flow_label;
1453 ipv6.val.traffic_class &= ipv6.mask.traffic_class;
1454 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
1455 ipv6.val.hop_limit &= ipv6.mask.hop_limit;
1457 mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
1462 * Convert UDP item to Verbs specification.
1465 * Item specification.
1466 * @param[in] default_mask
1467 * Default bit-masks to use when item->mask is not provided.
1468 * @param[in, out] data
1472 * 0 on success, a negative errno value otherwise and rte_errno is set.
1475 mlx5_flow_create_udp(const struct rte_flow_item *item,
1476 const void *default_mask,
1477 struct mlx5_flow_data *data)
1479 const struct rte_flow_item_udp *spec = item->spec;
1480 const struct rte_flow_item_udp *mask = item->mask;
1481 struct mlx5_flow_parse *parser = data->parser;
1482 unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1483 struct ibv_flow_spec_tcp_udp udp = {
1484 .type = parser->inner | IBV_FLOW_SPEC_UDP,
1488 /* Don't update layer for the inner pattern. */
1489 if (!parser->inner) {
1490 if (parser->layer == HASH_RXQ_IPV4)
1491 parser->layer = HASH_RXQ_UDPV4;
1493 parser->layer = HASH_RXQ_UDPV6;
1497 mask = default_mask;
1498 udp.val.dst_port = spec->hdr.dst_port;
1499 udp.val.src_port = spec->hdr.src_port;
1500 udp.mask.dst_port = mask->hdr.dst_port;
1501 udp.mask.src_port = mask->hdr.src_port;
1502 /* Remove unwanted bits from values. */
1503 udp.val.src_port &= udp.mask.src_port;
1504 udp.val.dst_port &= udp.mask.dst_port;
1506 mlx5_flow_create_copy(parser, &udp, udp_size);
1511 * Convert TCP item to Verbs specification.
1514 * Item specification.
1515 * @param[in] default_mask
1516 * Default bit-masks to use when item->mask is not provided.
1517 * @param[in, out] data
1521 * 0 on success, a negative errno value otherwise and rte_errno is set.
1524 mlx5_flow_create_tcp(const struct rte_flow_item *item,
1525 const void *default_mask,
1526 struct mlx5_flow_data *data)
1528 const struct rte_flow_item_tcp *spec = item->spec;
1529 const struct rte_flow_item_tcp *mask = item->mask;
1530 struct mlx5_flow_parse *parser = data->parser;
1531 unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1532 struct ibv_flow_spec_tcp_udp tcp = {
1533 .type = parser->inner | IBV_FLOW_SPEC_TCP,
1537 /* Don't update layer for the inner pattern. */
1538 if (!parser->inner) {
1539 if (parser->layer == HASH_RXQ_IPV4)
1540 parser->layer = HASH_RXQ_TCPV4;
1542 parser->layer = HASH_RXQ_TCPV6;
1546 mask = default_mask;
1547 tcp.val.dst_port = spec->hdr.dst_port;
1548 tcp.val.src_port = spec->hdr.src_port;
1549 tcp.mask.dst_port = mask->hdr.dst_port;
1550 tcp.mask.src_port = mask->hdr.src_port;
1551 /* Remove unwanted bits from values. */
1552 tcp.val.src_port &= tcp.mask.src_port;
1553 tcp.val.dst_port &= tcp.mask.dst_port;
1555 mlx5_flow_create_copy(parser, &tcp, tcp_size);
1560 * Convert VXLAN item to Verbs specification.
1563 * Item specification.
1564 * @param[in] default_mask
1565 * Default bit-masks to use when item->mask is not provided.
1566 * @param[in, out] data
1570 * 0 on success, a negative errno value otherwise and rte_errno is set.
1573 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
1574 const void *default_mask,
1575 struct mlx5_flow_data *data)
1577 const struct rte_flow_item_vxlan *spec = item->spec;
1578 const struct rte_flow_item_vxlan *mask = item->mask;
1579 struct mlx5_flow_parse *parser = data->parser;
1580 unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1581 struct ibv_flow_spec_tunnel vxlan = {
1582 .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1591 parser->inner = IBV_FLOW_SPEC_INNER;
1594 mask = default_mask;
1595 memcpy(&id.vni[1], spec->vni, 3);
1596 vxlan.val.tunnel_id = id.vlan_id;
1597 memcpy(&id.vni[1], mask->vni, 3);
1598 vxlan.mask.tunnel_id = id.vlan_id;
1599 /* Remove unwanted bits from values. */
1600 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1603 * Tunnel id 0 is equivalent to not adding a VXLAN layer: if only this
1604 * layer is defined in the Verbs specification, it is interpreted as a
1605 * wildcard and all packets will match this rule. If it follows a full
1606 * stack layer (e.g. eth / ipv4 / udp), all packets matching the
1607 * preceding layers will also match this rule.
1608 * To avoid such a situation, VNI 0 is currently refused.
1610 if (!vxlan.val.tunnel_id)
1611 return rte_flow_error_set(data->error, EINVAL,
1612 RTE_FLOW_ERROR_TYPE_ITEM,
1614 "VxLAN vni cannot be 0");
1615 mlx5_flow_create_copy(parser, &vxlan, size);
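	/*
	 * Illustrative VNI layout (a sketch): the 24-bit VNI is copied into
	 * bytes 1..3 of the 32-bit tunnel id, so a hypothetical
	 * spec->vni = {0x12, 0x34, 0x56} yields
	 * id.vni = {0x00, 0x12, 0x34, 0x56} before masking.
	 */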
1620 * Convert mark/flag action to Verbs specification.
1623 * Internal parser structure.
1628 * 0 on success, a negative errno value otherwise and rte_errno is set.
1631 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
1633 unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1634 struct ibv_flow_spec_action_tag tag = {
1635 .type = IBV_FLOW_SPEC_ACTION_TAG,
1637 .tag_id = mlx5_flow_mark_set(mark_id),
1640 assert(parser->mark);
1641 mlx5_flow_create_copy(parser, &tag, size);
1646 * Convert count action to Verbs specification.
1649 * Pointer to Ethernet device.
1651 * Pointer to MLX5 flow parser structure.
1654 * 0 on success, a negative errno value otherwise and rte_errno is set.
1657 mlx5_flow_create_count(struct rte_eth_dev *dev __rte_unused,
1658 struct mlx5_flow_parse *parser __rte_unused)
1660 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
1661 struct priv *priv = dev->data->dev_private;
1662 unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
1663 struct ibv_counter_set_init_attr init_attr = {0};
1664 struct ibv_flow_spec_counter_action counter = {
1665 .type = IBV_FLOW_SPEC_ACTION_COUNT,
1667 .counter_set_handle = 0,
1670 init_attr.counter_set_id = 0;
1671 parser->cs = mlx5_glue->create_counter_set(priv->ctx, &init_attr);
1676 counter.counter_set_handle = parser->cs->handle;
1677 mlx5_flow_create_copy(parser, &counter, size);
1683 * Complete flow rule creation with a drop queue.
1686 * Pointer to Ethernet device.
1688 * Internal parser structure.
1690 * Pointer to the rte_flow.
1692 * Perform verbose error reporting if not NULL.
1695 * 0 on success, a negative errno value otherwise and rte_errno is set.
1698 mlx5_flow_create_action_queue_drop(struct rte_eth_dev *dev,
1699 struct mlx5_flow_parse *parser,
1700 struct rte_flow *flow,
1701 struct rte_flow_error *error)
1703 struct priv *priv = dev->data->dev_private;
1704 struct ibv_flow_spec_action_drop *drop;
1705 unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
1710 drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
1711 parser->queue[HASH_RXQ_ETH].offset);
1712 *drop = (struct ibv_flow_spec_action_drop){
1713 .type = IBV_FLOW_SPEC_ACTION_DROP,
1716 ++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
1717 parser->queue[HASH_RXQ_ETH].offset += size;
1718 flow->frxq[HASH_RXQ_ETH].ibv_attr =
1719 parser->queue[HASH_RXQ_ETH].ibv_attr;
1721 flow->cs = parser->cs;
1722 if (!priv->dev->data->dev_started)
1724 parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
1725 flow->frxq[HASH_RXQ_ETH].ibv_flow =
1726 mlx5_glue->create_flow(priv->flow_drop_queue->qp,
1727 flow->frxq[HASH_RXQ_ETH].ibv_attr);
1728 if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1729 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1730 NULL, "flow rule creation failure");
1736 if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1737 claim_zero(mlx5_glue->destroy_flow
1738 (flow->frxq[HASH_RXQ_ETH].ibv_flow));
1739 flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
1741 if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
1742 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
1743 flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
1746 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
1754 * Create hash Rx queues when RSS is enabled.
1757 * Pointer to Ethernet device.
1759 * Internal parser structure.
1761 * Pointer to the rte_flow.
1763 * Perform verbose error reporting if not NULL.
1766 * 0 on success, a negative errno value otherwise and rte_errno is set.
1769 mlx5_flow_create_action_queue_rss(struct rte_eth_dev *dev,
1770 struct mlx5_flow_parse *parser,
1771 struct rte_flow *flow,
1772 struct rte_flow_error *error)
1774 struct priv *priv = dev->data->dev_private;
1777 for (i = 0; i != hash_rxq_init_n; ++i) {
1778 uint64_t hash_fields;
1780 if (!parser->queue[i].ibv_attr)
1782 flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
1783 parser->queue[i].ibv_attr = NULL;
1784 hash_fields = hash_rxq_init[i].hash_fields;
1785 if (!priv->dev->data->dev_started)
1787 flow->frxq[i].hrxq =
1789 parser->rss_conf.rss_key,
1790 parser->rss_conf.rss_key_len,
1794 if (flow->frxq[i].hrxq)
1796 flow->frxq[i].hrxq =
1798 parser->rss_conf.rss_key,
1799 parser->rss_conf.rss_key_len,
1803 if (!flow->frxq[i].hrxq) {
1804 return rte_flow_error_set(error, ENOMEM,
1805 RTE_FLOW_ERROR_TYPE_HANDLE,
1807 "cannot create hash rxq");
1814 * Complete flow rule creation.
1817 * Pointer to Ethernet device.
1819 * Internal parser structure.
1821 * Pointer to the rte_flow.
1823 * Perform verbose error reporting if not NULL.
1826 * 0 on success, a negative errno value otherwise and rte_errno is set.
1829 mlx5_flow_create_action_queue(struct rte_eth_dev *dev,
1830 struct mlx5_flow_parse *parser,
1831 struct rte_flow *flow,
1832 struct rte_flow_error *error)
1834 struct priv *priv = dev->data->dev_private;
1837 unsigned int flows_n = 0;
1841 assert(!parser->drop);
1842 ret = mlx5_flow_create_action_queue_rss(dev, parser, flow, error);
1846 flow->cs = parser->cs;
1847 if (!priv->dev->data->dev_started)
1849 for (i = 0; i != hash_rxq_init_n; ++i) {
1850 if (!flow->frxq[i].hrxq)
1852 flow->frxq[i].ibv_flow =
1853 mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
1854 flow->frxq[i].ibv_attr);
1855 if (!flow->frxq[i].ibv_flow) {
1856 rte_flow_error_set(error, ENOMEM,
1857 RTE_FLOW_ERROR_TYPE_HANDLE,
1858 NULL, "flow rule creation failure");
1862 DRV_LOG(DEBUG, "port %u %p type %d QP %p ibv_flow %p",
1865 (void *)flow->frxq[i].hrxq,
1866 (void *)flow->frxq[i].ibv_flow);
1869 rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE,
1870 NULL, "internal error in flow creation");
1873 for (i = 0; i != parser->queues_n; ++i) {
1874 struct mlx5_rxq_data *q =
1875 (*priv->rxqs)[parser->queues[i]];
1877 q->mark |= parser->mark;
1881 ret = rte_errno; /* Save rte_errno before cleanup. */
1883 for (i = 0; i != hash_rxq_init_n; ++i) {
1884 if (flow->frxq[i].ibv_flow) {
1885 struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;
1887 claim_zero(mlx5_glue->destroy_flow(ibv_flow));
1889 if (flow->frxq[i].hrxq)
1890 mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
1891 if (flow->frxq[i].ibv_attr)
1892 rte_free(flow->frxq[i].ibv_attr);
1895 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
1899 rte_errno = ret; /* Restore rte_errno. */
1907 * Pointer to Ethernet device.
1909 * Pointer to a TAILQ flow list.
1911 * Flow rule attributes.
1912 * @param[in] pattern
1913 * Pattern specification (list terminated by the END pattern item).
1914 * @param[in] actions
1915 * Associated actions (list terminated by the END action).
1917 * Perform verbose error reporting if not NULL.
1920 * A flow on success, NULL otherwise and rte_errno is set.
1922 static struct rte_flow *
1923 mlx5_flow_list_create(struct rte_eth_dev *dev,
1924 struct mlx5_flows *list,
1925 const struct rte_flow_attr *attr,
1926 const struct rte_flow_item items[],
1927 const struct rte_flow_action actions[],
1928 struct rte_flow_error *error)
1930 struct mlx5_flow_parse parser = { .create = 1, };
1931 struct rte_flow *flow = NULL;
1935 ret = mlx5_flow_convert(dev, attr, items, actions, error, &parser);
1938 flow = rte_calloc(__func__, 1,
1939 sizeof(*flow) + parser.queues_n * sizeof(uint16_t),
1942 rte_flow_error_set(error, ENOMEM,
1943 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1945 "cannot allocate flow memory");
1948 /* Copy queues configuration. */
1949 flow->queues = (uint16_t (*)[])(flow + 1);
1950 memcpy(flow->queues, parser.queues, parser.queues_n * sizeof(uint16_t));
1951 flow->queues_n = parser.queues_n;
1952 flow->mark = parser.mark;
1953 /* Copy RSS configuration. */
1954 flow->rss_conf = parser.rss_conf;
1955 flow->rss_conf.rss_key = flow->rss_key;
1956 memcpy(flow->rss_key, parser.rss_key, parser.rss_conf.rss_key_len);
1957 /* Finalise the flow. */
1959 ret = mlx5_flow_create_action_queue_drop(dev, &parser, flow,
1962 ret = mlx5_flow_create_action_queue(dev, &parser, flow, error);
1965 TAILQ_INSERT_TAIL(list, flow, next);
1966 DRV_LOG(DEBUG, "port %u flow created %p", dev->data->port_id,
1970 DRV_LOG(ERR, "port %u flow creation error: %s", dev->data->port_id,
1972 for (i = 0; i != hash_rxq_init_n; ++i) {
1973 if (parser.queue[i].ibv_attr)
1974 rte_free(parser.queue[i].ibv_attr);
1981 * Validate a flow supported by the NIC.
1983 * @see rte_flow_validate()
1987 mlx5_flow_validate(struct rte_eth_dev *dev,
1988 const struct rte_flow_attr *attr,
1989 const struct rte_flow_item items[],
1990 const struct rte_flow_action actions[],
1991 struct rte_flow_error *error)
1993 struct mlx5_flow_parse parser = { .create = 0, };
1995 return mlx5_flow_convert(dev, attr, items, actions, error, &parser);
2001 * @see rte_flow_create()
2005 mlx5_flow_create(struct rte_eth_dev *dev,
2006 const struct rte_flow_attr *attr,
2007 const struct rte_flow_item items[],
2008 const struct rte_flow_action actions[],
2009 struct rte_flow_error *error)
2011 struct priv *priv = dev->data->dev_private;
2013 return mlx5_flow_list_create(dev, &priv->flows, attr, items, actions,
2018 * Destroy a flow in a list.
2021 * Pointer to Ethernet device.
2023 * Pointer to a TAILQ flow list.
2028 mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
2029 struct rte_flow *flow)
2031 struct priv *priv = dev->data->dev_private;
2034 if (flow->drop || !flow->mark)
2036 for (i = 0; i != flow->queues_n; ++i) {
2037 struct rte_flow *tmp;
2041 * To remove the mark from the queue, the queue must not be
2042 * present in any other marked flow (RSS or not).
2044 TAILQ_FOREACH(tmp, list, next) {
2046 uint16_t *tqs = NULL;
2051 for (j = 0; j != hash_rxq_init_n; ++j) {
2052 if (!tmp->frxq[j].hrxq)
2054 tqs = tmp->frxq[j].hrxq->ind_table->queues;
2055 tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
2059 for (j = 0; (j != tq_n) && !mark; j++)
2060 if (tqs[j] == (*flow->queues)[i])
2063 (*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
2067 if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
2068 claim_zero(mlx5_glue->destroy_flow
2069 (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2070 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
2072 for (i = 0; i != hash_rxq_init_n; ++i) {
2073 struct mlx5_flow *frxq = &flow->frxq[i];
2076 claim_zero(mlx5_glue->destroy_flow
2079 mlx5_hrxq_release(dev, frxq->hrxq);
2081 rte_free(frxq->ibv_attr);
2085 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2088 TAILQ_REMOVE(list, flow, next);
2089 DRV_LOG(DEBUG, "port %u flow destroyed %p", dev->data->port_id,
2095 * Destroy all flows.
2098 * Pointer to Ethernet device.
2100 * Pointer to a TAILQ flow list.
2103 mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
2105 while (!TAILQ_EMPTY(list)) {
2106 struct rte_flow *flow;
2108 flow = TAILQ_FIRST(list);
2109 mlx5_flow_list_destroy(dev, list, flow);
2114 * Create drop queue.
2117 * Pointer to Ethernet device.
2120 * 0 on success, a negative errno value otherwise and rte_errno is set.
2123 mlx5_flow_create_drop_queue(struct rte_eth_dev *dev)
2125 struct priv *priv = dev->data->dev_private;
2126 struct mlx5_hrxq_drop *fdq = NULL;
2130 fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
2133 "port %u cannot allocate memory for drop queue",
2134 dev->data->port_id);
2138 fdq->cq = mlx5_glue->create_cq(priv->ctx, 1, NULL, NULL, 0);
2140 DRV_LOG(WARNING, "port %u cannot allocate CQ for drop queue",
2141 dev->data->port_id);
2145 fdq->wq = mlx5_glue->create_wq
2147 &(struct ibv_wq_init_attr){
2148 .wq_type = IBV_WQT_RQ,
2155 DRV_LOG(WARNING, "port %u cannot allocate WQ for drop queue",
2156 dev->data->port_id);
2160 fdq->ind_table = mlx5_glue->create_rwq_ind_table
2162 &(struct ibv_rwq_ind_table_init_attr){
2163 .log_ind_tbl_size = 0,
2164 .ind_tbl = &fdq->wq,
2167 if (!fdq->ind_table) {
2169 "port %u cannot allocate indirection table for drop"
2171 dev->data->port_id);
2175 fdq->qp = mlx5_glue->create_qp_ex
2177 &(struct ibv_qp_init_attr_ex){
2178 .qp_type = IBV_QPT_RAW_PACKET,
2180 IBV_QP_INIT_ATTR_PD |
2181 IBV_QP_INIT_ATTR_IND_TABLE |
2182 IBV_QP_INIT_ATTR_RX_HASH,
2183 .rx_hash_conf = (struct ibv_rx_hash_conf){
2185 IBV_RX_HASH_FUNC_TOEPLITZ,
2186 .rx_hash_key_len = rss_hash_default_key_len,
2187 .rx_hash_key = rss_hash_default_key,
2188 .rx_hash_fields_mask = 0,
2190 .rwq_ind_tbl = fdq->ind_table,
2194 DRV_LOG(WARNING, "port %u cannot allocate QP for drop queue",
2195 dev->data->port_id);
2199 priv->flow_drop_queue = fdq;
2203 claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2205 claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2207 claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2209 claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2212 priv->flow_drop_queue = NULL;
2217 * Delete drop queue.
2220 * Pointer to Ethernet device.
2223 mlx5_flow_delete_drop_queue(struct rte_eth_dev *dev)
2225 struct priv *priv = dev->data->dev_private;
2226 struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
2231 claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2233 claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2235 claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2237 claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2239 priv->flow_drop_queue = NULL;
2246 * Pointer to Ethernet device.
2248 * Pointer to a TAILQ flow list.
2251 mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
2253 struct priv *priv = dev->data->dev_private;
2254 struct rte_flow *flow;
2256 TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
2258 struct mlx5_ind_table_ibv *ind_tbl = NULL;
2261 if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
2263 claim_zero(mlx5_glue->destroy_flow
2264 (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2265 flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
2266 DRV_LOG(DEBUG, "port %u flow %p removed",
2267 dev->data->port_id, (void *)flow);
2271 /* Verify the flow has not already been cleaned. */
2272 for (i = 0; i != hash_rxq_init_n; ++i) {
2273 if (!flow->frxq[i].ibv_flow)
2276 * Indirection table may be necessary to remove the
2277 * flags in the Rx queues.
2278 * This helps to speed up the process by avoiding another loop.
2281 ind_tbl = flow->frxq[i].hrxq->ind_table;
2284 if (i == hash_rxq_init_n)
2288 for (i = 0; i != ind_tbl->queues_n; ++i)
2289 (*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
2291 for (i = 0; i != hash_rxq_init_n; ++i) {
2292 if (!flow->frxq[i].ibv_flow)
2294 claim_zero(mlx5_glue->destroy_flow
2295 (flow->frxq[i].ibv_flow));
2296 flow->frxq[i].ibv_flow = NULL;
2297 mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
2298 flow->frxq[i].hrxq = NULL;
2300 DRV_LOG(DEBUG, "port %u flow %p removed", dev->data->port_id,
2309 * Pointer to Ethernet device.
2311 * Pointer to a TAILQ flow list.
2314 * 0 on success, a negative errno value otherwise and rte_errno is set.
2317 mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
2319 struct priv *priv = dev->data->dev_private;
2320 struct rte_flow *flow;
2322 TAILQ_FOREACH(flow, list, next) {
2326 flow->frxq[HASH_RXQ_ETH].ibv_flow =
2327 mlx5_glue->create_flow
2328 (priv->flow_drop_queue->qp,
2329 flow->frxq[HASH_RXQ_ETH].ibv_attr);
2330 if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2332 "port %u flow %p cannot be applied",
2333 dev->data->port_id, (void *)flow);
2337 DRV_LOG(DEBUG, "port %u flow %p applied",
2338 dev->data->port_id, (void *)flow);
2342 for (i = 0; i != hash_rxq_init_n; ++i) {
2343 if (!flow->frxq[i].ibv_attr)
2345 flow->frxq[i].hrxq =
2346 mlx5_hrxq_get(dev, flow->rss_conf.rss_key,
2347 flow->rss_conf.rss_key_len,
2348 hash_rxq_init[i].hash_fields,
2351 if (flow->frxq[i].hrxq)
2353 flow->frxq[i].hrxq =
2354 mlx5_hrxq_new(dev, flow->rss_conf.rss_key,
2355 flow->rss_conf.rss_key_len,
2356 hash_rxq_init[i].hash_fields,
2359 if (!flow->frxq[i].hrxq) {
2361 "port %u flow %p cannot be applied",
2362 dev->data->port_id, (void *)flow);
2367 flow->frxq[i].ibv_flow =
2368 mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
2369 flow->frxq[i].ibv_attr);
2370 if (!flow->frxq[i].ibv_flow) {
2372 "port %u flow %p cannot be applied",
2373 dev->data->port_id, (void *)flow);
2377 DRV_LOG(DEBUG, "port %u flow %p applied",
2378 dev->data->port_id, (void *)flow);
2382 for (i = 0; i != flow->queues_n; ++i)
2383 (*priv->rxqs)[(*flow->queues)[i]]->mark = 1;
2389 * Verify the flow list is empty.
2392 * Pointer to Ethernet device.
2394 * @return the number of flows not released.
2397 mlx5_flow_verify(struct rte_eth_dev *dev)
2399 struct priv *priv = dev->data->dev_private;
2400 struct rte_flow *flow;
2403 TAILQ_FOREACH(flow, &priv->flows, next) {
2404 DRV_LOG(DEBUG, "port %u flow %p still referenced",
2405 dev->data->port_id, (void *)flow);
/**
 * Enable a control flow, with an optional VLAN layer, configured from
 * the control plane.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param eth_spec
 *   An Ethernet flow spec to apply.
 * @param eth_mask
 *   An Ethernet flow mask to apply.
 * @param vlan_spec
 *   A VLAN flow spec to apply.
 * @param vlan_mask
 *   A VLAN flow mask to apply.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
		    struct rte_flow_item_eth *eth_spec,
		    struct rte_flow_item_eth *eth_mask,
		    struct rte_flow_item_vlan *vlan_spec,
		    struct rte_flow_item_vlan *vlan_mask)
{
	struct priv *priv = dev->data->dev_private;
	const struct rte_flow_attr attr = {
		.ingress = 1,
		.priority = MLX5_CTRL_FLOW_PRIORITY,
	};
	struct rte_flow_item items[] = {
		{
			.type = RTE_FLOW_ITEM_TYPE_ETH,
			.spec = eth_spec,
			.last = NULL,
			.mask = eth_mask,
		},
		{
			.type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
				RTE_FLOW_ITEM_TYPE_END,
			.spec = vlan_spec,
			.last = NULL,
			.mask = vlan_mask,
		},
		{
			.type = RTE_FLOW_ITEM_TYPE_END,
		},
	};
	struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_RSS,
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		},
	};
	struct rte_flow *flow;
	struct rte_flow_error error;
	unsigned int i;
	/* Scratch space large enough for the RSS action and its queues. */
	union {
		struct rte_flow_action_rss rss;
		struct {
			const struct rte_eth_rss_conf *rss_conf;
			uint16_t num;
			uint16_t queue[RTE_MAX_QUEUES_PER_PORT];
		} local;
	} action_rss;

	if (!priv->reta_idx_n) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	for (i = 0; i != priv->reta_idx_n; ++i)
		action_rss.local.queue[i] = (*priv->reta_idx)[i];
	action_rss.local.rss_conf = &priv->rss_conf;
	action_rss.local.num = priv->reta_idx_n;
	actions[0].conf = (const void *)&action_rss.rss;
	flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
				     actions, &error);
	if (!flow)
		return -rte_errno;
	return 0;
}
/**
 * Enable a control flow configured from the control plane.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param eth_spec
 *   An Ethernet flow spec to apply.
 * @param eth_mask
 *   An Ethernet flow mask to apply.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_ctrl_flow(struct rte_eth_dev *dev,
	       struct rte_flow_item_eth *eth_spec,
	       struct rte_flow_item_eth *eth_mask)
{
	return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
}
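
/*
 * Usage sketch (illustrative): the traffic-enable path is expected to
 * request default traffic classes this way, e.g. for broadcast:
 *
 *	struct rte_flow_item_eth bcast = {
 *		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *	};
 *
 *	if (mlx5_ctrl_flow(dev, &bcast, &bcast))
 *		... handle the error, rte_errno is set ...
 */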
/**
 * Destroy a flow.
 *
 * @see rte_flow_destroy()
 * @see rte_flow_ops
 */
int
mlx5_flow_destroy(struct rte_eth_dev *dev,
		  struct rte_flow *flow,
		  struct rte_flow_error *error __rte_unused)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_destroy(dev, &priv->flows, flow);
	return 0;
}
/**
 * Destroy all flows.
 *
 * @see rte_flow_flush()
 * @see rte_flow_ops
 */
int
mlx5_flow_flush(struct rte_eth_dev *dev,
		struct rte_flow_error *error __rte_unused)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_flush(dev, &priv->flows);
	return 0;
}
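
/*
 * Usage sketch (illustrative): applications reach this handler through
 * the generic API, which resolves it via mlx5_flow_ops:
 *
 *	struct rte_flow_error err;
 *
 *	if (rte_flow_flush(port_id, &err))
 *		printf("flush failed: %s\n", err.message);
 */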
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
/**
 * Query flow counter.
 *
 * @param cs
 *   The counter set to query.
 * @param counter_stats
 *   Accumulated statistics the returned values are relative to.
 * @param[out] query_count
 *   Returned data from the counter.
 * @param error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_query_count(struct ibv_counter_set *cs,
		      struct mlx5_flow_counter_stats *counter_stats,
		      struct rte_flow_query_count *query_count,
		      struct rte_flow_error *error)
{
	uint64_t counters[2];
	struct ibv_query_counter_set_attr query_cs_attr = {
		.cs = cs,
		.query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
	};
	struct ibv_counter_set_data query_out = {
		.out = counters,
		.outlen = 2 * sizeof(uint64_t),
	};
	int err = mlx5_glue->query_counter_set(&query_cs_attr, &query_out);

	if (err)
		return rte_flow_error_set(error, err,
					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
					  NULL,
					  "cannot read counter");
	query_count->hits_set = 1;
	query_count->bytes_set = 1;
	query_count->hits = counters[0] - counter_stats->hits;
	query_count->bytes = counters[1] - counter_stats->bytes;
	/* On reset, move the baseline to the current hardware values. */
	if (query_count->reset) {
		counter_stats->hits = counters[0];
		counter_stats->bytes = counters[1];
	}
	return 0;
}
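
/*
 * Worked example (illustrative): if the hardware counter set reads
 * 1000 hits while counter_stats->hits holds a baseline of 400, the
 * query reports 600 hits. With query_count->reset set, the baseline
 * then moves to 1000 so the next query counts from zero again:
 *
 *	counters[0] == 1000, counter_stats->hits == 400
 *	query_count->hits == 1000 - 400 == 600
 *	counter_stats->hits == 1000 (new baseline after reset)
 */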
/**
 * Query a flow.
 *
 * @see rte_flow_query()
 * @see rte_flow_ops
 */
int
mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
		struct rte_flow *flow,
		enum rte_flow_action_type action __rte_unused,
		void *data,
		struct rte_flow_error *error)
{
	if (flow->cs) {
		int ret;

		ret = mlx5_flow_query_count(flow->cs,
					    &flow->counter_stats,
					    (struct rte_flow_query_count *)data,
					    error);
		if (ret)
			return ret;
	} else {
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
					  NULL,
					  "no counter found for flow");
	}
	return 0;
}
#endif
/**
 * Enable or disable isolated mode.
 *
 * @see rte_flow_isolate()
 * @see rte_flow_ops
 */
int
mlx5_flow_isolate(struct rte_eth_dev *dev,
		  int enable,
		  struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	if (dev->data->dev_started) {
		rte_flow_error_set(error, EBUSY,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "port must be stopped first");
		return -rte_errno;
	}
	priv->isolated = !!enable;
	if (enable)
		priv->dev->dev_ops = &mlx5_dev_ops_isolate;
	else
		priv->dev->dev_ops = &mlx5_dev_ops;
	return 0;
}
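
/*
 * Usage sketch (illustrative): isolated mode must be requested while
 * the port is stopped, typically right after configuration:
 *
 *	struct rte_flow_error err;
 *
 *	if (rte_flow_isolate(port_id, 1, &err))
 *		printf("cannot enter isolated mode: %s\n", err.message);
 */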
/**
 * Convert a flow director filter to a generic flow.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Flow director filter to convert.
 * @param attributes
 *   Generic flow parameters structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
			 const struct rte_eth_fdir_filter *fdir_filter,
			 struct mlx5_fdir *attributes)
{
	struct priv *priv = dev->data->dev_private;
	const struct rte_eth_fdir_input *input = &fdir_filter->input;

	/* Validate queue number. */
	if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
		DRV_LOG(ERR, "port %u invalid queue number %d",
			dev->data->port_id, fdir_filter->action.rx_queue);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	attributes->attr.ingress = 1;
	attributes->items[0] = (struct rte_flow_item) {
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.spec = &attributes->l2,
		.mask = &attributes->l2_mask,
	};
	switch (fdir_filter->action.behavior) {
	case RTE_ETH_FDIR_ACCEPT:
		attributes->actions[0] = (struct rte_flow_action){
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &attributes->queue,
		};
		break;
	case RTE_ETH_FDIR_REJECT:
		attributes->actions[0] = (struct rte_flow_action){
			.type = RTE_FLOW_ACTION_TYPE_DROP,
		};
		break;
	default:
		DRV_LOG(ERR, "port %u invalid behavior %d",
			dev->data->port_id,
			fdir_filter->action.behavior);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	attributes->queue.index = fdir_filter->action.rx_queue;
	switch (fdir_filter->input.flow_type) {
	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = input->flow.udp4_flow.ip.src_ip,
			.dst_addr = input->flow.udp4_flow.ip.dst_ip,
			.time_to_live = input->flow.udp4_flow.ip.ttl,
			.type_of_service = input->flow.udp4_flow.ip.tos,
			.next_proto_id = input->flow.udp4_flow.ip.proto,
		};
		attributes->l4.udp.hdr = (struct udp_hdr){
			.src_port = input->flow.udp4_flow.src_port,
			.dst_port = input->flow.udp4_flow.dst_port,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV4,
			.spec = &attributes->l3,
			.mask = &attributes->l3,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_UDP,
			.spec = &attributes->l4,
			.mask = &attributes->l4,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = input->flow.tcp4_flow.ip.src_ip,
			.dst_addr = input->flow.tcp4_flow.ip.dst_ip,
			.time_to_live = input->flow.tcp4_flow.ip.ttl,
			.type_of_service = input->flow.tcp4_flow.ip.tos,
			.next_proto_id = input->flow.tcp4_flow.ip.proto,
		};
		attributes->l4.tcp.hdr = (struct tcp_hdr){
			.src_port = input->flow.tcp4_flow.src_port,
			.dst_port = input->flow.tcp4_flow.dst_port,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV4,
			.spec = &attributes->l3,
			.mask = &attributes->l3,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_TCP,
			.spec = &attributes->l4,
			.mask = &attributes->l4,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = input->flow.ip4_flow.src_ip,
			.dst_addr = input->flow.ip4_flow.dst_ip,
			.time_to_live = input->flow.ip4_flow.ttl,
			.type_of_service = input->flow.ip4_flow.tos,
			.next_proto_id = input->flow.ip4_flow.proto,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV4,
			.spec = &attributes->l3,
			.mask = &attributes->l3,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
			.hop_limits = input->flow.udp6_flow.ip.hop_limits,
			.proto = input->flow.udp6_flow.ip.proto,
		};
		memcpy(attributes->l3.ipv6.hdr.src_addr,
		       input->flow.udp6_flow.ip.src_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
		memcpy(attributes->l3.ipv6.hdr.dst_addr,
		       input->flow.udp6_flow.ip.dst_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
		attributes->l4.udp.hdr = (struct udp_hdr){
			.src_port = input->flow.udp6_flow.src_port,
			.dst_port = input->flow.udp6_flow.dst_port,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV6,
			.spec = &attributes->l3,
			.mask = &attributes->l3,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_UDP,
			.spec = &attributes->l4,
			.mask = &attributes->l4,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
			.hop_limits = input->flow.tcp6_flow.ip.hop_limits,
			.proto = input->flow.tcp6_flow.ip.proto,
		};
		memcpy(attributes->l3.ipv6.hdr.src_addr,
		       input->flow.tcp6_flow.ip.src_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
		memcpy(attributes->l3.ipv6.hdr.dst_addr,
		       input->flow.tcp6_flow.ip.dst_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
		attributes->l4.tcp.hdr = (struct tcp_hdr){
			.src_port = input->flow.tcp6_flow.src_port,
			.dst_port = input->flow.tcp6_flow.dst_port,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV6,
			.spec = &attributes->l3,
			.mask = &attributes->l3,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_TCP,
			.spec = &attributes->l4,
			.mask = &attributes->l4,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
			.hop_limits = input->flow.ipv6_flow.hop_limits,
			.proto = input->flow.ipv6_flow.proto,
		};
		memcpy(attributes->l3.ipv6.hdr.src_addr,
		       input->flow.ipv6_flow.src_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
		memcpy(attributes->l3.ipv6.hdr.dst_addr,
		       input->flow.ipv6_flow.dst_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV6,
			.spec = &attributes->l3,
			.mask = &attributes->l3,
		};
		break;
	default:
		DRV_LOG(ERR, "port %u invalid flow type %d",
			dev->data->port_id, fdir_filter->input.flow_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	return 0;
}
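
/*
 * Conversion sketch (illustrative, values made up): a perfect-mode
 * IPv4/UDP filter steering packets to queue 3 is rewritten into
 * ETH / IPV4 / UDP pattern items plus a QUEUE action, roughly:
 *
 *	struct rte_eth_fdir_filter f = {
 *		.input = {
 *			.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
 *			.flow.udp4_flow = {
 *				.ip.dst_ip = rte_cpu_to_be_32(0xc0a80101),
 *				.dst_port = rte_cpu_to_be_16(4789),
 *			},
 *		},
 *		.action = {
 *			.rx_queue = 3,
 *			.behavior = RTE_ETH_FDIR_ACCEPT,
 *		},
 *	};
 *
 * After mlx5_fdir_filter_convert(), attributes->items[] holds the
 * three pattern items and attributes->actions[0] is the QUEUE action.
 */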
/**
 * Add new flow director filter and store it in list.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Flow director filter to add.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_add(struct rte_eth_dev *dev,
		     const struct rte_eth_fdir_filter *fdir_filter)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_fdir attributes = {
		.attr.group = 0,
		.l2_mask = {
			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.type = 0,
		},
	};
	struct mlx5_flow_parse parser = {
		.layer = HASH_RXQ_ETH,
	};
	struct rte_flow_error error;
	struct rte_flow *flow;
	int ret;

	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
	if (ret)
		return ret;
	ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
				attributes.actions, &error, &parser);
	if (ret)
		return ret;
	flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
				     attributes.items, attributes.actions,
				     &error);
	if (flow) {
		DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
			(void *)flow);
		return 0;
	}
	return -rte_errno;
}
/**
 * Delete specific filter.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Filter to be deleted.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_delete(struct rte_eth_dev *dev,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_fdir attributes = {
		.attr.group = 0,
	};
	struct mlx5_flow_parse parser = {
		.create = 1,
		.layer = HASH_RXQ_ETH,
	};
	struct rte_flow_error error;
	struct rte_flow *flow;
	unsigned int i;
	int ret;

	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
	if (ret)
		return ret;
	ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
				attributes.actions, &error, &parser);
	if (ret)
		goto exit;
	/*
	 * Special case for drop action which is only set in the
	 * specifications when the flow is created. In this situation the
	 * drop specification is missing.
	 */
	if (parser.drop) {
		struct ibv_flow_spec_action_drop *drop;

		drop = (void *)((uintptr_t)parser.queue[HASH_RXQ_ETH].ibv_attr +
				parser.queue[HASH_RXQ_ETH].offset);
		*drop = (struct ibv_flow_spec_action_drop){
			.type = IBV_FLOW_SPEC_ACTION_DROP,
			.size = sizeof(struct ibv_flow_spec_action_drop),
		};
		parser.queue[HASH_RXQ_ETH].ibv_attr->num_of_specs++;
	}
	TAILQ_FOREACH(flow, &priv->flows, next) {
		struct ibv_flow_attr *attr;
		struct ibv_spec_header *attr_h;
		void *spec;
		struct ibv_flow_attr *flow_attr;
		struct ibv_spec_header *flow_h;
		void *flow_spec;
		unsigned int specs_n;

		attr = parser.queue[HASH_RXQ_ETH].ibv_attr;
		flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr;
		/* Compare first the attributes. */
		if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
			continue;
		if (attr->num_of_specs == 0)
			continue;
		spec = (void *)((uintptr_t)attr +
				sizeof(struct ibv_flow_attr));
		flow_spec = (void *)((uintptr_t)flow_attr +
				     sizeof(struct ibv_flow_attr));
		specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
		for (i = 0; i != specs_n; ++i) {
			attr_h = spec;
			flow_h = flow_spec;
			if (memcmp(spec, flow_spec,
				   RTE_MIN(attr_h->size, flow_h->size)))
				goto wrong_flow;
			spec = (void *)((uintptr_t)spec + attr_h->size);
			flow_spec = (void *)((uintptr_t)flow_spec +
					     flow_h->size);
		}
		/* At this point, the flows match. */
		break;
wrong_flow:
		/* The flow does not match. */
		continue;
	}
	ret = rte_errno; /* Save rte_errno before cleanup. */
	if (flow)
		mlx5_flow_list_destroy(dev, &priv->flows, flow);
exit:
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser.queue[i].ibv_attr)
			rte_free(parser.queue[i].ibv_attr);
	}
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}
/**
 * Update queue for specific filter.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Filter to be updated.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_update(struct rte_eth_dev *dev,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	int ret;

	ret = mlx5_fdir_filter_delete(dev, fdir_filter);
	if (ret)
		return ret;
	return mlx5_fdir_filter_add(dev, fdir_filter);
}
/**
 * Flush all filters.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
static void
mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_flush(dev, &priv->flows);
}
/**
 * Get flow director information.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[out] fdir_info
 *   Resulting flow director information.
 */
static void
mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
{
	struct priv *priv = dev->data->dev_private;
	struct rte_eth_fdir_masks *mask =
		&priv->dev->data->dev_conf.fdir_conf.mask;

	fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
	fdir_info->guarant_spc = 0;
	rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
	/* Flexible payload is not supported; report everything as zero. */
	fdir_info->max_flexpayload = 0;
	fdir_info->flow_types_mask[0] = 0;
	fdir_info->flex_payload_unit = 0;
	fdir_info->max_flex_payload_segment_num = 0;
	fdir_info->flex_payload_limit = 0;
	memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
}
/**
 * Deal with flow director operations.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
		    void *arg)
{
	struct priv *priv = dev->data->dev_private;
	enum rte_fdir_mode fdir_mode =
		priv->dev->data->dev_conf.fdir_conf.mode;

	if (filter_op == RTE_ETH_FILTER_NOP)
		return 0;
	if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
	    fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
		DRV_LOG(ERR, "port %u flow director mode %d not supported",
			dev->data->port_id, fdir_mode);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	switch (filter_op) {
	case RTE_ETH_FILTER_ADD:
		return mlx5_fdir_filter_add(dev, arg);
	case RTE_ETH_FILTER_UPDATE:
		return mlx5_fdir_filter_update(dev, arg);
	case RTE_ETH_FILTER_DELETE:
		return mlx5_fdir_filter_delete(dev, arg);
	case RTE_ETH_FILTER_FLUSH:
		mlx5_fdir_filter_flush(dev);
		break;
	case RTE_ETH_FILTER_INFO:
		mlx5_fdir_info_get(dev, arg);
		break;
	default:
		DRV_LOG(DEBUG, "port %u unknown operation %u",
			dev->data->port_id, filter_op);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	return 0;
}
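
/*
 * Usage sketch (illustrative): flow director requests arrive through
 * the legacy filter API; adding a filter from an application looks
 * roughly like this (see the conversion sketch above for a populated
 * filter):
 *
 *	struct rte_eth_fdir_filter f = { ... };
 *
 *	ret = rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
 *				      RTE_ETH_FILTER_ADD, &f);
 */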
/**
 * Manage filter operations.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param filter_type
 *   Filter type.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
		     enum rte_filter_type filter_type,
		     enum rte_filter_op filter_op,
		     void *arg)
{
	switch (filter_type) {
	case RTE_ETH_FILTER_GENERIC:
		if (filter_op != RTE_ETH_FILTER_GET) {
			rte_errno = EINVAL;
			return -rte_errno;
		}
		*(const void **)arg = &mlx5_flow_ops;
		return 0;
	case RTE_ETH_FILTER_FDIR:
		return mlx5_fdir_ctrl_func(dev, filter_op, arg);
	default:
		DRV_LOG(ERR, "port %u filter type (%d) not supported",
			dev->data->port_id, filter_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	return 0;
}
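
/*
 * Usage sketch (illustrative): the generic rte_flow layer fetches the
 * driver callbacks through this entry point with RTE_ETH_FILTER_GET:
 *
 *	const struct rte_flow_ops *ops = NULL;
 *
 *	ret = rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_GENERIC,
 *				      RTE_ETH_FILTER_GET, &ops);
 *	if (!ret)
 *		... ops now points to mlx5_flow_ops ...
 */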