1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2016 6WIND S.A.
3 * Copyright 2016 Mellanox Technologies, Ltd
11 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
13 #pragma GCC diagnostic ignored "-Wpedantic"
15 #include <infiniband/verbs.h>
17 #pragma GCC diagnostic error "-Wpedantic"
20 #include <rte_common.h>
21 #include <rte_ether.h>
22 #include <rte_eth_ctrl.h>
23 #include <rte_ethdev_driver.h>
25 #include <rte_flow_driver.h>
26 #include <rte_malloc.h>
30 #include "mlx5_defs.h"
32 #include "mlx5_glue.h"
34 /* Define minimal priority for control plane flows. */
35 #define MLX5_CTRL_FLOW_PRIORITY 4
37 /* Internet Protocol versions. */
41 #ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
42 struct ibv_flow_spec_counter_action {
47 /* Dev ops structure defined in mlx5.c */
48 extern const struct eth_dev_ops mlx5_dev_ops;
49 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
51 /** Structure given to the conversion functions. */
52 struct mlx5_flow_data {
53 struct mlx5_flow_parse *parser; /**< Parser context. */
54 struct rte_flow_error *error; /**< Error context. */
58 mlx5_flow_create_eth(const struct rte_flow_item *item,
59 const void *default_mask,
60 struct mlx5_flow_data *data);
63 mlx5_flow_create_vlan(const struct rte_flow_item *item,
64 const void *default_mask,
65 struct mlx5_flow_data *data);
68 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
69 const void *default_mask,
70 struct mlx5_flow_data *data);
73 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
74 const void *default_mask,
75 struct mlx5_flow_data *data);
78 mlx5_flow_create_udp(const struct rte_flow_item *item,
79 const void *default_mask,
80 struct mlx5_flow_data *data);
83 mlx5_flow_create_tcp(const struct rte_flow_item *item,
84 const void *default_mask,
85 struct mlx5_flow_data *data);
88 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
89 const void *default_mask,
90 struct mlx5_flow_data *data);
92 struct mlx5_flow_parse;
95 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
99 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);
102 mlx5_flow_create_count(struct rte_eth_dev *dev, struct mlx5_flow_parse *parser);
104 /* Hash RX queue types. */
115 /* Initialization data for hash RX queue. */
116 struct hash_rxq_init {
117 uint64_t hash_fields; /* Fields that participate in the hash. */
118 uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
119 unsigned int flow_priority; /* Flow priority to use. */
120 unsigned int ip_version; /* Internet protocol. */
123 /* Initialization data for hash RX queues. */
124 const struct hash_rxq_init hash_rxq_init[] = {
126 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
127 IBV_RX_HASH_DST_IPV4 |
128 IBV_RX_HASH_SRC_PORT_TCP |
129 IBV_RX_HASH_DST_PORT_TCP),
130 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
132 .ip_version = MLX5_IPV4,
135 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
136 IBV_RX_HASH_DST_IPV4 |
137 IBV_RX_HASH_SRC_PORT_UDP |
138 IBV_RX_HASH_DST_PORT_UDP),
139 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
141 .ip_version = MLX5_IPV4,
144 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
145 IBV_RX_HASH_DST_IPV4),
146 .dpdk_rss_hf = (ETH_RSS_IPV4 |
149 .ip_version = MLX5_IPV4,
152 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
153 IBV_RX_HASH_DST_IPV6 |
154 IBV_RX_HASH_SRC_PORT_TCP |
155 IBV_RX_HASH_DST_PORT_TCP),
156 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
158 .ip_version = MLX5_IPV6,
161 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
162 IBV_RX_HASH_DST_IPV6 |
163 IBV_RX_HASH_SRC_PORT_UDP |
164 IBV_RX_HASH_DST_PORT_UDP),
165 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
167 .ip_version = MLX5_IPV6,
170 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
171 IBV_RX_HASH_DST_IPV6),
172 .dpdk_rss_hf = (ETH_RSS_IPV6 |
175 .ip_version = MLX5_IPV6,
184 /* Number of entries in hash_rxq_init[]. */
185 const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
187 /** Structure for holding counter stats. */
188 struct mlx5_flow_counter_stats {
189 uint64_t hits; /**< Number of packets matched by the rule. */
190 uint64_t bytes; /**< Number of bytes matched by the rule. */
193 /** Structure for the drop queue. */
194 struct mlx5_hrxq_drop {
195 struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
196 struct ibv_qp *qp; /**< Verbs queue pair. */
197 struct ibv_wq *wq; /**< Verbs work queue. */
198 struct ibv_cq *cq; /**< Verbs completion queue. */
201 /* Flow structures. */
203 uint64_t hash_fields; /**< Fields that participate in the hash. */
204 struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
205 struct ibv_flow *ibv_flow; /**< Verbs flow. */
206 struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
209 /* Drop flow structures. */
210 struct mlx5_flow_drop {
211 struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
212 struct ibv_flow *ibv_flow; /**< Verbs flow. */
216 TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
217 uint32_t mark:1; /**< Set if the flow is marked. */
218 uint32_t drop:1; /**< Drop queue. */
219 struct rte_flow_action_rss rss_conf; /**< RSS configuration. */
220 uint16_t (*queues)[]; /**< Queue indexes to use. */
221 uint8_t rss_key[40]; /**< Copy of the RSS key. */
222 struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
223 struct mlx5_flow_counter_stats counter_stats; /**< The counter stats. */
224 struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
225 /**< Flow with Rx queue. */
228 /** Static initializer for items. */
230 (const enum rte_flow_item_type []){ \
231 __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
234 /** Structure to generate a simple graph of layers supported by the NIC. */
235 struct mlx5_flow_items {
236 /** List of possible actions for these items. */
237 const enum rte_flow_action_type *const actions;
238 /** Bit-masks corresponding to the possibilities for the item. */
241 * Default bit-masks to use when item->mask is not provided. When
242 * \default_mask is also NULL, the full supported bit-mask (\mask) is
245 const void *default_mask;
246 /** Bit-masks size in bytes. */
247 const unsigned int mask_sz;
249 * Conversion function from rte_flow to NIC specific flow.
252 * rte_flow item to convert.
253 * @param default_mask
254 * Default bit-masks to use when item->mask is not provided.
256 * Internal structure to store the conversion.
259 * 0 on success, a negative errno value otherwise and rte_errno is
262 int (*convert)(const struct rte_flow_item *item,
263 const void *default_mask,
264 struct mlx5_flow_data *data);
265 /** Size in bytes of the destination structure. */
266 const unsigned int dst_sz;
267 /** List of possible following items. */
268 const enum rte_flow_item_type *const items;
271 /** Valid actions for this PMD. */
272 static const enum rte_flow_action_type valid_actions[] = {
273 RTE_FLOW_ACTION_TYPE_DROP,
274 RTE_FLOW_ACTION_TYPE_QUEUE,
275 RTE_FLOW_ACTION_TYPE_MARK,
276 RTE_FLOW_ACTION_TYPE_FLAG,
277 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
278 RTE_FLOW_ACTION_TYPE_COUNT,
280 RTE_FLOW_ACTION_TYPE_END,
283 /** Graph of supported items and associated actions. */
284 static const struct mlx5_flow_items mlx5_flow_items[] = {
285 [RTE_FLOW_ITEM_TYPE_END] = {
286 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
287 RTE_FLOW_ITEM_TYPE_VXLAN),
289 [RTE_FLOW_ITEM_TYPE_ETH] = {
290 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
291 RTE_FLOW_ITEM_TYPE_IPV4,
292 RTE_FLOW_ITEM_TYPE_IPV6),
293 .actions = valid_actions,
294 .mask = &(const struct rte_flow_item_eth){
295 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
296 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
299 .default_mask = &rte_flow_item_eth_mask,
300 .mask_sz = sizeof(struct rte_flow_item_eth),
301 .convert = mlx5_flow_create_eth,
302 .dst_sz = sizeof(struct ibv_flow_spec_eth),
304 [RTE_FLOW_ITEM_TYPE_VLAN] = {
305 .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
306 RTE_FLOW_ITEM_TYPE_IPV6),
307 .actions = valid_actions,
308 .mask = &(const struct rte_flow_item_vlan){
312 .default_mask = &rte_flow_item_vlan_mask,
313 .mask_sz = sizeof(struct rte_flow_item_vlan),
314 .convert = mlx5_flow_create_vlan,
317 [RTE_FLOW_ITEM_TYPE_IPV4] = {
318 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
319 RTE_FLOW_ITEM_TYPE_TCP),
320 .actions = valid_actions,
321 .mask = &(const struct rte_flow_item_ipv4){
325 .type_of_service = -1,
329 .default_mask = &rte_flow_item_ipv4_mask,
330 .mask_sz = sizeof(struct rte_flow_item_ipv4),
331 .convert = mlx5_flow_create_ipv4,
332 .dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
334 [RTE_FLOW_ITEM_TYPE_IPV6] = {
335 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
336 RTE_FLOW_ITEM_TYPE_TCP),
337 .actions = valid_actions,
338 .mask = &(const struct rte_flow_item_ipv6){
341 0xff, 0xff, 0xff, 0xff,
342 0xff, 0xff, 0xff, 0xff,
343 0xff, 0xff, 0xff, 0xff,
344 0xff, 0xff, 0xff, 0xff,
347 0xff, 0xff, 0xff, 0xff,
348 0xff, 0xff, 0xff, 0xff,
349 0xff, 0xff, 0xff, 0xff,
350 0xff, 0xff, 0xff, 0xff,
357 .default_mask = &rte_flow_item_ipv6_mask,
358 .mask_sz = sizeof(struct rte_flow_item_ipv6),
359 .convert = mlx5_flow_create_ipv6,
360 .dst_sz = sizeof(struct ibv_flow_spec_ipv6),
362 [RTE_FLOW_ITEM_TYPE_UDP] = {
363 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
364 .actions = valid_actions,
365 .mask = &(const struct rte_flow_item_udp){
371 .default_mask = &rte_flow_item_udp_mask,
372 .mask_sz = sizeof(struct rte_flow_item_udp),
373 .convert = mlx5_flow_create_udp,
374 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
376 [RTE_FLOW_ITEM_TYPE_TCP] = {
377 .actions = valid_actions,
378 .mask = &(const struct rte_flow_item_tcp){
384 .default_mask = &rte_flow_item_tcp_mask,
385 .mask_sz = sizeof(struct rte_flow_item_tcp),
386 .convert = mlx5_flow_create_tcp,
387 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
389 [RTE_FLOW_ITEM_TYPE_VXLAN] = {
390 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
391 .actions = valid_actions,
392 .mask = &(const struct rte_flow_item_vxlan){
393 .vni = "\xff\xff\xff",
395 .default_mask = &rte_flow_item_vxlan_mask,
396 .mask_sz = sizeof(struct rte_flow_item_vxlan),
397 .convert = mlx5_flow_create_vxlan,
398 .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
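/*
 * Illustrative sketch (not part of the driver): a pattern is accepted only
 * if each item appears in the previous item's .items list above, starting
 * from RTE_FLOW_ITEM_TYPE_END. For instance, the graph allows
 * eth / ipv4 / udp / vxlan / eth:
 *
 *	const struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_UDP },
 *		{ .type = RTE_FLOW_ITEM_TYPE_VXLAN },
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 */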
402 /** Structure to pass to the conversion function. */
403 struct mlx5_flow_parse {
404 uint32_t inner; /**< Set once VXLAN is encountered. */
406 /**< Whether resources should remain after a validate. */
407 uint32_t drop:1; /**< Target is a drop queue. */
408 uint32_t mark:1; /**< Mark is present in the flow. */
409 uint32_t count:1; /**< Count is present in the flow. */
410 uint32_t mark_id; /**< Mark identifier. */
411 struct rte_flow_action_rss rss_conf; /**< RSS configuration. */
412 uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indexes to use. */
413 uint8_t rss_key[40]; /**< Copy of the RSS key. */
414 enum hash_rxq_type layer; /**< Last pattern layer detected. */
415 struct ibv_counter_set *cs; /**< Holds the counter set for the rule. */
417 struct ibv_flow_attr *ibv_attr;
418 /**< Pointer to Verbs attributes. */
420 /**< Current position or total size of the attribute. */
421 } queue[RTE_DIM(hash_rxq_init)];
424 static const struct rte_flow_ops mlx5_flow_ops = {
425 .validate = mlx5_flow_validate,
426 .create = mlx5_flow_create,
427 .destroy = mlx5_flow_destroy,
428 .flush = mlx5_flow_flush,
429 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
430 .query = mlx5_flow_query,
434 .isolate = mlx5_flow_isolate,
437 /* Convert FDIR request to a generic flow. */
439 struct rte_flow_attr attr;
440 struct rte_flow_action actions[2];
441 struct rte_flow_item items[4];
442 struct rte_flow_item_eth l2;
443 struct rte_flow_item_eth l2_mask;
445 struct rte_flow_item_ipv4 ipv4;
446 struct rte_flow_item_ipv6 ipv6;
449 struct rte_flow_item_ipv4 ipv4;
450 struct rte_flow_item_ipv6 ipv6;
453 struct rte_flow_item_udp udp;
454 struct rte_flow_item_tcp tcp;
457 struct rte_flow_item_udp udp;
458 struct rte_flow_item_tcp tcp;
460 struct rte_flow_action_queue queue;
463 /* Verbs specification header. */
464 struct ibv_spec_header {
465 enum ibv_flow_spec_type type;
470 * Check support for a given item.
473 * Item specification.
475 * Bit-masks covering supported fields to compare with spec, last and mask in
478 * Bit-mask size in bytes.
481 * 0 on success, a negative errno value otherwise and rte_errno is set.
484 mlx5_flow_item_validate(const struct rte_flow_item *item,
485 const uint8_t *mask, unsigned int size)
487 if (!item->spec && (item->mask || item->last)) {
491 if (item->spec && !item->mask) {
493 const uint8_t *spec = item->spec;
495 for (i = 0; i < size; ++i)
496 if ((spec[i] | mask[i]) != mask[i]) {
501 if (item->last && !item->mask) {
503 const uint8_t *spec = item->last;
505 for (i = 0; i < size; ++i)
506 if ((spec[i] | mask[i]) != mask[i]) {
513 const uint8_t *spec = item->spec;
515 for (i = 0; i < size; ++i)
516 if ((spec[i] | mask[i]) != mask[i]) {
521 if (item->spec && item->last) {
524 const uint8_t *apply = mask;
530 for (i = 0; i < size; ++i) {
531 spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
532 last[i] = ((const uint8_t *)item->last)[i] & apply[i];
534 ret = memcmp(spec, last, size);
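/*
 * Worked example of the containment checks above, assuming a single mask
 * byte of 0x0f: a spec byte of 0x0a passes since (0x0a | 0x0f) == 0x0f,
 * while 0x1a is rejected since (0x1a | 0x0f) == 0x1f != 0x0f, i.e. users
 * may only match bits the PMD supports. When both spec and last are given,
 * their masked values must also compare equal (the memcmp() above) since
 * ranges are not supported.
 */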
544 * Extract attribute to the parser.
547 * Flow rule attributes.
549 * Perform verbose error reporting if not NULL.
552 * 0 on success, a negative errno value otherwise and rte_errno is set.
555 mlx5_flow_convert_attributes(const struct rte_flow_attr *attr,
556 struct rte_flow_error *error)
559 rte_flow_error_set(error, ENOTSUP,
560 RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
562 "groups are not supported");
565 if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
566 rte_flow_error_set(error, ENOTSUP,
567 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
569 "priorities are not supported");
573 rte_flow_error_set(error, ENOTSUP,
574 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
576 "egress is not supported");
579 if (attr->transfer) {
580 rte_flow_error_set(error, ENOTSUP,
581 RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
583 "transfer is not supported");
586 if (!attr->ingress) {
587 rte_flow_error_set(error, ENOTSUP,
588 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
590 "only ingress is supported");
597 * Extract the requested actions into the parser.
600 * Pointer to Ethernet device.
602 * Associated actions (list terminated by the END action).
604 * Perform verbose error reporting if not NULL.
605 * @param[in, out] parser
606 * Internal parser structure.
609 * 0 on success, a negative errno value otherwise and rte_errno is set.
612 mlx5_flow_convert_actions(struct rte_eth_dev *dev,
613 const struct rte_flow_action actions[],
614 struct rte_flow_error *error,
615 struct mlx5_flow_parse *parser)
617 enum { FATE = 1, MARK = 2, COUNT = 4, };
618 uint32_t overlap = 0;
619 struct priv *priv = dev->data->dev_private;
621 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
622 if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
624 } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
626 goto exit_action_overlap;
629 } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
630 const struct rte_flow_action_queue *queue =
631 (const struct rte_flow_action_queue *)
635 goto exit_action_overlap;
637 if (!queue || (queue->index > (priv->rxqs_n - 1)))
638 goto exit_action_not_supported;
639 parser->queues[0] = queue->index;
640 parser->rss_conf = (struct rte_flow_action_rss){
642 .queue = parser->queues,
644 } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
645 const struct rte_flow_action_rss *rss =
646 (const struct rte_flow_action_rss *)
648 const uint8_t *rss_key;
649 uint32_t rss_key_len;
653 goto exit_action_overlap;
656 rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ) {
657 rte_flow_error_set(error, EINVAL,
658 RTE_FLOW_ERROR_TYPE_ACTION,
660 "the only supported RSS hash"
661 " function is Toeplitz");
665 rte_flow_error_set(error, EINVAL,
666 RTE_FLOW_ERROR_TYPE_ACTION,
668 "a nonzero RSS encapsulation"
669 " level is not supported");
672 if (rss->types & MLX5_RSS_HF_MASK) {
673 rte_flow_error_set(error, EINVAL,
674 RTE_FLOW_ERROR_TYPE_ACTION,
676 "unsupported RSS type"
681 rss_key_len = rss->key_len;
684 rss_key_len = rss_hash_default_key_len;
685 rss_key = rss_hash_default_key;
687 if (rss_key_len != RTE_DIM(parser->rss_key)) {
688 rte_flow_error_set(error, EINVAL,
689 RTE_FLOW_ERROR_TYPE_ACTION,
691 "RSS hash key must be"
692 " exactly 40 bytes long");
695 if (!rss->queue_num) {
696 rte_flow_error_set(error, EINVAL,
697 RTE_FLOW_ERROR_TYPE_ACTION,
702 if (rss->queue_num > RTE_DIM(parser->queues)) {
703 rte_flow_error_set(error, EINVAL,
704 RTE_FLOW_ERROR_TYPE_ACTION,
706 "too many queues for RSS"
710 for (n = 0; n < rss->queue_num; ++n) {
711 if (rss->queue[n] >= priv->rxqs_n) {
712 rte_flow_error_set(error, EINVAL,
713 RTE_FLOW_ERROR_TYPE_ACTION,
715 "queue id > number of"
720 parser->rss_conf = (struct rte_flow_action_rss){
721 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
724 .key_len = rss_key_len,
725 .queue_num = rss->queue_num,
726 .key = memcpy(parser->rss_key, rss_key,
727 sizeof(*rss_key) * rss_key_len),
728 .queue = memcpy(parser->queues, rss->queue,
729 sizeof(*rss->queue) *
732 } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
733 const struct rte_flow_action_mark *mark =
734 (const struct rte_flow_action_mark *)
738 goto exit_action_overlap;
741 rte_flow_error_set(error, EINVAL,
742 RTE_FLOW_ERROR_TYPE_ACTION,
744 "mark must be defined");
746 } else if (mark->id >= MLX5_FLOW_MARK_MAX) {
747 rte_flow_error_set(error, ENOTSUP,
748 RTE_FLOW_ERROR_TYPE_ACTION,
750 "mark must be between 0"
755 parser->mark_id = mark->id;
756 } else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
758 goto exit_action_overlap;
761 } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
762 priv->config.flow_counter_en) {
764 goto exit_action_overlap;
768 goto exit_action_not_supported;
771 /* When fate is unknown, drop traffic. */
772 if (!(overlap & FATE))
774 if (parser->drop && parser->mark)
776 if (!parser->rss_conf.queue_num && !parser->drop) {
777 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
778 NULL, "no valid action");
782 exit_action_not_supported:
783 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
784 actions, "action not supported");
787 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
788 actions, "overlapping actions are not supported");
796 * Pattern specification (list terminated by the END pattern item).
798 * Perform verbose error reporting if not NULL.
799 * @param[in, out] parser
800 * Internal parser structure.
803 * 0 on success, a negative errno value otherwise and rte_errno is set.
806 mlx5_flow_convert_items_validate(const struct rte_flow_item items[],
807 struct rte_flow_error *error,
808 struct mlx5_flow_parse *parser)
810 const struct mlx5_flow_items *cur_item = mlx5_flow_items;
814 /* Initialise the offsets to start after the verbs attribute. */
815 for (i = 0; i != hash_rxq_init_n; ++i)
816 parser->queue[i].offset = sizeof(struct ibv_flow_attr);
817 for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
818 const struct mlx5_flow_items *token = NULL;
821 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
825 cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
827 if (cur_item->items[i] == items->type) {
828 token = &mlx5_flow_items[items->type];
834 goto exit_item_not_supported;
837 ret = mlx5_flow_item_validate(items,
838 (const uint8_t *)cur_item->mask,
841 goto exit_item_not_supported;
842 if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
844 rte_flow_error_set(error, ENOTSUP,
845 RTE_FLOW_ERROR_TYPE_ITEM,
847 "cannot recognize multiple"
848 " VXLAN encapsulations");
851 parser->inner = IBV_FLOW_SPEC_INNER;
854 parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
856 for (n = 0; n != hash_rxq_init_n; ++n)
857 parser->queue[n].offset += cur_item->dst_sz;
861 parser->queue[HASH_RXQ_ETH].offset +=
862 sizeof(struct ibv_flow_spec_action_drop);
865 for (i = 0; i != hash_rxq_init_n; ++i)
866 parser->queue[i].offset +=
867 sizeof(struct ibv_flow_spec_action_tag);
870 unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
872 for (i = 0; i != hash_rxq_init_n; ++i)
873 parser->queue[i].offset += size;
876 exit_item_not_supported:
877 return rte_flow_error_set(error, -ret, RTE_FLOW_ERROR_TYPE_ITEM,
878 items, "item not supported");
882 * Allocate memory space to store verbs flow attributes.
885 * Number of bytes to allocate.
887 * Perform verbose error reporting if not NULL.
890 * A verbs flow attribute on success, NULL otherwise and rte_errno is set.
892 static struct ibv_flow_attr *
893 mlx5_flow_convert_allocate(unsigned int size, struct rte_flow_error *error)
895 struct ibv_flow_attr *ibv_attr;
897 ibv_attr = rte_calloc(__func__, 1, size, 0);
899 rte_flow_error_set(error, ENOMEM,
900 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
902 "cannot allocate verbs spec attributes");
909 * Make inner packet matching a higher priority than the non-inner
912 * @param[in, out] parser
913 * Internal parser structure.
915 * User flow attribute.
918 mlx5_flow_update_priority(struct mlx5_flow_parse *parser,
919 const struct rte_flow_attr *attr)
924 parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
926 hash_rxq_init[HASH_RXQ_ETH].flow_priority;
929 for (i = 0; i != hash_rxq_init_n; ++i) {
930 if (parser->queue[i].ibv_attr) {
931 parser->queue[i].ibv_attr->priority =
933 hash_rxq_init[i].flow_priority -
934 (parser->inner ? 1 : 0);
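/*
 * Numeric illustration (values are hypothetical, assuming
 * attr->priority == 0 and a flow_priority of 2 for the layer): an outer
 * match gets Verbs priority 2 while the same match behind a VXLAN tunnel
 * gets 1, so inner (more specific) flows take precedence, lower values
 * meaning higher priority.
 */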
940 * Finalise verbs flow attributes.
942 * @param[in, out] parser
943 * Internal parser structure.
946 mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
948 const unsigned int ipv4 =
949 hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
950 const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 : HASH_RXQ_TCPV6;
951 const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
952 const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 : HASH_RXQ_TCPV4;
953 const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 : HASH_RXQ_IPV4;
954 const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
957 /* Remove any other flow not matching the pattern. */
958 if (parser->rss_conf.queue_num == 1 && !parser->rss_conf.types) {
959 for (i = 0; i != hash_rxq_init_n; ++i) {
960 if (i == HASH_RXQ_ETH)
962 rte_free(parser->queue[i].ibv_attr);
963 parser->queue[i].ibv_attr = NULL;
967 if (parser->layer == HASH_RXQ_ETH) {
971 * This layer becomes useless as the pattern defined in deeper
974 rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
975 parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
977 /* Remove the opposite kind of layer, e.g. IPv6 if the pattern is IPv4. */
978 for (i = ohmin; i != (ohmax + 1); ++i) {
979 if (!parser->queue[i].ibv_attr)
981 rte_free(parser->queue[i].ibv_attr);
982 parser->queue[i].ibv_attr = NULL;
984 /* Remove impossible flow according to the RSS configuration. */
985 if (hash_rxq_init[parser->layer].dpdk_rss_hf &
986 parser->rss_conf.types) {
987 /* Remove any other flow. */
988 for (i = hmin; i != (hmax + 1); ++i) {
989 if ((i == parser->layer) ||
990 (!parser->queue[i].ibv_attr))
992 rte_free(parser->queue[i].ibv_attr);
993 parser->queue[i].ibv_attr = NULL;
995 } else if (!parser->queue[ip].ibv_attr) {
996 /* No RSS possible with the current configuration. */
997 parser->rss_conf.queue_num = 1;
1002 * Fill missing layers in verbs specifications, or compute the correct
1003 * offset to allocate the memory space for the attributes and
1006 for (i = 0; i != hash_rxq_init_n - 1; ++i) {
1008 struct ibv_flow_spec_ipv4_ext ipv4;
1009 struct ibv_flow_spec_ipv6 ipv6;
1010 struct ibv_flow_spec_tcp_udp udp_tcp;
1015 if (i == parser->layer)
1017 if (parser->layer == HASH_RXQ_ETH) {
1018 if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
1019 size = sizeof(struct ibv_flow_spec_ipv4_ext);
1020 specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
1021 .type = IBV_FLOW_SPEC_IPV4_EXT,
1025 size = sizeof(struct ibv_flow_spec_ipv6);
1026 specs.ipv6 = (struct ibv_flow_spec_ipv6){
1027 .type = IBV_FLOW_SPEC_IPV6,
1031 if (parser->queue[i].ibv_attr) {
1032 dst = (void *)((uintptr_t)
1033 parser->queue[i].ibv_attr +
1034 parser->queue[i].offset);
1035 memcpy(dst, &specs, size);
1036 ++parser->queue[i].ibv_attr->num_of_specs;
1038 parser->queue[i].offset += size;
1040 if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
1041 (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
1042 size = sizeof(struct ibv_flow_spec_tcp_udp);
1043 specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
1044 .type = ((i == HASH_RXQ_UDPV4 ||
1045 i == HASH_RXQ_UDPV6) ?
1050 if (parser->queue[i].ibv_attr) {
1051 dst = (void *)((uintptr_t)
1052 parser->queue[i].ibv_attr +
1053 parser->queue[i].offset);
1054 memcpy(dst, &specs, size);
1055 ++parser->queue[i].ibv_attr->num_of_specs;
1057 parser->queue[i].offset += size;
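/*
 * Example of the completion above, assuming the pattern eth / ipv4 with
 * RSS requested on both TCP and UDP: the IPv4 spec is already present in
 * every IPv4 hash queue, so only the missing L4 specs are appended,
 * turning the HASH_RXQ_TCPV4 attribute into eth / ipv4 / tcp and the
 * HASH_RXQ_UDPV4 attribute into eth / ipv4 / udp.
 */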
1063 * Validate and convert a flow supported by the NIC.
1066 * Pointer to Ethernet device.
1068 * Flow rule attributes.
1069 * @param[in] pattern
1070 * Pattern specification (list terminated by the END pattern item).
1071 * @param[in] actions
1072 * Associated actions (list terminated by the END action).
1074 * Perform verbose error reporting if not NULL.
1075 * @param[in, out] parser
1076 * Internal parser structure.
1079 * 0 on success, a negative errno value otherwise and rte_errno is set.
1082 mlx5_flow_convert(struct rte_eth_dev *dev,
1083 const struct rte_flow_attr *attr,
1084 const struct rte_flow_item items[],
1085 const struct rte_flow_action actions[],
1086 struct rte_flow_error *error,
1087 struct mlx5_flow_parse *parser)
1089 const struct mlx5_flow_items *cur_item = mlx5_flow_items;
1093 /* First step. Validate the attributes, items and actions. */
1094 *parser = (struct mlx5_flow_parse){
1095 .create = parser->create,
1096 .layer = HASH_RXQ_ETH,
1097 .mark_id = MLX5_FLOW_MARK_DEFAULT,
1099 ret = mlx5_flow_convert_attributes(attr, error);
1102 ret = mlx5_flow_convert_actions(dev, actions, error, parser);
1105 ret = mlx5_flow_convert_items_validate(items, error, parser);
1108 mlx5_flow_convert_finalise(parser);
1111 * Allocate the memory space to store verbs specifications.
1114 unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;
1116 parser->queue[HASH_RXQ_ETH].ibv_attr =
1117 mlx5_flow_convert_allocate(offset, error);
1118 if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
1120 parser->queue[HASH_RXQ_ETH].offset =
1121 sizeof(struct ibv_flow_attr);
1123 for (i = 0; i != hash_rxq_init_n; ++i) {
1124 unsigned int offset;
1126 if (!(parser->rss_conf.types &
1127 hash_rxq_init[i].dpdk_rss_hf) &&
1128 (i != HASH_RXQ_ETH))
1130 offset = parser->queue[i].offset;
1131 parser->queue[i].ibv_attr =
1132 mlx5_flow_convert_allocate(offset, error);
1133 if (!parser->queue[i].ibv_attr)
1135 parser->queue[i].offset = sizeof(struct ibv_flow_attr);
1138 /* Third step. Parse the items and fill the specifications. */
1140 for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
1141 struct mlx5_flow_data data = {
1146 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
1148 cur_item = &mlx5_flow_items[items->type];
1149 ret = cur_item->convert(items,
1150 (cur_item->default_mask ?
1151 cur_item->default_mask :
1158 mlx5_flow_create_flag_mark(parser, parser->mark_id);
1159 if (parser->count && parser->create) {
1160 mlx5_flow_create_count(dev, parser);
1162 goto exit_count_error;
1165 * Last step. Complete missing specification to reach the RSS
1169 mlx5_flow_convert_finalise(parser);
1170 mlx5_flow_update_priority(parser, attr);
1172 /* Only verification is expected, all resources should be released. */
1173 if (!parser->create) {
1174 for (i = 0; i != hash_rxq_init_n; ++i) {
1175 if (parser->queue[i].ibv_attr) {
1176 rte_free(parser->queue[i].ibv_attr);
1177 parser->queue[i].ibv_attr = NULL;
1183 for (i = 0; i != hash_rxq_init_n; ++i) {
1184 if (parser->queue[i].ibv_attr) {
1185 rte_free(parser->queue[i].ibv_attr);
1186 parser->queue[i].ibv_attr = NULL;
1189 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1190 NULL, "cannot allocate verbs spec attributes");
1193 rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1194 NULL, "cannot create counter");
1199 * Copy the specification created into the flow.
1202 * Internal parser structure.
1204 * Created specification to copy.
1206 * Size in bytes of the specification to copy.
1209 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
1215 for (i = 0; i != hash_rxq_init_n; ++i) {
1216 if (!parser->queue[i].ibv_attr)
1218 /* Specification must be the same L3 type or none. */
1219 if (parser->layer == HASH_RXQ_ETH ||
1220 (hash_rxq_init[parser->layer].ip_version ==
1221 hash_rxq_init[i].ip_version) ||
1222 (hash_rxq_init[i].ip_version == 0)) {
1223 dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1224 parser->queue[i].offset);
1225 memcpy(dst, src, size);
1226 ++parser->queue[i].ibv_attr->num_of_specs;
1227 parser->queue[i].offset += size;
1233 * Convert Ethernet item to Verbs specification.
1236 * Item specification.
1237 * @param[in] default_mask
1238 * Default bit-masks to use when item->mask is not provided.
1239 * @param[in, out] data
1243 * 0 on success, a negative errno value otherwise and rte_errno is set.
1246 mlx5_flow_create_eth(const struct rte_flow_item *item,
1247 const void *default_mask,
1248 struct mlx5_flow_data *data)
1250 const struct rte_flow_item_eth *spec = item->spec;
1251 const struct rte_flow_item_eth *mask = item->mask;
1252 struct mlx5_flow_parse *parser = data->parser;
1253 const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1254 struct ibv_flow_spec_eth eth = {
1255 .type = parser->inner | IBV_FLOW_SPEC_ETH,
1259 /* Don't update layer for the inner pattern. */
1261 parser->layer = HASH_RXQ_ETH;
1266 mask = default_mask;
1267 memcpy(ð.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
1268 memcpy(ð.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
1269 eth.val.ether_type = spec->type;
1270 memcpy(ð.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
1271 memcpy(ð.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
1272 eth.mask.ether_type = mask->type;
1273 /* Remove unwanted bits from values. */
1274 for (i = 0; i < ETHER_ADDR_LEN; ++i) {
1275 eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
1276 eth.val.src_mac[i] &= eth.mask.src_mac[i];
1278 eth.val.ether_type &= eth.mask.ether_type;
1280 mlx5_flow_create_copy(parser, ð, eth_size);
1285 * Convert VLAN item to Verbs specification.
1288 * Item specification.
1289 * @param[in] default_mask
1290 * Default bit-masks to use when item->mask is not provided.
1291 * @param[in, out] data
1295 * 0 on success, a negative errno value otherwise and rte_errno is set.
1298 mlx5_flow_create_vlan(const struct rte_flow_item *item,
1299 const void *default_mask,
1300 struct mlx5_flow_data *data)
1302 const struct rte_flow_item_vlan *spec = item->spec;
1303 const struct rte_flow_item_vlan *mask = item->mask;
1304 struct mlx5_flow_parse *parser = data->parser;
1305 struct ibv_flow_spec_eth *eth;
1306 const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1307 const char *msg = "VLAN cannot be empty";
1312 mask = default_mask;
1314 for (i = 0; i != hash_rxq_init_n; ++i) {
1315 if (!parser->queue[i].ibv_attr)
1318 eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1319 parser->queue[i].offset - eth_size);
1320 eth->val.vlan_tag = spec->tci;
1321 eth->mask.vlan_tag = mask->tci;
1322 eth->val.vlan_tag &= eth->mask.vlan_tag;
1324 * From the Verbs perspective, an empty VLAN is equivalent
1325 * to a packet without a VLAN layer.
1327 if (!eth->mask.vlan_tag)
1329 /* Outer TPID cannot be matched. */
1330 if (eth->mask.ether_type) {
1331 msg = "VLAN TPID matching is not supported";
1334 eth->val.ether_type = spec->inner_type;
1335 eth->mask.ether_type = mask->inner_type;
1336 eth->val.ether_type &= eth->mask.ether_type;
1341 return rte_flow_error_set(data->error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM,
1346 * Convert IPv4 item to Verbs specification.
1349 * Item specification.
1350 * @param[in] default_mask
1351 * Default bit-masks to use when item->mask is not provided.
1352 * @param[in, out] data
1356 * 0 on success, a negative errno value otherwise and rte_errno is set.
1359 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
1360 const void *default_mask,
1361 struct mlx5_flow_data *data)
1363 const struct rte_flow_item_ipv4 *spec = item->spec;
1364 const struct rte_flow_item_ipv4 *mask = item->mask;
1365 struct mlx5_flow_parse *parser = data->parser;
1366 unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
1367 struct ibv_flow_spec_ipv4_ext ipv4 = {
1368 .type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
1372 /* Don't update layer for the inner pattern. */
1374 parser->layer = HASH_RXQ_IPV4;
1377 mask = default_mask;
1378 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
1379 .src_ip = spec->hdr.src_addr,
1380 .dst_ip = spec->hdr.dst_addr,
1381 .proto = spec->hdr.next_proto_id,
1382 .tos = spec->hdr.type_of_service,
1384 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
1385 .src_ip = mask->hdr.src_addr,
1386 .dst_ip = mask->hdr.dst_addr,
1387 .proto = mask->hdr.next_proto_id,
1388 .tos = mask->hdr.type_of_service,
1390 /* Remove unwanted bits from values. */
1391 ipv4.val.src_ip &= ipv4.mask.src_ip;
1392 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
1393 ipv4.val.proto &= ipv4.mask.proto;
1394 ipv4.val.tos &= ipv4.mask.tos;
1396 mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
1401 * Convert IPv6 item to Verbs specification.
1404 * Item specification.
1405 * @param[in] default_mask
1406 * Default bit-masks to use when item->mask is not provided.
1407 * @param[in, out] data
1411 * 0 on success, a negative errno value otherwise and rte_errno is set.
1414 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
1415 const void *default_mask,
1416 struct mlx5_flow_data *data)
1418 const struct rte_flow_item_ipv6 *spec = item->spec;
1419 const struct rte_flow_item_ipv6 *mask = item->mask;
1420 struct mlx5_flow_parse *parser = data->parser;
1421 unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
1422 struct ibv_flow_spec_ipv6 ipv6 = {
1423 .type = parser->inner | IBV_FLOW_SPEC_IPV6,
1427 /* Don't update layer for the inner pattern. */
1429 parser->layer = HASH_RXQ_IPV6;
1432 uint32_t vtc_flow_val;
1433 uint32_t vtc_flow_mask;
1436 mask = default_mask;
1437 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
1438 RTE_DIM(ipv6.val.src_ip));
1439 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
1440 RTE_DIM(ipv6.val.dst_ip));
1441 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
1442 RTE_DIM(ipv6.mask.src_ip));
1443 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
1444 RTE_DIM(ipv6.mask.dst_ip));
1445 vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
1446 vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
1447 ipv6.val.flow_label =
1448 rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
1450 ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
1452 ipv6.val.next_hdr = spec->hdr.proto;
1453 ipv6.val.hop_limit = spec->hdr.hop_limits;
1454 ipv6.mask.flow_label =
1455 rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
1457 ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
1459 ipv6.mask.next_hdr = mask->hdr.proto;
1460 ipv6.mask.hop_limit = mask->hdr.hop_limits;
1461 /* Remove unwanted bits from values. */
1462 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
1463 ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
1464 ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
1466 ipv6.val.flow_label &= ipv6.mask.flow_label;
1467 ipv6.val.traffic_class &= ipv6.mask.traffic_class;
1468 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
1469 ipv6.val.hop_limit &= ipv6.mask.hop_limit;
1471 mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
1476 * Convert UDP item to Verbs specification.
1479 * Item specification.
1480 * @param[in] default_mask
1481 * Default bit-masks to use when item->mask is not provided.
1482 * @param[in, out] data
1486 * 0 on success, a negative errno value otherwise and rte_errno is set.
1489 mlx5_flow_create_udp(const struct rte_flow_item *item,
1490 const void *default_mask,
1491 struct mlx5_flow_data *data)
1493 const struct rte_flow_item_udp *spec = item->spec;
1494 const struct rte_flow_item_udp *mask = item->mask;
1495 struct mlx5_flow_parse *parser = data->parser;
1496 unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1497 struct ibv_flow_spec_tcp_udp udp = {
1498 .type = parser->inner | IBV_FLOW_SPEC_UDP,
1502 /* Don't update layer for the inner pattern. */
1503 if (!parser->inner) {
1504 if (parser->layer == HASH_RXQ_IPV4)
1505 parser->layer = HASH_RXQ_UDPV4;
1507 parser->layer = HASH_RXQ_UDPV6;
1511 mask = default_mask;
1512 udp.val.dst_port = spec->hdr.dst_port;
1513 udp.val.src_port = spec->hdr.src_port;
1514 udp.mask.dst_port = mask->hdr.dst_port;
1515 udp.mask.src_port = mask->hdr.src_port;
1516 /* Remove unwanted bits from values. */
1517 udp.val.src_port &= udp.mask.src_port;
1518 udp.val.dst_port &= udp.mask.dst_port;
1520 mlx5_flow_create_copy(parser, &udp, udp_size);
1525 * Convert TCP item to Verbs specification.
1528 * Item specification.
1529 * @param[in] default_mask
1530 * Default bit-masks to use when item->mask is not provided.
1531 * @param[in, out] data
1535 * 0 on success, a negative errno value otherwise and rte_errno is set.
1538 mlx5_flow_create_tcp(const struct rte_flow_item *item,
1539 const void *default_mask,
1540 struct mlx5_flow_data *data)
1542 const struct rte_flow_item_tcp *spec = item->spec;
1543 const struct rte_flow_item_tcp *mask = item->mask;
1544 struct mlx5_flow_parse *parser = data->parser;
1545 unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1546 struct ibv_flow_spec_tcp_udp tcp = {
1547 .type = parser->inner | IBV_FLOW_SPEC_TCP,
1551 /* Don't update layer for the inner pattern. */
1552 if (!parser->inner) {
1553 if (parser->layer == HASH_RXQ_IPV4)
1554 parser->layer = HASH_RXQ_TCPV4;
1556 parser->layer = HASH_RXQ_TCPV6;
1560 mask = default_mask;
1561 tcp.val.dst_port = spec->hdr.dst_port;
1562 tcp.val.src_port = spec->hdr.src_port;
1563 tcp.mask.dst_port = mask->hdr.dst_port;
1564 tcp.mask.src_port = mask->hdr.src_port;
1565 /* Remove unwanted bits from values. */
1566 tcp.val.src_port &= tcp.mask.src_port;
1567 tcp.val.dst_port &= tcp.mask.dst_port;
1569 mlx5_flow_create_copy(parser, &tcp, tcp_size);
1574 * Convert VXLAN item to Verbs specification.
1577 * Item specification.
1578 * @param[in] default_mask
1579 * Default bit-masks to use when item->mask is not provided.
1580 * @param[in, out] data
1584 * 0 on success, a negative errno value otherwise and rte_errno is set.
1587 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
1588 const void *default_mask,
1589 struct mlx5_flow_data *data)
1591 const struct rte_flow_item_vxlan *spec = item->spec;
1592 const struct rte_flow_item_vxlan *mask = item->mask;
1593 struct mlx5_flow_parse *parser = data->parser;
1594 unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1595 struct ibv_flow_spec_tunnel vxlan = {
1596 .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1605 parser->inner = IBV_FLOW_SPEC_INNER;
1608 mask = default_mask;
1609 memcpy(&id.vni[1], spec->vni, 3);
1610 vxlan.val.tunnel_id = id.vlan_id;
1611 memcpy(&id.vni[1], mask->vni, 3);
1612 vxlan.mask.tunnel_id = id.vlan_id;
1613 /* Remove unwanted bits from values. */
1614 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1617 * Tunnel ID 0 is equivalent to not adding a VXLAN layer: if only this
1618 * layer is defined in the Verbs specification, it is interpreted as a
1619 * wildcard and all packets will match this rule; if it follows a full
1620 * stack layer (e.g. eth / ipv4 / udp), all packets matching the
1621 * preceding layers will also match this rule.
1622 * To avoid such a situation, VNI 0 is currently refused.
1624 if (!vxlan.val.tunnel_id)
1625 return rte_flow_error_set(data->error, EINVAL,
1626 RTE_FLOW_ERROR_TYPE_ITEM,
1628 "VxLAN vni cannot be 0");
1629 mlx5_flow_create_copy(parser, &vxlan, size);
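/*
 * VNI packing illustration: the 24-bit VNI from the item is copied into
 * bytes 1..3 of a 32-bit word, so vni = { 0x12, 0x34, 0x56 } becomes the
 * byte sequence 0x00 0x12 0x34 0x56, i.e. the value 0x00123456 when read
 * in network byte order.
 */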
1634 * Convert mark/flag action to Verbs specification.
1637 * Internal parser structure.
1642 * 0 on success, a negative errno value otherwise and rte_errno is set.
1645 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
1647 unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1648 struct ibv_flow_spec_action_tag tag = {
1649 .type = IBV_FLOW_SPEC_ACTION_TAG,
1651 .tag_id = mlx5_flow_mark_set(mark_id),
1654 assert(parser->mark);
1655 mlx5_flow_create_copy(parser, &tag, size);
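/*
 * Usage sketch: FLAG uses the default identifier (MLX5_FLOW_MARK_DEFAULT)
 * while MARK carries a user-supplied one, e.g.
 * mlx5_flow_create_flag_mark(parser, 42) appends one
 * ibv_flow_spec_action_tag (with the id encoded by mlx5_flow_mark_set())
 * to every per-hash-queue attribute.
 */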
1660 * Convert count action to Verbs specification.
1663 * Pointer to Ethernet device.
1665 * Pointer to MLX5 flow parser structure.
1668 * 0 on success, a negative errno value otherwise and rte_errno is set.
1671 mlx5_flow_create_count(struct rte_eth_dev *dev __rte_unused,
1672 struct mlx5_flow_parse *parser __rte_unused)
1674 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
1675 struct priv *priv = dev->data->dev_private;
1676 unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
1677 struct ibv_counter_set_init_attr init_attr = {0};
1678 struct ibv_flow_spec_counter_action counter = {
1679 .type = IBV_FLOW_SPEC_ACTION_COUNT,
1681 .counter_set_handle = 0,
1684 init_attr.counter_set_id = 0;
1685 parser->cs = mlx5_glue->create_counter_set(priv->ctx, &init_attr);
1690 counter.counter_set_handle = parser->cs->handle;
1691 mlx5_flow_create_copy(parser, &counter, size);
1697 * Complete flow rule creation with a drop queue.
1700 * Pointer to Ethernet device.
1702 * Internal parser structure.
1704 * Pointer to the rte_flow.
1706 * Perform verbose error reporting if not NULL.
1709 * 0 on success, a negative errno value otherwise and rte_errno is set.
1712 mlx5_flow_create_action_queue_drop(struct rte_eth_dev *dev,
1713 struct mlx5_flow_parse *parser,
1714 struct rte_flow *flow,
1715 struct rte_flow_error *error)
1717 struct priv *priv = dev->data->dev_private;
1718 struct ibv_flow_spec_action_drop *drop;
1719 unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
1724 drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
1725 parser->queue[HASH_RXQ_ETH].offset);
1726 *drop = (struct ibv_flow_spec_action_drop){
1727 .type = IBV_FLOW_SPEC_ACTION_DROP,
1730 ++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
1731 parser->queue[HASH_RXQ_ETH].offset += size;
1732 flow->frxq[HASH_RXQ_ETH].ibv_attr =
1733 parser->queue[HASH_RXQ_ETH].ibv_attr;
1735 flow->cs = parser->cs;
1736 if (!priv->dev->data->dev_started)
1738 parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
1739 flow->frxq[HASH_RXQ_ETH].ibv_flow =
1740 mlx5_glue->create_flow(priv->flow_drop_queue->qp,
1741 flow->frxq[HASH_RXQ_ETH].ibv_attr);
1742 if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1743 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1744 NULL, "flow rule creation failure");
1750 if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1751 claim_zero(mlx5_glue->destroy_flow
1752 (flow->frxq[HASH_RXQ_ETH].ibv_flow));
1753 flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
1755 if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
1756 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
1757 flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
1760 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
1768 * Create hash Rx queues when RSS is enabled.
1771 * Pointer to Ethernet device.
1773 * Internal parser structure.
1775 * Pointer to the rte_flow.
1777 * Perform verbose error reporting if not NULL.
1780 * 0 on success, a negative errno value otherwise and rte_errno is set.
1783 mlx5_flow_create_action_queue_rss(struct rte_eth_dev *dev,
1784 struct mlx5_flow_parse *parser,
1785 struct rte_flow *flow,
1786 struct rte_flow_error *error)
1788 struct priv *priv = dev->data->dev_private;
1791 for (i = 0; i != hash_rxq_init_n; ++i) {
1792 uint64_t hash_fields;
1794 if (!parser->queue[i].ibv_attr)
1796 flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
1797 parser->queue[i].ibv_attr = NULL;
1798 hash_fields = hash_rxq_init[i].hash_fields;
1799 if (!priv->dev->data->dev_started)
1801 flow->frxq[i].hrxq =
1803 parser->rss_conf.key,
1804 parser->rss_conf.key_len,
1806 parser->rss_conf.queue,
1807 parser->rss_conf.queue_num);
1808 if (flow->frxq[i].hrxq)
1810 flow->frxq[i].hrxq =
1812 parser->rss_conf.key,
1813 parser->rss_conf.key_len,
1815 parser->rss_conf.queue,
1816 parser->rss_conf.queue_num);
1817 if (!flow->frxq[i].hrxq) {
1818 return rte_flow_error_set(error, ENOMEM,
1819 RTE_FLOW_ERROR_TYPE_HANDLE,
1821 "cannot create hash rxq");
1828 * Complete flow rule creation.
1831 * Pointer to Ethernet device.
1833 * Internal parser structure.
1835 * Pointer to the rte_flow.
1837 * Perform verbose error reporting if not NULL.
1840 * 0 on success, a negative errno value otherwise and rte_errno is set.
1843 mlx5_flow_create_action_queue(struct rte_eth_dev *dev,
1844 struct mlx5_flow_parse *parser,
1845 struct rte_flow *flow,
1846 struct rte_flow_error *error)
1848 struct priv *priv = dev->data->dev_private;
1851 unsigned int flows_n = 0;
1855 assert(!parser->drop);
1856 ret = mlx5_flow_create_action_queue_rss(dev, parser, flow, error);
1860 flow->cs = parser->cs;
1861 if (!priv->dev->data->dev_started)
1863 for (i = 0; i != hash_rxq_init_n; ++i) {
1864 if (!flow->frxq[i].hrxq)
1866 flow->frxq[i].ibv_flow =
1867 mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
1868 flow->frxq[i].ibv_attr);
1869 if (!flow->frxq[i].ibv_flow) {
1870 rte_flow_error_set(error, ENOMEM,
1871 RTE_FLOW_ERROR_TYPE_HANDLE,
1872 NULL, "flow rule creation failure");
1876 DRV_LOG(DEBUG, "port %u %p type %d QP %p ibv_flow %p",
1879 (void *)flow->frxq[i].hrxq,
1880 (void *)flow->frxq[i].ibv_flow);
1883 rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE,
1884 NULL, "internal error in flow creation");
1887 for (i = 0; i != parser->rss_conf.queue_num; ++i) {
1888 struct mlx5_rxq_data *q =
1889 (*priv->rxqs)[parser->rss_conf.queue[i]];
1891 q->mark |= parser->mark;
1895 ret = rte_errno; /* Save rte_errno before cleanup. */
1897 for (i = 0; i != hash_rxq_init_n; ++i) {
1898 if (flow->frxq[i].ibv_flow) {
1899 struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;
1901 claim_zero(mlx5_glue->destroy_flow(ibv_flow));
1903 if (flow->frxq[i].hrxq)
1904 mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
1905 if (flow->frxq[i].ibv_attr)
1906 rte_free(flow->frxq[i].ibv_attr);
1909 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
1913 rte_errno = ret; /* Restore rte_errno. */
1921 * Pointer to Ethernet device.
1923 * Pointer to a TAILQ flow list.
1925 * Flow rule attributes.
1926 * @param[in] pattern
1927 * Pattern specification (list terminated by the END pattern item).
1928 * @param[in] actions
1929 * Associated actions (list terminated by the END action).
1931 * Perform verbose error reporting if not NULL.
1934 * A flow on success, NULL otherwise and rte_errno is set.
1936 static struct rte_flow *
1937 mlx5_flow_list_create(struct rte_eth_dev *dev,
1938 struct mlx5_flows *list,
1939 const struct rte_flow_attr *attr,
1940 const struct rte_flow_item items[],
1941 const struct rte_flow_action actions[],
1942 struct rte_flow_error *error)
1944 struct mlx5_flow_parse parser = { .create = 1, };
1945 struct rte_flow *flow = NULL;
1949 ret = mlx5_flow_convert(dev, attr, items, actions, error, &parser);
1952 flow = rte_calloc(__func__, 1,
1954 parser.rss_conf.queue_num * sizeof(uint16_t),
1957 rte_flow_error_set(error, ENOMEM,
1958 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1960 "cannot allocate flow memory");
1963 /* Copy configuration. */
1964 flow->queues = (uint16_t (*)[])(flow + 1);
1965 flow->rss_conf = (struct rte_flow_action_rss){
1966 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
1968 .types = parser.rss_conf.types,
1969 .key_len = parser.rss_conf.key_len,
1970 .queue_num = parser.rss_conf.queue_num,
1971 .key = memcpy(flow->rss_key, parser.rss_conf.key,
1972 sizeof(*parser.rss_conf.key) *
1973 parser.rss_conf.key_len),
1974 .queue = memcpy(flow->queues, parser.rss_conf.queue,
1975 sizeof(*parser.rss_conf.queue) *
1976 parser.rss_conf.queue_num),
1978 flow->mark = parser.mark;
1979 /* Finalise the flow. */
1981 ret = mlx5_flow_create_action_queue_drop(dev, &parser, flow,
1984 ret = mlx5_flow_create_action_queue(dev, &parser, flow, error);
1987 TAILQ_INSERT_TAIL(list, flow, next);
1988 DRV_LOG(DEBUG, "port %u flow created %p", dev->data->port_id,
1992 DRV_LOG(ERR, "port %u flow creation error: %s", dev->data->port_id,
1994 for (i = 0; i != hash_rxq_init_n; ++i) {
1995 if (parser.queue[i].ibv_attr)
1996 rte_free(parser.queue[i].ibv_attr);
2003 * Validate a flow supported by the NIC.
2005 * @see rte_flow_validate()
2009 mlx5_flow_validate(struct rte_eth_dev *dev,
2010 const struct rte_flow_attr *attr,
2011 const struct rte_flow_item items[],
2012 const struct rte_flow_action actions[],
2013 struct rte_flow_error *error)
2015 struct mlx5_flow_parse parser = { .create = 0, };
2017 return mlx5_flow_convert(dev, attr, items, actions, error, &parser);
2023 * @see rte_flow_create()
2027 mlx5_flow_create(struct rte_eth_dev *dev,
2028 const struct rte_flow_attr *attr,
2029 const struct rte_flow_item items[],
2030 const struct rte_flow_action actions[],
2031 struct rte_flow_error *error)
2033 struct priv *priv = dev->data->dev_private;
2035 return mlx5_flow_list_create(dev, &priv->flows, attr, items, actions,
2040 * Destroy a flow in a list.
2043 * Pointer to Ethernet device.
2045 * Pointer to a TAILQ flow list.
2050 mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
2051 struct rte_flow *flow)
2053 struct priv *priv = dev->data->dev_private;
2056 if (flow->drop || !flow->mark)
2058 for (i = 0; i != flow->rss_conf.queue_num; ++i) {
2059 struct rte_flow *tmp;
2063 * To remove the mark from the queue, the queue must not be
2064 * present in any other marked flow (RSS or not).
2066 TAILQ_FOREACH(tmp, list, next) {
2068 uint16_t *tqs = NULL;
2073 for (j = 0; j != hash_rxq_init_n; ++j) {
2074 if (!tmp->frxq[j].hrxq)
2076 tqs = tmp->frxq[j].hrxq->ind_table->queues;
2077 tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
2081 for (j = 0; (j != tq_n) && !mark; j++)
2082 if (tqs[j] == (*flow->queues)[i])
2085 (*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
2089 if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
2090 claim_zero(mlx5_glue->destroy_flow
2091 (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2092 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
2094 for (i = 0; i != hash_rxq_init_n; ++i) {
2095 struct mlx5_flow *frxq = &flow->frxq[i];
2098 claim_zero(mlx5_glue->destroy_flow
2101 mlx5_hrxq_release(dev, frxq->hrxq);
2103 rte_free(frxq->ibv_attr);
2107 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2110 TAILQ_REMOVE(list, flow, next);
2111 DRV_LOG(DEBUG, "port %u flow destroyed %p", dev->data->port_id,
2117 * Destroy all flows.
2120 * Pointer to Ethernet device.
2122 * Pointer to a TAILQ flow list.
2125 mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
2127 while (!TAILQ_EMPTY(list)) {
2128 struct rte_flow *flow;
2130 flow = TAILQ_FIRST(list);
2131 mlx5_flow_list_destroy(dev, list, flow);
2136 * Create drop queue.
2139 * Pointer to Ethernet device.
2142 * 0 on success, a negative errno value otherwise and rte_errno is set.
2145 mlx5_flow_create_drop_queue(struct rte_eth_dev *dev)
2147 struct priv *priv = dev->data->dev_private;
2148 struct mlx5_hrxq_drop *fdq = NULL;
2152 fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
2155 "port %u cannot allocate memory for drop queue",
2156 dev->data->port_id);
2160 fdq->cq = mlx5_glue->create_cq(priv->ctx, 1, NULL, NULL, 0);
2162 DRV_LOG(WARNING, "port %u cannot allocate CQ for drop queue",
2163 dev->data->port_id);
2167 fdq->wq = mlx5_glue->create_wq
2169 &(struct ibv_wq_init_attr){
2170 .wq_type = IBV_WQT_RQ,
2177 DRV_LOG(WARNING, "port %u cannot allocate WQ for drop queue",
2178 dev->data->port_id);
2182 fdq->ind_table = mlx5_glue->create_rwq_ind_table
2184 &(struct ibv_rwq_ind_table_init_attr){
2185 .log_ind_tbl_size = 0,
2186 .ind_tbl = &fdq->wq,
2189 if (!fdq->ind_table) {
2191 "port %u cannot allocate indirection table for drop"
2193 dev->data->port_id);
2197 fdq->qp = mlx5_glue->create_qp_ex
2199 &(struct ibv_qp_init_attr_ex){
2200 .qp_type = IBV_QPT_RAW_PACKET,
2202 IBV_QP_INIT_ATTR_PD |
2203 IBV_QP_INIT_ATTR_IND_TABLE |
2204 IBV_QP_INIT_ATTR_RX_HASH,
2205 .rx_hash_conf = (struct ibv_rx_hash_conf){
2207 IBV_RX_HASH_FUNC_TOEPLITZ,
2208 .rx_hash_key_len = rss_hash_default_key_len,
2209 .rx_hash_key = rss_hash_default_key,
2210 .rx_hash_fields_mask = 0,
2212 .rwq_ind_tbl = fdq->ind_table,
2216 DRV_LOG(WARNING, "port %u cannot allocate QP for drop queue",
2217 dev->data->port_id);
2221 priv->flow_drop_queue = fdq;
2225 claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2227 claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2229 claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2231 claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2234 priv->flow_drop_queue = NULL;
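/*
 * The drop queue chains four Verbs objects created in dependency order,
 * CQ -> WQ -> indirection table -> hash QP; the error path above and
 * mlx5_flow_delete_drop_queue() below release them in the reverse order.
 */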
2239 * Delete drop queue.
2242 * Pointer to Ethernet device.
2245 mlx5_flow_delete_drop_queue(struct rte_eth_dev *dev)
2247 struct priv *priv = dev->data->dev_private;
2248 struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
2253 claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2255 claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2257 claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2259 claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2261 priv->flow_drop_queue = NULL;
2268 * Pointer to Ethernet device.
2270 * Pointer to a TAILQ flow list.
2273 mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
2275 struct priv *priv = dev->data->dev_private;
2276 struct rte_flow *flow;
2278 TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
2280 struct mlx5_ind_table_ibv *ind_tbl = NULL;
2283 if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
2285 claim_zero(mlx5_glue->destroy_flow
2286 (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2287 flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
2288 DRV_LOG(DEBUG, "port %u flow %p removed",
2289 dev->data->port_id, (void *)flow);
2293 /* Verify the flow has not already been cleaned. */
2294 for (i = 0; i != hash_rxq_init_n; ++i) {
2295 if (!flow->frxq[i].ibv_flow)
2298 * Indirection table may be necessary to remove the
2299 * flags in the Rx queues.
2300 * This helps to speed up the process by avoiding
2303 ind_tbl = flow->frxq[i].hrxq->ind_table;
2306 if (i == hash_rxq_init_n)
2310 for (i = 0; i != ind_tbl->queues_n; ++i)
2311 (*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
2313 for (i = 0; i != hash_rxq_init_n; ++i) {
2314 if (!flow->frxq[i].ibv_flow)
2316 claim_zero(mlx5_glue->destroy_flow
2317 (flow->frxq[i].ibv_flow));
2318 flow->frxq[i].ibv_flow = NULL;
2319 mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
2320 flow->frxq[i].hrxq = NULL;
2322 DRV_LOG(DEBUG, "port %u flow %p removed", dev->data->port_id,
2331 * Pointer to Ethernet device.
2333 * Pointer to a TAILQ flow list.
2336 * 0 on success, a negative errno value otherwise and rte_errno is set.
2339 mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
2341 struct priv *priv = dev->data->dev_private;
2342 struct rte_flow *flow;
2344 TAILQ_FOREACH(flow, list, next) {
2348 flow->frxq[HASH_RXQ_ETH].ibv_flow =
2349 mlx5_glue->create_flow
2350 (priv->flow_drop_queue->qp,
2351 flow->frxq[HASH_RXQ_ETH].ibv_attr);
2352 if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2354 "port %u flow %p cannot be applied",
2355 dev->data->port_id, (void *)flow);
2359 DRV_LOG(DEBUG, "port %u flow %p applied",
2360 dev->data->port_id, (void *)flow);
2364 for (i = 0; i != hash_rxq_init_n; ++i) {
2365 if (!flow->frxq[i].ibv_attr)
2367 flow->frxq[i].hrxq =
2368 mlx5_hrxq_get(dev, flow->rss_conf.key,
2369 flow->rss_conf.key_len,
2370 hash_rxq_init[i].hash_fields,
2371 flow->rss_conf.queue,
2372 flow->rss_conf.queue_num);
2373 if (flow->frxq[i].hrxq)
2375 flow->frxq[i].hrxq =
2376 mlx5_hrxq_new(dev, flow->rss_conf.key,
2377 flow->rss_conf.key_len,
2378 hash_rxq_init[i].hash_fields,
2379 flow->rss_conf.queue,
2380 flow->rss_conf.queue_num);
2381 if (!flow->frxq[i].hrxq) {
2383 "port %u flow %p cannot be applied",
2384 dev->data->port_id, (void *)flow);
2389 flow->frxq[i].ibv_flow =
2390 mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
2391 flow->frxq[i].ibv_attr);
2392 if (!flow->frxq[i].ibv_flow) {
2394 "port %u flow %p cannot be applied",
2395 dev->data->port_id, (void *)flow);
2399 DRV_LOG(DEBUG, "port %u flow %p applied",
2400 dev->data->port_id, (void *)flow);
2404 for (i = 0; i != flow->rss_conf.queue_num; ++i)
2405 (*priv->rxqs)[flow->rss_conf.queue[i]]->mark = 1;
2411 * Verify the flow list is empty.
2414 * Pointer to Ethernet device.
2416 * @return the number of flows not released.
2419 mlx5_flow_verify(struct rte_eth_dev *dev)
2421 struct priv *priv = dev->data->dev_private;
2422 struct rte_flow *flow;
2425 TAILQ_FOREACH(flow, &priv->flows, next) {
2426 DRV_LOG(DEBUG, "port %u flow %p still referenced",
2427 dev->data->port_id, (void *)flow);
2434 * Enable a control flow configured from the control plane.
2437 * Pointer to Ethernet device.
2439 * An Ethernet flow spec to apply.
2441 * An Ethernet flow mask to apply.
2443 * A VLAN flow spec to apply.
2445 * A VLAN flow mask to apply.
2448 * 0 on success, a negative errno value otherwise and rte_errno is set.
int
mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
		    struct rte_flow_item_eth *eth_spec,
		    struct rte_flow_item_eth *eth_mask,
		    struct rte_flow_item_vlan *vlan_spec,
		    struct rte_flow_item_vlan *vlan_mask)
{
	struct priv *priv = dev->data->dev_private;
	const struct rte_flow_attr attr = {
		.ingress = 1,
		.priority = MLX5_CTRL_FLOW_PRIORITY,
	};
	struct rte_flow_item items[] = {
		{
			.type = RTE_FLOW_ITEM_TYPE_ETH,
			.spec = eth_spec,
			.last = NULL,
			.mask = eth_mask,
		},
		{
			.type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
					      RTE_FLOW_ITEM_TYPE_END,
			.spec = vlan_spec,
			.last = NULL,
			.mask = vlan_mask,
		},
		{
			.type = RTE_FLOW_ITEM_TYPE_END,
		},
	};
	uint16_t queue[priv->reta_idx_n];
	struct rte_flow_action_rss action_rss = {
		.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
		.level = 0,
		.types = priv->rss_conf.rss_hf,
		.key_len = priv->rss_conf.rss_key_len,
		.queue_num = priv->reta_idx_n,
		.key = priv->rss_conf.rss_key,
		.queue = queue,
	};
	struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_RSS,
			.conf = &action_rss,
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		},
	};
	struct rte_flow *flow;
	struct rte_flow_error error;
	unsigned int i;

	if (!priv->reta_idx_n) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	for (i = 0; i != priv->reta_idx_n; ++i)
		queue[i] = (*priv->reta_idx)[i];
	flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
				     actions, &error);
	if (!flow)
		return -rte_errno;
	return 0;
}
/**
 * Enable a control flow configured from the control plane.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param eth_spec
 *   An Ethernet flow spec to apply.
 * @param eth_mask
 *   An Ethernet flow mask to apply.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_ctrl_flow(struct rte_eth_dev *dev,
	       struct rte_flow_item_eth *eth_spec,
	       struct rte_flow_item_eth *eth_mask)
{
	return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
}
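
/*
 * Usage sketch (illustrative only, compiled out): enabling an exact-match
 * broadcast control flow with the helper above.  Passing the same
 * structure as spec and mask makes every bit of the destination address
 * significant.  Guard macro and helper name are hypothetical.
 */
#ifdef MLX5_FLOW_USAGE_EXAMPLES
static int
example_enable_broadcast(struct rte_eth_dev *dev)
{
	struct rte_flow_item_eth bcast = {
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
	};

	return mlx5_ctrl_flow(dev, &bcast, &bcast);
}
#endif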
/**
 * Destroy a flow.
 *
 * @see rte_flow_destroy()
 * @see rte_flow_ops
 */
int
mlx5_flow_destroy(struct rte_eth_dev *dev,
		  struct rte_flow *flow,
		  struct rte_flow_error *error __rte_unused)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_destroy(dev, &priv->flows, flow);
	return 0;
}
/**
 * Destroy all flows.
 *
 * @see rte_flow_flush()
 * @see rte_flow_ops
 */
int
mlx5_flow_flush(struct rte_eth_dev *dev,
		struct rte_flow_error *error __rte_unused)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_flush(dev, &priv->flows);
	return 0;
}
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
/**
 * Query flow counter.
 *
 * @param cs
 *   The Verbs counter set to read.
 * @param counter_stats
 *   Last values stored for this counter, used as the baseline and
 *   updated when a reset is requested.
 * @param[out] query_count
 *   Returned data from the counter: hits and bytes as deltas against
 *   the stored baseline.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_query_count(struct ibv_counter_set *cs,
		      struct mlx5_flow_counter_stats *counter_stats,
		      struct rte_flow_query_count *query_count,
		      struct rte_flow_error *error)
{
	uint64_t counters[2];
	struct ibv_query_counter_set_attr query_cs_attr = {
		.cs = cs,
		.query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
	};
	struct ibv_counter_set_data query_out = {
		.out = counters,
		.outlen = 2 * sizeof(uint64_t),
	};
	int err = mlx5_glue->query_counter_set(&query_cs_attr, &query_out);

	if (err)
		return rte_flow_error_set(error, err,
					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
					  NULL,
					  "cannot read counter");
	query_count->hits_set = 1;
	query_count->bytes_set = 1;
	query_count->hits = counters[0] - counter_stats->hits;
	query_count->bytes = counters[1] - counter_stats->bytes;
	/* On reset, the running totals become the new baseline. */
	if (query_count->reset) {
		counter_stats->hits = counters[0];
		counter_stats->bytes = counters[1];
	}
	return 0;
}
/**
 * Query a flow.
 *
 * @see rte_flow_query()
 * @see rte_flow_ops
 */
int
mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
		struct rte_flow *flow,
		enum rte_flow_action_type action __rte_unused,
		void *data,
		struct rte_flow_error *error)
{
	if (flow->cs) {
		int ret;

		ret = mlx5_flow_query_count(flow->cs,
					    &flow->counter_stats,
					    (struct rte_flow_query_count *)data,
					    error);
		if (ret)
			return ret;
	} else {
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
					  NULL,
					  "no counter found for flow");
	}
	return 0;
}
#endif
/**
 * Isolate the device (only control flows remain functional).
 *
 * @see rte_flow_isolate()
 * @see rte_flow_ops
 */
int
mlx5_flow_isolate(struct rte_eth_dev *dev,
		  int enable,
		  struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	if (dev->data->dev_started) {
		rte_flow_error_set(error, EBUSY,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "port must be stopped first");
		return -rte_errno;
	}
	priv->isolated = !!enable;
	if (enable)
		priv->dev->dev_ops = &mlx5_dev_ops_isolate;
	else
		priv->dev->dev_ops = &mlx5_dev_ops;
	return 0;
}
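
/*
 * Usage sketch (illustrative only, compiled out): isolated mode must be
 * selected before the port is started, through the generic
 * rte_flow_isolate() entry point.  Guard macro and helper name are
 * hypothetical.
 */
#ifdef MLX5_FLOW_USAGE_EXAMPLES
static int
example_enter_isolated_mode(uint16_t port_id)
{
	struct rte_flow_error error;

	/* Fails with EBUSY if the port was already started. */
	return rte_flow_isolate(port_id, 1, &error);
}
#endif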
/**
 * Convert a flow director filter to a generic flow.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Flow director filter to add.
 * @param attributes
 *   Generic flow parameters structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
			 const struct rte_eth_fdir_filter *fdir_filter,
			 struct mlx5_fdir *attributes)
{
	struct priv *priv = dev->data->dev_private;
	const struct rte_eth_fdir_input *input = &fdir_filter->input;
	const struct rte_eth_fdir_masks *mask =
		&dev->data->dev_conf.fdir_conf.mask;

	/* Validate queue number. */
	if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
		DRV_LOG(ERR, "port %u invalid queue number %d",
			dev->data->port_id, fdir_filter->action.rx_queue);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	attributes->attr.ingress = 1;
	attributes->items[0] = (struct rte_flow_item) {
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.spec = &attributes->l2,
		.mask = &attributes->l2_mask,
	};
	switch (fdir_filter->action.behavior) {
	case RTE_ETH_FDIR_ACCEPT:
		attributes->actions[0] = (struct rte_flow_action){
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &attributes->queue,
		};
		break;
	case RTE_ETH_FDIR_REJECT:
		attributes->actions[0] = (struct rte_flow_action){
			.type = RTE_FLOW_ACTION_TYPE_DROP,
		};
		break;
	default:
		DRV_LOG(ERR, "port %u invalid behavior %d",
			dev->data->port_id,
			fdir_filter->action.behavior);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	attributes->queue.index = fdir_filter->action.rx_queue;
	/* Handle L3. */
	switch (fdir_filter->input.flow_type) {
	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = input->flow.ip4_flow.src_ip,
			.dst_addr = input->flow.ip4_flow.dst_ip,
			.time_to_live = input->flow.ip4_flow.ttl,
			.type_of_service = input->flow.ip4_flow.tos,
			.next_proto_id = input->flow.ip4_flow.proto,
		};
		attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = mask->ipv4_mask.src_ip,
			.dst_addr = mask->ipv4_mask.dst_ip,
			.time_to_live = mask->ipv4_mask.ttl,
			.type_of_service = mask->ipv4_mask.tos,
			.next_proto_id = mask->ipv4_mask.proto,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV4,
			.spec = &attributes->l3,
			.mask = &attributes->l3_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
			.hop_limits = input->flow.ipv6_flow.hop_limits,
			.proto = input->flow.ipv6_flow.proto,
		};
		memcpy(attributes->l3.ipv6.hdr.src_addr,
		       input->flow.ipv6_flow.src_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
		memcpy(attributes->l3.ipv6.hdr.dst_addr,
		       input->flow.ipv6_flow.dst_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
		memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
		       mask->ipv6_mask.src_ip,
		       RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
		memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
		       mask->ipv6_mask.dst_ip,
		       RTE_DIM(attributes->l3_mask.ipv6.hdr.dst_addr));
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV6,
			.spec = &attributes->l3,
			.mask = &attributes->l3_mask,
		};
		break;
	default:
		DRV_LOG(ERR, "port %u invalid flow type %d",
			dev->data->port_id, fdir_filter->input.flow_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	/* Handle L4. */
	switch (fdir_filter->input.flow_type) {
	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
		attributes->l4.udp.hdr = (struct udp_hdr){
			.src_port = input->flow.udp4_flow.src_port,
			.dst_port = input->flow.udp4_flow.dst_port,
		};
		attributes->l4_mask.udp.hdr = (struct udp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_UDP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
		attributes->l4.tcp.hdr = (struct tcp_hdr){
			.src_port = input->flow.tcp4_flow.src_port,
			.dst_port = input->flow.tcp4_flow.dst_port,
		};
		attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_TCP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
		attributes->l4.udp.hdr = (struct udp_hdr){
			.src_port = input->flow.udp6_flow.src_port,
			.dst_port = input->flow.udp6_flow.dst_port,
		};
		attributes->l4_mask.udp.hdr = (struct udp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_UDP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
		attributes->l4.tcp.hdr = (struct tcp_hdr){
			.src_port = input->flow.tcp6_flow.src_port,
			.dst_port = input->flow.tcp6_flow.dst_port,
		};
		attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_TCP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
		break;
	default:
		DRV_LOG(ERR, "port %u invalid flow type %d",
			dev->data->port_id, fdir_filter->input.flow_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	return 0;
}
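
/*
 * Illustrative input (compiled out): a flow director filter that takes
 * the RTE_ETH_FLOW_NONFRAG_IPV4_UDP path above, producing the pattern
 * ETH / IPV4 / UDP with a QUEUE action.  Addresses and ports are
 * big-endian compile-time constants; the guard macro and variable name
 * are hypothetical.
 */
#ifdef MLX5_FLOW_USAGE_EXAMPLES
static const struct rte_eth_fdir_filter example_udp4_filter = {
	.input = {
		.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
		.flow.udp4_flow = {
			.ip = {
				.src_ip = RTE_BE32(0x0a000001), /* 10.0.0.1 */
				.dst_ip = RTE_BE32(0x0a000002), /* 10.0.0.2 */
			},
			.src_port = RTE_BE16(1234),
			.dst_port = RTE_BE16(5678),
		},
	},
	.action = {
		.rx_queue = 0,
		.behavior = RTE_ETH_FDIR_ACCEPT,
	},
};
#endif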
/**
 * Add new flow director filter and store it in list.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Flow director filter to add.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_add(struct rte_eth_dev *dev,
		     const struct rte_eth_fdir_filter *fdir_filter)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_fdir attributes = {
		.attr.group = 0,
		.l2_mask = {
			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.type = 0,
		},
	};
	struct mlx5_flow_parse parser = {
		.layer = HASH_RXQ_ETH,
	};
	struct rte_flow_error error;
	struct rte_flow *flow;
	int ret;

	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
	if (ret)
		return ret;
	ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
				attributes.actions, &error, &parser);
	if (ret)
		return ret;
	flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
				     attributes.items, attributes.actions,
				     &error);
	if (flow) {
		DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
			(void *)flow);
		return 0;
	}
	return -rte_errno;
}
/**
 * Delete specific filter.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Filter to be deleted.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_delete(struct rte_eth_dev *dev,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_fdir attributes = {
		.attr.group = 0,
	};
	struct mlx5_flow_parse parser = {
		.create = 1,
		.layer = HASH_RXQ_ETH,
	};
	struct rte_flow_error error;
	struct rte_flow *flow;
	unsigned int i;
	int ret;

	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
	if (ret)
		return ret;
	ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
				attributes.actions, &error, &parser);
	if (ret)
		goto exit;
	/*
	 * Special case for drop action which is only set in the
	 * specifications when the flow is created.  In this situation the
	 * drop specification is missing.
	 */
	if (parser.drop) {
		struct ibv_flow_spec_action_drop *drop;

		drop = (void *)((uintptr_t)parser.queue[HASH_RXQ_ETH].ibv_attr +
				parser.queue[HASH_RXQ_ETH].offset);
		*drop = (struct ibv_flow_spec_action_drop){
			.type = IBV_FLOW_SPEC_ACTION_DROP,
			.size = sizeof(struct ibv_flow_spec_action_drop),
		};
		parser.queue[HASH_RXQ_ETH].ibv_attr->num_of_specs++;
	}
	TAILQ_FOREACH(flow, &priv->flows, next) {
		struct ibv_flow_attr *attr;
		struct ibv_spec_header *attr_h;
		void *spec;
		struct ibv_flow_attr *flow_attr;
		struct ibv_spec_header *flow_h;
		void *flow_spec;
		unsigned int specs_n;

		attr = parser.queue[HASH_RXQ_ETH].ibv_attr;
		flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr;
		/* Compare first the attributes. */
		if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
			continue;
		if (attr->num_of_specs == 0)
			continue;
		spec = (void *)((uintptr_t)attr +
				sizeof(struct ibv_flow_attr));
		flow_spec = (void *)((uintptr_t)flow_attr +
				     sizeof(struct ibv_flow_attr));
		specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
		for (i = 0; i != specs_n; ++i) {
			attr_h = spec;
			flow_h = flow_spec;
			if (memcmp(spec, flow_spec,
				   RTE_MIN(attr_h->size, flow_h->size)))
				goto wrong_flow;
			spec = (void *)((uintptr_t)spec + attr_h->size);
			flow_spec = (void *)((uintptr_t)flow_spec +
					     flow_h->size);
		}
		/* At this point, the flow matches. */
		break;
wrong_flow:
		/* The flow does not match. */
		continue;
	}
	ret = rte_errno; /* Save rte_errno before cleanup. */
	if (flow)
		mlx5_flow_list_destroy(dev, &priv->flows, flow);
exit:
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser.queue[i].ibv_attr)
			rte_free(parser.queue[i].ibv_attr);
	}
	rte_errno = ret; /* Restore rte_errno. */
	return -ret;
}
/**
 * Update queue for specific filter.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Filter to be updated.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_update(struct rte_eth_dev *dev,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	int ret;

	ret = mlx5_fdir_filter_delete(dev, fdir_filter);
	if (ret)
		return ret;
	return mlx5_fdir_filter_add(dev, fdir_filter);
}
/**
 * Flush all filters.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
static void
mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_flush(dev, &priv->flows);
}
/**
 * Get flow director information.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[out] fdir_info
 *   Resulting flow director information.
 */
static void
mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
{
	struct priv *priv = dev->data->dev_private;
	struct rte_eth_fdir_masks *mask =
		&priv->dev->data->dev_conf.fdir_conf.mask;

	fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
	fdir_info->guarant_spc = 0;
	rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
	fdir_info->max_flexpayload = 0;
	fdir_info->flow_types_mask[0] = 0;
	fdir_info->flex_payload_unit = 0;
	fdir_info->max_flex_payload_segment_num = 0;
	fdir_info->flex_payload_limit = 0;
	memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
}
/**
 * Deal with flow director operations.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
		    void *arg)
{
	struct priv *priv = dev->data->dev_private;
	enum rte_fdir_mode fdir_mode =
		priv->dev->data->dev_conf.fdir_conf.mode;

	if (filter_op == RTE_ETH_FILTER_NOP)
		return 0;
	if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
	    fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
		DRV_LOG(ERR, "port %u flow director mode %d not supported",
			dev->data->port_id, fdir_mode);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	switch (filter_op) {
	case RTE_ETH_FILTER_ADD:
		return mlx5_fdir_filter_add(dev, arg);
	case RTE_ETH_FILTER_UPDATE:
		return mlx5_fdir_filter_update(dev, arg);
	case RTE_ETH_FILTER_DELETE:
		return mlx5_fdir_filter_delete(dev, arg);
	case RTE_ETH_FILTER_FLUSH:
		mlx5_fdir_filter_flush(dev);
		break;
	case RTE_ETH_FILTER_INFO:
		mlx5_fdir_info_get(dev, arg);
		break;
	default:
		DRV_LOG(DEBUG, "port %u unknown operation %u",
			dev->data->port_id, filter_op);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	return 0;
}
/**
 * Manage filter operations.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param filter_type
 *   Filter type.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
		     enum rte_filter_type filter_type,
		     enum rte_filter_op filter_op,
		     void *arg)
{
	switch (filter_type) {
	case RTE_ETH_FILTER_GENERIC:
		if (filter_op != RTE_ETH_FILTER_GET) {
			rte_errno = EINVAL;
			return -rte_errno;
		}
		*(const void **)arg = &mlx5_flow_ops;
		return 0;
	case RTE_ETH_FILTER_FDIR:
		return mlx5_fdir_ctrl_func(dev, filter_op, arg);
	default:
		DRV_LOG(ERR, "port %u filter type (%d) not supported",
			dev->data->port_id, filter_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
}
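
/*
 * Usage sketch (illustrative only, compiled out): RTE_ETH_FILTER_GENERIC
 * with RTE_ETH_FILTER_GET is how the rte_flow layer obtains the
 * mlx5_flow_ops table from the entry point above.  Guard macro and helper
 * name are hypothetical.
 */
#ifdef MLX5_FLOW_USAGE_EXAMPLES
static const struct rte_flow_ops *
example_get_flow_ops(struct rte_eth_dev *dev)
{
	const struct rte_flow_ops *ops = NULL;

	if (mlx5_dev_filter_ctrl(dev, RTE_ETH_FILTER_GENERIC,
				 RTE_ETH_FILTER_GET, &ops))
		return NULL;
	return ops;
}
#endif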