/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2016 6WIND S.A.
 * Copyright 2016 Mellanox Technologies, Ltd
 */

#include <sys/queue.h>
#include <string.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_common.h>
#include <rte_ether.h>
#include <rte_eth_ctrl.h>
#include <rte_ethdev_driver.h>
#include <rte_flow.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>
#include <rte_ip.h>

#include "mlx5.h"
#include "mlx5_defs.h"
#include "mlx5_rxtx.h"
#include "mlx5_glue.h"
/* Define minimal priority for control plane flows. */
#define MLX5_CTRL_FLOW_PRIORITY 4

/* Internet Protocol versions. */
#define MLX5_IPV4 4
#define MLX5_IPV6 6

#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
struct ibv_flow_spec_counter_action {
	int dummy;
};
#endif

/* Dev ops structure defined in mlx5.c */
extern const struct eth_dev_ops mlx5_dev_ops;
extern const struct eth_dev_ops mlx5_dev_ops_isolate;

/** Structure given to the conversion functions. */
struct mlx5_flow_data {
	struct mlx5_flow_parse *parser; /** Parser context. */
	struct rte_flow_error *error; /** Error context. */
};
static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
		     const void *default_mask,
		     struct mlx5_flow_data *data);

static int
mlx5_flow_create_vlan(const struct rte_flow_item *item,
		      const void *default_mask,
		      struct mlx5_flow_data *data);

static int
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
		      const void *default_mask,
		      struct mlx5_flow_data *data);

static int
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
		      const void *default_mask,
		      struct mlx5_flow_data *data);

static int
mlx5_flow_create_udp(const struct rte_flow_item *item,
		     const void *default_mask,
		     struct mlx5_flow_data *data);

static int
mlx5_flow_create_tcp(const struct rte_flow_item *item,
		     const void *default_mask,
		     struct mlx5_flow_data *data);

static int
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
		       const void *default_mask,
		       struct mlx5_flow_data *data);

struct mlx5_flow_parse;

static void
mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
		      unsigned int size);

static int
mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);

static int
mlx5_flow_create_count(struct rte_eth_dev *dev, struct mlx5_flow_parse *parser);
/* Hash RX queue types. */
enum hash_rxq_type {
	HASH_RXQ_TCPV4,
	HASH_RXQ_UDPV4,
	HASH_RXQ_IPV4,
	HASH_RXQ_TCPV6,
	HASH_RXQ_UDPV6,
	HASH_RXQ_IPV6,
	HASH_RXQ_ETH,
};

/* Initialization data for hash RX queue. */
struct hash_rxq_init {
	uint64_t hash_fields; /* Fields that participate in the hash. */
	uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
	unsigned int flow_priority; /* Flow priority to use. */
	unsigned int ip_version; /* Internet protocol. */
};

/* Initialization data for hash RX queues. */
const struct hash_rxq_init hash_rxq_init[] = {
	[HASH_RXQ_TCPV4] = {
		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
				IBV_RX_HASH_DST_IPV4 |
				IBV_RX_HASH_SRC_PORT_TCP |
				IBV_RX_HASH_DST_PORT_TCP),
		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
		.flow_priority = 0,
		.ip_version = MLX5_IPV4,
	},
	[HASH_RXQ_UDPV4] = {
		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
				IBV_RX_HASH_DST_IPV4 |
				IBV_RX_HASH_SRC_PORT_UDP |
				IBV_RX_HASH_DST_PORT_UDP),
		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
		.flow_priority = 0,
		.ip_version = MLX5_IPV4,
	},
	[HASH_RXQ_IPV4] = {
		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
				IBV_RX_HASH_DST_IPV4),
		.dpdk_rss_hf = (ETH_RSS_IPV4 |
				ETH_RSS_FRAG_IPV4),
		.flow_priority = 1,
		.ip_version = MLX5_IPV4,
	},
	[HASH_RXQ_TCPV6] = {
		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
				IBV_RX_HASH_DST_IPV6 |
				IBV_RX_HASH_SRC_PORT_TCP |
				IBV_RX_HASH_DST_PORT_TCP),
		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
		.flow_priority = 0,
		.ip_version = MLX5_IPV6,
	},
	[HASH_RXQ_UDPV6] = {
		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
				IBV_RX_HASH_DST_IPV6 |
				IBV_RX_HASH_SRC_PORT_UDP |
				IBV_RX_HASH_DST_PORT_UDP),
		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
		.flow_priority = 0,
		.ip_version = MLX5_IPV6,
	},
	[HASH_RXQ_IPV6] = {
		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
				IBV_RX_HASH_DST_IPV6),
		.dpdk_rss_hf = (ETH_RSS_IPV6 |
				ETH_RSS_FRAG_IPV6),
		.flow_priority = 1,
		.ip_version = MLX5_IPV6,
	},
	[HASH_RXQ_ETH] = {
		.hash_fields = 0,
		.dpdk_rss_hf = 0,
		.flow_priority = 2,
	},
};

/* Number of entries in hash_rxq_init[]. */
const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
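
/*
 * A minimal lookup sketch (illustrative only, the helper name is
 * hypothetical and not part of the driver): mapping a DPDK RSS hash field
 * set back to its hash Rx queue type using the table above.
 *
 *	static enum hash_rxq_type
 *	example_hash_rxq_type(uint64_t dpdk_rss_hf)
 *	{
 *		unsigned int i;
 *
 *		for (i = 0; i != hash_rxq_init_n; ++i)
 *			if (hash_rxq_init[i].dpdk_rss_hf & dpdk_rss_hf)
 *				return (enum hash_rxq_type)i;
 *		return HASH_RXQ_ETH;
 *	}
 */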
/** Structure for holding counter stats. */
struct mlx5_flow_counter_stats {
	uint64_t hits; /**< Number of packets matched by the rule. */
	uint64_t bytes; /**< Number of bytes matched by the rule. */
};

/** Structure for Drop queue. */
struct mlx5_hrxq_drop {
	struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
	struct ibv_qp *qp; /**< Verbs queue pair. */
	struct ibv_wq *wq; /**< Verbs work queue. */
	struct ibv_cq *cq; /**< Verbs completion queue. */
};
/* Flow structures. */
struct mlx5_flow {
	uint64_t hash_fields; /**< Fields that participate in the hash. */
	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
	struct ibv_flow *ibv_flow; /**< Verbs flow. */
	struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
};

/* Drop flow structures. */
struct mlx5_flow_drop {
	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
	struct ibv_flow *ibv_flow; /**< Verbs flow. */
};

struct rte_flow {
	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
	uint32_t mark:1; /**< Set if the flow is marked. */
	uint32_t drop:1; /**< Drop queue. */
	struct rte_flow_action_rss rss_conf; /**< RSS configuration. */
	uint16_t (*queues)[]; /**< Queues indexes to use. */
	uint8_t rss_key[40]; /**< Copy of the RSS key. */
	struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
	struct mlx5_flow_counter_stats counter_stats; /**< The counter stats. */
	struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
	/**< Flow with Rx queue. */
};
/** Static initializer for items. */
#define ITEMS(...) \
	(const enum rte_flow_item_type []){ \
		__VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
	}
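
/*
 * For illustration (the item list is assumed), ITEMS(RTE_FLOW_ITEM_TYPE_IPV4)
 * expands to a compound literal always terminated by the END item:
 *
 *	(const enum rte_flow_item_type []){
 *		RTE_FLOW_ITEM_TYPE_IPV4, RTE_FLOW_ITEM_TYPE_END,
 *	}
 */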
/** Structure to generate a simple graph of layers supported by the NIC. */
struct mlx5_flow_items {
	/** List of possible actions for these items. */
	const enum rte_flow_action_type *const actions;
	/** Bit-masks corresponding to the possibilities for the item. */
	const void *mask;
	/**
	 * Default bit-masks to use when item->mask is not provided. When
	 * \default_mask is also NULL, the full supported bit-mask (\mask) is
	 * used instead.
	 */
	const void *default_mask;
	/** Bit-masks size in bytes. */
	const unsigned int mask_sz;
	/**
	 * Conversion function from rte_flow to NIC specific flow.
	 *
	 * @param item
	 *   rte_flow item to convert.
	 * @param default_mask
	 *   Default bit-masks to use when item->mask is not provided.
	 * @param data
	 *   Internal structure to store the conversion.
	 *
	 * @return
	 *   0 on success, a negative errno value otherwise and rte_errno is
	 *   set.
	 */
	int (*convert)(const struct rte_flow_item *item,
		       const void *default_mask,
		       struct mlx5_flow_data *data);
	/** Size in bytes of the destination structure. */
	const unsigned int dst_sz;
	/** List of possible following items. */
	const enum rte_flow_item_type *const items;
};
/** Valid actions for this PMD. */
static const enum rte_flow_action_type valid_actions[] = {
	RTE_FLOW_ACTION_TYPE_DROP,
	RTE_FLOW_ACTION_TYPE_QUEUE,
	RTE_FLOW_ACTION_TYPE_MARK,
	RTE_FLOW_ACTION_TYPE_FLAG,
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
	RTE_FLOW_ACTION_TYPE_COUNT,
#endif
	RTE_FLOW_ACTION_TYPE_END,
};
/** Graph of supported items and associated actions. */
static const struct mlx5_flow_items mlx5_flow_items[] = {
	[RTE_FLOW_ITEM_TYPE_END] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
			       RTE_FLOW_ITEM_TYPE_VXLAN),
	},
	[RTE_FLOW_ITEM_TYPE_ETH] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
			       RTE_FLOW_ITEM_TYPE_IPV4,
			       RTE_FLOW_ITEM_TYPE_IPV6),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_eth){
			.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
			.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
			.type = -1,
		},
		.default_mask = &rte_flow_item_eth_mask,
		.mask_sz = sizeof(struct rte_flow_item_eth),
		.convert = mlx5_flow_create_eth,
		.dst_sz = sizeof(struct ibv_flow_spec_eth),
	},
	[RTE_FLOW_ITEM_TYPE_VLAN] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
			       RTE_FLOW_ITEM_TYPE_IPV6),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_vlan){
			.tci = -1,
			.inner_type = -1,
		},
		.default_mask = &rte_flow_item_vlan_mask,
		.mask_sz = sizeof(struct rte_flow_item_vlan),
		.convert = mlx5_flow_create_vlan,
		.dst_sz = 0,
	},
	[RTE_FLOW_ITEM_TYPE_IPV4] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
			       RTE_FLOW_ITEM_TYPE_TCP),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_ipv4){
			.hdr = {
				.src_addr = -1,
				.dst_addr = -1,
				.type_of_service = -1,
				.next_proto_id = -1,
			},
		},
		.default_mask = &rte_flow_item_ipv4_mask,
		.mask_sz = sizeof(struct rte_flow_item_ipv4),
		.convert = mlx5_flow_create_ipv4,
		.dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
	},
	[RTE_FLOW_ITEM_TYPE_IPV6] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
			       RTE_FLOW_ITEM_TYPE_TCP),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_ipv6){
			.hdr = {
				.src_addr = {
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
				},
				.dst_addr = {
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
				},
				.vtc_flow = -1,
				.proto = -1,
				.hop_limits = -1,
			},
		},
		.default_mask = &rte_flow_item_ipv6_mask,
		.mask_sz = sizeof(struct rte_flow_item_ipv6),
		.convert = mlx5_flow_create_ipv6,
		.dst_sz = sizeof(struct ibv_flow_spec_ipv6),
	},
	[RTE_FLOW_ITEM_TYPE_UDP] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_udp){
			.hdr = {
				.src_port = -1,
				.dst_port = -1,
			},
		},
		.default_mask = &rte_flow_item_udp_mask,
		.mask_sz = sizeof(struct rte_flow_item_udp),
		.convert = mlx5_flow_create_udp,
		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
	},
	[RTE_FLOW_ITEM_TYPE_TCP] = {
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_tcp){
			.hdr = {
				.src_port = -1,
				.dst_port = -1,
			},
		},
		.default_mask = &rte_flow_item_tcp_mask,
		.mask_sz = sizeof(struct rte_flow_item_tcp),
		.convert = mlx5_flow_create_tcp,
		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
	},
	[RTE_FLOW_ITEM_TYPE_VXLAN] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_vxlan){
			.vni = "\xff\xff\xff",
		},
		.default_mask = &rte_flow_item_vxlan_mask,
		.mask_sz = sizeof(struct rte_flow_item_vxlan),
		.convert = mlx5_flow_create_vxlan,
		.dst_sz = sizeof(struct ibv_flow_spec_tunnel),
	},
};
/** Structure to pass to the conversion function. */
struct mlx5_flow_parse {
	uint32_t inner; /**< Set once VXLAN is encountered. */
	uint32_t create:1;
	/**< Whether resources should remain after a validate. */
	uint32_t drop:1; /**< Target is a drop queue. */
	uint32_t mark:1; /**< Mark is present in the flow. */
	uint32_t count:1; /**< Count is present in the flow. */
	uint32_t mark_id; /**< Mark identifier. */
	struct rte_flow_action_rss rss_conf; /**< RSS configuration. */
	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use. */
	uint8_t rss_key[40]; /**< Copy of the RSS key. */
	enum hash_rxq_type layer; /**< Last pattern layer detected. */
	struct ibv_counter_set *cs; /**< Holds the counter set for the rule. */
	struct {
		struct ibv_flow_attr *ibv_attr;
		/**< Pointer to Verbs attributes. */
		unsigned int offset;
		/**< Current position or total size of the attribute. */
	} queue[RTE_DIM(hash_rxq_init)];
};
static const struct rte_flow_ops mlx5_flow_ops = {
	.validate = mlx5_flow_validate,
	.create = mlx5_flow_create,
	.destroy = mlx5_flow_destroy,
	.flush = mlx5_flow_flush,
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
	.query = mlx5_flow_query,
#else
	.query = NULL,
#endif
	.isolate = mlx5_flow_isolate,
};
/* Convert FDIR request to Generic flow. */
struct mlx5_fdir {
	struct rte_flow_attr attr;
	struct rte_flow_action actions[2];
	struct rte_flow_item items[4];
	struct rte_flow_item_eth l2;
	struct rte_flow_item_eth l2_mask;
	union {
		struct rte_flow_item_ipv4 ipv4;
		struct rte_flow_item_ipv6 ipv6;
	} l3;
	union {
		struct rte_flow_item_ipv4 ipv4;
		struct rte_flow_item_ipv6 ipv6;
	} l3_mask;
	union {
		struct rte_flow_item_udp udp;
		struct rte_flow_item_tcp tcp;
	} l4;
	union {
		struct rte_flow_item_udp udp;
		struct rte_flow_item_tcp tcp;
	} l4_mask;
	struct rte_flow_action_queue queue;
};
/* Verbs specification header. */
struct ibv_spec_header {
	enum ibv_flow_spec_type type;
	uint16_t size;
};
/**
 * Check support for a given item.
 *
 * @param item[in]
 *   Item specification.
 * @param mask[in]
 *   Bit-masks covering supported fields to compare with spec, last and mask in
 *   \item.
 * @param size
 *   Bit-mask size in bytes.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_item_validate(const struct rte_flow_item *item,
			const uint8_t *mask, unsigned int size)
{
	if (!item->spec && (item->mask || item->last)) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	if (item->spec && !item->mask) {
		unsigned int i;
		const uint8_t *spec = item->spec;

		for (i = 0; i < size; ++i)
			if ((spec[i] | mask[i]) != mask[i]) {
				rte_errno = EINVAL;
				return -rte_errno;
			}
	}
	if (item->last && !item->mask) {
		unsigned int i;
		const uint8_t *spec = item->last;

		for (i = 0; i < size; ++i)
			if ((spec[i] | mask[i]) != mask[i]) {
				rte_errno = EINVAL;
				return -rte_errno;
			}
	}
	if (item->mask) {
		unsigned int i;
		const uint8_t *spec = item->mask;

		for (i = 0; i < size; ++i)
			if ((spec[i] | mask[i]) != mask[i]) {
				rte_errno = EINVAL;
				return -rte_errno;
			}
	}
	if (item->spec && item->last) {
		uint8_t spec[size];
		uint8_t last[size];
		const uint8_t *apply = mask;
		unsigned int i;
		int ret;

		if (item->mask)
			apply = item->mask;
		for (i = 0; i < size; ++i) {
			spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
			last[i] = ((const uint8_t *)item->last)[i] & apply[i];
		}
		ret = memcmp(spec, last, size);
		if (ret != 0) {
			rte_errno = EINVAL;
			return -rte_errno;
		}
	}
	return 0;
}
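
/*
 * A minimal usage sketch (illustrative only, values assumed): validating a
 * UDP item matching destination port 4789 against the supported UDP mask
 * declared in mlx5_flow_items[].
 *
 *	const struct rte_flow_item_udp spec = {
 *		.hdr = { .dst_port = RTE_BE16(4789) },
 *	};
 *	const struct rte_flow_item_udp mask = {
 *		.hdr = { .dst_port = RTE_BE16(0xffff) },
 *	};
 *	const struct rte_flow_item item = {
 *		.type = RTE_FLOW_ITEM_TYPE_UDP,
 *		.spec = &spec,
 *		.mask = &mask,
 *	};
 *	const struct mlx5_flow_items *proto =
 *		&mlx5_flow_items[RTE_FLOW_ITEM_TYPE_UDP];
 *	int ret = mlx5_flow_item_validate
 *		(&item, (const uint8_t *)proto->mask, proto->mask_sz);
 *
 * ret is 0 here since the supplied mask is a subset of the supported one.
 */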
/**
 * Validate flow rule attributes.
 *
 * @param[in] attr
 *   Flow rule attributes.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_convert_attributes(const struct rte_flow_attr *attr,
			     struct rte_flow_error *error)
{
	if (attr->group) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
				   NULL,
				   "groups are not supported");
		return -rte_errno;
	}
	if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
				   NULL,
				   "priorities are not supported");
		return -rte_errno;
	}
	if (attr->egress) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
				   NULL,
				   "egress is not supported");
		return -rte_errno;
	}
	if (!attr->ingress) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
				   NULL,
				   "only ingress is supported");
		return -rte_errno;
	}
	return 0;
}
/**
 * Extract actions request to the parser.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_convert_actions(struct rte_eth_dev *dev,
			  const struct rte_flow_action actions[],
			  struct rte_flow_error *error,
			  struct mlx5_flow_parse *parser)
{
	enum { FATE = 1, MARK = 2, COUNT = 4, };
	uint32_t overlap = 0;
	struct priv *priv = dev->data->dev_private;

	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
			continue;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
			if (overlap & FATE)
				goto exit_action_overlap;
			overlap |= FATE;
			parser->drop = 1;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
			const struct rte_flow_action_queue *queue =
				(const struct rte_flow_action_queue *)
				actions->conf;

			if (overlap & FATE)
				goto exit_action_overlap;
			overlap |= FATE;
			if (!queue || (queue->index > (priv->rxqs_n - 1)))
				goto exit_action_not_supported;
			parser->queues[0] = queue->index;
			parser->rss_conf = (struct rte_flow_action_rss){
				.queue_num = 1,
				.queue = parser->queues,
			};
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
			const struct rte_flow_action_rss *rss =
				(const struct rte_flow_action_rss *)
				actions->conf;
			const uint8_t *rss_key;
			uint32_t rss_key_len;
			uint32_t n;

			if (overlap & FATE)
				goto exit_action_overlap;
			overlap |= FATE;
			if (rss->func &&
			    rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "the only supported RSS hash"
						   " function is Toeplitz");
				return -rte_errno;
			}
			if (rss->level) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "a nonzero RSS encapsulation"
						   " level is not supported");
				return -rte_errno;
			}
			if (rss->types & MLX5_RSS_HF_MASK) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "unsupported RSS type"
						   " requested");
				return -rte_errno;
			}
			if (rss->key_len) {
				rss_key_len = rss->key_len;
				rss_key = rss->key;
			} else {
				rss_key_len = rss_hash_default_key_len;
				rss_key = rss_hash_default_key;
			}
			if (rss_key_len != RTE_DIM(parser->rss_key)) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "RSS hash key must be"
						   " exactly 40 bytes long");
				return -rte_errno;
			}
			if (!rss->queue_num) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "no valid queues");
				return -rte_errno;
			}
			if (rss->queue_num > RTE_DIM(parser->queues)) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "too many queues for RSS"
						   " context");
				return -rte_errno;
			}
			for (n = 0; n < rss->queue_num; ++n) {
				if (rss->queue[n] >= priv->rxqs_n) {
					rte_flow_error_set(error, EINVAL,
						RTE_FLOW_ERROR_TYPE_ACTION,
						actions,
						"queue id > number of"
						" queues");
					return -rte_errno;
				}
			}
			parser->rss_conf = (struct rte_flow_action_rss){
				.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
				.level = 0,
				.types = rss->types,
				.key_len = rss_key_len,
				.queue_num = rss->queue_num,
				.key = memcpy(parser->rss_key, rss_key,
					      sizeof(*rss_key) * rss_key_len),
				.queue = memcpy(parser->queues, rss->queue,
						sizeof(*rss->queue) *
						rss->queue_num),
			};
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
			const struct rte_flow_action_mark *mark =
				(const struct rte_flow_action_mark *)
				actions->conf;

			if (overlap & MARK)
				goto exit_action_overlap;
			overlap |= MARK;
			if (!mark) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "mark must be defined");
				return -rte_errno;
			} else if (mark->id >= MLX5_FLOW_MARK_MAX) {
				rte_flow_error_set(error, ENOTSUP,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "mark must be between 0"
						   " and 16777199");
				return -rte_errno;
			}
			parser->mark = 1;
			parser->mark_id = mark->id;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
			if (overlap & MARK)
				goto exit_action_overlap;
			overlap |= MARK;
			parser->mark = 1;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
			   priv->config.flow_counter_en) {
			if (overlap & COUNT)
				goto exit_action_overlap;
			overlap |= COUNT;
			parser->count = 1;
		} else {
			goto exit_action_not_supported;
		}
	}
	/* When fate is unknown, drop traffic. */
	if (!(overlap & FATE))
		parser->drop = 1;
	if (parser->drop && parser->mark)
		parser->mark = 0;
	if (!parser->rss_conf.queue_num && !parser->drop) {
		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "no valid action");
		return -rte_errno;
	}
	return 0;
exit_action_not_supported:
	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
			   actions, "action not supported");
	return -rte_errno;
exit_action_overlap:
	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
			   actions, "overlapping actions are not supported");
	return -rte_errno;
}
/**
 * Validate items.
 *
 * @param[in] items
 *   Pattern specification (list terminated by the END pattern item).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_convert_items_validate(const struct rte_flow_item items[],
				 struct rte_flow_error *error,
				 struct mlx5_flow_parse *parser)
{
	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
	unsigned int i;
	int ret = 0;

	/* Initialise the offsets to start after verbs attribute. */
	for (i = 0; i != hash_rxq_init_n; ++i)
		parser->queue[i].offset = sizeof(struct ibv_flow_attr);
	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
		const struct mlx5_flow_items *token = NULL;
		unsigned int n;

		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
			continue;
		for (i = 0;
		     cur_item->items &&
		     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
		     ++i) {
			if (cur_item->items[i] == items->type) {
				token = &mlx5_flow_items[items->type];
				break;
			}
		}
		if (!token) {
			ret = -ENOTSUP;
			goto exit_item_not_supported;
		}
		cur_item = token;
		ret = mlx5_flow_item_validate(items,
					      (const uint8_t *)cur_item->mask,
					      cur_item->mask_sz);
		if (ret)
			goto exit_item_not_supported;
		if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
			if (parser->inner) {
				rte_flow_error_set(error, ENOTSUP,
						   RTE_FLOW_ERROR_TYPE_ITEM,
						   items,
						   "cannot recognize multiple"
						   " VXLAN encapsulations");
				return -rte_errno;
			}
			parser->inner = IBV_FLOW_SPEC_INNER;
		}
		if (parser->drop) {
			parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
		} else {
			for (n = 0; n != hash_rxq_init_n; ++n)
				parser->queue[n].offset += cur_item->dst_sz;
		}
	}
	if (parser->drop) {
		parser->queue[HASH_RXQ_ETH].offset +=
			sizeof(struct ibv_flow_spec_action_drop);
	}
	if (parser->mark)
		for (i = 0; i != hash_rxq_init_n; ++i)
			parser->queue[i].offset +=
				sizeof(struct ibv_flow_spec_action_tag);
	if (parser->count) {
		unsigned int size = sizeof(struct ibv_flow_spec_counter_action);

		for (i = 0; i != hash_rxq_init_n; ++i)
			parser->queue[i].offset += size;
	}
	return 0;
exit_item_not_supported:
	return rte_flow_error_set(error, -ret, RTE_FLOW_ERROR_TYPE_ITEM,
				  items, "item not supported");
}
/**
 * Allocate memory space to store verbs flow attributes.
 *
 * @param size
 *   Amount of bytes to allocate.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A verbs flow attribute on success, NULL otherwise and rte_errno is set.
 */
static struct ibv_flow_attr *
mlx5_flow_convert_allocate(unsigned int size, struct rte_flow_error *error)
{
	struct ibv_flow_attr *ibv_attr;

	ibv_attr = rte_calloc(__func__, 1, size, 0);
	if (!ibv_attr) {
		rte_flow_error_set(error, ENOMEM,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "cannot allocate verbs spec attributes");
		return NULL;
	}
	return ibv_attr;
}
/**
 * Give inner packet matching a higher priority than non-inner matching.
 *
 * @param[in, out] parser
 *   Internal parser structure.
 * @param attr
 *   User flow attribute.
 */
static void
mlx5_flow_update_priority(struct mlx5_flow_parse *parser,
			  const struct rte_flow_attr *attr)
{
	unsigned int i;

	if (parser->drop) {
		parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
			attr->priority +
			hash_rxq_init[HASH_RXQ_ETH].flow_priority;
		return;
	}
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser->queue[i].ibv_attr) {
			parser->queue[i].ibv_attr->priority =
				attr->priority +
				hash_rxq_init[i].flow_priority -
				(parser->inner ? 1 : 0);
		}
	}
}
/**
 * Finalise verbs flow attributes.
 *
 * @param[in, out] parser
 *   Internal parser structure.
 */
static void
mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
{
	const unsigned int ipv4 =
		hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
	const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 : HASH_RXQ_TCPV6;
	const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
	const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 : HASH_RXQ_TCPV4;
	const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 : HASH_RXQ_IPV4;
	const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
	unsigned int i;

	/* Remove any other flow not matching the pattern. */
	if (parser->rss_conf.queue_num == 1 && !parser->rss_conf.types) {
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (i == HASH_RXQ_ETH)
				continue;
			rte_free(parser->queue[i].ibv_attr);
			parser->queue[i].ibv_attr = NULL;
		}
		return;
	}
	if (parser->layer == HASH_RXQ_ETH) {
		goto fill;
	} else {
		/*
		 * This layer becomes useless as the pattern defines lower
		 * layers.
		 */
		rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
		parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
	}
	/* Remove opposite kind of layer e.g. IPv6 if the pattern is IPv4. */
	for (i = ohmin; i != (ohmax + 1); ++i) {
		if (!parser->queue[i].ibv_attr)
			continue;
		rte_free(parser->queue[i].ibv_attr);
		parser->queue[i].ibv_attr = NULL;
	}
	/* Remove impossible flow according to the RSS configuration. */
	if (hash_rxq_init[parser->layer].dpdk_rss_hf &
	    parser->rss_conf.types) {
		/* Remove any other flow. */
		for (i = hmin; i != (hmax + 1); ++i) {
			if ((i == parser->layer) ||
			    (!parser->queue[i].ibv_attr))
				continue;
			rte_free(parser->queue[i].ibv_attr);
			parser->queue[i].ibv_attr = NULL;
		}
	} else if (!parser->queue[ip].ibv_attr) {
		/* No RSS possible with the current configuration. */
		parser->rss_conf.queue_num = 1;
		return;
	}
fill:
	/*
	 * Fill missing layers in verbs specifications, or compute the correct
	 * offset to allocate the memory space for the attributes and
	 * specifications.
	 */
	for (i = 0; i != hash_rxq_init_n - 1; ++i) {
		union {
			struct ibv_flow_spec_ipv4_ext ipv4;
			struct ibv_flow_spec_ipv6 ipv6;
			struct ibv_flow_spec_tcp_udp udp_tcp;
		} specs;
		void *dst;
		uint16_t size;

		if (i == parser->layer)
			continue;
		if (parser->layer == HASH_RXQ_ETH) {
			if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
				size = sizeof(struct ibv_flow_spec_ipv4_ext);
				specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
					.type = IBV_FLOW_SPEC_IPV4_EXT,
					.size = size,
				};
			} else {
				size = sizeof(struct ibv_flow_spec_ipv6);
				specs.ipv6 = (struct ibv_flow_spec_ipv6){
					.type = IBV_FLOW_SPEC_IPV6,
					.size = size,
				};
			}
			if (parser->queue[i].ibv_attr) {
				dst = (void *)((uintptr_t)
					       parser->queue[i].ibv_attr +
					       parser->queue[i].offset);
				memcpy(dst, &specs, size);
				++parser->queue[i].ibv_attr->num_of_specs;
			}
			parser->queue[i].offset += size;
		}
		if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
		    (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
			size = sizeof(struct ibv_flow_spec_tcp_udp);
			specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
				.type = ((i == HASH_RXQ_UDPV4 ||
					  i == HASH_RXQ_UDPV6) ?
					 IBV_FLOW_SPEC_UDP :
					 IBV_FLOW_SPEC_TCP),
				.size = size,
			};
			if (parser->queue[i].ibv_attr) {
				dst = (void *)((uintptr_t)
					       parser->queue[i].ibv_attr +
					       parser->queue[i].offset);
				memcpy(dst, &specs, size);
				++parser->queue[i].ibv_attr->num_of_specs;
			}
			parser->queue[i].offset += size;
		}
	}
}
/**
 * Validate and convert a flow supported by the NIC.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_convert(struct rte_eth_dev *dev,
		  const struct rte_flow_attr *attr,
		  const struct rte_flow_item items[],
		  const struct rte_flow_action actions[],
		  struct rte_flow_error *error,
		  struct mlx5_flow_parse *parser)
{
	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
	unsigned int i;
	int ret;

	/* First step. Validate the attributes, items and actions. */
	*parser = (struct mlx5_flow_parse){
		.create = parser->create,
		.layer = HASH_RXQ_ETH,
		.mark_id = MLX5_FLOW_MARK_DEFAULT,
	};
	ret = mlx5_flow_convert_attributes(attr, error);
	if (ret)
		return ret;
	ret = mlx5_flow_convert_actions(dev, actions, error, parser);
	if (ret)
		return ret;
	ret = mlx5_flow_convert_items_validate(items, error, parser);
	if (ret)
		return ret;
	mlx5_flow_convert_finalise(parser);
	/*
	 * Second step.
	 * Allocate the memory space to store verbs specifications.
	 */
	if (parser->drop) {
		unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;

		parser->queue[HASH_RXQ_ETH].ibv_attr =
			mlx5_flow_convert_allocate(offset, error);
		if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
			goto exit_enomem;
		parser->queue[HASH_RXQ_ETH].offset =
			sizeof(struct ibv_flow_attr);
	} else {
		for (i = 0; i != hash_rxq_init_n; ++i) {
			unsigned int offset;

			if (!(parser->rss_conf.types &
			      hash_rxq_init[i].dpdk_rss_hf) &&
			    (i != HASH_RXQ_ETH))
				continue;
			offset = parser->queue[i].offset;
			parser->queue[i].ibv_attr =
				mlx5_flow_convert_allocate(offset, error);
			if (!parser->queue[i].ibv_attr)
				goto exit_enomem;
			parser->queue[i].offset = sizeof(struct ibv_flow_attr);
		}
	}
	/* Third step. Conversion parse, fill the specifications. */
	parser->inner = 0;
	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
		struct mlx5_flow_data data = {
			.parser = parser,
			.error = error,
		};

		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
			continue;
		cur_item = &mlx5_flow_items[items->type];
		ret = cur_item->convert(items,
					(cur_item->default_mask ?
					 cur_item->default_mask :
					 cur_item->mask),
					&data);
		if (ret)
			goto exit_free;
	}
	if (parser->mark)
		mlx5_flow_create_flag_mark(parser, parser->mark_id);
	if (parser->count && parser->create) {
		mlx5_flow_create_count(dev, parser);
		if (!parser->cs)
			goto exit_count_error;
	}
	/*
	 * Last step. Complete missing specification to reach the RSS
	 * configuration.
	 */
	if (!parser->drop)
		mlx5_flow_convert_finalise(parser);
	mlx5_flow_update_priority(parser, attr);
exit_free:
	/* Only verification is expected, all resources should be released. */
	if (!parser->create) {
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (parser->queue[i].ibv_attr) {
				rte_free(parser->queue[i].ibv_attr);
				parser->queue[i].ibv_attr = NULL;
			}
		}
	}
	return ret;
exit_enomem:
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser->queue[i].ibv_attr) {
			rte_free(parser->queue[i].ibv_attr);
			parser->queue[i].ibv_attr = NULL;
		}
	}
	rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
			   NULL, "cannot allocate verbs spec attributes");
	return -rte_errno;
exit_count_error:
	rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
			   NULL, "cannot create counter");
	return -rte_errno;
}
/**
 * Copy the specification created into the flow.
 *
 * @param parser
 *   Internal parser structure.
 * @param src
 *   Specification to copy.
 * @param size
 *   Size in bytes of the specification to copy.
 */
static void
mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
		      unsigned int size)
{
	unsigned int i;
	void *dst;

	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (!parser->queue[i].ibv_attr)
			continue;
		/* Specification must be the same l3 type or none. */
		if (parser->layer == HASH_RXQ_ETH ||
		    (hash_rxq_init[parser->layer].ip_version ==
		     hash_rxq_init[i].ip_version) ||
		    (hash_rxq_init[i].ip_version == 0)) {
			dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
					parser->queue[i].offset);
			memcpy(dst, src, size);
			++parser->queue[i].ibv_attr->num_of_specs;
			parser->queue[i].offset += size;
		}
	}
}
/**
 * Convert Ethernet item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
		     const void *default_mask,
		     struct mlx5_flow_data *data)
{
	const struct rte_flow_item_eth *spec = item->spec;
	const struct rte_flow_item_eth *mask = item->mask;
	struct mlx5_flow_parse *parser = data->parser;
	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
	struct ibv_flow_spec_eth eth = {
		.type = parser->inner | IBV_FLOW_SPEC_ETH,
		.size = eth_size,
	};

	/* Don't update layer for the inner pattern. */
	if (!parser->inner)
		parser->layer = HASH_RXQ_ETH;
	if (spec) {
		unsigned int i;

		if (!mask)
			mask = default_mask;
		memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
		memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
		eth.val.ether_type = spec->type;
		memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
		memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
		eth.mask.ether_type = mask->type;
		/* Remove unwanted bits from values. */
		for (i = 0; i < ETHER_ADDR_LEN; ++i) {
			eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
			eth.val.src_mac[i] &= eth.mask.src_mac[i];
		}
		eth.val.ether_type &= eth.mask.ether_type;
	}
	mlx5_flow_create_copy(parser, &eth, eth_size);
	return 0;
}
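
/*
 * For illustration (addresses assumed): an item matching a single
 * destination MAC converts into an ibv_flow_spec_eth whose val/mask pair
 * only keeps that address.
 *
 *	const struct rte_flow_item_eth spec = {
 *		.dst.addr_bytes = "\x00\x0c\x29\x01\x02\x03",
 *	};
 *	const struct rte_flow_item_eth mask = {
 *		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *	};
 *
 * eth.val.dst_mac is ANDed with eth.mask.dst_mac before being copied into
 * every remaining queue attribute by mlx5_flow_create_copy().
 */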
/**
 * Convert VLAN item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_vlan(const struct rte_flow_item *item,
		      const void *default_mask,
		      struct mlx5_flow_data *data)
{
	const struct rte_flow_item_vlan *spec = item->spec;
	const struct rte_flow_item_vlan *mask = item->mask;
	struct mlx5_flow_parse *parser = data->parser;
	struct ibv_flow_spec_eth *eth;
	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
	const char *msg = "VLAN cannot be empty";

	if (spec) {
		unsigned int i;

		if (!mask)
			mask = default_mask;
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (!parser->queue[i].ibv_attr)
				continue;
			eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
				       parser->queue[i].offset - eth_size);
			eth->val.vlan_tag = spec->tci;
			eth->mask.vlan_tag = mask->tci;
			eth->val.vlan_tag &= eth->mask.vlan_tag;
			/*
			 * From verbs perspective an empty VLAN is equivalent
			 * to a packet without VLAN layer.
			 */
			if (!eth->mask.vlan_tag)
				goto error;
			/* Outer TPID cannot be matched. */
			if (eth->mask.ether_type) {
				msg = "VLAN TPID matching is not supported";
				goto error;
			}
			eth->val.ether_type = spec->inner_type;
			eth->mask.ether_type = mask->inner_type;
			eth->val.ether_type &= eth->mask.ether_type;
		}
		return 0;
	}
error:
	return rte_flow_error_set(data->error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM,
				  item, msg);
}
/**
 * Convert IPv4 item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
		      const void *default_mask,
		      struct mlx5_flow_data *data)
{
	const struct rte_flow_item_ipv4 *spec = item->spec;
	const struct rte_flow_item_ipv4 *mask = item->mask;
	struct mlx5_flow_parse *parser = data->parser;
	unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
	struct ibv_flow_spec_ipv4_ext ipv4 = {
		.type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
		.size = ipv4_size,
	};

	/* Don't update layer for the inner pattern. */
	if (!parser->inner)
		parser->layer = HASH_RXQ_IPV4;
	if (spec) {
		if (!mask)
			mask = default_mask;
		ipv4.val = (struct ibv_flow_ipv4_ext_filter){
			.src_ip = spec->hdr.src_addr,
			.dst_ip = spec->hdr.dst_addr,
			.proto = spec->hdr.next_proto_id,
			.tos = spec->hdr.type_of_service,
		};
		ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
			.src_ip = mask->hdr.src_addr,
			.dst_ip = mask->hdr.dst_addr,
			.proto = mask->hdr.next_proto_id,
			.tos = mask->hdr.type_of_service,
		};
		/* Remove unwanted bits from values. */
		ipv4.val.src_ip &= ipv4.mask.src_ip;
		ipv4.val.dst_ip &= ipv4.mask.dst_ip;
		ipv4.val.proto &= ipv4.mask.proto;
		ipv4.val.tos &= ipv4.mask.tos;
	}
	mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
	return 0;
}
/**
 * Convert IPv6 item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
		      const void *default_mask,
		      struct mlx5_flow_data *data)
{
	const struct rte_flow_item_ipv6 *spec = item->spec;
	const struct rte_flow_item_ipv6 *mask = item->mask;
	struct mlx5_flow_parse *parser = data->parser;
	unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
	struct ibv_flow_spec_ipv6 ipv6 = {
		.type = parser->inner | IBV_FLOW_SPEC_IPV6,
		.size = ipv6_size,
	};

	/* Don't update layer for the inner pattern. */
	if (!parser->inner)
		parser->layer = HASH_RXQ_IPV6;
	if (spec) {
		unsigned int i;
		uint32_t vtc_flow_val;
		uint32_t vtc_flow_mask;

		if (!mask)
			mask = default_mask;
		memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
		       RTE_DIM(ipv6.val.src_ip));
		memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
		       RTE_DIM(ipv6.val.dst_ip));
		memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
		       RTE_DIM(ipv6.mask.src_ip));
		memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
		       RTE_DIM(ipv6.mask.dst_ip));
		vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
		vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
		ipv6.val.flow_label =
			rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
					 IPV6_HDR_FL_SHIFT);
		ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
					 IPV6_HDR_TC_SHIFT;
		ipv6.val.next_hdr = spec->hdr.proto;
		ipv6.val.hop_limit = spec->hdr.hop_limits;
		ipv6.mask.flow_label =
			rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
					 IPV6_HDR_FL_SHIFT);
		ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
					  IPV6_HDR_TC_SHIFT;
		ipv6.mask.next_hdr = mask->hdr.proto;
		ipv6.mask.hop_limit = mask->hdr.hop_limits;
		/* Remove unwanted bits from values. */
		for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
			ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
			ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
		}
		ipv6.val.flow_label &= ipv6.mask.flow_label;
		ipv6.val.traffic_class &= ipv6.mask.traffic_class;
		ipv6.val.next_hdr &= ipv6.mask.next_hdr;
		ipv6.val.hop_limit &= ipv6.mask.hop_limit;
	}
	mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
	return 0;
}
/**
 * Convert UDP item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_udp(const struct rte_flow_item *item,
		     const void *default_mask,
		     struct mlx5_flow_data *data)
{
	const struct rte_flow_item_udp *spec = item->spec;
	const struct rte_flow_item_udp *mask = item->mask;
	struct mlx5_flow_parse *parser = data->parser;
	unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
	struct ibv_flow_spec_tcp_udp udp = {
		.type = parser->inner | IBV_FLOW_SPEC_UDP,
		.size = udp_size,
	};

	/* Don't update layer for the inner pattern. */
	if (!parser->inner) {
		if (parser->layer == HASH_RXQ_IPV4)
			parser->layer = HASH_RXQ_UDPV4;
		else
			parser->layer = HASH_RXQ_UDPV6;
	}
	if (spec) {
		if (!mask)
			mask = default_mask;
		udp.val.dst_port = spec->hdr.dst_port;
		udp.val.src_port = spec->hdr.src_port;
		udp.mask.dst_port = mask->hdr.dst_port;
		udp.mask.src_port = mask->hdr.src_port;
		/* Remove unwanted bits from values. */
		udp.val.src_port &= udp.mask.src_port;
		udp.val.dst_port &= udp.mask.dst_port;
	}
	mlx5_flow_create_copy(parser, &udp, udp_size);
	return 0;
}
/**
 * Convert TCP item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_tcp(const struct rte_flow_item *item,
		     const void *default_mask,
		     struct mlx5_flow_data *data)
{
	const struct rte_flow_item_tcp *spec = item->spec;
	const struct rte_flow_item_tcp *mask = item->mask;
	struct mlx5_flow_parse *parser = data->parser;
	unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
	struct ibv_flow_spec_tcp_udp tcp = {
		.type = parser->inner | IBV_FLOW_SPEC_TCP,
		.size = tcp_size,
	};

	/* Don't update layer for the inner pattern. */
	if (!parser->inner) {
		if (parser->layer == HASH_RXQ_IPV4)
			parser->layer = HASH_RXQ_TCPV4;
		else
			parser->layer = HASH_RXQ_TCPV6;
	}
	if (spec) {
		if (!mask)
			mask = default_mask;
		tcp.val.dst_port = spec->hdr.dst_port;
		tcp.val.src_port = spec->hdr.src_port;
		tcp.mask.dst_port = mask->hdr.dst_port;
		tcp.mask.src_port = mask->hdr.src_port;
		/* Remove unwanted bits from values. */
		tcp.val.src_port &= tcp.mask.src_port;
		tcp.val.dst_port &= tcp.mask.dst_port;
	}
	mlx5_flow_create_copy(parser, &tcp, tcp_size);
	return 0;
}
/**
 * Convert VXLAN item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
		       const void *default_mask,
		       struct mlx5_flow_data *data)
{
	const struct rte_flow_item_vxlan *spec = item->spec;
	const struct rte_flow_item_vxlan *mask = item->mask;
	struct mlx5_flow_parse *parser = data->parser;
	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
	struct ibv_flow_spec_tunnel vxlan = {
		.type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
		.size = size,
	};
	union vni {
		uint32_t vlan_id;
		uint8_t vni[4];
	} id;

	id.vni[0] = 0;
	parser->inner = IBV_FLOW_SPEC_INNER;
	if (spec) {
		if (!mask)
			mask = default_mask;
		memcpy(&id.vni[1], spec->vni, 3);
		vxlan.val.tunnel_id = id.vlan_id;
		memcpy(&id.vni[1], mask->vni, 3);
		vxlan.mask.tunnel_id = id.vlan_id;
		/* Remove unwanted bits from values. */
		vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
	}
	/*
	 * Tunnel id 0 is equivalent to not adding a VXLAN layer. If only this
	 * layer is defined in the Verbs specification, it is interpreted as a
	 * wildcard and all packets will match this rule. If it follows a full
	 * stack layer (ex: eth / ipv4 / udp), all packets matching the layers
	 * before will also match this rule.
	 * To avoid such a situation, VNI 0 is currently refused.
	 */
	if (!vxlan.val.tunnel_id)
		return rte_flow_error_set(data->error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  item,
					  "VxLAN vni cannot be 0");
	mlx5_flow_create_copy(parser, &vxlan, size);
	return 0;
}
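
/*
 * For illustration (VNI value assumed): a VXLAN item this function accepts
 * must carry a non-zero 3-byte VNI, e.g. VNI 42:
 *
 *	const struct rte_flow_item_vxlan spec = {
 *		.vni = "\x00\x00\x2a",
 *	};
 */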
/**
 * Convert mark/flag action to Verbs specification.
 *
 * @param parser
 *   Internal parser structure.
 * @param mark_id
 *   Mark identifier.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
{
	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
	struct ibv_flow_spec_action_tag tag = {
		.type = IBV_FLOW_SPEC_ACTION_TAG,
		.size = size,
		.tag_id = mlx5_flow_mark_set(mark_id),
	};

	assert(parser->mark);
	mlx5_flow_create_copy(parser, &tag, size);
	return 0;
}
/**
 * Convert count action to Verbs specification.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param parser
 *   Pointer to MLX5 flow parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_count(struct rte_eth_dev *dev __rte_unused,
		       struct mlx5_flow_parse *parser __rte_unused)
{
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
	struct priv *priv = dev->data->dev_private;
	unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
	struct ibv_counter_set_init_attr init_attr = {0};
	struct ibv_flow_spec_counter_action counter = {
		.type = IBV_FLOW_SPEC_ACTION_COUNT,
		.size = size,
		.counter_set_handle = 0,
	};

	init_attr.counter_set_id = 0;
	parser->cs = mlx5_glue->create_counter_set(priv->ctx, &init_attr);
	if (!parser->cs) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	counter.counter_set_handle = parser->cs->handle;
	mlx5_flow_create_copy(parser, &counter, size);
#endif
	return 0;
}
/**
 * Complete flow rule creation with a drop queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param parser
 *   Internal parser structure.
 * @param flow
 *   Pointer to the rte_flow.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_action_queue_drop(struct rte_eth_dev *dev,
				   struct mlx5_flow_parse *parser,
				   struct rte_flow *flow,
				   struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;
	struct ibv_flow_spec_action_drop *drop;
	unsigned int size = sizeof(struct ibv_flow_spec_action_drop);

	assert(priv->pd);
	assert(priv->ctx);
	flow->drop = 1;
	drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
			parser->queue[HASH_RXQ_ETH].offset);
	*drop = (struct ibv_flow_spec_action_drop){
		.type = IBV_FLOW_SPEC_ACTION_DROP,
		.size = size,
	};
	++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
	parser->queue[HASH_RXQ_ETH].offset += size;
	flow->frxq[HASH_RXQ_ETH].ibv_attr =
		parser->queue[HASH_RXQ_ETH].ibv_attr;
	if (parser->count)
		flow->cs = parser->cs;
	if (!priv->dev->data->dev_started)
		return 0;
	parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
	flow->frxq[HASH_RXQ_ETH].ibv_flow =
		mlx5_glue->create_flow(priv->flow_drop_queue->qp,
				       flow->frxq[HASH_RXQ_ETH].ibv_attr);
	if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "flow rule creation failure");
		goto error;
	}
	return 0;
error:
	assert(flow);
	if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
		claim_zero(mlx5_glue->destroy_flow
			   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
		flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
	}
	if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
		rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
		flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
	}
	if (flow->cs) {
		claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
		flow->cs = NULL;
		parser->cs = NULL;
	}
	return -rte_errno;
}
/**
 * Create hash Rx queues when RSS is enabled.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param parser
 *   Internal parser structure.
 * @param flow
 *   Pointer to the rte_flow.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_action_queue_rss(struct rte_eth_dev *dev,
				  struct mlx5_flow_parse *parser,
				  struct rte_flow *flow,
				  struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;
	unsigned int i;

	for (i = 0; i != hash_rxq_init_n; ++i) {
		uint64_t hash_fields;

		if (!parser->queue[i].ibv_attr)
			continue;
		flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
		parser->queue[i].ibv_attr = NULL;
		hash_fields = hash_rxq_init[i].hash_fields;
		if (!priv->dev->data->dev_started)
			continue;
		flow->frxq[i].hrxq =
			mlx5_hrxq_get(dev,
				      parser->rss_conf.key,
				      parser->rss_conf.key_len,
				      hash_fields,
				      parser->rss_conf.queue,
				      parser->rss_conf.queue_num);
		if (flow->frxq[i].hrxq)
			continue;
		flow->frxq[i].hrxq =
			mlx5_hrxq_new(dev,
				      parser->rss_conf.key,
				      parser->rss_conf.key_len,
				      hash_fields,
				      parser->rss_conf.queue,
				      parser->rss_conf.queue_num);
		if (!flow->frxq[i].hrxq) {
			return rte_flow_error_set(error, ENOMEM,
						  RTE_FLOW_ERROR_TYPE_HANDLE,
						  NULL,
						  "cannot create hash rxq");
		}
	}
	return 0;
}
/**
 * Complete flow rule creation.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param parser
 *   Internal parser structure.
 * @param flow
 *   Pointer to the rte_flow.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_create_action_queue(struct rte_eth_dev *dev,
			      struct mlx5_flow_parse *parser,
			      struct rte_flow *flow,
			      struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;
	int ret;
	unsigned int i;
	unsigned int flows_n = 0;

	assert(priv->pd);
	assert(priv->ctx);
	assert(!parser->drop);
	ret = mlx5_flow_create_action_queue_rss(dev, parser, flow, error);
	if (ret)
		goto error;
	if (parser->count)
		flow->cs = parser->cs;
	if (!priv->dev->data->dev_started)
		return 0;
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (!flow->frxq[i].hrxq)
			continue;
		flow->frxq[i].ibv_flow =
			mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
					       flow->frxq[i].ibv_attr);
		if (!flow->frxq[i].ibv_flow) {
			rte_flow_error_set(error, ENOMEM,
					   RTE_FLOW_ERROR_TYPE_HANDLE,
					   NULL, "flow rule creation failure");
			goto error;
		}
		++flows_n;
		DRV_LOG(DEBUG, "port %u %p type %d QP %p ibv_flow %p",
			dev->data->port_id,
			(void *)flow, i,
			(void *)flow->frxq[i].hrxq,
			(void *)flow->frxq[i].ibv_flow);
	}
	if (!flows_n) {
		rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "internal error in flow creation");
		goto error;
	}
	for (i = 0; i != parser->rss_conf.queue_num; ++i) {
		struct mlx5_rxq_data *q =
			(*priv->rxqs)[parser->rss_conf.queue[i]];

		q->mark |= parser->mark;
	}
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	assert(flow);
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (flow->frxq[i].ibv_flow) {
			struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;

			claim_zero(mlx5_glue->destroy_flow(ibv_flow));
		}
		if (flow->frxq[i].hrxq)
			mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
		if (flow->frxq[i].ibv_attr)
			rte_free(flow->frxq[i].ibv_attr);
	}
	if (flow->cs) {
		claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
		flow->cs = NULL;
		parser->cs = NULL;
	}
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}
/**
 * Convert a flow.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param list
 *   Pointer to a TAILQ flow list.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A flow on success, NULL otherwise and rte_errno is set.
 */
static struct rte_flow *
mlx5_flow_list_create(struct rte_eth_dev *dev,
		      struct mlx5_flows *list,
		      const struct rte_flow_attr *attr,
		      const struct rte_flow_item items[],
		      const struct rte_flow_action actions[],
		      struct rte_flow_error *error)
{
	struct mlx5_flow_parse parser = { .create = 1, };
	struct rte_flow *flow = NULL;
	unsigned int i;
	int ret;

	ret = mlx5_flow_convert(dev, attr, items, actions, error, &parser);
	if (ret)
		goto exit;
	flow = rte_calloc(__func__, 1,
			  sizeof(*flow) +
			  parser.rss_conf.queue_num * sizeof(uint16_t),
			  0);
	if (!flow) {
		rte_flow_error_set(error, ENOMEM,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "cannot allocate flow memory");
		return NULL;
	}
	/* Copy configuration. */
	flow->queues = (uint16_t (*)[])(flow + 1);
	flow->rss_conf = (struct rte_flow_action_rss){
		.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
		.level = 0,
		.types = parser.rss_conf.types,
		.key_len = parser.rss_conf.key_len,
		.queue_num = parser.rss_conf.queue_num,
		.key = memcpy(flow->rss_key, parser.rss_conf.key,
			      sizeof(*parser.rss_conf.key) *
			      parser.rss_conf.key_len),
		.queue = memcpy(flow->queues, parser.rss_conf.queue,
				sizeof(*parser.rss_conf.queue) *
				parser.rss_conf.queue_num),
	};
	flow->mark = parser.mark;
	/* Finalise the flow. */
	if (parser.drop)
		ret = mlx5_flow_create_action_queue_drop(dev, &parser, flow,
							 error);
	else
		ret = mlx5_flow_create_action_queue(dev, &parser, flow, error);
	if (ret)
		goto exit;
	TAILQ_INSERT_TAIL(list, flow, next);
	DRV_LOG(DEBUG, "port %u flow created %p", dev->data->port_id,
		(void *)flow);
	return flow;
exit:
	DRV_LOG(ERR, "port %u flow creation error: %s", dev->data->port_id,
		error->message);
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser.queue[i].ibv_attr)
			rte_free(parser.queue[i].ibv_attr);
	}
	rte_free(flow);
	return NULL;
}
/**
 * Validate a flow supported by the NIC.
 *
 * @see rte_flow_validate()
 * @see rte_flow_ops
 */
int
mlx5_flow_validate(struct rte_eth_dev *dev,
		   const struct rte_flow_attr *attr,
		   const struct rte_flow_item items[],
		   const struct rte_flow_action actions[],
		   struct rte_flow_error *error)
{
	struct mlx5_flow_parse parser = { .create = 0, };

	return mlx5_flow_convert(dev, attr, items, actions, error, &parser);
}
/**
 * Create a flow.
 *
 * @see rte_flow_create()
 * @see rte_flow_ops
 */
struct rte_flow *
mlx5_flow_create(struct rte_eth_dev *dev,
		 const struct rte_flow_attr *attr,
		 const struct rte_flow_item items[],
		 const struct rte_flow_action actions[],
		 struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	return mlx5_flow_list_create(dev, &priv->flows, attr, items, actions,
				     error);
}
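
/*
 * Application-side sketch (illustrative, port id and pattern assumed):
 * a rule created through the public rte_flow API ends up in the functions
 * above.
 *
 *	struct rte_flow_error err;
 *	const struct rte_flow_attr attr = { .ingress = 1 };
 *	const struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 *	const struct rte_flow_action_queue queue = { .index = 0 };
 *	const struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 *	struct rte_flow *f = rte_flow_create(0, &attr, pattern, actions,
 *					     &err);
 */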
/**
 * Destroy a flow in a list.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param list
 *   Pointer to a TAILQ flow list.
 * @param[in] flow
 *   Flow to destroy.
 */
static void
mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
		       struct rte_flow *flow)
{
	struct priv *priv = dev->data->dev_private;
	unsigned int i;

	if (flow->drop || !flow->mark)
		goto free;
	for (i = 0; i != flow->rss_conf.queue_num; ++i) {
		struct rte_flow *tmp;
		int mark = 0;

		/*
		 * To remove the mark from the queue, the queue must not be
		 * present in any other marked flow (RSS or not).
		 */
		TAILQ_FOREACH(tmp, list, next) {
			unsigned int j;
			uint16_t *tqs = NULL;
			uint16_t tq_n = 0;

			if (!tmp->mark)
				continue;
			for (j = 0; j != hash_rxq_init_n; ++j) {
				if (!tmp->frxq[j].hrxq)
					continue;
				tqs = tmp->frxq[j].hrxq->ind_table->queues;
				tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
			}
			if (!tq_n)
				continue;
			for (j = 0; (j != tq_n) && !mark; j++)
				if (tqs[j] == (*flow->queues)[i])
					mark = 1;
		}
		(*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
	}
free:
	if (flow->drop) {
		if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
			claim_zero(mlx5_glue->destroy_flow
				   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
		rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
	} else {
		for (i = 0; i != hash_rxq_init_n; ++i) {
			struct mlx5_flow *frxq = &flow->frxq[i];

			if (frxq->ibv_flow)
				claim_zero(mlx5_glue->destroy_flow
					   (frxq->ibv_flow));
			if (frxq->hrxq)
				mlx5_hrxq_release(dev, frxq->hrxq);
			if (frxq->ibv_attr)
				rte_free(frxq->ibv_attr);
		}
	}
	if (flow->cs) {
		claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
		flow->cs = NULL;
	}
	TAILQ_REMOVE(list, flow, next);
	DRV_LOG(DEBUG, "port %u flow destroyed %p", dev->data->port_id,
		(void *)flow);
	rte_free(flow);
}
/**
 * Destroy all flows.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param list
 *   Pointer to a TAILQ flow list.
 */
void
mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
{
	while (!TAILQ_EMPTY(list)) {
		struct rte_flow *flow;

		flow = TAILQ_FIRST(list);
		mlx5_flow_list_destroy(dev, list, flow);
	}
}
/**
 * Create drop queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_create_drop_queue(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_hrxq_drop *fdq = NULL;

	assert(priv->pd);
	assert(priv->ctx);
	fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
	if (!fdq) {
		DRV_LOG(WARNING,
			"port %u cannot allocate memory for drop queue",
			dev->data->port_id);
		rte_errno = ENOMEM;
		return -rte_errno;
	}
	fdq->cq = mlx5_glue->create_cq(priv->ctx, 1, NULL, NULL, 0);
	if (!fdq->cq) {
		DRV_LOG(WARNING, "port %u cannot allocate CQ for drop queue",
			dev->data->port_id);
		rte_errno = errno;
		goto error;
	}
	fdq->wq = mlx5_glue->create_wq
		(priv->ctx,
		 &(struct ibv_wq_init_attr){
			.wq_type = IBV_WQT_RQ,
			.max_wr = 1,
			.max_sge = 1,
			.pd = priv->pd,
			.cq = fdq->cq,
		 });
	if (!fdq->wq) {
		DRV_LOG(WARNING, "port %u cannot allocate WQ for drop queue",
			dev->data->port_id);
		rte_errno = errno;
		goto error;
	}
	fdq->ind_table = mlx5_glue->create_rwq_ind_table
		(priv->ctx,
		 &(struct ibv_rwq_ind_table_init_attr){
			.log_ind_tbl_size = 0,
			.ind_tbl = &fdq->wq,
			.comp_mask = 0,
		 });
	if (!fdq->ind_table) {
		DRV_LOG(WARNING,
			"port %u cannot allocate indirection table for drop"
			" queue",
			dev->data->port_id);
		rte_errno = errno;
		goto error;
	}
	fdq->qp = mlx5_glue->create_qp_ex
		(priv->ctx,
		 &(struct ibv_qp_init_attr_ex){
			.qp_type = IBV_QPT_RAW_PACKET,
			.comp_mask =
				IBV_QP_INIT_ATTR_PD |
				IBV_QP_INIT_ATTR_IND_TABLE |
				IBV_QP_INIT_ATTR_RX_HASH,
			.rx_hash_conf = (struct ibv_rx_hash_conf){
				.rx_hash_function =
					IBV_RX_HASH_FUNC_TOEPLITZ,
				.rx_hash_key_len = rss_hash_default_key_len,
				.rx_hash_key = rss_hash_default_key,
				.rx_hash_fields_mask = 0,
				},
			.rwq_ind_tbl = fdq->ind_table,
			.pd = priv->pd,
		 });
	if (!fdq->qp) {
		DRV_LOG(WARNING, "port %u cannot allocate QP for drop queue",
			dev->data->port_id);
		rte_errno = errno;
		goto error;
	}
	priv->flow_drop_queue = fdq;
	return 0;
error:
	if (fdq->qp)
		claim_zero(mlx5_glue->destroy_qp(fdq->qp));
	if (fdq->ind_table)
		claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
	if (fdq->wq)
		claim_zero(mlx5_glue->destroy_wq(fdq->wq));
	if (fdq->cq)
		claim_zero(mlx5_glue->destroy_cq(fdq->cq));
	rte_free(fdq);
	priv->flow_drop_queue = NULL;
	return -rte_errno;
}
/**
 * Delete drop queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
void
mlx5_flow_delete_drop_queue(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;

	if (!fdq)
		return;
	if (fdq->qp)
		claim_zero(mlx5_glue->destroy_qp(fdq->qp));
	if (fdq->ind_table)
		claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
	if (fdq->wq)
		claim_zero(mlx5_glue->destroy_wq(fdq->wq));
	if (fdq->cq)
		claim_zero(mlx5_glue->destroy_cq(fdq->cq));
	rte_free(fdq);
	priv->flow_drop_queue = NULL;
}
/**
 * Remove all flows.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param list
 *   Pointer to a TAILQ flow list.
 */
void
mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
{
	struct priv *priv = dev->data->dev_private;
	struct rte_flow *flow;

	TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
		unsigned int i;
		struct mlx5_ind_table_ibv *ind_tbl = NULL;

		if (flow->drop) {
			if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
				continue;
			claim_zero(mlx5_glue->destroy_flow
				   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
			flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
			DRV_LOG(DEBUG, "port %u flow %p removed",
				dev->data->port_id, (void *)flow);
			/* Next flow. */
			continue;
		}
		/* Verify the flow has not already been cleaned. */
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (!flow->frxq[i].ibv_flow)
				continue;
			/*
			 * Indirection table may be necessary to remove the
			 * flags in the Rx queues.
			 * This helps to speed-up the process by avoiding
			 * another loop.
			 */
			ind_tbl = flow->frxq[i].hrxq->ind_table;
			break;
		}
		if (i == hash_rxq_init_n)
			return;
		if (flow->mark) {
			assert(ind_tbl);
			for (i = 0; i != ind_tbl->queues_n; ++i)
				(*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
		}
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (!flow->frxq[i].ibv_flow)
				continue;
			claim_zero(mlx5_glue->destroy_flow
				   (flow->frxq[i].ibv_flow));
			flow->frxq[i].ibv_flow = NULL;
			mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
			flow->frxq[i].hrxq = NULL;
		}
		DRV_LOG(DEBUG, "port %u flow %p removed", dev->data->port_id,
			(void *)flow);
	}
}
/**
 * Add all flows.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param list
 *   Pointer to a TAILQ flow list.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
{
	struct priv *priv = dev->data->dev_private;
	struct rte_flow *flow;

	TAILQ_FOREACH(flow, list, next) {
		unsigned int i;

		if (flow->drop) {
			flow->frxq[HASH_RXQ_ETH].ibv_flow =
				mlx5_glue->create_flow
				(priv->flow_drop_queue->qp,
				 flow->frxq[HASH_RXQ_ETH].ibv_attr);
			if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
				DRV_LOG(DEBUG,
					"port %u flow %p cannot be applied",
					dev->data->port_id, (void *)flow);
				rte_errno = EINVAL;
				return -rte_errno;
			}
			DRV_LOG(DEBUG, "port %u flow %p applied",
				dev->data->port_id, (void *)flow);
			/* Next flow. */
			continue;
		}
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (!flow->frxq[i].ibv_attr)
				continue;
			flow->frxq[i].hrxq =
				mlx5_hrxq_get(dev, flow->rss_conf.key,
					      flow->rss_conf.key_len,
					      hash_rxq_init[i].hash_fields,
					      flow->rss_conf.queue,
					      flow->rss_conf.queue_num);
			if (flow->frxq[i].hrxq)
				goto flow_create;
			flow->frxq[i].hrxq =
				mlx5_hrxq_new(dev, flow->rss_conf.key,
					      flow->rss_conf.key_len,
					      hash_rxq_init[i].hash_fields,
					      flow->rss_conf.queue,
					      flow->rss_conf.queue_num);
			if (!flow->frxq[i].hrxq) {
				DRV_LOG(DEBUG,
					"port %u flow %p cannot be applied",
					dev->data->port_id, (void *)flow);
				rte_errno = EINVAL;
				return -rte_errno;
			}
flow_create:
			flow->frxq[i].ibv_flow =
				mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
						       flow->frxq[i].ibv_attr);
			if (!flow->frxq[i].ibv_flow) {
				DRV_LOG(DEBUG,
					"port %u flow %p cannot be applied",
					dev->data->port_id, (void *)flow);
				rte_errno = EINVAL;
				return -rte_errno;
			}
			DRV_LOG(DEBUG, "port %u flow %p applied",
				dev->data->port_id, (void *)flow);
		}
		if (!flow->mark)
			continue;
		for (i = 0; i != flow->rss_conf.queue_num; ++i)
			(*priv->rxqs)[flow->rss_conf.queue[i]]->mark = 1;
	}
	return 0;
}
/**
 * Verify the flow list is empty.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return the number of flows not released.
 */
int
mlx5_flow_verify(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	struct rte_flow *flow;
	int ret = 0;

	TAILQ_FOREACH(flow, &priv->flows, next) {
		DRV_LOG(DEBUG, "port %u flow %p still referenced",
			dev->data->port_id, (void *)flow);
		++ret;
	}
	return ret;
}

/**
 * Enable a control flow configured from the control plane.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param eth_spec
 *   An Ethernet flow spec to apply.
 * @param eth_mask
 *   An Ethernet flow mask to apply.
 * @param vlan_spec
 *   A VLAN flow spec to apply.
 * @param vlan_mask
 *   A VLAN flow mask to apply.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
		    struct rte_flow_item_eth *eth_spec,
		    struct rte_flow_item_eth *eth_mask,
		    struct rte_flow_item_vlan *vlan_spec,
		    struct rte_flow_item_vlan *vlan_mask)
{
	struct priv *priv = dev->data->dev_private;
	const struct rte_flow_attr attr = {
		.ingress = 1,
		.priority = MLX5_CTRL_FLOW_PRIORITY,
	};
	struct rte_flow_item items[] = {
		{
			.type = RTE_FLOW_ITEM_TYPE_ETH,
			.spec = eth_spec,
			.last = NULL,
			.mask = eth_mask,
		},
		{
			.type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
				RTE_FLOW_ITEM_TYPE_END,
			.spec = vlan_spec,
			.last = NULL,
			.mask = vlan_mask,
		},
		{
			.type = RTE_FLOW_ITEM_TYPE_END,
		},
	};
	uint16_t queue[priv->reta_idx_n];
	struct rte_flow_action_rss action_rss = {
		.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
		.level = 0,
		.types = priv->rss_conf.rss_hf,
		.key_len = priv->rss_conf.rss_key_len,
		.queue_num = priv->reta_idx_n,
		.key = priv->rss_conf.rss_key,
		.queue = queue,
	};
	struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_RSS,
			.conf = &action_rss,
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		},
	};
	struct rte_flow *flow;
	struct rte_flow_error error;
	unsigned int i;

	if (!priv->reta_idx_n) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	for (i = 0; i != priv->reta_idx_n; ++i)
		queue[i] = (*priv->reta_idx)[i];
	flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
				     actions, &error);
	if (!flow)
		return -rte_errno;
	return 0;
}
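
/*
 * Illustrative sketch (not part of the upstream driver): enabling a control
 * flow matching broadcast traffic on a given VLAN through the helper above.
 * The wrapper, the vlan_id parameter and the 0x0fff TCI mask are assumptions
 * for the example.
 */
static int __rte_unused
example_enable_vlan_bcast(struct rte_eth_dev *dev, uint16_t vlan_id)
{
	struct rte_flow_item_eth bcast = {
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
	};
	struct rte_flow_item_vlan vlan_spec = {
		.tci = rte_cpu_to_be_16(vlan_id),
	};
	struct rte_flow_item_vlan vlan_mask = {
		.tci = rte_cpu_to_be_16(0x0fff),
	};

	/* Match on destination MAC and VLAN id only. */
	return mlx5_ctrl_flow_vlan(dev, &bcast, &bcast, &vlan_spec,
				   &vlan_mask);
}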

/**
 * Enable a control flow configured from the control plane.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param eth_spec
 *   An Ethernet flow spec to apply.
 * @param eth_mask
 *   An Ethernet flow mask to apply.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_ctrl_flow(struct rte_eth_dev *dev,
	       struct rte_flow_item_eth *eth_spec,
	       struct rte_flow_item_eth *eth_mask)
{
	return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
}
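
/*
 * Illustrative sketch (not part of the upstream driver): enabling the
 * broadcast control flow through the helper above, roughly what the driver
 * does when traffic is enabled. The wrapper is an assumption for the
 * example.
 */
static int __rte_unused
example_enable_broadcast(struct rte_eth_dev *dev)
{
	struct rte_flow_item_eth bcast = {
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
	};

	/* Use the same spec as mask: match the destination MAC exactly. */
	return mlx5_ctrl_flow(dev, &bcast, &bcast);
}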

/**
 * Destroy a flow.
 *
 * @see rte_flow_destroy()
 * @see rte_flow_ops
 */
int
mlx5_flow_destroy(struct rte_eth_dev *dev,
		  struct rte_flow *flow,
		  struct rte_flow_error *error __rte_unused)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_destroy(dev, &priv->flows, flow);
	return 0;
}

/**
 * Destroy all flows.
 *
 * @see rte_flow_flush()
 * @see rte_flow_ops
 */
int
mlx5_flow_flush(struct rte_eth_dev *dev,
		struct rte_flow_error *error __rte_unused)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_flush(dev, &priv->flows);
	return 0;
}
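
/*
 * Illustrative sketch (not part of the upstream driver): an application
 * flushing all flows through the public API lands in mlx5_flow_flush()
 * via the rte_flow_ops table. The wrapper and port_id parameter are
 * assumptions for the example.
 */
static void __rte_unused
example_flush_port_flows(uint16_t port_id)
{
	struct rte_flow_error error;

	if (rte_flow_flush(port_id, &error))
		DRV_LOG(ERR, "cannot flush flows: %s",
			error.message ? error.message : "(no message)");
}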

#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
/**
 * Query flow counter.
 *
 * @param cs
 *   The counter set to query.
 * @param counter_stats
 *   Last counter values, used to compute the delta and updated on reset.
 * @param[out] query_count
 *   Returned data from the counter.
 * @param error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_query_count(struct ibv_counter_set *cs,
		      struct mlx5_flow_counter_stats *counter_stats,
		      struct rte_flow_query_count *query_count,
		      struct rte_flow_error *error)
{
	uint64_t counters[2];
	struct ibv_query_counter_set_attr query_cs_attr = {
		.cs = cs,
		.query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
	};
	struct ibv_counter_set_data query_out = {
		.out = counters,
		.outlen = 2 * sizeof(uint64_t),
	};
	int err = mlx5_glue->query_counter_set(&query_cs_attr, &query_out);

	if (err)
		return rte_flow_error_set(error, err,
					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
					  NULL,
					  "cannot read counter");
	query_count->hits_set = 1;
	query_count->bytes_set = 1;
	query_count->hits = counters[0] - counter_stats->hits;
	query_count->bytes = counters[1] - counter_stats->bytes;
	/* On reset, rebase the saved values so the next query starts fresh. */
	if (query_count->reset) {
		counter_stats->hits = counters[0];
		counter_stats->bytes = counters[1];
	}
	return 0;
}

/**
 * Query a flow.
 *
 * @see rte_flow_query()
 * @see rte_flow_ops
 */
int
mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
		struct rte_flow *flow,
		enum rte_flow_action_type action __rte_unused,
		void *data,
		struct rte_flow_error *error)
{
	if (flow->cs) {
		int ret;

		ret = mlx5_flow_query_count(flow->cs,
					    &flow->counter_stats,
					    (struct rte_flow_query_count *)data,
					    error);
		if (ret)
			return ret;
	} else {
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
					  NULL,
					  "no counter found for flow");
	}
	return 0;
}
#endif

/**
 * Toggle isolated mode.
 *
 * @see rte_flow_isolate()
 * @see rte_flow_ops
 */
int
mlx5_flow_isolate(struct rte_eth_dev *dev,
		  int enable,
		  struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	if (dev->data->dev_started) {
		rte_flow_error_set(error, EBUSY,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "port must be stopped first");
		return -rte_errno;
	}
	priv->isolated = !!enable;
	if (enable)
		priv->dev->dev_ops = &mlx5_dev_ops_isolate;
	else
		priv->dev->dev_ops = &mlx5_dev_ops;
	return 0;
}
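
/*
 * Illustrative sketch (not part of the upstream driver): entering isolated
 * mode through the public API before the port is started, which reaches
 * mlx5_flow_isolate() above. The wrapper is an assumption for the example.
 */
static int __rte_unused
example_enter_isolated_mode(uint16_t port_id)
{
	struct rte_flow_error error;

	/* Must be called before rte_eth_dev_start(). */
	return rte_flow_isolate(port_id, 1, &error);
}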

/**
 * Convert a flow director filter to a generic flow.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Flow director filter to add.
 * @param attributes
 *   Generic flow parameters structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
			 const struct rte_eth_fdir_filter *fdir_filter,
			 struct mlx5_fdir *attributes)
{
	struct priv *priv = dev->data->dev_private;
	const struct rte_eth_fdir_input *input = &fdir_filter->input;
	const struct rte_eth_fdir_masks *mask =
		&dev->data->dev_conf.fdir_conf.mask;

	/* Validate queue number. */
	if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
		DRV_LOG(ERR, "port %u invalid queue number %d",
			dev->data->port_id, fdir_filter->action.rx_queue);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	attributes->attr.ingress = 1;
	attributes->items[0] = (struct rte_flow_item) {
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.spec = &attributes->l2,
		.mask = &attributes->l2_mask,
	};
	switch (fdir_filter->action.behavior) {
	case RTE_ETH_FDIR_ACCEPT:
		attributes->actions[0] = (struct rte_flow_action){
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &attributes->queue,
		};
		break;
	case RTE_ETH_FDIR_REJECT:
		attributes->actions[0] = (struct rte_flow_action){
			.type = RTE_FLOW_ACTION_TYPE_DROP,
		};
		break;
	default:
		DRV_LOG(ERR, "port %u invalid behavior %d",
			dev->data->port_id,
			fdir_filter->action.behavior);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	attributes->queue.index = fdir_filter->action.rx_queue;
	/* Handle L3. */
	switch (fdir_filter->input.flow_type) {
	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = input->flow.ip4_flow.src_ip,
			.dst_addr = input->flow.ip4_flow.dst_ip,
			.time_to_live = input->flow.ip4_flow.ttl,
			.type_of_service = input->flow.ip4_flow.tos,
			.next_proto_id = input->flow.ip4_flow.proto,
		};
		attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = mask->ipv4_mask.src_ip,
			.dst_addr = mask->ipv4_mask.dst_ip,
			.time_to_live = mask->ipv4_mask.ttl,
			.type_of_service = mask->ipv4_mask.tos,
			.next_proto_id = mask->ipv4_mask.proto,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV4,
			.spec = &attributes->l3,
			.mask = &attributes->l3_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
			.hop_limits = input->flow.ipv6_flow.hop_limits,
			.proto = input->flow.ipv6_flow.proto,
		};

		memcpy(attributes->l3.ipv6.hdr.src_addr,
		       input->flow.ipv6_flow.src_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
		memcpy(attributes->l3.ipv6.hdr.dst_addr,
		       input->flow.ipv6_flow.dst_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
		memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
		       mask->ipv6_mask.src_ip,
		       RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
		memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
		       mask->ipv6_mask.dst_ip,
		       RTE_DIM(attributes->l3_mask.ipv6.hdr.dst_addr));
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV6,
			.spec = &attributes->l3,
			.mask = &attributes->l3_mask,
		};
		break;
	default:
		DRV_LOG(ERR, "port %u invalid flow type %d",
			dev->data->port_id, fdir_filter->input.flow_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	/* Handle L4. */
	switch (fdir_filter->input.flow_type) {
	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
		attributes->l4.udp.hdr = (struct udp_hdr){
			.src_port = input->flow.udp4_flow.src_port,
			.dst_port = input->flow.udp4_flow.dst_port,
		};
		attributes->l4_mask.udp.hdr = (struct udp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_UDP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
		attributes->l4.tcp.hdr = (struct tcp_hdr){
			.src_port = input->flow.tcp4_flow.src_port,
			.dst_port = input->flow.tcp4_flow.dst_port,
		};
		attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_TCP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
		attributes->l4.udp.hdr = (struct udp_hdr){
			.src_port = input->flow.udp6_flow.src_port,
			.dst_port = input->flow.udp6_flow.dst_port,
		};
		attributes->l4_mask.udp.hdr = (struct udp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_UDP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
		attributes->l4.tcp.hdr = (struct tcp_hdr){
			.src_port = input->flow.tcp6_flow.src_port,
			.dst_port = input->flow.tcp6_flow.dst_port,
		};
		attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_TCP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
		break;
	default:
		DRV_LOG(ERR, "port %u invalid flow type %d",
			dev->data->port_id, fdir_filter->input.flow_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	return 0;
}

/**
 * Add new flow director filter and store it in list.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Flow director filter to add.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_add(struct rte_eth_dev *dev,
		     const struct rte_eth_fdir_filter *fdir_filter)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_fdir attributes = {
		.attr.group = 0,
		.l2_mask = {
			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.type = 0,
		},
	};
	struct mlx5_flow_parse parser = {
		.layer = HASH_RXQ_ETH,
	};
	struct rte_flow_error error;
	struct rte_flow *flow;
	int ret;

	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
	if (ret)
		return ret;
	ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
				attributes.actions, &error, &parser);
	if (ret)
		return ret;
	flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
				     attributes.items, attributes.actions,
				     &error);
	if (flow) {
		DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
			(void *)flow);
		return 0;
	}
	return -rte_errno;
}
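
/*
 * Illustrative sketch (not part of the upstream driver): a flow director
 * filter that the converter above turns into ETH / IPV4 / UDP items with a
 * QUEUE action. The wrapper, the UDP port and the target queue are
 * assumptions for the example.
 */
static int __rte_unused
example_add_udp4_fdir_filter(struct rte_eth_dev *dev)
{
	const struct rte_eth_fdir_filter filter = {
		.input = {
			.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
			.flow.udp4_flow.dst_port = rte_cpu_to_be_16(4789),
		},
		.action = {
			.rx_queue = 0,
			.behavior = RTE_ETH_FDIR_ACCEPT,
		},
	};

	return mlx5_fdir_filter_add(dev, &filter);
}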

/**
 * Delete specific filter.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Filter to be deleted.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_delete(struct rte_eth_dev *dev,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_fdir attributes = {
		.attr.group = 0,
	};
	struct mlx5_flow_parse parser = {
		.create = 1,
		.layer = HASH_RXQ_ETH,
	};
	struct rte_flow_error error;
	struct rte_flow *flow;
	unsigned int i;
	int ret;

	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
	if (ret)
		return ret;
	ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
				attributes.actions, &error, &parser);
	if (ret)
		goto exit;
	/*
	 * Special case for the drop action, which is only written to the
	 * specifications when the flow is created. In this situation the
	 * drop specification is missing and must be appended here.
	 */
	if (parser.drop) {
		struct ibv_flow_spec_action_drop *drop;

		drop = (void *)((uintptr_t)parser.queue[HASH_RXQ_ETH].ibv_attr +
				parser.queue[HASH_RXQ_ETH].offset);
		*drop = (struct ibv_flow_spec_action_drop){
			.type = IBV_FLOW_SPEC_ACTION_DROP,
			.size = sizeof(struct ibv_flow_spec_action_drop),
		};
		parser.queue[HASH_RXQ_ETH].ibv_attr->num_of_specs++;
	}
	TAILQ_FOREACH(flow, &priv->flows, next) {
		struct ibv_flow_attr *attr;
		struct ibv_spec_header *attr_h;
		void *spec;
		struct ibv_flow_attr *flow_attr;
		struct ibv_spec_header *flow_h;
		void *flow_spec;
		unsigned int specs_n;

		attr = parser.queue[HASH_RXQ_ETH].ibv_attr;
		flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr;
		/* Compare first the attributes. */
		if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
			continue;
		if (attr->num_of_specs == 0)
			continue;
		spec = (void *)((uintptr_t)attr +
				sizeof(struct ibv_flow_attr));
		flow_spec = (void *)((uintptr_t)flow_attr +
				     sizeof(struct ibv_flow_attr));
		specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
		for (i = 0; i != specs_n; ++i) {
			attr_h = spec;
			flow_h = flow_spec;
			if (memcmp(spec, flow_spec,
				   RTE_MIN(attr_h->size, flow_h->size)))
				goto wrong_flow;
			spec = (void *)((uintptr_t)spec + attr_h->size);
			flow_spec = (void *)((uintptr_t)flow_spec +
					     flow_h->size);
		}
		/* At this point, the flow matches. */
		break;
wrong_flow:
		/* The flow does not match. */
		continue;
	}
	ret = rte_errno; /* Save rte_errno before cleanup. */
	if (flow)
		mlx5_flow_list_destroy(dev, &priv->flows, flow);
exit:
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser.queue[i].ibv_attr)
			rte_free(parser.queue[i].ibv_attr);
	}
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}

/**
 * Update queue for specific filter.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Filter to be updated.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_update(struct rte_eth_dev *dev,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	int ret;

	ret = mlx5_fdir_filter_delete(dev, fdir_filter);
	if (ret)
		return ret;
	return mlx5_fdir_filter_add(dev, fdir_filter);
}

/**
 * Flush all filters.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
static void
mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_flush(dev, &priv->flows);
}

/**
 * Get flow director information.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[out] fdir_info
 *   Resulting flow director information.
 */
static void
mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
{
	struct priv *priv = dev->data->dev_private;
	struct rte_eth_fdir_masks *mask =
		&priv->dev->data->dev_conf.fdir_conf.mask;

	fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
	fdir_info->guarant_spc = 0;
	rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
	fdir_info->max_flexpayload = 0;
	fdir_info->flow_types_mask[0] = 0;
	fdir_info->flex_payload_unit = 0;
	fdir_info->max_flex_payload_segment_num = 0;
	fdir_info->flex_payload_limit = 0;
	memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
}

/**
 * Deal with flow director operations.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
		    void *arg)
{
	struct priv *priv = dev->data->dev_private;
	enum rte_fdir_mode fdir_mode =
		priv->dev->data->dev_conf.fdir_conf.mode;

	if (filter_op == RTE_ETH_FILTER_NOP)
		return 0;
	if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
	    fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
		DRV_LOG(ERR, "port %u flow director mode %d not supported",
			dev->data->port_id, fdir_mode);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	switch (filter_op) {
	case RTE_ETH_FILTER_ADD:
		return mlx5_fdir_filter_add(dev, arg);
	case RTE_ETH_FILTER_UPDATE:
		return mlx5_fdir_filter_update(dev, arg);
	case RTE_ETH_FILTER_DELETE:
		return mlx5_fdir_filter_delete(dev, arg);
	case RTE_ETH_FILTER_FLUSH:
		mlx5_fdir_filter_flush(dev);
		break;
	case RTE_ETH_FILTER_INFO:
		mlx5_fdir_info_get(dev, arg);
		break;
	default:
		DRV_LOG(DEBUG, "port %u unknown operation %u",
			dev->data->port_id, filter_op);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	return 0;
}

/**
 * Manage filter operations.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param filter_type
 *   Filter type.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
		     enum rte_filter_type filter_type,
		     enum rte_filter_op filter_op,
		     void *arg)
{
	switch (filter_type) {
	case RTE_ETH_FILTER_GENERIC:
		if (filter_op != RTE_ETH_FILTER_GET) {
			rte_errno = EINVAL;
			return -rte_errno;
		}
		*(const void **)arg = &mlx5_flow_ops;
		return 0;
	case RTE_ETH_FILTER_FDIR:
		return mlx5_fdir_ctrl_func(dev, filter_op, arg);
	default:
		DRV_LOG(ERR, "port %u filter type (%d) not supported",
			dev->data->port_id, filter_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	return 0;
}
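
/*
 * Illustrative sketch (not part of the upstream driver): how the generic
 * flow API resolves the driver's rte_flow_ops through the callback above;
 * this mirrors what the rte_flow library does internally. The wrapper is an
 * assumption for the example.
 */
static const struct rte_flow_ops * __rte_unused
example_get_flow_ops(struct rte_eth_dev *dev)
{
	const struct rte_flow_ops *ops = NULL;

	if (mlx5_dev_filter_ctrl(dev, RTE_ETH_FILTER_GENERIC,
				 RTE_ETH_FILTER_GET, &ops))
		return NULL;
	return ops;
}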