1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2016 6WIND S.A.
3 * Copyright 2016 Mellanox Technologies, Ltd
11 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
13 #pragma GCC diagnostic ignored "-Wpedantic"
15 #include <infiniband/verbs.h>
17 #pragma GCC diagnostic error "-Wpedantic"
20 #include <rte_common.h>
21 #include <rte_eth_ctrl.h>
22 #include <rte_ethdev_driver.h>
24 #include <rte_flow_driver.h>
25 #include <rte_malloc.h>
29 #include "mlx5_defs.h"
31 #include "mlx5_glue.h"
33 /* Define minimal priority for control plane flows. */
34 #define MLX5_CTRL_FLOW_PRIORITY 4
36 /* Internet Protocol versions. */
40 #ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
41 struct ibv_flow_spec_counter_action {
46 /* Dev ops structure defined in mlx5.c. */
47 extern const struct eth_dev_ops mlx5_dev_ops;
48 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
50 /** Structure given to the conversion functions. */
51 struct mlx5_flow_data {
52 struct mlx5_flow_parse *parser; /** Parser context. */
53 struct rte_flow_error *error; /** Error context. */
57 mlx5_flow_create_eth(const struct rte_flow_item *item,
58 const void *default_mask,
59 struct mlx5_flow_data *data);
62 mlx5_flow_create_vlan(const struct rte_flow_item *item,
63 const void *default_mask,
64 struct mlx5_flow_data *data);
67 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
68 const void *default_mask,
69 struct mlx5_flow_data *data);
72 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
73 const void *default_mask,
74 struct mlx5_flow_data *data);
77 mlx5_flow_create_udp(const struct rte_flow_item *item,
78 const void *default_mask,
79 struct mlx5_flow_data *data);
82 mlx5_flow_create_tcp(const struct rte_flow_item *item,
83 const void *default_mask,
84 struct mlx5_flow_data *data);
87 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
88 const void *default_mask,
89 struct mlx5_flow_data *data);
91 struct mlx5_flow_parse;
94 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
98 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);
101 mlx5_flow_create_count(struct rte_eth_dev *dev, struct mlx5_flow_parse *parser);
103 /* Hash RX queue types. */
114 /* Initialization data for hash RX queue. */
115 struct hash_rxq_init {
116 uint64_t hash_fields; /* Fields that participate in the hash. */
117 uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
118 unsigned int flow_priority; /* Flow priority to use. */
119 unsigned int ip_version; /* Internet protocol. */
122 /* Initialization data for hash RX queues. */
123 const struct hash_rxq_init hash_rxq_init[] = {
125 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
126 IBV_RX_HASH_DST_IPV4 |
127 IBV_RX_HASH_SRC_PORT_TCP |
128 IBV_RX_HASH_DST_PORT_TCP),
129 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
131 .ip_version = MLX5_IPV4,
134 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
135 IBV_RX_HASH_DST_IPV4 |
136 IBV_RX_HASH_SRC_PORT_UDP |
137 IBV_RX_HASH_DST_PORT_UDP),
138 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
140 .ip_version = MLX5_IPV4,
143 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
144 IBV_RX_HASH_DST_IPV4),
145 .dpdk_rss_hf = (ETH_RSS_IPV4 |
148 .ip_version = MLX5_IPV4,
151 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
152 IBV_RX_HASH_DST_IPV6 |
153 IBV_RX_HASH_SRC_PORT_TCP |
154 IBV_RX_HASH_DST_PORT_TCP),
155 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
157 .ip_version = MLX5_IPV6,
160 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
161 IBV_RX_HASH_DST_IPV6 |
162 IBV_RX_HASH_SRC_PORT_UDP |
163 IBV_RX_HASH_DST_PORT_UDP),
164 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
166 .ip_version = MLX5_IPV6,
169 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
170 IBV_RX_HASH_DST_IPV6),
171 .dpdk_rss_hf = (ETH_RSS_IPV6 |
174 .ip_version = MLX5_IPV6,
183 /* Number of entries in hash_rxq_init[]. */
184 const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
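/*
 * Editorial illustration (not part of the driver): a DPDK RSS request is
 * matched against this table through the dpdk_rss_hf field. A minimal
 * sketch, assuming rss_hf comes from the application's rte_eth_rss_conf:
 *
 *	uint64_t rss_hf = ETH_RSS_NONFRAG_IPV4_TCP;
 *	unsigned int i;
 *
 *	for (i = 0; i != hash_rxq_init_n; ++i)
 *		if (hash_rxq_init[i].dpdk_rss_hf & rss_hf)
 *			break; // entry i (here HASH_RXQ_TCPV4) applies
 */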
186 /** Structure for holding counter stats. */
187 struct mlx5_flow_counter_stats {
188 uint64_t hits; /**< Number of packets matched by the rule. */
189 uint64_t bytes; /**< Number of bytes matched by the rule. */
192 /** Structure for Drop queue. */
193 struct mlx5_hrxq_drop {
194 struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
195 struct ibv_qp *qp; /**< Verbs queue pair. */
196 struct ibv_wq *wq; /**< Verbs work queue. */
197 struct ibv_cq *cq; /**< Verbs completion queue. */
200 /* Flow structures. */
202 uint64_t hash_fields; /**< Fields that participate in the hash. */
203 struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
204 struct ibv_flow *ibv_flow; /**< Verbs flow. */
205 struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
208 /* Drop flow structures. */
209 struct mlx5_flow_drop {
210 struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
211 struct ibv_flow *ibv_flow; /**< Verbs flow. */
215 TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
216 uint32_t mark:1; /**< Set if the flow is marked. */
217 uint32_t drop:1; /**< Drop queue. */
218 struct rte_flow_action_rss rss_conf; /**< RSS configuration. */
219 uint16_t (*queues)[]; /**< Queue indexes to use. */
220 uint8_t rss_key[40]; /**< Copy of the RSS key. */
221 struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
222 struct mlx5_flow_counter_stats counter_stats; /**< The counter stats. */
223 struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
224 /**< Flow with Rx queue. */
227 /** Static initializer for items. */
229 (const enum rte_flow_item_type []){ \
230 __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
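/*
 * For illustration (editorial note): ITEMS(RTE_FLOW_ITEM_TYPE_ETH)
 * expands to a compound literal equivalent to:
 *
 *	(const enum rte_flow_item_type []){
 *		RTE_FLOW_ITEM_TYPE_ETH, RTE_FLOW_ITEM_TYPE_END,
 *	}
 *
 * i.e. an END-terminated array suitable for the .items lists below.
 */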
233 /** Structure to generate a simple graph of layers supported by the NIC. */
234 struct mlx5_flow_items {
235 /** List of possible actions for these items. */
236 const enum rte_flow_action_type *const actions;
237 /** Bit-masks corresponding to the possibilities for the item. */
240 * Default bit-masks to use when item->mask is not provided. When
241 * \default_mask is also NULL, the full supported bit-mask (\mask) is
244 const void *default_mask;
245 /** Bit-masks size in bytes. */
246 const unsigned int mask_sz;
248 * Conversion function from rte_flow to NIC specific flow.
251 * rte_flow item to convert.
252 * @param default_mask
253 * Default bit-masks to use when item->mask is not provided.
255 * Internal structure to store the conversion.
258 * 0 on success, a negative errno value otherwise and rte_errno is
261 int (*convert)(const struct rte_flow_item *item,
262 const void *default_mask,
263 struct mlx5_flow_data *data);
264 /** Size in bytes of the destination structure. */
265 const unsigned int dst_sz;
266 /** List of possible following items. */
267 const enum rte_flow_item_type *const items;
270 /** Valid actions for this PMD. */
271 static const enum rte_flow_action_type valid_actions[] = {
272 RTE_FLOW_ACTION_TYPE_DROP,
273 RTE_FLOW_ACTION_TYPE_QUEUE,
274 RTE_FLOW_ACTION_TYPE_MARK,
275 RTE_FLOW_ACTION_TYPE_FLAG,
276 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
277 RTE_FLOW_ACTION_TYPE_COUNT,
279 RTE_FLOW_ACTION_TYPE_END,
282 /** Graph of supported items and associated actions. */
283 static const struct mlx5_flow_items mlx5_flow_items[] = {
284 [RTE_FLOW_ITEM_TYPE_END] = {
285 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
286 RTE_FLOW_ITEM_TYPE_VXLAN),
288 [RTE_FLOW_ITEM_TYPE_ETH] = {
289 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
290 RTE_FLOW_ITEM_TYPE_IPV4,
291 RTE_FLOW_ITEM_TYPE_IPV6),
292 .actions = valid_actions,
293 .mask = &(const struct rte_flow_item_eth){
294 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
295 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
298 .default_mask = &rte_flow_item_eth_mask,
299 .mask_sz = sizeof(struct rte_flow_item_eth),
300 .convert = mlx5_flow_create_eth,
301 .dst_sz = sizeof(struct ibv_flow_spec_eth),
303 [RTE_FLOW_ITEM_TYPE_VLAN] = {
304 .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
305 RTE_FLOW_ITEM_TYPE_IPV6),
306 .actions = valid_actions,
307 .mask = &(const struct rte_flow_item_vlan){
310 .default_mask = &rte_flow_item_vlan_mask,
311 .mask_sz = sizeof(struct rte_flow_item_vlan),
312 .convert = mlx5_flow_create_vlan,
315 [RTE_FLOW_ITEM_TYPE_IPV4] = {
316 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
317 RTE_FLOW_ITEM_TYPE_TCP),
318 .actions = valid_actions,
319 .mask = &(const struct rte_flow_item_ipv4){
323 .type_of_service = -1,
327 .default_mask = &rte_flow_item_ipv4_mask,
328 .mask_sz = sizeof(struct rte_flow_item_ipv4),
329 .convert = mlx5_flow_create_ipv4,
330 .dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
332 [RTE_FLOW_ITEM_TYPE_IPV6] = {
333 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
334 RTE_FLOW_ITEM_TYPE_TCP),
335 .actions = valid_actions,
336 .mask = &(const struct rte_flow_item_ipv6){
339 0xff, 0xff, 0xff, 0xff,
340 0xff, 0xff, 0xff, 0xff,
341 0xff, 0xff, 0xff, 0xff,
342 0xff, 0xff, 0xff, 0xff,
345 0xff, 0xff, 0xff, 0xff,
346 0xff, 0xff, 0xff, 0xff,
347 0xff, 0xff, 0xff, 0xff,
348 0xff, 0xff, 0xff, 0xff,
355 .default_mask = &rte_flow_item_ipv6_mask,
356 .mask_sz = sizeof(struct rte_flow_item_ipv6),
357 .convert = mlx5_flow_create_ipv6,
358 .dst_sz = sizeof(struct ibv_flow_spec_ipv6),
360 [RTE_FLOW_ITEM_TYPE_UDP] = {
361 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
362 .actions = valid_actions,
363 .mask = &(const struct rte_flow_item_udp){
369 .default_mask = &rte_flow_item_udp_mask,
370 .mask_sz = sizeof(struct rte_flow_item_udp),
371 .convert = mlx5_flow_create_udp,
372 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
374 [RTE_FLOW_ITEM_TYPE_TCP] = {
375 .actions = valid_actions,
376 .mask = &(const struct rte_flow_item_tcp){
382 .default_mask = &rte_flow_item_tcp_mask,
383 .mask_sz = sizeof(struct rte_flow_item_tcp),
384 .convert = mlx5_flow_create_tcp,
385 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
387 [RTE_FLOW_ITEM_TYPE_VXLAN] = {
388 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
389 .actions = valid_actions,
390 .mask = &(const struct rte_flow_item_vxlan){
391 .vni = "\xff\xff\xff",
393 .default_mask = &rte_flow_item_vxlan_mask,
394 .mask_sz = sizeof(struct rte_flow_item_vxlan),
395 .convert = mlx5_flow_create_vxlan,
396 .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
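/*
 * Editorial note: this graph is walked one item at a time. For the
 * pattern eth / ipv4 / udp, validation starts at the
 * RTE_FLOW_ITEM_TYPE_END node, checks that ETH appears in its .items
 * list, moves to the ETH node to accept IPV4, and so on; an item not
 * listed by its predecessor is rejected as unsupported.
 */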
400 /** Structure to pass to the conversion function. */
401 struct mlx5_flow_parse {
402 uint32_t inner; /**< Set once VXLAN is encountered. */
404 /**< Whether resources should remain after a validate. */
405 uint32_t drop:1; /**< Target is a drop queue. */
406 uint32_t mark:1; /**< Mark is present in the flow. */
407 uint32_t count:1; /**< Count is present in the flow. */
408 uint32_t mark_id; /**< Mark identifier. */
409 struct rte_flow_action_rss rss_conf; /**< RSS configuration. */
410 uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indexes to use. */
411 uint8_t rss_key[40]; /**< Copy of the RSS key. */
412 enum hash_rxq_type layer; /**< Last pattern layer detected. */
413 struct ibv_counter_set *cs; /**< Holds the counter set for the rule. */
415 struct ibv_flow_attr *ibv_attr;
416 /**< Pointer to Verbs attributes. */
418 /**< Current position or total size of the attribute. */
419 } queue[RTE_DIM(hash_rxq_init)];
422 static const struct rte_flow_ops mlx5_flow_ops = {
423 .validate = mlx5_flow_validate,
424 .create = mlx5_flow_create,
425 .destroy = mlx5_flow_destroy,
426 .flush = mlx5_flow_flush,
427 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
428 .query = mlx5_flow_query,
432 .isolate = mlx5_flow_isolate,
435 /* Convert FDIR request to Generic flow. */
437 struct rte_flow_attr attr;
438 struct rte_flow_action actions[2];
439 struct rte_flow_item items[4];
440 struct rte_flow_item_eth l2;
441 struct rte_flow_item_eth l2_mask;
443 struct rte_flow_item_ipv4 ipv4;
444 struct rte_flow_item_ipv6 ipv6;
447 struct rte_flow_item_ipv4 ipv4;
448 struct rte_flow_item_ipv6 ipv6;
451 struct rte_flow_item_udp udp;
452 struct rte_flow_item_tcp tcp;
455 struct rte_flow_item_udp udp;
456 struct rte_flow_item_tcp tcp;
458 struct rte_flow_action_queue queue;
461 /* Verbs specification header. */
462 struct ibv_spec_header {
463 enum ibv_flow_spec_type type;
468 * Check support for a given item.
471 * Item specification.
473 * Bit-masks covering supported fields to compare with spec, last and mask in
476 * Bit-mask size in bytes.
479 * 0 on success, a negative errno value otherwise and rte_errno is set.
482 mlx5_flow_item_validate(const struct rte_flow_item *item,
483 const uint8_t *mask, unsigned int size)
485 if (!item->spec && (item->mask || item->last)) {
489 if (item->spec && !item->mask) {
491 const uint8_t *spec = item->spec;
493 for (i = 0; i < size; ++i)
494 if ((spec[i] | mask[i]) != mask[i]) {
499 if (item->last && !item->mask) {
501 const uint8_t *spec = item->last;
503 for (i = 0; i < size; ++i)
504 if ((spec[i] | mask[i]) != mask[i]) {
511 const uint8_t *spec = item->spec;
513 for (i = 0; i < size; ++i)
514 if ((spec[i] | mask[i]) != mask[i]) {
519 if (item->spec && item->last) {
522 const uint8_t *apply = mask;
528 for (i = 0; i < size; ++i) {
529 spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
530 last[i] = ((const uint8_t *)item->last)[i] & apply[i];
532 ret = memcmp(spec, last, size);
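/*
 * Illustration with hypothetical values: with a supported mask byte of
 * 0x0f, a user-provided spec/mask/last byte of 0xf0 gives
 * (0xf0 | 0x0f) != 0x0f, so the checks above reject the item; a byte of
 * 0x05 passes since (0x05 | 0x0f) == 0x0f.
 */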
542 * Validate flow rule attributes.
545 * Flow rule attributes.
547 * Perform verbose error reporting if not NULL.
550 * 0 on success, a negative errno value otherwise and rte_errno is set.
553 mlx5_flow_convert_attributes(const struct rte_flow_attr *attr,
554 struct rte_flow_error *error)
557 rte_flow_error_set(error, ENOTSUP,
558 RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
560 "groups are not supported");
563 if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
564 rte_flow_error_set(error, ENOTSUP,
565 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
567 "priorities are not supported");
571 rte_flow_error_set(error, ENOTSUP,
572 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
574 "egress is not supported");
577 if (!attr->ingress) {
578 rte_flow_error_set(error, ENOTSUP,
579 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
581 "only ingress is supported");
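/*
 * For illustration: the attributes accepted above are essentially
 * { .ingress = 1, .group = 0, .priority = 0 or MLX5_CTRL_FLOW_PRIORITY };
 * a non-zero group, egress or any other priority is rejected with
 * ENOTSUP.
 */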
588 * Extract the actions requested and store them in the parser.
591 * Pointer to Ethernet device.
593 * Associated actions (list terminated by the END action).
595 * Perform verbose error reporting if not NULL.
596 * @param[in, out] parser
597 * Internal parser structure.
600 * 0 on success, a negative errno value otherwise and rte_errno is set.
603 mlx5_flow_convert_actions(struct rte_eth_dev *dev,
604 const struct rte_flow_action actions[],
605 struct rte_flow_error *error,
606 struct mlx5_flow_parse *parser)
608 enum { FATE = 1, MARK = 2, COUNT = 4, };
609 uint32_t overlap = 0;
610 struct priv *priv = dev->data->dev_private;
612 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
613 if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
615 } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
617 goto exit_action_overlap;
620 } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
621 const struct rte_flow_action_queue *queue =
622 (const struct rte_flow_action_queue *)
626 goto exit_action_overlap;
628 if (!queue || (queue->index > (priv->rxqs_n - 1)))
629 goto exit_action_not_supported;
630 parser->queues[0] = queue->index;
631 parser->rss_conf = (struct rte_flow_action_rss){
633 .queue = parser->queues,
635 } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
636 const struct rte_flow_action_rss *rss =
637 (const struct rte_flow_action_rss *)
639 const uint8_t *rss_key;
640 uint32_t rss_key_len;
644 goto exit_action_overlap;
647 rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ) {
648 rte_flow_error_set(error, EINVAL,
649 RTE_FLOW_ERROR_TYPE_ACTION,
651 "the only supported RSS hash"
652 " function is Toeplitz");
655 if (rss->types & MLX5_RSS_HF_MASK) {
656 rte_flow_error_set(error, EINVAL,
657 RTE_FLOW_ERROR_TYPE_ACTION,
659 "unsupported RSS type"
664 rss_key_len = rss->key_len;
667 rss_key_len = rss_hash_default_key_len;
668 rss_key = rss_hash_default_key;
670 if (rss_key_len != RTE_DIM(parser->rss_key)) {
671 rte_flow_error_set(error, EINVAL,
672 RTE_FLOW_ERROR_TYPE_ACTION,
674 "RSS hash key must be"
675 " exactly 40 bytes long");
678 if (!rss->queue_num) {
679 rte_flow_error_set(error, EINVAL,
680 RTE_FLOW_ERROR_TYPE_ACTION,
685 if (rss->queue_num > RTE_DIM(parser->queues)) {
686 rte_flow_error_set(error, EINVAL,
687 RTE_FLOW_ERROR_TYPE_ACTION,
689 "too many queues for RSS"
693 for (n = 0; n < rss->queue_num; ++n) {
694 if (rss->queue[n] >= priv->rxqs_n) {
695 rte_flow_error_set(error, EINVAL,
696 RTE_FLOW_ERROR_TYPE_ACTION,
698 "queue id > number of"
703 parser->rss_conf = (struct rte_flow_action_rss){
704 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
706 .key_len = rss_key_len,
707 .queue_num = rss->queue_num,
708 .key = memcpy(parser->rss_key, rss_key,
709 sizeof(*rss_key) * rss_key_len),
710 .queue = memcpy(parser->queues, rss->queue,
711 sizeof(*rss->queue) *
714 } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
715 const struct rte_flow_action_mark *mark =
716 (const struct rte_flow_action_mark *)
720 goto exit_action_overlap;
723 rte_flow_error_set(error, EINVAL,
724 RTE_FLOW_ERROR_TYPE_ACTION,
726 "mark must be defined");
728 } else if (mark->id >= MLX5_FLOW_MARK_MAX) {
729 rte_flow_error_set(error, ENOTSUP,
730 RTE_FLOW_ERROR_TYPE_ACTION,
732 "mark must be between 0"
737 parser->mark_id = mark->id;
738 } else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
740 goto exit_action_overlap;
743 } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
744 priv->config.flow_counter_en) {
746 goto exit_action_overlap;
750 goto exit_action_not_supported;
753 /* When fate is unknown, drop traffic. */
754 if (!(overlap & FATE))
756 if (parser->drop && parser->mark)
758 if (!parser->rss_conf.queue_num && !parser->drop) {
759 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
760 NULL, "no valid action");
764 exit_action_not_supported:
765 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
766 actions, "action not supported");
769 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
770 actions, "overlapping actions are not supported");
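/*
 * Editorial example: the FATE/MARK/COUNT overlap tracking above rejects
 * an action list combining two fates, e.g. (queue being a hypothetical
 * struct rte_flow_action_queue):
 *
 *	struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_DROP },
 *		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 *
 * while combinations such as QUEUE + MARK or DROP + COUNT remain valid.
 */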
778 * Pattern specification (list terminated by the END pattern item).
780 * Perform verbose error reporting if not NULL.
781 * @param[in, out] parser
782 * Internal parser structure.
785 * 0 on success, a negative errno value otherwise and rte_errno is set.
788 mlx5_flow_convert_items_validate(const struct rte_flow_item items[],
789 struct rte_flow_error *error,
790 struct mlx5_flow_parse *parser)
792 const struct mlx5_flow_items *cur_item = mlx5_flow_items;
796 /* Initialise the offsets to start after verbs attribute. */
797 for (i = 0; i != hash_rxq_init_n; ++i)
798 parser->queue[i].offset = sizeof(struct ibv_flow_attr);
799 for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
800 const struct mlx5_flow_items *token = NULL;
803 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
807 cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
809 if (cur_item->items[i] == items->type) {
810 token = &mlx5_flow_items[items->type];
816 goto exit_item_not_supported;
819 ret = mlx5_flow_item_validate(items,
820 (const uint8_t *)cur_item->mask,
823 goto exit_item_not_supported;
824 if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
826 rte_flow_error_set(error, ENOTSUP,
827 RTE_FLOW_ERROR_TYPE_ITEM,
829 "cannot recognize multiple"
830 " VXLAN encapsulations");
833 parser->inner = IBV_FLOW_SPEC_INNER;
836 parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
838 for (n = 0; n != hash_rxq_init_n; ++n)
839 parser->queue[n].offset += cur_item->dst_sz;
843 parser->queue[HASH_RXQ_ETH].offset +=
844 sizeof(struct ibv_flow_spec_action_drop);
847 for (i = 0; i != hash_rxq_init_n; ++i)
848 parser->queue[i].offset +=
849 sizeof(struct ibv_flow_spec_action_tag);
852 unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
854 for (i = 0; i != hash_rxq_init_n; ++i)
855 parser->queue[i].offset += size;
858 exit_item_not_supported:
859 return rte_flow_error_set(error, -ret, RTE_FLOW_ERROR_TYPE_ITEM,
860 items, "item not supported");
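/*
 * Illustration of the offset accounting above, assuming the pattern
 * eth / ipv4 / udp with no tunnel and no extra actions:
 *
 *	offset = sizeof(struct ibv_flow_attr)
 *	       + sizeof(struct ibv_flow_spec_eth)
 *	       + sizeof(struct ibv_flow_spec_ipv4_ext)
 *	       + sizeof(struct ibv_flow_spec_tcp_udp);
 *
 * per hash Rx queue type, plus the drop/tag/counter spec sizes when the
 * corresponding actions are present.
 */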
864 * Allocate memory space to store verbs flow attributes.
867 * Amount of bytes to allocate.
869 * Perform verbose error reporting if not NULL.
872 * A verbs flow attribute on success, NULL otherwise and rte_errno is set.
874 static struct ibv_flow_attr *
875 mlx5_flow_convert_allocate(unsigned int size, struct rte_flow_error *error)
877 struct ibv_flow_attr *ibv_attr;
879 ibv_attr = rte_calloc(__func__, 1, size, 0);
881 rte_flow_error_set(error, ENOMEM,
882 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
884 "cannot allocate verbs spec attributes");
891 * Make inner packet matching take a higher priority than non-inner matching.
894 * @param[in, out] parser
895 * Internal parser structure.
897 * User flow attribute.
900 mlx5_flow_update_priority(struct mlx5_flow_parse *parser,
901 const struct rte_flow_attr *attr)
906 parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
908 hash_rxq_init[HASH_RXQ_ETH].flow_priority;
911 for (i = 0; i != hash_rxq_init_n; ++i) {
912 if (parser->queue[i].ibv_attr) {
913 parser->queue[i].ibv_attr->priority =
915 hash_rxq_init[i].flow_priority -
916 (parser->inner ? 1 : 0);
922 * Finalise verbs flow attributes.
924 * @param[in, out] parser
925 * Internal parser structure.
928 mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
930 const unsigned int ipv4 =
931 hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
932 const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 : HASH_RXQ_TCPV6;
933 const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
934 const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 : HASH_RXQ_TCPV4;
935 const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 : HASH_RXQ_IPV4;
936 const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
939 /* Remove any other flow not matching the pattern. */
940 if (parser->rss_conf.queue_num == 1 && !parser->rss_conf.types) {
941 for (i = 0; i != hash_rxq_init_n; ++i) {
942 if (i == HASH_RXQ_ETH)
944 rte_free(parser->queue[i].ibv_attr);
945 parser->queue[i].ibv_attr = NULL;
949 if (parser->layer == HASH_RXQ_ETH) {
953 * This layer becomes useless as the pattern defines deeper layers.
956 rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
957 parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
959 /* Remove opposite kind of layer e.g. IPv6 if the pattern is IPv4. */
960 for (i = ohmin; i != (ohmax + 1); ++i) {
961 if (!parser->queue[i].ibv_attr)
963 rte_free(parser->queue[i].ibv_attr);
964 parser->queue[i].ibv_attr = NULL;
966 /* Remove impossible flow according to the RSS configuration. */
967 if (hash_rxq_init[parser->layer].dpdk_rss_hf &
968 parser->rss_conf.types) {
969 /* Remove any other flow. */
970 for (i = hmin; i != (hmax + 1); ++i) {
971 if ((i == parser->layer) ||
972 (!parser->queue[i].ibv_attr))
974 rte_free(parser->queue[i].ibv_attr);
975 parser->queue[i].ibv_attr = NULL;
977 } else if (!parser->queue[ip].ibv_attr) {
978 /* No RSS possible with the current configuration. */
979 parser->rss_conf.queue_num = 1;
984 * Fill missing layers in verbs specifications, or compute the correct
985 * offset to allocate the memory space for the attributes and specifications.
988 for (i = 0; i != hash_rxq_init_n - 1; ++i) {
990 struct ibv_flow_spec_ipv4_ext ipv4;
991 struct ibv_flow_spec_ipv6 ipv6;
992 struct ibv_flow_spec_tcp_udp udp_tcp;
997 if (i == parser->layer)
999 if (parser->layer == HASH_RXQ_ETH) {
1000 if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
1001 size = sizeof(struct ibv_flow_spec_ipv4_ext);
1002 specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
1003 .type = IBV_FLOW_SPEC_IPV4_EXT,
1007 size = sizeof(struct ibv_flow_spec_ipv6);
1008 specs.ipv6 = (struct ibv_flow_spec_ipv6){
1009 .type = IBV_FLOW_SPEC_IPV6,
1013 if (parser->queue[i].ibv_attr) {
1014 dst = (void *)((uintptr_t)
1015 parser->queue[i].ibv_attr +
1016 parser->queue[i].offset);
1017 memcpy(dst, &specs, size);
1018 ++parser->queue[i].ibv_attr->num_of_specs;
1020 parser->queue[i].offset += size;
1022 if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
1023 (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
1024 size = sizeof(struct ibv_flow_spec_tcp_udp);
1025 specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
1026 .type = ((i == HASH_RXQ_UDPV4 ||
1027 i == HASH_RXQ_UDPV6) ?
1032 if (parser->queue[i].ibv_attr) {
1033 dst = (void *)((uintptr_t)
1034 parser->queue[i].ibv_attr +
1035 parser->queue[i].offset);
1036 memcpy(dst, &specs, size);
1037 ++parser->queue[i].ibv_attr->num_of_specs;
1039 parser->queue[i].offset += size;
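/*
 * Editorial example: for the pattern eth / ipv4 with RSS requesting
 * ETH_RSS_NONFRAG_IPV4_UDP, the HASH_RXQ_UDPV4 attribute lacks an L4
 * spec, so the loop above appends an empty (wildcard)
 * (struct ibv_flow_spec_tcp_udp){ .type = IBV_FLOW_SPEC_UDP, .size = size }
 * allowing the flow to still be spread on UDP ports.
 */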
1045 * Validate and convert a flow supported by the NIC.
1048 * Pointer to Ethernet device.
1050 * Flow rule attributes.
1051 * @param[in] pattern
1052 * Pattern specification (list terminated by the END pattern item).
1053 * @param[in] actions
1054 * Associated actions (list terminated by the END action).
1056 * Perform verbose error reporting if not NULL.
1057 * @param[in, out] parser
1058 * Internal parser structure.
1061 * 0 on success, a negative errno value otherwise and rte_errno is set.
1064 mlx5_flow_convert(struct rte_eth_dev *dev,
1065 const struct rte_flow_attr *attr,
1066 const struct rte_flow_item items[],
1067 const struct rte_flow_action actions[],
1068 struct rte_flow_error *error,
1069 struct mlx5_flow_parse *parser)
1071 const struct mlx5_flow_items *cur_item = mlx5_flow_items;
1075 /* First step. Validate the attributes, items and actions. */
1076 *parser = (struct mlx5_flow_parse){
1077 .create = parser->create,
1078 .layer = HASH_RXQ_ETH,
1079 .mark_id = MLX5_FLOW_MARK_DEFAULT,
1081 ret = mlx5_flow_convert_attributes(attr, error);
1084 ret = mlx5_flow_convert_actions(dev, actions, error, parser);
1087 ret = mlx5_flow_convert_items_validate(items, error, parser);
1090 mlx5_flow_convert_finalise(parser);
1093 * Allocate the memory space to store verbs specifications.
1096 unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;
1098 parser->queue[HASH_RXQ_ETH].ibv_attr =
1099 mlx5_flow_convert_allocate(offset, error);
1100 if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
1102 parser->queue[HASH_RXQ_ETH].offset =
1103 sizeof(struct ibv_flow_attr);
1105 for (i = 0; i != hash_rxq_init_n; ++i) {
1106 unsigned int offset;
1108 if (!(parser->rss_conf.types &
1109 hash_rxq_init[i].dpdk_rss_hf) &&
1110 (i != HASH_RXQ_ETH))
1112 offset = parser->queue[i].offset;
1113 parser->queue[i].ibv_attr =
1114 mlx5_flow_convert_allocate(offset, error);
1115 if (!parser->queue[i].ibv_attr)
1117 parser->queue[i].offset = sizeof(struct ibv_flow_attr);
1120 /* Third step. Conversion parse, fill the specifications. */
1122 for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
1123 struct mlx5_flow_data data = {
1128 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
1130 cur_item = &mlx5_flow_items[items->type];
1131 ret = cur_item->convert(items,
1132 (cur_item->default_mask ?
1133 cur_item->default_mask :
1140 mlx5_flow_create_flag_mark(parser, parser->mark_id);
1141 if (parser->count && parser->create) {
1142 mlx5_flow_create_count(dev, parser);
1144 goto exit_count_error;
1147 * Last step. Complete missing specification to reach the RSS
1151 mlx5_flow_convert_finalise(parser);
1152 mlx5_flow_update_priority(parser, attr);
1154 /* Only verification is expected, all resources should be released. */
1155 if (!parser->create) {
1156 for (i = 0; i != hash_rxq_init_n; ++i) {
1157 if (parser->queue[i].ibv_attr) {
1158 rte_free(parser->queue[i].ibv_attr);
1159 parser->queue[i].ibv_attr = NULL;
1165 for (i = 0; i != hash_rxq_init_n; ++i) {
1166 if (parser->queue[i].ibv_attr) {
1167 rte_free(parser->queue[i].ibv_attr);
1168 parser->queue[i].ibv_attr = NULL;
1171 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1172 NULL, "cannot allocate verbs spec attributes");
1175 rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1176 NULL, "cannot create counter");
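/*
 * Editorial sketch of this conversion from the application side
 * (port_id, pattern and actions are placeholders):
 *
 *	struct rte_flow_error err;
 *	int rc = rte_flow_validate(port_id, &attr, pattern, actions, &err);
 *
 * Validation runs exactly this function with parser->create == 0, so
 * every Verbs resource allocated along the way is released again.
 */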
1181 * Copy the specification created into the flow.
1184 * Internal parser structure.
1186 * Create specification.
1188 * Size in bytes of the specification to copy.
1191 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
1197 for (i = 0; i != hash_rxq_init_n; ++i) {
1198 if (!parser->queue[i].ibv_attr)
1200 /* Specification must be the same L3 type or none. */
1201 if (parser->layer == HASH_RXQ_ETH ||
1202 (hash_rxq_init[parser->layer].ip_version ==
1203 hash_rxq_init[i].ip_version) ||
1204 (hash_rxq_init[i].ip_version == 0)) {
1205 dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1206 parser->queue[i].offset);
1207 memcpy(dst, src, size);
1208 ++parser->queue[i].ibv_attr->num_of_specs;
1209 parser->queue[i].offset += size;
1215 * Convert Ethernet item to Verbs specification.
1218 * Item specification.
1219 * @param[in] default_mask
1220 * Default bit-masks to use when item->mask is not provided.
1221 * @param[in, out] data
1225 * 0 on success, a negative errno value otherwise and rte_errno is set.
1228 mlx5_flow_create_eth(const struct rte_flow_item *item,
1229 const void *default_mask,
1230 struct mlx5_flow_data *data)
1232 const struct rte_flow_item_eth *spec = item->spec;
1233 const struct rte_flow_item_eth *mask = item->mask;
1234 struct mlx5_flow_parse *parser = data->parser;
1235 const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1236 struct ibv_flow_spec_eth eth = {
1237 .type = parser->inner | IBV_FLOW_SPEC_ETH,
1241 /* Don't update layer for the inner pattern. */
1243 parser->layer = HASH_RXQ_ETH;
1248 mask = default_mask;
1249 memcpy(ð.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
1250 memcpy(ð.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
1251 eth.val.ether_type = spec->type;
1252 memcpy(ð.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
1253 memcpy(ð.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
1254 eth.mask.ether_type = mask->type;
1255 /* Remove unwanted bits from values. */
1256 for (i = 0; i < ETHER_ADDR_LEN; ++i) {
1257 eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
1258 eth.val.src_mac[i] &= eth.mask.src_mac[i];
1260 eth.val.ether_type &= eth.mask.ether_type;
1262 mlx5_flow_create_copy(parser, ð, eth_size);
1267 * Convert VLAN item to Verbs specification.
1270 * Item specification.
1271 * @param[in] default_mask
1272 * Default bit-masks to use when item->mask is not provided.
1273 * @param[in, out] data
1277 * 0 on success, a negative errno value otherwise and rte_errno is set.
1280 mlx5_flow_create_vlan(const struct rte_flow_item *item,
1281 const void *default_mask,
1282 struct mlx5_flow_data *data)
1284 const struct rte_flow_item_vlan *spec = item->spec;
1285 const struct rte_flow_item_vlan *mask = item->mask;
1286 struct mlx5_flow_parse *parser = data->parser;
1287 struct ibv_flow_spec_eth *eth;
1288 const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1293 mask = default_mask;
1295 for (i = 0; i != hash_rxq_init_n; ++i) {
1296 if (!parser->queue[i].ibv_attr)
1299 eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1300 parser->queue[i].offset - eth_size);
1301 eth->val.vlan_tag = spec->tci;
1302 eth->mask.vlan_tag = mask->tci;
1303 eth->val.vlan_tag &= eth->mask.vlan_tag;
1305 * From a Verbs perspective, an empty VLAN is equivalent
1306 * to a packet without a VLAN layer.
1308 if (!eth->mask.vlan_tag)
1314 return rte_flow_error_set(data->error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM,
1315 item, "VLAN cannot be empty");
1319 * Convert IPv4 item to Verbs specification.
1322 * Item specification.
1323 * @param[in] default_mask
1324 * Default bit-masks to use when item->mask is not provided.
1325 * @param[in, out] data
1329 * 0 on success, a negative errno value otherwise and rte_errno is set.
1332 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
1333 const void *default_mask,
1334 struct mlx5_flow_data *data)
1336 const struct rte_flow_item_ipv4 *spec = item->spec;
1337 const struct rte_flow_item_ipv4 *mask = item->mask;
1338 struct mlx5_flow_parse *parser = data->parser;
1339 unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
1340 struct ibv_flow_spec_ipv4_ext ipv4 = {
1341 .type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
1345 /* Don't update layer for the inner pattern. */
1347 parser->layer = HASH_RXQ_IPV4;
1350 mask = default_mask;
1351 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
1352 .src_ip = spec->hdr.src_addr,
1353 .dst_ip = spec->hdr.dst_addr,
1354 .proto = spec->hdr.next_proto_id,
1355 .tos = spec->hdr.type_of_service,
1357 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
1358 .src_ip = mask->hdr.src_addr,
1359 .dst_ip = mask->hdr.dst_addr,
1360 .proto = mask->hdr.next_proto_id,
1361 .tos = mask->hdr.type_of_service,
1363 /* Remove unwanted bits from values. */
1364 ipv4.val.src_ip &= ipv4.mask.src_ip;
1365 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
1366 ipv4.val.proto &= ipv4.mask.proto;
1367 ipv4.val.tos &= ipv4.mask.tos;
1369 mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
1374 * Convert IPv6 item to Verbs specification.
1377 * Item specification.
1378 * @param[in] default_mask
1379 * Default bit-masks to use when item->mask is not provided.
1380 * @param[in, out] data
1384 * 0 on success, a negative errno value otherwise and rte_errno is set.
1387 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
1388 const void *default_mask,
1389 struct mlx5_flow_data *data)
1391 const struct rte_flow_item_ipv6 *spec = item->spec;
1392 const struct rte_flow_item_ipv6 *mask = item->mask;
1393 struct mlx5_flow_parse *parser = data->parser;
1394 unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
1395 struct ibv_flow_spec_ipv6 ipv6 = {
1396 .type = parser->inner | IBV_FLOW_SPEC_IPV6,
1400 /* Don't update layer for the inner pattern. */
1402 parser->layer = HASH_RXQ_IPV6;
1405 uint32_t vtc_flow_val;
1406 uint32_t vtc_flow_mask;
1409 mask = default_mask;
1410 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
1411 RTE_DIM(ipv6.val.src_ip));
1412 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
1413 RTE_DIM(ipv6.val.dst_ip));
1414 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
1415 RTE_DIM(ipv6.mask.src_ip));
1416 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
1417 RTE_DIM(ipv6.mask.dst_ip));
1418 vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
1419 vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
1420 ipv6.val.flow_label =
1421 rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
1423 ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
1425 ipv6.val.next_hdr = spec->hdr.proto;
1426 ipv6.val.hop_limit = spec->hdr.hop_limits;
1427 ipv6.mask.flow_label =
1428 rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
1430 ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
1432 ipv6.mask.next_hdr = mask->hdr.proto;
1433 ipv6.mask.hop_limit = mask->hdr.hop_limits;
1434 /* Remove unwanted bits from values. */
1435 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
1436 ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
1437 ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
1439 ipv6.val.flow_label &= ipv6.mask.flow_label;
1440 ipv6.val.traffic_class &= ipv6.mask.traffic_class;
1441 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
1442 ipv6.val.hop_limit &= ipv6.mask.hop_limit;
1444 mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
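/*
 * Editorial note on the vtc_flow handling above: the first IPv6 word is
 * version(4b) | traffic class(8b) | flow label(20b). Assuming the usual
 * rte_ip.h mask definitions, for vtc_flow = 0x6abcdef1 in host order:
 *
 *	traffic class = (0x6abcdef1 & IPV6_HDR_TC_MASK) >> 20 = 0xab
 *	flow label    =  0x6abcdef1 & IPV6_HDR_FL_MASK        = 0xcdef1
 */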
1449 * Convert UDP item to Verbs specification.
1452 * Item specification.
1453 * @param[in] default_mask
1454 * Default bit-masks to use when item->mask is not provided.
1455 * @param[in, out] data
1459 * 0 on success, a negative errno value otherwise and rte_errno is set.
1462 mlx5_flow_create_udp(const struct rte_flow_item *item,
1463 const void *default_mask,
1464 struct mlx5_flow_data *data)
1466 const struct rte_flow_item_udp *spec = item->spec;
1467 const struct rte_flow_item_udp *mask = item->mask;
1468 struct mlx5_flow_parse *parser = data->parser;
1469 unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1470 struct ibv_flow_spec_tcp_udp udp = {
1471 .type = parser->inner | IBV_FLOW_SPEC_UDP,
1475 /* Don't update layer for the inner pattern. */
1476 if (!parser->inner) {
1477 if (parser->layer == HASH_RXQ_IPV4)
1478 parser->layer = HASH_RXQ_UDPV4;
1480 parser->layer = HASH_RXQ_UDPV6;
1484 mask = default_mask;
1485 udp.val.dst_port = spec->hdr.dst_port;
1486 udp.val.src_port = spec->hdr.src_port;
1487 udp.mask.dst_port = mask->hdr.dst_port;
1488 udp.mask.src_port = mask->hdr.src_port;
1489 /* Remove unwanted bits from values. */
1490 udp.val.src_port &= udp.mask.src_port;
1491 udp.val.dst_port &= udp.mask.dst_port;
1493 mlx5_flow_create_copy(parser, &udp, udp_size);
1498 * Convert TCP item to Verbs specification.
1501 * Item specification.
1502 * @param[in] default_mask
1503 * Default bit-masks to use when item->mask is not provided.
1504 * @param[in, out] data
1508 * 0 on success, a negative errno value otherwise and rte_errno is set.
1511 mlx5_flow_create_tcp(const struct rte_flow_item *item,
1512 const void *default_mask,
1513 struct mlx5_flow_data *data)
1515 const struct rte_flow_item_tcp *spec = item->spec;
1516 const struct rte_flow_item_tcp *mask = item->mask;
1517 struct mlx5_flow_parse *parser = data->parser;
1518 unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1519 struct ibv_flow_spec_tcp_udp tcp = {
1520 .type = parser->inner | IBV_FLOW_SPEC_TCP,
1524 /* Don't update layer for the inner pattern. */
1525 if (!parser->inner) {
1526 if (parser->layer == HASH_RXQ_IPV4)
1527 parser->layer = HASH_RXQ_TCPV4;
1529 parser->layer = HASH_RXQ_TCPV6;
1533 mask = default_mask;
1534 tcp.val.dst_port = spec->hdr.dst_port;
1535 tcp.val.src_port = spec->hdr.src_port;
1536 tcp.mask.dst_port = mask->hdr.dst_port;
1537 tcp.mask.src_port = mask->hdr.src_port;
1538 /* Remove unwanted bits from values. */
1539 tcp.val.src_port &= tcp.mask.src_port;
1540 tcp.val.dst_port &= tcp.mask.dst_port;
1542 mlx5_flow_create_copy(parser, &tcp, tcp_size);
1547 * Convert VXLAN item to Verbs specification.
1550 * Item specification.
1551 * @param[in] default_mask
1552 * Default bit-masks to use when item->mask is not provided.
1553 * @param[in, out] data
1557 * 0 on success, a negative errno value otherwise and rte_errno is set.
1560 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
1561 const void *default_mask,
1562 struct mlx5_flow_data *data)
1564 const struct rte_flow_item_vxlan *spec = item->spec;
1565 const struct rte_flow_item_vxlan *mask = item->mask;
1566 struct mlx5_flow_parse *parser = data->parser;
1567 unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1568 struct ibv_flow_spec_tunnel vxlan = {
1569 .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1578 parser->inner = IBV_FLOW_SPEC_INNER;
1581 mask = default_mask;
1582 memcpy(&id.vni[1], spec->vni, 3);
1583 vxlan.val.tunnel_id = id.vlan_id;
1584 memcpy(&id.vni[1], mask->vni, 3);
1585 vxlan.mask.tunnel_id = id.vlan_id;
1586 /* Remove unwanted bits from values. */
1587 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1590 * Tunnel ID 0 is equivalent to not adding a VXLAN layer: if only this
1591 * layer is defined in the Verbs specification, it is interpreted as a
1592 * wildcard and all packets will match this rule; if it follows a full
1593 * stack layer (e.g. eth / ipv4 / udp), all packets matching the layers
1594 * before will also match this rule.
1595 * To avoid such a situation, VNI 0 is currently refused.
1597 if (!vxlan.val.tunnel_id)
1598 return rte_flow_error_set(data->error, EINVAL,
1599 RTE_FLOW_ERROR_TYPE_ITEM,
1601 "VxLAN vni cannot be 0");
1602 mlx5_flow_create_copy(parser, &vxlan, size);
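/*
 * Illustration: the 24-bit VNI lands in the three low-order bytes of
 * the big-endian tunnel_id. E.g. item vni = {0x00, 0x00, 0x2a} (VNI 42)
 * fills id.vni[] = {0x00, 0x00, 0x00, 0x2a}, i.e. tunnel_id == 42 in
 * network byte order.
 */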
1607 * Convert mark/flag action to Verbs specification.
1610 * Internal parser structure.
1615 * 0 on success, a negative errno value otherwise and rte_errno is set.
1618 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
1620 unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1621 struct ibv_flow_spec_action_tag tag = {
1622 .type = IBV_FLOW_SPEC_ACTION_TAG,
1624 .tag_id = mlx5_flow_mark_set(mark_id),
1627 assert(parser->mark);
1628 mlx5_flow_create_copy(parser, &tag, size);
1633 * Convert count action to Verbs specification.
1636 * Pointer to Ethernet device.
1638 * Pointer to MLX5 flow parser structure.
1641 * 0 on success, a negative errno value otherwise and rte_errno is set.
1644 mlx5_flow_create_count(struct rte_eth_dev *dev __rte_unused,
1645 struct mlx5_flow_parse *parser __rte_unused)
1647 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
1648 struct priv *priv = dev->data->dev_private;
1649 unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
1650 struct ibv_counter_set_init_attr init_attr = {0};
1651 struct ibv_flow_spec_counter_action counter = {
1652 .type = IBV_FLOW_SPEC_ACTION_COUNT,
1654 .counter_set_handle = 0,
1657 init_attr.counter_set_id = 0;
1658 parser->cs = mlx5_glue->create_counter_set(priv->ctx, &init_attr);
1663 counter.counter_set_handle = parser->cs->handle;
1664 mlx5_flow_create_copy(parser, &counter, size);
1670 * Complete flow rule creation with a drop queue.
1673 * Pointer to Ethernet device.
1675 * Internal parser structure.
1677 * Pointer to the rte_flow.
1679 * Perform verbose error reporting if not NULL.
1682 * 0 on success, a negative errno value otherwise and rte_errno is set.
1685 mlx5_flow_create_action_queue_drop(struct rte_eth_dev *dev,
1686 struct mlx5_flow_parse *parser,
1687 struct rte_flow *flow,
1688 struct rte_flow_error *error)
1690 struct priv *priv = dev->data->dev_private;
1691 struct ibv_flow_spec_action_drop *drop;
1692 unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
1697 drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
1698 parser->queue[HASH_RXQ_ETH].offset);
1699 *drop = (struct ibv_flow_spec_action_drop){
1700 .type = IBV_FLOW_SPEC_ACTION_DROP,
1703 ++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
1704 parser->queue[HASH_RXQ_ETH].offset += size;
1705 flow->frxq[HASH_RXQ_ETH].ibv_attr =
1706 parser->queue[HASH_RXQ_ETH].ibv_attr;
1708 flow->cs = parser->cs;
1709 if (!priv->dev->data->dev_started)
1711 parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
1712 flow->frxq[HASH_RXQ_ETH].ibv_flow =
1713 mlx5_glue->create_flow(priv->flow_drop_queue->qp,
1714 flow->frxq[HASH_RXQ_ETH].ibv_attr);
1715 if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1716 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1717 NULL, "flow rule creation failure");
1723 if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1724 claim_zero(mlx5_glue->destroy_flow
1725 (flow->frxq[HASH_RXQ_ETH].ibv_flow));
1726 flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
1728 if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
1729 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
1730 flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
1733 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
1741 * Create hash Rx queues when RSS is enabled.
1744 * Pointer to Ethernet device.
1746 * Internal parser structure.
1748 * Pointer to the rte_flow.
1750 * Perform verbose error reporting if not NULL.
1753 * 0 on success, a negative errno value otherwise and rte_errno is set.
1756 mlx5_flow_create_action_queue_rss(struct rte_eth_dev *dev,
1757 struct mlx5_flow_parse *parser,
1758 struct rte_flow *flow,
1759 struct rte_flow_error *error)
1761 struct priv *priv = dev->data->dev_private;
1764 for (i = 0; i != hash_rxq_init_n; ++i) {
1765 uint64_t hash_fields;
1767 if (!parser->queue[i].ibv_attr)
1769 flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
1770 parser->queue[i].ibv_attr = NULL;
1771 hash_fields = hash_rxq_init[i].hash_fields;
1772 if (!priv->dev->data->dev_started)
1774 flow->frxq[i].hrxq =
1776 parser->rss_conf.key,
1777 parser->rss_conf.key_len,
1779 parser->rss_conf.queue,
1780 parser->rss_conf.queue_num);
1781 if (flow->frxq[i].hrxq)
1783 flow->frxq[i].hrxq =
1785 parser->rss_conf.key,
1786 parser->rss_conf.key_len,
1788 parser->rss_conf.queue,
1789 parser->rss_conf.queue_num);
1790 if (!flow->frxq[i].hrxq) {
1791 return rte_flow_error_set(error, ENOMEM,
1792 RTE_FLOW_ERROR_TYPE_HANDLE,
1794 "cannot create hash rxq");
1801 * Complete flow rule creation.
1804 * Pointer to Ethernet device.
1806 * Internal parser structure.
1808 * Pointer to the rte_flow.
1810 * Perform verbose error reporting if not NULL.
1813 * 0 on success, a negative errno value otherwise and rte_errno is set.
1816 mlx5_flow_create_action_queue(struct rte_eth_dev *dev,
1817 struct mlx5_flow_parse *parser,
1818 struct rte_flow *flow,
1819 struct rte_flow_error *error)
1821 struct priv *priv = dev->data->dev_private;
1824 unsigned int flows_n = 0;
1828 assert(!parser->drop);
1829 ret = mlx5_flow_create_action_queue_rss(dev, parser, flow, error);
1833 flow->cs = parser->cs;
1834 if (!priv->dev->data->dev_started)
1836 for (i = 0; i != hash_rxq_init_n; ++i) {
1837 if (!flow->frxq[i].hrxq)
1839 flow->frxq[i].ibv_flow =
1840 mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
1841 flow->frxq[i].ibv_attr);
1842 if (!flow->frxq[i].ibv_flow) {
1843 rte_flow_error_set(error, ENOMEM,
1844 RTE_FLOW_ERROR_TYPE_HANDLE,
1845 NULL, "flow rule creation failure");
1849 DRV_LOG(DEBUG, "port %u %p type %d QP %p ibv_flow %p",
1852 (void *)flow->frxq[i].hrxq,
1853 (void *)flow->frxq[i].ibv_flow);
1856 rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE,
1857 NULL, "internal error in flow creation");
1860 for (i = 0; i != parser->rss_conf.queue_num; ++i) {
1861 struct mlx5_rxq_data *q =
1862 (*priv->rxqs)[parser->rss_conf.queue[i]];
1864 q->mark |= parser->mark;
1868 ret = rte_errno; /* Save rte_errno before cleanup. */
1870 for (i = 0; i != hash_rxq_init_n; ++i) {
1871 if (flow->frxq[i].ibv_flow) {
1872 struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;
1874 claim_zero(mlx5_glue->destroy_flow(ibv_flow));
1876 if (flow->frxq[i].hrxq)
1877 mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
1878 if (flow->frxq[i].ibv_attr)
1879 rte_free(flow->frxq[i].ibv_attr);
1882 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
1886 rte_errno = ret; /* Restore rte_errno. */
1894 * Pointer to Ethernet device.
1896 * Pointer to a TAILQ flow list.
1898 * Flow rule attributes.
1899 * @param[in] pattern
1900 * Pattern specification (list terminated by the END pattern item).
1901 * @param[in] actions
1902 * Associated actions (list terminated by the END action).
1904 * Perform verbose error reporting if not NULL.
1907 * A flow on success, NULL otherwise and rte_errno is set.
1909 static struct rte_flow *
1910 mlx5_flow_list_create(struct rte_eth_dev *dev,
1911 struct mlx5_flows *list,
1912 const struct rte_flow_attr *attr,
1913 const struct rte_flow_item items[],
1914 const struct rte_flow_action actions[],
1915 struct rte_flow_error *error)
1917 struct mlx5_flow_parse parser = { .create = 1, };
1918 struct rte_flow *flow = NULL;
1922 ret = mlx5_flow_convert(dev, attr, items, actions, error, &parser);
1925 flow = rte_calloc(__func__, 1,
1927 parser.rss_conf.queue_num * sizeof(uint16_t),
1930 rte_flow_error_set(error, ENOMEM,
1931 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1933 "cannot allocate flow memory");
1936 /* Copy configuration. */
1937 flow->queues = (uint16_t (*)[])(flow + 1);
1938 flow->rss_conf = (struct rte_flow_action_rss){
1939 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
1940 .types = parser.rss_conf.types,
1941 .key_len = parser.rss_conf.key_len,
1942 .queue_num = parser.rss_conf.queue_num,
1943 .key = memcpy(flow->rss_key, parser.rss_conf.key,
1944 sizeof(*parser.rss_conf.key) *
1945 parser.rss_conf.key_len),
1946 .queue = memcpy(flow->queues, parser.rss_conf.queue,
1947 sizeof(*parser.rss_conf.queue) *
1948 parser.rss_conf.queue_num),
1950 flow->mark = parser.mark;
1951 /* Finalise the flow. */
1953 ret = mlx5_flow_create_action_queue_drop(dev, &parser, flow,
1956 ret = mlx5_flow_create_action_queue(dev, &parser, flow, error);
1959 TAILQ_INSERT_TAIL(list, flow, next);
1960 DRV_LOG(DEBUG, "port %u flow created %p", dev->data->port_id,
1964 DRV_LOG(ERR, "port %u flow creation error: %s", dev->data->port_id,
1966 for (i = 0; i != hash_rxq_init_n; ++i) {
1967 if (parser.queue[i].ibv_attr)
1968 rte_free(parser.queue[i].ibv_attr);
1975 * Validate a flow supported by the NIC.
1977 * @see rte_flow_validate()
1981 mlx5_flow_validate(struct rte_eth_dev *dev,
1982 const struct rte_flow_attr *attr,
1983 const struct rte_flow_item items[],
1984 const struct rte_flow_action actions[],
1985 struct rte_flow_error *error)
1987 struct mlx5_flow_parse parser = { .create = 0, };
1989 return mlx5_flow_convert(dev, attr, items, actions, error, &parser);
1995 * @see rte_flow_create()
1999 mlx5_flow_create(struct rte_eth_dev *dev,
2000 const struct rte_flow_attr *attr,
2001 const struct rte_flow_item items[],
2002 const struct rte_flow_action actions[],
2003 struct rte_flow_error *error)
2005 struct priv *priv = dev->data->dev_private;
2007 return mlx5_flow_list_create(dev, &priv->flows, attr, items, actions,
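/*
 * Editorial usage sketch (application side, port_id is a placeholder):
 *
 *	struct rte_flow_attr attr = { .ingress = 1 };
 *	struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 *	struct rte_flow_action_queue queue = { .index = 0 };
 *	struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 *	struct rte_flow_error err;
 *	struct rte_flow *f = rte_flow_create(port_id, &attr, pattern,
 *					     actions, &err);
 *
 * which reaches mlx5_flow_list_create() through the mlx5_flow_ops
 * table above.
 */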
2012 * Destroy a flow in a list.
2015 * Pointer to Ethernet device.
2017 * Pointer to a TAILQ flow list.
2022 mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
2023 struct rte_flow *flow)
2025 struct priv *priv = dev->data->dev_private;
2028 if (flow->drop || !flow->mark)
2030 for (i = 0; i != flow->rss_conf.queue_num; ++i) {
2031 struct rte_flow *tmp;
2035 * To remove the mark from the queue, the queue must not be
2036 * present in any other marked flow (RSS or not).
2038 TAILQ_FOREACH(tmp, list, next) {
2040 uint16_t *tqs = NULL;
2045 for (j = 0; j != hash_rxq_init_n; ++j) {
2046 if (!tmp->frxq[j].hrxq)
2048 tqs = tmp->frxq[j].hrxq->ind_table->queues;
2049 tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
2053 for (j = 0; (j != tq_n) && !mark; j++)
2054 if (tqs[j] == (*flow->queues)[i])
2057 (*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
2061 if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
2062 claim_zero(mlx5_glue->destroy_flow
2063 (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2064 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
2066 for (i = 0; i != hash_rxq_init_n; ++i) {
2067 struct mlx5_flow *frxq = &flow->frxq[i];
2070 claim_zero(mlx5_glue->destroy_flow
2073 mlx5_hrxq_release(dev, frxq->hrxq);
2075 rte_free(frxq->ibv_attr);
2079 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2082 TAILQ_REMOVE(list, flow, next);
2083 DRV_LOG(DEBUG, "port %u flow destroyed %p", dev->data->port_id,
2089 * Destroy all flows.
2092 * Pointer to Ethernet device.
2094 * Pointer to a TAILQ flow list.
2097 mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
2099 while (!TAILQ_EMPTY(list)) {
2100 struct rte_flow *flow;
2102 flow = TAILQ_FIRST(list);
2103 mlx5_flow_list_destroy(dev, list, flow);
2108 * Create drop queue.
2111 * Pointer to Ethernet device.
2114 * 0 on success, a negative errno value otherwise and rte_errno is set.
2117 mlx5_flow_create_drop_queue(struct rte_eth_dev *dev)
2119 struct priv *priv = dev->data->dev_private;
2120 struct mlx5_hrxq_drop *fdq = NULL;
2124 fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
2127 "port %u cannot allocate memory for drop queue",
2128 dev->data->port_id);
2132 fdq->cq = mlx5_glue->create_cq(priv->ctx, 1, NULL, NULL, 0);
2134 DRV_LOG(WARNING, "port %u cannot allocate CQ for drop queue",
2135 dev->data->port_id);
2139 fdq->wq = mlx5_glue->create_wq
2141 &(struct ibv_wq_init_attr){
2142 .wq_type = IBV_WQT_RQ,
2149 DRV_LOG(WARNING, "port %u cannot allocate WQ for drop queue",
2150 dev->data->port_id);
2154 fdq->ind_table = mlx5_glue->create_rwq_ind_table
2156 &(struct ibv_rwq_ind_table_init_attr){
2157 .log_ind_tbl_size = 0,
2158 .ind_tbl = &fdq->wq,
2161 if (!fdq->ind_table) {
2163 "port %u cannot allocate indirection table for drop"
2165 dev->data->port_id);
2169 fdq->qp = mlx5_glue->create_qp_ex
2171 &(struct ibv_qp_init_attr_ex){
2172 .qp_type = IBV_QPT_RAW_PACKET,
2174 IBV_QP_INIT_ATTR_PD |
2175 IBV_QP_INIT_ATTR_IND_TABLE |
2176 IBV_QP_INIT_ATTR_RX_HASH,
2177 .rx_hash_conf = (struct ibv_rx_hash_conf){
2179 IBV_RX_HASH_FUNC_TOEPLITZ,
2180 .rx_hash_key_len = rss_hash_default_key_len,
2181 .rx_hash_key = rss_hash_default_key,
2182 .rx_hash_fields_mask = 0,
2184 .rwq_ind_tbl = fdq->ind_table,
2188 DRV_LOG(WARNING, "port %u cannot allocate QP for drop queue",
2189 dev->data->port_id);
2193 priv->flow_drop_queue = fdq;
2197 claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2199 claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2201 claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2203 claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2206 priv->flow_drop_queue = NULL;
2211 * Delete drop queue.
2214 * Pointer to Ethernet device.
2217 mlx5_flow_delete_drop_queue(struct rte_eth_dev *dev)
2219 struct priv *priv = dev->data->dev_private;
2220 struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
2225 claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2227 claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2229 claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2231 claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2233 priv->flow_drop_queue = NULL;
2240 * Pointer to Ethernet device.
2242 * Pointer to a TAILQ flow list.
2245 mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
2247 struct priv *priv = dev->data->dev_private;
2248 struct rte_flow *flow;
2250 TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
2252 struct mlx5_ind_table_ibv *ind_tbl = NULL;
2255 if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
2257 claim_zero(mlx5_glue->destroy_flow
2258 (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2259 flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
2260 DRV_LOG(DEBUG, "port %u flow %p removed",
2261 dev->data->port_id, (void *)flow);
2265 /* Verify the flow has not already been cleaned. */
2266 for (i = 0; i != hash_rxq_init_n; ++i) {
2267 if (!flow->frxq[i].ibv_flow)
2270 * Indirection table may be necessary to remove the
2271 * flags in the Rx queues.
2272 * This helps to speed up the process by avoiding another loop.
2275 ind_tbl = flow->frxq[i].hrxq->ind_table;
2278 if (i == hash_rxq_init_n)
2282 for (i = 0; i != ind_tbl->queues_n; ++i)
2283 (*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
2285 for (i = 0; i != hash_rxq_init_n; ++i) {
2286 if (!flow->frxq[i].ibv_flow)
2288 claim_zero(mlx5_glue->destroy_flow
2289 (flow->frxq[i].ibv_flow));
2290 flow->frxq[i].ibv_flow = NULL;
2291 mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
2292 flow->frxq[i].hrxq = NULL;
2294 DRV_LOG(DEBUG, "port %u flow %p removed", dev->data->port_id,
2303 * Pointer to Ethernet device.
2305 * Pointer to a TAILQ flow list.
2308 * 0 on success, a negative errno value otherwise and rte_errno is set.
2311 mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
2313 struct priv *priv = dev->data->dev_private;
2314 struct rte_flow *flow;
2316 TAILQ_FOREACH(flow, list, next) {
2320 flow->frxq[HASH_RXQ_ETH].ibv_flow =
2321 mlx5_glue->create_flow
2322 (priv->flow_drop_queue->qp,
2323 flow->frxq[HASH_RXQ_ETH].ibv_attr);
2324 if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2326 "port %u flow %p cannot be applied",
2327 dev->data->port_id, (void *)flow);
2331 DRV_LOG(DEBUG, "port %u flow %p applied",
2332 dev->data->port_id, (void *)flow);
2336 for (i = 0; i != hash_rxq_init_n; ++i) {
2337 if (!flow->frxq[i].ibv_attr)
2339 flow->frxq[i].hrxq =
2340 mlx5_hrxq_get(dev, flow->rss_conf.key,
2341 flow->rss_conf.key_len,
2342 hash_rxq_init[i].hash_fields,
2343 flow->rss_conf.queue,
2344 flow->rss_conf.queue_num);
2345 if (flow->frxq[i].hrxq)
2347 flow->frxq[i].hrxq =
2348 mlx5_hrxq_new(dev, flow->rss_conf.key,
2349 flow->rss_conf.key_len,
2350 hash_rxq_init[i].hash_fields,
2351 flow->rss_conf.queue,
2352 flow->rss_conf.queue_num);
2353 if (!flow->frxq[i].hrxq) {
2355 "port %u flow %p cannot be applied",
2356 dev->data->port_id, (void *)flow);
2361 flow->frxq[i].ibv_flow =
2362 mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
2363 flow->frxq[i].ibv_attr);
2364 if (!flow->frxq[i].ibv_flow) {
2366 "port %u flow %p cannot be applied",
2367 dev->data->port_id, (void *)flow);
2371 DRV_LOG(DEBUG, "port %u flow %p applied",
2372 dev->data->port_id, (void *)flow);
2376 for (i = 0; i != flow->rss_conf.queue_num; ++i)
2377 (*priv->rxqs)[flow->rss_conf.queue[i]]->mark = 1;
2383 * Verify the flow list is empty.
2386 * Pointer to Ethernet device.
2388 * @return the number of flows not released.
2391 mlx5_flow_verify(struct rte_eth_dev *dev)
2393 struct priv *priv = dev->data->dev_private;
2394 struct rte_flow *flow;
2397 TAILQ_FOREACH(flow, &priv->flows, next) {
2398 DRV_LOG(DEBUG, "port %u flow %p still referenced",
2399 dev->data->port_id, (void *)flow);
2406 * Enable a control flow configured from the control plane.
2409 * Pointer to Ethernet device.
2411 * An Ethernet flow spec to apply.
2413 * An Ethernet flow mask to apply.
2415 * A VLAN flow spec to apply.
2417 * A VLAN flow mask to apply.
2420 * 0 on success, a negative errno value otherwise and rte_errno is set.
2423 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
2424 struct rte_flow_item_eth *eth_spec,
2425 struct rte_flow_item_eth *eth_mask,
2426 struct rte_flow_item_vlan *vlan_spec,
2427 struct rte_flow_item_vlan *vlan_mask)
{
	struct priv *priv = dev->data->dev_private;
	const struct rte_flow_attr attr = {
		.ingress = 1,
		.priority = MLX5_CTRL_FLOW_PRIORITY,
	};
	struct rte_flow_item items[] = {
		{
			.type = RTE_FLOW_ITEM_TYPE_ETH,
			.spec = eth_spec,
			.last = NULL,
			.mask = eth_mask,
		},
		{
			.type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
				RTE_FLOW_ITEM_TYPE_END,
			.spec = vlan_spec,
			.last = NULL,
			.mask = vlan_mask,
		},
		{
			.type = RTE_FLOW_ITEM_TYPE_END,
		},
	};
	uint16_t queue[priv->reta_idx_n];
	struct rte_flow_action_rss action_rss = {
		.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
		.types = priv->rss_conf.rss_hf,
		.key_len = priv->rss_conf.rss_key_len,
		.queue_num = priv->reta_idx_n,
		.key = priv->rss_conf.rss_key,
		.queue = queue,
	};
	struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_RSS,
			.conf = &action_rss,
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		},
	};
	struct rte_flow *flow;
	struct rte_flow_error error;
	unsigned int i;

	if (!priv->reta_idx_n) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	for (i = 0; i != priv->reta_idx_n; ++i)
		queue[i] = (*priv->reta_idx)[i];
	flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
				     actions, &error);
	if (!flow)
		return -rte_errno;
	return 0;
}
/**
 * Enable a control flow configured from the control plane.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param eth_spec
 *   An Ethernet flow spec to apply.
 * @param eth_mask
 *   An Ethernet flow mask to apply.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_ctrl_flow(struct rte_eth_dev *dev,
	       struct rte_flow_item_eth *eth_spec,
	       struct rte_flow_item_eth *eth_mask)
{
	return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
}
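/*
 * Usage sketch (illustrative, modeled on the driver's traffic-enable path,
 * which installs control flows for broadcast and similar traffic).
 * Accepting all broadcast frames, for example, comes down to:
 *
 *	static struct rte_flow_item_eth bcast = {
 *		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *	};
 *
 *	if (mlx5_ctrl_flow(dev, &bcast, &bcast))
 *		DRV_LOG(ERR, "port %u cannot enable broadcast flow",
 *			dev->data->port_id);
 *
 * The same spec is reused as its own mask so the match is exact.
 */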
/**
 * Destroy a flow.
 *
 * @see rte_flow_destroy()
 * @see rte_flow_ops
 */
int
mlx5_flow_destroy(struct rte_eth_dev *dev,
		  struct rte_flow *flow,
		  struct rte_flow_error *error __rte_unused)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_destroy(dev, &priv->flows, flow);
	return 0;
}
/**
 * Destroy all flows.
 *
 * @see rte_flow_flush()
 * @see rte_flow_ops
 */
int
mlx5_flow_flush(struct rte_eth_dev *dev,
		struct rte_flow_error *error __rte_unused)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_flush(dev, &priv->flows);
	return 0;
}
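/*
 * Usage sketch (illustrative): both callbacks above are reached through the
 * generic rte_flow API rather than called directly, e.g.:
 *
 *	struct rte_flow_error err;
 *
 *	if (rte_flow_flush(port_id, &err))
 *		printf("flush failed: %s\n",
 *		       err.message ? err.message : "unknown");
 */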
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
/**
 * Query flow counter.
 *
 * @param cs
 *   The counter set to query.
 * @param counter_stats
 *   Saved counter values, used as the baseline and updated on reset.
 * @param[out] query_count
 *   Returned data from the counter.
 * @param error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_query_count(struct ibv_counter_set *cs,
		      struct mlx5_flow_counter_stats *counter_stats,
		      struct rte_flow_query_count *query_count,
		      struct rte_flow_error *error)
{
	uint64_t counters[2];
	struct ibv_query_counter_set_attr query_cs_attr = {
		.cs = cs,
		.query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
	};
	struct ibv_counter_set_data query_out = {
		.out = counters,
		.outlen = 2 * sizeof(uint64_t),
	};
	int err = mlx5_glue->query_counter_set(&query_cs_attr, &query_out);

	if (err)
		return rte_flow_error_set(error, err,
					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
					  NULL,
					  "cannot read counter");
	query_count->hits_set = 1;
	query_count->bytes_set = 1;
	/* Report values relative to the last saved baseline. */
	query_count->hits = counters[0] - counter_stats->hits;
	query_count->bytes = counters[1] - counter_stats->bytes;
	/* On reset, move the baseline to the current raw values. */
	if (query_count->reset) {
		counter_stats->hits = counters[0];
		counter_stats->bytes = counters[1];
	}
	return 0;
}
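/*
 * Worked example (illustrative): hardware counters are cumulative, so the
 * function above reports deltas against the saved baseline. If counters[0]
 * reads 150 while counter_stats->hits holds 100, the query returns
 * hits = 50; with query_count->reset set, the baseline becomes 150 and the
 * next query starts counting from zero again.
 */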
/**
 * Query a flow.
 *
 * @see rte_flow_query()
 * @see rte_flow_ops
 */
int
mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
		struct rte_flow *flow,
		enum rte_flow_action_type action __rte_unused,
		void *data,
		struct rte_flow_error *error)
{
	if (flow->cs) {
		int ret;

		ret = mlx5_flow_query_count(flow->cs,
					    &flow->counter_stats,
					    (struct rte_flow_query_count *)data,
					    error);
		if (ret)
			return ret;
	} else {
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
					  NULL,
					  "no counter found for flow");
	}
	return 0;
}
#endif
/**
 * Isolated mode.
 *
 * @see rte_flow_isolate()
 * @see rte_flow_ops
 */
int
mlx5_flow_isolate(struct rte_eth_dev *dev,
		  int enable,
		  struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	if (dev->data->dev_started) {
		rte_flow_error_set(error, EBUSY,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "port must be stopped first");
		return -rte_errno;
	}
	priv->isolated = !!enable;
	if (enable)
		priv->dev->dev_ops = &mlx5_dev_ops_isolate;
	else
		priv->dev->dev_ops = &mlx5_dev_ops;
	return 0;
}
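/*
 * Usage sketch (illustrative): isolated mode must be chosen while the port
 * is stopped, typically right after configuration and before start:
 *
 *	struct rte_flow_error err;
 *
 *	rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &conf);
 *	if (rte_flow_isolate(port_id, 1, &err))	// enter isolated mode
 *		printf("isolate failed: %s\n",
 *		       err.message ? err.message : "unknown");
 *	// ... queue setup ...
 *	rte_eth_dev_start(port_id);
 *
 * Disabling it again (enable == 0) restores the regular dev_ops table.
 */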
/**
 * Convert a flow director filter to a generic flow.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Flow director filter to add.
 * @param attributes
 *   Generic flow parameters structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
			 const struct rte_eth_fdir_filter *fdir_filter,
			 struct mlx5_fdir *attributes)
{
	struct priv *priv = dev->data->dev_private;
	const struct rte_eth_fdir_input *input = &fdir_filter->input;
	const struct rte_eth_fdir_masks *mask =
		&dev->data->dev_conf.fdir_conf.mask;

	/* Validate queue number. */
	if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
		DRV_LOG(ERR, "port %u invalid queue number %d",
			dev->data->port_id, fdir_filter->action.rx_queue);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	attributes->attr.ingress = 1;
	attributes->items[0] = (struct rte_flow_item) {
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.spec = &attributes->l2,
		.mask = &attributes->l2_mask,
	};
	switch (fdir_filter->action.behavior) {
	case RTE_ETH_FDIR_ACCEPT:
		attributes->actions[0] = (struct rte_flow_action){
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &attributes->queue,
		};
		break;
	case RTE_ETH_FDIR_REJECT:
		attributes->actions[0] = (struct rte_flow_action){
			.type = RTE_FLOW_ACTION_TYPE_DROP,
		};
		break;
	default:
		DRV_LOG(ERR, "port %u invalid behavior %d",
			dev->data->port_id,
			fdir_filter->action.behavior);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	attributes->queue.index = fdir_filter->action.rx_queue;
	/* Handle L3. */
	switch (fdir_filter->input.flow_type) {
	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = input->flow.ip4_flow.src_ip,
			.dst_addr = input->flow.ip4_flow.dst_ip,
			.time_to_live = input->flow.ip4_flow.ttl,
			.type_of_service = input->flow.ip4_flow.tos,
			.next_proto_id = input->flow.ip4_flow.proto,
		};
		attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = mask->ipv4_mask.src_ip,
			.dst_addr = mask->ipv4_mask.dst_ip,
			.time_to_live = mask->ipv4_mask.ttl,
			.type_of_service = mask->ipv4_mask.tos,
			.next_proto_id = mask->ipv4_mask.proto,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV4,
			.spec = &attributes->l3,
			.mask = &attributes->l3_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
			.hop_limits = input->flow.ipv6_flow.hop_limits,
			.proto = input->flow.ipv6_flow.proto,
		};
		memcpy(attributes->l3.ipv6.hdr.src_addr,
		       input->flow.ipv6_flow.src_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
		memcpy(attributes->l3.ipv6.hdr.dst_addr,
		       input->flow.ipv6_flow.dst_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
		memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
		       mask->ipv6_mask.src_ip,
		       RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
		memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
		       mask->ipv6_mask.dst_ip,
		       RTE_DIM(attributes->l3_mask.ipv6.hdr.dst_addr));
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV6,
			.spec = &attributes->l3,
			.mask = &attributes->l3_mask,
		};
		break;
	default:
		DRV_LOG(ERR, "port %u invalid flow type %d",
			dev->data->port_id, fdir_filter->input.flow_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	/* Handle L4. */
	switch (fdir_filter->input.flow_type) {
	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
		attributes->l4.udp.hdr = (struct udp_hdr){
			.src_port = input->flow.udp4_flow.src_port,
			.dst_port = input->flow.udp4_flow.dst_port,
		};
		attributes->l4_mask.udp.hdr = (struct udp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_UDP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
		attributes->l4.tcp.hdr = (struct tcp_hdr){
			.src_port = input->flow.tcp4_flow.src_port,
			.dst_port = input->flow.tcp4_flow.dst_port,
		};
		attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_TCP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
		attributes->l4.udp.hdr = (struct udp_hdr){
			.src_port = input->flow.udp6_flow.src_port,
			.dst_port = input->flow.udp6_flow.dst_port,
		};
		attributes->l4_mask.udp.hdr = (struct udp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_UDP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
		attributes->l4.tcp.hdr = (struct tcp_hdr){
			.src_port = input->flow.tcp6_flow.src_port,
			.dst_port = input->flow.tcp6_flow.dst_port,
		};
		attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_TCP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
		/* No L4 item for "other" flow types. */
		break;
	default:
		DRV_LOG(ERR, "port %u invalid flow type %d",
			dev->data->port_id, fdir_filter->input.flow_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	return 0;
}
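/*
 * Worked example (illustrative): a perfect-mode IPv4/UDP filter such as
 *
 *	struct rte_eth_fdir_filter f = {
 *		.input = {
 *			.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
 *			.flow.udp4_flow = {
 *				.ip = { .src_ip = ..., .dst_ip = ... },
 *				.src_port = ..., .dst_port = ...,
 *			},
 *		},
 *		.action = {
 *			.rx_queue = 3,
 *			.behavior = RTE_ETH_FDIR_ACCEPT,
 *		},
 *	};
 *
 * is rewritten by this function as the generic pattern
 * ETH / IPV4 / UDP / END with a QUEUE action targeting queue 3, which the
 * regular rte_flow path can then validate and apply.
 */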
/**
 * Add new flow director filter and store it in list.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Flow director filter to add.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_add(struct rte_eth_dev *dev,
		     const struct rte_eth_fdir_filter *fdir_filter)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_fdir attributes = {
		.attr.group = 0,
		.l2_mask = {
			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.type = 0,
		},
	};
	struct mlx5_flow_parse parser = {
		.layer = HASH_RXQ_ETH,
	};
	struct rte_flow_error error;
	struct rte_flow *flow;
	int ret;

	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
	if (ret)
		return ret;
	ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
				attributes.actions, &error, &parser);
	if (ret)
		return ret;
	flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
				     attributes.items, attributes.actions,
				     &error);
	if (flow) {
		DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
			(void *)flow);
		return 0;
	}
	return -rte_errno;
}
/**
 * Delete specific filter.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Filter to be deleted.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_delete(struct rte_eth_dev *dev,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_fdir attributes = {
		.attr.group = 0,
	};
	struct mlx5_flow_parse parser = {
		.create = 1,
		.layer = HASH_RXQ_ETH,
	};
	struct rte_flow_error error;
	struct rte_flow *flow;
	unsigned int i;
	int ret;

	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
	if (ret)
		return ret;
	ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
				attributes.actions, &error, &parser);
	if (ret)
		goto exit;
	/*
	 * Special case for drop action which is only set in the
	 * specifications when the flow is created. In this situation the
	 * drop specification is missing.
	 */
	if (parser.drop) {
		struct ibv_flow_spec_action_drop *drop;

		drop = (void *)((uintptr_t)parser.queue[HASH_RXQ_ETH].ibv_attr +
				parser.queue[HASH_RXQ_ETH].offset);
		*drop = (struct ibv_flow_spec_action_drop){
			.type = IBV_FLOW_SPEC_ACTION_DROP,
			.size = sizeof(struct ibv_flow_spec_action_drop),
		};
		parser.queue[HASH_RXQ_ETH].ibv_attr->num_of_specs++;
	}
	TAILQ_FOREACH(flow, &priv->flows, next) {
		struct ibv_flow_attr *attr;
		struct ibv_spec_header *attr_h;
		void *spec;
		struct ibv_flow_attr *flow_attr;
		struct ibv_spec_header *flow_h;
		void *flow_spec;
		unsigned int specs_n;

		attr = parser.queue[HASH_RXQ_ETH].ibv_attr;
		flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr;
		/* Compare first the attributes. */
		if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
			continue;
		if (attr->num_of_specs == 0)
			continue;
		spec = (void *)((uintptr_t)attr +
				sizeof(struct ibv_flow_attr));
		flow_spec = (void *)((uintptr_t)flow_attr +
				     sizeof(struct ibv_flow_attr));
		specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
		for (i = 0; i != specs_n; ++i) {
			attr_h = spec;
			flow_h = flow_spec;
			if (memcmp(spec, flow_spec,
				   RTE_MIN(attr_h->size, flow_h->size)))
				goto wrong_flow;
			spec = (void *)((uintptr_t)spec + attr_h->size);
			flow_spec = (void *)((uintptr_t)flow_spec +
					     flow_h->size);
		}
		/* At this point, the flow matches. */
		break;
wrong_flow:
		/* The flow does not match. */
		continue;
	}
	ret = rte_errno; /* Save rte_errno before cleanup. */
	if (flow)
		mlx5_flow_list_destroy(dev, &priv->flows, flow);
exit:
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser.queue[i].ibv_attr)
			rte_free(parser.queue[i].ibv_attr);
	}
	rte_errno = ret; /* Restore rte_errno. */
	return -ret;
}
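/*
 * Note on the matching loop above (illustrative): a verbs flow description
 * is a struct ibv_flow_attr immediately followed by num_of_specs
 * variable-size specifications, each starting with a struct ibv_spec_header
 * carrying its type and size:
 *
 *	+---------------+--------------+--------------+--
 *	| ibv_flow_attr | spec 0       | spec 1       | ...
 *	|               | header, body | header, body |
 *	+---------------+--------------+--------------+--
 *
 * Two flows are considered the same filter when their attributes match and
 * every specification compares equal byte for byte.
 */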
/**
 * Update queue for specific filter.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Filter to be updated.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_update(struct rte_eth_dev *dev,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	int ret;

	ret = mlx5_fdir_filter_delete(dev, fdir_filter);
	if (ret)
		return ret;
	return mlx5_fdir_filter_add(dev, fdir_filter);
}
/**
 * Flush all filters.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
static void
mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_flush(dev, &priv->flows);
}
/**
 * Get flow director information.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[out] fdir_info
 *   Resulting flow director information.
 */
static void
mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
{
	struct priv *priv = dev->data->dev_private;
	struct rte_eth_fdir_masks *mask =
		&priv->dev->data->dev_conf.fdir_conf.mask;

	fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
	fdir_info->guarant_spc = 0;
	rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
	/* No flexible payload support. */
	fdir_info->max_flexpayload = 0;
	fdir_info->flow_types_mask[0] = 0;
	fdir_info->flex_payload_unit = 0;
	fdir_info->max_flex_payload_segment_num = 0;
	fdir_info->flex_payload_limit = 0;
	memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
}
/**
 * Deal with flow director operations.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
		    void *arg)
{
	struct priv *priv = dev->data->dev_private;
	enum rte_fdir_mode fdir_mode =
		priv->dev->data->dev_conf.fdir_conf.mode;

	if (filter_op == RTE_ETH_FILTER_NOP)
		return 0;
	if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
	    fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
		DRV_LOG(ERR, "port %u flow director mode %d not supported",
			dev->data->port_id, fdir_mode);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	switch (filter_op) {
	case RTE_ETH_FILTER_ADD:
		return mlx5_fdir_filter_add(dev, arg);
	case RTE_ETH_FILTER_UPDATE:
		return mlx5_fdir_filter_update(dev, arg);
	case RTE_ETH_FILTER_DELETE:
		return mlx5_fdir_filter_delete(dev, arg);
	case RTE_ETH_FILTER_FLUSH:
		mlx5_fdir_filter_flush(dev);
		break;
	case RTE_ETH_FILTER_INFO:
		mlx5_fdir_info_get(dev, arg);
		break;
	default:
		DRV_LOG(DEBUG, "port %u unknown operation %u",
			dev->data->port_id, filter_op);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	return 0;
}
/**
 * Manage filter operations.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param filter_type
 *   Filter type.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
		     enum rte_filter_type filter_type,
		     enum rte_filter_op filter_op,
		     void *arg)
{
	switch (filter_type) {
	case RTE_ETH_FILTER_GENERIC:
		if (filter_op != RTE_ETH_FILTER_GET) {
			rte_errno = EINVAL;
			return -rte_errno;
		}
		*(const void **)arg = &mlx5_flow_ops;
		return 0;
	case RTE_ETH_FILTER_FDIR:
		return mlx5_fdir_ctrl_func(dev, filter_op, arg);
	default:
		DRV_LOG(ERR, "port %u filter type (%d) not supported",
			dev->data->port_id, filter_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	return 0;
}
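/*
 * Usage sketch (illustrative): the RTE_ETH_FILTER_GENERIC /
 * RTE_ETH_FILTER_GET branch above is how the rte_flow layer locates this
 * driver's callbacks; every rte_flow_*() entry point internally performs
 * the equivalent of:
 *
 *	const struct rte_flow_ops *ops;
 *
 *	rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_GENERIC,
 *				RTE_ETH_FILTER_GET, &ops);
 *
 * before dispatching to mlx5_flow_ops.
 */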