1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2016 6WIND S.A.
3 * Copyright 2016 Mellanox Technologies, Ltd
11 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
13 #pragma GCC diagnostic ignored "-Wpedantic"
15 #include <infiniband/verbs.h>
17 #pragma GCC diagnostic error "-Wpedantic"
20 #include <rte_common.h>
21 #include <rte_eth_ctrl.h>
22 #include <rte_ethdev_driver.h>
24 #include <rte_flow_driver.h>
25 #include <rte_malloc.h>
29 #include "mlx5_defs.h"
31 #include "mlx5_glue.h"
33 /* Define minimal priority for control plane flows. */
34 #define MLX5_CTRL_FLOW_PRIORITY 4
36 /* Internet Protocol versions. */
40 #ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
41 struct ibv_flow_spec_counter_action {
46 /* Dev ops structure defined in mlx5.c */
47 extern const struct eth_dev_ops mlx5_dev_ops;
48 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
50 /** Structure given to the conversion functions. */
51 struct mlx5_flow_data {
52 struct mlx5_flow_parse *parser; /**< Parser context. */
53 struct rte_flow_error *error; /**< Error context. */
57 mlx5_flow_create_eth(const struct rte_flow_item *item,
58 const void *default_mask,
59 struct mlx5_flow_data *data);
62 mlx5_flow_create_vlan(const struct rte_flow_item *item,
63 const void *default_mask,
64 struct mlx5_flow_data *data);
67 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
68 const void *default_mask,
69 struct mlx5_flow_data *data);
72 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
73 const void *default_mask,
74 struct mlx5_flow_data *data);
77 mlx5_flow_create_udp(const struct rte_flow_item *item,
78 const void *default_mask,
79 struct mlx5_flow_data *data);
82 mlx5_flow_create_tcp(const struct rte_flow_item *item,
83 const void *default_mask,
84 struct mlx5_flow_data *data);
87 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
88 const void *default_mask,
89 struct mlx5_flow_data *data);
91 struct mlx5_flow_parse;
94 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
98 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);
101 mlx5_flow_create_count(struct rte_eth_dev *dev, struct mlx5_flow_parse *parser);
103 /* Hash RX queue types. */
114 /* Initialization data for hash RX queue. */
115 struct hash_rxq_init {
116 uint64_t hash_fields; /* Fields that participate in the hash. */
117 uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
118 unsigned int flow_priority; /* Flow priority to use. */
119 unsigned int ip_version; /* Internet protocol. */
122 /* Initialization data for hash RX queues. */
123 const struct hash_rxq_init hash_rxq_init[] = {
125 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
126 IBV_RX_HASH_DST_IPV4 |
127 IBV_RX_HASH_SRC_PORT_TCP |
128 IBV_RX_HASH_DST_PORT_TCP),
129 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
131 .ip_version = MLX5_IPV4,
134 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
135 IBV_RX_HASH_DST_IPV4 |
136 IBV_RX_HASH_SRC_PORT_UDP |
137 IBV_RX_HASH_DST_PORT_UDP),
138 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
140 .ip_version = MLX5_IPV4,
143 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
144 IBV_RX_HASH_DST_IPV4),
145 .dpdk_rss_hf = (ETH_RSS_IPV4 |
148 .ip_version = MLX5_IPV4,
151 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
152 IBV_RX_HASH_DST_IPV6 |
153 IBV_RX_HASH_SRC_PORT_TCP |
154 IBV_RX_HASH_DST_PORT_TCP),
155 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
157 .ip_version = MLX5_IPV6,
160 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
161 IBV_RX_HASH_DST_IPV6 |
162 IBV_RX_HASH_SRC_PORT_UDP |
163 IBV_RX_HASH_DST_PORT_UDP),
164 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
166 .ip_version = MLX5_IPV6,
169 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
170 IBV_RX_HASH_DST_IPV6),
171 .dpdk_rss_hf = (ETH_RSS_IPV6 |
174 .ip_version = MLX5_IPV6,
183 /* Number of entries in hash_rxq_init[]. */
184 const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
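/*
 * Illustrative sketch only (not part of the driver): code that needs the
 * Verbs hash fields matching a given DPDK RSS type typically scans this
 * table, e.g. for ETH_RSS_NONFRAG_IPV4_UDP:
 *
 *	unsigned int i;
 *	uint64_t fields = 0;
 *
 *	for (i = 0; i != hash_rxq_init_n; ++i)
 *		if (hash_rxq_init[i].dpdk_rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
 *			fields |= hash_rxq_init[i].hash_fields;
 */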
186 /** Structure for holding counter stats. */
187 struct mlx5_flow_counter_stats {
188 uint64_t hits; /**< Number of packets matched by the rule. */
189 uint64_t bytes; /**< Number of bytes matched by the rule. */
192 /** Structure for Drop queue. */
193 struct mlx5_hrxq_drop {
194 struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
195 struct ibv_qp *qp; /**< Verbs queue pair. */
196 struct ibv_wq *wq; /**< Verbs work queue. */
197 struct ibv_cq *cq; /**< Verbs completion queue. */
200 /* Flow structures. */
202 uint64_t hash_fields; /**< Fields that participate in the hash. */
203 struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
204 struct ibv_flow *ibv_flow; /**< Verbs flow. */
205 struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
208 /* Drop flow structures. */
209 struct mlx5_flow_drop {
210 struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
211 struct ibv_flow *ibv_flow; /**< Verbs flow. */
215 TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
216 uint32_t mark:1; /**< Set if the flow is marked. */
217 uint32_t drop:1; /**< Drop queue. */
218 struct rte_flow_action_rss rss_conf; /**< RSS configuration. */
219 uint16_t (*queues)[]; /**< Queue indexes to use. */
220 uint8_t rss_key[40]; /**< Copy of the RSS key. */
221 struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
222 struct mlx5_flow_counter_stats counter_stats; /**< The counter stats. */
223 struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
224 /**< Flow with Rx queue. */
227 /** Static initializer for items. */
229 (const enum rte_flow_item_type []){ \
230 __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
233 /** Structure to generate a simple graph of layers supported by the NIC. */
234 struct mlx5_flow_items {
235 /** List of possible actions for these items. */
236 const enum rte_flow_action_type *const actions;
237 /** Bit-masks corresponding to the possibilities for the item. */
240 * Default bit-masks to use when item->mask is not provided. When
241 * \default_mask is also NULL, the full supported bit-mask (\mask) is
244 const void *default_mask;
245 /** Bit-masks size in bytes. */
246 const unsigned int mask_sz;
248 * Conversion function from rte_flow to NIC specific flow.
251 * rte_flow item to convert.
252 * @param default_mask
253 * Default bit-masks to use when item->mask is not provided.
255 * Internal structure to store the conversion.
258 * 0 on success, a negative errno value otherwise and rte_errno is
261 int (*convert)(const struct rte_flow_item *item,
262 const void *default_mask,
263 struct mlx5_flow_data *data);
264 /** Size in bytes of the destination structure. */
265 const unsigned int dst_sz;
266 /** List of possible following items. */
267 const enum rte_flow_item_type *const items;
270 /** Valid actions for this PMD. */
271 static const enum rte_flow_action_type valid_actions[] = {
272 RTE_FLOW_ACTION_TYPE_DROP,
273 RTE_FLOW_ACTION_TYPE_QUEUE,
274 RTE_FLOW_ACTION_TYPE_MARK,
275 RTE_FLOW_ACTION_TYPE_FLAG,
276 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
277 RTE_FLOW_ACTION_TYPE_COUNT,
279 RTE_FLOW_ACTION_TYPE_END,
282 /** Graph of supported items and associated actions. */
283 static const struct mlx5_flow_items mlx5_flow_items[] = {
284 [RTE_FLOW_ITEM_TYPE_END] = {
285 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
286 RTE_FLOW_ITEM_TYPE_VXLAN),
288 [RTE_FLOW_ITEM_TYPE_ETH] = {
289 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
290 RTE_FLOW_ITEM_TYPE_IPV4,
291 RTE_FLOW_ITEM_TYPE_IPV6),
292 .actions = valid_actions,
293 .mask = &(const struct rte_flow_item_eth){
294 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
295 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
298 .default_mask = &rte_flow_item_eth_mask,
299 .mask_sz = sizeof(struct rte_flow_item_eth),
300 .convert = mlx5_flow_create_eth,
301 .dst_sz = sizeof(struct ibv_flow_spec_eth),
303 [RTE_FLOW_ITEM_TYPE_VLAN] = {
304 .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
305 RTE_FLOW_ITEM_TYPE_IPV6),
306 .actions = valid_actions,
307 .mask = &(const struct rte_flow_item_vlan){
310 .default_mask = &rte_flow_item_vlan_mask,
311 .mask_sz = sizeof(struct rte_flow_item_vlan),
312 .convert = mlx5_flow_create_vlan,
315 [RTE_FLOW_ITEM_TYPE_IPV4] = {
316 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
317 RTE_FLOW_ITEM_TYPE_TCP),
318 .actions = valid_actions,
319 .mask = &(const struct rte_flow_item_ipv4){
323 .type_of_service = -1,
327 .default_mask = &rte_flow_item_ipv4_mask,
328 .mask_sz = sizeof(struct rte_flow_item_ipv4),
329 .convert = mlx5_flow_create_ipv4,
330 .dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
332 [RTE_FLOW_ITEM_TYPE_IPV6] = {
333 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
334 RTE_FLOW_ITEM_TYPE_TCP),
335 .actions = valid_actions,
336 .mask = &(const struct rte_flow_item_ipv6){
339 0xff, 0xff, 0xff, 0xff,
340 0xff, 0xff, 0xff, 0xff,
341 0xff, 0xff, 0xff, 0xff,
342 0xff, 0xff, 0xff, 0xff,
345 0xff, 0xff, 0xff, 0xff,
346 0xff, 0xff, 0xff, 0xff,
347 0xff, 0xff, 0xff, 0xff,
348 0xff, 0xff, 0xff, 0xff,
355 .default_mask = &rte_flow_item_ipv6_mask,
356 .mask_sz = sizeof(struct rte_flow_item_ipv6),
357 .convert = mlx5_flow_create_ipv6,
358 .dst_sz = sizeof(struct ibv_flow_spec_ipv6),
360 [RTE_FLOW_ITEM_TYPE_UDP] = {
361 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
362 .actions = valid_actions,
363 .mask = &(const struct rte_flow_item_udp){
369 .default_mask = &rte_flow_item_udp_mask,
370 .mask_sz = sizeof(struct rte_flow_item_udp),
371 .convert = mlx5_flow_create_udp,
372 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
374 [RTE_FLOW_ITEM_TYPE_TCP] = {
375 .actions = valid_actions,
376 .mask = &(const struct rte_flow_item_tcp){
382 .default_mask = &rte_flow_item_tcp_mask,
383 .mask_sz = sizeof(struct rte_flow_item_tcp),
384 .convert = mlx5_flow_create_tcp,
385 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
387 [RTE_FLOW_ITEM_TYPE_VXLAN] = {
388 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
389 .actions = valid_actions,
390 .mask = &(const struct rte_flow_item_vxlan){
391 .vni = "\xff\xff\xff",
393 .default_mask = &rte_flow_item_vxlan_mask,
394 .mask_sz = sizeof(struct rte_flow_item_vxlan),
395 .convert = mlx5_flow_create_vxlan,
396 .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
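/*
 * Illustrative sketch only (hypothetical helper, not part of the driver):
 * validating one step of a pattern such as eth / ipv4 / udp amounts to
 * checking that the next item type is listed in the current node's .items
 * array before moving to that node, which is what the graph above encodes:
 *
 *	static int
 *	next_item_is_supported(const struct mlx5_flow_items *cur,
 *			       enum rte_flow_item_type next)
 *	{
 *		unsigned int i;
 *
 *		for (i = 0; cur->items[i] != RTE_FLOW_ITEM_TYPE_END; ++i)
 *			if (cur->items[i] == next)
 *				return 1;
 *		return 0;
 *	}
 */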
400 /** Structure to pass to the conversion function. */
401 struct mlx5_flow_parse {
402 uint32_t inner; /**< Set once VXLAN is encountered. */
404 /**< Whether resources should remain after a validate. */
405 uint32_t drop:1; /**< Target is a drop queue. */
406 uint32_t mark:1; /**< Mark is present in the flow. */
407 uint32_t count:1; /**< Count is present in the flow. */
408 uint32_t mark_id; /**< Mark identifier. */
409 struct rte_flow_action_rss rss_conf; /**< RSS configuration. */
410 uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indexes to use. */
411 uint8_t rss_key[40]; /**< Copy of the RSS key. */
412 enum hash_rxq_type layer; /**< Last pattern layer detected. */
413 struct ibv_counter_set *cs; /**< Holds the counter set for the rule. */
415 struct ibv_flow_attr *ibv_attr;
416 /**< Pointer to Verbs attributes. */
418 /**< Current position or total size of the attribute. */
419 } queue[RTE_DIM(hash_rxq_init)];
422 static const struct rte_flow_ops mlx5_flow_ops = {
423 .validate = mlx5_flow_validate,
424 .create = mlx5_flow_create,
425 .destroy = mlx5_flow_destroy,
426 .flush = mlx5_flow_flush,
427 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
428 .query = mlx5_flow_query,
432 .isolate = mlx5_flow_isolate,
435 /* Convert FDIR request to Generic flow. */
437 struct rte_flow_attr attr;
438 struct rte_flow_action actions[2];
439 struct rte_flow_item items[4];
440 struct rte_flow_item_eth l2;
441 struct rte_flow_item_eth l2_mask;
443 struct rte_flow_item_ipv4 ipv4;
444 struct rte_flow_item_ipv6 ipv6;
447 struct rte_flow_item_ipv4 ipv4;
448 struct rte_flow_item_ipv6 ipv6;
451 struct rte_flow_item_udp udp;
452 struct rte_flow_item_tcp tcp;
455 struct rte_flow_item_udp udp;
456 struct rte_flow_item_tcp tcp;
458 struct rte_flow_action_queue queue;
461 /* Verbs specification header. */
462 struct ibv_spec_header {
463 enum ibv_flow_spec_type type;
468 * Check support for a given item.
471 * Item specification.
473 * Bit-masks covering supported fields to compare with spec, last and mask in
476 * Bit-mask size in bytes.
479 * 0 on success, a negative errno value otherwise and rte_errno is set.
482 mlx5_flow_item_validate(const struct rte_flow_item *item,
483 const uint8_t *mask, unsigned int size)
485 if (!item->spec && (item->mask || item->last)) {
489 if (item->spec && !item->mask) {
491 const uint8_t *spec = item->spec;
493 for (i = 0; i < size; ++i)
494 if ((spec[i] | mask[i]) != mask[i]) {
499 if (item->last && !item->mask) {
501 const uint8_t *spec = item->last;
503 for (i = 0; i < size; ++i)
504 if ((spec[i] | mask[i]) != mask[i]) {
511 const uint8_t *spec = item->spec;
513 for (i = 0; i < size; ++i)
514 if ((spec[i] | mask[i]) != mask[i]) {
519 if (item->spec && item->last) {
522 const uint8_t *apply = mask;
528 for (i = 0; i < size; ++i) {
529 spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
530 last[i] = ((const uint8_t *)item->last)[i] & apply[i];
532 ret = memcmp(spec, last, size);
542 * Validate flow rule attributes.
545 * Flow rule attributes.
547 * Perform verbose error reporting if not NULL.
550 * 0 on success, a negative errno value otherwise and rte_errno is set.
553 mlx5_flow_convert_attributes(const struct rte_flow_attr *attr,
554 struct rte_flow_error *error)
557 rte_flow_error_set(error, ENOTSUP,
558 RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
560 "groups are not supported");
563 if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
564 rte_flow_error_set(error, ENOTSUP,
565 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
567 "priorities are not supported");
571 rte_flow_error_set(error, ENOTSUP,
572 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
574 "egress is not supported");
577 if (!attr->ingress) {
578 rte_flow_error_set(error, ENOTSUP,
579 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
581 "only ingress is supported");
588 * Extract the actions from the request into the parser.
591 * Pointer to Ethernet device.
593 * Associated actions (list terminated by the END action).
595 * Perform verbose error reporting if not NULL.
596 * @param[in, out] parser
597 * Internal parser structure.
600 * 0 on success, a negative errno value otherwise and rte_errno is set.
603 mlx5_flow_convert_actions(struct rte_eth_dev *dev,
604 const struct rte_flow_action actions[],
605 struct rte_flow_error *error,
606 struct mlx5_flow_parse *parser)
608 enum { FATE = 1, MARK = 2, COUNT = 4, };
609 uint32_t overlap = 0;
610 struct priv *priv = dev->data->dev_private;
612 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
613 if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
615 } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
617 goto exit_action_overlap;
620 } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
621 const struct rte_flow_action_queue *queue =
622 (const struct rte_flow_action_queue *)
626 goto exit_action_overlap;
628 if (!queue || (queue->index > (priv->rxqs_n - 1)))
629 goto exit_action_not_supported;
630 parser->queues[0] = queue->index;
631 parser->rss_conf = (struct rte_flow_action_rss){
633 .queue = parser->queues,
635 } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
636 const struct rte_flow_action_rss *rss =
637 (const struct rte_flow_action_rss *)
639 const uint8_t *rss_key;
640 uint32_t rss_key_len;
644 goto exit_action_overlap;
647 rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ) {
648 rte_flow_error_set(error, EINVAL,
649 RTE_FLOW_ERROR_TYPE_ACTION,
651 "the only supported RSS hash"
652 " function is Toeplitz");
656 rte_flow_error_set(error, EINVAL,
657 RTE_FLOW_ERROR_TYPE_ACTION,
659 "a nonzero RSS encapsulation"
660 " level is not supported");
663 if (rss->types & MLX5_RSS_HF_MASK) {
664 rte_flow_error_set(error, EINVAL,
665 RTE_FLOW_ERROR_TYPE_ACTION,
667 "unsupported RSS type"
672 rss_key_len = rss->key_len;
675 rss_key_len = rss_hash_default_key_len;
676 rss_key = rss_hash_default_key;
678 if (rss_key_len != RTE_DIM(parser->rss_key)) {
679 rte_flow_error_set(error, EINVAL,
680 RTE_FLOW_ERROR_TYPE_ACTION,
682 "RSS hash key must be"
683 " exactly 40 bytes long");
686 if (!rss->queue_num) {
687 rte_flow_error_set(error, EINVAL,
688 RTE_FLOW_ERROR_TYPE_ACTION,
693 if (rss->queue_num > RTE_DIM(parser->queues)) {
694 rte_flow_error_set(error, EINVAL,
695 RTE_FLOW_ERROR_TYPE_ACTION,
697 "too many queues for RSS"
701 for (n = 0; n < rss->queue_num; ++n) {
702 if (rss->queue[n] >= priv->rxqs_n) {
703 rte_flow_error_set(error, EINVAL,
704 RTE_FLOW_ERROR_TYPE_ACTION,
706 "queue id > number of"
711 parser->rss_conf = (struct rte_flow_action_rss){
712 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
715 .key_len = rss_key_len,
716 .queue_num = rss->queue_num,
717 .key = memcpy(parser->rss_key, rss_key,
718 sizeof(*rss_key) * rss_key_len),
719 .queue = memcpy(parser->queues, rss->queue,
720 sizeof(*rss->queue) *
723 } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
724 const struct rte_flow_action_mark *mark =
725 (const struct rte_flow_action_mark *)
729 goto exit_action_overlap;
732 rte_flow_error_set(error, EINVAL,
733 RTE_FLOW_ERROR_TYPE_ACTION,
735 "mark must be defined");
737 } else if (mark->id >= MLX5_FLOW_MARK_MAX) {
738 rte_flow_error_set(error, ENOTSUP,
739 RTE_FLOW_ERROR_TYPE_ACTION,
741 "mark must be between 0"
746 parser->mark_id = mark->id;
747 } else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
749 goto exit_action_overlap;
752 } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
753 priv->config.flow_counter_en) {
755 goto exit_action_overlap;
759 goto exit_action_not_supported;
762 /* When fate is unknown, drop traffic. */
763 if (!(overlap & FATE))
765 if (parser->drop && parser->mark)
767 if (!parser->rss_conf.queue_num && !parser->drop) {
768 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
769 NULL, "no valid action");
773 exit_action_not_supported:
774 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
775 actions, "action not supported");
778 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
779 actions, "overlapping actions are not supported");
787 * Pattern specification (list terminated by the END pattern item).
789 * Perform verbose error reporting if not NULL.
790 * @param[in, out] parser
791 * Internal parser structure.
794 * 0 on success, a negative errno value otherwise and rte_errno is set.
797 mlx5_flow_convert_items_validate(const struct rte_flow_item items[],
798 struct rte_flow_error *error,
799 struct mlx5_flow_parse *parser)
801 const struct mlx5_flow_items *cur_item = mlx5_flow_items;
805 /* Initialise the offsets to start after verbs attribute. */
806 for (i = 0; i != hash_rxq_init_n; ++i)
807 parser->queue[i].offset = sizeof(struct ibv_flow_attr);
808 for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
809 const struct mlx5_flow_items *token = NULL;
812 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
816 cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
818 if (cur_item->items[i] == items->type) {
819 token = &mlx5_flow_items[items->type];
825 goto exit_item_not_supported;
828 ret = mlx5_flow_item_validate(items,
829 (const uint8_t *)cur_item->mask,
832 goto exit_item_not_supported;
833 if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
835 rte_flow_error_set(error, ENOTSUP,
836 RTE_FLOW_ERROR_TYPE_ITEM,
838 "cannot recognize multiple"
839 " VXLAN encapsulations");
842 parser->inner = IBV_FLOW_SPEC_INNER;
845 parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
847 for (n = 0; n != hash_rxq_init_n; ++n)
848 parser->queue[n].offset += cur_item->dst_sz;
852 parser->queue[HASH_RXQ_ETH].offset +=
853 sizeof(struct ibv_flow_spec_action_drop);
856 for (i = 0; i != hash_rxq_init_n; ++i)
857 parser->queue[i].offset +=
858 sizeof(struct ibv_flow_spec_action_tag);
861 unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
863 for (i = 0; i != hash_rxq_init_n; ++i)
864 parser->queue[i].offset += size;
867 exit_item_not_supported:
868 return rte_flow_error_set(error, -ret, RTE_FLOW_ERROR_TYPE_ITEM,
869 items, "item not supported");
873 * Allocate memory space to store verbs flow attributes.
876 * Amount of bytes to allocate.
878 * Perform verbose error reporting if not NULL.
881 * A verbs flow attribute on success, NULL otherwise and rte_errno is set.
883 static struct ibv_flow_attr *
884 mlx5_flow_convert_allocate(unsigned int size, struct rte_flow_error *error)
886 struct ibv_flow_attr *ibv_attr;
888 ibv_attr = rte_calloc(__func__, 1, size, 0);
890 rte_flow_error_set(error, ENOMEM,
891 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
893 "cannot allocate verbs spec attributes");
900 * Give inner packet matching a higher priority than non-inner matching.
903 * @param[in, out] parser
904 * Internal parser structure.
906 * User flow attribute.
909 mlx5_flow_update_priority(struct mlx5_flow_parse *parser,
910 const struct rte_flow_attr *attr)
915 parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
917 hash_rxq_init[HASH_RXQ_ETH].flow_priority;
920 for (i = 0; i != hash_rxq_init_n; ++i) {
921 if (parser->queue[i].ibv_attr) {
922 parser->queue[i].ibv_attr->priority =
924 hash_rxq_init[i].flow_priority -
925 (parser->inner ? 1 : 0);
931 * Finalise verbs flow attributes.
933 * @param[in, out] parser
934 * Internal parser structure.
937 mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
939 const unsigned int ipv4 =
940 hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
941 const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 : HASH_RXQ_TCPV6;
942 const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
943 const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 : HASH_RXQ_TCPV4;
944 const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 : HASH_RXQ_IPV4;
945 const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
948 /* Remove any other flow not matching the pattern. */
949 if (parser->rss_conf.queue_num == 1 && !parser->rss_conf.types) {
950 for (i = 0; i != hash_rxq_init_n; ++i) {
951 if (i == HASH_RXQ_ETH)
953 rte_free(parser->queue[i].ibv_attr);
954 parser->queue[i].ibv_attr = NULL;
958 if (parser->layer == HASH_RXQ_ETH) {
962 * This layer becomes useless as the pattern defines lower layers.
965 rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
966 parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
968 /* Remove the opposite kind of layer, e.g. IPv6 if the pattern is IPv4. */
969 for (i = ohmin; i != (ohmax + 1); ++i) {
970 if (!parser->queue[i].ibv_attr)
972 rte_free(parser->queue[i].ibv_attr);
973 parser->queue[i].ibv_attr = NULL;
975 /* Remove impossible flows according to the RSS configuration. */
976 if (hash_rxq_init[parser->layer].dpdk_rss_hf &
977 parser->rss_conf.types) {
978 /* Remove any other flow. */
979 for (i = hmin; i != (hmax + 1); ++i) {
980 if ((i == parser->layer) ||
981 (!parser->queue[i].ibv_attr))
983 rte_free(parser->queue[i].ibv_attr);
984 parser->queue[i].ibv_attr = NULL;
986 } else if (!parser->queue[ip].ibv_attr) {
987 /* No RSS possible with the current configuration. */
988 parser->rss_conf.queue_num = 1;
993 * Fill missing layers in verbs specifications, or compute the correct
994 * offset to allocate the memory space for the attributes and
997 for (i = 0; i != hash_rxq_init_n - 1; ++i) {
999 struct ibv_flow_spec_ipv4_ext ipv4;
1000 struct ibv_flow_spec_ipv6 ipv6;
1001 struct ibv_flow_spec_tcp_udp udp_tcp;
1006 if (i == parser->layer)
1008 if (parser->layer == HASH_RXQ_ETH) {
1009 if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
1010 size = sizeof(struct ibv_flow_spec_ipv4_ext);
1011 specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
1012 .type = IBV_FLOW_SPEC_IPV4_EXT,
1016 size = sizeof(struct ibv_flow_spec_ipv6);
1017 specs.ipv6 = (struct ibv_flow_spec_ipv6){
1018 .type = IBV_FLOW_SPEC_IPV6,
1022 if (parser->queue[i].ibv_attr) {
1023 dst = (void *)((uintptr_t)
1024 parser->queue[i].ibv_attr +
1025 parser->queue[i].offset);
1026 memcpy(dst, &specs, size);
1027 ++parser->queue[i].ibv_attr->num_of_specs;
1029 parser->queue[i].offset += size;
1031 if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
1032 (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
1033 size = sizeof(struct ibv_flow_spec_tcp_udp);
1034 specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
1035 .type = ((i == HASH_RXQ_UDPV4 ||
1036 i == HASH_RXQ_UDPV6) ?
1041 if (parser->queue[i].ibv_attr) {
1042 dst = (void *)((uintptr_t)
1043 parser->queue[i].ibv_attr +
1044 parser->queue[i].offset);
1045 memcpy(dst, &specs, size);
1046 ++parser->queue[i].ibv_attr->num_of_specs;
1048 parser->queue[i].offset += size;
1054 * Validate and convert a flow supported by the NIC.
1057 * Pointer to Ethernet device.
1059 * Flow rule attributes.
1060 * @param[in] pattern
1061 * Pattern specification (list terminated by the END pattern item).
1062 * @param[in] actions
1063 * Associated actions (list terminated by the END action).
1065 * Perform verbose error reporting if not NULL.
1066 * @param[in, out] parser
1067 * Internal parser structure.
1070 * 0 on success, a negative errno value otherwise and rte_errno is set.
1073 mlx5_flow_convert(struct rte_eth_dev *dev,
1074 const struct rte_flow_attr *attr,
1075 const struct rte_flow_item items[],
1076 const struct rte_flow_action actions[],
1077 struct rte_flow_error *error,
1078 struct mlx5_flow_parse *parser)
1080 const struct mlx5_flow_items *cur_item = mlx5_flow_items;
1084 /* First step. Validate the attributes, items and actions. */
1085 *parser = (struct mlx5_flow_parse){
1086 .create = parser->create,
1087 .layer = HASH_RXQ_ETH,
1088 .mark_id = MLX5_FLOW_MARK_DEFAULT,
1090 ret = mlx5_flow_convert_attributes(attr, error);
1093 ret = mlx5_flow_convert_actions(dev, actions, error, parser);
1096 ret = mlx5_flow_convert_items_validate(items, error, parser);
1099 mlx5_flow_convert_finalise(parser);
1102 * Allocate the memory space to store verbs specifications.
1105 unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;
1107 parser->queue[HASH_RXQ_ETH].ibv_attr =
1108 mlx5_flow_convert_allocate(offset, error);
1109 if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
1111 parser->queue[HASH_RXQ_ETH].offset =
1112 sizeof(struct ibv_flow_attr);
1114 for (i = 0; i != hash_rxq_init_n; ++i) {
1115 unsigned int offset;
1117 if (!(parser->rss_conf.types &
1118 hash_rxq_init[i].dpdk_rss_hf) &&
1119 (i != HASH_RXQ_ETH))
1121 offset = parser->queue[i].offset;
1122 parser->queue[i].ibv_attr =
1123 mlx5_flow_convert_allocate(offset, error);
1124 if (!parser->queue[i].ibv_attr)
1126 parser->queue[i].offset = sizeof(struct ibv_flow_attr);
1129 /* Third step. Parse and convert the items, filling the specifications. */
1131 for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
1132 struct mlx5_flow_data data = {
1137 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
1139 cur_item = &mlx5_flow_items[items->type];
1140 ret = cur_item->convert(items,
1141 (cur_item->default_mask ?
1142 cur_item->default_mask :
1149 mlx5_flow_create_flag_mark(parser, parser->mark_id);
1150 if (parser->count && parser->create) {
1151 mlx5_flow_create_count(dev, parser);
1153 goto exit_count_error;
1156 * Last step. Complete missing specifications to reach the RSS configuration.
1160 mlx5_flow_convert_finalise(parser);
1161 mlx5_flow_update_priority(parser, attr);
1163 /* Only verification is expected, all resources should be released. */
1164 if (!parser->create) {
1165 for (i = 0; i != hash_rxq_init_n; ++i) {
1166 if (parser->queue[i].ibv_attr) {
1167 rte_free(parser->queue[i].ibv_attr);
1168 parser->queue[i].ibv_attr = NULL;
1174 for (i = 0; i != hash_rxq_init_n; ++i) {
1175 if (parser->queue[i].ibv_attr) {
1176 rte_free(parser->queue[i].ibv_attr);
1177 parser->queue[i].ibv_attr = NULL;
1180 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1181 NULL, "cannot allocate verbs spec attributes");
1184 rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1185 NULL, "cannot create counter");
1190 * Copy the specification created into the flow.
1193 * Internal parser structure.
1195 * Created specification.
1197 * Size in bytes of the specification to copy.
1200 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
1206 for (i = 0; i != hash_rxq_init_n; ++i) {
1207 if (!parser->queue[i].ibv_attr)
1209 /* Specification must be the same L3 type or none. */
1210 if (parser->layer == HASH_RXQ_ETH ||
1211 (hash_rxq_init[parser->layer].ip_version ==
1212 hash_rxq_init[i].ip_version) ||
1213 (hash_rxq_init[i].ip_version == 0)) {
1214 dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1215 parser->queue[i].offset);
1216 memcpy(dst, src, size);
1217 ++parser->queue[i].ibv_attr->num_of_specs;
1218 parser->queue[i].offset += size;
1224 * Convert Ethernet item to Verbs specification.
1227 * Item specification.
1228 * @param[in] default_mask
1229 * Default bit-masks to use when item->mask is not provided.
1230 * @param[in, out] data
1234 * 0 on success, a negative errno value otherwise and rte_errno is set.
1237 mlx5_flow_create_eth(const struct rte_flow_item *item,
1238 const void *default_mask,
1239 struct mlx5_flow_data *data)
1241 const struct rte_flow_item_eth *spec = item->spec;
1242 const struct rte_flow_item_eth *mask = item->mask;
1243 struct mlx5_flow_parse *parser = data->parser;
1244 const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1245 struct ibv_flow_spec_eth eth = {
1246 .type = parser->inner | IBV_FLOW_SPEC_ETH,
1250 /* Don't update layer for the inner pattern. */
1252 parser->layer = HASH_RXQ_ETH;
1257 mask = default_mask;
1258 memcpy(ð.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
1259 memcpy(ð.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
1260 eth.val.ether_type = spec->type;
1261 memcpy(ð.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
1262 memcpy(ð.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
1263 eth.mask.ether_type = mask->type;
1264 /* Remove unwanted bits from values. */
1265 for (i = 0; i < ETHER_ADDR_LEN; ++i) {
1266 eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
1267 eth.val.src_mac[i] &= eth.mask.src_mac[i];
1269 eth.val.ether_type &= eth.mask.ether_type;
1271 mlx5_flow_create_copy(parser, ð, eth_size);
1276 * Convert VLAN item to Verbs specification.
1279 * Item specification.
1280 * @param[in] default_mask
1281 * Default bit-masks to use when item->mask is not provided.
1282 * @param[in, out] data
1286 * 0 on success, a negative errno value otherwise and rte_errno is set.
1289 mlx5_flow_create_vlan(const struct rte_flow_item *item,
1290 const void *default_mask,
1291 struct mlx5_flow_data *data)
1293 const struct rte_flow_item_vlan *spec = item->spec;
1294 const struct rte_flow_item_vlan *mask = item->mask;
1295 struct mlx5_flow_parse *parser = data->parser;
1296 struct ibv_flow_spec_eth *eth;
1297 const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1302 mask = default_mask;
1304 for (i = 0; i != hash_rxq_init_n; ++i) {
1305 if (!parser->queue[i].ibv_attr)
1308 eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1309 parser->queue[i].offset - eth_size);
1310 eth->val.vlan_tag = spec->tci;
1311 eth->mask.vlan_tag = mask->tci;
1312 eth->val.vlan_tag &= eth->mask.vlan_tag;
1314 * From the Verbs perspective, an empty VLAN is equivalent
1315 * to a packet without a VLAN layer.
1317 if (!eth->mask.vlan_tag)
1323 return rte_flow_error_set(data->error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM,
1324 item, "VLAN cannot be empty");
1328 * Convert IPv4 item to Verbs specification.
1331 * Item specification.
1332 * @param[in] default_mask
1333 * Default bit-masks to use when item->mask is not provided.
1334 * @param[in, out] data
1338 * 0 on success, a negative errno value otherwise and rte_errno is set.
1341 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
1342 const void *default_mask,
1343 struct mlx5_flow_data *data)
1345 const struct rte_flow_item_ipv4 *spec = item->spec;
1346 const struct rte_flow_item_ipv4 *mask = item->mask;
1347 struct mlx5_flow_parse *parser = data->parser;
1348 unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
1349 struct ibv_flow_spec_ipv4_ext ipv4 = {
1350 .type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
1354 /* Don't update layer for the inner pattern. */
1356 parser->layer = HASH_RXQ_IPV4;
1359 mask = default_mask;
1360 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
1361 .src_ip = spec->hdr.src_addr,
1362 .dst_ip = spec->hdr.dst_addr,
1363 .proto = spec->hdr.next_proto_id,
1364 .tos = spec->hdr.type_of_service,
1366 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
1367 .src_ip = mask->hdr.src_addr,
1368 .dst_ip = mask->hdr.dst_addr,
1369 .proto = mask->hdr.next_proto_id,
1370 .tos = mask->hdr.type_of_service,
1372 /* Remove unwanted bits from values. */
1373 ipv4.val.src_ip &= ipv4.mask.src_ip;
1374 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
1375 ipv4.val.proto &= ipv4.mask.proto;
1376 ipv4.val.tos &= ipv4.mask.tos;
1378 mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
1383 * Convert IPv6 item to Verbs specification.
1386 * Item specification.
1387 * @param[in] default_mask
1388 * Default bit-masks to use when item->mask is not provided.
1389 * @param[in, out] data
1393 * 0 on success, a negative errno value otherwise and rte_errno is set.
1396 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
1397 const void *default_mask,
1398 struct mlx5_flow_data *data)
1400 const struct rte_flow_item_ipv6 *spec = item->spec;
1401 const struct rte_flow_item_ipv6 *mask = item->mask;
1402 struct mlx5_flow_parse *parser = data->parser;
1403 unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
1404 struct ibv_flow_spec_ipv6 ipv6 = {
1405 .type = parser->inner | IBV_FLOW_SPEC_IPV6,
1409 /* Don't update layer for the inner pattern. */
1411 parser->layer = HASH_RXQ_IPV6;
1414 uint32_t vtc_flow_val;
1415 uint32_t vtc_flow_mask;
1418 mask = default_mask;
1419 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
1420 RTE_DIM(ipv6.val.src_ip));
1421 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
1422 RTE_DIM(ipv6.val.dst_ip));
1423 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
1424 RTE_DIM(ipv6.mask.src_ip));
1425 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
1426 RTE_DIM(ipv6.mask.dst_ip));
1427 vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
1428 vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
1429 ipv6.val.flow_label =
1430 rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
1432 ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
1434 ipv6.val.next_hdr = spec->hdr.proto;
1435 ipv6.val.hop_limit = spec->hdr.hop_limits;
1436 ipv6.mask.flow_label =
1437 rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
1439 ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
1441 ipv6.mask.next_hdr = mask->hdr.proto;
1442 ipv6.mask.hop_limit = mask->hdr.hop_limits;
1443 /* Remove unwanted bits from values. */
1444 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
1445 ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
1446 ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
1448 ipv6.val.flow_label &= ipv6.mask.flow_label;
1449 ipv6.val.traffic_class &= ipv6.mask.traffic_class;
1450 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
1451 ipv6.val.hop_limit &= ipv6.mask.hop_limit;
1453 mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
1458 * Convert UDP item to Verbs specification.
1461 * Item specification.
1462 * @param[in] default_mask
1463 * Default bit-masks to use when item->mask is not provided.
1464 * @param[in, out] data
1468 * 0 on success, a negative errno value otherwise and rte_errno is set.
1471 mlx5_flow_create_udp(const struct rte_flow_item *item,
1472 const void *default_mask,
1473 struct mlx5_flow_data *data)
1475 const struct rte_flow_item_udp *spec = item->spec;
1476 const struct rte_flow_item_udp *mask = item->mask;
1477 struct mlx5_flow_parse *parser = data->parser;
1478 unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1479 struct ibv_flow_spec_tcp_udp udp = {
1480 .type = parser->inner | IBV_FLOW_SPEC_UDP,
1484 /* Don't update layer for the inner pattern. */
1485 if (!parser->inner) {
1486 if (parser->layer == HASH_RXQ_IPV4)
1487 parser->layer = HASH_RXQ_UDPV4;
1489 parser->layer = HASH_RXQ_UDPV6;
1493 mask = default_mask;
1494 udp.val.dst_port = spec->hdr.dst_port;
1495 udp.val.src_port = spec->hdr.src_port;
1496 udp.mask.dst_port = mask->hdr.dst_port;
1497 udp.mask.src_port = mask->hdr.src_port;
1498 /* Remove unwanted bits from values. */
1499 udp.val.src_port &= udp.mask.src_port;
1500 udp.val.dst_port &= udp.mask.dst_port;
1502 mlx5_flow_create_copy(parser, &udp, udp_size);
1507 * Convert TCP item to Verbs specification.
1510 * Item specification.
1511 * @param[in] default_mask
1512 * Default bit-masks to use when item->mask is not provided.
1513 * @param[in, out] data
1517 * 0 on success, a negative errno value otherwise and rte_errno is set.
1520 mlx5_flow_create_tcp(const struct rte_flow_item *item,
1521 const void *default_mask,
1522 struct mlx5_flow_data *data)
1524 const struct rte_flow_item_tcp *spec = item->spec;
1525 const struct rte_flow_item_tcp *mask = item->mask;
1526 struct mlx5_flow_parse *parser = data->parser;
1527 unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1528 struct ibv_flow_spec_tcp_udp tcp = {
1529 .type = parser->inner | IBV_FLOW_SPEC_TCP,
1533 /* Don't update layer for the inner pattern. */
1534 if (!parser->inner) {
1535 if (parser->layer == HASH_RXQ_IPV4)
1536 parser->layer = HASH_RXQ_TCPV4;
1538 parser->layer = HASH_RXQ_TCPV6;
1542 mask = default_mask;
1543 tcp.val.dst_port = spec->hdr.dst_port;
1544 tcp.val.src_port = spec->hdr.src_port;
1545 tcp.mask.dst_port = mask->hdr.dst_port;
1546 tcp.mask.src_port = mask->hdr.src_port;
1547 /* Remove unwanted bits from values. */
1548 tcp.val.src_port &= tcp.mask.src_port;
1549 tcp.val.dst_port &= tcp.mask.dst_port;
1551 mlx5_flow_create_copy(parser, &tcp, tcp_size);
1556 * Convert VXLAN item to Verbs specification.
1559 * Item specification.
1560 * @param[in] default_mask
1561 * Default bit-masks to use when item->mask is not provided.
1562 * @param[in, out] data
1566 * 0 on success, a negative errno value otherwise and rte_errno is set.
1569 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
1570 const void *default_mask,
1571 struct mlx5_flow_data *data)
1573 const struct rte_flow_item_vxlan *spec = item->spec;
1574 const struct rte_flow_item_vxlan *mask = item->mask;
1575 struct mlx5_flow_parse *parser = data->parser;
1576 unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1577 struct ibv_flow_spec_tunnel vxlan = {
1578 .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1587 parser->inner = IBV_FLOW_SPEC_INNER;
1590 mask = default_mask;
1591 memcpy(&id.vni[1], spec->vni, 3);
1592 vxlan.val.tunnel_id = id.vlan_id;
1593 memcpy(&id.vni[1], mask->vni, 3);
1594 vxlan.mask.tunnel_id = id.vlan_id;
1595 /* Remove unwanted bits from values. */
1596 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1599 * Tunnel ID 0 is equivalent to not adding a VXLAN layer. If only this
1600 * layer is defined in the Verbs specification, it is interpreted as a
1601 * wildcard and all packets will match this rule; if it follows a full
1602 * stack layer (e.g. eth / ipv4 / udp), all packets matching the layers
1603 * before will also match this rule.
1604 * To avoid such a situation, VNI 0 is currently refused.
1606 if (!vxlan.val.tunnel_id)
1607 return rte_flow_error_set(data->error, EINVAL,
1608 RTE_FLOW_ERROR_TYPE_ITEM,
1610 "VxLAN vni cannot be 0");
1611 mlx5_flow_create_copy(parser, &vxlan, size);
1616 * Convert mark/flag action to Verbs specification.
1619 * Internal parser structure.
1624 * 0 on success, a negative errno value otherwise and rte_errno is set.
1627 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
1629 unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1630 struct ibv_flow_spec_action_tag tag = {
1631 .type = IBV_FLOW_SPEC_ACTION_TAG,
1633 .tag_id = mlx5_flow_mark_set(mark_id),
1636 assert(parser->mark);
1637 mlx5_flow_create_copy(parser, &tag, size);
1642 * Convert count action to Verbs specification.
1645 * Pointer to Ethernet device.
1647 * Pointer to MLX5 flow parser structure.
1650 * 0 on success, a negative errno value otherwise and rte_errno is set.
1653 mlx5_flow_create_count(struct rte_eth_dev *dev __rte_unused,
1654 struct mlx5_flow_parse *parser __rte_unused)
1656 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
1657 struct priv *priv = dev->data->dev_private;
1658 unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
1659 struct ibv_counter_set_init_attr init_attr = {0};
1660 struct ibv_flow_spec_counter_action counter = {
1661 .type = IBV_FLOW_SPEC_ACTION_COUNT,
1663 .counter_set_handle = 0,
1666 init_attr.counter_set_id = 0;
1667 parser->cs = mlx5_glue->create_counter_set(priv->ctx, &init_attr);
1672 counter.counter_set_handle = parser->cs->handle;
1673 mlx5_flow_create_copy(parser, &counter, size);
1679 * Complete flow rule creation with a drop queue.
1682 * Pointer to Ethernet device.
1684 * Internal parser structure.
1686 * Pointer to the rte_flow.
1688 * Perform verbose error reporting if not NULL.
1691 * 0 on success, a negative errno value otherwise and rte_errno is set.
1694 mlx5_flow_create_action_queue_drop(struct rte_eth_dev *dev,
1695 struct mlx5_flow_parse *parser,
1696 struct rte_flow *flow,
1697 struct rte_flow_error *error)
1699 struct priv *priv = dev->data->dev_private;
1700 struct ibv_flow_spec_action_drop *drop;
1701 unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
1706 drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
1707 parser->queue[HASH_RXQ_ETH].offset);
1708 *drop = (struct ibv_flow_spec_action_drop){
1709 .type = IBV_FLOW_SPEC_ACTION_DROP,
1712 ++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
1713 parser->queue[HASH_RXQ_ETH].offset += size;
1714 flow->frxq[HASH_RXQ_ETH].ibv_attr =
1715 parser->queue[HASH_RXQ_ETH].ibv_attr;
1717 flow->cs = parser->cs;
1718 if (!priv->dev->data->dev_started)
1720 parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
1721 flow->frxq[HASH_RXQ_ETH].ibv_flow =
1722 mlx5_glue->create_flow(priv->flow_drop_queue->qp,
1723 flow->frxq[HASH_RXQ_ETH].ibv_attr);
1724 if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1725 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1726 NULL, "flow rule creation failure");
1732 if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1733 claim_zero(mlx5_glue->destroy_flow
1734 (flow->frxq[HASH_RXQ_ETH].ibv_flow));
1735 flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
1737 if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
1738 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
1739 flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
1742 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
1750 * Create hash Rx queues when RSS is enabled.
1753 * Pointer to Ethernet device.
1755 * Internal parser structure.
1757 * Pointer to the rte_flow.
1759 * Perform verbose error reporting if not NULL.
1762 * 0 on success, a negative errno value otherwise and rte_errno is set.
1765 mlx5_flow_create_action_queue_rss(struct rte_eth_dev *dev,
1766 struct mlx5_flow_parse *parser,
1767 struct rte_flow *flow,
1768 struct rte_flow_error *error)
1770 struct priv *priv = dev->data->dev_private;
1773 for (i = 0; i != hash_rxq_init_n; ++i) {
1774 uint64_t hash_fields;
1776 if (!parser->queue[i].ibv_attr)
1778 flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
1779 parser->queue[i].ibv_attr = NULL;
1780 hash_fields = hash_rxq_init[i].hash_fields;
1781 if (!priv->dev->data->dev_started)
1783 flow->frxq[i].hrxq =
1785 parser->rss_conf.key,
1786 parser->rss_conf.key_len,
1788 parser->rss_conf.queue,
1789 parser->rss_conf.queue_num);
1790 if (flow->frxq[i].hrxq)
1792 flow->frxq[i].hrxq =
1794 parser->rss_conf.key,
1795 parser->rss_conf.key_len,
1797 parser->rss_conf.queue,
1798 parser->rss_conf.queue_num);
1799 if (!flow->frxq[i].hrxq) {
1800 return rte_flow_error_set(error, ENOMEM,
1801 RTE_FLOW_ERROR_TYPE_HANDLE,
1803 "cannot create hash rxq");
1810 * Complete flow rule creation.
1813 * Pointer to Ethernet device.
1815 * Internal parser structure.
1817 * Pointer to the rte_flow.
1819 * Perform verbose error reporting if not NULL.
1822 * 0 on success, a negative errno value otherwise and rte_errno is set.
1825 mlx5_flow_create_action_queue(struct rte_eth_dev *dev,
1826 struct mlx5_flow_parse *parser,
1827 struct rte_flow *flow,
1828 struct rte_flow_error *error)
1830 struct priv *priv = dev->data->dev_private;
1833 unsigned int flows_n = 0;
1837 assert(!parser->drop);
1838 ret = mlx5_flow_create_action_queue_rss(dev, parser, flow, error);
1842 flow->cs = parser->cs;
1843 if (!priv->dev->data->dev_started)
1845 for (i = 0; i != hash_rxq_init_n; ++i) {
1846 if (!flow->frxq[i].hrxq)
1848 flow->frxq[i].ibv_flow =
1849 mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
1850 flow->frxq[i].ibv_attr);
1851 if (!flow->frxq[i].ibv_flow) {
1852 rte_flow_error_set(error, ENOMEM,
1853 RTE_FLOW_ERROR_TYPE_HANDLE,
1854 NULL, "flow rule creation failure");
1858 DRV_LOG(DEBUG, "port %u %p type %d QP %p ibv_flow %p",
1861 (void *)flow->frxq[i].hrxq,
1862 (void *)flow->frxq[i].ibv_flow);
1865 rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE,
1866 NULL, "internal error in flow creation");
1869 for (i = 0; i != parser->rss_conf.queue_num; ++i) {
1870 struct mlx5_rxq_data *q =
1871 (*priv->rxqs)[parser->rss_conf.queue[i]];
1873 q->mark |= parser->mark;
1877 ret = rte_errno; /* Save rte_errno before cleanup. */
1879 for (i = 0; i != hash_rxq_init_n; ++i) {
1880 if (flow->frxq[i].ibv_flow) {
1881 struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;
1883 claim_zero(mlx5_glue->destroy_flow(ibv_flow));
1885 if (flow->frxq[i].hrxq)
1886 mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
1887 if (flow->frxq[i].ibv_attr)
1888 rte_free(flow->frxq[i].ibv_attr);
1891 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
1895 rte_errno = ret; /* Restore rte_errno. */
1903 * Pointer to Ethernet device.
1905 * Pointer to a TAILQ flow list.
1907 * Flow rule attributes.
1908 * @param[in] pattern
1909 * Pattern specification (list terminated by the END pattern item).
1910 * @param[in] actions
1911 * Associated actions (list terminated by the END action).
1913 * Perform verbose error reporting if not NULL.
1916 * A flow on success, NULL otherwise and rte_errno is set.
1918 static struct rte_flow *
1919 mlx5_flow_list_create(struct rte_eth_dev *dev,
1920 struct mlx5_flows *list,
1921 const struct rte_flow_attr *attr,
1922 const struct rte_flow_item items[],
1923 const struct rte_flow_action actions[],
1924 struct rte_flow_error *error)
1926 struct mlx5_flow_parse parser = { .create = 1, };
1927 struct rte_flow *flow = NULL;
1931 ret = mlx5_flow_convert(dev, attr, items, actions, error, &parser);
1934 flow = rte_calloc(__func__, 1,
1936 parser.rss_conf.queue_num * sizeof(uint16_t),
1939 rte_flow_error_set(error, ENOMEM,
1940 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1942 "cannot allocate flow memory");
1945 /* Copy configuration. */
1946 flow->queues = (uint16_t (*)[])(flow + 1);
1947 flow->rss_conf = (struct rte_flow_action_rss){
1948 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
1950 .types = parser.rss_conf.types,
1951 .key_len = parser.rss_conf.key_len,
1952 .queue_num = parser.rss_conf.queue_num,
1953 .key = memcpy(flow->rss_key, parser.rss_conf.key,
1954 sizeof(*parser.rss_conf.key) *
1955 parser.rss_conf.key_len),
1956 .queue = memcpy(flow->queues, parser.rss_conf.queue,
1957 sizeof(*parser.rss_conf.queue) *
1958 parser.rss_conf.queue_num),
1960 flow->mark = parser.mark;
1961 /* Finalise the flow. */
1963 ret = mlx5_flow_create_action_queue_drop(dev, &parser, flow,
1966 ret = mlx5_flow_create_action_queue(dev, &parser, flow, error);
1969 TAILQ_INSERT_TAIL(list, flow, next);
1970 DRV_LOG(DEBUG, "port %u flow created %p", dev->data->port_id,
1974 DRV_LOG(ERR, "port %u flow creation error: %s", dev->data->port_id,
1976 for (i = 0; i != hash_rxq_init_n; ++i) {
1977 if (parser.queue[i].ibv_attr)
1978 rte_free(parser.queue[i].ibv_attr);
1985 * Validate a flow supported by the NIC.
1987 * @see rte_flow_validate()
1991 mlx5_flow_validate(struct rte_eth_dev *dev,
1992 const struct rte_flow_attr *attr,
1993 const struct rte_flow_item items[],
1994 const struct rte_flow_action actions[],
1995 struct rte_flow_error *error)
1997 struct mlx5_flow_parse parser = { .create = 0, };
1999 return mlx5_flow_convert(dev, attr, items, actions, error, &parser);
2005 * @see rte_flow_create()
2009 mlx5_flow_create(struct rte_eth_dev *dev,
2010 const struct rte_flow_attr *attr,
2011 const struct rte_flow_item items[],
2012 const struct rte_flow_action actions[],
2013 struct rte_flow_error *error)
2015 struct priv *priv = dev->data->dev_private;
2017 return mlx5_flow_list_create(dev, &priv->flows, attr, items, actions,
2022 * Destroy a flow in a list.
2025 * Pointer to Ethernet device.
2027 * Pointer to a TAILQ flow list.
2032 mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
2033 struct rte_flow *flow)
2035 struct priv *priv = dev->data->dev_private;
2038 if (flow->drop || !flow->mark)
2040 for (i = 0; i != flow->rss_conf.queue_num; ++i) {
2041 struct rte_flow *tmp;
2045 * To remove the mark from the queue, the queue must not be
2046 * present in any other marked flow (RSS or not).
2048 TAILQ_FOREACH(tmp, list, next) {
2050 uint16_t *tqs = NULL;
2055 for (j = 0; j != hash_rxq_init_n; ++j) {
2056 if (!tmp->frxq[j].hrxq)
2058 tqs = tmp->frxq[j].hrxq->ind_table->queues;
2059 tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
2063 for (j = 0; (j != tq_n) && !mark; j++)
2064 if (tqs[j] == (*flow->queues)[i])
2067 (*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
2071 if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
2072 claim_zero(mlx5_glue->destroy_flow
2073 (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2074 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
2076 for (i = 0; i != hash_rxq_init_n; ++i) {
2077 struct mlx5_flow *frxq = &flow->frxq[i];
2080 claim_zero(mlx5_glue->destroy_flow
2083 mlx5_hrxq_release(dev, frxq->hrxq);
2085 rte_free(frxq->ibv_attr);
2089 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2092 TAILQ_REMOVE(list, flow, next);
2093 DRV_LOG(DEBUG, "port %u flow destroyed %p", dev->data->port_id,
2099 * Destroy all flows.
2102 * Pointer to Ethernet device.
2104 * Pointer to a TAILQ flow list.
2107 mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
2109 while (!TAILQ_EMPTY(list)) {
2110 struct rte_flow *flow;
2112 flow = TAILQ_FIRST(list);
2113 mlx5_flow_list_destroy(dev, list, flow);
2118 * Create drop queue.
2121 * Pointer to Ethernet device.
2124 * 0 on success, a negative errno value otherwise and rte_errno is set.
2127 mlx5_flow_create_drop_queue(struct rte_eth_dev *dev)
2129 struct priv *priv = dev->data->dev_private;
2130 struct mlx5_hrxq_drop *fdq = NULL;
2134 fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
2137 "port %u cannot allocate memory for drop queue",
2138 dev->data->port_id);
2142 fdq->cq = mlx5_glue->create_cq(priv->ctx, 1, NULL, NULL, 0);
2144 DRV_LOG(WARNING, "port %u cannot allocate CQ for drop queue",
2145 dev->data->port_id);
2149 fdq->wq = mlx5_glue->create_wq
2151 &(struct ibv_wq_init_attr){
2152 .wq_type = IBV_WQT_RQ,
2159 DRV_LOG(WARNING, "port %u cannot allocate WQ for drop queue",
2160 dev->data->port_id);
2164 fdq->ind_table = mlx5_glue->create_rwq_ind_table
2166 &(struct ibv_rwq_ind_table_init_attr){
2167 .log_ind_tbl_size = 0,
2168 .ind_tbl = &fdq->wq,
2171 if (!fdq->ind_table) {
2173 "port %u cannot allocate indirection table for drop"
2175 dev->data->port_id);
2179 fdq->qp = mlx5_glue->create_qp_ex
2181 &(struct ibv_qp_init_attr_ex){
2182 .qp_type = IBV_QPT_RAW_PACKET,
2184 IBV_QP_INIT_ATTR_PD |
2185 IBV_QP_INIT_ATTR_IND_TABLE |
2186 IBV_QP_INIT_ATTR_RX_HASH,
2187 .rx_hash_conf = (struct ibv_rx_hash_conf){
2189 IBV_RX_HASH_FUNC_TOEPLITZ,
2190 .rx_hash_key_len = rss_hash_default_key_len,
2191 .rx_hash_key = rss_hash_default_key,
2192 .rx_hash_fields_mask = 0,
2194 .rwq_ind_tbl = fdq->ind_table,
2198 DRV_LOG(WARNING, "port %u cannot allocate QP for drop queue",
2199 dev->data->port_id);
2203 priv->flow_drop_queue = fdq;
2207 claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2209 claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2211 claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2213 claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2216 priv->flow_drop_queue = NULL;
2221 * Delete drop queue.
2224 * Pointer to Ethernet device.
2227 mlx5_flow_delete_drop_queue(struct rte_eth_dev *dev)
2229 struct priv *priv = dev->data->dev_private;
2230 struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
2235 claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2237 claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2239 claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2241 claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2243 priv->flow_drop_queue = NULL;
2250 * Pointer to Ethernet device.
2252 * Pointer to a TAILQ flow list.
2255 mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
2257 struct priv *priv = dev->data->dev_private;
2258 struct rte_flow *flow;
2260 TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
2262 struct mlx5_ind_table_ibv *ind_tbl = NULL;
2265 if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
2267 claim_zero(mlx5_glue->destroy_flow
2268 (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2269 flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
2270 DRV_LOG(DEBUG, "port %u flow %p removed",
2271 dev->data->port_id, (void *)flow);
2275 /* Verify the flow has not already been cleaned. */
2276 for (i = 0; i != hash_rxq_init_n; ++i) {
2277 if (!flow->frxq[i].ibv_flow)
2280 * Indirection table may be necessary to remove the
2281 * flags in the Rx queues.
2282 * This helps to speed up the process by avoiding
2285 ind_tbl = flow->frxq[i].hrxq->ind_table;
2288 if (i == hash_rxq_init_n)
2292 for (i = 0; i != ind_tbl->queues_n; ++i)
2293 (*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
2295 for (i = 0; i != hash_rxq_init_n; ++i) {
2296 if (!flow->frxq[i].ibv_flow)
2298 claim_zero(mlx5_glue->destroy_flow
2299 (flow->frxq[i].ibv_flow));
2300 flow->frxq[i].ibv_flow = NULL;
2301 mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
2302 flow->frxq[i].hrxq = NULL;
2304 DRV_LOG(DEBUG, "port %u flow %p removed", dev->data->port_id,
2313 * Pointer to Ethernet device.
2315 * Pointer to a TAILQ flow list.
2318 * 0 on success, a negative errno value otherwise and rte_errno is set.
2321 mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
2323 struct priv *priv = dev->data->dev_private;
2324 struct rte_flow *flow;
2326 TAILQ_FOREACH(flow, list, next) {
2330 flow->frxq[HASH_RXQ_ETH].ibv_flow =
2331 mlx5_glue->create_flow
2332 (priv->flow_drop_queue->qp,
2333 flow->frxq[HASH_RXQ_ETH].ibv_attr);
2334 if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2336 "port %u flow %p cannot be applied",
2337 dev->data->port_id, (void *)flow);
2341 DRV_LOG(DEBUG, "port %u flow %p applied",
2342 dev->data->port_id, (void *)flow);
2346 for (i = 0; i != hash_rxq_init_n; ++i) {
2347 if (!flow->frxq[i].ibv_attr)
2349 flow->frxq[i].hrxq =
2350 mlx5_hrxq_get(dev, flow->rss_conf.key,
2351 flow->rss_conf.key_len,
2352 hash_rxq_init[i].hash_fields,
2353 flow->rss_conf.queue,
2354 flow->rss_conf.queue_num);
2355 if (flow->frxq[i].hrxq)
2357 flow->frxq[i].hrxq =
2358 mlx5_hrxq_new(dev, flow->rss_conf.key,
2359 flow->rss_conf.key_len,
2360 hash_rxq_init[i].hash_fields,
2361 flow->rss_conf.queue,
2362 flow->rss_conf.queue_num);
2363 if (!flow->frxq[i].hrxq) {
2365 "port %u flow %p cannot be applied",
2366 dev->data->port_id, (void *)flow);
2371 flow->frxq[i].ibv_flow =
2372 mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
2373 flow->frxq[i].ibv_attr);
2374 if (!flow->frxq[i].ibv_flow) {
2376 "port %u flow %p cannot be applied",
2377 dev->data->port_id, (void *)flow);
2381 DRV_LOG(DEBUG, "port %u flow %p applied",
2382 dev->data->port_id, (void *)flow);
2386 for (i = 0; i != flow->rss_conf.queue_num; ++i)
2387 (*priv->rxqs)[flow->rss_conf.queue[i]]->mark = 1;
2393 * Verify the flow list is empty.
2396 * Pointer to Ethernet device.
2398 * @return the number of flows not released.
2401 mlx5_flow_verify(struct rte_eth_dev *dev)
2403 struct priv *priv = dev->data->dev_private;
2404 struct rte_flow *flow;
2407 TAILQ_FOREACH(flow, &priv->flows, next) {
2408 DRV_LOG(DEBUG, "port %u flow %p still referenced",
2409 dev->data->port_id, (void *)flow);
2416 * Enable a control flow configured from the control plane.
2419 * Pointer to Ethernet device.
2421 * An Ethernet flow spec to apply.
2423 * An Ethernet flow mask to apply.
2425 * A VLAN flow spec to apply.
2427 * A VLAN flow mask to apply.
2430 * 0 on success, a negative errno value otherwise and rte_errno is set.
2433 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
2434 struct rte_flow_item_eth *eth_spec,
2435 struct rte_flow_item_eth *eth_mask,
2436 struct rte_flow_item_vlan *vlan_spec,
2437 struct rte_flow_item_vlan *vlan_mask)
2439 struct priv *priv = dev->data->dev_private;
2440 const struct rte_flow_attr attr = {
2442 .priority = MLX5_CTRL_FLOW_PRIORITY,
2444 struct rte_flow_item items[] = {
2446 .type = RTE_FLOW_ITEM_TYPE_ETH,
2452 .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
2453 RTE_FLOW_ITEM_TYPE_END,
2459 .type = RTE_FLOW_ITEM_TYPE_END,
2462 uint16_t queue[priv->reta_idx_n];
2463 struct rte_flow_action_rss action_rss = {
2464 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
2466 .types = priv->rss_conf.rss_hf,
2467 .key_len = priv->rss_conf.rss_key_len,
2468 .queue_num = priv->reta_idx_n,
2469 .key = priv->rss_conf.rss_key,
2472 struct rte_flow_action actions[] = {
2474 .type = RTE_FLOW_ACTION_TYPE_RSS,
2475 .conf = &action_rss,
2478 .type = RTE_FLOW_ACTION_TYPE_END,
2481 struct rte_flow *flow;
2482 struct rte_flow_error error;
2485 if (!priv->reta_idx_n) {
2489 for (i = 0; i != priv->reta_idx_n; ++i)
2490 queue[i] = (*priv->reta_idx)[i];
2491 flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
2499  * Enable a control flow configured from the control plane.
2502 * Pointer to Ethernet device.
2504 * An Ethernet flow spec to apply.
2506 * An Ethernet flow mask to apply.
2509 * 0 on success, a negative errno value otherwise and rte_errno is set.
2512 mlx5_ctrl_flow(struct rte_eth_dev *dev,
2513 struct rte_flow_item_eth *eth_spec,
2514 struct rte_flow_item_eth *eth_mask)
2516 return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
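/*
 * Illustrative sketch, not part of the driver: enabling a control flow for
 * broadcast traffic, the way the traffic enable path uses this helper.  The
 * function name is an assumption for the example; a VLAN-tagged variant
 * would call mlx5_ctrl_flow_vlan() with a VLAN spec/mask instead.
 */
static int __rte_unused
mlx5_ctrl_flow_broadcast_example(struct rte_eth_dev *dev)
{
	struct rte_flow_item_eth bcast = {
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
	};

	/* The same structure serves as spec and mask: match on dst only. */
	return mlx5_ctrl_flow(dev, &bcast, &bcast);
}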
2522 * @see rte_flow_destroy()
2526 mlx5_flow_destroy(struct rte_eth_dev *dev,
2527 struct rte_flow *flow,
2528 struct rte_flow_error *error __rte_unused)
2530 struct priv *priv = dev->data->dev_private;
2532 mlx5_flow_list_destroy(dev, &priv->flows, flow);
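/*
 * Illustrative sketch, not part of the driver: the usual application round
 * trip that lands in the create/destroy callbacks of this file.  The helper
 * name, pattern, actions and queue index are arbitrary example values.
 */
static int __rte_unused
mlx5_flow_destroy_usage_example(uint16_t port_id)
{
	const struct rte_flow_attr attr = { .ingress = 1 };
	const struct rte_flow_item pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	struct rte_flow_action_queue queue = { .index = 0 };
	const struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};
	struct rte_flow_error err;
	struct rte_flow *flow;

	flow = rte_flow_create(port_id, &attr, pattern, actions, &err);
	if (!flow)
		return -1;
	/* Dispatched to mlx5_flow_destroy() through mlx5_flow_ops. */
	return rte_flow_destroy(port_id, flow, &err);
}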
2537 * Destroy all flows.
2539 * @see rte_flow_flush()
2543 mlx5_flow_flush(struct rte_eth_dev *dev,
2544 struct rte_flow_error *error __rte_unused)
2546 struct priv *priv = dev->data->dev_private;
2548 mlx5_flow_list_flush(dev, &priv->flows);
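/*
 * Illustrative sketch, not part of the driver: the application-side call
 * that reaches the flush callback above.  The helper name is an assumption
 * for the example only.
 */
static int __rte_unused
mlx5_flow_flush_usage_example(uint16_t port_id)
{
	struct rte_flow_error err;

	/* Destroys every flow on the port; dispatched to mlx5_flow_flush(). */
	return rte_flow_flush(port_id, &err);
}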
2552 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
2554 * Query flow counter.
2558  * @param[out] query_count
2559  *   Returned data from the counter.
2562 * 0 on success, a negative errno value otherwise and rte_errno is set.
2565 mlx5_flow_query_count(struct ibv_counter_set *cs,
2566 struct mlx5_flow_counter_stats *counter_stats,
2567 struct rte_flow_query_count *query_count,
2568 struct rte_flow_error *error)
2570 uint64_t counters[2];
2571 struct ibv_query_counter_set_attr query_cs_attr = {
2573 .query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
2575 struct ibv_counter_set_data query_out = {
2577 .outlen = 2 * sizeof(uint64_t),
2579 int err = mlx5_glue->query_counter_set(&query_cs_attr, &query_out);
2582 return rte_flow_error_set(error, err,
2583 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2585 "cannot read counter");
2586 query_count->hits_set = 1;
2587 query_count->bytes_set = 1;
2588 query_count->hits = counters[0] - counter_stats->hits;
2589 query_count->bytes = counters[1] - counter_stats->bytes;
2590 if (query_count->reset) {
2591 counter_stats->hits = counters[0];
2592 counter_stats->bytes = counters[1];
2600 * @see rte_flow_query()
2604 mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
2605 struct rte_flow *flow,
2606 enum rte_flow_action_type action __rte_unused,
2608 struct rte_flow_error *error)
2613 ret = mlx5_flow_query_count(flow->cs,
2614 &flow->counter_stats,
2615 (struct rte_flow_query_count *)data,
2620 return rte_flow_error_set(error, EINVAL,
2621 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2623 "no counter found for flow");
2632 * @see rte_flow_isolate()
2636 mlx5_flow_isolate(struct rte_eth_dev *dev,
2638 struct rte_flow_error *error)
2640 struct priv *priv = dev->data->dev_private;
2642 if (dev->data->dev_started) {
2643 rte_flow_error_set(error, EBUSY,
2644 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2646 "port must be stopped first");
2649 priv->isolated = !!enable;
2651 priv->dev->dev_ops = &mlx5_dev_ops_isolate;
2653 priv->dev->dev_ops = &mlx5_dev_ops;
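/*
 * Illustrative sketch, not part of the driver: isolated mode must be
 * requested while the port is stopped, otherwise the callback above fails
 * with EBUSY.  The helper name is an assumption for the example only.
 */
static int __rte_unused
mlx5_flow_isolate_usage_example(uint16_t port_id)
{
	struct rte_flow_error err;

	/* 1 = only flows explicitly created through rte_flow receive traffic. */
	if (rte_flow_isolate(port_id, 1, &err))
		return -1;
	return rte_eth_dev_start(port_id);
}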
2658 * Convert a flow director filter to a generic flow.
2661 * Pointer to Ethernet device.
2662 * @param fdir_filter
2663 * Flow director filter to add.
2665 * Generic flow parameters structure.
2668 * 0 on success, a negative errno value otherwise and rte_errno is set.
2671 mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
2672 const struct rte_eth_fdir_filter *fdir_filter,
2673 struct mlx5_fdir *attributes)
2675 struct priv *priv = dev->data->dev_private;
2676 const struct rte_eth_fdir_input *input = &fdir_filter->input;
2677 const struct rte_eth_fdir_masks *mask =
2678 &dev->data->dev_conf.fdir_conf.mask;
2680 /* Validate queue number. */
2681 if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
2682 DRV_LOG(ERR, "port %u invalid queue number %d",
2683 dev->data->port_id, fdir_filter->action.rx_queue);
2687 attributes->attr.ingress = 1;
2688 attributes->items[0] = (struct rte_flow_item) {
2689 .type = RTE_FLOW_ITEM_TYPE_ETH,
2690 .spec = &attributes->l2,
2691 .mask = &attributes->l2_mask,
2693 switch (fdir_filter->action.behavior) {
2694 case RTE_ETH_FDIR_ACCEPT:
2695 attributes->actions[0] = (struct rte_flow_action){
2696 .type = RTE_FLOW_ACTION_TYPE_QUEUE,
2697 .conf = &attributes->queue,
2700 case RTE_ETH_FDIR_REJECT:
2701 attributes->actions[0] = (struct rte_flow_action){
2702 .type = RTE_FLOW_ACTION_TYPE_DROP,
2706 DRV_LOG(ERR, "port %u invalid behavior %d",
2708 fdir_filter->action.behavior);
2709 rte_errno = ENOTSUP;
2712 attributes->queue.index = fdir_filter->action.rx_queue;
2714 switch (fdir_filter->input.flow_type) {
2715 case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
2716 case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
2717 case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
2718 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2719 .src_addr = input->flow.ip4_flow.src_ip,
2720 .dst_addr = input->flow.ip4_flow.dst_ip,
2721 .time_to_live = input->flow.ip4_flow.ttl,
2722 .type_of_service = input->flow.ip4_flow.tos,
2723 .next_proto_id = input->flow.ip4_flow.proto,
2725 attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){
2726 .src_addr = mask->ipv4_mask.src_ip,
2727 .dst_addr = mask->ipv4_mask.dst_ip,
2728 .time_to_live = mask->ipv4_mask.ttl,
2729 .type_of_service = mask->ipv4_mask.tos,
2730 .next_proto_id = mask->ipv4_mask.proto,
2732 attributes->items[1] = (struct rte_flow_item){
2733 .type = RTE_FLOW_ITEM_TYPE_IPV4,
2734 .spec = &attributes->l3,
2735 .mask = &attributes->l3_mask,
2738 case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
2739 case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
2740 case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
2741 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2742 .hop_limits = input->flow.ipv6_flow.hop_limits,
2743 .proto = input->flow.ipv6_flow.proto,
2746 memcpy(attributes->l3.ipv6.hdr.src_addr,
2747 input->flow.ipv6_flow.src_ip,
2748 RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2749 memcpy(attributes->l3.ipv6.hdr.dst_addr,
2750 input->flow.ipv6_flow.dst_ip,
2751 RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
2752 memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
2753 mask->ipv6_mask.src_ip,
2754 RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
2755 memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
2756 mask->ipv6_mask.dst_ip,
2757 RTE_DIM(attributes->l3_mask.ipv6.hdr.dst_addr));
2758 attributes->items[1] = (struct rte_flow_item){
2759 .type = RTE_FLOW_ITEM_TYPE_IPV6,
2760 .spec = &attributes->l3,
2761 .mask = &attributes->l3_mask,
2765 DRV_LOG(ERR, "port %u invalid flow type %d",
2766 dev->data->port_id, fdir_filter->input.flow_type);
2767 rte_errno = ENOTSUP;
2771 switch (fdir_filter->input.flow_type) {
2772 case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
2773 attributes->l4.udp.hdr = (struct udp_hdr){
2774 .src_port = input->flow.udp4_flow.src_port,
2775 .dst_port = input->flow.udp4_flow.dst_port,
2777 attributes->l4_mask.udp.hdr = (struct udp_hdr){
2778 .src_port = mask->src_port_mask,
2779 .dst_port = mask->dst_port_mask,
2781 attributes->items[2] = (struct rte_flow_item){
2782 .type = RTE_FLOW_ITEM_TYPE_UDP,
2783 .spec = &attributes->l4,
2784 .mask = &attributes->l4_mask,
2787 case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
2788 attributes->l4.tcp.hdr = (struct tcp_hdr){
2789 .src_port = input->flow.tcp4_flow.src_port,
2790 .dst_port = input->flow.tcp4_flow.dst_port,
2792 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
2793 .src_port = mask->src_port_mask,
2794 .dst_port = mask->dst_port_mask,
2796 attributes->items[2] = (struct rte_flow_item){
2797 .type = RTE_FLOW_ITEM_TYPE_TCP,
2798 .spec = &attributes->l4,
2799 .mask = &attributes->l4_mask,
2802 case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
2803 attributes->l4.udp.hdr = (struct udp_hdr){
2804 .src_port = input->flow.udp6_flow.src_port,
2805 .dst_port = input->flow.udp6_flow.dst_port,
2807 attributes->l4_mask.udp.hdr = (struct udp_hdr){
2808 .src_port = mask->src_port_mask,
2809 .dst_port = mask->dst_port_mask,
2811 attributes->items[2] = (struct rte_flow_item){
2812 .type = RTE_FLOW_ITEM_TYPE_UDP,
2813 .spec = &attributes->l4,
2814 .mask = &attributes->l4_mask,
2817 case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
2818 attributes->l4.tcp.hdr = (struct tcp_hdr){
2819 .src_port = input->flow.tcp6_flow.src_port,
2820 .dst_port = input->flow.tcp6_flow.dst_port,
2822 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
2823 .src_port = mask->src_port_mask,
2824 .dst_port = mask->dst_port_mask,
2826 attributes->items[2] = (struct rte_flow_item){
2827 .type = RTE_FLOW_ITEM_TYPE_TCP,
2828 .spec = &attributes->l4,
2829 .mask = &attributes->l4_mask,
2832 case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
2833 case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
2836 DRV_LOG(ERR, "port %u invalid flow type %d",
2837 dev->data->port_id, fdir_filter->input.flow_type);
2838 rte_errno = ENOTSUP;
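/*
 * Illustrative sketch, not part of the driver: a minimal flow director
 * filter that the conversion above turns into an ETH / IPV4 / UDP pattern
 * with a QUEUE action.  The helper name, the queue index and the zeroed
 * (wildcard) addresses and ports are assumptions for the example only.
 */
static int __rte_unused
mlx5_fdir_filter_convert_example(struct rte_eth_dev *dev,
				 struct mlx5_fdir *attributes)
{
	const struct rte_eth_fdir_filter filter = {
		.input.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
		.action = {
			.rx_queue = 1,
			.behavior = RTE_ETH_FDIR_ACCEPT,
		},
	};

	return mlx5_fdir_filter_convert(dev, &filter, attributes);
}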
2845 * Add new flow director filter and store it in list.
2848 * Pointer to Ethernet device.
2849 * @param fdir_filter
2850 * Flow director filter to add.
2853 * 0 on success, a negative errno value otherwise and rte_errno is set.
2856 mlx5_fdir_filter_add(struct rte_eth_dev *dev,
2857 const struct rte_eth_fdir_filter *fdir_filter)
2859 struct priv *priv = dev->data->dev_private;
2860 struct mlx5_fdir attributes = {
2863 .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
2864 .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
2868 struct mlx5_flow_parse parser = {
2869 .layer = HASH_RXQ_ETH,
2871 struct rte_flow_error error;
2872 struct rte_flow *flow;
2875 ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
2878 ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
2879 attributes.actions, &error, &parser);
2882 flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
2883 attributes.items, attributes.actions,
2886 DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
2894  * Delete a specific filter.
2897 * Pointer to Ethernet device.
2898 * @param fdir_filter
2899 * Filter to be deleted.
2902 * 0 on success, a negative errno value otherwise and rte_errno is set.
2905 mlx5_fdir_filter_delete(struct rte_eth_dev *dev,
2906 const struct rte_eth_fdir_filter *fdir_filter)
2908 struct priv *priv = dev->data->dev_private;
2909 struct mlx5_fdir attributes = {
2912 struct mlx5_flow_parse parser = {
2914 .layer = HASH_RXQ_ETH,
2916 struct rte_flow_error error;
2917 struct rte_flow *flow;
2921 ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
2924 ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
2925 attributes.actions, &error, &parser);
2929 * Special case for the drop action, which is only appended to the
2930 * specifications when a flow is created; the drop spec is therefore
2931 * missing here and is re-added before comparing against existing flows.
2934 struct ibv_flow_spec_action_drop *drop;
2936 drop = (void *)((uintptr_t)parser.queue[HASH_RXQ_ETH].ibv_attr +
2937 parser.queue[HASH_RXQ_ETH].offset);
2938 *drop = (struct ibv_flow_spec_action_drop){
2939 .type = IBV_FLOW_SPEC_ACTION_DROP,
2940 .size = sizeof(struct ibv_flow_spec_action_drop),
2942 parser.queue[HASH_RXQ_ETH].ibv_attr->num_of_specs++;
2944 TAILQ_FOREACH(flow, &priv->flows, next) {
2945 struct ibv_flow_attr *attr;
2946 struct ibv_spec_header *attr_h;
2948 struct ibv_flow_attr *flow_attr;
2949 struct ibv_spec_header *flow_h;
2951 unsigned int specs_n;
2953 attr = parser.queue[HASH_RXQ_ETH].ibv_attr;
2954 flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr;
2955 /* Compare the attributes first. */
2956 if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
2958 if (attr->num_of_specs == 0)
2960 spec = (void *)((uintptr_t)attr +
2961 sizeof(struct ibv_flow_attr));
2962 flow_spec = (void *)((uintptr_t)flow_attr +
2963 sizeof(struct ibv_flow_attr));
2964 specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
2965 for (i = 0; i != specs_n; ++i) {
2968 if (memcmp(spec, flow_spec,
2969 RTE_MIN(attr_h->size, flow_h->size)))
2971 spec = (void *)((uintptr_t)spec + attr_h->size);
2972 flow_spec = (void *)((uintptr_t)flow_spec +
2975 /* At this point, the flow matches. */
2978 /* The flow does not match. */
2981 ret = rte_errno; /* Save rte_errno before cleanup. */
2983 mlx5_flow_list_destroy(dev, &priv->flows, flow);
2985 for (i = 0; i != hash_rxq_init_n; ++i) {
2986 if (parser.queue[i].ibv_attr)
2987 rte_free(parser.queue[i].ibv_attr);
2989 rte_errno = ret; /* Restore rte_errno. */
2994  * Update a specific filter (implemented as delete followed by add).
2997 * Pointer to Ethernet device.
2998 * @param fdir_filter
2999 * Filter to be updated.
3002 * 0 on success, a negative errno value otherwise and rte_errno is set.
3005 mlx5_fdir_filter_update(struct rte_eth_dev *dev,
3006 const struct rte_eth_fdir_filter *fdir_filter)
3010 ret = mlx5_fdir_filter_delete(dev, fdir_filter);
3013 return mlx5_fdir_filter_add(dev, fdir_filter);
3017 * Flush all filters.
3020 * Pointer to Ethernet device.
3023 mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
3025 struct priv *priv = dev->data->dev_private;
3027 mlx5_flow_list_flush(dev, &priv->flows);
3031 * Get flow director information.
3034 * Pointer to Ethernet device.
3035 * @param[out] fdir_info
3036 * Resulting flow director information.
3039 mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
3041 struct priv *priv = dev->data->dev_private;
3042 struct rte_eth_fdir_masks *mask =
3043 &priv->dev->data->dev_conf.fdir_conf.mask;
3045 fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
3046 fdir_info->guarant_spc = 0;
3047 rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
3048 fdir_info->max_flexpayload = 0;
3049 fdir_info->flow_types_mask[0] = 0;
3050 fdir_info->flex_payload_unit = 0;
3051 fdir_info->max_flex_payload_segment_num = 0;
3052 fdir_info->flex_payload_limit = 0;
3053 memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
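/*
 * Illustrative sketch, not part of the driver: retrieving the information
 * filled in above through the legacy filter API.  The helper name is an
 * assumption for the example only.
 */
static int __rte_unused
mlx5_fdir_info_usage_example(uint16_t port_id)
{
	struct rte_eth_fdir_info info;

	/* Dispatched to mlx5_fdir_ctrl_func() then mlx5_fdir_info_get(). */
	if (rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
				    RTE_ETH_FILTER_INFO, &info))
		return -1;
	DRV_LOG(DEBUG, "port %u flow director mode %d", port_id, info.mode);
	return 0;
}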
3057 * Deal with flow director operations.
3060 * Pointer to Ethernet device.
3062 * Operation to perform.
3064 * Pointer to operation-specific structure.
3067 * 0 on success, a negative errno value otherwise and rte_errno is set.
3070 mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
3073 struct priv *priv = dev->data->dev_private;
3074 enum rte_fdir_mode fdir_mode =
3075 priv->dev->data->dev_conf.fdir_conf.mode;
3077 if (filter_op == RTE_ETH_FILTER_NOP)
3079 if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
3080 fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
3081 DRV_LOG(ERR, "port %u flow director mode %d not supported",
3082 dev->data->port_id, fdir_mode);
3086 switch (filter_op) {
3087 case RTE_ETH_FILTER_ADD:
3088 return mlx5_fdir_filter_add(dev, arg);
3089 case RTE_ETH_FILTER_UPDATE:
3090 return mlx5_fdir_filter_update(dev, arg);
3091 case RTE_ETH_FILTER_DELETE:
3092 return mlx5_fdir_filter_delete(dev, arg);
3093 case RTE_ETH_FILTER_FLUSH:
3094 mlx5_fdir_filter_flush(dev);
3096 case RTE_ETH_FILTER_INFO:
3097 mlx5_fdir_info_get(dev, arg);
3100 DRV_LOG(DEBUG, "port %u unknown operation %u",
3101 dev->data->port_id, filter_op);
3109 * Manage filter operations.
3112 * Pointer to Ethernet device structure.
3113 * @param filter_type
3116 * Operation to perform.
3118 * Pointer to operation-specific structure.
3121 * 0 on success, a negative errno value otherwise and rte_errno is set.
3124 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
3125 enum rte_filter_type filter_type,
3126 enum rte_filter_op filter_op,
3129 switch (filter_type) {
3130 case RTE_ETH_FILTER_GENERIC:
3131 if (filter_op != RTE_ETH_FILTER_GET) {
3135 *(const void **)arg = &mlx5_flow_ops;
3137 case RTE_ETH_FILTER_FDIR:
3138 return mlx5_fdir_ctrl_func(dev, filter_op, arg);
3140 DRV_LOG(ERR, "port %u filter type (%d) not supported",
3141 dev->data->port_id, filter_type);
3142 rte_errno = ENOTSUP;
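/*
 * Illustrative sketch, not part of the driver: how ethdev reaches the
 * callback above.  rte_flow itself asks for the generic ops this way; the
 * helper name is an assumption for the example only.
 */
static int __rte_unused
mlx5_filter_ctrl_usage_example(uint16_t port_id)
{
	const struct rte_flow_ops *ops = NULL;

	/* RTE_ETH_FILTER_GENERIC + GET returns &mlx5_flow_ops through arg. */
	if (rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_GENERIC,
				    RTE_ETH_FILTER_GET, &ops))
		return -1;
	return ops != NULL ? 0 : -1;
}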