1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2016 6WIND S.A.
3 * Copyright 2016 Mellanox Technologies, Ltd.
10 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
12 #pragma GCC diagnostic ignored "-Wpedantic"
14 #include <infiniband/verbs.h>
16 #pragma GCC diagnostic error "-Wpedantic"
19 #include <rte_ethdev_driver.h>
21 #include <rte_flow_driver.h>
22 #include <rte_malloc.h>
26 #include "mlx5_defs.h"
28 #include "mlx5_glue.h"
30 /* Define the minimal priority for control plane flows. */
31 #define MLX5_CTRL_FLOW_PRIORITY 4
33 /* Internet Protocol versions. */
37 #ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
38 struct ibv_flow_spec_counter_action {
43 /* Dev ops structures defined in mlx5.c. */
44 extern const struct eth_dev_ops mlx5_dev_ops;
45 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
47 /** Structure given to the conversion functions. */
48 struct mlx5_flow_data {
49 struct mlx5_flow_parse *parser; /**< Parser context. */
50 struct rte_flow_error *error; /**< Error context. */
54 mlx5_flow_create_eth(const struct rte_flow_item *item,
55 const void *default_mask,
56 struct mlx5_flow_data *data);
59 mlx5_flow_create_vlan(const struct rte_flow_item *item,
60 const void *default_mask,
61 struct mlx5_flow_data *data);
64 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
65 const void *default_mask,
66 struct mlx5_flow_data *data);
69 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
70 const void *default_mask,
71 struct mlx5_flow_data *data);
74 mlx5_flow_create_udp(const struct rte_flow_item *item,
75 const void *default_mask,
76 struct mlx5_flow_data *data);
79 mlx5_flow_create_tcp(const struct rte_flow_item *item,
80 const void *default_mask,
81 struct mlx5_flow_data *data);
84 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
85 const void *default_mask,
86 struct mlx5_flow_data *data);
88 struct mlx5_flow_parse;
91 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
95 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);
98 mlx5_flow_create_count(struct rte_eth_dev *dev, struct mlx5_flow_parse *parser);
100 /* Hash RX queue types. */
111 /* Initialization data for hash RX queue. */
112 struct hash_rxq_init {
113 uint64_t hash_fields; /* Fields that participate in the hash. */
114 uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
115 unsigned int flow_priority; /* Flow priority to use. */
116 unsigned int ip_version; /* Internet protocol. */
119 /* Initialization data for hash RX queues. */
120 const struct hash_rxq_init hash_rxq_init[] = {
122 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
123 IBV_RX_HASH_DST_IPV4 |
124 IBV_RX_HASH_SRC_PORT_TCP |
125 IBV_RX_HASH_DST_PORT_TCP),
126 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
128 .ip_version = MLX5_IPV4,
131 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
132 IBV_RX_HASH_DST_IPV4 |
133 IBV_RX_HASH_SRC_PORT_UDP |
134 IBV_RX_HASH_DST_PORT_UDP),
135 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
137 .ip_version = MLX5_IPV4,
140 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
141 IBV_RX_HASH_DST_IPV4),
142 .dpdk_rss_hf = (ETH_RSS_IPV4 |
145 .ip_version = MLX5_IPV4,
148 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
149 IBV_RX_HASH_DST_IPV6 |
150 IBV_RX_HASH_SRC_PORT_TCP |
151 IBV_RX_HASH_DST_PORT_TCP),
152 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
154 .ip_version = MLX5_IPV6,
157 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
158 IBV_RX_HASH_DST_IPV6 |
159 IBV_RX_HASH_SRC_PORT_UDP |
160 IBV_RX_HASH_DST_PORT_UDP),
161 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
163 .ip_version = MLX5_IPV6,
166 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
167 IBV_RX_HASH_DST_IPV6),
168 .dpdk_rss_hf = (ETH_RSS_IPV6 |
171 .ip_version = MLX5_IPV6,
180 /* Number of entries in hash_rxq_init[]. */
181 const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
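/*
 * Illustrative sketch, not part of the original driver: hash_rxq_init[] can
 * be scanned to map a DPDK RSS hash field set to the corresponding Verbs
 * hash fields. The helper name below is hypothetical.
 */
#if 0 /* example only */
static uint64_t
example_rss_hf_to_hash_fields(uint64_t rss_hf)
{
	unsigned int i;

	/* Return the Verbs hash fields of the first matching entry. */
	for (i = 0; i != hash_rxq_init_n; ++i)
		if (hash_rxq_init[i].dpdk_rss_hf & rss_hf)
			return hash_rxq_init[i].hash_fields;
	return 0;
}
#endif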
183 /** Structure for holding counter stats. */
184 struct mlx5_flow_counter_stats {
185 uint64_t hits; /**< Number of packets matched by the rule. */
186 uint64_t bytes; /**< Number of bytes matched by the rule. */
189 /** Structure for Drop queue. */
190 struct mlx5_hrxq_drop {
191 struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
192 struct ibv_qp *qp; /**< Verbs queue pair. */
193 struct ibv_wq *wq; /**< Verbs work queue. */
194 struct ibv_cq *cq; /**< Verbs completion queue. */
197 /* Flow structures. */
199 uint64_t hash_fields; /**< Fields that participate in the hash. */
200 struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
201 struct ibv_flow *ibv_flow; /**< Verbs flow. */
202 struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
205 /* Drop flow structure. */
206 struct mlx5_flow_drop {
207 struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
208 struct ibv_flow *ibv_flow; /**< Verbs flow. */
212 TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
213 uint32_t mark:1; /**< Set if the flow is marked. */
214 uint32_t drop:1; /**< Set if the flow targets a drop queue. */
215 uint16_t queues_n; /**< Number of entries in queues[]. */
216 uint16_t (*queues)[]; /**< Queue indexes to use. */
217 struct rte_eth_rss_conf rss_conf; /**< RSS configuration. */
218 uint8_t rss_key[40]; /**< Copy of the RSS key. */
219 struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
220 struct mlx5_flow_counter_stats counter_stats; /**< The counter stats. */
221 struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
222 /**< Flow with Rx queue. */
225 /** Static initializer for items. */
227 (const enum rte_flow_item_type []){ \
228 __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
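/*
 * Example, illustrative only: ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
 * RTE_FLOW_ITEM_TYPE_IPV6) expands to a compound literal terminated by
 * RTE_FLOW_ITEM_TYPE_END:
 *
 *   (const enum rte_flow_item_type []){
 *           RTE_FLOW_ITEM_TYPE_IPV4,
 *           RTE_FLOW_ITEM_TYPE_IPV6,
 *           RTE_FLOW_ITEM_TYPE_END,
 *   }
 */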
231 /** Structure to generate a simple graph of layers supported by the NIC. */
232 struct mlx5_flow_items {
233 /** List of possible actions for these items. */
234 const enum rte_flow_action_type *const actions;
235 /** Bit-masks corresponding to the possibilities for the item. */
238 * Default bit-masks to use when item->mask is not provided. When
239 * \default_mask is also NULL, the full supported bit-mask (\mask) is used instead.
242 const void *default_mask;
243 /** Bit-masks size in bytes. */
244 const unsigned int mask_sz;
246 * Conversion function from rte_flow to NIC-specific flow.
249 * rte_flow item to convert.
250 * @param default_mask
251 * Default bit-masks to use when item->mask is not provided.
253 * Internal structure to store the conversion.
256 * 0 on success, a negative errno value otherwise and rte_errno is set.
259 int (*convert)(const struct rte_flow_item *item,
260 const void *default_mask,
261 struct mlx5_flow_data *data);
262 /** Size in bytes of the destination structure. */
263 const unsigned int dst_sz;
264 /** List of possible following items. */
265 const enum rte_flow_item_type *const items;
268 /** Valid actions for this PMD. */
269 static const enum rte_flow_action_type valid_actions[] = {
270 RTE_FLOW_ACTION_TYPE_DROP,
271 RTE_FLOW_ACTION_TYPE_QUEUE,
272 RTE_FLOW_ACTION_TYPE_MARK,
273 RTE_FLOW_ACTION_TYPE_FLAG,
274 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
275 RTE_FLOW_ACTION_TYPE_COUNT,
277 RTE_FLOW_ACTION_TYPE_END,
280 /** Graph of supported items and associated actions. */
281 static const struct mlx5_flow_items mlx5_flow_items[] = {
282 [RTE_FLOW_ITEM_TYPE_END] = {
283 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
284 RTE_FLOW_ITEM_TYPE_VXLAN),
286 [RTE_FLOW_ITEM_TYPE_ETH] = {
287 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
288 RTE_FLOW_ITEM_TYPE_IPV4,
289 RTE_FLOW_ITEM_TYPE_IPV6),
290 .actions = valid_actions,
291 .mask = &(const struct rte_flow_item_eth){
292 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
293 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
296 .default_mask = &rte_flow_item_eth_mask,
297 .mask_sz = sizeof(struct rte_flow_item_eth),
298 .convert = mlx5_flow_create_eth,
299 .dst_sz = sizeof(struct ibv_flow_spec_eth),
301 [RTE_FLOW_ITEM_TYPE_VLAN] = {
302 .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
303 RTE_FLOW_ITEM_TYPE_IPV6),
304 .actions = valid_actions,
305 .mask = &(const struct rte_flow_item_vlan){
308 .default_mask = &rte_flow_item_vlan_mask,
309 .mask_sz = sizeof(struct rte_flow_item_vlan),
310 .convert = mlx5_flow_create_vlan,
313 [RTE_FLOW_ITEM_TYPE_IPV4] = {
314 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
315 RTE_FLOW_ITEM_TYPE_TCP),
316 .actions = valid_actions,
317 .mask = &(const struct rte_flow_item_ipv4){
321 .type_of_service = -1,
325 .default_mask = &rte_flow_item_ipv4_mask,
326 .mask_sz = sizeof(struct rte_flow_item_ipv4),
327 .convert = mlx5_flow_create_ipv4,
328 .dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
330 [RTE_FLOW_ITEM_TYPE_IPV6] = {
331 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
332 RTE_FLOW_ITEM_TYPE_TCP),
333 .actions = valid_actions,
334 .mask = &(const struct rte_flow_item_ipv6){
337 0xff, 0xff, 0xff, 0xff,
338 0xff, 0xff, 0xff, 0xff,
339 0xff, 0xff, 0xff, 0xff,
340 0xff, 0xff, 0xff, 0xff,
343 0xff, 0xff, 0xff, 0xff,
344 0xff, 0xff, 0xff, 0xff,
345 0xff, 0xff, 0xff, 0xff,
346 0xff, 0xff, 0xff, 0xff,
353 .default_mask = &rte_flow_item_ipv6_mask,
354 .mask_sz = sizeof(struct rte_flow_item_ipv6),
355 .convert = mlx5_flow_create_ipv6,
356 .dst_sz = sizeof(struct ibv_flow_spec_ipv6),
358 [RTE_FLOW_ITEM_TYPE_UDP] = {
359 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
360 .actions = valid_actions,
361 .mask = &(const struct rte_flow_item_udp){
367 .default_mask = &rte_flow_item_udp_mask,
368 .mask_sz = sizeof(struct rte_flow_item_udp),
369 .convert = mlx5_flow_create_udp,
370 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
372 [RTE_FLOW_ITEM_TYPE_TCP] = {
373 .actions = valid_actions,
374 .mask = &(const struct rte_flow_item_tcp){
380 .default_mask = &rte_flow_item_tcp_mask,
381 .mask_sz = sizeof(struct rte_flow_item_tcp),
382 .convert = mlx5_flow_create_tcp,
383 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
385 [RTE_FLOW_ITEM_TYPE_VXLAN] = {
386 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
387 .actions = valid_actions,
388 .mask = &(const struct rte_flow_item_vxlan){
389 .vni = "\xff\xff\xff",
391 .default_mask = &rte_flow_item_vxlan_mask,
392 .mask_sz = sizeof(struct rte_flow_item_vxlan),
393 .convert = mlx5_flow_create_vxlan,
394 .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
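/*
 * Illustrative sketch, not part of the original driver: the graph above is
 * walked by checking, for each pattern item, whether its type appears in
 * the .items list of the previous item. A minimal check could look like
 * this (helper name hypothetical).
 */
#if 0 /* example only */
static int
example_item_follows(enum rte_flow_item_type prev,
		     enum rte_flow_item_type next)
{
	const enum rte_flow_item_type *t = mlx5_flow_items[prev].items;

	/* .items may be unset (NULL) for terminal items such as TCP. */
	for (; t && *t != RTE_FLOW_ITEM_TYPE_END; ++t)
		if (*t == next)
			return 1;
	return 0;
}
#endif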
398 /** Structure to pass to the conversion function. */
399 struct mlx5_flow_parse {
400 uint32_t inner; /**< Set once VXLAN is encountered. */
402 /**< Whether resources should remain after a validate. */
403 uint32_t drop:1; /**< Target is a drop queue. */
404 uint32_t mark:1; /**< Mark is present in the flow. */
405 uint32_t count:1; /**< Count is present in the flow. */
406 uint32_t mark_id; /**< Mark identifier. */
407 uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indexes to use. */
408 uint16_t queues_n; /**< Number of entries in queues[]. */
409 struct rte_eth_rss_conf rss_conf; /**< RSS configuration. */
410 uint8_t rss_key[40]; /**< Copy of the RSS key. */
411 enum hash_rxq_type layer; /**< Last pattern layer detected. */
412 struct ibv_counter_set *cs; /**< Holds the counter set for the rule. */
414 struct ibv_flow_attr *ibv_attr;
415 /**< Pointer to Verbs attributes. */
417 /**< Current position or total size of the attribute. */
418 } queue[RTE_DIM(hash_rxq_init)];
421 static const struct rte_flow_ops mlx5_flow_ops = {
422 .validate = mlx5_flow_validate,
423 .create = mlx5_flow_create,
424 .destroy = mlx5_flow_destroy,
425 .flush = mlx5_flow_flush,
426 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
427 .query = mlx5_flow_query,
431 .isolate = mlx5_flow_isolate,
434 /* Convert an FDIR request to a generic flow. */
436 struct rte_flow_attr attr;
437 struct rte_flow_action actions[2];
438 struct rte_flow_item items[4];
439 struct rte_flow_item_eth l2;
440 struct rte_flow_item_eth l2_mask;
442 struct rte_flow_item_ipv4 ipv4;
443 struct rte_flow_item_ipv6 ipv6;
446 struct rte_flow_item_udp udp;
447 struct rte_flow_item_tcp tcp;
449 struct rte_flow_action_queue queue;
452 /* Verbs specification header. */
453 struct ibv_spec_header {
454 enum ibv_flow_spec_type type;
459 * Check support for a given item.
462 * Item specification.
464 * Bit-masks covering supported fields to compare with spec, last and mask in \item.
467 * Bit-mask size in bytes.
470 * 0 on success, a negative errno value otherwise and rte_errno is set.
473 mlx5_flow_item_validate(const struct rte_flow_item *item,
474 const uint8_t *mask, unsigned int size)
476 if (!item->spec && (item->mask || item->last)) {
480 if (item->spec && !item->mask) {
482 const uint8_t *spec = item->spec;
484 for (i = 0; i < size; ++i)
485 if ((spec[i] | mask[i]) != mask[i]) {
490 if (item->last && !item->mask) {
492 const uint8_t *spec = item->last;
494 for (i = 0; i < size; ++i)
495 if ((spec[i] | mask[i]) != mask[i]) {
502 const uint8_t *spec = item->spec;
504 for (i = 0; i < size; ++i)
505 if ((spec[i] | mask[i]) != mask[i]) {
510 if (item->spec && item->last) {
513 const uint8_t *apply = mask;
519 for (i = 0; i < size; ++i) {
520 spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
521 last[i] = ((const uint8_t *)item->last)[i] & apply[i];
523 ret = memcmp(spec, last, size);
533 * Copy the RSS configuration from the user one. If rss_conf is NULL,
534 * use the driver default configuration.
537 * Internal parser structure.
539 * User RSS configuration to save.
542 * 0 on success, a negative errno value otherwise and rte_errno is set.
545 mlx5_flow_convert_rss_conf(struct mlx5_flow_parse *parser,
546 const struct rte_eth_rss_conf *rss_conf)
549 * This function is also called at the beginning of
550 * mlx5_flow_convert_actions() to initialize the parser with the
551 * device default RSS configuration.
554 if (rss_conf->rss_hf & MLX5_RSS_HF_MASK) {
558 if (rss_conf->rss_key_len != 40) {
562 if (rss_conf->rss_key_len && rss_conf->rss_key) {
563 parser->rss_conf.rss_key_len = rss_conf->rss_key_len;
564 memcpy(parser->rss_key, rss_conf->rss_key,
565 rss_conf->rss_key_len);
566 parser->rss_conf.rss_key = parser->rss_key;
568 parser->rss_conf.rss_hf = rss_conf->rss_hf;
574 * Validate flow rule attributes.
577 * Flow rule attributes.
579 * Perform verbose error reporting if not NULL.
582 * 0 on success, a negative errno value otherwise and rte_errno is set.
585 mlx5_flow_convert_attributes(const struct rte_flow_attr *attr,
586 struct rte_flow_error *error)
589 rte_flow_error_set(error, ENOTSUP,
590 RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
592 "groups are not supported");
595 if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
596 rte_flow_error_set(error, ENOTSUP,
597 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
599 "priorities are not supported");
603 rte_flow_error_set(error, ENOTSUP,
604 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
606 "egress is not supported");
609 if (!attr->ingress) {
610 rte_flow_error_set(error, ENOTSUP,
611 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
613 "only ingress is supported");
620 * Extract the requested actions into the parser.
623 * Pointer to Ethernet device.
625 * Associated actions (list terminated by the END action).
627 * Perform verbose error reporting if not NULL.
628 * @param[in, out] parser
629 * Internal parser structure.
632 * 0 on success, a negative errno value otherwise and rte_errno is set.
635 mlx5_flow_convert_actions(struct rte_eth_dev *dev,
636 const struct rte_flow_action actions[],
637 struct rte_flow_error *error,
638 struct mlx5_flow_parse *parser)
640 struct priv *priv = dev->data->dev_private;
644 * Add the default RSS configuration necessary for Verbs to create a QP
645 * even if no RSS is requested.
647 ret = mlx5_flow_convert_rss_conf(parser,
648 (const struct rte_eth_rss_conf *)
652 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
653 if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
655 } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
657 } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
658 const struct rte_flow_action_queue *queue =
659 (const struct rte_flow_action_queue *)
664 if (!queue || (queue->index > (priv->rxqs_n - 1)))
665 goto exit_action_not_supported;
666 for (n = 0; n < parser->queues_n; ++n) {
667 if (parser->queues[n] == queue->index) {
672 if (parser->queues_n > 1 && !found) {
673 rte_flow_error_set(error, ENOTSUP,
674 RTE_FLOW_ERROR_TYPE_ACTION,
676 "queue action not in RSS queues");
680 parser->queues_n = 1;
681 parser->queues[0] = queue->index;
683 } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
684 const struct rte_flow_action_rss *rss =
685 (const struct rte_flow_action_rss *)
689 if (!rss || !rss->num) {
690 rte_flow_error_set(error, EINVAL,
691 RTE_FLOW_ERROR_TYPE_ACTION,
696 if (parser->queues_n == 1) {
699 assert(parser->queues_n);
700 for (n = 0; n < rss->num; ++n) {
701 if (parser->queues[0] ==
708 rte_flow_error_set(error, ENOTSUP,
709 RTE_FLOW_ERROR_TYPE_ACTION,
711 "queue action not in RSS"
716 for (n = 0; n < rss->num; ++n) {
717 if (rss->queue[n] >= priv->rxqs_n) {
718 rte_flow_error_set(error, EINVAL,
719 RTE_FLOW_ERROR_TYPE_ACTION,
721 "queue id > number of"
726 for (n = 0; n < rss->num; ++n)
727 parser->queues[n] = rss->queue[n];
728 parser->queues_n = rss->num;
729 if (mlx5_flow_convert_rss_conf(parser, rss->rss_conf)) {
730 rte_flow_error_set(error, EINVAL,
731 RTE_FLOW_ERROR_TYPE_ACTION,
733 "wrong RSS configuration");
736 } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
737 const struct rte_flow_action_mark *mark =
738 (const struct rte_flow_action_mark *)
742 rte_flow_error_set(error, EINVAL,
743 RTE_FLOW_ERROR_TYPE_ACTION,
745 "mark must be defined");
747 } else if (mark->id >= MLX5_FLOW_MARK_MAX) {
748 rte_flow_error_set(error, ENOTSUP,
749 RTE_FLOW_ERROR_TYPE_ACTION,
751 "mark must be between 0"
756 parser->mark_id = mark->id;
757 } else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
759 } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
760 priv->config.flow_counter_en) {
763 goto exit_action_not_supported;
766 if (parser->drop && parser->mark)
768 if (!parser->queues_n && !parser->drop) {
769 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
770 NULL, "no valid action");
774 exit_action_not_supported:
775 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
776 actions, "action not supported");
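/*
 * Example, illustrative only: a minimal action list accepted by the parser
 * above, steering matched packets to Rx queue 0.
 */
#if 0 /* example only */
static const struct rte_flow_action_queue example_queue = { .index = 0 };
static const struct rte_flow_action example_actions[] = {
	{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &example_queue },
	{ .type = RTE_FLOW_ACTION_TYPE_END },
};
#endif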
784 * Pattern specification (list terminated by the END pattern item).
786 * Perform verbose error reporting if not NULL.
787 * @param[in, out] parser
788 * Internal parser structure.
791 * 0 on success, a negative errno value otherwise and rte_errno is set.
794 mlx5_flow_convert_items_validate(const struct rte_flow_item items[],
795 struct rte_flow_error *error,
796 struct mlx5_flow_parse *parser)
798 const struct mlx5_flow_items *cur_item = mlx5_flow_items;
802 /* Initialise the offsets to start after the verbs attribute. */
803 for (i = 0; i != hash_rxq_init_n; ++i)
804 parser->queue[i].offset = sizeof(struct ibv_flow_attr);
805 for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
806 const struct mlx5_flow_items *token = NULL;
809 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
813 cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
815 if (cur_item->items[i] == items->type) {
816 token = &mlx5_flow_items[items->type];
821 goto exit_item_not_supported;
823 ret = mlx5_flow_item_validate(items,
824 (const uint8_t *)cur_item->mask,
827 goto exit_item_not_supported;
828 if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
830 rte_flow_error_set(error, ENOTSUP,
831 RTE_FLOW_ERROR_TYPE_ITEM,
833 "cannot recognize multiple"
834 " VXLAN encapsulations");
837 parser->inner = IBV_FLOW_SPEC_INNER;
840 parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
842 for (n = 0; n != hash_rxq_init_n; ++n)
843 parser->queue[n].offset += cur_item->dst_sz;
847 parser->queue[HASH_RXQ_ETH].offset +=
848 sizeof(struct ibv_flow_spec_action_drop);
851 for (i = 0; i != hash_rxq_init_n; ++i)
852 parser->queue[i].offset +=
853 sizeof(struct ibv_flow_spec_action_tag);
856 unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
858 for (i = 0; i != hash_rxq_init_n; ++i)
859 parser->queue[i].offset += size;
862 exit_item_not_supported:
863 return rte_flow_error_set(error, -ret, RTE_FLOW_ERROR_TYPE_ITEM,
864 items, "item not supported");
868 * Allocate memory space to store verbs flow attributes.
871 * Amount of bytes to allocate.
873 * Perform verbose error reporting if not NULL.
876 * A verbs flow attribute on success, NULL otherwise and rte_errno is set.
878 static struct ibv_flow_attr *
879 mlx5_flow_convert_allocate(unsigned int size, struct rte_flow_error *error)
881 struct ibv_flow_attr *ibv_attr;
883 ibv_attr = rte_calloc(__func__, 1, size, 0);
885 rte_flow_error_set(error, ENOMEM,
886 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
888 "cannot allocate verbs spec attributes");
895 * Give inner packet matching a higher priority than non-inner matching.
898 * @param[in, out] parser
899 * Internal parser structure.
901 * User flow attribute.
904 mlx5_flow_update_priority(struct mlx5_flow_parse *parser,
905 const struct rte_flow_attr *attr)
910 parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
912 hash_rxq_init[HASH_RXQ_ETH].flow_priority;
915 for (i = 0; i != hash_rxq_init_n; ++i) {
916 if (parser->queue[i].ibv_attr) {
917 parser->queue[i].ibv_attr->priority =
919 hash_rxq_init[i].flow_priority -
920 (parser->inner ? 1 : 0);
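/*
 * Worked example, illustrative only: an outer TCPv4 flow gets
 * hash_rxq_init[HASH_RXQ_TCPV4].flow_priority, while the same flow behind
 * a VXLAN tunnel (parser->inner set) gets one level less. Since lower
 * values mean higher priority in Verbs, inner matches take precedence
 * over outer ones.
 */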
926 * Finalise verbs flow attributes.
928 * @param[in, out] parser
929 * Internal parser structure.
932 mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
934 const unsigned int ipv4 =
935 hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
936 const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 : HASH_RXQ_TCPV6;
937 const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
938 const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 : HASH_RXQ_TCPV4;
939 const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 : HASH_RXQ_IPV4;
940 const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
943 /* Remove any other flow not matching the pattern. */
944 if (parser->queues_n == 1) {
945 for (i = 0; i != hash_rxq_init_n; ++i) {
946 if (i == HASH_RXQ_ETH)
948 rte_free(parser->queue[i].ibv_attr);
949 parser->queue[i].ibv_attr = NULL;
953 if (parser->layer == HASH_RXQ_ETH) {
957 * This layer becomes useless as the pattern defines deeper layers.
960 rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
961 parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
963 /* Remove the opposite kind of layer, e.g. IPv6 if the pattern is IPv4. */
964 for (i = ohmin; i != (ohmax + 1); ++i) {
965 if (!parser->queue[i].ibv_attr)
967 rte_free(parser->queue[i].ibv_attr);
968 parser->queue[i].ibv_attr = NULL;
970 /* Remove impossible flows according to the RSS configuration. */
971 if (hash_rxq_init[parser->layer].dpdk_rss_hf &
972 parser->rss_conf.rss_hf) {
973 /* Remove any other flow. */
974 for (i = hmin; i != (hmax + 1); ++i) {
975 if ((i == parser->layer) ||
976 (!parser->queue[i].ibv_attr))
978 rte_free(parser->queue[i].ibv_attr);
979 parser->queue[i].ibv_attr = NULL;
981 } else if (!parser->queue[ip].ibv_attr) {
982 /* No RSS possible with the current configuration. */
983 parser->queues_n = 1;
988 * Fill missing layers in verbs specifications, or compute the correct
989 * offset to allocate the memory space for the attributes and specifications.
992 for (i = 0; i != hash_rxq_init_n - 1; ++i) {
994 struct ibv_flow_spec_ipv4_ext ipv4;
995 struct ibv_flow_spec_ipv6 ipv6;
996 struct ibv_flow_spec_tcp_udp udp_tcp;
1001 if (i == parser->layer)
1003 if (parser->layer == HASH_RXQ_ETH) {
1004 if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
1005 size = sizeof(struct ibv_flow_spec_ipv4_ext);
1006 specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
1007 .type = IBV_FLOW_SPEC_IPV4_EXT,
1011 size = sizeof(struct ibv_flow_spec_ipv6);
1012 specs.ipv6 = (struct ibv_flow_spec_ipv6){
1013 .type = IBV_FLOW_SPEC_IPV6,
1017 if (parser->queue[i].ibv_attr) {
1018 dst = (void *)((uintptr_t)
1019 parser->queue[i].ibv_attr +
1020 parser->queue[i].offset);
1021 memcpy(dst, &specs, size);
1022 ++parser->queue[i].ibv_attr->num_of_specs;
1024 parser->queue[i].offset += size;
1026 if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
1027 (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
1028 size = sizeof(struct ibv_flow_spec_tcp_udp);
1029 specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
1030 .type = ((i == HASH_RXQ_UDPV4 ||
1031 i == HASH_RXQ_UDPV6) ?
1036 if (parser->queue[i].ibv_attr) {
1037 dst = (void *)((uintptr_t)
1038 parser->queue[i].ibv_attr +
1039 parser->queue[i].offset);
1040 memcpy(dst, &specs, size);
1041 ++parser->queue[i].ibv_attr->num_of_specs;
1043 parser->queue[i].offset += size;
1049 * Validate and convert a flow supported by the NIC.
1052 * Pointer to Ethernet device.
1054 * Flow rule attributes.
1055 * @param[in] pattern
1056 * Pattern specification (list terminated by the END pattern item).
1057 * @param[in] actions
1058 * Associated actions (list terminated by the END action).
1060 * Perform verbose error reporting if not NULL.
1061 * @param[in, out] parser
1062 * Internal parser structure.
1065 * 0 on success, a negative errno value otherwise and rte_errno is set.
1068 mlx5_flow_convert(struct rte_eth_dev *dev,
1069 const struct rte_flow_attr *attr,
1070 const struct rte_flow_item items[],
1071 const struct rte_flow_action actions[],
1072 struct rte_flow_error *error,
1073 struct mlx5_flow_parse *parser)
1075 const struct mlx5_flow_items *cur_item = mlx5_flow_items;
1079 /* First step. Validate the attributes, items and actions. */
1080 *parser = (struct mlx5_flow_parse){
1081 .create = parser->create,
1082 .layer = HASH_RXQ_ETH,
1083 .mark_id = MLX5_FLOW_MARK_DEFAULT,
1085 ret = mlx5_flow_convert_attributes(attr, error);
1088 ret = mlx5_flow_convert_actions(dev, actions, error, parser);
1091 ret = mlx5_flow_convert_items_validate(items, error, parser);
1094 mlx5_flow_convert_finalise(parser);
1097 * Allocate the memory space to store verbs specifications.
1100 unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;
1102 parser->queue[HASH_RXQ_ETH].ibv_attr =
1103 mlx5_flow_convert_allocate(offset, error);
1104 if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
1106 parser->queue[HASH_RXQ_ETH].offset =
1107 sizeof(struct ibv_flow_attr);
1109 for (i = 0; i != hash_rxq_init_n; ++i) {
1110 unsigned int offset;
1112 if (!(parser->rss_conf.rss_hf &
1113 hash_rxq_init[i].dpdk_rss_hf) &&
1114 (i != HASH_RXQ_ETH))
1116 offset = parser->queue[i].offset;
1117 parser->queue[i].ibv_attr =
1118 mlx5_flow_convert_allocate(offset, error);
1119 if (!parser->queue[i].ibv_attr)
1121 parser->queue[i].offset = sizeof(struct ibv_flow_attr);
1124 /* Third step. Parse the items and fill the specifications. */
1126 for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
1127 struct mlx5_flow_data data = {
1132 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
1134 cur_item = &mlx5_flow_items[items->type];
1135 ret = cur_item->convert(items,
1136 (cur_item->default_mask ?
1137 cur_item->default_mask :
1144 mlx5_flow_create_flag_mark(parser, parser->mark_id);
1145 if (parser->count && parser->create) {
1146 mlx5_flow_create_count(dev, parser);
1148 goto exit_count_error;
1151 * Last step. Complete the missing specifications to reach the RSS configuration.
1155 mlx5_flow_convert_finalise(parser);
1156 mlx5_flow_update_priority(parser, attr);
1158 /* Only verification is expected, all resources should be released. */
1159 if (!parser->create) {
1160 for (i = 0; i != hash_rxq_init_n; ++i) {
1161 if (parser->queue[i].ibv_attr) {
1162 rte_free(parser->queue[i].ibv_attr);
1163 parser->queue[i].ibv_attr = NULL;
1169 for (i = 0; i != hash_rxq_init_n; ++i) {
1170 if (parser->queue[i].ibv_attr) {
1171 rte_free(parser->queue[i].ibv_attr);
1172 parser->queue[i].ibv_attr = NULL;
1175 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1176 NULL, "cannot allocate verbs spec attributes");
1179 rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1180 NULL, "cannot create counter");
1185 * Copy the created specification into the flow.
1188 * Internal parser structure.
1190 * Created specification.
1192 * Size in bytes of the specification to copy.
1195 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
1201 for (i = 0; i != hash_rxq_init_n; ++i) {
1202 if (!parser->queue[i].ibv_attr)
1204 /* Specification must be the same L3 type or none. */
1205 if (parser->layer == HASH_RXQ_ETH ||
1206 (hash_rxq_init[parser->layer].ip_version ==
1207 hash_rxq_init[i].ip_version) ||
1208 (hash_rxq_init[i].ip_version == 0)) {
1209 dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1210 parser->queue[i].offset);
1211 memcpy(dst, src, size);
1212 ++parser->queue[i].ibv_attr->num_of_specs;
1213 parser->queue[i].offset += size;
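/*
 * Illustrative layout, not part of the original source: each
 * queue[i].ibv_attr buffer starts with struct ibv_flow_attr followed by
 * the packed Verbs specifications, so every copy lands at the running
 * offset:
 *
 *   +---------------+-----------+-----------+----
 *   | ibv_flow_attr | spec #0   | spec #1   | ...
 *   +---------------+-----------+-----------+----
 *   ^ ibv_attr       ^ offset advances by each spec's dst_sz
 */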
1219 * Convert Ethernet item to Verbs specification.
1222 * Item specification.
1223 * @param default_mask[in]
1224 * Default bit-masks to use when item->mask is not provided.
1225 * @param data[in, out]
1229 * 0 on success, a negative errno value otherwise and rte_errno is set.
1232 mlx5_flow_create_eth(const struct rte_flow_item *item,
1233 const void *default_mask,
1234 struct mlx5_flow_data *data)
1236 const struct rte_flow_item_eth *spec = item->spec;
1237 const struct rte_flow_item_eth *mask = item->mask;
1238 struct mlx5_flow_parse *parser = data->parser;
1239 const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1240 struct ibv_flow_spec_eth eth = {
1241 .type = parser->inner | IBV_FLOW_SPEC_ETH,
1245 /* Don't update layer for the inner pattern. */
1247 parser->layer = HASH_RXQ_ETH;
1252 mask = default_mask;
1253 memcpy(ð.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
1254 memcpy(ð.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
1255 eth.val.ether_type = spec->type;
1256 memcpy(ð.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
1257 memcpy(ð.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
1258 eth.mask.ether_type = mask->type;
1259 /* Remove unwanted bits from values. */
1260 for (i = 0; i < ETHER_ADDR_LEN; ++i) {
1261 eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
1262 eth.val.src_mac[i] &= eth.mask.src_mac[i];
1264 eth.val.ether_type &= eth.mask.ether_type;
1266 mlx5_flow_create_copy(parser, ð, eth_size);
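/*
 * Example, illustrative only: an Ethernet item matching one destination
 * MAC address while leaving the source address and EtherType wildcarded.
 */
#if 0 /* example only */
static const struct rte_flow_item_eth example_eth_spec = {
	.dst.addr_bytes = "\x00\x11\x22\x33\x44\x55",
};
static const struct rte_flow_item_eth example_eth_mask = {
	.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
};
static const struct rte_flow_item example_eth_item = {
	.type = RTE_FLOW_ITEM_TYPE_ETH,
	.spec = &example_eth_spec,
	.mask = &example_eth_mask,
};
#endif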
1271 * Convert VLAN item to Verbs specification.
1274 * Item specification.
1275 * @param default_mask[in]
1276 * Default bit-masks to use when item->mask is not provided.
1277 * @param data[in, out]
1281 * 0 on success, a negative errno value otherwise and rte_errno is set.
1284 mlx5_flow_create_vlan(const struct rte_flow_item *item,
1285 const void *default_mask,
1286 struct mlx5_flow_data *data)
1288 const struct rte_flow_item_vlan *spec = item->spec;
1289 const struct rte_flow_item_vlan *mask = item->mask;
1290 struct mlx5_flow_parse *parser = data->parser;
1291 struct ibv_flow_spec_eth *eth;
1292 const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1297 mask = default_mask;
1299 for (i = 0; i != hash_rxq_init_n; ++i) {
1300 if (!parser->queue[i].ibv_attr)
1303 eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1304 parser->queue[i].offset - eth_size);
1305 eth->val.vlan_tag = spec->tci;
1306 eth->mask.vlan_tag = mask->tci;
1307 eth->val.vlan_tag &= eth->mask.vlan_tag;
1314 * Convert IPv4 item to Verbs specification.
1317 * Item specification.
1318 * @param default_mask[in]
1319 * Default bit-masks to use when item->mask is not provided.
1320 * @param data[in, out]
1324 * 0 on success, a negative errno value otherwise and rte_errno is set.
1327 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
1328 const void *default_mask,
1329 struct mlx5_flow_data *data)
1331 const struct rte_flow_item_ipv4 *spec = item->spec;
1332 const struct rte_flow_item_ipv4 *mask = item->mask;
1333 struct mlx5_flow_parse *parser = data->parser;
1334 unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
1335 struct ibv_flow_spec_ipv4_ext ipv4 = {
1336 .type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
1340 /* Don't update layer for the inner pattern. */
1342 parser->layer = HASH_RXQ_IPV4;
1345 mask = default_mask;
1346 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
1347 .src_ip = spec->hdr.src_addr,
1348 .dst_ip = spec->hdr.dst_addr,
1349 .proto = spec->hdr.next_proto_id,
1350 .tos = spec->hdr.type_of_service,
1352 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
1353 .src_ip = mask->hdr.src_addr,
1354 .dst_ip = mask->hdr.dst_addr,
1355 .proto = mask->hdr.next_proto_id,
1356 .tos = mask->hdr.type_of_service,
1358 /* Remove unwanted bits from values. */
1359 ipv4.val.src_ip &= ipv4.mask.src_ip;
1360 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
1361 ipv4.val.proto &= ipv4.mask.proto;
1362 ipv4.val.tos &= ipv4.mask.tos;
1364 mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
1369 * Convert IPv6 item to Verbs specification.
1372 * Item specification.
1373 * @param default_mask[in]
1374 * Default bit-masks to use when item->mask is not provided.
1375 * @param data[in, out]
1379 * 0 on success, a negative errno value otherwise and rte_errno is set.
1382 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
1383 const void *default_mask,
1384 struct mlx5_flow_data *data)
1386 const struct rte_flow_item_ipv6 *spec = item->spec;
1387 const struct rte_flow_item_ipv6 *mask = item->mask;
1388 struct mlx5_flow_parse *parser = data->parser;
1389 unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
1390 struct ibv_flow_spec_ipv6 ipv6 = {
1391 .type = parser->inner | IBV_FLOW_SPEC_IPV6,
1395 /* Don't update layer for the inner pattern. */
1397 parser->layer = HASH_RXQ_IPV6;
1400 uint32_t vtc_flow_val;
1401 uint32_t vtc_flow_mask;
1404 mask = default_mask;
1405 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
1406 RTE_DIM(ipv6.val.src_ip));
1407 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
1408 RTE_DIM(ipv6.val.dst_ip));
1409 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
1410 RTE_DIM(ipv6.mask.src_ip));
1411 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
1412 RTE_DIM(ipv6.mask.dst_ip));
1413 vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
1414 vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
1415 ipv6.val.flow_label =
1416 rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
1418 ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
1420 ipv6.val.next_hdr = spec->hdr.proto;
1421 ipv6.val.hop_limit = spec->hdr.hop_limits;
1422 ipv6.mask.flow_label =
1423 rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
1425 ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
1427 ipv6.mask.next_hdr = mask->hdr.proto;
1428 ipv6.mask.hop_limit = mask->hdr.hop_limits;
1429 /* Remove unwanted bits from values. */
1430 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
1431 ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
1432 ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
1434 ipv6.val.flow_label &= ipv6.mask.flow_label;
1435 ipv6.val.traffic_class &= ipv6.mask.traffic_class;
1436 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
1437 ipv6.val.hop_limit &= ipv6.mask.hop_limit;
1439 mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
1444 * Convert UDP item to Verbs specification.
1447 * Item specification.
1448 * @param default_mask[in]
1449 * Default bit-masks to use when item->mask is not provided.
1450 * @param data[in, out]
1454 * 0 on success, a negative errno value otherwise and rte_errno is set.
1457 mlx5_flow_create_udp(const struct rte_flow_item *item,
1458 const void *default_mask,
1459 struct mlx5_flow_data *data)
1461 const struct rte_flow_item_udp *spec = item->spec;
1462 const struct rte_flow_item_udp *mask = item->mask;
1463 struct mlx5_flow_parse *parser = data->parser;
1464 unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1465 struct ibv_flow_spec_tcp_udp udp = {
1466 .type = parser->inner | IBV_FLOW_SPEC_UDP,
1470 /* Don't update layer for the inner pattern. */
1471 if (!parser->inner) {
1472 if (parser->layer == HASH_RXQ_IPV4)
1473 parser->layer = HASH_RXQ_UDPV4;
1475 parser->layer = HASH_RXQ_UDPV6;
1479 mask = default_mask;
1480 udp.val.dst_port = spec->hdr.dst_port;
1481 udp.val.src_port = spec->hdr.src_port;
1482 udp.mask.dst_port = mask->hdr.dst_port;
1483 udp.mask.src_port = mask->hdr.src_port;
1484 /* Remove unwanted bits from values. */
1485 udp.val.src_port &= udp.mask.src_port;
1486 udp.val.dst_port &= udp.mask.dst_port;
1488 mlx5_flow_create_copy(parser, &udp, udp_size);
1493 * Convert TCP item to Verbs specification.
1496 * Item specification.
1497 * @param default_mask[in]
1498 * Default bit-masks to use when item->mask is not provided.
1499 * @param data[in, out]
1503 * 0 on success, a negative errno value otherwise and rte_errno is set.
1506 mlx5_flow_create_tcp(const struct rte_flow_item *item,
1507 const void *default_mask,
1508 struct mlx5_flow_data *data)
1510 const struct rte_flow_item_tcp *spec = item->spec;
1511 const struct rte_flow_item_tcp *mask = item->mask;
1512 struct mlx5_flow_parse *parser = data->parser;
1513 unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1514 struct ibv_flow_spec_tcp_udp tcp = {
1515 .type = parser->inner | IBV_FLOW_SPEC_TCP,
1519 /* Don't update layer for the inner pattern. */
1520 if (!parser->inner) {
1521 if (parser->layer == HASH_RXQ_IPV4)
1522 parser->layer = HASH_RXQ_TCPV4;
1524 parser->layer = HASH_RXQ_TCPV6;
1528 mask = default_mask;
1529 tcp.val.dst_port = spec->hdr.dst_port;
1530 tcp.val.src_port = spec->hdr.src_port;
1531 tcp.mask.dst_port = mask->hdr.dst_port;
1532 tcp.mask.src_port = mask->hdr.src_port;
1533 /* Remove unwanted bits from values. */
1534 tcp.val.src_port &= tcp.mask.src_port;
1535 tcp.val.dst_port &= tcp.mask.dst_port;
1537 mlx5_flow_create_copy(parser, &tcp, tcp_size);
1542 * Convert VXLAN item to Verbs specification.
1545 * Item specification.
1546 * @param default_mask[in]
1547 * Default bit-masks to use when item->mask is not provided.
1548 * @param data[in, out]
1552 * 0 on success, a negative errno value otherwise and rte_errno is set.
1555 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
1556 const void *default_mask,
1557 struct mlx5_flow_data *data)
1559 const struct rte_flow_item_vxlan *spec = item->spec;
1560 const struct rte_flow_item_vxlan *mask = item->mask;
1561 struct mlx5_flow_parse *parser = data->parser;
1562 unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1563 struct ibv_flow_spec_tunnel vxlan = {
1564 .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1573 parser->inner = IBV_FLOW_SPEC_INNER;
1576 mask = default_mask;
1577 memcpy(&id.vni[1], spec->vni, 3);
1578 vxlan.val.tunnel_id = id.vlan_id;
1579 memcpy(&id.vni[1], mask->vni, 3);
1580 vxlan.mask.tunnel_id = id.vlan_id;
1581 /* Remove unwanted bits from values. */
1582 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1585 * Tunnel id 0 is equivalent to not adding a VXLAN layer: if only this
1586 * layer is defined in the Verbs specification, it is interpreted as a
1587 * wildcard and all packets will match this rule; if it follows a full
1588 * stack of layers (e.g. eth / ipv4 / udp), all packets matching those
1589 * layers will also match this rule.
1590 * To avoid such a situation, VNI 0 is currently refused.
1592 if (!vxlan.val.tunnel_id)
1593 return rte_flow_error_set(data->error, EINVAL,
1594 RTE_FLOW_ERROR_TYPE_ITEM,
1596 "VxLAN vni cannot be 0");
1597 mlx5_flow_create_copy(parser, &vxlan, size);
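/*
 * Illustrative note, not part of the original source: copying the 24-bit
 * VNI into bytes 1..3 of the id union above yields the 32-bit big-endian
 * tunnel_id expected by Verbs, e.g. VNI 0x123456 becomes 0x00123456.
 */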
1602 * Convert mark/flag action to Verbs specification.
1605 * Internal parser structure.
1610 * 0 on success, a negative errno value otherwise and rte_errno is set.
1613 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
1615 unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1616 struct ibv_flow_spec_action_tag tag = {
1617 .type = IBV_FLOW_SPEC_ACTION_TAG,
1619 .tag_id = mlx5_flow_mark_set(mark_id),
1622 assert(parser->mark);
1623 mlx5_flow_create_copy(parser, &tag, size);
1628 * Convert count action to Verbs specification.
1631 * Pointer to Ethernet device.
1633 * Pointer to MLX5 flow parser structure.
1636 * 0 on success, a negative errno value otherwise and rte_errno is set.
1639 mlx5_flow_create_count(struct rte_eth_dev *dev __rte_unused,
1640 struct mlx5_flow_parse *parser __rte_unused)
1642 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
1643 struct priv *priv = dev->data->dev_private;
1644 unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
1645 struct ibv_counter_set_init_attr init_attr = {0};
1646 struct ibv_flow_spec_counter_action counter = {
1647 .type = IBV_FLOW_SPEC_ACTION_COUNT,
1649 .counter_set_handle = 0,
1652 init_attr.counter_set_id = 0;
1653 parser->cs = mlx5_glue->create_counter_set(priv->ctx, &init_attr);
1658 counter.counter_set_handle = parser->cs->handle;
1659 mlx5_flow_create_copy(parser, &counter, size);
1665 * Complete flow rule creation with a drop queue.
1668 * Pointer to Ethernet device.
1670 * Internal parser structure.
1672 * Pointer to the rte_flow.
1674 * Perform verbose error reporting if not NULL.
1677 * 0 on success, a negative errno value otherwise and rte_errno is set.
1680 mlx5_flow_create_action_queue_drop(struct rte_eth_dev *dev,
1681 struct mlx5_flow_parse *parser,
1682 struct rte_flow *flow,
1683 struct rte_flow_error *error)
1685 struct priv *priv = dev->data->dev_private;
1686 struct ibv_flow_spec_action_drop *drop;
1687 unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
1692 drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
1693 parser->queue[HASH_RXQ_ETH].offset);
1694 *drop = (struct ibv_flow_spec_action_drop){
1695 .type = IBV_FLOW_SPEC_ACTION_DROP,
1698 ++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
1699 parser->queue[HASH_RXQ_ETH].offset += size;
1700 flow->frxq[HASH_RXQ_ETH].ibv_attr =
1701 parser->queue[HASH_RXQ_ETH].ibv_attr;
1703 flow->cs = parser->cs;
1704 if (!priv->dev->data->dev_started)
1706 parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
1707 flow->frxq[HASH_RXQ_ETH].ibv_flow =
1708 mlx5_glue->create_flow(priv->flow_drop_queue->qp,
1709 flow->frxq[HASH_RXQ_ETH].ibv_attr);
1710 if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1711 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1712 NULL, "flow rule creation failure");
1718 if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1719 claim_zero(mlx5_glue->destroy_flow
1720 (flow->frxq[HASH_RXQ_ETH].ibv_flow));
1721 flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
1723 if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
1724 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
1725 flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
1728 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
1736 * Create hash Rx queues when RSS is enabled.
1739 * Pointer to Ethernet device.
1741 * Internal parser structure.
1743 * Pointer to the rte_flow.
1745 * Perform verbose error reporting if not NULL.
1748 * 0 on success, a negative errno value otherwise and rte_errno is set.
1751 mlx5_flow_create_action_queue_rss(struct rte_eth_dev *dev,
1752 struct mlx5_flow_parse *parser,
1753 struct rte_flow *flow,
1754 struct rte_flow_error *error)
1756 struct priv *priv = dev->data->dev_private;
1759 for (i = 0; i != hash_rxq_init_n; ++i) {
1760 uint64_t hash_fields;
1762 if (!parser->queue[i].ibv_attr)
1764 flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
1765 parser->queue[i].ibv_attr = NULL;
1766 hash_fields = hash_rxq_init[i].hash_fields;
1767 if (!priv->dev->data->dev_started)
1769 flow->frxq[i].hrxq =
1771 parser->rss_conf.rss_key,
1772 parser->rss_conf.rss_key_len,
1776 if (flow->frxq[i].hrxq)
1778 flow->frxq[i].hrxq =
1780 parser->rss_conf.rss_key,
1781 parser->rss_conf.rss_key_len,
1785 if (!flow->frxq[i].hrxq) {
1786 return rte_flow_error_set(error, ENOMEM,
1787 RTE_FLOW_ERROR_TYPE_HANDLE,
1789 "cannot create hash rxq");
1796 * Complete flow rule creation.
1799 * Pointer to Ethernet device.
1801 * Internal parser structure.
1803 * Pointer to the rte_flow.
1805 * Perform verbose error reporting if not NULL.
1808 * 0 on success, a negative errno value otherwise and rte_errno is set.
1811 mlx5_flow_create_action_queue(struct rte_eth_dev *dev,
1812 struct mlx5_flow_parse *parser,
1813 struct rte_flow *flow,
1814 struct rte_flow_error *error)
1816 struct priv *priv = dev->data->dev_private;
1819 unsigned int flows_n = 0;
1823 assert(!parser->drop);
1824 ret = mlx5_flow_create_action_queue_rss(dev, parser, flow, error);
1828 flow->cs = parser->cs;
1829 if (!priv->dev->data->dev_started)
1831 for (i = 0; i != hash_rxq_init_n; ++i) {
1832 if (!flow->frxq[i].hrxq)
1834 flow->frxq[i].ibv_flow =
1835 mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
1836 flow->frxq[i].ibv_attr);
1837 if (!flow->frxq[i].ibv_flow) {
1838 rte_flow_error_set(error, ENOMEM,
1839 RTE_FLOW_ERROR_TYPE_HANDLE,
1840 NULL, "flow rule creation failure");
1844 DRV_LOG(DEBUG, "port %u %p type %d QP %p ibv_flow %p",
1847 (void *)flow->frxq[i].hrxq,
1848 (void *)flow->frxq[i].ibv_flow);
1851 rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE,
1852 NULL, "internal error in flow creation");
1855 for (i = 0; i != parser->queues_n; ++i) {
1856 struct mlx5_rxq_data *q =
1857 (*priv->rxqs)[parser->queues[i]];
1859 q->mark |= parser->mark;
1863 ret = rte_errno; /* Save rte_errno before cleanup. */
1865 for (i = 0; i != hash_rxq_init_n; ++i) {
1866 if (flow->frxq[i].ibv_flow) {
1867 struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;
1869 claim_zero(mlx5_glue->destroy_flow(ibv_flow));
1871 if (flow->frxq[i].hrxq)
1872 mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
1873 if (flow->frxq[i].ibv_attr)
1874 rte_free(flow->frxq[i].ibv_attr);
1877 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
1881 rte_errno = ret; /* Restore rte_errno. */
1889 * Pointer to Ethernet device.
1891 * Pointer to a TAILQ flow list.
1893 * Flow rule attributes.
1894 * @param[in] pattern
1895 * Pattern specification (list terminated by the END pattern item).
1896 * @param[in] actions
1897 * Associated actions (list terminated by the END action).
1899 * Perform verbose error reporting if not NULL.
1902 * A flow on success, NULL otherwise and rte_errno is set.
1904 static struct rte_flow *
1905 mlx5_flow_list_create(struct rte_eth_dev *dev,
1906 struct mlx5_flows *list,
1907 const struct rte_flow_attr *attr,
1908 const struct rte_flow_item items[],
1909 const struct rte_flow_action actions[],
1910 struct rte_flow_error *error)
1912 struct mlx5_flow_parse parser = { .create = 1, };
1913 struct rte_flow *flow = NULL;
1917 ret = mlx5_flow_convert(dev, attr, items, actions, error, &parser);
1920 flow = rte_calloc(__func__, 1,
1921 sizeof(*flow) + parser.queues_n * sizeof(uint16_t),
1924 rte_flow_error_set(error, ENOMEM,
1925 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1927 "cannot allocate flow memory");
1930 /* Copy queue configuration. */
1931 flow->queues = (uint16_t (*)[])(flow + 1);
1932 memcpy(flow->queues, parser.queues, parser.queues_n * sizeof(uint16_t));
1933 flow->queues_n = parser.queues_n;
1934 flow->mark = parser.mark;
1935 /* Copy RSS configuration. */
1936 flow->rss_conf = parser.rss_conf;
1937 flow->rss_conf.rss_key = flow->rss_key;
1938 memcpy(flow->rss_key, parser.rss_key, parser.rss_conf.rss_key_len);
1939 /* Finalise the flow. */
1941 ret = mlx5_flow_create_action_queue_drop(dev, &parser, flow,
1944 ret = mlx5_flow_create_action_queue(dev, &parser, flow, error);
1947 TAILQ_INSERT_TAIL(list, flow, next);
1948 DRV_LOG(DEBUG, "port %u flow created %p", dev->data->port_id,
1952 DRV_LOG(ERR, "port %u flow creation error: %s", dev->data->port_id,
1954 for (i = 0; i != hash_rxq_init_n; ++i) {
1955 if (parser.queue[i].ibv_attr)
1956 rte_free(parser.queue[i].ibv_attr);
1963 * Validate a flow supported by the NIC.
1965 * @see rte_flow_validate()
1969 mlx5_flow_validate(struct rte_eth_dev *dev,
1970 const struct rte_flow_attr *attr,
1971 const struct rte_flow_item items[],
1972 const struct rte_flow_action actions[],
1973 struct rte_flow_error *error)
1975 struct mlx5_flow_parse parser = { .create = 0, };
1977 return mlx5_flow_convert(dev, attr, items, actions, error, &parser);
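/*
 * Illustrative usage, not part of the original source: an application
 * typically validates a rule before creating it. port_id, attr, pattern
 * and actions are placeholders.
 */
#if 0 /* example only */
	struct rte_flow_error err;
	struct rte_flow *f = NULL;

	if (!rte_flow_validate(port_id, &attr, pattern, actions, &err))
		f = rte_flow_create(port_id, &attr, pattern, actions, &err);
#endif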
1983 * @see rte_flow_create()
1987 mlx5_flow_create(struct rte_eth_dev *dev,
1988 const struct rte_flow_attr *attr,
1989 const struct rte_flow_item items[],
1990 const struct rte_flow_action actions[],
1991 struct rte_flow_error *error)
1993 struct priv *priv = dev->data->dev_private;
1995 return mlx5_flow_list_create(dev, &priv->flows, attr, items, actions,
2000 * Destroy a flow in a list.
2003 * Pointer to Ethernet device.
2005 * Pointer to a TAILQ flow list.
2010 mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
2011 struct rte_flow *flow)
2013 struct priv *priv = dev->data->dev_private;
2016 if (flow->drop || !flow->mark)
2018 for (i = 0; i != flow->queues_n; ++i) {
2019 struct rte_flow *tmp;
2023 * To remove the mark from the queue, the queue must not be
2024 * present in any other marked flow (RSS or not).
2026 TAILQ_FOREACH(tmp, list, next) {
2028 uint16_t *tqs = NULL;
2033 for (j = 0; j != hash_rxq_init_n; ++j) {
2034 if (!tmp->frxq[j].hrxq)
2036 tqs = tmp->frxq[j].hrxq->ind_table->queues;
2037 tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
2041 for (j = 0; (j != tq_n) && !mark; j++)
2042 if (tqs[j] == (*flow->queues)[i])
2045 (*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
2049 if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
2050 claim_zero(mlx5_glue->destroy_flow
2051 (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2052 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
2054 for (i = 0; i != hash_rxq_init_n; ++i) {
2055 struct mlx5_flow *frxq = &flow->frxq[i];
2058 claim_zero(mlx5_glue->destroy_flow
2061 mlx5_hrxq_release(dev, frxq->hrxq);
2063 rte_free(frxq->ibv_attr);
2067 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2070 TAILQ_REMOVE(list, flow, next);
2071 DRV_LOG(DEBUG, "port %u flow destroyed %p", dev->data->port_id,
2077 * Destroy all flows.
2080 * Pointer to Ethernet device.
2082 * Pointer to a TAILQ flow list.
2085 mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
2087 while (!TAILQ_EMPTY(list)) {
2088 struct rte_flow *flow;
2090 flow = TAILQ_FIRST(list);
2091 mlx5_flow_list_destroy(dev, list, flow);
2096 * Create the drop queue.
2099 * Pointer to Ethernet device.
2102 * 0 on success, a negative errno value otherwise and rte_errno is set.
2105 mlx5_flow_create_drop_queue(struct rte_eth_dev *dev)
2107 struct priv *priv = dev->data->dev_private;
2108 struct mlx5_hrxq_drop *fdq = NULL;
2112 fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
2115 "port %u cannot allocate memory for drop queue",
2116 dev->data->port_id);
2120 fdq->cq = mlx5_glue->create_cq(priv->ctx, 1, NULL, NULL, 0);
2122 DRV_LOG(WARNING, "port %u cannot allocate CQ for drop queue",
2123 dev->data->port_id);
2127 fdq->wq = mlx5_glue->create_wq
2129 &(struct ibv_wq_init_attr){
2130 .wq_type = IBV_WQT_RQ,
2137 DRV_LOG(WARNING, "port %u cannot allocate WQ for drop queue",
2138 dev->data->port_id);
2142 fdq->ind_table = mlx5_glue->create_rwq_ind_table
2144 &(struct ibv_rwq_ind_table_init_attr){
2145 .log_ind_tbl_size = 0,
2146 .ind_tbl = &fdq->wq,
2149 if (!fdq->ind_table) {
2151 "port %u cannot allocate indirection table for drop"
2153 dev->data->port_id);
2157 fdq->qp = mlx5_glue->create_qp_ex
2159 &(struct ibv_qp_init_attr_ex){
2160 .qp_type = IBV_QPT_RAW_PACKET,
2162 IBV_QP_INIT_ATTR_PD |
2163 IBV_QP_INIT_ATTR_IND_TABLE |
2164 IBV_QP_INIT_ATTR_RX_HASH,
2165 .rx_hash_conf = (struct ibv_rx_hash_conf){
2167 IBV_RX_HASH_FUNC_TOEPLITZ,
2168 .rx_hash_key_len = rss_hash_default_key_len,
2169 .rx_hash_key = rss_hash_default_key,
2170 .rx_hash_fields_mask = 0,
2172 .rwq_ind_tbl = fdq->ind_table,
2176 DRV_LOG(WARNING, "port %u cannot allocate QP for drop queue",
2177 dev->data->port_id);
2181 priv->flow_drop_queue = fdq;
2185 claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2187 claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2189 claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2191 claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2194 priv->flow_drop_queue = NULL;
2199 * Delete the drop queue.
2202 * Pointer to Ethernet device.
2205 mlx5_flow_delete_drop_queue(struct rte_eth_dev *dev)
2207 struct priv *priv = dev->data->dev_private;
2208 struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
2213 claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2215 claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2217 claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2219 claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2221 priv->flow_drop_queue = NULL;
2228 * Pointer to Ethernet device.
2230 * Pointer to a TAILQ flow list.
2233 mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
2235 struct priv *priv = dev->data->dev_private;
2236 struct rte_flow *flow;
2238 TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
2240 struct mlx5_ind_table_ibv *ind_tbl = NULL;
2243 if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
2245 claim_zero(mlx5_glue->destroy_flow
2246 (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2247 flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
2248 DRV_LOG(DEBUG, "port %u flow %p removed",
2249 dev->data->port_id, (void *)flow);
2253 /* Verify the flow has not already been cleaned. */
2254 for (i = 0; i != hash_rxq_init_n; ++i) {
2255 if (!flow->frxq[i].ibv_flow)
2258 * Indirection table may be necessary to remove the
2259 * flags in the Rx queues.
2260 * This helps to speed up the process by avoiding another loop.
2263 ind_tbl = flow->frxq[i].hrxq->ind_table;
2266 if (i == hash_rxq_init_n)
2270 for (i = 0; i != ind_tbl->queues_n; ++i)
2271 (*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
2273 for (i = 0; i != hash_rxq_init_n; ++i) {
2274 if (!flow->frxq[i].ibv_flow)
2276 claim_zero(mlx5_glue->destroy_flow
2277 (flow->frxq[i].ibv_flow));
2278 flow->frxq[i].ibv_flow = NULL;
2279 mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
2280 flow->frxq[i].hrxq = NULL;
2282 DRV_LOG(DEBUG, "port %u flow %p removed", dev->data->port_id,
2291 * Pointer to Ethernet device.
2293 * Pointer to a TAILQ flow list.
2296 * 0 on success, a negative errno value otherwise and rte_errno is set.
2299 mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
2301 struct priv *priv = dev->data->dev_private;
2302 struct rte_flow *flow;
2304 TAILQ_FOREACH(flow, list, next) {
2308 flow->frxq[HASH_RXQ_ETH].ibv_flow =
2309 mlx5_glue->create_flow
2310 (priv->flow_drop_queue->qp,
2311 flow->frxq[HASH_RXQ_ETH].ibv_attr);
2312 if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2314 "port %u flow %p cannot be applied",
2315 dev->data->port_id, (void *)flow);
2319 DRV_LOG(DEBUG, "port %u flow %p applied",
2320 dev->data->port_id, (void *)flow);
2324 for (i = 0; i != hash_rxq_init_n; ++i) {
2325 if (!flow->frxq[i].ibv_attr)
2327 flow->frxq[i].hrxq =
2328 mlx5_hrxq_get(dev, flow->rss_conf.rss_key,
2329 flow->rss_conf.rss_key_len,
2330 hash_rxq_init[i].hash_fields,
2333 if (flow->frxq[i].hrxq)
2335 flow->frxq[i].hrxq =
2336 mlx5_hrxq_new(dev, flow->rss_conf.rss_key,
2337 flow->rss_conf.rss_key_len,
2338 hash_rxq_init[i].hash_fields,
2341 if (!flow->frxq[i].hrxq) {
2343 "port %u flow %p cannot be applied",
2344 dev->data->port_id, (void *)flow);
2349 flow->frxq[i].ibv_flow =
2350 mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
2351 flow->frxq[i].ibv_attr);
2352 if (!flow->frxq[i].ibv_flow) {
2354 "port %u flow %p cannot be applied",
2355 dev->data->port_id, (void *)flow);
2359 DRV_LOG(DEBUG, "port %u flow %p applied",
2360 dev->data->port_id, (void *)flow);
2364 for (i = 0; i != flow->queues_n; ++i)
2365 (*priv->rxqs)[(*flow->queues)[i]]->mark = 1;
2371 * Verify the flow list is empty.
2374 * Pointer to Ethernet device.
2376 * @return the number of flows not released.
2379 mlx5_flow_verify(struct rte_eth_dev *dev)
2381 struct priv *priv = dev->data->dev_private;
2382 struct rte_flow *flow;
2385 TAILQ_FOREACH(flow, &priv->flows, next) {
2386 DRV_LOG(DEBUG, "port %u flow %p still referenced",
2387 dev->data->port_id, (void *)flow);
2394 * Enable a control flow configured from the control plane.
2397 * Pointer to Ethernet device.
2399 * An Ethernet flow spec to apply.
2401 * An Ethernet flow mask to apply.
2403 * A VLAN flow spec to apply.
2405 * A VLAN flow mask to apply.
2408 * 0 on success, a negative errno value otherwise and rte_errno is set.
2411 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
2412 struct rte_flow_item_eth *eth_spec,
2413 struct rte_flow_item_eth *eth_mask,
2414 struct rte_flow_item_vlan *vlan_spec,
2415 struct rte_flow_item_vlan *vlan_mask)
2417 struct priv *priv = dev->data->dev_private;
2418 const struct rte_flow_attr attr = {
2420 .priority = MLX5_CTRL_FLOW_PRIORITY,
2422 struct rte_flow_item items[] = {
2424 .type = RTE_FLOW_ITEM_TYPE_ETH,
2430 .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
2431 RTE_FLOW_ITEM_TYPE_END,
2437 .type = RTE_FLOW_ITEM_TYPE_END,
2440 struct rte_flow_action actions[] = {
2442 .type = RTE_FLOW_ACTION_TYPE_RSS,
2445 .type = RTE_FLOW_ACTION_TYPE_END,
2448 struct rte_flow *flow;
2449 struct rte_flow_error error;
2452 struct rte_flow_action_rss rss;
2454 const struct rte_eth_rss_conf *rss_conf;
2456 uint16_t queue[RTE_MAX_QUEUES_PER_PORT];
2460 if (!priv->reta_idx_n) {
2464 for (i = 0; i != priv->reta_idx_n; ++i)
2465 action_rss.local.queue[i] = (*priv->reta_idx)[i];
2466 action_rss.local.rss_conf = &priv->rss_conf;
2467 action_rss.local.num = priv->reta_idx_n;
2468 actions[0].conf = (const void *)&action_rss.rss;
2469 flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
2477 * Enable a control flow configured from the control plane.
2480 * Pointer to Ethernet device.
2482 * An Ethernet flow spec to apply.
2484 * An Ethernet flow mask to apply.
2487 * 0 on success, a negative errno value otherwise and rte_errno is set.
2490 mlx5_ctrl_flow(struct rte_eth_dev *dev,
2491 struct rte_flow_item_eth *eth_spec,
2492 struct rte_flow_item_eth *eth_mask)
2494 return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
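
/*
 * Usage sketch (hypothetical caller): enabling broadcast reception through
 * a control flow. Passing the same rte_flow_item_eth as both spec and mask
 * requests an exact match on the broadcast destination address.
 *
 *	struct rte_flow_item_eth bcast = {
 *		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *	};
 *
 *	if (mlx5_ctrl_flow(dev, &bcast, &bcast))
 *		return -rte_errno;
 */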
/**
 * Destroy a flow.
 *
 * @see rte_flow_destroy()
 * @see rte_flow_ops
 */
int
mlx5_flow_destroy(struct rte_eth_dev *dev,
		  struct rte_flow *flow,
		  struct rte_flow_error *error __rte_unused)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_destroy(dev, &priv->flows, flow);
	return 0;
}
/**
 * Destroy all flows.
 *
 * @see rte_flow_flush()
 * @see rte_flow_ops
 */
int
mlx5_flow_flush(struct rte_eth_dev *dev,
		struct rte_flow_error *error __rte_unused)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_flush(dev, &priv->flows);
	return 0;
}
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
/**
 * Query flow counter.
 *
 * @param cs
 *   The counter set to query.
 * @param counter_stats
 *   Last counter values read, used as the baseline for the deltas.
 * @param[out] query_count
 *   Returned data from the counter.
 * @param error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_query_count(struct ibv_counter_set *cs,
		      struct mlx5_flow_counter_stats *counter_stats,
		      struct rte_flow_query_count *query_count,
		      struct rte_flow_error *error)
{
	uint64_t counters[2];
	struct ibv_query_counter_set_attr query_cs_attr = {
		.cs = cs,
		.query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
	};
	struct ibv_counter_set_data query_out = {
		.out = counters,
		.outlen = 2 * sizeof(uint64_t),
	};
	int err = mlx5_glue->query_counter_set(&query_cs_attr, &query_out);

	if (err)
		return rte_flow_error_set(error, err,
					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
					  NULL,
					  "cannot read counter");
	query_count->hits_set = 1;
	query_count->bytes_set = 1;
	query_count->hits = counters[0] - counter_stats->hits;
	query_count->bytes = counters[1] - counter_stats->bytes;
	/* On reset, the last values read become the new baseline. */
	if (query_count->reset) {
		counter_stats->hits = counters[0];
		counter_stats->bytes = counters[1];
	}
	return 0;
}
/**
 * Query a flow.
 *
 * @see rte_flow_query()
 * @see rte_flow_ops
 */
int
mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
		struct rte_flow *flow,
		enum rte_flow_action_type action __rte_unused,
		void *data,
		struct rte_flow_error *error)
{
	if (flow->cs) {
		int ret;

		ret = mlx5_flow_query_count(flow->cs,
					    &flow->counter_stats,
					    (struct rte_flow_query_count *)data,
					    error);
		if (ret)
			return ret;
	} else {
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
					  NULL,
					  "no counter found for flow");
	}
	return 0;
}
#endif
/**
 * Isolated mode.
 *
 * @see rte_flow_isolate()
 * @see rte_flow_ops
 */
int
mlx5_flow_isolate(struct rte_eth_dev *dev,
		  int enable,
		  struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	if (dev->data->dev_started) {
		rte_flow_error_set(error, EBUSY,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "port must be stopped first");
		return -rte_errno;
	}
	priv->isolated = !!enable;
	if (enable)
		priv->dev->dev_ops = &mlx5_dev_ops_isolate;
	else
		priv->dev->dev_ops = &mlx5_dev_ops;
	return 0;
}
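
/*
 * Usage sketch (hypothetical application code): isolated mode must be
 * requested while the port is stopped, typically right after
 * rte_eth_dev_configure().
 *
 *	struct rte_flow_error error;
 *
 *	if (rte_flow_isolate(port_id, 1, &error))
 *		printf("cannot enter isolated mode: %s\n", error.message);
 */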
/**
 * Convert a flow director filter to a generic flow.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Flow director filter to add.
 * @param attributes
 *   Generic flow parameters structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
			 const struct rte_eth_fdir_filter *fdir_filter,
			 struct mlx5_fdir *attributes)
{
	struct priv *priv = dev->data->dev_private;
	const struct rte_eth_fdir_input *input = &fdir_filter->input;

	/* Validate queue number. */
	if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
		DRV_LOG(ERR, "port %u invalid queue number %d",
			dev->data->port_id, fdir_filter->action.rx_queue);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	attributes->attr.ingress = 1;
	attributes->items[0] = (struct rte_flow_item){
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.spec = &attributes->l2,
		.mask = &attributes->l2_mask,
	};
	switch (fdir_filter->action.behavior) {
	case RTE_ETH_FDIR_ACCEPT:
		attributes->actions[0] = (struct rte_flow_action){
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &attributes->queue,
		};
		break;
	case RTE_ETH_FDIR_REJECT:
		attributes->actions[0] = (struct rte_flow_action){
			.type = RTE_FLOW_ACTION_TYPE_DROP,
		};
		break;
	default:
		DRV_LOG(ERR, "port %u invalid behavior %d",
			dev->data->port_id,
			fdir_filter->action.behavior);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	attributes->queue.index = fdir_filter->action.rx_queue;
	switch (fdir_filter->input.flow_type) {
	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = input->flow.udp4_flow.ip.src_ip,
			.dst_addr = input->flow.udp4_flow.ip.dst_ip,
			.time_to_live = input->flow.udp4_flow.ip.ttl,
			.type_of_service = input->flow.udp4_flow.ip.tos,
			.next_proto_id = input->flow.udp4_flow.ip.proto,
		};
		attributes->l4.udp.hdr = (struct udp_hdr){
			.src_port = input->flow.udp4_flow.src_port,
			.dst_port = input->flow.udp4_flow.dst_port,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV4,
			.spec = &attributes->l3,
			.mask = &attributes->l3,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_UDP,
			.spec = &attributes->l4,
			.mask = &attributes->l4,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = input->flow.tcp4_flow.ip.src_ip,
			.dst_addr = input->flow.tcp4_flow.ip.dst_ip,
			.time_to_live = input->flow.tcp4_flow.ip.ttl,
			.type_of_service = input->flow.tcp4_flow.ip.tos,
			.next_proto_id = input->flow.tcp4_flow.ip.proto,
		};
		attributes->l4.tcp.hdr = (struct tcp_hdr){
			.src_port = input->flow.tcp4_flow.src_port,
			.dst_port = input->flow.tcp4_flow.dst_port,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV4,
			.spec = &attributes->l3,
			.mask = &attributes->l3,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_TCP,
			.spec = &attributes->l4,
			.mask = &attributes->l4,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = input->flow.ip4_flow.src_ip,
			.dst_addr = input->flow.ip4_flow.dst_ip,
			.time_to_live = input->flow.ip4_flow.ttl,
			.type_of_service = input->flow.ip4_flow.tos,
			.next_proto_id = input->flow.ip4_flow.proto,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV4,
			.spec = &attributes->l3,
			.mask = &attributes->l3,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
			.hop_limits = input->flow.udp6_flow.ip.hop_limits,
			.proto = input->flow.udp6_flow.ip.proto,
		};
		memcpy(attributes->l3.ipv6.hdr.src_addr,
		       input->flow.udp6_flow.ip.src_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
		memcpy(attributes->l3.ipv6.hdr.dst_addr,
		       input->flow.udp6_flow.ip.dst_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
		attributes->l4.udp.hdr = (struct udp_hdr){
			.src_port = input->flow.udp6_flow.src_port,
			.dst_port = input->flow.udp6_flow.dst_port,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV6,
			.spec = &attributes->l3,
			.mask = &attributes->l3,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_UDP,
			.spec = &attributes->l4,
			.mask = &attributes->l4,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
			.hop_limits = input->flow.tcp6_flow.ip.hop_limits,
			.proto = input->flow.tcp6_flow.ip.proto,
		};
		memcpy(attributes->l3.ipv6.hdr.src_addr,
		       input->flow.tcp6_flow.ip.src_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
		memcpy(attributes->l3.ipv6.hdr.dst_addr,
		       input->flow.tcp6_flow.ip.dst_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
		attributes->l4.tcp.hdr = (struct tcp_hdr){
			.src_port = input->flow.tcp6_flow.src_port,
			.dst_port = input->flow.tcp6_flow.dst_port,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV6,
			.spec = &attributes->l3,
			.mask = &attributes->l3,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_TCP,
			.spec = &attributes->l4,
			.mask = &attributes->l4,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
			.hop_limits = input->flow.ipv6_flow.hop_limits,
			.proto = input->flow.ipv6_flow.proto,
		};
		memcpy(attributes->l3.ipv6.hdr.src_addr,
		       input->flow.ipv6_flow.src_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
		memcpy(attributes->l3.ipv6.hdr.dst_addr,
		       input->flow.ipv6_flow.dst_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV6,
			.spec = &attributes->l3,
			.mask = &attributes->l3,
		};
		break;
	default:
		DRV_LOG(ERR, "port %u invalid flow type %d",
			dev->data->port_id, fdir_filter->input.flow_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	return 0;
}
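
/*
 * Input sketch (hypothetical values): a legacy flow director filter that
 * the conversion above turns into ETH / IPV4 / UDP pattern items with a
 * QUEUE action. As everywhere in the legacy fdir API, addresses and ports
 * are given in network byte order.
 *
 *	struct rte_eth_fdir_filter f = {
 *		.input = {
 *			.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
 *			.flow.udp4_flow = {
 *				.ip.dst_ip = RTE_BE32(IPv4(192, 168, 0, 1)),
 *				.dst_port = RTE_BE16(4789),
 *			},
 *		},
 *		.action = {
 *			.behavior = RTE_ETH_FDIR_ACCEPT,
 *			.rx_queue = 3,
 *		},
 *	};
 */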
/**
 * Add new flow director filter and store it in list.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Flow director filter to add.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_add(struct rte_eth_dev *dev,
		     const struct rte_eth_fdir_filter *fdir_filter)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_fdir attributes = {
		.attr.group = 0,
		.l2_mask = {
			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.type = 0,
		},
	};
	struct mlx5_flow_parse parser = {
		.layer = HASH_RXQ_ETH,
	};
	struct rte_flow_error error;
	struct rte_flow *flow;
	int ret;

	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
	if (ret)
		return ret;
	ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
				attributes.actions, &error, &parser);
	if (ret)
		return ret;
	flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
				     attributes.items, attributes.actions,
				     &error);
	if (flow) {
		DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
			(void *)flow);
		return 0;
	}
	return -rte_errno;
}
/**
 * Delete specific filter.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Filter to be deleted.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_delete(struct rte_eth_dev *dev,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_fdir attributes = {
		.attr.group = 0,
	};
	struct mlx5_flow_parse parser = {
		.layer = HASH_RXQ_ETH,
	};
	struct rte_flow_error error;
	struct rte_flow *flow;
	unsigned int i;
	int ret;

	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
	if (ret)
		return ret;
	ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
				attributes.actions, &error, &parser);
	if (ret)
		goto exit;
	/*
	 * Special case for the drop action, which is only set in the
	 * specifications when the flow is created. In this situation the
	 * drop specification is missing.
	 */
	if (parser.drop) {
		struct ibv_flow_spec_action_drop *drop;

		drop = (void *)((uintptr_t)parser.queue[HASH_RXQ_ETH].ibv_attr +
				parser.queue[HASH_RXQ_ETH].offset);
		*drop = (struct ibv_flow_spec_action_drop){
			.type = IBV_FLOW_SPEC_ACTION_DROP,
			.size = sizeof(struct ibv_flow_spec_action_drop),
		};
		parser.queue[HASH_RXQ_ETH].ibv_attr->num_of_specs++;
	}
	TAILQ_FOREACH(flow, &priv->flows, next) {
		struct ibv_flow_attr *attr;
		struct ibv_spec_header *attr_h;
		void *spec;
		struct ibv_flow_attr *flow_attr;
		struct ibv_spec_header *flow_h;
		void *flow_spec;
		unsigned int specs_n;

		attr = parser.queue[HASH_RXQ_ETH].ibv_attr;
		flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr;
		/* Compare the attributes first. */
		if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
			continue;
		if (attr->num_of_specs == 0)
			continue;
		spec = (void *)((uintptr_t)attr +
				sizeof(struct ibv_flow_attr));
		flow_spec = (void *)((uintptr_t)flow_attr +
				     sizeof(struct ibv_flow_attr));
		specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
		for (i = 0; i != specs_n; ++i) {
			attr_h = spec;
			flow_h = flow_spec;
			if (memcmp(spec, flow_spec,
				   RTE_MIN(attr_h->size, flow_h->size)))
				goto wrong_flow;
			spec = (void *)((uintptr_t)spec + attr_h->size);
			flow_spec = (void *)((uintptr_t)flow_spec +
					     flow_h->size);
		}
		/* At this point, the flow matches. */
		break;
wrong_flow:
		/* The flow does not match. */
		continue;
	}
	ret = rte_errno; /* Save rte_errno before cleanup. */
	if (flow)
		mlx5_flow_list_destroy(dev, &priv->flows, flow);
exit:
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser.queue[i].ibv_attr)
			rte_free(parser.queue[i].ibv_attr);
	}
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}
/**
 * Update queue for specific filter.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Filter to be updated.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_update(struct rte_eth_dev *dev,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	int ret;

	ret = mlx5_fdir_filter_delete(dev, fdir_filter);
	if (ret)
		return ret;
	return mlx5_fdir_filter_add(dev, fdir_filter);
}
/**
 * Flush all filters.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
static void
mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_flush(dev, &priv->flows);
}
/**
 * Get flow director information.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[out] fdir_info
 *   Resulting flow director information.
 */
static void
mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
{
	struct priv *priv = dev->data->dev_private;
	struct rte_eth_fdir_masks *mask =
		&priv->dev->data->dev_conf.fdir_conf.mask;

	fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
	fdir_info->guarant_spc = 0;
	rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
	fdir_info->max_flexpayload = 0;
	fdir_info->flow_types_mask[0] = 0;
	fdir_info->flex_payload_unit = 0;
	fdir_info->max_flex_payload_segment_num = 0;
	fdir_info->flex_payload_limit = 0;
	memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
}
/**
 * Deal with flow director operations.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
		    void *arg)
{
	struct priv *priv = dev->data->dev_private;
	enum rte_fdir_mode fdir_mode =
		priv->dev->data->dev_conf.fdir_conf.mode;

	if (filter_op == RTE_ETH_FILTER_NOP)
		return 0;
	if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
	    fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
		DRV_LOG(ERR, "port %u flow director mode %d not supported",
			dev->data->port_id, fdir_mode);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	switch (filter_op) {
	case RTE_ETH_FILTER_ADD:
		return mlx5_fdir_filter_add(dev, arg);
	case RTE_ETH_FILTER_UPDATE:
		return mlx5_fdir_filter_update(dev, arg);
	case RTE_ETH_FILTER_DELETE:
		return mlx5_fdir_filter_delete(dev, arg);
	case RTE_ETH_FILTER_FLUSH:
		mlx5_fdir_filter_flush(dev);
		break;
	case RTE_ETH_FILTER_INFO:
		mlx5_fdir_info_get(dev, arg);
		break;
	default:
		DRV_LOG(DEBUG, "port %u unknown operation %u",
			dev->data->port_id, filter_op);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	return 0;
}
/**
 * Manage filter operations.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param filter_type
 *   Filter type.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
		     enum rte_filter_type filter_type,
		     enum rte_filter_op filter_op,
		     void *arg)
{
	switch (filter_type) {
	case RTE_ETH_FILTER_GENERIC:
		if (filter_op != RTE_ETH_FILTER_GET) {
			rte_errno = EINVAL;
			return -rte_errno;
		}
		*(const void **)arg = &mlx5_flow_ops;
		return 0;
	case RTE_ETH_FILTER_FDIR:
		return mlx5_fdir_ctrl_func(dev, filter_op, arg);
	default:
		DRV_LOG(ERR, "port %u filter type (%d) not supported",
			dev->data->port_id, filter_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	return 0;
}
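
/*
 * Usage sketch (hypothetical application code): both dispatch paths above
 * are reached through the legacy filter-control API.
 *
 *	const struct rte_flow_ops *ops;
 *
 *	rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_GENERIC,
 *				RTE_ETH_FILTER_GET, &ops);
 *	rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
 *				RTE_ETH_FILTER_ADD, &fdir_filter);
 */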