/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2016 6WIND S.A.
 * Copyright 2016 Mellanox.
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#pragma GCC diagnostic ignored "-Wpedantic"
#include <infiniband/verbs.h>
#pragma GCC diagnostic error "-Wpedantic"
#include <rte_ethdev_driver.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>
#include "mlx5_defs.h"
#include "mlx5_glue.h"
/* Define minimal priority for control plane flows. */
#define MLX5_CTRL_FLOW_PRIORITY 4
/* Internet Protocol versions. */
#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
struct ibv_flow_spec_counter_action {
/* Dev ops structure defined in mlx5.c */
extern const struct eth_dev_ops mlx5_dev_ops;
extern const struct eth_dev_ops mlx5_dev_ops_isolate;
/** Structure given to the conversion functions. */
struct mlx5_flow_data {
	struct mlx5_flow_parse *parser; /** Parser context. */
	struct rte_flow_error *error; /** Error context. */
mlx5_flow_create_eth(const struct rte_flow_item *item,
		     const void *default_mask,
		     struct mlx5_flow_data *data);
mlx5_flow_create_vlan(const struct rte_flow_item *item,
		      const void *default_mask,
		      struct mlx5_flow_data *data);
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
		      const void *default_mask,
		      struct mlx5_flow_data *data);
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
		      const void *default_mask,
		      struct mlx5_flow_data *data);
mlx5_flow_create_udp(const struct rte_flow_item *item,
		     const void *default_mask,
		     struct mlx5_flow_data *data);
mlx5_flow_create_tcp(const struct rte_flow_item *item,
		     const void *default_mask,
		     struct mlx5_flow_data *data);
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
		       const void *default_mask,
		       struct mlx5_flow_data *data);
struct mlx5_flow_parse;
mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);
mlx5_flow_create_count(struct rte_eth_dev *dev, struct mlx5_flow_parse *parser);
/* Hash RX queue types. */
/* Initialization data for hash RX queue. */
struct hash_rxq_init {
	uint64_t hash_fields; /* Fields that participate in the hash. */
	uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
	unsigned int flow_priority; /* Flow priority to use. */
	unsigned int ip_version; /* Internet protocol. */
/* Initialization data for hash RX queues. */
const struct hash_rxq_init hash_rxq_init[] = {
		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
				IBV_RX_HASH_DST_IPV4 |
				IBV_RX_HASH_SRC_PORT_TCP |
				IBV_RX_HASH_DST_PORT_TCP),
		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
		.ip_version = MLX5_IPV4,
		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
				IBV_RX_HASH_DST_IPV4 |
				IBV_RX_HASH_SRC_PORT_UDP |
				IBV_RX_HASH_DST_PORT_UDP),
		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
		.ip_version = MLX5_IPV4,
		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
				IBV_RX_HASH_DST_IPV4),
		.dpdk_rss_hf = (ETH_RSS_IPV4 |
		.ip_version = MLX5_IPV4,
		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
				IBV_RX_HASH_DST_IPV6 |
				IBV_RX_HASH_SRC_PORT_TCP |
				IBV_RX_HASH_DST_PORT_TCP),
		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
		.ip_version = MLX5_IPV6,
		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
				IBV_RX_HASH_DST_IPV6 |
				IBV_RX_HASH_SRC_PORT_UDP |
				IBV_RX_HASH_DST_PORT_UDP),
		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
		.ip_version = MLX5_IPV6,
		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
				IBV_RX_HASH_DST_IPV6),
		.dpdk_rss_hf = (ETH_RSS_IPV6 |
		.ip_version = MLX5_IPV6,
/* Number of entries in hash_rxq_init[]. */
const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
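/*
 * Illustration only (not part of the original file): with the table above,
 * enabling ETH_RSS_NONFRAG_IPV4_UDP at the ethdev level selects the Verbs
 * hash fields of the HASH_RXQ_UDPV4 entry. A minimal sketch, assuming an
 * already configured port 0:
 *
 * @code
 * struct rte_eth_rss_conf rss_conf = {
 *	.rss_key = NULL, // keep the driver default 40-byte key
 *	.rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
 * };
 * int ret = rte_eth_dev_rss_hash_update(0, &rss_conf);
 * @endcode
 */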
/** Structure for holding counter stats. */
struct mlx5_flow_counter_stats {
	uint64_t hits; /**< Number of packets matched by the rule. */
	uint64_t bytes; /**< Number of bytes matched by the rule. */
/** Structure for Drop queue. */
struct mlx5_hrxq_drop {
	struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
	struct ibv_qp *qp; /**< Verbs queue pair. */
	struct ibv_wq *wq; /**< Verbs work queue. */
	struct ibv_cq *cq; /**< Verbs completion queue. */
/* Flow structures. */
	uint64_t hash_fields; /**< Fields that participate in the hash. */
	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
	struct ibv_flow *ibv_flow; /**< Verbs flow. */
	struct mlx5_hrxq *hrxq; /**< Hash Rx queue. */
/* Drop flow structures. */
struct mlx5_flow_drop {
	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
	struct ibv_flow *ibv_flow; /**< Verbs flow. */
	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
	uint32_t mark:1; /**< Set if the flow is marked. */
	uint32_t drop:1; /**< Drop queue. */
	uint16_t queues_n; /**< Number of entries in queue[]. */
	uint16_t (*queues)[]; /**< Queue indexes to use. */
	struct rte_eth_rss_conf rss_conf; /**< RSS configuration. */
	uint8_t rss_key[40]; /**< Copy of the RSS key. */
	struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
	struct mlx5_flow_counter_stats counter_stats; /**< The counter stats. */
	struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
	/**< Flow with Rx queue. */
/** Static initializer for items. */
	(const enum rte_flow_item_type []){ \
		__VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
/** Structure to generate a simple graph of layers supported by the NIC. */
struct mlx5_flow_items {
	/** List of possible actions for these items. */
	const enum rte_flow_action_type *const actions;
	/** Bit-masks corresponding to the possibilities for the item. */
	 * Default bit-masks to use when item->mask is not provided. When
	 * \default_mask is also NULL, the full supported bit-mask (\mask) is
	const void *default_mask;
	/** Bit-masks size in bytes. */
	const unsigned int mask_sz;
	 * Conversion function from rte_flow to NIC specific flow.
	 *   rte_flow item to convert.
	 * @param default_mask
	 *   Default bit-masks to use when item->mask is not provided.
	 *   Internal structure to store the conversion.
	 *   0 on success, a negative errno value otherwise and rte_errno is
	int (*convert)(const struct rte_flow_item *item,
		       const void *default_mask,
		       struct mlx5_flow_data *data);
	/** Size in bytes of the destination structure. */
	const unsigned int dst_sz;
	/** List of possible following items. */
	const enum rte_flow_item_type *const items;
/** Valid actions for this PMD. */
static const enum rte_flow_action_type valid_actions[] = {
	RTE_FLOW_ACTION_TYPE_DROP,
	RTE_FLOW_ACTION_TYPE_QUEUE,
	RTE_FLOW_ACTION_TYPE_MARK,
	RTE_FLOW_ACTION_TYPE_FLAG,
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
	RTE_FLOW_ACTION_TYPE_COUNT,
	RTE_FLOW_ACTION_TYPE_END,
/** Graph of supported items and associated actions. */
static const struct mlx5_flow_items mlx5_flow_items[] = {
	[RTE_FLOW_ITEM_TYPE_END] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
			       RTE_FLOW_ITEM_TYPE_VXLAN),
	[RTE_FLOW_ITEM_TYPE_ETH] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
			       RTE_FLOW_ITEM_TYPE_IPV4,
			       RTE_FLOW_ITEM_TYPE_IPV6),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_eth){
			.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
			.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
		.default_mask = &rte_flow_item_eth_mask,
		.mask_sz = sizeof(struct rte_flow_item_eth),
		.convert = mlx5_flow_create_eth,
		.dst_sz = sizeof(struct ibv_flow_spec_eth),
	[RTE_FLOW_ITEM_TYPE_VLAN] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
			       RTE_FLOW_ITEM_TYPE_IPV6),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_vlan){
		.default_mask = &rte_flow_item_vlan_mask,
		.mask_sz = sizeof(struct rte_flow_item_vlan),
		.convert = mlx5_flow_create_vlan,
	[RTE_FLOW_ITEM_TYPE_IPV4] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
			       RTE_FLOW_ITEM_TYPE_TCP),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_ipv4){
				.type_of_service = -1,
		.default_mask = &rte_flow_item_ipv4_mask,
		.mask_sz = sizeof(struct rte_flow_item_ipv4),
		.convert = mlx5_flow_create_ipv4,
		.dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
	[RTE_FLOW_ITEM_TYPE_IPV6] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
			       RTE_FLOW_ITEM_TYPE_TCP),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_ipv6){
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
		.default_mask = &rte_flow_item_ipv6_mask,
		.mask_sz = sizeof(struct rte_flow_item_ipv6),
		.convert = mlx5_flow_create_ipv6,
		.dst_sz = sizeof(struct ibv_flow_spec_ipv6),
	[RTE_FLOW_ITEM_TYPE_UDP] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_udp){
		.default_mask = &rte_flow_item_udp_mask,
		.mask_sz = sizeof(struct rte_flow_item_udp),
		.convert = mlx5_flow_create_udp,
		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
	[RTE_FLOW_ITEM_TYPE_TCP] = {
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_tcp){
		.default_mask = &rte_flow_item_tcp_mask,
		.mask_sz = sizeof(struct rte_flow_item_tcp),
		.convert = mlx5_flow_create_tcp,
		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
	[RTE_FLOW_ITEM_TYPE_VXLAN] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_vxlan){
			.vni = "\xff\xff\xff",
		.default_mask = &rte_flow_item_vxlan_mask,
		.mask_sz = sizeof(struct rte_flow_item_vxlan),
		.convert = mlx5_flow_create_vxlan,
		.dst_sz = sizeof(struct ibv_flow_spec_tunnel),
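/*
 * Illustration only (not part of the original file): a pattern accepted by
 * the graph above, since each item lists its successor in .items
 * (eth -> ipv4 -> udp -> vxlan -> inner eth):
 *
 * @code
 * const struct rte_flow_item pattern[] = {
 *	{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *	{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *	{ .type = RTE_FLOW_ITEM_TYPE_UDP },
 *	{ .type = RTE_FLOW_ITEM_TYPE_VXLAN },
 *	{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *	{ .type = RTE_FLOW_ITEM_TYPE_END },
 * };
 * @endcode
 */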
/** Structure to pass to the conversion function. */
struct mlx5_flow_parse {
	uint32_t inner; /**< Set once VXLAN is encountered. */
	/**< Whether resources should remain after a validate. */
	uint32_t drop:1; /**< Target is a drop queue. */
	uint32_t mark:1; /**< Mark is present in the flow. */
	uint32_t count:1; /**< Count is present in the flow. */
	uint32_t mark_id; /**< Mark identifier. */
	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indexes to use. */
	uint16_t queues_n; /**< Number of entries in queue[]. */
	struct rte_eth_rss_conf rss_conf; /**< RSS configuration. */
	uint8_t rss_key[40]; /**< Copy of the RSS key. */
	enum hash_rxq_type layer; /**< Last pattern layer detected. */
	struct ibv_counter_set *cs; /**< Holds the counter set for the rule. */
		struct ibv_flow_attr *ibv_attr;
		/**< Pointer to Verbs attributes. */
		/**< Current position or total size of the attribute. */
	} queue[RTE_DIM(hash_rxq_init)];
static const struct rte_flow_ops mlx5_flow_ops = {
	.validate = mlx5_flow_validate,
	.create = mlx5_flow_create,
	.destroy = mlx5_flow_destroy,
	.flush = mlx5_flow_flush,
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
	.query = mlx5_flow_query,
	.isolate = mlx5_flow_isolate,
/* Convert FDIR request to Generic flow. */
	struct rte_flow_attr attr;
	struct rte_flow_action actions[2];
	struct rte_flow_item items[4];
	struct rte_flow_item_eth l2;
	struct rte_flow_item_eth l2_mask;
		struct rte_flow_item_ipv4 ipv4;
		struct rte_flow_item_ipv6 ipv6;
		struct rte_flow_item_udp udp;
		struct rte_flow_item_tcp tcp;
	struct rte_flow_action_queue queue;
/* Verbs specification header. */
struct ibv_spec_header {
	enum ibv_flow_spec_type type;
 * Check support for a given item.
 *   Item specification.
 *   Bit-masks covering supported fields to compare with spec, last and mask in
 *   Bit-mask size in bytes.
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_item_validate(const struct rte_flow_item *item,
			const uint8_t *mask, unsigned int size)
	if (!item->spec && (item->mask || item->last)) {
	if (item->spec && !item->mask) {
		const uint8_t *spec = item->spec;
		for (i = 0; i < size; ++i)
			if ((spec[i] | mask[i]) != mask[i]) {
	if (item->last && !item->mask) {
		const uint8_t *spec = item->last;
		for (i = 0; i < size; ++i)
			if ((spec[i] | mask[i]) != mask[i]) {
		const uint8_t *spec = item->spec;
		for (i = 0; i < size; ++i)
			if ((spec[i] | mask[i]) != mask[i]) {
	if (item->spec && item->last) {
		const uint8_t *apply = mask;
		for (i = 0; i < size; ++i) {
			spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
			last[i] = ((const uint8_t *)item->last)[i] & apply[i];
		ret = memcmp(spec, last, size);
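/*
 * Illustration only (not part of the original file): the checks above
 * reject any spec/last byte outside the supported mask. E.g. an IPv4 item
 * restricted to the source address passes validation:
 *
 * @code
 * struct rte_flow_item_ipv4 spec = { .hdr.src_addr = RTE_BE32(0x0a000001) };
 * struct rte_flow_item_ipv4 mask = { .hdr.src_addr = RTE_BE32(0xffffffff) };
 * struct rte_flow_item item = {
 *	.type = RTE_FLOW_ITEM_TYPE_IPV4,
 *	.spec = &spec,
 *	.mask = &mask,
 * };
 * @endcode
 */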
 * Copy the RSS configuration from the user's one; if rss_conf is NULL,
 * the driver default is used.
 *   Internal parser structure.
 *   User RSS configuration to save.
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_convert_rss_conf(struct mlx5_flow_parse *parser,
			   const struct rte_eth_rss_conf *rss_conf)
	 * This function is also called at the beginning of
	 * mlx5_flow_convert_actions() to initialize the parser with the
	 * device default RSS configuration.
	if (rss_conf->rss_hf & MLX5_RSS_HF_MASK) {
	if (rss_conf->rss_key_len != 40) {
	if (rss_conf->rss_key_len && rss_conf->rss_key) {
		parser->rss_conf.rss_key_len = rss_conf->rss_key_len;
		memcpy(parser->rss_key, rss_conf->rss_key,
		       rss_conf->rss_key_len);
		parser->rss_conf.rss_key = parser->rss_key;
	parser->rss_conf.rss_hf = rss_conf->rss_hf;
 * Validate flow rule attributes.
 *   Flow rule attributes.
 *   Perform verbose error reporting if not NULL.
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_convert_attributes(const struct rte_flow_attr *attr,
			     struct rte_flow_error *error)
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
				   "groups are not supported");
	if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
				   "priorities are not supported");
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
				   "egress is not supported");
	if (!attr->ingress) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
				   "only ingress is supported");
 * Extract the requested actions into the parser.
 *   Pointer to Ethernet device.
 *   Associated actions (list terminated by the END action).
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_convert_actions(struct rte_eth_dev *dev,
			  const struct rte_flow_action actions[],
			  struct rte_flow_error *error,
			  struct mlx5_flow_parse *parser)
	struct priv *priv = dev->data->dev_private;
	 * Add the default RSS configuration; Verbs needs it to create the QP
	 * even when no RSS is requested.
	ret = mlx5_flow_convert_rss_conf(parser,
					 (const struct rte_eth_rss_conf *)
	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
			const struct rte_flow_action_queue *queue =
				(const struct rte_flow_action_queue *)
			if (!queue || (queue->index > (priv->rxqs_n - 1)))
				goto exit_action_not_supported;
			for (n = 0; n < parser->queues_n; ++n) {
				if (parser->queues[n] == queue->index) {
			if (parser->queues_n > 1 && !found) {
				rte_flow_error_set(error, ENOTSUP,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   "queue action not in RSS queues");
				parser->queues_n = 1;
				parser->queues[0] = queue->index;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
			const struct rte_flow_action_rss *rss =
				(const struct rte_flow_action_rss *)
			if (!rss || !rss->num) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
			if (parser->queues_n == 1) {
				assert(parser->queues_n);
				for (n = 0; n < rss->num; ++n) {
					if (parser->queues[0] ==
					rte_flow_error_set(error, ENOTSUP,
							   RTE_FLOW_ERROR_TYPE_ACTION,
							   "queue action not in RSS"
			for (n = 0; n < rss->num; ++n) {
				if (rss->queue[n] >= priv->rxqs_n) {
					rte_flow_error_set(error, EINVAL,
							   RTE_FLOW_ERROR_TYPE_ACTION,
							   "queue id > number of"
			for (n = 0; n < rss->num; ++n)
				parser->queues[n] = rss->queue[n];
			parser->queues_n = rss->num;
			if (mlx5_flow_convert_rss_conf(parser, rss->rss_conf)) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   "wrong RSS configuration");
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
			const struct rte_flow_action_mark *mark =
				(const struct rte_flow_action_mark *)
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   "mark must be defined");
			} else if (mark->id >= MLX5_FLOW_MARK_MAX) {
				rte_flow_error_set(error, ENOTSUP,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   "mark must be between 0"
			parser->mark_id = mark->id;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
			   priv->config.flow_counter_en) {
			goto exit_action_not_supported;
	if (parser->drop && parser->mark)
	if (!parser->queues_n && !parser->drop) {
		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "no valid action");
exit_action_not_supported:
	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
			   actions, "action not supported");
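/*
 * Illustration only (not part of the original file): an action list the
 * parser above accepts, pairing a queue with a mark (the queue index must
 * exist on the port and the mark id stay below MLX5_FLOW_MARK_MAX):
 *
 * @code
 * struct rte_flow_action_queue queue = { .index = 0 };
 * struct rte_flow_action_mark mark = { .id = 42 };
 * const struct rte_flow_action actions[] = {
 *	{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *	{ .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &mark },
 *	{ .type = RTE_FLOW_ACTION_TYPE_END },
 * };
 * @endcode
 */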
 *   Pattern specification (list terminated by the END pattern item).
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_convert_items_validate(const struct rte_flow_item items[],
				 struct rte_flow_error *error,
				 struct mlx5_flow_parse *parser)
	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
	/* Initialise the offsets to start after verbs attribute. */
	for (i = 0; i != hash_rxq_init_n; ++i)
		parser->queue[i].offset = sizeof(struct ibv_flow_attr);
	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
		const struct mlx5_flow_items *token = NULL;
		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
		     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
			if (cur_item->items[i] == items->type) {
				token = &mlx5_flow_items[items->type];
			goto exit_item_not_supported;
		ret = mlx5_flow_item_validate(items,
					      (const uint8_t *)cur_item->mask,
			goto exit_item_not_supported;
		if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
				rte_flow_error_set(error, ENOTSUP,
						   RTE_FLOW_ERROR_TYPE_ITEM,
						   "cannot recognize multiple"
						   " VXLAN encapsulations");
			parser->inner = IBV_FLOW_SPEC_INNER;
			parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
			for (n = 0; n != hash_rxq_init_n; ++n)
				parser->queue[n].offset += cur_item->dst_sz;
		parser->queue[HASH_RXQ_ETH].offset +=
			sizeof(struct ibv_flow_spec_action_drop);
		for (i = 0; i != hash_rxq_init_n; ++i)
			parser->queue[i].offset +=
				sizeof(struct ibv_flow_spec_action_tag);
		unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
		for (i = 0; i != hash_rxq_init_n; ++i)
			parser->queue[i].offset += size;
exit_item_not_supported:
	return rte_flow_error_set(error, -ret, RTE_FLOW_ERROR_TYPE_ITEM,
				  items, "item not supported");
 * Allocate memory space to store verbs flow attributes.
 *   Amount of bytes to allocate.
 *   Perform verbose error reporting if not NULL.
 *   A verbs flow attribute on success, NULL otherwise and rte_errno is set.
static struct ibv_flow_attr *
mlx5_flow_convert_allocate(unsigned int size, struct rte_flow_error *error)
	struct ibv_flow_attr *ibv_attr;
	ibv_attr = rte_calloc(__func__, 1, size, 0);
		rte_flow_error_set(error, ENOMEM,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   "cannot allocate verbs spec attributes");
 * Make inner packet matching with a higher priority than non-inner
 * @param[in, out] parser
 *   Internal parser structure.
 *   User flow attribute.
mlx5_flow_update_priority(struct mlx5_flow_parse *parser,
			  const struct rte_flow_attr *attr)
	parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
		hash_rxq_init[HASH_RXQ_ETH].flow_priority;
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser->queue[i].ibv_attr) {
			parser->queue[i].ibv_attr->priority =
				hash_rxq_init[i].flow_priority -
				(parser->inner ? 1 : 0);
 * Finalise verbs flow attributes.
 * @param[in, out] parser
 *   Internal parser structure.
mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
	const unsigned int ipv4 =
		hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
	const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 : HASH_RXQ_TCPV6;
	const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
	const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 : HASH_RXQ_TCPV4;
	const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 : HASH_RXQ_IPV4;
	const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
	/* Remove any other flow not matching the pattern. */
	if (parser->queues_n == 1) {
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (i == HASH_RXQ_ETH)
			rte_free(parser->queue[i].ibv_attr);
			parser->queue[i].ibv_attr = NULL;
	if (parser->layer == HASH_RXQ_ETH) {
		 * This layer becomes useless as the pattern defines deeper
		 * layers.
		rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
		parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
	/* Remove opposite kind of layer e.g. IPv6 if the pattern is IPv4. */
	for (i = ohmin; i != (ohmax + 1); ++i) {
		if (!parser->queue[i].ibv_attr)
		rte_free(parser->queue[i].ibv_attr);
		parser->queue[i].ibv_attr = NULL;
	/* Remove impossible flow according to the RSS configuration. */
	if (hash_rxq_init[parser->layer].dpdk_rss_hf &
	    parser->rss_conf.rss_hf) {
		/* Remove any other flow. */
		for (i = hmin; i != (hmax + 1); ++i) {
			if ((i == parser->layer) ||
			    (!parser->queue[i].ibv_attr))
			rte_free(parser->queue[i].ibv_attr);
			parser->queue[i].ibv_attr = NULL;
	} else if (!parser->queue[ip].ibv_attr) {
		/* No RSS possible with the current configuration. */
		parser->queues_n = 1;
	 * Fill missing layers in verbs specifications, or compute the correct
	 * offset to allocate the memory space for the attributes and
	for (i = 0; i != hash_rxq_init_n - 1; ++i) {
			struct ibv_flow_spec_ipv4_ext ipv4;
			struct ibv_flow_spec_ipv6 ipv6;
			struct ibv_flow_spec_tcp_udp udp_tcp;
		if (i == parser->layer)
		if (parser->layer == HASH_RXQ_ETH) {
			if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
				size = sizeof(struct ibv_flow_spec_ipv4_ext);
				specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
					.type = IBV_FLOW_SPEC_IPV4_EXT,
				size = sizeof(struct ibv_flow_spec_ipv6);
				specs.ipv6 = (struct ibv_flow_spec_ipv6){
					.type = IBV_FLOW_SPEC_IPV6,
			if (parser->queue[i].ibv_attr) {
				dst = (void *)((uintptr_t)
					       parser->queue[i].ibv_attr +
					       parser->queue[i].offset);
				memcpy(dst, &specs, size);
				++parser->queue[i].ibv_attr->num_of_specs;
			parser->queue[i].offset += size;
		if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
		    (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
			size = sizeof(struct ibv_flow_spec_tcp_udp);
			specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
				.type = ((i == HASH_RXQ_UDPV4 ||
					  i == HASH_RXQ_UDPV6) ?
			if (parser->queue[i].ibv_attr) {
				dst = (void *)((uintptr_t)
					       parser->queue[i].ibv_attr +
					       parser->queue[i].offset);
				memcpy(dst, &specs, size);
				++parser->queue[i].ibv_attr->num_of_specs;
			parser->queue[i].offset += size;
 * Validate and convert a flow supported by the NIC.
 *   Pointer to Ethernet device.
 *   Flow rule attributes.
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_convert(struct rte_eth_dev *dev,
		  const struct rte_flow_attr *attr,
		  const struct rte_flow_item items[],
		  const struct rte_flow_action actions[],
		  struct rte_flow_error *error,
		  struct mlx5_flow_parse *parser)
	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
	/* First step. Validate the attributes, items and actions. */
	*parser = (struct mlx5_flow_parse){
		.create = parser->create,
		.layer = HASH_RXQ_ETH,
		.mark_id = MLX5_FLOW_MARK_DEFAULT,
	ret = mlx5_flow_convert_attributes(attr, error);
	ret = mlx5_flow_convert_actions(dev, actions, error, parser);
	ret = mlx5_flow_convert_items_validate(items, error, parser);
	mlx5_flow_convert_finalise(parser);
	 * Allocate the memory space to store verbs specifications.
		unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;
		parser->queue[HASH_RXQ_ETH].ibv_attr =
			mlx5_flow_convert_allocate(offset, error);
		if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
		parser->queue[HASH_RXQ_ETH].offset =
			sizeof(struct ibv_flow_attr);
		for (i = 0; i != hash_rxq_init_n; ++i) {
			unsigned int offset;
			if (!(parser->rss_conf.rss_hf &
			      hash_rxq_init[i].dpdk_rss_hf) &&
			    (i != HASH_RXQ_ETH))
			offset = parser->queue[i].offset;
			parser->queue[i].ibv_attr =
				mlx5_flow_convert_allocate(offset, error);
			if (!parser->queue[i].ibv_attr)
			parser->queue[i].offset = sizeof(struct ibv_flow_attr);
	/* Third step. Parse the items and fill the specifications. */
	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
		struct mlx5_flow_data data = {
		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
		cur_item = &mlx5_flow_items[items->type];
		ret = cur_item->convert(items,
					(cur_item->default_mask ?
					 cur_item->default_mask :
		mlx5_flow_create_flag_mark(parser, parser->mark_id);
	if (parser->count && parser->create) {
		mlx5_flow_create_count(dev, parser);
			goto exit_count_error;
	 * Last step. Complete missing specification to reach the RSS
	mlx5_flow_convert_finalise(parser);
	mlx5_flow_update_priority(parser, attr);
	/* Only verification is expected, all resources should be released. */
	if (!parser->create) {
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (parser->queue[i].ibv_attr) {
				rte_free(parser->queue[i].ibv_attr);
				parser->queue[i].ibv_attr = NULL;
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser->queue[i].ibv_attr) {
			rte_free(parser->queue[i].ibv_attr);
			parser->queue[i].ibv_attr = NULL;
	rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
			   NULL, "cannot allocate verbs spec attributes");
	rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
			   NULL, "cannot create counter");
 * Copy the specification created into the flow.
 *   Internal parser structure.
 *   Created specification.
 *   Size in bytes of the specification to copy.
mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (!parser->queue[i].ibv_attr)
		/* Specification must be the same L3 type or none. */
		if (parser->layer == HASH_RXQ_ETH ||
		    (hash_rxq_init[parser->layer].ip_version ==
		     hash_rxq_init[i].ip_version) ||
		    (hash_rxq_init[i].ip_version == 0)) {
			dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
				       parser->queue[i].offset);
			memcpy(dst, src, size);
			++parser->queue[i].ibv_attr->num_of_specs;
			parser->queue[i].offset += size;
 * Convert Ethernet item to Verbs specification.
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_create_eth(const struct rte_flow_item *item,
		     const void *default_mask,
		     struct mlx5_flow_data *data)
	const struct rte_flow_item_eth *spec = item->spec;
	const struct rte_flow_item_eth *mask = item->mask;
	struct mlx5_flow_parse *parser = data->parser;
	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
	struct ibv_flow_spec_eth eth = {
		.type = parser->inner | IBV_FLOW_SPEC_ETH,
	/* Don't update layer for the inner pattern. */
		parser->layer = HASH_RXQ_ETH;
			mask = default_mask;
		memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
		memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
		eth.val.ether_type = spec->type;
		memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
		memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
		eth.mask.ether_type = mask->type;
		/* Remove unwanted bits from values. */
		for (i = 0; i < ETHER_ADDR_LEN; ++i) {
			eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
			eth.val.src_mac[i] &= eth.mask.src_mac[i];
		eth.val.ether_type &= eth.mask.ether_type;
	mlx5_flow_create_copy(parser, &eth, eth_size);
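/*
 * Illustration only (not part of the original file): a sketch of the Verbs
 * specification the function above emits for an ETH item matching a single
 * destination MAC (hypothetical address):
 *
 * @code
 * struct ibv_flow_spec_eth eth = {
 *	.type = IBV_FLOW_SPEC_ETH,
 *	.size = sizeof(struct ibv_flow_spec_eth),
 *	.val.dst_mac = "\x00\x11\x22\x33\x44\x55",
 *	.mask.dst_mac = "\xff\xff\xff\xff\xff\xff",
 * };
 * @endcode
 */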
 * Convert VLAN item to Verbs specification.
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_create_vlan(const struct rte_flow_item *item,
		      const void *default_mask,
		      struct mlx5_flow_data *data)
	const struct rte_flow_item_vlan *spec = item->spec;
	const struct rte_flow_item_vlan *mask = item->mask;
	struct mlx5_flow_parse *parser = data->parser;
	struct ibv_flow_spec_eth *eth;
	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
			mask = default_mask;
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (!parser->queue[i].ibv_attr)
			eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
				       parser->queue[i].offset - eth_size);
			eth->val.vlan_tag = spec->tci;
			eth->mask.vlan_tag = mask->tci;
			eth->val.vlan_tag &= eth->mask.vlan_tag;
			 * From the Verbs perspective an empty VLAN is
			 * equivalent to a packet without a VLAN layer.
			if (!eth->mask.vlan_tag)
	return rte_flow_error_set(data->error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM,
				  item, "VLAN cannot be empty");
 * Convert IPv4 item to Verbs specification.
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
		      const void *default_mask,
		      struct mlx5_flow_data *data)
	const struct rte_flow_item_ipv4 *spec = item->spec;
	const struct rte_flow_item_ipv4 *mask = item->mask;
	struct mlx5_flow_parse *parser = data->parser;
	unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
	struct ibv_flow_spec_ipv4_ext ipv4 = {
		.type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
	/* Don't update layer for the inner pattern. */
		parser->layer = HASH_RXQ_IPV4;
			mask = default_mask;
		ipv4.val = (struct ibv_flow_ipv4_ext_filter){
			.src_ip = spec->hdr.src_addr,
			.dst_ip = spec->hdr.dst_addr,
			.proto = spec->hdr.next_proto_id,
			.tos = spec->hdr.type_of_service,
		ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
			.src_ip = mask->hdr.src_addr,
			.dst_ip = mask->hdr.dst_addr,
			.proto = mask->hdr.next_proto_id,
			.tos = mask->hdr.type_of_service,
		/* Remove unwanted bits from values. */
		ipv4.val.src_ip &= ipv4.mask.src_ip;
		ipv4.val.dst_ip &= ipv4.mask.dst_ip;
		ipv4.val.proto &= ipv4.mask.proto;
		ipv4.val.tos &= ipv4.mask.tos;
	mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
 * Convert IPv6 item to Verbs specification.
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
		      const void *default_mask,
		      struct mlx5_flow_data *data)
	const struct rte_flow_item_ipv6 *spec = item->spec;
	const struct rte_flow_item_ipv6 *mask = item->mask;
	struct mlx5_flow_parse *parser = data->parser;
	unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
	struct ibv_flow_spec_ipv6 ipv6 = {
		.type = parser->inner | IBV_FLOW_SPEC_IPV6,
	/* Don't update layer for the inner pattern. */
		parser->layer = HASH_RXQ_IPV6;
		uint32_t vtc_flow_val;
		uint32_t vtc_flow_mask;
			mask = default_mask;
		memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
		       RTE_DIM(ipv6.val.src_ip));
		memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
		       RTE_DIM(ipv6.val.dst_ip));
		memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
		       RTE_DIM(ipv6.mask.src_ip));
		memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
		       RTE_DIM(ipv6.mask.dst_ip));
		vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
		vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
		ipv6.val.flow_label =
			rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
		ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
		ipv6.val.next_hdr = spec->hdr.proto;
		ipv6.val.hop_limit = spec->hdr.hop_limits;
		ipv6.mask.flow_label =
			rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
		ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
		ipv6.mask.next_hdr = mask->hdr.proto;
		ipv6.mask.hop_limit = mask->hdr.hop_limits;
		/* Remove unwanted bits from values. */
		for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
			ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
			ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
		ipv6.val.flow_label &= ipv6.mask.flow_label;
		ipv6.val.traffic_class &= ipv6.mask.traffic_class;
		ipv6.val.next_hdr &= ipv6.mask.next_hdr;
		ipv6.val.hop_limit &= ipv6.mask.hop_limit;
	mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
 * Convert UDP item to Verbs specification.
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_create_udp(const struct rte_flow_item *item,
		     const void *default_mask,
		     struct mlx5_flow_data *data)
	const struct rte_flow_item_udp *spec = item->spec;
	const struct rte_flow_item_udp *mask = item->mask;
	struct mlx5_flow_parse *parser = data->parser;
	unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
	struct ibv_flow_spec_tcp_udp udp = {
		.type = parser->inner | IBV_FLOW_SPEC_UDP,
	/* Don't update layer for the inner pattern. */
	if (!parser->inner) {
		if (parser->layer == HASH_RXQ_IPV4)
			parser->layer = HASH_RXQ_UDPV4;
			parser->layer = HASH_RXQ_UDPV6;
			mask = default_mask;
		udp.val.dst_port = spec->hdr.dst_port;
		udp.val.src_port = spec->hdr.src_port;
		udp.mask.dst_port = mask->hdr.dst_port;
		udp.mask.src_port = mask->hdr.src_port;
		/* Remove unwanted bits from values. */
		udp.val.src_port &= udp.mask.src_port;
		udp.val.dst_port &= udp.mask.dst_port;
	mlx5_flow_create_copy(parser, &udp, udp_size);
 * Convert TCP item to Verbs specification.
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_create_tcp(const struct rte_flow_item *item,
		     const void *default_mask,
		     struct mlx5_flow_data *data)
	const struct rte_flow_item_tcp *spec = item->spec;
	const struct rte_flow_item_tcp *mask = item->mask;
	struct mlx5_flow_parse *parser = data->parser;
	unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
	struct ibv_flow_spec_tcp_udp tcp = {
		.type = parser->inner | IBV_FLOW_SPEC_TCP,
	/* Don't update layer for the inner pattern. */
	if (!parser->inner) {
		if (parser->layer == HASH_RXQ_IPV4)
			parser->layer = HASH_RXQ_TCPV4;
			parser->layer = HASH_RXQ_TCPV6;
			mask = default_mask;
		tcp.val.dst_port = spec->hdr.dst_port;
		tcp.val.src_port = spec->hdr.src_port;
		tcp.mask.dst_port = mask->hdr.dst_port;
		tcp.mask.src_port = mask->hdr.src_port;
		/* Remove unwanted bits from values. */
		tcp.val.src_port &= tcp.mask.src_port;
		tcp.val.dst_port &= tcp.mask.dst_port;
	mlx5_flow_create_copy(parser, &tcp, tcp_size);
 * Convert VXLAN item to Verbs specification.
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
		       const void *default_mask,
		       struct mlx5_flow_data *data)
	const struct rte_flow_item_vxlan *spec = item->spec;
	const struct rte_flow_item_vxlan *mask = item->mask;
	struct mlx5_flow_parse *parser = data->parser;
	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
	struct ibv_flow_spec_tunnel vxlan = {
		.type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
	parser->inner = IBV_FLOW_SPEC_INNER;
			mask = default_mask;
		memcpy(&id.vni[1], spec->vni, 3);
		vxlan.val.tunnel_id = id.vlan_id;
		memcpy(&id.vni[1], mask->vni, 3);
		vxlan.mask.tunnel_id = id.vlan_id;
		/* Remove unwanted bits from values. */
		vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
	 * Tunnel id 0 is equivalent to not adding a VXLAN layer: if only this
	 * layer is defined in the Verbs specification, it is interpreted as a
	 * wildcard and all packets will match this rule; if it follows a full
	 * stack layer (e.g. eth / ipv4 / udp), all packets matching the layers
	 * before will also match this rule.
	 * To avoid such a situation, VNI 0 is currently refused.
	if (!vxlan.val.tunnel_id)
		return rte_flow_error_set(data->error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM,
					  "VxLAN vni cannot be 0");
	mlx5_flow_create_copy(parser, &vxlan, size);
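/*
 * Illustration only (not part of the original file): a VXLAN item that
 * satisfies the non-zero VNI rule documented above (VNI 0x123456):
 *
 * @code
 * struct rte_flow_item_vxlan vxlan_spec = { .vni = "\x12\x34\x56" };
 * const struct rte_flow_item item = {
 *	.type = RTE_FLOW_ITEM_TYPE_VXLAN,
 *	.spec = &vxlan_spec,
 * };
 * @endcode
 */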
 * Convert mark/flag action to Verbs specification.
 *   Internal parser structure.
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
	struct ibv_flow_spec_action_tag tag = {
		.type = IBV_FLOW_SPEC_ACTION_TAG,
		.tag_id = mlx5_flow_mark_set(mark_id),
	assert(parser->mark);
	mlx5_flow_create_copy(parser, &tag, size);
 * Convert count action to Verbs specification.
 *   Pointer to Ethernet device.
 *   Pointer to MLX5 flow parser structure.
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_create_count(struct rte_eth_dev *dev __rte_unused,
		       struct mlx5_flow_parse *parser __rte_unused)
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
	struct priv *priv = dev->data->dev_private;
	unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
	struct ibv_counter_set_init_attr init_attr = {0};
	struct ibv_flow_spec_counter_action counter = {
		.type = IBV_FLOW_SPEC_ACTION_COUNT,
		.counter_set_handle = 0,
	init_attr.counter_set_id = 0;
	parser->cs = mlx5_glue->create_counter_set(priv->ctx, &init_attr);
	counter.counter_set_handle = parser->cs->handle;
	mlx5_flow_create_copy(parser, &counter, size);
 * Complete flow rule creation with a drop queue.
 *   Pointer to Ethernet device.
 *   Internal parser structure.
 *   Pointer to the rte_flow.
 *   Perform verbose error reporting if not NULL.
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_create_action_queue_drop(struct rte_eth_dev *dev,
				   struct mlx5_flow_parse *parser,
				   struct rte_flow *flow,
				   struct rte_flow_error *error)
	struct priv *priv = dev->data->dev_private;
	struct ibv_flow_spec_action_drop *drop;
	unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
	drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
			parser->queue[HASH_RXQ_ETH].offset);
	*drop = (struct ibv_flow_spec_action_drop){
		.type = IBV_FLOW_SPEC_ACTION_DROP,
	++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
	parser->queue[HASH_RXQ_ETH].offset += size;
	flow->frxq[HASH_RXQ_ETH].ibv_attr =
		parser->queue[HASH_RXQ_ETH].ibv_attr;
		flow->cs = parser->cs;
	if (!priv->dev->data->dev_started)
	parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
	flow->frxq[HASH_RXQ_ETH].ibv_flow =
		mlx5_glue->create_flow(priv->flow_drop_queue->qp,
				       flow->frxq[HASH_RXQ_ETH].ibv_attr);
	if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "flow rule creation failure");
	if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
		claim_zero(mlx5_glue->destroy_flow
			   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
		flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
	if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
		rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
		flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
		claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
 * Create hash Rx queues when RSS is enabled.
 *   Pointer to Ethernet device.
 *   Internal parser structure.
 *   Pointer to the rte_flow.
 *   Perform verbose error reporting if not NULL.
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_create_action_queue_rss(struct rte_eth_dev *dev,
				  struct mlx5_flow_parse *parser,
				  struct rte_flow *flow,
				  struct rte_flow_error *error)
	struct priv *priv = dev->data->dev_private;
	for (i = 0; i != hash_rxq_init_n; ++i) {
		uint64_t hash_fields;
		if (!parser->queue[i].ibv_attr)
		flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
		parser->queue[i].ibv_attr = NULL;
		hash_fields = hash_rxq_init[i].hash_fields;
		if (!priv->dev->data->dev_started)
		flow->frxq[i].hrxq =
			      parser->rss_conf.rss_key,
			      parser->rss_conf.rss_key_len,
		if (flow->frxq[i].hrxq)
		flow->frxq[i].hrxq =
			      parser->rss_conf.rss_key,
			      parser->rss_conf.rss_key_len,
		if (!flow->frxq[i].hrxq) {
			return rte_flow_error_set(error, ENOMEM,
						  RTE_FLOW_ERROR_TYPE_HANDLE,
						  "cannot create hash rxq");
 * Complete flow rule creation.
 *   Pointer to Ethernet device.
 *   Internal parser structure.
 *   Pointer to the rte_flow.
 *   Perform verbose error reporting if not NULL.
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_create_action_queue(struct rte_eth_dev *dev,
			      struct mlx5_flow_parse *parser,
			      struct rte_flow *flow,
			      struct rte_flow_error *error)
	struct priv *priv = dev->data->dev_private;
	unsigned int flows_n = 0;
	assert(!parser->drop);
	ret = mlx5_flow_create_action_queue_rss(dev, parser, flow, error);
	flow->cs = parser->cs;
	if (!priv->dev->data->dev_started)
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (!flow->frxq[i].hrxq)
		flow->frxq[i].ibv_flow =
			mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
					       flow->frxq[i].ibv_attr);
		if (!flow->frxq[i].ibv_flow) {
			rte_flow_error_set(error, ENOMEM,
					   RTE_FLOW_ERROR_TYPE_HANDLE,
					   NULL, "flow rule creation failure");
		DRV_LOG(DEBUG, "port %u %p type %d QP %p ibv_flow %p",
			(void *)flow->frxq[i].hrxq,
			(void *)flow->frxq[i].ibv_flow);
		rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "internal error in flow creation");
	for (i = 0; i != parser->queues_n; ++i) {
		struct mlx5_rxq_data *q =
			(*priv->rxqs)[parser->queues[i]];
		q->mark |= parser->mark;
	ret = rte_errno; /* Save rte_errno before cleanup. */
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (flow->frxq[i].ibv_flow) {
			struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;
			claim_zero(mlx5_glue->destroy_flow(ibv_flow));
		if (flow->frxq[i].hrxq)
			mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
		if (flow->frxq[i].ibv_attr)
			rte_free(flow->frxq[i].ibv_attr);
		claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
	rte_errno = ret; /* Restore rte_errno. */
 *   Pointer to Ethernet device.
 *   Pointer to a TAILQ flow list.
 *   Flow rule attributes.
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 *   Perform verbose error reporting if not NULL.
 *   A flow on success, NULL otherwise and rte_errno is set.
static struct rte_flow *
mlx5_flow_list_create(struct rte_eth_dev *dev,
		      struct mlx5_flows *list,
		      const struct rte_flow_attr *attr,
		      const struct rte_flow_item items[],
		      const struct rte_flow_action actions[],
		      struct rte_flow_error *error)
	struct mlx5_flow_parse parser = { .create = 1, };
	struct rte_flow *flow = NULL;
	ret = mlx5_flow_convert(dev, attr, items, actions, error, &parser);
	flow = rte_calloc(__func__, 1,
			  sizeof(*flow) + parser.queues_n * sizeof(uint16_t),
		rte_flow_error_set(error, ENOMEM,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   "cannot allocate flow memory");
	/* Copy queues configuration. */
	flow->queues = (uint16_t (*)[])(flow + 1);
	memcpy(flow->queues, parser.queues, parser.queues_n * sizeof(uint16_t));
	flow->queues_n = parser.queues_n;
	flow->mark = parser.mark;
	/* Copy RSS configuration. */
	flow->rss_conf = parser.rss_conf;
	flow->rss_conf.rss_key = flow->rss_key;
	memcpy(flow->rss_key, parser.rss_key, parser.rss_conf.rss_key_len);
	/* Finalise the flow. */
		ret = mlx5_flow_create_action_queue_drop(dev, &parser, flow,
		ret = mlx5_flow_create_action_queue(dev, &parser, flow, error);
	TAILQ_INSERT_TAIL(list, flow, next);
	DRV_LOG(DEBUG, "port %u flow created %p", dev->data->port_id,
	DRV_LOG(ERR, "port %u flow creation error: %s", dev->data->port_id,
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser.queue[i].ibv_attr)
			rte_free(parser.queue[i].ibv_attr);
 * Validate a flow supported by the NIC.
 * @see rte_flow_validate()
mlx5_flow_validate(struct rte_eth_dev *dev,
		   const struct rte_flow_attr *attr,
		   const struct rte_flow_item items[],
		   const struct rte_flow_action actions[],
		   struct rte_flow_error *error)
	struct mlx5_flow_parse parser = { .create = 0, };
	return mlx5_flow_convert(dev, attr, items, actions, error, &parser);
 * @see rte_flow_create()
mlx5_flow_create(struct rte_eth_dev *dev,
		 const struct rte_flow_attr *attr,
		 const struct rte_flow_item items[],
		 const struct rte_flow_action actions[],
		 struct rte_flow_error *error)
	struct priv *priv = dev->data->dev_private;
	return mlx5_flow_list_create(dev, &priv->flows, attr, items, actions,
 * Destroy a flow in a list.
 *   Pointer to Ethernet device.
 *   Pointer to a TAILQ flow list.
mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
		       struct rte_flow *flow)
	struct priv *priv = dev->data->dev_private;
	if (flow->drop || !flow->mark)
	for (i = 0; i != flow->queues_n; ++i) {
		struct rte_flow *tmp;
		 * To remove the mark from the queue, the queue must not be
		 * present in any other marked flow (RSS or not).
		TAILQ_FOREACH(tmp, list, next) {
			uint16_t *tqs = NULL;
			for (j = 0; j != hash_rxq_init_n; ++j) {
				if (!tmp->frxq[j].hrxq)
				tqs = tmp->frxq[j].hrxq->ind_table->queues;
				tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
			for (j = 0; (j != tq_n) && !mark; j++)
				if (tqs[j] == (*flow->queues)[i])
		(*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
		if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
			claim_zero(mlx5_glue->destroy_flow
				   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
		rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
		for (i = 0; i != hash_rxq_init_n; ++i) {
			struct mlx5_flow *frxq = &flow->frxq[i];
				claim_zero(mlx5_glue->destroy_flow
				mlx5_hrxq_release(dev, frxq->hrxq);
				rte_free(frxq->ibv_attr);
		claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
	TAILQ_REMOVE(list, flow, next);
	DRV_LOG(DEBUG, "port %u flow destroyed %p", dev->data->port_id,
 * Destroy all flows.
 *   Pointer to Ethernet device.
 *   Pointer to a TAILQ flow list.
mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
	while (!TAILQ_EMPTY(list)) {
		struct rte_flow *flow;
		flow = TAILQ_FIRST(list);
		mlx5_flow_list_destroy(dev, list, flow);
 * Create drop queue.
 *   Pointer to Ethernet device.
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_create_drop_queue(struct rte_eth_dev *dev)
	struct priv *priv = dev->data->dev_private;
	struct mlx5_hrxq_drop *fdq = NULL;
	fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
			"port %u cannot allocate memory for drop queue",
			dev->data->port_id);
	fdq->cq = mlx5_glue->create_cq(priv->ctx, 1, NULL, NULL, 0);
		DRV_LOG(WARNING, "port %u cannot allocate CQ for drop queue",
			dev->data->port_id);
	fdq->wq = mlx5_glue->create_wq
		 &(struct ibv_wq_init_attr){
			.wq_type = IBV_WQT_RQ,
		DRV_LOG(WARNING, "port %u cannot allocate WQ for drop queue",
			dev->data->port_id);
	fdq->ind_table = mlx5_glue->create_rwq_ind_table
		 &(struct ibv_rwq_ind_table_init_attr){
			.log_ind_tbl_size = 0,
			.ind_tbl = &fdq->wq,
	if (!fdq->ind_table) {
			"port %u cannot allocate indirection table for drop"
			dev->data->port_id);
	fdq->qp = mlx5_glue->create_qp_ex
		 &(struct ibv_qp_init_attr_ex){
			.qp_type = IBV_QPT_RAW_PACKET,
				IBV_QP_INIT_ATTR_PD |
				IBV_QP_INIT_ATTR_IND_TABLE |
				IBV_QP_INIT_ATTR_RX_HASH,
			.rx_hash_conf = (struct ibv_rx_hash_conf){
					IBV_RX_HASH_FUNC_TOEPLITZ,
				.rx_hash_key_len = rss_hash_default_key_len,
				.rx_hash_key = rss_hash_default_key,
				.rx_hash_fields_mask = 0,
			.rwq_ind_tbl = fdq->ind_table,
		DRV_LOG(WARNING, "port %u cannot allocate QP for drop queue",
			dev->data->port_id);
	priv->flow_drop_queue = fdq;
	claim_zero(mlx5_glue->destroy_qp(fdq->qp));
	claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
	claim_zero(mlx5_glue->destroy_wq(fdq->wq));
	claim_zero(mlx5_glue->destroy_cq(fdq->cq));
	priv->flow_drop_queue = NULL;
 * Delete drop queue.
 *   Pointer to Ethernet device.
mlx5_flow_delete_drop_queue(struct rte_eth_dev *dev)
	struct priv *priv = dev->data->dev_private;
	struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
	claim_zero(mlx5_glue->destroy_qp(fdq->qp));
	claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
	claim_zero(mlx5_glue->destroy_wq(fdq->wq));
	claim_zero(mlx5_glue->destroy_cq(fdq->cq));
	priv->flow_drop_queue = NULL;
 *   Pointer to Ethernet device.
 *   Pointer to a TAILQ flow list.
mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
	struct priv *priv = dev->data->dev_private;
	struct rte_flow *flow;
	TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
		struct mlx5_ind_table_ibv *ind_tbl = NULL;
			if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
			claim_zero(mlx5_glue->destroy_flow
				   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
			flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
			DRV_LOG(DEBUG, "port %u flow %p removed",
				dev->data->port_id, (void *)flow);
		/* Verify the flow has not already been cleaned. */
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (!flow->frxq[i].ibv_flow)
			 * Indirection table may be necessary to remove the
			 * flags in the Rx queues.
			 * This helps to speed-up the process by avoiding
			ind_tbl = flow->frxq[i].hrxq->ind_table;
		if (i == hash_rxq_init_n)
		for (i = 0; i != ind_tbl->queues_n; ++i)
			(*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (!flow->frxq[i].ibv_flow)
			claim_zero(mlx5_glue->destroy_flow
				   (flow->frxq[i].ibv_flow));
			flow->frxq[i].ibv_flow = NULL;
			mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
			flow->frxq[i].hrxq = NULL;
		DRV_LOG(DEBUG, "port %u flow %p removed", dev->data->port_id,
 *   Pointer to Ethernet device.
 *   Pointer to a TAILQ flow list.
 *   0 on success, a negative errno value otherwise and rte_errno is set.
mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
	struct priv *priv = dev->data->dev_private;
	struct rte_flow *flow;
	TAILQ_FOREACH(flow, list, next) {
			flow->frxq[HASH_RXQ_ETH].ibv_flow =
				mlx5_glue->create_flow
				(priv->flow_drop_queue->qp,
				 flow->frxq[HASH_RXQ_ETH].ibv_attr);
			if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
					"port %u flow %p cannot be applied",
					dev->data->port_id, (void *)flow);
			DRV_LOG(DEBUG, "port %u flow %p applied",
				dev->data->port_id, (void *)flow);
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (!flow->frxq[i].ibv_attr)
			flow->frxq[i].hrxq =
				mlx5_hrxq_get(dev, flow->rss_conf.rss_key,
					      flow->rss_conf.rss_key_len,
					      hash_rxq_init[i].hash_fields,
			if (flow->frxq[i].hrxq)
			flow->frxq[i].hrxq =
				mlx5_hrxq_new(dev, flow->rss_conf.rss_key,
					      flow->rss_conf.rss_key_len,
					      hash_rxq_init[i].hash_fields,
			if (!flow->frxq[i].hrxq) {
					"port %u flow %p cannot be applied",
					dev->data->port_id, (void *)flow);
			flow->frxq[i].ibv_flow =
				mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
						       flow->frxq[i].ibv_attr);
			if (!flow->frxq[i].ibv_flow) {
					"port %u flow %p cannot be applied",
					dev->data->port_id, (void *)flow);
			DRV_LOG(DEBUG, "port %u flow %p applied",
				dev->data->port_id, (void *)flow);
		for (i = 0; i != flow->queues_n; ++i)
			(*priv->rxqs)[(*flow->queues)[i]]->mark = 1;
 * Verify the flow list is empty.
2383 * Pointer to Ethernet device.
2385 * @return the number of flows not released.
2388 mlx5_flow_verify(struct rte_eth_dev *dev)
2390 struct priv *priv = dev->data->dev_private;
2391 struct rte_flow *flow;
2394 TAILQ_FOREACH(flow, &priv->flows, next) {
2395 DRV_LOG(DEBUG, "port %u flow %p still referenced",
2396 dev->data->port_id, (void *)flow);

/**
 * Enable a control flow configured from the control plane.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param eth_spec
 *   An Ethernet flow spec to apply.
 * @param eth_mask
 *   An Ethernet flow mask to apply.
 * @param vlan_spec
 *   A VLAN flow spec to apply.
 * @param vlan_mask
 *   A VLAN flow mask to apply.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
		    struct rte_flow_item_eth *eth_spec,
		    struct rte_flow_item_eth *eth_mask,
		    struct rte_flow_item_vlan *vlan_spec,
		    struct rte_flow_item_vlan *vlan_mask)
{
	struct priv *priv = dev->data->dev_private;
	const struct rte_flow_attr attr = {
		.ingress = 1,
		.priority = MLX5_CTRL_FLOW_PRIORITY,
	};
	struct rte_flow_item items[] = {
		{
			.type = RTE_FLOW_ITEM_TYPE_ETH,
			.spec = eth_spec,
			.last = NULL,
			.mask = eth_mask,
		},
		{
			.type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
				RTE_FLOW_ITEM_TYPE_END,
			.spec = vlan_spec,
			.last = NULL,
			.mask = vlan_mask,
		},
		{
			.type = RTE_FLOW_ITEM_TYPE_END,
		},
	};
	struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_RSS,
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		},
	};
	struct rte_flow *flow;
	struct rte_flow_error error;
	unsigned int i;
	union {
		struct rte_flow_action_rss rss;
		struct {
			const struct rte_eth_rss_conf *rss_conf;
			uint16_t num;
			uint16_t queue[RTE_MAX_QUEUES_PER_PORT];
		} local;
	} action_rss;

	if (!priv->reta_idx_n) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	for (i = 0; i != priv->reta_idx_n; ++i)
		action_rss.local.queue[i] = (*priv->reta_idx)[i];
	action_rss.local.rss_conf = &priv->rss_conf;
	action_rss.local.num = priv->reta_idx_n;
	actions[0].conf = (const void *)&action_rss.rss;
	flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
				     actions, &error);
	if (!flow)
		return -rte_errno;
	return 0;
}
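
/*
 * Note on the action_rss union above: struct rte_flow_action_rss ends in a
 * flexible array of queue indices and therefore cannot reserve stack space
 * by itself.  The anonymous "local" struct mirrors its layout while adding
 * room for up to RTE_MAX_QUEUES_PER_PORT entries, and the union lets the
 * filled-in storage be handed to actions[0].conf as a regular
 * struct rte_flow_action_rss.
 */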

/**
 * Enable a control flow configured from the control plane.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param eth_spec
 *   An Ethernet flow spec to apply.
 * @param eth_mask
 *   An Ethernet flow mask to apply.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_ctrl_flow(struct rte_eth_dev *dev,
	       struct rte_flow_item_eth *eth_spec,
	       struct rte_flow_item_eth *eth_mask)
{
	return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
}
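
/*
 * Usage sketch (illustrative only): the traffic-enable path typically
 * installs a broadcast control flow by matching on the broadcast
 * destination MAC and passing the same structure as both spec and mask:
 *
 *	struct rte_flow_item_eth bcast = {
 *		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *	};
 *
 *	ret = mlx5_ctrl_flow(dev, &bcast, &bcast);
 */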

/**
 * Destroy a flow.
 *
 * @see rte_flow_destroy()
 * @see rte_flow_ops
 */
int
mlx5_flow_destroy(struct rte_eth_dev *dev,
		  struct rte_flow *flow,
		  struct rte_flow_error *error __rte_unused)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_destroy(dev, &priv->flows, flow);
	return 0;
}

/**
 * Destroy all flows.
 *
 * @see rte_flow_flush()
 * @see rte_flow_ops
 */
int
mlx5_flow_flush(struct rte_eth_dev *dev,
		struct rte_flow_error *error __rte_unused)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_flush(dev, &priv->flows);
	return 0;
}
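
/*
 * Note: only the application flows in priv->flows are flushed here;
 * control flows created through mlx5_ctrl_flow_vlan() live in the separate
 * priv->ctrl_flows list and are managed by the driver itself.
 */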

#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
/**
 * Query flow counter.
 *
 * @param cs
 *   The counter set to query.
 * @param counter_stats
 *   Counter snapshot taken when the flow was created or last reset.
 * @param[out] query_count
 *   Returned data from the counter.
 * @param error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_query_count(struct ibv_counter_set *cs,
		      struct mlx5_flow_counter_stats *counter_stats,
		      struct rte_flow_query_count *query_count,
		      struct rte_flow_error *error)
{
	uint64_t counters[2];
	struct ibv_query_counter_set_attr query_cs_attr = {
		.cs = cs,
		.query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
	};
	struct ibv_counter_set_data query_out = {
		.out = counters,
		.outlen = 2 * sizeof(uint64_t),
	};
	int err = mlx5_glue->query_counter_set(&query_cs_attr, &query_out);

	if (err)
		return rte_flow_error_set(error, err,
					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
					  NULL,
					  "cannot read counter");
	query_count->hits_set = 1;
	query_count->bytes_set = 1;
	query_count->hits = counters[0] - counter_stats->hits;
	query_count->bytes = counters[1] - counter_stats->bytes;
	if (query_count->reset) {
		counter_stats->hits = counters[0];
		counter_stats->bytes = counters[1];
	}
	return 0;
}
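
/*
 * Note: the verbs counter set is cumulative, so resets are emulated in
 * software: every query reports the difference against the snapshot kept
 * in counter_stats, and a query with .reset set simply refreshes that
 * snapshot instead of clearing the counter set itself.
 */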

/**
 * Query a flow.
 *
 * @see rte_flow_query()
 * @see rte_flow_ops
 */
int
mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
		struct rte_flow *flow,
		enum rte_flow_action_type action __rte_unused,
		void *data,
		struct rte_flow_error *error)
{
	if (flow->cs) {
		int ret;

		ret = mlx5_flow_query_count(flow->cs,
					    &flow->counter_stats,
					    (struct rte_flow_query_count *)data,
					    error);
		if (ret)
			return ret;
	} else {
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
					  NULL,
					  "no counter found for flow");
	}
	return 0;
}
#endif

/**
 * Isolated mode.
 *
 * @see rte_flow_isolate()
 * @see rte_flow_ops
 */
int
mlx5_flow_isolate(struct rte_eth_dev *dev,
		  int enable,
		  struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	if (dev->data->dev_started) {
		rte_flow_error_set(error, EBUSY,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "port must be stopped first");
		return -rte_errno;
	}
	priv->isolated = !!enable;
	if (enable)
		priv->dev->dev_ops = &mlx5_dev_ops_isolate;
	else
		priv->dev->dev_ops = &mlx5_dev_ops;
	return 0;
}
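
/*
 * Note: isolated mode is implemented by swapping the whole eth_dev_ops
 * table rather than testing a flag in every callback; the
 * mlx5_dev_ops_isolate variant presumably omits the callbacks that would
 * install global rules behind the back of the rte_flow rules created by
 * the application.
 */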

/**
 * Convert a flow director filter to a generic flow.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Flow director filter to add.
 * @param attributes
 *   Generic flow parameters structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
			 const struct rte_eth_fdir_filter *fdir_filter,
			 struct mlx5_fdir *attributes)
{
	struct priv *priv = dev->data->dev_private;
	const struct rte_eth_fdir_input *input = &fdir_filter->input;

	/* Validate queue number. */
	if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
		DRV_LOG(ERR, "port %u invalid queue number %d",
			dev->data->port_id, fdir_filter->action.rx_queue);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	attributes->attr.ingress = 1;
	attributes->items[0] = (struct rte_flow_item) {
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.spec = &attributes->l2,
		.mask = &attributes->l2_mask,
	};
	switch (fdir_filter->action.behavior) {
	case RTE_ETH_FDIR_ACCEPT:
		attributes->actions[0] = (struct rte_flow_action){
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &attributes->queue,
		};
		break;
	case RTE_ETH_FDIR_REJECT:
		attributes->actions[0] = (struct rte_flow_action){
			.type = RTE_FLOW_ACTION_TYPE_DROP,
		};
		break;
	default:
		DRV_LOG(ERR, "port %u invalid behavior %d",
			dev->data->port_id,
			fdir_filter->action.behavior);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	attributes->queue.index = fdir_filter->action.rx_queue;
	switch (fdir_filter->input.flow_type) {
	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = input->flow.udp4_flow.ip.src_ip,
			.dst_addr = input->flow.udp4_flow.ip.dst_ip,
			.time_to_live = input->flow.udp4_flow.ip.ttl,
			.type_of_service = input->flow.udp4_flow.ip.tos,
			.next_proto_id = input->flow.udp4_flow.ip.proto,
		};
		attributes->l4.udp.hdr = (struct udp_hdr){
			.src_port = input->flow.udp4_flow.src_port,
			.dst_port = input->flow.udp4_flow.dst_port,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV4,
			.spec = &attributes->l3,
			.mask = &attributes->l3,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_UDP,
			.spec = &attributes->l4,
			.mask = &attributes->l4,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = input->flow.tcp4_flow.ip.src_ip,
			.dst_addr = input->flow.tcp4_flow.ip.dst_ip,
			.time_to_live = input->flow.tcp4_flow.ip.ttl,
			.type_of_service = input->flow.tcp4_flow.ip.tos,
			.next_proto_id = input->flow.tcp4_flow.ip.proto,
		};
		attributes->l4.tcp.hdr = (struct tcp_hdr){
			.src_port = input->flow.tcp4_flow.src_port,
			.dst_port = input->flow.tcp4_flow.dst_port,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV4,
			.spec = &attributes->l3,
			.mask = &attributes->l3,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_TCP,
			.spec = &attributes->l4,
			.mask = &attributes->l4,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = input->flow.ip4_flow.src_ip,
			.dst_addr = input->flow.ip4_flow.dst_ip,
			.time_to_live = input->flow.ip4_flow.ttl,
			.type_of_service = input->flow.ip4_flow.tos,
			.next_proto_id = input->flow.ip4_flow.proto,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV4,
			.spec = &attributes->l3,
			.mask = &attributes->l3,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
			.hop_limits = input->flow.udp6_flow.ip.hop_limits,
			.proto = input->flow.udp6_flow.ip.proto,
		};
		memcpy(attributes->l3.ipv6.hdr.src_addr,
		       input->flow.udp6_flow.ip.src_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
		memcpy(attributes->l3.ipv6.hdr.dst_addr,
		       input->flow.udp6_flow.ip.dst_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
		attributes->l4.udp.hdr = (struct udp_hdr){
			.src_port = input->flow.udp6_flow.src_port,
			.dst_port = input->flow.udp6_flow.dst_port,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV6,
			.spec = &attributes->l3,
			.mask = &attributes->l3,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_UDP,
			.spec = &attributes->l4,
			.mask = &attributes->l4,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
			.hop_limits = input->flow.tcp6_flow.ip.hop_limits,
			.proto = input->flow.tcp6_flow.ip.proto,
		};
		memcpy(attributes->l3.ipv6.hdr.src_addr,
		       input->flow.tcp6_flow.ip.src_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
		memcpy(attributes->l3.ipv6.hdr.dst_addr,
		       input->flow.tcp6_flow.ip.dst_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
		attributes->l4.tcp.hdr = (struct tcp_hdr){
			.src_port = input->flow.tcp6_flow.src_port,
			.dst_port = input->flow.tcp6_flow.dst_port,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV6,
			.spec = &attributes->l3,
			.mask = &attributes->l3,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_TCP,
			.spec = &attributes->l4,
			.mask = &attributes->l4,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
			.hop_limits = input->flow.ipv6_flow.hop_limits,
			.proto = input->flow.ipv6_flow.proto,
		};
		memcpy(attributes->l3.ipv6.hdr.src_addr,
		       input->flow.ipv6_flow.src_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
		memcpy(attributes->l3.ipv6.hdr.dst_addr,
		       input->flow.ipv6_flow.dst_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV6,
			.spec = &attributes->l3,
			.mask = &attributes->l3,
		};
		break;
	default:
		DRV_LOG(ERR, "port %u invalid flow type %d",
			dev->data->port_id, fdir_filter->input.flow_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	return 0;
}
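
/*
 * For example (illustrative only), a perfect-match IPv4/UDP filter comes
 * out of this conversion as the generic pattern ETH / IPV4 / UDP with a
 * QUEUE or DROP action.  Note that spec and mask point at the same header
 * storage, so only the fields actually filled in from the filter
 * participate in the match.
 */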

/**
 * Add new flow director filter and store it in list.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Flow director filter to add.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_add(struct rte_eth_dev *dev,
		     const struct rte_eth_fdir_filter *fdir_filter)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_fdir attributes = {
		.attr.group = 0,
		.l2_mask = {
			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.type = 0,
		},
	};
	struct mlx5_flow_parse parser = {
		.layer = HASH_RXQ_ETH,
	};
	struct rte_flow_error error;
	struct rte_flow *flow;
	int ret;

	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
	if (ret)
		return ret;
	ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
				attributes.actions, &error, &parser);
	if (ret)
		return ret;
	flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
				     attributes.items, attributes.actions,
				     &error);
	if (flow) {
		DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
			(void *)flow);
		return 0;
	}
	return -rte_errno;
}
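
/*
 * Note: the all-zero l2_mask above deliberately excludes the Ethernet
 * header from the match; flow director filters classify on L3/L4 fields
 * only, so the ETH item merely anchors the start of the pattern.
 */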

/**
 * Delete specific filter.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Filter to be deleted.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_delete(struct rte_eth_dev *dev,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_fdir attributes = {
		.attr.group = 0,
	};
	struct mlx5_flow_parse parser = {
		.create = 1,
		.layer = HASH_RXQ_ETH,
	};
	struct rte_flow_error error;
	struct rte_flow *flow;
	unsigned int i;
	int ret;

	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
	if (ret)
		return ret;
	ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
				attributes.actions, &error, &parser);
	if (ret)
		goto exit;
	/*
	 * Special case for the drop action, which is only set in the
	 * specifications when the flow is created; at this point the drop
	 * specification is missing and has to be appended before comparing.
	 */
	if (parser.drop) {
		struct ibv_flow_spec_action_drop *drop;

		drop = (void *)((uintptr_t)parser.queue[HASH_RXQ_ETH].ibv_attr +
				parser.queue[HASH_RXQ_ETH].offset);
		*drop = (struct ibv_flow_spec_action_drop){
			.type = IBV_FLOW_SPEC_ACTION_DROP,
			.size = sizeof(struct ibv_flow_spec_action_drop),
		};
		parser.queue[HASH_RXQ_ETH].ibv_attr->num_of_specs++;
	}
	TAILQ_FOREACH(flow, &priv->flows, next) {
		struct ibv_flow_attr *attr;
		struct ibv_spec_header *attr_h;
		void *spec;
		struct ibv_flow_attr *flow_attr;
		struct ibv_spec_header *flow_h;
		void *flow_spec;
		unsigned int specs_n;

		attr = parser.queue[HASH_RXQ_ETH].ibv_attr;
		flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr;
		/* Compare first the attributes. */
		if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
			continue;
		if (attr->num_of_specs == 0)
			continue;
		spec = (void *)((uintptr_t)attr +
				sizeof(struct ibv_flow_attr));
		flow_spec = (void *)((uintptr_t)flow_attr +
				     sizeof(struct ibv_flow_attr));
		specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
		for (i = 0; i != specs_n; ++i) {
			attr_h = spec;
			flow_h = flow_spec;
			if (memcmp(spec, flow_spec,
				   RTE_MIN(attr_h->size, flow_h->size)))
				goto wrong_flow;
			spec = (void *)((uintptr_t)spec + attr_h->size);
			flow_spec = (void *)((uintptr_t)flow_spec +
					     flow_h->size);
		}
		/* At this point, the flow matches. */
		break;
wrong_flow:
		/* The flow does not match. */
		continue;
	}
	ret = rte_errno; /* Save rte_errno before cleanup. */
	if (flow)
		mlx5_flow_list_destroy(dev, &priv->flows, flow);
exit:
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser.queue[i].ibv_attr)
			rte_free(parser.queue[i].ibv_attr);
	}
	rte_errno = ret; /* Restore rte_errno. */
	return -ret;
}
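
/*
 * Note: deletion has no filter handle to go by, so the filter is
 * re-converted into verbs specifications and compared byte-for-byte
 * against every installed flow; the first flow whose attributes and
 * specification headers all match is the one destroyed.
 */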

/**
 * Update queue for specific filter.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Filter to be updated.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_update(struct rte_eth_dev *dev,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	int ret;

	ret = mlx5_fdir_filter_delete(dev, fdir_filter);
	if (ret)
		return ret;
	return mlx5_fdir_filter_add(dev, fdir_filter);
}
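
/*
 * Note: update is implemented as delete followed by add and is therefore
 * not atomic; if the add step fails, the original filter has already been
 * removed.
 */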

/**
 * Flush all filters.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
static void
mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_flush(dev, &priv->flows);
}

/**
 * Get flow director information.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[out] fdir_info
 *   Resulting flow director information.
 */
static void
mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
{
	struct priv *priv = dev->data->dev_private;
	struct rte_eth_fdir_masks *mask =
		&priv->dev->data->dev_conf.fdir_conf.mask;

	fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
	fdir_info->guarant_spc = 0;
	rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
	fdir_info->max_flexpayload = 0;
	fdir_info->flow_types_mask[0] = 0;
	fdir_info->flex_payload_unit = 0;
	fdir_info->max_flex_payload_segment_num = 0;
	fdir_info->flex_payload_limit = 0;
	memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
}
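
/*
 * Note: only the flow director mode and the static masks from fdir_conf
 * are meaningful here; the guaranteed-space and flexible-payload fields
 * are reported as zero since the driver does not support flexible payload
 * matching.
 */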

/**
 * Deal with flow director operations.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
		    void *arg)
{
	struct priv *priv = dev->data->dev_private;
	enum rte_fdir_mode fdir_mode =
		priv->dev->data->dev_conf.fdir_conf.mode;

	if (filter_op == RTE_ETH_FILTER_NOP)
		return 0;
	if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
	    fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
		DRV_LOG(ERR, "port %u flow director mode %d not supported",
			dev->data->port_id, fdir_mode);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	switch (filter_op) {
	case RTE_ETH_FILTER_ADD:
		return mlx5_fdir_filter_add(dev, arg);
	case RTE_ETH_FILTER_UPDATE:
		return mlx5_fdir_filter_update(dev, arg);
	case RTE_ETH_FILTER_DELETE:
		return mlx5_fdir_filter_delete(dev, arg);
	case RTE_ETH_FILTER_FLUSH:
		mlx5_fdir_filter_flush(dev);
		break;
	case RTE_ETH_FILTER_INFO:
		mlx5_fdir_info_get(dev, arg);
		break;
	default:
		DRV_LOG(DEBUG, "port %u unknown operation %u",
			dev->data->port_id, filter_op);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	return 0;
}

/**
 * Manage filter operations.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param filter_type
 *   Filter type.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
		     enum rte_filter_type filter_type,
		     enum rte_filter_op filter_op,
		     void *arg)
{
	switch (filter_type) {
	case RTE_ETH_FILTER_GENERIC:
		if (filter_op != RTE_ETH_FILTER_GET) {
			rte_errno = EINVAL;
			return -rte_errno;
		}
		*(const void **)arg = &mlx5_flow_ops;
		return 0;
	case RTE_ETH_FILTER_FDIR:
		return mlx5_fdir_ctrl_func(dev, filter_op, arg);
	default:
		DRV_LOG(ERR, "port %u filter type (%d) not supported",
			dev->data->port_id, filter_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	return 0;
}
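
/*
 * Usage sketch (illustrative only): this entry point is reached through
 * the generic ethdev filter API; retrieving the rte_flow ops for instance:
 *
 *	const struct rte_flow_ops *ops;
 *
 *	rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_GENERIC,
 *				RTE_ETH_FILTER_GET, &ops);
 */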