1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2016 6WIND S.A.
3 * Copyright 2016 Mellanox Technologies, Ltd
10 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
12 #pragma GCC diagnostic ignored "-Wpedantic"
14 #include <infiniband/verbs.h>
16 #pragma GCC diagnostic error "-Wpedantic"
19 #include <rte_common.h>
20 #include <rte_ethdev_driver.h>
22 #include <rte_flow_driver.h>
23 #include <rte_malloc.h>
27 #include "mlx5_defs.h"
29 #include "mlx5_glue.h"
31 /* Define minimal priority for control plane flows. */
32 #define MLX5_CTRL_FLOW_PRIORITY 4
34 /* Internet Protocol versions. */
38 #ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
39 struct ibv_flow_spec_counter_action {
44 /* Dev ops structure defined in mlx5.c */
45 extern const struct eth_dev_ops mlx5_dev_ops;
46 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
48 /** Structure given to the conversion functions. */
49 struct mlx5_flow_data {
50 struct mlx5_flow_parse *parser; /**< Parser context. */
51 struct rte_flow_error *error; /**< Error context. */
55 mlx5_flow_create_eth(const struct rte_flow_item *item,
56 const void *default_mask,
57 struct mlx5_flow_data *data);
60 mlx5_flow_create_vlan(const struct rte_flow_item *item,
61 const void *default_mask,
62 struct mlx5_flow_data *data);
65 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
66 const void *default_mask,
67 struct mlx5_flow_data *data);
70 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
71 const void *default_mask,
72 struct mlx5_flow_data *data);
75 mlx5_flow_create_udp(const struct rte_flow_item *item,
76 const void *default_mask,
77 struct mlx5_flow_data *data);
80 mlx5_flow_create_tcp(const struct rte_flow_item *item,
81 const void *default_mask,
82 struct mlx5_flow_data *data);
85 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
86 const void *default_mask,
87 struct mlx5_flow_data *data);
89 struct mlx5_flow_parse;
92 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
96 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);
99 mlx5_flow_create_count(struct rte_eth_dev *dev, struct mlx5_flow_parse *parser);
101 /* Hash RX queue types. */
112 /* Initialization data for hash RX queue. */
113 struct hash_rxq_init {
114 uint64_t hash_fields; /* Fields that participate in the hash. */
115 uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
116 unsigned int flow_priority; /* Flow priority to use. */
117 unsigned int ip_version; /* Internet protocol. */
120 /* Initialization data for hash RX queues. */
121 const struct hash_rxq_init hash_rxq_init[] = {
123 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
124 IBV_RX_HASH_DST_IPV4 |
125 IBV_RX_HASH_SRC_PORT_TCP |
126 IBV_RX_HASH_DST_PORT_TCP),
127 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
129 .ip_version = MLX5_IPV4,
132 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
133 IBV_RX_HASH_DST_IPV4 |
134 IBV_RX_HASH_SRC_PORT_UDP |
135 IBV_RX_HASH_DST_PORT_UDP),
136 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
138 .ip_version = MLX5_IPV4,
141 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
142 IBV_RX_HASH_DST_IPV4),
143 .dpdk_rss_hf = (ETH_RSS_IPV4 |
146 .ip_version = MLX5_IPV4,
149 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
150 IBV_RX_HASH_DST_IPV6 |
151 IBV_RX_HASH_SRC_PORT_TCP |
152 IBV_RX_HASH_DST_PORT_TCP),
153 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
155 .ip_version = MLX5_IPV6,
158 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
159 IBV_RX_HASH_DST_IPV6 |
160 IBV_RX_HASH_SRC_PORT_UDP |
161 IBV_RX_HASH_DST_PORT_UDP),
162 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
164 .ip_version = MLX5_IPV6,
167 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
168 IBV_RX_HASH_DST_IPV6),
169 .dpdk_rss_hf = (ETH_RSS_IPV6 |
172 .ip_version = MLX5_IPV6,
181 /* Number of entries in hash_rxq_init[]. */
182 const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
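/*
 * Illustrative sketch (not part of the driver) of how the table above can be
 * read: any rss_hf bit set by the application selects every entry whose
 * dpdk_rss_hf intersects it, and the union of their hash_fields is what ends
 * up programmed through Verbs. The helper below is hypothetical:
 *
 *	static uint64_t
 *	rss_hf_to_hash_fields(uint64_t rss_hf)
 *	{
 *		uint64_t fields = 0;
 *		unsigned int i;
 *
 *		for (i = 0; i != hash_rxq_init_n; ++i)
 *			if (rss_hf & hash_rxq_init[i].dpdk_rss_hf)
 *				fields |= hash_rxq_init[i].hash_fields;
 *		return fields;
 *	}
 *
 * E.g. ETH_RSS_NONFRAG_IPV4_UDP alone yields IBV_RX_HASH_SRC_IPV4 |
 * IBV_RX_HASH_DST_IPV4 | IBV_RX_HASH_SRC_PORT_UDP | IBV_RX_HASH_DST_PORT_UDP.
 */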
184 /** Structure for holding counter stats. */
185 struct mlx5_flow_counter_stats {
186 uint64_t hits; /**< Number of packets matched by the rule. */
187 uint64_t bytes; /**< Number of bytes matched by the rule. */
190 /** Structure for Drop queue. */
191 struct mlx5_hrxq_drop {
192 struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
193 struct ibv_qp *qp; /**< Verbs queue pair. */
194 struct ibv_wq *wq; /**< Verbs work queue. */
195 struct ibv_cq *cq; /**< Verbs completion queue. */
198 /* Flow structures. */
200 uint64_t hash_fields; /**< Fields that participate in the hash. */
201 struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
202 struct ibv_flow *ibv_flow; /**< Verbs flow. */
203 struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
206 /* Drop flow structures. */
207 struct mlx5_flow_drop {
208 struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
209 struct ibv_flow *ibv_flow; /**< Verbs flow. */
213 TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
214 uint32_t mark:1; /**< Set if the flow is marked. */
215 uint32_t drop:1; /**< Drop queue. */
216 uint16_t queues_n; /**< Number of entries in queues[]. */
217 uint16_t (*queues)[]; /**< Queue indexes to use. */
218 struct rte_eth_rss_conf rss_conf; /**< RSS configuration. */
219 uint8_t rss_key[40]; /**< Copy of the RSS key. */
220 struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
221 struct mlx5_flow_counter_stats counter_stats; /**< The counter stats. */
222 struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
223 /**< Flow with Rx queue. */
226 /** Static initializer for items. */
228 (const enum rte_flow_item_type []){ \
229 __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
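/*
 * For illustration, ITEMS(RTE_FLOW_ITEM_TYPE_IPV4, RTE_FLOW_ITEM_TYPE_IPV6)
 * expands to a compound literal equivalent to:
 *
 *	(const enum rte_flow_item_type []){
 *		RTE_FLOW_ITEM_TYPE_IPV4,
 *		RTE_FLOW_ITEM_TYPE_IPV6,
 *		RTE_FLOW_ITEM_TYPE_END,
 *	}
 *
 * i.e. an END-terminated list of the item types allowed at a given node.
 */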
232 /** Structure to generate a simple graph of layers supported by the NIC. */
233 struct mlx5_flow_items {
234 /** List of possible actions for these items. */
235 const enum rte_flow_action_type *const actions;
236 /** Bit-masks corresponding to the possibilities for the item. */
239 * Default bit-masks to use when item->mask is not provided. When
240 * \default_mask is also NULL, the full supported bit-mask (\mask) is used instead.
243 const void *default_mask;
244 /** Bit-masks size in bytes. */
245 const unsigned int mask_sz;
247 * Conversion function from rte_flow to NIC specific flow.
250 * rte_flow item to convert.
251 * @param default_mask
252 * Default bit-masks to use when item->mask is not provided.
254 * Internal structure to store the conversion.
257 * 0 on success, a negative errno value otherwise and rte_errno is set.
260 int (*convert)(const struct rte_flow_item *item,
261 const void *default_mask,
262 struct mlx5_flow_data *data);
263 /** Size in bytes of the destination structure. */
264 const unsigned int dst_sz;
265 /** List of possible following items. */
266 const enum rte_flow_item_type *const items;
269 /** Valid action for this PMD. */
270 static const enum rte_flow_action_type valid_actions[] = {
271 RTE_FLOW_ACTION_TYPE_DROP,
272 RTE_FLOW_ACTION_TYPE_QUEUE,
273 RTE_FLOW_ACTION_TYPE_MARK,
274 RTE_FLOW_ACTION_TYPE_FLAG,
275 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
276 RTE_FLOW_ACTION_TYPE_COUNT,
278 RTE_FLOW_ACTION_TYPE_END,
281 /** Graph of supported items and associated actions. */
282 static const struct mlx5_flow_items mlx5_flow_items[] = {
283 [RTE_FLOW_ITEM_TYPE_END] = {
284 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
285 RTE_FLOW_ITEM_TYPE_VXLAN),
287 [RTE_FLOW_ITEM_TYPE_ETH] = {
288 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
289 RTE_FLOW_ITEM_TYPE_IPV4,
290 RTE_FLOW_ITEM_TYPE_IPV6),
291 .actions = valid_actions,
292 .mask = &(const struct rte_flow_item_eth){
293 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
294 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
297 .default_mask = &rte_flow_item_eth_mask,
298 .mask_sz = sizeof(struct rte_flow_item_eth),
299 .convert = mlx5_flow_create_eth,
300 .dst_sz = sizeof(struct ibv_flow_spec_eth),
302 [RTE_FLOW_ITEM_TYPE_VLAN] = {
303 .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
304 RTE_FLOW_ITEM_TYPE_IPV6),
305 .actions = valid_actions,
306 .mask = &(const struct rte_flow_item_vlan){
309 .default_mask = &rte_flow_item_vlan_mask,
310 .mask_sz = sizeof(struct rte_flow_item_vlan),
311 .convert = mlx5_flow_create_vlan,
314 [RTE_FLOW_ITEM_TYPE_IPV4] = {
315 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
316 RTE_FLOW_ITEM_TYPE_TCP),
317 .actions = valid_actions,
318 .mask = &(const struct rte_flow_item_ipv4){
322 .type_of_service = -1,
326 .default_mask = &rte_flow_item_ipv4_mask,
327 .mask_sz = sizeof(struct rte_flow_item_ipv4),
328 .convert = mlx5_flow_create_ipv4,
329 .dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
331 [RTE_FLOW_ITEM_TYPE_IPV6] = {
332 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
333 RTE_FLOW_ITEM_TYPE_TCP),
334 .actions = valid_actions,
335 .mask = &(const struct rte_flow_item_ipv6){
338 0xff, 0xff, 0xff, 0xff,
339 0xff, 0xff, 0xff, 0xff,
340 0xff, 0xff, 0xff, 0xff,
341 0xff, 0xff, 0xff, 0xff,
344 0xff, 0xff, 0xff, 0xff,
345 0xff, 0xff, 0xff, 0xff,
346 0xff, 0xff, 0xff, 0xff,
347 0xff, 0xff, 0xff, 0xff,
354 .default_mask = &rte_flow_item_ipv6_mask,
355 .mask_sz = sizeof(struct rte_flow_item_ipv6),
356 .convert = mlx5_flow_create_ipv6,
357 .dst_sz = sizeof(struct ibv_flow_spec_ipv6),
359 [RTE_FLOW_ITEM_TYPE_UDP] = {
360 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
361 .actions = valid_actions,
362 .mask = &(const struct rte_flow_item_udp){
368 .default_mask = &rte_flow_item_udp_mask,
369 .mask_sz = sizeof(struct rte_flow_item_udp),
370 .convert = mlx5_flow_create_udp,
371 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
373 [RTE_FLOW_ITEM_TYPE_TCP] = {
374 .actions = valid_actions,
375 .mask = &(const struct rte_flow_item_tcp){
381 .default_mask = &rte_flow_item_tcp_mask,
382 .mask_sz = sizeof(struct rte_flow_item_tcp),
383 .convert = mlx5_flow_create_tcp,
384 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
386 [RTE_FLOW_ITEM_TYPE_VXLAN] = {
387 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
388 .actions = valid_actions,
389 .mask = &(const struct rte_flow_item_vxlan){
390 .vni = "\xff\xff\xff",
392 .default_mask = &rte_flow_item_vxlan_mask,
393 .mask_sz = sizeof(struct rte_flow_item_vxlan),
394 .convert = mlx5_flow_create_vxlan,
395 .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
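/*
 * How the graph above is walked: each pattern item must appear in the .items
 * list of the previous one, starting from RTE_FLOW_ITEM_TYPE_END. For
 * instance the following VXLAN overlay pattern is reachable
 * (END -> eth -> ipv4 -> udp -> vxlan -> eth), whereas e.g. a vlan item
 * directly after ipv4 is not:
 *
 *	const struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_UDP },
 *		{ .type = RTE_FLOW_ITEM_TYPE_VXLAN },
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 */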
399 /** Structure to pass to the conversion function. */
400 struct mlx5_flow_parse {
401 uint32_t inner; /**< Set once VXLAN is encountered. */
403 /**< Whether resources should remain after a validate. */
404 uint32_t drop:1; /**< Target is a drop queue. */
405 uint32_t mark:1; /**< Mark is present in the flow. */
406 uint32_t count:1; /**< Count is present in the flow. */
407 uint32_t mark_id; /**< Mark identifier. */
408 uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indexes to use. */
409 uint16_t queues_n; /**< Number of entries in queues[]. */
410 struct rte_eth_rss_conf rss_conf; /**< RSS configuration. */
411 uint8_t rss_key[40]; /**< Copy of the RSS key. */
412 enum hash_rxq_type layer; /**< Last pattern layer detected. */
413 struct ibv_counter_set *cs; /**< Holds the counter set for the rule. */
415 struct ibv_flow_attr *ibv_attr;
416 /**< Pointer to Verbs attributes. */
418 /**< Current position or total size of the attribute. */
419 } queue[RTE_DIM(hash_rxq_init)];
422 static const struct rte_flow_ops mlx5_flow_ops = {
423 .validate = mlx5_flow_validate,
424 .create = mlx5_flow_create,
425 .destroy = mlx5_flow_destroy,
426 .flush = mlx5_flow_flush,
427 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
428 .query = mlx5_flow_query,
432 .isolate = mlx5_flow_isolate,
435 /* Convert FDIR request to Generic flow. */
437 struct rte_flow_attr attr;
438 struct rte_flow_action actions[2];
439 struct rte_flow_item items[4];
440 struct rte_flow_item_eth l2;
441 struct rte_flow_item_eth l2_mask;
443 struct rte_flow_item_ipv4 ipv4;
444 struct rte_flow_item_ipv6 ipv6;
447 struct rte_flow_item_ipv4 ipv4;
448 struct rte_flow_item_ipv6 ipv6;
451 struct rte_flow_item_udp udp;
452 struct rte_flow_item_tcp tcp;
455 struct rte_flow_item_udp udp;
456 struct rte_flow_item_tcp tcp;
458 struct rte_flow_action_queue queue;
461 /* Verbs specification header. */
462 struct ibv_spec_header {
463 enum ibv_flow_spec_type type;
468 * Check support for a given item.
471 * Item specification.
473 * Bit-masks covering supported fields to compare with spec, last and mask in \item.
476 * Bit-mask size in bytes.
479 * 0 on success, a negative errno value otherwise and rte_errno is set.
482 mlx5_flow_item_validate(const struct rte_flow_item *item,
483 const uint8_t *mask, unsigned int size)
485 if (!item->spec && (item->mask || item->last)) {
489 if (item->spec && !item->mask) {
491 const uint8_t *spec = item->spec;
493 for (i = 0; i < size; ++i)
494 if ((spec[i] | mask[i]) != mask[i]) {
499 if (item->last && !item->mask) {
501 const uint8_t *spec = item->last;
503 for (i = 0; i < size; ++i)
504 if ((spec[i] | mask[i]) != mask[i]) {
511 const uint8_t *spec = item->spec;
513 for (i = 0; i < size; ++i)
514 if ((spec[i] | mask[i]) != mask[i]) {
519 if (item->spec && item->last) {
522 const uint8_t *apply = mask;
528 for (i = 0; i < size; ++i) {
529 spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
530 last[i] = ((const uint8_t *)item->last)[i] & apply[i];
532 ret = memcmp(spec, last, size);
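/*
 * Note on the spec/last handling above: once masked, spec and last must
 * compare equal, so true ranges are rejected. A hedged example with a UDP
 * item and no explicit mask (the default mask then applies):
 *
 *	struct rte_flow_item_udp spec = { .hdr.dst_port = RTE_BE16(4789) };
 *	struct rte_flow_item_udp last = { .hdr.dst_port = RTE_BE16(4790) };
 *	struct rte_flow_item item = {
 *		.type = RTE_FLOW_ITEM_TYPE_UDP,
 *		.spec = &spec,
 *		.last = &last,
 *	};
 *
 * memcmp() on the masked copies differs, so the item is rejected.
 */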
542 * Copy the RSS configuration from the user. If rss_conf is null, the
543 * driver's default RSS configuration is used instead.
546 * Internal parser structure.
548 * User RSS configuration to save.
551 * 0 on success, a negative errno value otherwise and rte_errno is set.
554 mlx5_flow_convert_rss_conf(struct mlx5_flow_parse *parser,
555 const struct rte_eth_rss_conf *rss_conf)
558 * This function is also called at the beginning of
559 * mlx5_flow_convert_actions() to initialize the parser with the
560 * device default RSS configuration.
563 if (rss_conf->rss_hf & MLX5_RSS_HF_MASK) {
567 if (rss_conf->rss_key_len != 40) {
571 if (rss_conf->rss_key_len && rss_conf->rss_key) {
572 parser->rss_conf.rss_key_len = rss_conf->rss_key_len;
573 memcpy(parser->rss_key, rss_conf->rss_key,
574 rss_conf->rss_key_len);
575 parser->rss_conf.rss_key = parser->rss_key;
577 parser->rss_conf.rss_hf = rss_conf->rss_hf;
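/*
 * A minimal sketch of an RSS configuration the checks above accept: the key
 * must be exactly 40 bytes (the Toeplitz key length used here) and rss_hf
 * may only carry bits listed in hash_rxq_init[].dpdk_rss_hf. Key contents
 * below are arbitrary:
 *
 *	static uint8_t rss_key[40] = { 0x6d, 0x5a, ... };
 *	const struct rte_eth_rss_conf rss_conf = {
 *		.rss_key = rss_key,
 *		.rss_key_len = 40,
 *		.rss_hf = ETH_RSS_NONFRAG_IPV4_TCP | ETH_RSS_NONFRAG_IPV4_UDP,
 *	};
 */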
583 * Validate flow rule attributes.
586 * Flow rule attributes.
588 * Perform verbose error reporting if not NULL.
591 * 0 on success, a negative errno value otherwise and rte_errno is set.
594 mlx5_flow_convert_attributes(const struct rte_flow_attr *attr,
595 struct rte_flow_error *error)
598 rte_flow_error_set(error, ENOTSUP,
599 RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
601 "groups are not supported");
604 if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
605 rte_flow_error_set(error, ENOTSUP,
606 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
608 "priorities are not supported");
612 rte_flow_error_set(error, ENOTSUP,
613 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
615 "egress is not supported");
618 if (!attr->ingress) {
619 rte_flow_error_set(error, ENOTSUP,
620 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
622 "only ingress is supported");
629 * Extract the actions from the request and fill the parser.
632 * Pointer to Ethernet device.
634 * Associated actions (list terminated by the END action).
636 * Perform verbose error reporting if not NULL.
637 * @param[in, out] parser
638 * Internal parser structure.
641 * 0 on success, a negative errno value otherwise and rte_errno is set.
644 mlx5_flow_convert_actions(struct rte_eth_dev *dev,
645 const struct rte_flow_action actions[],
646 struct rte_flow_error *error,
647 struct mlx5_flow_parse *parser)
649 struct priv *priv = dev->data->dev_private;
653 * Add the default RSS configuration. Verbs needs it to create the QP
654 * even when no RSS is requested.
656 ret = mlx5_flow_convert_rss_conf(parser,
657 (const struct rte_eth_rss_conf *)
661 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
662 if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
664 } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
666 } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
667 const struct rte_flow_action_queue *queue =
668 (const struct rte_flow_action_queue *)
673 if (!queue || (queue->index > (priv->rxqs_n - 1)))
674 goto exit_action_not_supported;
675 for (n = 0; n < parser->queues_n; ++n) {
676 if (parser->queues[n] == queue->index) {
681 if (parser->queues_n > 1 && !found) {
682 rte_flow_error_set(error, ENOTSUP,
683 RTE_FLOW_ERROR_TYPE_ACTION,
685 "queue action not in RSS queues");
689 parser->queues_n = 1;
690 parser->queues[0] = queue->index;
692 } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
693 const struct rte_flow_action_rss *rss =
694 (const struct rte_flow_action_rss *)
698 if (!rss || !rss->num) {
699 rte_flow_error_set(error, EINVAL,
700 RTE_FLOW_ERROR_TYPE_ACTION,
705 if (parser->queues_n == 1) {
708 assert(parser->queues_n);
709 for (n = 0; n < rss->num; ++n) {
710 if (parser->queues[0] ==
717 rte_flow_error_set(error, ENOTSUP,
718 RTE_FLOW_ERROR_TYPE_ACTION,
720 "queue action not in RSS"
725 if (rss->num > RTE_DIM(parser->queues)) {
726 rte_flow_error_set(error, EINVAL,
727 RTE_FLOW_ERROR_TYPE_ACTION,
729 "too many queues for RSS"
733 for (n = 0; n < rss->num; ++n) {
734 if (rss->queue[n] >= priv->rxqs_n) {
735 rte_flow_error_set(error, EINVAL,
736 RTE_FLOW_ERROR_TYPE_ACTION,
738 "queue id > number of"
743 for (n = 0; n < rss->num; ++n)
744 parser->queues[n] = rss->queue[n];
745 parser->queues_n = rss->num;
746 if (mlx5_flow_convert_rss_conf(parser, rss->rss_conf)) {
747 rte_flow_error_set(error, EINVAL,
748 RTE_FLOW_ERROR_TYPE_ACTION,
750 "wrong RSS configuration");
753 } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
754 const struct rte_flow_action_mark *mark =
755 (const struct rte_flow_action_mark *)
759 rte_flow_error_set(error, EINVAL,
760 RTE_FLOW_ERROR_TYPE_ACTION,
762 "mark must be defined");
764 } else if (mark->id >= MLX5_FLOW_MARK_MAX) {
765 rte_flow_error_set(error, ENOTSUP,
766 RTE_FLOW_ERROR_TYPE_ACTION,
768 "mark must be between 0"
773 parser->mark_id = mark->id;
774 } else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
776 } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
777 priv->config.flow_counter_en) {
780 goto exit_action_not_supported;
783 if (parser->drop && parser->mark)
785 if (!parser->queues_n && !parser->drop) {
786 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
787 NULL, "no valid action");
791 exit_action_not_supported:
792 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
793 actions, "action not supported");
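/*
 * Example terminal action lists accepted by the loop above, using the
 * pre-18.05 rte_flow API this file is written against:
 *
 *	struct rte_flow_action_queue queue = { .index = 0 };
 *	const struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 *
 * MARK/FLAG may be combined with QUEUE or RSS; combined with DROP the mark
 * is silently discarded (see the parser->drop && parser->mark check above).
 */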
801 * Pattern specification (list terminated by the END pattern item).
803 * Perform verbose error reporting if not NULL.
804 * @param[in, out] parser
805 * Internal parser structure.
808 * 0 on success, a negative errno value otherwise and rte_errno is set.
811 mlx5_flow_convert_items_validate(const struct rte_flow_item items[],
812 struct rte_flow_error *error,
813 struct mlx5_flow_parse *parser)
815 const struct mlx5_flow_items *cur_item = mlx5_flow_items;
819 /* Initialise the offsets to start after the Verbs attribute. */
820 for (i = 0; i != hash_rxq_init_n; ++i)
821 parser->queue[i].offset = sizeof(struct ibv_flow_attr);
822 for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
823 const struct mlx5_flow_items *token = NULL;
826 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
830 cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
832 if (cur_item->items[i] == items->type) {
833 token = &mlx5_flow_items[items->type];
839 goto exit_item_not_supported;
842 ret = mlx5_flow_item_validate(items,
843 (const uint8_t *)cur_item->mask,
846 goto exit_item_not_supported;
847 if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
849 rte_flow_error_set(error, ENOTSUP,
850 RTE_FLOW_ERROR_TYPE_ITEM,
852 "cannot recognize multiple"
853 " VXLAN encapsulations");
856 parser->inner = IBV_FLOW_SPEC_INNER;
859 parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
861 for (n = 0; n != hash_rxq_init_n; ++n)
862 parser->queue[n].offset += cur_item->dst_sz;
866 parser->queue[HASH_RXQ_ETH].offset +=
867 sizeof(struct ibv_flow_spec_action_drop);
870 for (i = 0; i != hash_rxq_init_n; ++i)
871 parser->queue[i].offset +=
872 sizeof(struct ibv_flow_spec_action_tag);
875 unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
877 for (i = 0; i != hash_rxq_init_n; ++i)
878 parser->queue[i].offset += size;
881 exit_item_not_supported:
882 return rte_flow_error_set(error, -ret, RTE_FLOW_ERROR_TYPE_ITEM,
883 items, "item not supported");
887 * Allocate memory space to store verbs flow attributes.
890 * Amount of bytes to allocate.
892 * Perform verbose error reporting if not NULL.
895 * A verbs flow attribute on success, NULL otherwise and rte_errno is set.
897 static struct ibv_flow_attr *
898 mlx5_flow_convert_allocate(unsigned int size, struct rte_flow_error *error)
900 struct ibv_flow_attr *ibv_attr;
902 ibv_attr = rte_calloc(__func__, 1, size, 0);
904 rte_flow_error_set(error, ENOMEM,
905 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
907 "cannot allocate verbs spec attributes");
914 * Give inner packet matching a higher priority than non-inner matching.
917 * @param[in, out] parser
918 * Internal parser structure.
920 * User flow attribute.
923 mlx5_flow_update_priority(struct mlx5_flow_parse *parser,
924 const struct rte_flow_attr *attr)
929 parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
931 hash_rxq_init[HASH_RXQ_ETH].flow_priority;
934 for (i = 0; i != hash_rxq_init_n; ++i) {
935 if (parser->queue[i].ibv_attr) {
936 parser->queue[i].ibv_attr->priority =
938 hash_rxq_init[i].flow_priority -
939 (parser->inner ? 1 : 0);
945 * Finalise verbs flow attributes.
947 * @param[in, out] parser
948 * Internal parser structure.
951 mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
953 const unsigned int ipv4 =
954 hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
955 const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 : HASH_RXQ_TCPV6;
956 const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
957 const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 : HASH_RXQ_TCPV4;
958 const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 : HASH_RXQ_IPV4;
959 const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
962 /* Remove any other flow not matching the pattern. */
963 if (parser->queues_n == 1 && !parser->rss_conf.rss_hf) {
964 for (i = 0; i != hash_rxq_init_n; ++i) {
965 if (i == HASH_RXQ_ETH)
967 rte_free(parser->queue[i].ibv_attr);
968 parser->queue[i].ibv_attr = NULL;
972 if (parser->layer == HASH_RXQ_ETH) {
976 * This layer becomes useless as the pattern defined by the layers underneath supersedes it.
979 rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
980 parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
982 /* Remove opposite kind of layer e.g. IPv6 if the pattern is IPv4. */
983 for (i = ohmin; i != (ohmax + 1); ++i) {
984 if (!parser->queue[i].ibv_attr)
986 rte_free(parser->queue[i].ibv_attr);
987 parser->queue[i].ibv_attr = NULL;
989 /* Remove impossible flows according to the RSS configuration. */
990 if (hash_rxq_init[parser->layer].dpdk_rss_hf &
991 parser->rss_conf.rss_hf) {
992 /* Remove any other flow. */
993 for (i = hmin; i != (hmax + 1); ++i) {
994 if ((i == parser->layer) ||
995 (!parser->queue[i].ibv_attr))
997 rte_free(parser->queue[i].ibv_attr);
998 parser->queue[i].ibv_attr = NULL;
1000 } else if (!parser->queue[ip].ibv_attr) {
1001 /* No RSS possible with the current configuration. */
1002 parser->queues_n = 1;
1007 * Fill missing layers in verbs specifications, or compute the correct
1008 * offset to allocate the memory space for the attributes and specifications.
1011 for (i = 0; i != hash_rxq_init_n - 1; ++i) {
1013 struct ibv_flow_spec_ipv4_ext ipv4;
1014 struct ibv_flow_spec_ipv6 ipv6;
1015 struct ibv_flow_spec_tcp_udp udp_tcp;
1020 if (i == parser->layer)
1022 if (parser->layer == HASH_RXQ_ETH) {
1023 if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
1024 size = sizeof(struct ibv_flow_spec_ipv4_ext);
1025 specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
1026 .type = IBV_FLOW_SPEC_IPV4_EXT,
1030 size = sizeof(struct ibv_flow_spec_ipv6);
1031 specs.ipv6 = (struct ibv_flow_spec_ipv6){
1032 .type = IBV_FLOW_SPEC_IPV6,
1036 if (parser->queue[i].ibv_attr) {
1037 dst = (void *)((uintptr_t)
1038 parser->queue[i].ibv_attr +
1039 parser->queue[i].offset);
1040 memcpy(dst, &specs, size);
1041 ++parser->queue[i].ibv_attr->num_of_specs;
1043 parser->queue[i].offset += size;
1045 if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
1046 (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
1047 size = sizeof(struct ibv_flow_spec_tcp_udp);
1048 specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
1049 .type = ((i == HASH_RXQ_UDPV4 ||
1050 i == HASH_RXQ_UDPV6) ?
1055 if (parser->queue[i].ibv_attr) {
1056 dst = (void *)((uintptr_t)
1057 parser->queue[i].ibv_attr +
1058 parser->queue[i].offset);
1059 memcpy(dst, &specs, size);
1060 ++parser->queue[i].ibv_attr->num_of_specs;
1062 parser->queue[i].offset += size;
1068 * Validate and convert a flow supported by the NIC.
1071 * Pointer to Ethernet device.
1073 * Flow rule attributes.
1074 * @param[in] pattern
1075 * Pattern specification (list terminated by the END pattern item).
1076 * @param[in] actions
1077 * Associated actions (list terminated by the END action).
1079 * Perform verbose error reporting if not NULL.
1080 * @param[in, out] parser
1081 * Internal parser structure.
1084 * 0 on success, a negative errno value otherwise and rte_errno is set.
1087 mlx5_flow_convert(struct rte_eth_dev *dev,
1088 const struct rte_flow_attr *attr,
1089 const struct rte_flow_item items[],
1090 const struct rte_flow_action actions[],
1091 struct rte_flow_error *error,
1092 struct mlx5_flow_parse *parser)
1094 const struct mlx5_flow_items *cur_item = mlx5_flow_items;
1098 /* First step. Validate the attributes, items and actions. */
1099 *parser = (struct mlx5_flow_parse){
1100 .create = parser->create,
1101 .layer = HASH_RXQ_ETH,
1102 .mark_id = MLX5_FLOW_MARK_DEFAULT,
1104 ret = mlx5_flow_convert_attributes(attr, error);
1107 ret = mlx5_flow_convert_actions(dev, actions, error, parser);
1110 ret = mlx5_flow_convert_items_validate(items, error, parser);
1113 mlx5_flow_convert_finalise(parser);
1116 * Allocate the memory space to store verbs specifications.
1119 unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;
1121 parser->queue[HASH_RXQ_ETH].ibv_attr =
1122 mlx5_flow_convert_allocate(offset, error);
1123 if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
1125 parser->queue[HASH_RXQ_ETH].offset =
1126 sizeof(struct ibv_flow_attr);
1128 for (i = 0; i != hash_rxq_init_n; ++i) {
1129 unsigned int offset;
1131 if (!(parser->rss_conf.rss_hf &
1132 hash_rxq_init[i].dpdk_rss_hf) &&
1133 (i != HASH_RXQ_ETH))
1135 offset = parser->queue[i].offset;
1136 parser->queue[i].ibv_attr =
1137 mlx5_flow_convert_allocate(offset, error);
1138 if (!parser->queue[i].ibv_attr)
1140 parser->queue[i].offset = sizeof(struct ibv_flow_attr);
1143 /* Third step. Parse the pattern and fill the specifications. */
1145 for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
1146 struct mlx5_flow_data data = {
1151 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
1153 cur_item = &mlx5_flow_items[items->type];
1154 ret = cur_item->convert(items,
1155 (cur_item->default_mask ?
1156 cur_item->default_mask :
1163 mlx5_flow_create_flag_mark(parser, parser->mark_id);
1164 if (parser->count && parser->create) {
1165 mlx5_flow_create_count(dev, parser);
1167 goto exit_count_error;
1170 * Last step. Complete the missing specifications to reach the RSS configuration.
1174 mlx5_flow_convert_finalise(parser);
1175 mlx5_flow_update_priority(parser, attr);
1177 /* Only verification is expected, all resources should be released. */
1178 if (!parser->create) {
1179 for (i = 0; i != hash_rxq_init_n; ++i) {
1180 if (parser->queue[i].ibv_attr) {
1181 rte_free(parser->queue[i].ibv_attr);
1182 parser->queue[i].ibv_attr = NULL;
1188 for (i = 0; i != hash_rxq_init_n; ++i) {
1189 if (parser->queue[i].ibv_attr) {
1190 rte_free(parser->queue[i].ibv_attr);
1191 parser->queue[i].ibv_attr = NULL;
1194 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1195 NULL, "cannot allocate verbs spec attributes");
1198 rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1199 NULL, "cannot create counter");
1204 * Copy the specification created into the flow.
1207 * Internal parser structure.
1209 * Created specification to copy.
1211 * Size in bytes of the specification to copy.
1214 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
1220 for (i = 0; i != hash_rxq_init_n; ++i) {
1221 if (!parser->queue[i].ibv_attr)
1223 /* Specification must be the same L3 type or none. */
1224 if (parser->layer == HASH_RXQ_ETH ||
1225 (hash_rxq_init[parser->layer].ip_version ==
1226 hash_rxq_init[i].ip_version) ||
1227 (hash_rxq_init[i].ip_version == 0)) {
1228 dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1229 parser->queue[i].offset);
1230 memcpy(dst, src, size);
1231 ++parser->queue[i].ibv_attr->num_of_specs;
1232 parser->queue[i].offset += size;
1238 * Convert Ethernet item to Verbs specification.
1241 * Item specification.
1242 * @param[in] default_mask
1243 * Default bit-masks to use when item->mask is not provided.
1244 * @param[in, out] data
1248 * 0 on success, a negative errno value otherwise and rte_errno is set.
1251 mlx5_flow_create_eth(const struct rte_flow_item *item,
1252 const void *default_mask,
1253 struct mlx5_flow_data *data)
1255 const struct rte_flow_item_eth *spec = item->spec;
1256 const struct rte_flow_item_eth *mask = item->mask;
1257 struct mlx5_flow_parse *parser = data->parser;
1258 const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1259 struct ibv_flow_spec_eth eth = {
1260 .type = parser->inner | IBV_FLOW_SPEC_ETH,
1264 /* Don't update layer for the inner pattern. */
1266 parser->layer = HASH_RXQ_ETH;
1271 mask = default_mask;
1272 memcpy(ð.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
1273 memcpy(ð.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
1274 eth.val.ether_type = spec->type;
1275 memcpy(ð.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
1276 memcpy(ð.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
1277 eth.mask.ether_type = mask->type;
1278 /* Remove unwanted bits from values. */
1279 for (i = 0; i < ETHER_ADDR_LEN; ++i) {
1280 eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
1281 eth.val.src_mac[i] &= eth.mask.src_mac[i];
1283 eth.val.ether_type &= eth.mask.ether_type;
1285 mlx5_flow_create_copy(parser, ð, eth_size);
1290 * Convert VLAN item to Verbs specification.
1293 * Item specification.
1294 * @param[in] default_mask
1295 * Default bit-masks to use when item->mask is not provided.
1296 * @param[in, out] data
1300 * 0 on success, a negative errno value otherwise and rte_errno is set.
1303 mlx5_flow_create_vlan(const struct rte_flow_item *item,
1304 const void *default_mask,
1305 struct mlx5_flow_data *data)
1307 const struct rte_flow_item_vlan *spec = item->spec;
1308 const struct rte_flow_item_vlan *mask = item->mask;
1309 struct mlx5_flow_parse *parser = data->parser;
1310 struct ibv_flow_spec_eth *eth;
1311 const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1316 mask = default_mask;
1318 for (i = 0; i != hash_rxq_init_n; ++i) {
1319 if (!parser->queue[i].ibv_attr)
1322 eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1323 parser->queue[i].offset - eth_size);
1324 eth->val.vlan_tag = spec->tci;
1325 eth->mask.vlan_tag = mask->tci;
1326 eth->val.vlan_tag &= eth->mask.vlan_tag;
1328 * From the Verbs perspective, an empty VLAN is equivalent
1329 * to a packet without a VLAN layer.
1331 if (!eth->mask.vlan_tag)
1337 return rte_flow_error_set(data->error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM,
1338 item, "VLAN cannot be empty");
1342 * Convert IPv4 item to Verbs specification.
1345 * Item specification.
1346 * @param[in] default_mask
1347 * Default bit-masks to use when item->mask is not provided.
1348 * @param[in, out] data
1352 * 0 on success, a negative errno value otherwise and rte_errno is set.
1355 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
1356 const void *default_mask,
1357 struct mlx5_flow_data *data)
1359 const struct rte_flow_item_ipv4 *spec = item->spec;
1360 const struct rte_flow_item_ipv4 *mask = item->mask;
1361 struct mlx5_flow_parse *parser = data->parser;
1362 unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
1363 struct ibv_flow_spec_ipv4_ext ipv4 = {
1364 .type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
1368 /* Don't update layer for the inner pattern. */
1370 parser->layer = HASH_RXQ_IPV4;
1373 mask = default_mask;
1374 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
1375 .src_ip = spec->hdr.src_addr,
1376 .dst_ip = spec->hdr.dst_addr,
1377 .proto = spec->hdr.next_proto_id,
1378 .tos = spec->hdr.type_of_service,
1380 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
1381 .src_ip = mask->hdr.src_addr,
1382 .dst_ip = mask->hdr.dst_addr,
1383 .proto = mask->hdr.next_proto_id,
1384 .tos = mask->hdr.type_of_service,
1386 /* Remove unwanted bits from values. */
1387 ipv4.val.src_ip &= ipv4.mask.src_ip;
1388 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
1389 ipv4.val.proto &= ipv4.mask.proto;
1390 ipv4.val.tos &= ipv4.mask.tos;
1392 mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
1397 * Convert IPv6 item to Verbs specification.
1400 * Item specification.
1401 * @param[in] default_mask
1402 * Default bit-masks to use when item->mask is not provided.
1403 * @param[in, out] data
1407 * 0 on success, a negative errno value otherwise and rte_errno is set.
1410 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
1411 const void *default_mask,
1412 struct mlx5_flow_data *data)
1414 const struct rte_flow_item_ipv6 *spec = item->spec;
1415 const struct rte_flow_item_ipv6 *mask = item->mask;
1416 struct mlx5_flow_parse *parser = data->parser;
1417 unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
1418 struct ibv_flow_spec_ipv6 ipv6 = {
1419 .type = parser->inner | IBV_FLOW_SPEC_IPV6,
1423 /* Don't update layer for the inner pattern. */
1425 parser->layer = HASH_RXQ_IPV6;
1428 uint32_t vtc_flow_val;
1429 uint32_t vtc_flow_mask;
1432 mask = default_mask;
1433 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
1434 RTE_DIM(ipv6.val.src_ip));
1435 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
1436 RTE_DIM(ipv6.val.dst_ip));
1437 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
1438 RTE_DIM(ipv6.mask.src_ip));
1439 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
1440 RTE_DIM(ipv6.mask.dst_ip));
1441 vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
1442 vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
1443 ipv6.val.flow_label =
1444 rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
1446 ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
1448 ipv6.val.next_hdr = spec->hdr.proto;
1449 ipv6.val.hop_limit = spec->hdr.hop_limits;
1450 ipv6.mask.flow_label =
1451 rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
1453 ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
1455 ipv6.mask.next_hdr = mask->hdr.proto;
1456 ipv6.mask.hop_limit = mask->hdr.hop_limits;
1457 /* Remove unwanted bits from values. */
1458 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
1459 ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
1460 ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
1462 ipv6.val.flow_label &= ipv6.mask.flow_label;
1463 ipv6.val.traffic_class &= ipv6.mask.traffic_class;
1464 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
1465 ipv6.val.hop_limit &= ipv6.mask.hop_limit;
1467 mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
1472 * Convert UDP item to Verbs specification.
1475 * Item specification.
1476 * @param[in] default_mask
1477 * Default bit-masks to use when item->mask is not provided.
1478 * @param[in, out] data
1482 * 0 on success, a negative errno value otherwise and rte_errno is set.
1485 mlx5_flow_create_udp(const struct rte_flow_item *item,
1486 const void *default_mask,
1487 struct mlx5_flow_data *data)
1489 const struct rte_flow_item_udp *spec = item->spec;
1490 const struct rte_flow_item_udp *mask = item->mask;
1491 struct mlx5_flow_parse *parser = data->parser;
1492 unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1493 struct ibv_flow_spec_tcp_udp udp = {
1494 .type = parser->inner | IBV_FLOW_SPEC_UDP,
1498 /* Don't update layer for the inner pattern. */
1499 if (!parser->inner) {
1500 if (parser->layer == HASH_RXQ_IPV4)
1501 parser->layer = HASH_RXQ_UDPV4;
1503 parser->layer = HASH_RXQ_UDPV6;
1507 mask = default_mask;
1508 udp.val.dst_port = spec->hdr.dst_port;
1509 udp.val.src_port = spec->hdr.src_port;
1510 udp.mask.dst_port = mask->hdr.dst_port;
1511 udp.mask.src_port = mask->hdr.src_port;
1512 /* Remove unwanted bits from values. */
1513 udp.val.src_port &= udp.mask.src_port;
1514 udp.val.dst_port &= udp.mask.dst_port;
1516 mlx5_flow_create_copy(parser, &udp, udp_size);
1521 * Convert TCP item to Verbs specification.
1524 * Item specification.
1525 * @param[in] default_mask
1526 * Default bit-masks to use when item->mask is not provided.
1527 * @param[in, out] data
1531 * 0 on success, a negative errno value otherwise and rte_errno is set.
1534 mlx5_flow_create_tcp(const struct rte_flow_item *item,
1535 const void *default_mask,
1536 struct mlx5_flow_data *data)
1538 const struct rte_flow_item_tcp *spec = item->spec;
1539 const struct rte_flow_item_tcp *mask = item->mask;
1540 struct mlx5_flow_parse *parser = data->parser;
1541 unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1542 struct ibv_flow_spec_tcp_udp tcp = {
1543 .type = parser->inner | IBV_FLOW_SPEC_TCP,
1547 /* Don't update layer for the inner pattern. */
1548 if (!parser->inner) {
1549 if (parser->layer == HASH_RXQ_IPV4)
1550 parser->layer = HASH_RXQ_TCPV4;
1552 parser->layer = HASH_RXQ_TCPV6;
1556 mask = default_mask;
1557 tcp.val.dst_port = spec->hdr.dst_port;
1558 tcp.val.src_port = spec->hdr.src_port;
1559 tcp.mask.dst_port = mask->hdr.dst_port;
1560 tcp.mask.src_port = mask->hdr.src_port;
1561 /* Remove unwanted bits from values. */
1562 tcp.val.src_port &= tcp.mask.src_port;
1563 tcp.val.dst_port &= tcp.mask.dst_port;
1565 mlx5_flow_create_copy(parser, &tcp, tcp_size);
1570 * Convert VXLAN item to Verbs specification.
1573 * Item specification.
1574 * @param[in] default_mask
1575 * Default bit-masks to use when item->mask is not provided.
1576 * @param[in, out] data
1580 * 0 on success, a negative errno value otherwise and rte_errno is set.
1583 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
1584 const void *default_mask,
1585 struct mlx5_flow_data *data)
1587 const struct rte_flow_item_vxlan *spec = item->spec;
1588 const struct rte_flow_item_vxlan *mask = item->mask;
1589 struct mlx5_flow_parse *parser = data->parser;
1590 unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1591 struct ibv_flow_spec_tunnel vxlan = {
1592 .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1601 parser->inner = IBV_FLOW_SPEC_INNER;
1604 mask = default_mask;
1605 memcpy(&id.vni[1], spec->vni, 3);
1606 vxlan.val.tunnel_id = id.vlan_id;
1607 memcpy(&id.vni[1], mask->vni, 3);
1608 vxlan.mask.tunnel_id = id.vlan_id;
1609 /* Remove unwanted bits from values. */
1610 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1613 * Tunnel id 0 is equivalent to not adding a VXLAN layer. If only this
1614 * layer is defined in the Verbs specification, it is interpreted as a
1615 * wildcard and all packets will match this rule. If it follows a full
1616 * stack layer (e.g. eth / ipv4 / udp), all packets matching the layers
1617 * before will also match this rule.
1618 * To avoid such a situation, VNI 0 is currently refused.
1620 if (!vxlan.val.tunnel_id)
1621 return rte_flow_error_set(data->error, EINVAL,
1622 RTE_FLOW_ERROR_TYPE_ITEM,
1624 "VxLAN vni cannot be 0");
1625 mlx5_flow_create_copy(parser, &vxlan, size);
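/*
 * Worked example for the VNI packing above, assuming the local union
 * overlays the 32-bit tunnel id on a 4-byte vni[] array: with
 * spec->vni = "\x12\x34\x56", the memcpy() into id.vni[1] leaves byte 0
 * zeroed, so the tunnel id holds the bytes { 0x00, 0x12, 0x34, 0x56 },
 * i.e. the 24-bit VNI in network byte order as Verbs expects.
 */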
1630 * Convert mark/flag action to Verbs specification.
1633 * Internal parser structure.
1638 * 0 on success, a negative errno value otherwise and rte_errno is set.
1641 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
1643 unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1644 struct ibv_flow_spec_action_tag tag = {
1645 .type = IBV_FLOW_SPEC_ACTION_TAG,
1647 .tag_id = mlx5_flow_mark_set(mark_id),
1650 assert(parser->mark);
1651 mlx5_flow_create_copy(parser, &tag, size);
1656 * Convert count action to Verbs specification.
1659 * Pointer to Ethernet device.
1661 * Pointer to MLX5 flow parser structure.
1664 * 0 on success, a negative errno value otherwise and rte_errno is set.
1667 mlx5_flow_create_count(struct rte_eth_dev *dev __rte_unused,
1668 struct mlx5_flow_parse *parser __rte_unused)
1670 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
1671 struct priv *priv = dev->data->dev_private;
1672 unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
1673 struct ibv_counter_set_init_attr init_attr = {0};
1674 struct ibv_flow_spec_counter_action counter = {
1675 .type = IBV_FLOW_SPEC_ACTION_COUNT,
1677 .counter_set_handle = 0,
1680 init_attr.counter_set_id = 0;
1681 parser->cs = mlx5_glue->create_counter_set(priv->ctx, &init_attr);
1686 counter.counter_set_handle = parser->cs->handle;
1687 mlx5_flow_create_copy(parser, &counter, size);
1693 * Complete flow rule creation with a drop queue.
1696 * Pointer to Ethernet device.
1698 * Internal parser structure.
1700 * Pointer to the rte_flow.
1702 * Perform verbose error reporting if not NULL.
1705 * 0 on success, a negative errno value otherwise and rte_errno is set.
1708 mlx5_flow_create_action_queue_drop(struct rte_eth_dev *dev,
1709 struct mlx5_flow_parse *parser,
1710 struct rte_flow *flow,
1711 struct rte_flow_error *error)
1713 struct priv *priv = dev->data->dev_private;
1714 struct ibv_flow_spec_action_drop *drop;
1715 unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
1720 drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
1721 parser->queue[HASH_RXQ_ETH].offset);
1722 *drop = (struct ibv_flow_spec_action_drop){
1723 .type = IBV_FLOW_SPEC_ACTION_DROP,
1726 ++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
1727 parser->queue[HASH_RXQ_ETH].offset += size;
1728 flow->frxq[HASH_RXQ_ETH].ibv_attr =
1729 parser->queue[HASH_RXQ_ETH].ibv_attr;
1731 flow->cs = parser->cs;
1732 if (!priv->dev->data->dev_started)
1734 parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
1735 flow->frxq[HASH_RXQ_ETH].ibv_flow =
1736 mlx5_glue->create_flow(priv->flow_drop_queue->qp,
1737 flow->frxq[HASH_RXQ_ETH].ibv_attr);
1738 if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1739 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1740 NULL, "flow rule creation failure");
1746 if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1747 claim_zero(mlx5_glue->destroy_flow
1748 (flow->frxq[HASH_RXQ_ETH].ibv_flow));
1749 flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
1751 if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
1752 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
1753 flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
1756 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
1764 * Create hash Rx queues when RSS is enabled.
1767 * Pointer to Ethernet device.
1769 * Internal parser structure.
1771 * Pointer to the rte_flow.
1773 * Perform verbose error reporting if not NULL.
1776 * 0 on success, a negative errno value otherwise and rte_errno is set.
1779 mlx5_flow_create_action_queue_rss(struct rte_eth_dev *dev,
1780 struct mlx5_flow_parse *parser,
1781 struct rte_flow *flow,
1782 struct rte_flow_error *error)
1784 struct priv *priv = dev->data->dev_private;
1787 for (i = 0; i != hash_rxq_init_n; ++i) {
1788 uint64_t hash_fields;
1790 if (!parser->queue[i].ibv_attr)
1792 flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
1793 parser->queue[i].ibv_attr = NULL;
1794 hash_fields = hash_rxq_init[i].hash_fields;
1795 if (!priv->dev->data->dev_started)
1797 flow->frxq[i].hrxq =
1799 parser->rss_conf.rss_key,
1800 parser->rss_conf.rss_key_len,
1804 if (flow->frxq[i].hrxq)
1806 flow->frxq[i].hrxq =
1808 parser->rss_conf.rss_key,
1809 parser->rss_conf.rss_key_len,
1813 if (!flow->frxq[i].hrxq) {
1814 return rte_flow_error_set(error, ENOMEM,
1815 RTE_FLOW_ERROR_TYPE_HANDLE,
1817 "cannot create hash rxq");
1824 * Complete flow rule creation.
1827 * Pointer to Ethernet device.
1829 * Internal parser structure.
1831 * Pointer to the rte_flow.
1833 * Perform verbose error reporting if not NULL.
1836 * 0 on success, a negative errno value otherwise and rte_errno is set.
1839 mlx5_flow_create_action_queue(struct rte_eth_dev *dev,
1840 struct mlx5_flow_parse *parser,
1841 struct rte_flow *flow,
1842 struct rte_flow_error *error)
1844 struct priv *priv = dev->data->dev_private;
1847 unsigned int flows_n = 0;
1851 assert(!parser->drop);
1852 ret = mlx5_flow_create_action_queue_rss(dev, parser, flow, error);
1856 flow->cs = parser->cs;
1857 if (!priv->dev->data->dev_started)
1859 for (i = 0; i != hash_rxq_init_n; ++i) {
1860 if (!flow->frxq[i].hrxq)
1862 flow->frxq[i].ibv_flow =
1863 mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
1864 flow->frxq[i].ibv_attr);
1865 if (!flow->frxq[i].ibv_flow) {
1866 rte_flow_error_set(error, ENOMEM,
1867 RTE_FLOW_ERROR_TYPE_HANDLE,
1868 NULL, "flow rule creation failure");
1872 DRV_LOG(DEBUG, "port %u %p type %d QP %p ibv_flow %p",
1875 (void *)flow->frxq[i].hrxq,
1876 (void *)flow->frxq[i].ibv_flow);
1879 rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE,
1880 NULL, "internal error in flow creation");
1883 for (i = 0; i != parser->queues_n; ++i) {
1884 struct mlx5_rxq_data *q =
1885 (*priv->rxqs)[parser->queues[i]];
1887 q->mark |= parser->mark;
1891 ret = rte_errno; /* Save rte_errno before cleanup. */
1893 for (i = 0; i != hash_rxq_init_n; ++i) {
1894 if (flow->frxq[i].ibv_flow) {
1895 struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;
1897 claim_zero(mlx5_glue->destroy_flow(ibv_flow));
1899 if (flow->frxq[i].hrxq)
1900 mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
1901 if (flow->frxq[i].ibv_attr)
1902 rte_free(flow->frxq[i].ibv_attr);
1905 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
1909 rte_errno = ret; /* Restore rte_errno. */
1917 * Pointer to Ethernet device.
1919 * Pointer to a TAILQ flow list.
1921 * Flow rule attributes.
1922 * @param[in] pattern
1923 * Pattern specification (list terminated by the END pattern item).
1924 * @param[in] actions
1925 * Associated actions (list terminated by the END action).
1927 * Perform verbose error reporting if not NULL.
1930 * A flow on success, NULL otherwise and rte_errno is set.
1932 static struct rte_flow *
1933 mlx5_flow_list_create(struct rte_eth_dev *dev,
1934 struct mlx5_flows *list,
1935 const struct rte_flow_attr *attr,
1936 const struct rte_flow_item items[],
1937 const struct rte_flow_action actions[],
1938 struct rte_flow_error *error)
1940 struct mlx5_flow_parse parser = { .create = 1, };
1941 struct rte_flow *flow = NULL;
1945 ret = mlx5_flow_convert(dev, attr, items, actions, error, &parser);
1948 flow = rte_calloc(__func__, 1,
1949 sizeof(*flow) + parser.queues_n * sizeof(uint16_t),
1952 rte_flow_error_set(error, ENOMEM,
1953 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1955 "cannot allocate flow memory");
1958 /* Copy queue configuration. */
1959 flow->queues = (uint16_t (*)[])(flow + 1);
1960 memcpy(flow->queues, parser.queues, parser.queues_n * sizeof(uint16_t));
1961 flow->queues_n = parser.queues_n;
1962 flow->mark = parser.mark;
1963 /* Copy RSS configuration. */
1964 flow->rss_conf = parser.rss_conf;
1965 flow->rss_conf.rss_key = flow->rss_key;
1966 memcpy(flow->rss_key, parser.rss_key, parser.rss_conf.rss_key_len);
1967 /* Finalise the flow. */
1969 ret = mlx5_flow_create_action_queue_drop(dev, &parser, flow,
1972 ret = mlx5_flow_create_action_queue(dev, &parser, flow, error);
1975 TAILQ_INSERT_TAIL(list, flow, next);
1976 DRV_LOG(DEBUG, "port %u flow created %p", dev->data->port_id,
1980 DRV_LOG(ERR, "port %u flow creation error: %s", dev->data->port_id,
1982 for (i = 0; i != hash_rxq_init_n; ++i) {
1983 if (parser.queue[i].ibv_attr)
1984 rte_free(parser.queue[i].ibv_attr);
1991 * Validate a flow supported by the NIC.
1993 * @see rte_flow_validate()
1997 mlx5_flow_validate(struct rte_eth_dev *dev,
1998 const struct rte_flow_attr *attr,
1999 const struct rte_flow_item items[],
2000 const struct rte_flow_action actions[],
2001 struct rte_flow_error *error)
2003 struct mlx5_flow_parse parser = { .create = 0, };
2005 return mlx5_flow_convert(dev, attr, items, actions, error, &parser);
2011 * @see rte_flow_create()
2015 mlx5_flow_create(struct rte_eth_dev *dev,
2016 const struct rte_flow_attr *attr,
2017 const struct rte_flow_item items[],
2018 const struct rte_flow_action actions[],
2019 struct rte_flow_error *error)
2021 struct priv *priv = dev->data->dev_private;
2023 return mlx5_flow_list_create(dev, &priv->flows, attr, items, actions,
2028 * Destroy a flow in a list.
2031 * Pointer to Ethernet device.
2033 * Pointer to a TAILQ flow list.
2038 mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
2039 struct rte_flow *flow)
2041 struct priv *priv = dev->data->dev_private;
2044 if (flow->drop || !flow->mark)
2046 for (i = 0; i != flow->queues_n; ++i) {
2047 struct rte_flow *tmp;
2051 * To remove the mark from the queue, the queue must not be
2052 * present in any other marked flow (RSS or not).
2054 TAILQ_FOREACH(tmp, list, next) {
2056 uint16_t *tqs = NULL;
2061 for (j = 0; j != hash_rxq_init_n; ++j) {
2062 if (!tmp->frxq[j].hrxq)
2064 tqs = tmp->frxq[j].hrxq->ind_table->queues;
2065 tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
2069 for (j = 0; (j != tq_n) && !mark; j++)
2070 if (tqs[j] == (*flow->queues)[i])
2073 (*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
2077 if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
2078 claim_zero(mlx5_glue->destroy_flow
2079 (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2080 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
2082 for (i = 0; i != hash_rxq_init_n; ++i) {
2083 struct mlx5_flow *frxq = &flow->frxq[i];
2086 claim_zero(mlx5_glue->destroy_flow
2089 mlx5_hrxq_release(dev, frxq->hrxq);
2091 rte_free(frxq->ibv_attr);
2095 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2098 TAILQ_REMOVE(list, flow, next);
2099 DRV_LOG(DEBUG, "port %u flow destroyed %p", dev->data->port_id,
2105 * Destroy all flows.
2108 * Pointer to Ethernet device.
2110 * Pointer to a TAILQ flow list.
2113 mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
2115 while (!TAILQ_EMPTY(list)) {
2116 struct rte_flow *flow;
2118 flow = TAILQ_FIRST(list);
2119 mlx5_flow_list_destroy(dev, list, flow);
2124 * Create drop queue.
2127 * Pointer to Ethernet device.
2130 * 0 on success, a negative errno value otherwise and rte_errno is set.
2133 mlx5_flow_create_drop_queue(struct rte_eth_dev *dev)
2135 struct priv *priv = dev->data->dev_private;
2136 struct mlx5_hrxq_drop *fdq = NULL;
2140 fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
2143 "port %u cannot allocate memory for drop queue",
2144 dev->data->port_id);
2148 fdq->cq = mlx5_glue->create_cq(priv->ctx, 1, NULL, NULL, 0);
2150 DRV_LOG(WARNING, "port %u cannot allocate CQ for drop queue",
2151 dev->data->port_id);
2155 fdq->wq = mlx5_glue->create_wq
2157 &(struct ibv_wq_init_attr){
2158 .wq_type = IBV_WQT_RQ,
2165 DRV_LOG(WARNING, "port %u cannot allocate WQ for drop queue",
2166 dev->data->port_id);
2170 fdq->ind_table = mlx5_glue->create_rwq_ind_table
2172 &(struct ibv_rwq_ind_table_init_attr){
2173 .log_ind_tbl_size = 0,
2174 .ind_tbl = &fdq->wq,
2177 if (!fdq->ind_table) {
2179 "port %u cannot allocate indirection table for drop"
2181 dev->data->port_id);
2185 fdq->qp = mlx5_glue->create_qp_ex
2187 &(struct ibv_qp_init_attr_ex){
2188 .qp_type = IBV_QPT_RAW_PACKET,
2190 IBV_QP_INIT_ATTR_PD |
2191 IBV_QP_INIT_ATTR_IND_TABLE |
2192 IBV_QP_INIT_ATTR_RX_HASH,
2193 .rx_hash_conf = (struct ibv_rx_hash_conf){
2195 IBV_RX_HASH_FUNC_TOEPLITZ,
2196 .rx_hash_key_len = rss_hash_default_key_len,
2197 .rx_hash_key = rss_hash_default_key,
2198 .rx_hash_fields_mask = 0,
2200 .rwq_ind_tbl = fdq->ind_table,
2204 DRV_LOG(WARNING, "port %u cannot allocate QP for drop queue",
2205 dev->data->port_id);
2209 priv->flow_drop_queue = fdq;
2213 claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2215 claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2217 claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2219 claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2222 priv->flow_drop_queue = NULL;
2227 * Delete drop queue.
2230 * Pointer to Ethernet device.
2233 mlx5_flow_delete_drop_queue(struct rte_eth_dev *dev)
2235 struct priv *priv = dev->data->dev_private;
2236 struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
2241 claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2243 claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2245 claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2247 claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2249 priv->flow_drop_queue = NULL;
2256 * Pointer to Ethernet device.
2258 * Pointer to a TAILQ flow list.
2261 mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
2263 struct priv *priv = dev->data->dev_private;
2264 struct rte_flow *flow;
2266 TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
2268 struct mlx5_ind_table_ibv *ind_tbl = NULL;
2271 if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
2273 claim_zero(mlx5_glue->destroy_flow
2274 (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2275 flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
2276 DRV_LOG(DEBUG, "port %u flow %p removed",
2277 dev->data->port_id, (void *)flow);
2281 /* Verify the flow has not already been cleaned. */
2282 for (i = 0; i != hash_rxq_init_n; ++i) {
2283 if (!flow->frxq[i].ibv_flow)
2286 * Indirection table may be necessary to remove the
2287 * flags in the Rx queues.
2288 * This helps to speed up the process by avoiding another loop.
2291 ind_tbl = flow->frxq[i].hrxq->ind_table;
2294 if (i == hash_rxq_init_n)
2298 for (i = 0; i != ind_tbl->queues_n; ++i)
2299 (*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
2301 for (i = 0; i != hash_rxq_init_n; ++i) {
2302 if (!flow->frxq[i].ibv_flow)
2304 claim_zero(mlx5_glue->destroy_flow
2305 (flow->frxq[i].ibv_flow));
2306 flow->frxq[i].ibv_flow = NULL;
2307 mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
2308 flow->frxq[i].hrxq = NULL;
2310 DRV_LOG(DEBUG, "port %u flow %p removed", dev->data->port_id,
2319 * Pointer to Ethernet device.
2321 * Pointer to a TAILQ flow list.
2324 * 0 on success, a negative errno value otherwise and rte_errno is set.
2327 mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
2329 struct priv *priv = dev->data->dev_private;
2330 struct rte_flow *flow;
2332 TAILQ_FOREACH(flow, list, next) {
2336 flow->frxq[HASH_RXQ_ETH].ibv_flow =
2337 mlx5_glue->create_flow
2338 (priv->flow_drop_queue->qp,
2339 flow->frxq[HASH_RXQ_ETH].ibv_attr);
2340 if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2342 "port %u flow %p cannot be applied",
2343 dev->data->port_id, (void *)flow);
2347 DRV_LOG(DEBUG, "port %u flow %p applied",
2348 dev->data->port_id, (void *)flow);
2352 for (i = 0; i != hash_rxq_init_n; ++i) {
2353 if (!flow->frxq[i].ibv_attr)
2355 flow->frxq[i].hrxq =
2356 mlx5_hrxq_get(dev, flow->rss_conf.rss_key,
2357 flow->rss_conf.rss_key_len,
2358 hash_rxq_init[i].hash_fields,
2361 if (flow->frxq[i].hrxq)
2363 flow->frxq[i].hrxq =
2364 mlx5_hrxq_new(dev, flow->rss_conf.rss_key,
2365 flow->rss_conf.rss_key_len,
2366 hash_rxq_init[i].hash_fields,
2369 if (!flow->frxq[i].hrxq) {
2371 "port %u flow %p cannot be applied",
2372 dev->data->port_id, (void *)flow);
2377 flow->frxq[i].ibv_flow =
2378 mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
2379 flow->frxq[i].ibv_attr);
2380 if (!flow->frxq[i].ibv_flow) {
2382 "port %u flow %p cannot be applied",
2383 dev->data->port_id, (void *)flow);
2387 DRV_LOG(DEBUG, "port %u flow %p applied",
2388 dev->data->port_id, (void *)flow);
2392 for (i = 0; i != flow->queues_n; ++i)
2393 (*priv->rxqs)[(*flow->queues)[i]]->mark = 1;
2399 * Verify the flow list is empty.
2402 * Pointer to Ethernet device.
2404 * @return the number of flows not released.
2407 mlx5_flow_verify(struct rte_eth_dev *dev)
2409 struct priv *priv = dev->data->dev_private;
2410 struct rte_flow *flow;
2413 TAILQ_FOREACH(flow, &priv->flows, next) {
2414 DRV_LOG(DEBUG, "port %u flow %p still referenced",
2415 dev->data->port_id, (void *)flow);
/**
 * Enable a control flow configured from the control plane.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param eth_spec
 *   An Ethernet flow spec to apply.
 * @param eth_mask
 *   An Ethernet flow mask to apply.
 * @param vlan_spec
 *   A VLAN flow spec to apply.
 * @param vlan_mask
 *   A VLAN flow mask to apply.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
		    struct rte_flow_item_eth *eth_spec,
		    struct rte_flow_item_eth *eth_mask,
		    struct rte_flow_item_vlan *vlan_spec,
		    struct rte_flow_item_vlan *vlan_mask)
{
	struct priv *priv = dev->data->dev_private;
	const struct rte_flow_attr attr = {
		.ingress = 1,
		.priority = MLX5_CTRL_FLOW_PRIORITY,
	};
	struct rte_flow_item items[] = {
		{
			.type = RTE_FLOW_ITEM_TYPE_ETH,
			.spec = eth_spec,
			.last = NULL,
			.mask = eth_mask,
		},
		{
			.type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
				RTE_FLOW_ITEM_TYPE_END,
			.spec = vlan_spec,
			.last = NULL,
			.mask = vlan_mask,
		},
		{
			.type = RTE_FLOW_ITEM_TYPE_END,
		},
	};
	struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_RSS,
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		},
	};
	struct rte_flow *flow;
	struct rte_flow_error error;
	unsigned int i;
	/*
	 * The RSS action is filled through a layout-compatible view
	 * ("local") because struct rte_flow_action_rss ends with a
	 * flexible queue[] array which cannot be allocated on the stack
	 * directly.
	 */
	union {
		struct rte_flow_action_rss rss;
		struct {
			const struct rte_eth_rss_conf *rss_conf;
			uint16_t num;
			uint16_t queue[RTE_MAX_QUEUES_PER_PORT];
		} local;
	} action_rss;

	if (!priv->reta_idx_n) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	for (i = 0; i != priv->reta_idx_n; ++i)
		action_rss.local.queue[i] = (*priv->reta_idx)[i];
	action_rss.local.rss_conf = &priv->rss_conf;
	action_rss.local.num = priv->reta_idx_n;
	actions[0].conf = (const void *)&action_rss.rss;
	flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
				     actions, &error);
	if (!flow)
		return -rte_errno;
	return 0;
}

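/*
 * Usage sketch (illustrative only): this is how the traffic enable path
 * would request a VLAN-tagged unicast control flow; the MAC address and
 * vlan_id values here are hypothetical.
 *
 *	struct rte_flow_item_eth unicast = {
 *		.dst.addr_bytes = "\x00\x11\x22\x33\x44\x55",
 *	};
 *	struct rte_flow_item_eth unicast_mask = {
 *		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *	};
 *	struct rte_flow_item_vlan vlan_spec = {
 *		.tci = rte_cpu_to_be_16(vlan_id),
 *	};
 *	struct rte_flow_item_vlan vlan_mask = {
 *		.tci = 0xffff,
 *	};
 *
 *	mlx5_ctrl_flow_vlan(dev, &unicast, &unicast_mask,
 *			    &vlan_spec, &vlan_mask);
 */
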
/**
 * Enable a control flow configured from the control plane, without any
 * VLAN specification.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param eth_spec
 *   An Ethernet flow spec to apply.
 * @param eth_mask
 *   An Ethernet flow mask to apply.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_ctrl_flow(struct rte_eth_dev *dev,
	       struct rte_flow_item_eth *eth_spec,
	       struct rte_flow_item_eth *eth_mask)
{
	return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
}

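/*
 * Usage sketch (illustrative only): a broadcast control flow, matching
 * the destination MAC exactly by using the same value as spec and mask.
 *
 *	struct rte_flow_item_eth bcast = {
 *		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *	};
 *
 *	mlx5_ctrl_flow(dev, &bcast, &bcast);
 */
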
/**
 * Destroy a flow.
 *
 * @see rte_flow_destroy()
 * @see rte_flow_ops
 */
int
mlx5_flow_destroy(struct rte_eth_dev *dev,
		  struct rte_flow *flow,
		  struct rte_flow_error *error __rte_unused)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_destroy(dev, &priv->flows, flow);
	return 0;
}

/**
 * Destroy all flows.
 *
 * @see rte_flow_flush()
 * @see rte_flow_ops
 */
int
mlx5_flow_flush(struct rte_eth_dev *dev,
		struct rte_flow_error *error __rte_unused)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_flush(dev, &priv->flows);
	return 0;
}

#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
/**
 * Query flow counter.
 *
 * @param cs
 *   The counter set to query.
 * @param counter_stats
 *   Accumulated statistics used as the reference for the returned deltas,
 *   updated when a reset is requested.
 * @param[out] query_count
 *   Returned data from the counter.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_query_count(struct ibv_counter_set *cs,
		      struct mlx5_flow_counter_stats *counter_stats,
		      struct rte_flow_query_count *query_count,
		      struct rte_flow_error *error)
{
	uint64_t counters[2];
	struct ibv_query_counter_set_attr query_cs_attr = {
		.cs = cs,
		.query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
	};
	struct ibv_counter_set_data query_out = {
		.out = counters,
		.outlen = 2 * sizeof(uint64_t),
	};
	int err = mlx5_glue->query_counter_set(&query_cs_attr, &query_out);

	if (err)
		return rte_flow_error_set(error, err,
					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
					  NULL,
					  "cannot read counter");
	query_count->hits_set = 1;
	query_count->bytes_set = 1;
	query_count->hits = counters[0] - counter_stats->hits;
	query_count->bytes = counters[1] - counter_stats->bytes;
	if (query_count->reset) {
		/* Make the next query return deltas from this point. */
		counter_stats->hits = counters[0];
		counter_stats->bytes = counters[1];
	}
	return 0;
}

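/*
 * Worked example (hypothetical numbers): if the hardware counter set
 * reads hits = 1000 while counter_stats->hits holds 400 from the last
 * reset, the query reports 1000 - 400 = 600 hits. With
 * query_count->reset set, the baseline moves to 1000 and the next query
 * counts from there.
 */
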
/**
 * Query a flow.
 *
 * @see rte_flow_query()
 * @see rte_flow_ops
 */
int
mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
		struct rte_flow *flow,
		enum rte_flow_action_type action __rte_unused,
		void *data,
		struct rte_flow_error *error)
{
	if (flow->cs) {
		int ret;

		ret = mlx5_flow_query_count(flow->cs,
					    &flow->counter_stats,
					    (struct rte_flow_query_count *)data,
					    error);
		if (ret)
			return ret;
	} else {
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
					  NULL,
					  "no counter found for flow");
	}
	return 0;
}
#endif

/**
 * Isolated mode.
 *
 * @see rte_flow_isolate()
 * @see rte_flow_ops
 */
int
mlx5_flow_isolate(struct rte_eth_dev *dev,
		  int enable,
		  struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	if (dev->data->dev_started) {
		rte_flow_error_set(error, EBUSY,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "port must be stopped first");
		return -rte_errno;
	}
	priv->isolated = !!enable;
	if (priv->isolated)
		priv->dev->dev_ops = &mlx5_dev_ops_isolate;
	else
		priv->dev->dev_ops = &mlx5_dev_ops;
	return 0;
}

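/*
 * Usage sketch (illustrative only): isolated mode must be requested
 * before the port is started, otherwise the call fails with EBUSY.
 *
 *	struct rte_flow_error error;
 *
 *	if (rte_flow_isolate(port_id, 1, &error))
 *		... handle error, the port is probably already started ...
 *	rte_eth_dev_start(port_id);
 */
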
/**
 * Convert a flow director filter to a generic flow.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Flow director filter to add.
 * @param attributes
 *   Generic flow parameters structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
			 const struct rte_eth_fdir_filter *fdir_filter,
			 struct mlx5_fdir *attributes)
{
	struct priv *priv = dev->data->dev_private;
	const struct rte_eth_fdir_input *input = &fdir_filter->input;
	const struct rte_eth_fdir_masks *mask =
		&dev->data->dev_conf.fdir_conf.mask;

	/* Validate queue number. */
	if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
		DRV_LOG(ERR, "port %u invalid queue number %d",
			dev->data->port_id, fdir_filter->action.rx_queue);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	attributes->attr.ingress = 1;
	attributes->items[0] = (struct rte_flow_item) {
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.spec = &attributes->l2,
		.mask = &attributes->l2_mask,
	};
	switch (fdir_filter->action.behavior) {
	case RTE_ETH_FDIR_ACCEPT:
		attributes->actions[0] = (struct rte_flow_action){
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &attributes->queue,
		};
		break;
	case RTE_ETH_FDIR_REJECT:
		attributes->actions[0] = (struct rte_flow_action){
			.type = RTE_FLOW_ACTION_TYPE_DROP,
		};
		break;
	default:
		DRV_LOG(ERR, "port %u invalid behavior %d",
			dev->data->port_id,
			fdir_filter->action.behavior);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	attributes->queue.index = fdir_filter->action.rx_queue;
	/* Handle L3. */
	switch (fdir_filter->input.flow_type) {
	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = input->flow.ip4_flow.src_ip,
			.dst_addr = input->flow.ip4_flow.dst_ip,
			.time_to_live = input->flow.ip4_flow.ttl,
			.type_of_service = input->flow.ip4_flow.tos,
			.next_proto_id = input->flow.ip4_flow.proto,
		};
		attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = mask->ipv4_mask.src_ip,
			.dst_addr = mask->ipv4_mask.dst_ip,
			.time_to_live = mask->ipv4_mask.ttl,
			.type_of_service = mask->ipv4_mask.tos,
			.next_proto_id = mask->ipv4_mask.proto,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV4,
			.spec = &attributes->l3,
			.mask = &attributes->l3_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
			.hop_limits = input->flow.ipv6_flow.hop_limits,
			.proto = input->flow.ipv6_flow.proto,
		};

		memcpy(attributes->l3.ipv6.hdr.src_addr,
		       input->flow.ipv6_flow.src_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
		memcpy(attributes->l3.ipv6.hdr.dst_addr,
		       input->flow.ipv6_flow.dst_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
		memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
		       mask->ipv6_mask.src_ip,
		       RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
		memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
		       mask->ipv6_mask.dst_ip,
		       RTE_DIM(attributes->l3_mask.ipv6.hdr.dst_addr));
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV6,
			.spec = &attributes->l3,
			.mask = &attributes->l3_mask,
		};
		break;
	default:
		DRV_LOG(ERR, "port %u invalid flow type %d",
			dev->data->port_id, fdir_filter->input.flow_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	/* Handle L4. */
	switch (fdir_filter->input.flow_type) {
	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
		attributes->l4.udp.hdr = (struct udp_hdr){
			.src_port = input->flow.udp4_flow.src_port,
			.dst_port = input->flow.udp4_flow.dst_port,
		};
		attributes->l4_mask.udp.hdr = (struct udp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_UDP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
		attributes->l4.tcp.hdr = (struct tcp_hdr){
			.src_port = input->flow.tcp4_flow.src_port,
			.dst_port = input->flow.tcp4_flow.dst_port,
		};
		attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_TCP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
		attributes->l4.udp.hdr = (struct udp_hdr){
			.src_port = input->flow.udp6_flow.src_port,
			.dst_port = input->flow.udp6_flow.dst_port,
		};
		attributes->l4_mask.udp.hdr = (struct udp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_UDP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
		attributes->l4.tcp.hdr = (struct tcp_hdr){
			.src_port = input->flow.tcp6_flow.src_port,
			.dst_port = input->flow.tcp6_flow.dst_port,
		};
		attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_TCP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
		break;
	default:
		DRV_LOG(ERR, "port %u invalid flow type %d",
			dev->data->port_id, fdir_filter->input.flow_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	return 0;
}

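/*
 * Conversion example (hypothetical values): a NONFRAG_IPV4_UDP filter
 * accepted on queue 3 ends up as the generic pattern
 * ETH -> IPV4 -> UDP -> END with a QUEUE action, i.e. roughly:
 *
 *	attributes->items[0].type = RTE_FLOW_ITEM_TYPE_ETH;
 *	attributes->items[1].type = RTE_FLOW_ITEM_TYPE_IPV4;
 *	attributes->items[2].type = RTE_FLOW_ITEM_TYPE_UDP;
 *	attributes->actions[0].type = RTE_FLOW_ACTION_TYPE_QUEUE;
 *	attributes->queue.index = 3;
 */
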
/**
 * Add new flow director filter and store it in list.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Flow director filter to add.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_add(struct rte_eth_dev *dev,
		     const struct rte_eth_fdir_filter *fdir_filter)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_fdir attributes = {
		.attr.group = 0,
		.l2_mask = {
			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.type = 0,
		},
	};
	struct mlx5_flow_parse parser = {
		.layer = HASH_RXQ_ETH,
	};
	struct rte_flow_error error;
	struct rte_flow *flow;
	int ret;

	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
	if (ret)
		return ret;
	ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
				attributes.actions, &error, &parser);
	if (ret)
		return ret;
	flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
				     attributes.items, attributes.actions,
				     &error);
	if (flow) {
		DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
			(void *)flow);
		return 0;
	}
	return -rte_errno;
}

/**
 * Delete specific filter.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Filter to be deleted.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_delete(struct rte_eth_dev *dev,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_fdir attributes = {
		.attr.group = 0,
	};
	struct mlx5_flow_parse parser = {
		.create = 1,
		.layer = HASH_RXQ_ETH,
	};
	struct rte_flow_error error;
	struct rte_flow *flow;
	unsigned int i;
	int ret;

	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
	if (ret)
		return ret;
	ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
				attributes.actions, &error, &parser);
	if (ret)
		goto exit;
	/*
	 * Special case for the drop action, which is only written to the
	 * specifications when the flow is created. In this situation the
	 * drop specification is missing and must be appended before
	 * comparing against existing flows.
	 */
	if (parser.drop) {
		struct ibv_flow_spec_action_drop *drop;

		drop = (void *)((uintptr_t)parser.queue[HASH_RXQ_ETH].ibv_attr +
				parser.queue[HASH_RXQ_ETH].offset);
		*drop = (struct ibv_flow_spec_action_drop){
			.type = IBV_FLOW_SPEC_ACTION_DROP,
			.size = sizeof(struct ibv_flow_spec_action_drop),
		};
		parser.queue[HASH_RXQ_ETH].ibv_attr->num_of_specs++;
	}
	TAILQ_FOREACH(flow, &priv->flows, next) {
		struct ibv_flow_attr *attr;
		struct ibv_spec_header *attr_h;
		void *spec;
		struct ibv_flow_attr *flow_attr;
		struct ibv_spec_header *flow_h;
		void *flow_spec;
		unsigned int specs_n;

		attr = parser.queue[HASH_RXQ_ETH].ibv_attr;
		flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr;
		/* Compare first the attributes. */
		if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
			continue;
		if (attr->num_of_specs == 0)
			continue;
		spec = (void *)((uintptr_t)attr +
				sizeof(struct ibv_flow_attr));
		flow_spec = (void *)((uintptr_t)flow_attr +
				     sizeof(struct ibv_flow_attr));
		specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
		for (i = 0; i != specs_n; ++i) {
			attr_h = spec;
			flow_h = flow_spec;
			if (memcmp(spec, flow_spec,
				   RTE_MIN(attr_h->size, flow_h->size)))
				goto wrong_flow;
			spec = (void *)((uintptr_t)spec + attr_h->size);
			flow_spec = (void *)((uintptr_t)flow_spec +
					     flow_h->size);
		}
		/* At this point, the flow matches. */
		break;
wrong_flow:
		/* The flow does not match. */
		continue;
	}
	ret = rte_errno; /* Save rte_errno before cleanup. */
	if (flow)
		mlx5_flow_list_destroy(dev, &priv->flows, flow);
exit:
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser.queue[i].ibv_attr)
			rte_free(parser.queue[i].ibv_attr);
	}
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}

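/*
 * Matching note: the flow director API carries no rte_flow handle, so
 * deletion above rebuilds the verbs attribute block and compares it
 * against each stored flow byte by byte. The compared memory is laid
 * out as:
 *
 *	struct ibv_flow_attr    (num_of_specs, priority, ...)
 *	struct ibv_spec_header  (type, size) -- spec #0
 *	...spec #0 body...
 *	struct ibv_spec_header  (type, size) -- spec #1
 *	...
 */
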
/**
 * Update queue for specific filter.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Filter to be updated.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_update(struct rte_eth_dev *dev,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	int ret;

	ret = mlx5_fdir_filter_delete(dev, fdir_filter);
	if (ret)
		return ret;
	return mlx5_fdir_filter_add(dev, fdir_filter);
}

/**
 * Flush all filters.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
static void
mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_flush(dev, &priv->flows);
}

/**
 * Get flow director information.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[out] fdir_info
 *   Resulting flow director information.
 */
static void
mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
{
	struct priv *priv = dev->data->dev_private;
	struct rte_eth_fdir_masks *mask =
		&priv->dev->data->dev_conf.fdir_conf.mask;

	fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
	fdir_info->guarant_spc = 0;
	rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
	fdir_info->max_flexpayload = 0;
	fdir_info->flow_types_mask[0] = 0;
	fdir_info->flex_payload_unit = 0;
	fdir_info->max_flex_payload_segment_num = 0;
	fdir_info->flex_payload_limit = 0;
	memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
}

/**
 * Deal with flow director operations.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
		    void *arg)
{
	struct priv *priv = dev->data->dev_private;
	enum rte_fdir_mode fdir_mode =
		priv->dev->data->dev_conf.fdir_conf.mode;

	if (filter_op == RTE_ETH_FILTER_NOP)
		return 0;
	if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
	    fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
		DRV_LOG(ERR, "port %u flow director mode %d not supported",
			dev->data->port_id, fdir_mode);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	switch (filter_op) {
	case RTE_ETH_FILTER_ADD:
		return mlx5_fdir_filter_add(dev, arg);
	case RTE_ETH_FILTER_UPDATE:
		return mlx5_fdir_filter_update(dev, arg);
	case RTE_ETH_FILTER_DELETE:
		return mlx5_fdir_filter_delete(dev, arg);
	case RTE_ETH_FILTER_FLUSH:
		mlx5_fdir_filter_flush(dev);
		break;
	case RTE_ETH_FILTER_INFO:
		mlx5_fdir_info_get(dev, arg);
		break;
	default:
		DRV_LOG(DEBUG, "port %u unknown operation %u",
			dev->data->port_id, filter_op);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	return 0;
}

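/*
 * Usage sketch (illustrative only): the legacy filter API reaches the
 * dispatcher above through rte_eth_dev_filter_ctrl(); the field values
 * below are hypothetical.
 *
 *	struct rte_eth_fdir_filter f = {
 *		.input = {
 *			.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
 *			.flow.udp4_flow = {
 *				.src_port = rte_cpu_to_be_16(5000),
 *				.dst_port = rte_cpu_to_be_16(5001),
 *			},
 *		},
 *		.action = {
 *			.rx_queue = 3,
 *			.behavior = RTE_ETH_FDIR_ACCEPT,
 *		},
 *	};
 *
 *	rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
 *				RTE_ETH_FILTER_ADD, &f);
 */
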
/**
 * Manage filter operations.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param filter_type
 *   Filter type.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
		     enum rte_filter_type filter_type,
		     enum rte_filter_op filter_op,
		     void *arg)
{
	switch (filter_type) {
	case RTE_ETH_FILTER_GENERIC:
		if (filter_op != RTE_ETH_FILTER_GET) {
			rte_errno = EINVAL;
			return -rte_errno;
		}
		*(const void **)arg = &mlx5_flow_ops;
		return 0;
	case RTE_ETH_FILTER_FDIR:
		return mlx5_fdir_ctrl_func(dev, filter_op, arg);
	default:
		DRV_LOG(ERR, "port %u filter type (%d) not supported",
			dev->data->port_id, filter_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	return 0;
}

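/*
 * Note (illustrative): rte_flow itself retrieves the mlx5_flow_ops table
 * through this entry point, roughly as:
 *
 *	const struct rte_flow_ops *ops;
 *
 *	rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_GENERIC,
 *				RTE_ETH_FILTER_GET, &ops);
 */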