/*-
 *   BSD LICENSE
 *
 *   Copyright 2016 6WIND S.A.
 *   Copyright 2016 Mellanox.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of 6WIND S.A. nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/queue.h>
#include <string.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_ethdev.h>
#include <rte_flow.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>

#include "mlx5.h"
#include "mlx5_defs.h"
#include "mlx5_prm.h"

/* Define minimal priority for control plane flows. */
#define MLX5_CTRL_FLOW_PRIORITY 4

/* Internet Protocol versions. */
#define MLX5_IPV4 4
#define MLX5_IPV6 6

#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
struct ibv_counter_set_init_attr {
	int dummy;
};
struct ibv_flow_spec_counter_action {
	int dummy;
};
struct ibv_counter_set {
	int dummy;
};
static inline int
ibv_destroy_counter_set(struct ibv_counter_set *cs)
{
	(void)cs;
	return -ENOTSUP;
}
#endif

/* Dev ops structure defined in mlx5.c */
extern const struct eth_dev_ops mlx5_dev_ops;
extern const struct eth_dev_ops mlx5_dev_ops_isolate;

static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data);

static int
mlx5_flow_create_vlan(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data);

static int
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data);

static int
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data);

static int
mlx5_flow_create_udp(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data);

static int
mlx5_flow_create_tcp(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data);

static int
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
		       const void *default_mask,
		       void *data);

struct mlx5_flow_parse;

static void
mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
		      unsigned int size);

static int
mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);

static int
mlx5_flow_create_count(struct priv *priv, struct mlx5_flow_parse *parser);

/* Hash RX queue types. */
enum hash_rxq_type {
	HASH_RXQ_TCPV4,
	HASH_RXQ_UDPV4,
	HASH_RXQ_IPV4,
	HASH_RXQ_TCPV6,
	HASH_RXQ_UDPV6,
	HASH_RXQ_IPV6,
	HASH_RXQ_ETH,
};

/* Initialization data for hash RX queue. */
struct hash_rxq_init {
	uint64_t hash_fields; /* Fields that participate in the hash. */
	uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
	unsigned int flow_priority; /* Flow priority to use. */
	unsigned int ip_version; /* Internet protocol. */
};

/* Initialization data for hash RX queues. */
const struct hash_rxq_init hash_rxq_init[] = {
	[HASH_RXQ_TCPV4] = {
		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
				IBV_RX_HASH_DST_IPV4 |
				IBV_RX_HASH_SRC_PORT_TCP |
				IBV_RX_HASH_DST_PORT_TCP),
		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
		.flow_priority = 0,
		.ip_version = MLX5_IPV4,
	},
	[HASH_RXQ_UDPV4] = {
		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
				IBV_RX_HASH_DST_IPV4 |
				IBV_RX_HASH_SRC_PORT_UDP |
				IBV_RX_HASH_DST_PORT_UDP),
		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
		.flow_priority = 0,
		.ip_version = MLX5_IPV4,
	},
	[HASH_RXQ_IPV4] = {
		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
				IBV_RX_HASH_DST_IPV4),
		.dpdk_rss_hf = (ETH_RSS_IPV4 |
				ETH_RSS_FRAG_IPV4),
		.flow_priority = 1,
		.ip_version = MLX5_IPV4,
	},
	[HASH_RXQ_TCPV6] = {
		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
				IBV_RX_HASH_DST_IPV6 |
				IBV_RX_HASH_SRC_PORT_TCP |
				IBV_RX_HASH_DST_PORT_TCP),
		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
		.flow_priority = 0,
		.ip_version = MLX5_IPV6,
	},
	[HASH_RXQ_UDPV6] = {
		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
				IBV_RX_HASH_DST_IPV6 |
				IBV_RX_HASH_SRC_PORT_UDP |
				IBV_RX_HASH_DST_PORT_UDP),
		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
		.flow_priority = 0,
		.ip_version = MLX5_IPV6,
	},
	[HASH_RXQ_IPV6] = {
		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
				IBV_RX_HASH_DST_IPV6),
		.dpdk_rss_hf = (ETH_RSS_IPV6 |
				ETH_RSS_FRAG_IPV6),
		.flow_priority = 1,
		.ip_version = MLX5_IPV6,
	},
	[HASH_RXQ_ETH] = {
		.hash_fields = 0,
		.dpdk_rss_hf = 0,
		.flow_priority = 2,
	},
};

/* Number of entries in hash_rxq_init[]. */
const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
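
/*
 * Note (illustrative): combined with attr->priority, the flow_priority
 * values above make the most specific match win. With attr->priority == 0,
 * a TCPv4 rule lands at Verbs priority 0, a plain IPv4 rule at 1 and an
 * Ethernet-only rule at 2; lower values are matched first.
 */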

/** Structure for holding counter stats. */
struct mlx5_flow_counter_stats {
	uint64_t hits; /**< Number of packets matched by the rule. */
	uint64_t bytes; /**< Number of bytes matched by the rule. */
};

/** Structure for Drop queue. */
struct mlx5_hrxq_drop {
	struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
	struct ibv_qp *qp; /**< Verbs queue pair. */
	struct ibv_wq *wq; /**< Verbs work queue. */
	struct ibv_cq *cq; /**< Verbs completion queue. */
};

/* Flows structures. */
struct mlx5_flow {
	uint64_t hash_fields; /**< Fields that participate in the hash. */
	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
	struct ibv_flow *ibv_flow; /**< Verbs flow. */
	struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
};

/* Drop flows structures. */
struct mlx5_flow_drop {
	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
	struct ibv_flow *ibv_flow; /**< Verbs flow. */
};

struct rte_flow {
	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
	uint32_t mark:1; /**< Set if the flow is marked. */
	uint32_t drop:1; /**< Drop queue. */
	uint16_t queues_n; /**< Number of entries in queue[]. */
	uint16_t (*queues)[]; /**< Queue indexes to use. */
	struct rte_eth_rss_conf rss_conf; /**< RSS configuration. */
	uint8_t rss_key[40]; /**< Copy of the RSS key. */
	struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
	struct mlx5_flow_counter_stats counter_stats; /**< The counter stats. */
	struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
	/**< Flow with Rx queue. */
};

/** Static initializer for items. */
#define ITEMS(...) \
	(const enum rte_flow_item_type []){ \
		__VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
	}
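
/*
 * Example (illustrative): ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
 * RTE_FLOW_ITEM_TYPE_IPV6) expands to the compound literal
 *
 *	(const enum rte_flow_item_type []){
 *		RTE_FLOW_ITEM_TYPE_IPV4, RTE_FLOW_ITEM_TYPE_IPV6,
 *		RTE_FLOW_ITEM_TYPE_END,
 *	}
 *
 * i.e. an anonymous, END-terminated array used by the .items lists of
 * mlx5_flow_items[] below.
 */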

/** Structure to generate a simple graph of layers supported by the NIC. */
struct mlx5_flow_items {
	/** List of possible actions for these items. */
	const enum rte_flow_action_type *const actions;
	/** Bit-masks corresponding to the possibilities for the item. */
	const void *mask;
	/**
	 * Default bit-masks to use when item->mask is not provided. When
	 * \default_mask is also NULL, the full supported bit-mask (\mask) is
	 * used instead.
	 */
	const void *default_mask;
	/** Bit-masks size in bytes. */
	const unsigned int mask_sz;
	/**
	 * Conversion function from rte_flow to NIC specific flow.
	 *
	 * @param item
	 *   rte_flow item to convert.
	 * @param default_mask
	 *   Default bit-masks to use when item->mask is not provided.
	 * @param data
	 *   Internal structure to store the conversion.
	 *
	 * @return
	 *   0 on success, negative value otherwise.
	 */
	int (*convert)(const struct rte_flow_item *item,
		       const void *default_mask,
		       void *data);
	/** Size in bytes of the destination structure. */
	const unsigned int dst_sz;
	/** List of possible following items. */
	const enum rte_flow_item_type *const items;
};

/** Valid actions for this PMD. */
static const enum rte_flow_action_type valid_actions[] = {
	RTE_FLOW_ACTION_TYPE_DROP,
	RTE_FLOW_ACTION_TYPE_QUEUE,
	RTE_FLOW_ACTION_TYPE_MARK,
	RTE_FLOW_ACTION_TYPE_FLAG,
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
	RTE_FLOW_ACTION_TYPE_COUNT,
#endif
	RTE_FLOW_ACTION_TYPE_END,
};

/** Graph of supported items and associated actions. */
static const struct mlx5_flow_items mlx5_flow_items[] = {
	[RTE_FLOW_ITEM_TYPE_END] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
			       RTE_FLOW_ITEM_TYPE_VXLAN),
	},
	[RTE_FLOW_ITEM_TYPE_ETH] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
			       RTE_FLOW_ITEM_TYPE_IPV4,
			       RTE_FLOW_ITEM_TYPE_IPV6),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_eth){
			.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
			.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
			.type = -1,
		},
		.default_mask = &rte_flow_item_eth_mask,
		.mask_sz = sizeof(struct rte_flow_item_eth),
		.convert = mlx5_flow_create_eth,
		.dst_sz = sizeof(struct ibv_flow_spec_eth),
	},
	[RTE_FLOW_ITEM_TYPE_VLAN] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
			       RTE_FLOW_ITEM_TYPE_IPV6),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_vlan){
			.tci = -1,
		},
		.default_mask = &rte_flow_item_vlan_mask,
		.mask_sz = sizeof(struct rte_flow_item_vlan),
		.convert = mlx5_flow_create_vlan,
		.dst_sz = 0,
	},
	[RTE_FLOW_ITEM_TYPE_IPV4] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
			       RTE_FLOW_ITEM_TYPE_TCP),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_ipv4){
			.hdr = {
				.src_addr = -1,
				.dst_addr = -1,
				.type_of_service = -1,
				.next_proto_id = -1,
			},
		},
		.default_mask = &rte_flow_item_ipv4_mask,
		.mask_sz = sizeof(struct rte_flow_item_ipv4),
		.convert = mlx5_flow_create_ipv4,
		.dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
	},
	[RTE_FLOW_ITEM_TYPE_IPV6] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
			       RTE_FLOW_ITEM_TYPE_TCP),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_ipv6){
			.hdr = {
				.src_addr = {
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
				},
				.dst_addr = {
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
				},
				.vtc_flow = -1,
				.proto = -1,
				.hop_limits = -1,
			},
		},
		.default_mask = &rte_flow_item_ipv6_mask,
		.mask_sz = sizeof(struct rte_flow_item_ipv6),
		.convert = mlx5_flow_create_ipv6,
		.dst_sz = sizeof(struct ibv_flow_spec_ipv6),
	},
	[RTE_FLOW_ITEM_TYPE_UDP] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_udp){
			.hdr = {
				.src_port = -1,
				.dst_port = -1,
			},
		},
		.default_mask = &rte_flow_item_udp_mask,
		.mask_sz = sizeof(struct rte_flow_item_udp),
		.convert = mlx5_flow_create_udp,
		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
	},
	[RTE_FLOW_ITEM_TYPE_TCP] = {
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_tcp){
			.hdr = {
				.src_port = -1,
				.dst_port = -1,
			},
		},
		.default_mask = &rte_flow_item_tcp_mask,
		.mask_sz = sizeof(struct rte_flow_item_tcp),
		.convert = mlx5_flow_create_tcp,
		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
	},
	[RTE_FLOW_ITEM_TYPE_VXLAN] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_vxlan){
			.vni = "\xff\xff\xff",
		},
		.default_mask = &rte_flow_item_vxlan_mask,
		.mask_sz = sizeof(struct rte_flow_item_vxlan),
		.convert = mlx5_flow_create_vxlan,
		.dst_sz = sizeof(struct ibv_flow_spec_tunnel),
	},
};

/** Structure to pass to the conversion function. */
struct mlx5_flow_parse {
	uint32_t inner; /**< Set once VXLAN is encountered. */
	uint32_t allmulti:1; /**< Set once allmulti dst MAC is encountered. */
	uint32_t create:1;
	/**< Whether resources should remain after a validate. */
	uint32_t drop:1; /**< Target is a drop queue. */
	uint32_t mark:1; /**< Mark is present in the flow. */
	uint32_t count:1; /**< Count is present in the flow. */
	uint32_t mark_id; /**< Mark identifier. */
	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indexes to use. */
	uint16_t queues_n; /**< Number of entries in queue[]. */
	struct rte_eth_rss_conf rss_conf; /**< RSS configuration. */
	uint8_t rss_key[40]; /**< Copy of the RSS key. */
	enum hash_rxq_type layer; /**< Last pattern layer detected. */
	struct ibv_counter_set *cs; /**< Holds the counter set for the rule. */
	struct {
		struct ibv_flow_attr *ibv_attr;
		/**< Pointer to Verbs attributes. */
		unsigned int offset;
		/**< Current position or total size of the attribute. */
	} queue[RTE_DIM(hash_rxq_init)];
};
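
/*
 * Note: parser->queue[] holds one Verbs attribute per hash Rx queue
 * type; offset tracks how many bytes of specifications have been (or
 * will be) written after the ibv_flow_attr header of each of them.
 */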

static const struct rte_flow_ops mlx5_flow_ops = {
	.validate = mlx5_flow_validate,
	.create = mlx5_flow_create,
	.destroy = mlx5_flow_destroy,
	.flush = mlx5_flow_flush,
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
	.query = mlx5_flow_query,
#else
	.query = NULL,
#endif
	.isolate = mlx5_flow_isolate,
};

/* Convert FDIR request to Generic flow. */
struct mlx5_fdir {
	struct rte_flow_attr attr;
	struct rte_flow_action actions[2];
	struct rte_flow_item items[4];
	struct rte_flow_item_eth l2;
	struct rte_flow_item_eth l2_mask;
	union {
		struct rte_flow_item_ipv4 ipv4;
		struct rte_flow_item_ipv6 ipv6;
	} l3;
	union {
		struct rte_flow_item_udp udp;
		struct rte_flow_item_tcp tcp;
	} l4;
	struct rte_flow_action_queue queue;
};

/* Verbs specification header. */
struct ibv_spec_header {
	enum ibv_flow_spec_type type;
	uint16_t size;
};

/**
 * Check support for a given item.
 *
 * @param item[in]
 *   Item specification.
 * @param mask[in]
 *   Bit-masks covering supported fields to compare with spec, last and mask in
 *   \item.
 * @param size
 *   Bit-mask size in bytes.
 *
 * @return
 *   0 on success.
 */
static int
mlx5_flow_item_validate(const struct rte_flow_item *item,
			const uint8_t *mask, unsigned int size)
{
	int ret = 0;

	if (!item->spec && (item->mask || item->last))
		return -1;
	if (item->spec && !item->mask) {
		unsigned int i;
		const uint8_t *spec = item->spec;

		for (i = 0; i < size; ++i)
			if ((spec[i] | mask[i]) != mask[i])
				return -1;
	}
	if (item->last && !item->mask) {
		unsigned int i;
		const uint8_t *spec = item->last;

		for (i = 0; i < size; ++i)
			if ((spec[i] | mask[i]) != mask[i])
				return -1;
	}
	if (item->mask) {
		unsigned int i;
		const uint8_t *spec = item->mask;

		for (i = 0; i < size; ++i)
			if ((spec[i] | mask[i]) != mask[i])
				return -1;
	}
	if (item->spec && item->last) {
		uint8_t spec[size];
		uint8_t last[size];
		const uint8_t *apply = mask;
		unsigned int i;

		if (item->mask)
			apply = item->mask;
		for (i = 0; i < size; ++i) {
			spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
			last[i] = ((const uint8_t *)item->last)[i] & apply[i];
		}
		ret = memcmp(spec, last, size);
	}
	return ret;
}
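
/*
 * Example (illustrative): with a supported mask byte of 0xff the check
 * (spec | mask) != mask accepts any spec byte, while with a supported
 * mask byte of 0x00 any non-zero spec byte is rejected, i.e. rules may
 * only match on fields the NIC can actually match.
 */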

/**
 * Copy the RSS configuration from the user-provided one.
 *
 * @param priv
 *   Pointer to private structure.
 * @param parser
 *   Internal parser structure.
 * @param rss_conf
 *   User RSS configuration to save.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_flow_convert_rss_conf(struct priv *priv,
			   struct mlx5_flow_parse *parser,
			   const struct rte_eth_rss_conf *rss_conf)
{
	const struct rte_eth_rss_conf *rss;

	if (rss_conf) {
		if (rss_conf->rss_hf & MLX5_RSS_HF_MASK)
			return EINVAL;
		rss = rss_conf;
	} else {
		rss = &priv->rss_conf;
	}
	if (rss->rss_key_len > 40)
		return EINVAL;
	parser->rss_conf.rss_key_len = rss->rss_key_len;
	parser->rss_conf.rss_hf = rss->rss_hf;
	memcpy(parser->rss_key, rss->rss_key, rss->rss_key_len);
	parser->rss_conf.rss_key = parser->rss_key;
	return 0;
}

/**
 * Extract attributes to the parser.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_convert_attributes(struct priv *priv,
			     const struct rte_flow_attr *attr,
			     struct rte_flow_error *error,
			     struct mlx5_flow_parse *parser)
{
	(void)priv;
	(void)parser;
	if (attr->group) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
				   NULL,
				   "groups are not supported");
		return -rte_errno;
	}
	if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
				   NULL,
				   "priorities are not supported");
		return -rte_errno;
	}
	if (attr->egress) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
				   NULL,
				   "egress is not supported");
		return -rte_errno;
	}
	if (!attr->ingress) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
				   NULL,
				   "only ingress is supported");
		return -rte_errno;
	}
	return 0;
}

/**
 * Extract the requested actions to the parser.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_convert_actions(struct priv *priv,
			  const struct rte_flow_action actions[],
			  struct rte_flow_error *error,
			  struct mlx5_flow_parse *parser)
{
	/*
	 * Add default RSS configuration necessary for Verbs to create QP even
	 * if no RSS is necessary.
	 */
	priv_flow_convert_rss_conf(priv, parser,
				   (const struct rte_eth_rss_conf *)
				   &priv->rss_conf);
	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
			continue;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
			parser->drop = 1;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
			const struct rte_flow_action_queue *queue =
				(const struct rte_flow_action_queue *)
				actions->conf;
			uint16_t n;
			uint16_t found = 0;

			if (!queue || (queue->index > (priv->rxqs_n - 1)))
				goto exit_action_not_supported;
			for (n = 0; n < parser->queues_n; ++n) {
				if (parser->queues[n] == queue->index) {
					found = 1;
					break;
				}
			}
			if (parser->queues_n > 1 && !found) {
				rte_flow_error_set(error, ENOTSUP,
					   RTE_FLOW_ERROR_TYPE_ACTION,
					   actions,
					   "queue action not in RSS queues");
				return -rte_errno;
			}
			if (!found) {
				parser->queues_n = 1;
				parser->queues[0] = queue->index;
			}
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
			const struct rte_flow_action_rss *rss =
				(const struct rte_flow_action_rss *)
				actions->conf;
			uint16_t n;

			if (!rss || !rss->num) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "no valid queues");
				return -rte_errno;
			}
			if (parser->queues_n == 1) {
				uint16_t found = 0;

				assert(parser->queues_n);
				for (n = 0; n < rss->num; ++n) {
					if (parser->queues[0] ==
					    rss->queue[n]) {
						found = 1;
						break;
					}
				}
				if (!found) {
					rte_flow_error_set(error, ENOTSUP,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "queue action not in RSS"
						   " queues");
					return -rte_errno;
				}
			}
			for (n = 0; n < rss->num; ++n) {
				if (rss->queue[n] >= priv->rxqs_n) {
					rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "queue id > number of"
						   " queues");
					return -rte_errno;
				}
			}
			for (n = 0; n < rss->num; ++n)
				parser->queues[n] = rss->queue[n];
			parser->queues_n = rss->num;
			if (priv_flow_convert_rss_conf(priv, parser,
						       rss->rss_conf)) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "wrong RSS configuration");
				return -rte_errno;
			}
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
			const struct rte_flow_action_mark *mark =
				(const struct rte_flow_action_mark *)
				actions->conf;

			if (!mark) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "mark must be defined");
				return -rte_errno;
			} else if (mark->id >= MLX5_FLOW_MARK_MAX) {
				rte_flow_error_set(error, ENOTSUP,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "mark must be between 0"
						   " and 16777199");
				return -rte_errno;
			}
			parser->mark = 1;
			parser->mark_id = mark->id;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
			parser->mark = 1;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
			   priv->counter_set_supported) {
			parser->count = 1;
		} else {
			goto exit_action_not_supported;
		}
	}
	if (parser->drop && parser->mark)
		parser->mark = 0;
	if (!parser->queues_n && !parser->drop) {
		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "no valid action");
		return -rte_errno;
	}
	return 0;
exit_action_not_supported:
	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
			   actions, "action not supported");
	return -rte_errno;
}

/**
 * Validate items.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] items
 *   Pattern specification (list terminated by the END pattern item).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_convert_items_validate(struct priv *priv,
				 const struct rte_flow_item items[],
				 struct rte_flow_error *error,
				 struct mlx5_flow_parse *parser)
{
	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
	unsigned int i;

	(void)priv;
	/* Initialise the offsets to start after verbs attribute. */
	for (i = 0; i != hash_rxq_init_n; ++i)
		parser->queue[i].offset = sizeof(struct ibv_flow_attr);
	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
		const struct mlx5_flow_items *token = NULL;
		unsigned int n;
		int err;

		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
			continue;
		for (i = 0;
		     cur_item->items &&
		     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
		     ++i) {
			if (cur_item->items[i] == items->type) {
				token = &mlx5_flow_items[items->type];
				break;
			}
		}
		if (!token)
			goto exit_item_not_supported;
		cur_item = token;
		err = mlx5_flow_item_validate(items,
					      (const uint8_t *)cur_item->mask,
					      cur_item->mask_sz);
		if (err)
			goto exit_item_not_supported;
		if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
			if (parser->inner) {
				rte_flow_error_set(error, ENOTSUP,
						   RTE_FLOW_ERROR_TYPE_ITEM,
						   items,
						   "cannot recognize multiple"
						   " VXLAN encapsulations");
				return -rte_errno;
			}
			parser->inner = IBV_FLOW_SPEC_INNER;
		}
		if (parser->drop || parser->queues_n == 1) {
			parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
		} else {
			for (n = 0; n != hash_rxq_init_n; ++n)
				parser->queue[n].offset += cur_item->dst_sz;
		}
	}
	if (parser->mark)
		for (i = 0; i != hash_rxq_init_n; ++i)
			parser->queue[i].offset +=
				sizeof(struct ibv_flow_spec_action_tag);
	if (parser->count) {
		unsigned int size = sizeof(struct ibv_flow_spec_counter_action);

		for (i = 0; i != hash_rxq_init_n; ++i)
			parser->queue[i].offset += size;
	}
	return 0;
exit_item_not_supported:
	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
			   items, "item not supported");
	return -rte_errno;
}
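
/*
 * Note (illustrative): for a pattern eth / ipv4 / udp, each selected
 * queue[] entry ends up with offset == sizeof(struct ibv_flow_attr) +
 * dst_sz(eth) + dst_sz(ipv4) + dst_sz(udp), plus the tag and/or counter
 * spec sizes when mark/count actions are present; this is the exact
 * buffer size later requested from priv_flow_convert_allocate().
 */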

/**
 * Allocate memory space to store verbs flow attributes.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] priority
 *   Flow priority.
 * @param[in] size
 *   Amount of bytes to allocate.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A verbs flow attribute on success, NULL otherwise.
 */
static struct ibv_flow_attr *
priv_flow_convert_allocate(struct priv *priv,
			   unsigned int priority,
			   unsigned int size,
			   struct rte_flow_error *error)
{
	struct ibv_flow_attr *ibv_attr;

	(void)priv;
	ibv_attr = rte_calloc(__func__, 1, size, 0);
	if (!ibv_attr) {
		rte_flow_error_set(error, ENOMEM,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "cannot allocate verbs spec attributes.");
		return NULL;
	}
	ibv_attr->priority = priority;
	return ibv_attr;
}

/**
 * Finalise verbs flow attributes.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in, out] parser
 *   Internal parser structure.
 */
static void
priv_flow_convert_finalise(struct priv *priv, struct mlx5_flow_parse *parser)
{
	const unsigned int ipv4 =
		hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
	const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 : HASH_RXQ_TCPV6;
	const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
	const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 : HASH_RXQ_TCPV4;
	const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 : HASH_RXQ_IPV4;
	const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
	unsigned int i;

	(void)priv;
	if (parser->layer == HASH_RXQ_ETH) {
		goto fill;
	} else {
		/*
		 * This layer becomes useless as the pattern defines deeper
		 * layers.
		 */
		rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
		parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
	}
	/* Remove opposite kind of layer e.g. IPv6 if the pattern is IPv4. */
	for (i = ohmin; i != (ohmax + 1); ++i) {
		if (!parser->queue[i].ibv_attr)
			continue;
		rte_free(parser->queue[i].ibv_attr);
		parser->queue[i].ibv_attr = NULL;
	}
	/* Remove impossible flow according to the RSS configuration. */
	if (hash_rxq_init[parser->layer].dpdk_rss_hf &
	    parser->rss_conf.rss_hf) {
		/* Remove any other flow. */
		for (i = hmin; i != (hmax + 1); ++i) {
			if ((i == parser->layer) ||
			    (!parser->queue[i].ibv_attr))
				continue;
			rte_free(parser->queue[i].ibv_attr);
			parser->queue[i].ibv_attr = NULL;
		}
	} else if (!parser->queue[ip].ibv_attr) {
		/* No RSS possible with the current configuration. */
		parser->queues_n = 1;
		return;
	}
fill:
	/*
	 * Fill missing layers in verbs specifications, or compute the correct
	 * offset to allocate the memory space for the attributes and
	 * specifications.
	 */
	for (i = 0; i != hash_rxq_init_n - 1; ++i) {
		union {
			struct ibv_flow_spec_ipv4_ext ipv4;
			struct ibv_flow_spec_ipv6 ipv6;
			struct ibv_flow_spec_tcp_udp udp_tcp;
		} specs;
		void *dst;
		uint16_t size;

		if (i == parser->layer)
			continue;
		if (parser->layer == HASH_RXQ_ETH) {
			if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
				size = sizeof(struct ibv_flow_spec_ipv4_ext);
				specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
					.type = IBV_FLOW_SPEC_IPV4_EXT,
					.size = size,
				};
			} else {
				size = sizeof(struct ibv_flow_spec_ipv6);
				specs.ipv6 = (struct ibv_flow_spec_ipv6){
					.type = IBV_FLOW_SPEC_IPV6,
					.size = size,
				};
			}
			if (parser->queue[i].ibv_attr) {
				dst = (void *)((uintptr_t)
					       parser->queue[i].ibv_attr +
					       parser->queue[i].offset);
				memcpy(dst, &specs, size);
				++parser->queue[i].ibv_attr->num_of_specs;
			}
			parser->queue[i].offset += size;
		}
		if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
		    (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
			size = sizeof(struct ibv_flow_spec_tcp_udp);
			specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
				.type = ((i == HASH_RXQ_UDPV4 ||
					  i == HASH_RXQ_UDPV6) ?
					 IBV_FLOW_SPEC_UDP :
					 IBV_FLOW_SPEC_TCP),
				.size = size,
			};
			if (parser->queue[i].ibv_attr) {
				dst = (void *)((uintptr_t)
					       parser->queue[i].ibv_attr +
					       parser->queue[i].offset);
				memcpy(dst, &specs, size);
				++parser->queue[i].ibv_attr->num_of_specs;
			}
			parser->queue[i].offset += size;
		}
	}
}
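
/*
 * Example (illustrative): when a pattern stops at an IP layer, the
 * attributes for the more specific TCP/UDP hash queue types are
 * completed here with a wildcard (all-zero) L4 specification so their
 * QPs can still hash on ports, while the attributes belonging to the
 * opposite IP version are released.
 */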

/**
 * Validate and convert a flow supported by the NIC.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_convert(struct priv *priv,
		  const struct rte_flow_attr *attr,
		  const struct rte_flow_item items[],
		  const struct rte_flow_action actions[],
		  struct rte_flow_error *error,
		  struct mlx5_flow_parse *parser)
{
	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
	unsigned int i;
	int ret;

	/* First step. Validate the attributes, items and actions. */
	*parser = (struct mlx5_flow_parse){
		.create = parser->create,
		.layer = HASH_RXQ_ETH,
		.mark_id = MLX5_FLOW_MARK_DEFAULT,
	};
	ret = priv_flow_convert_attributes(priv, attr, error, parser);
	if (ret)
		return ret;
	ret = priv_flow_convert_actions(priv, actions, error, parser);
	if (ret)
		return ret;
	ret = priv_flow_convert_items_validate(priv, items, error, parser);
	if (ret)
		return ret;
	priv_flow_convert_finalise(priv, parser);
	/*
	 * Second step.
	 * Allocate the memory space to store verbs specifications.
	 */
	if (parser->drop || parser->queues_n == 1) {
		unsigned int priority =
			attr->priority +
			hash_rxq_init[HASH_RXQ_ETH].flow_priority;
		unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;

		parser->queue[HASH_RXQ_ETH].ibv_attr =
			priv_flow_convert_allocate(priv, priority,
						   offset, error);
		if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
			return -rte_errno;
		parser->queue[HASH_RXQ_ETH].offset =
			sizeof(struct ibv_flow_attr);
	} else {
		for (i = 0; i != hash_rxq_init_n; ++i) {
			unsigned int priority =
				attr->priority +
				hash_rxq_init[i].flow_priority;
			unsigned int offset;

			if (!(parser->rss_conf.rss_hf &
			      hash_rxq_init[i].dpdk_rss_hf) &&
			    (i != HASH_RXQ_ETH))
				continue;
			offset = parser->queue[i].offset;
			parser->queue[i].ibv_attr =
				priv_flow_convert_allocate(priv, priority,
							   offset, error);
			if (!parser->queue[i].ibv_attr)
				goto exit_enomem;
			parser->queue[i].offset = sizeof(struct ibv_flow_attr);
		}
	}
	/* Third step. Conversion parse, fill the specifications. */
	parser->inner = 0;
	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
			continue;
		cur_item = &mlx5_flow_items[items->type];
		ret = cur_item->convert(items,
					(cur_item->default_mask ?
					 cur_item->default_mask :
					 cur_item->mask),
					parser);
		if (ret) {
			rte_flow_error_set(error, ret,
					   RTE_FLOW_ERROR_TYPE_ITEM,
					   items, "item not supported");
			goto exit_free;
		}
	}
	if (parser->mark)
		mlx5_flow_create_flag_mark(parser, parser->mark_id);
	if (parser->count && parser->create) {
		mlx5_flow_create_count(priv, parser);
		if (!parser->cs)
			goto exit_count_error;
	}
	/*
	 * Last step. Complete missing specification to reach the RSS
	 * configuration.
	 */
	if (parser->queues_n > 1) {
		priv_flow_convert_finalise(priv, parser);
	} else {
		/*
		 * Single-queue actions have their priority overridden with
		 * the Ethernet priority; adjust it to the priority of the
		 * most specific layer actually matched.
		 */
		parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
			attr->priority +
			hash_rxq_init[parser->layer].flow_priority;
	}
exit_free:
	/* Only verification is expected, all resources should be released. */
	if (!parser->create) {
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (parser->queue[i].ibv_attr) {
				rte_free(parser->queue[i].ibv_attr);
				parser->queue[i].ibv_attr = NULL;
			}
		}
	}
	if (parser->allmulti &&
	    parser->layer == HASH_RXQ_ETH) {
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (!parser->queue[i].ibv_attr)
				continue;
			if (parser->queue[i].ibv_attr->num_of_specs != 1)
				break;
			parser->queue[i].ibv_attr->type =
						IBV_FLOW_ATTR_MC_DEFAULT;
		}
	}
	return ret;
exit_enomem:
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser->queue[i].ibv_attr) {
			rte_free(parser->queue[i].ibv_attr);
			parser->queue[i].ibv_attr = NULL;
		}
	}
	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
			   NULL, "cannot allocate verbs spec attributes.");
	return -rte_errno;
exit_count_error:
	rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
			   NULL, "cannot create counter.");
	return -rte_errno;
}

/**
 * Copy the specification created into the flow.
 *
 * @param parser
 *   Internal parser structure.
 * @param src
 *   Create specification.
 * @param size
 *   Size in bytes of the specification to copy.
 */
static void
mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
		      unsigned int size)
{
	unsigned int i;
	void *dst;

	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (!parser->queue[i].ibv_attr)
			continue;
		/* Specification must be the same l3 type or none. */
		if (parser->layer == HASH_RXQ_ETH ||
		    (hash_rxq_init[parser->layer].ip_version ==
		     hash_rxq_init[i].ip_version) ||
		    (hash_rxq_init[i].ip_version == 0)) {
			dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
					parser->queue[i].offset);
			memcpy(dst, src, size);
			++parser->queue[i].ibv_attr->num_of_specs;
			parser->queue[i].offset += size;
		}
	}
}
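
/*
 * Note: a single converted item is duplicated into every hash Rx queue
 * attribute still enabled (e.g. an Ethernet spec is copied into the
 * TCPv4, UDPv4 and IPv4 attributes alike, provided the L3 type matches
 * or is unconstrained), keeping all per-hash Verbs flows consistent
 * with the one rte_flow rule.
 */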

/**
 * Convert Ethernet item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data)
{
	const struct rte_flow_item_eth *spec = item->spec;
	const struct rte_flow_item_eth *mask = item->mask;
	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
	struct ibv_flow_spec_eth eth = {
		.type = parser->inner | IBV_FLOW_SPEC_ETH,
		.size = eth_size,
	};

	/* Don't update layer for the inner pattern. */
	if (!parser->inner)
		parser->layer = HASH_RXQ_ETH;
	if (spec) {
		unsigned int i;

		if (!mask)
			mask = default_mask;
		memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
		memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
		eth.val.ether_type = spec->type;
		memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
		memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
		eth.mask.ether_type = mask->type;
		/* Remove unwanted bits from values. */
		for (i = 0; i < ETHER_ADDR_LEN; ++i) {
			eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
			eth.val.src_mac[i] &= eth.mask.src_mac[i];
		}
		eth.val.ether_type &= eth.mask.ether_type;
	}
	mlx5_flow_create_copy(parser, &eth, eth_size);
	parser->allmulti = eth.val.dst_mac[0] & 1;
	return 0;
}
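
/*
 * Note: eth.val.dst_mac[0] & 1 tests the Ethernet group (multicast)
 * bit of the destination MAC; priv_flow_convert() later turns such
 * single-spec attributes into IBV_FLOW_ATTR_MC_DEFAULT.
 */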

/**
 * Convert VLAN item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_vlan(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data)
{
	const struct rte_flow_item_vlan *spec = item->spec;
	const struct rte_flow_item_vlan *mask = item->mask;
	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
	struct ibv_flow_spec_eth *eth;
	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);

	if (spec) {
		unsigned int i;

		if (!mask)
			mask = default_mask;
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (!parser->queue[i].ibv_attr)
				continue;
			/* Update the previously copied Ethernet spec. */
			eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
				       parser->queue[i].offset - eth_size);
			eth->val.vlan_tag = spec->tci;
			eth->mask.vlan_tag = mask->tci;
			eth->val.vlan_tag &= eth->mask.vlan_tag;
		}
	}
	return 0;
}

/**
 * Convert IPv4 item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data)
{
	const struct rte_flow_item_ipv4 *spec = item->spec;
	const struct rte_flow_item_ipv4 *mask = item->mask;
	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
	unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
	struct ibv_flow_spec_ipv4_ext ipv4 = {
		.type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
		.size = ipv4_size,
	};

	/* Don't update layer for the inner pattern. */
	if (!parser->inner)
		parser->layer = HASH_RXQ_IPV4;
	if (spec) {
		if (!mask)
			mask = default_mask;
		ipv4.val = (struct ibv_flow_ipv4_ext_filter){
			.src_ip = spec->hdr.src_addr,
			.dst_ip = spec->hdr.dst_addr,
			.proto = spec->hdr.next_proto_id,
			.tos = spec->hdr.type_of_service,
		};
		ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
			.src_ip = mask->hdr.src_addr,
			.dst_ip = mask->hdr.dst_addr,
			.proto = mask->hdr.next_proto_id,
			.tos = mask->hdr.type_of_service,
		};
		/* Remove unwanted bits from values. */
		ipv4.val.src_ip &= ipv4.mask.src_ip;
		ipv4.val.dst_ip &= ipv4.mask.dst_ip;
		ipv4.val.proto &= ipv4.mask.proto;
		ipv4.val.tos &= ipv4.mask.tos;
	}
	mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
	return 0;
}

/**
 * Convert IPv6 item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data)
{
	const struct rte_flow_item_ipv6 *spec = item->spec;
	const struct rte_flow_item_ipv6 *mask = item->mask;
	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
	unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
	struct ibv_flow_spec_ipv6 ipv6 = {
		.type = parser->inner | IBV_FLOW_SPEC_IPV6,
		.size = ipv6_size,
	};

	/* Don't update layer for the inner pattern. */
	if (!parser->inner)
		parser->layer = HASH_RXQ_IPV6;
	if (spec) {
		unsigned int i;

		if (!mask)
			mask = default_mask;
		memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
		       RTE_DIM(ipv6.val.src_ip));
		memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
		       RTE_DIM(ipv6.val.dst_ip));
		memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
		       RTE_DIM(ipv6.mask.src_ip));
		memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
		       RTE_DIM(ipv6.mask.dst_ip));
		/* Copy header values as well, otherwise the masked fields
		 * below would always compare against zero. */
		ipv6.val.flow_label = spec->hdr.vtc_flow;
		ipv6.val.next_hdr = spec->hdr.proto;
		ipv6.val.hop_limit = spec->hdr.hop_limits;
		ipv6.mask.flow_label = mask->hdr.vtc_flow;
		ipv6.mask.next_hdr = mask->hdr.proto;
		ipv6.mask.hop_limit = mask->hdr.hop_limits;
		/* Remove unwanted bits from values. */
		for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
			ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
			ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
		}
		ipv6.val.flow_label &= ipv6.mask.flow_label;
		ipv6.val.next_hdr &= ipv6.mask.next_hdr;
		ipv6.val.hop_limit &= ipv6.mask.hop_limit;
	}
	mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
	return 0;
}

/**
 * Convert UDP item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_udp(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data)
{
	const struct rte_flow_item_udp *spec = item->spec;
	const struct rte_flow_item_udp *mask = item->mask;
	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
	unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
	struct ibv_flow_spec_tcp_udp udp = {
		.type = parser->inner | IBV_FLOW_SPEC_UDP,
		.size = udp_size,
	};

	/* Don't update layer for the inner pattern. */
	if (!parser->inner) {
		if (parser->layer == HASH_RXQ_IPV4)
			parser->layer = HASH_RXQ_UDPV4;
		else
			parser->layer = HASH_RXQ_UDPV6;
	}
	if (spec) {
		if (!mask)
			mask = default_mask;
		udp.val.dst_port = spec->hdr.dst_port;
		udp.val.src_port = spec->hdr.src_port;
		udp.mask.dst_port = mask->hdr.dst_port;
		udp.mask.src_port = mask->hdr.src_port;
		/* Remove unwanted bits from values. */
		udp.val.src_port &= udp.mask.src_port;
		udp.val.dst_port &= udp.mask.dst_port;
	}
	mlx5_flow_create_copy(parser, &udp, udp_size);
	return 0;
}

/**
 * Convert TCP item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_tcp(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data)
{
	const struct rte_flow_item_tcp *spec = item->spec;
	const struct rte_flow_item_tcp *mask = item->mask;
	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
	unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
	struct ibv_flow_spec_tcp_udp tcp = {
		.type = parser->inner | IBV_FLOW_SPEC_TCP,
		.size = tcp_size,
	};

	/* Don't update layer for the inner pattern. */
	if (!parser->inner) {
		if (parser->layer == HASH_RXQ_IPV4)
			parser->layer = HASH_RXQ_TCPV4;
		else
			parser->layer = HASH_RXQ_TCPV6;
	}
	if (spec) {
		if (!mask)
			mask = default_mask;
		tcp.val.dst_port = spec->hdr.dst_port;
		tcp.val.src_port = spec->hdr.src_port;
		tcp.mask.dst_port = mask->hdr.dst_port;
		tcp.mask.src_port = mask->hdr.src_port;
		/* Remove unwanted bits from values. */
		tcp.val.src_port &= tcp.mask.src_port;
		tcp.val.dst_port &= tcp.mask.dst_port;
	}
	mlx5_flow_create_copy(parser, &tcp, tcp_size);
	return 0;
}

/**
 * Convert VXLAN item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
		       const void *default_mask,
		       void *data)
{
	const struct rte_flow_item_vxlan *spec = item->spec;
	const struct rte_flow_item_vxlan *mask = item->mask;
	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
	struct ibv_flow_spec_tunnel vxlan = {
		.type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
		.size = size,
	};
	union vni {
		uint32_t vlan_id;
		uint8_t vni[4];
	} id;

	id.vni[0] = 0;
	parser->inner = IBV_FLOW_SPEC_INNER;
	if (spec) {
		if (!mask)
			mask = default_mask;
		memcpy(&id.vni[1], spec->vni, 3);
		vxlan.val.tunnel_id = id.vlan_id;
		memcpy(&id.vni[1], mask->vni, 3);
		vxlan.mask.tunnel_id = id.vlan_id;
		/* Remove unwanted bits from values. */
		vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
	}
	/*
	 * Tunnel id 0 is equivalent to not adding a VXLAN layer: if only
	 * this layer is defined in the Verbs specification, it is
	 * interpreted as a wildcard and all packets will match this rule;
	 * if it follows a full stack layer (ex: eth / ipv4 / udp), all
	 * packets matching the layers before will also match this rule.
	 * To avoid such a situation, VNI 0 is currently refused.
	 */
	if (!vxlan.val.tunnel_id)
		return EINVAL;
	mlx5_flow_create_copy(parser, &vxlan, size);
	return 0;
}
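
/*
 * Note: the union above places the 24-bit VNI in bytes 1-3 of the
 * 32-bit tunnel id (byte 0 stays zero), matching the layout expected
 * by the Verbs tunnel specification.
 */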

/**
 * Convert mark/flag action to Verbs specification.
 *
 * @param parser
 *   Internal parser structure.
 * @param mark_id
 *   Mark identifier.
 *
 * @return
 *   0 on success.
 */
static int
mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
{
	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
	struct ibv_flow_spec_action_tag tag = {
		.type = IBV_FLOW_SPEC_ACTION_TAG,
		.size = size,
		.tag_id = mlx5_flow_mark_set(mark_id),
	};

	assert(parser->mark);
	mlx5_flow_create_copy(parser, &tag, size);
	return 0;
}

/**
 * Convert count action to Verbs specification.
 *
 * @param priv
 *   Pointer to private structure.
 * @param parser
 *   Pointer to MLX5 flow parser structure.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
mlx5_flow_create_count(struct priv *priv __rte_unused,
		       struct mlx5_flow_parse *parser __rte_unused)
{
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
	unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
	struct ibv_counter_set_init_attr init_attr = {0};
	struct ibv_flow_spec_counter_action counter = {
		.type = IBV_FLOW_SPEC_ACTION_COUNT,
		.size = size,
		.counter_set_handle = 0,
	};

	init_attr.counter_set_id = 0;
	parser->cs = ibv_create_counter_set(priv->ctx, &init_attr);
	if (!parser->cs)
		return EINVAL;
	counter.counter_set_handle = parser->cs->handle;
	mlx5_flow_create_copy(parser, &counter, size);
#endif
	return 0;
}

/**
 * Complete flow rule creation with a drop queue.
 *
 * @param priv
 *   Pointer to private structure.
 * @param parser
 *   Internal parser structure.
 * @param flow
 *   Pointer to the rte_flow.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_flow_create_action_queue_drop(struct priv *priv,
				   struct mlx5_flow_parse *parser,
				   struct rte_flow *flow,
				   struct rte_flow_error *error)
{
	struct ibv_flow_spec_action_drop *drop;
	unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
	int err = 0;

	assert(priv->pd);
	assert(priv->ctx);
	flow->drop = 1;
	drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
			parser->queue[HASH_RXQ_ETH].offset);
	*drop = (struct ibv_flow_spec_action_drop){
			.type = IBV_FLOW_SPEC_ACTION_DROP,
			.size = size,
	};
	++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
	parser->queue[HASH_RXQ_ETH].offset += size;
	flow->frxq[HASH_RXQ_ETH].ibv_attr =
		parser->queue[HASH_RXQ_ETH].ibv_attr;
	if (parser->count)
		flow->cs = parser->cs;
	if (!priv->dev->data->dev_started)
		return 0;
	parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
	flow->frxq[HASH_RXQ_ETH].ibv_flow =
		ibv_create_flow(priv->flow_drop_queue->qp,
				flow->frxq[HASH_RXQ_ETH].ibv_attr);
	if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "flow rule creation failure");
		err = ENOMEM;
		goto error;
	}
	return 0;
error:
	assert(flow);
	if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
		claim_zero(ibv_destroy_flow(flow->frxq[HASH_RXQ_ETH].ibv_flow));
		flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
	}
	if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
		rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
		flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
	}
	if (flow->cs) {
		claim_zero(ibv_destroy_counter_set(flow->cs));
		flow->cs = NULL;
		parser->cs = NULL;
	}
	return err;
}

/**
 * Create hash Rx queues when RSS is enabled.
 *
 * @param priv
 *   Pointer to private structure.
 * @param parser
 *   Internal parser structure.
 * @param flow
 *   Pointer to the rte_flow.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, an errno value otherwise and rte_errno is set.
 */
static int
priv_flow_create_action_queue_rss(struct priv *priv,
				  struct mlx5_flow_parse *parser,
				  struct rte_flow *flow,
				  struct rte_flow_error *error)
{
	unsigned int i;

	for (i = 0; i != hash_rxq_init_n; ++i) {
		uint64_t hash_fields;

		if (!parser->queue[i].ibv_attr)
			continue;
		flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
		parser->queue[i].ibv_attr = NULL;
		hash_fields = hash_rxq_init[i].hash_fields;
		if (!priv->dev->data->dev_started)
			continue;
		flow->frxq[i].hrxq =
			mlx5_priv_hrxq_get(priv,
					   parser->rss_conf.rss_key,
					   parser->rss_conf.rss_key_len,
					   hash_fields,
					   parser->queues,
					   parser->queues_n);
		if (flow->frxq[i].hrxq)
			continue;
		flow->frxq[i].hrxq =
			mlx5_priv_hrxq_new(priv,
					   parser->rss_conf.rss_key,
					   parser->rss_conf.rss_key_len,
					   hash_fields,
					   parser->queues,
					   parser->queues_n);
		if (!flow->frxq[i].hrxq) {
			rte_flow_error_set(error, ENOMEM,
					   RTE_FLOW_ERROR_TYPE_HANDLE,
					   NULL, "cannot create hash rxq");
			return ENOMEM;
		}
	}
	return 0;
}

/**
 * Complete flow rule creation.
 *
 * @param priv
 *   Pointer to private structure.
 * @param parser
 *   Internal parser structure.
 * @param flow
 *   Pointer to the rte_flow.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, an errno value otherwise and rte_errno is set.
 */
static int
priv_flow_create_action_queue(struct priv *priv,
			      struct mlx5_flow_parse *parser,
			      struct rte_flow *flow,
			      struct rte_flow_error *error)
{
	int err = 0;
	unsigned int i;

	assert(priv->pd);
	assert(priv->ctx);
	assert(!parser->drop);
	err = priv_flow_create_action_queue_rss(priv, parser, flow, error);
	if (err)
		goto error;
	if (parser->count)
		flow->cs = parser->cs;
	if (!priv->dev->data->dev_started)
		return 0;
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (!flow->frxq[i].hrxq)
			continue;
		flow->frxq[i].ibv_flow =
			ibv_create_flow(flow->frxq[i].hrxq->qp,
					flow->frxq[i].ibv_attr);
		if (!flow->frxq[i].ibv_flow) {
			rte_flow_error_set(error, ENOMEM,
					   RTE_FLOW_ERROR_TYPE_HANDLE,
					   NULL, "flow rule creation failure");
			err = ENOMEM;
			goto error;
		}
		DEBUG("%p type %d QP %p ibv_flow %p",
		      (void *)flow, i,
		      (void *)flow->frxq[i].hrxq,
		      (void *)flow->frxq[i].ibv_flow);
	}
	for (i = 0; i != parser->queues_n; ++i) {
		struct mlx5_rxq_data *q =
			(*priv->rxqs)[parser->queues[i]];

		q->mark |= parser->mark;
	}
	return 0;
error:
	assert(flow);
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (flow->frxq[i].ibv_flow) {
			struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;

			claim_zero(ibv_destroy_flow(ibv_flow));
		}
		if (flow->frxq[i].hrxq)
			mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq);
		if (flow->frxq[i].ibv_attr)
			rte_free(flow->frxq[i].ibv_attr);
	}
	if (flow->cs) {
		claim_zero(ibv_destroy_counter_set(flow->cs));
		flow->cs = NULL;
		parser->cs = NULL;
	}
	return err;
}

/**
 * Convert a flow.
 *
 * @param priv
 *   Pointer to private structure.
 * @param list
 *   Pointer to a TAILQ flow list.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A flow on success, NULL otherwise.
 */
static struct rte_flow *
priv_flow_create(struct priv *priv,
		 struct mlx5_flows *list,
		 const struct rte_flow_attr *attr,
		 const struct rte_flow_item items[],
		 const struct rte_flow_action actions[],
		 struct rte_flow_error *error)
{
	struct mlx5_flow_parse parser = { .create = 1, };
	struct rte_flow *flow = NULL;
	unsigned int i;
	int err;

	err = priv_flow_convert(priv, attr, items, actions, error, &parser);
	if (err)
		goto exit;
	flow = rte_calloc(__func__, 1,
			  sizeof(*flow) + parser.queues_n * sizeof(uint16_t),
			  0);
	if (!flow) {
		rte_flow_error_set(error, ENOMEM,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "cannot allocate flow memory");
		return NULL;
	}
	/* Copy queues configuration. */
	flow->queues = (uint16_t (*)[])(flow + 1);
	memcpy(flow->queues, parser.queues, parser.queues_n * sizeof(uint16_t));
	flow->queues_n = parser.queues_n;
	flow->mark = parser.mark;
	/* Copy RSS configuration. */
	flow->rss_conf = parser.rss_conf;
	flow->rss_conf.rss_key = flow->rss_key;
	memcpy(flow->rss_key, parser.rss_key, parser.rss_conf.rss_key_len);
	/* Finalise the flow. */
	if (parser.drop)
		err = priv_flow_create_action_queue_drop(priv, &parser, flow,
							 error);
	else
		err = priv_flow_create_action_queue(priv, &parser, flow, error);
	if (err)
		goto exit;
	TAILQ_INSERT_TAIL(list, flow, next);
	DEBUG("Flow created %p", (void *)flow);
	return flow;
exit:
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser.queue[i].ibv_attr)
			rte_free(parser.queue[i].ibv_attr);
	}
	rte_free(flow);
	return NULL;
}

/**
 * Validate a flow supported by the NIC.
 *
 * @see rte_flow_validate()
 * @see rte_flow_ops
 */
int
mlx5_flow_validate(struct rte_eth_dev *dev,
		   const struct rte_flow_attr *attr,
		   const struct rte_flow_item items[],
		   const struct rte_flow_action actions[],
		   struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;
	int ret;
	struct mlx5_flow_parse parser = { .create = 0, };

	priv_lock(priv);
	ret = priv_flow_convert(priv, attr, items, actions, error, &parser);
	priv_unlock(priv);
	return ret;
}

/**
 * Create a flow.
 *
 * @see rte_flow_create()
 * @see rte_flow_ops
 */
struct rte_flow *
mlx5_flow_create(struct rte_eth_dev *dev,
		 const struct rte_flow_attr *attr,
		 const struct rte_flow_item items[],
		 const struct rte_flow_action actions[],
		 struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;
	struct rte_flow *flow;

	priv_lock(priv);
	flow = priv_flow_create(priv, &priv->flows, attr, items, actions,
				error);
	priv_unlock(priv);
	return flow;
}

/**
 * Destroy a flow.
 *
 * @param priv
 *   Pointer to private structure.
 * @param list
 *   Pointer to a TAILQ flow list.
 * @param[in] flow
 *   Flow to destroy.
 */
static void
priv_flow_destroy(struct priv *priv,
		  struct mlx5_flows *list,
		  struct rte_flow *flow)
{
	unsigned int i;

	if (flow->drop || !flow->mark)
		goto free;
	for (i = 0; i != flow->queues_n; ++i) {
		struct rte_flow *tmp;
		int mark = 0;

		/*
		 * To remove the mark from the queue, the queue must not be
		 * present in any other marked flow (RSS or not).
		 */
		TAILQ_FOREACH(tmp, list, next) {
			unsigned int j;
			uint16_t *tqs = NULL;
			uint16_t tq_n = 0;

			if (!tmp->mark)
				continue;
			for (j = 0; j != hash_rxq_init_n; ++j) {
				if (!tmp->frxq[j].hrxq)
					continue;
				tqs = tmp->frxq[j].hrxq->ind_table->queues;
				tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
			}
			if (!tq_n)
				continue;
			for (j = 0; (j != tq_n) && !mark; j++)
				if (tqs[j] == (*flow->queues)[i])
					mark = 1;
		}
		(*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
	}
free:
	if (flow->drop) {
		if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
			claim_zero(ibv_destroy_flow
				   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
		rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
	} else {
		for (i = 0; i != hash_rxq_init_n; ++i) {
			struct mlx5_flow *frxq = &flow->frxq[i];

			if (frxq->ibv_flow)
				claim_zero(ibv_destroy_flow(frxq->ibv_flow));
			if (frxq->hrxq)
				mlx5_priv_hrxq_release(priv, frxq->hrxq);
			if (frxq->ibv_attr)
				rte_free(frxq->ibv_attr);
		}
	}
	if (flow->cs) {
		claim_zero(ibv_destroy_counter_set(flow->cs));
		flow->cs = NULL;
	}
	TAILQ_REMOVE(list, flow, next);
	DEBUG("Flow destroyed %p", (void *)flow);
	rte_free(flow);
}

/**
 * Destroy all flows.
 *
 * @param priv
 *   Pointer to private structure.
 * @param list
 *   Pointer to a TAILQ flow list.
 */
void
priv_flow_flush(struct priv *priv, struct mlx5_flows *list)
{
	while (!TAILQ_EMPTY(list)) {
		struct rte_flow *flow;

		flow = TAILQ_FIRST(list);
		priv_flow_destroy(priv, list, flow);
	}
}

/**
 * Create drop queue.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   0 on success.
 */
int
priv_flow_create_drop_queue(struct priv *priv)
{
	struct mlx5_hrxq_drop *fdq = NULL;

	assert(priv->pd);
	assert(priv->ctx);
	fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
	if (!fdq) {
		WARN("cannot allocate memory for drop queue");
		goto error;
	}
	fdq->cq = ibv_create_cq(priv->ctx, 1, NULL, NULL, 0);
	if (!fdq->cq) {
		WARN("cannot allocate CQ for drop queue");
		goto error;
	}
	fdq->wq = ibv_create_wq(priv->ctx,
			&(struct ibv_wq_init_attr){
			.wq_type = IBV_WQT_RQ,
			.max_wr = 1,
			.max_sge = 1,
			.pd = priv->pd,
			.cq = fdq->cq,
			});
	if (!fdq->wq) {
		WARN("cannot allocate WQ for drop queue");
		goto error;
	}
	fdq->ind_table = ibv_create_rwq_ind_table(priv->ctx,
			&(struct ibv_rwq_ind_table_init_attr){
			.log_ind_tbl_size = 0,
			.ind_tbl = &fdq->wq,
			.comp_mask = 0,
			});
	if (!fdq->ind_table) {
		WARN("cannot allocate indirection table for drop queue");
		goto error;
	}
	fdq->qp = ibv_create_qp_ex(priv->ctx,
		&(struct ibv_qp_init_attr_ex){
			.qp_type = IBV_QPT_RAW_PACKET,
			.comp_mask =
				IBV_QP_INIT_ATTR_PD |
				IBV_QP_INIT_ATTR_IND_TABLE |
				IBV_QP_INIT_ATTR_RX_HASH,
			.rx_hash_conf = (struct ibv_rx_hash_conf){
				.rx_hash_function =
					IBV_RX_HASH_FUNC_TOEPLITZ,
				.rx_hash_key_len = rss_hash_default_key_len,
				.rx_hash_key = rss_hash_default_key,
				.rx_hash_fields_mask = 0,
				},
			.rwq_ind_tbl = fdq->ind_table,
			.pd = priv->pd,
		});
	if (!fdq->qp) {
		WARN("cannot allocate QP for drop queue");
		goto error;
	}
	priv->flow_drop_queue = fdq;
	return 0;
error:
	if (fdq->qp)
		claim_zero(ibv_destroy_qp(fdq->qp));
	if (fdq->ind_table)
		claim_zero(ibv_destroy_rwq_ind_table(fdq->ind_table));
	if (fdq->wq)
		claim_zero(ibv_destroy_wq(fdq->wq));
	if (fdq->cq)
		claim_zero(ibv_destroy_cq(fdq->cq));
	if (fdq)
		rte_free(fdq);
	priv->flow_drop_queue = NULL;
	return -1;
}
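
/*
 * Note: the drop path only needs a QP that flow rules can attach to,
 * so the queue is built from minimal objects: a 1-entry CQ and WQ, a
 * log_ind_tbl_size == 0 indirection table and a hash QP with an empty
 * rx_hash_fields_mask. Packets steered here are never polled.
 */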

/**
 * Delete drop queue.
 *
 * @param priv
 *   Pointer to private structure.
 */
void
priv_flow_delete_drop_queue(struct priv *priv)
{
	struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;

	if (!fdq)
		return;
	if (fdq->qp)
		claim_zero(ibv_destroy_qp(fdq->qp));
	if (fdq->ind_table)
		claim_zero(ibv_destroy_rwq_ind_table(fdq->ind_table));
	if (fdq->wq)
		claim_zero(ibv_destroy_wq(fdq->wq));
	if (fdq->cq)
		claim_zero(ibv_destroy_cq(fdq->cq));
	rte_free(fdq);
	priv->flow_drop_queue = NULL;
}

/**
 * Remove all flows.
 *
 * @param priv
 *   Pointer to private structure.
 * @param list
 *   Pointer to a TAILQ flow list.
 */
void
priv_flow_stop(struct priv *priv, struct mlx5_flows *list)
{
	struct rte_flow *flow;

	TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
		unsigned int i;

		if (flow->drop) {
			if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
				continue;
			claim_zero(ibv_destroy_flow
				   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
			flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
			/* Next flow. */
			continue;
		}
		if (flow->mark) {
			struct mlx5_ind_table_ibv *ind_tbl = NULL;

			for (i = 0; i != hash_rxq_init_n; ++i) {
				if (!flow->frxq[i].hrxq)
					continue;
				ind_tbl = flow->frxq[i].hrxq->ind_table;
				break;
			}
			for (i = 0; i != ind_tbl->queues_n; ++i)
				(*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
		}
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (!flow->frxq[i].ibv_flow)
				continue;
			claim_zero(ibv_destroy_flow(flow->frxq[i].ibv_flow));
			flow->frxq[i].ibv_flow = NULL;
			mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq);
			flow->frxq[i].hrxq = NULL;
		}
		DEBUG("Flow %p removed", (void *)flow);
	}
}

/**
 * Add all flows.
 *
 * @param priv
 *   Pointer to private structure.
 * @param list
 *   Pointer to a TAILQ flow list.
 *
 * @return
 *   0 on success, an errno value otherwise and rte_errno is set.
 */
int
priv_flow_start(struct priv *priv, struct mlx5_flows *list)
{
	struct rte_flow *flow;

	TAILQ_FOREACH(flow, list, next) {
		unsigned int i;

		if (flow->drop) {
			flow->frxq[HASH_RXQ_ETH].ibv_flow =
				ibv_create_flow
				(priv->flow_drop_queue->qp,
				 flow->frxq[HASH_RXQ_ETH].ibv_attr);
			if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
				DEBUG("Flow %p cannot be applied",
				      (void *)flow);
				rte_errno = EINVAL;
				return rte_errno;
			}
			DEBUG("Flow %p applied", (void *)flow);
			/* Next flow. */
			continue;
		}
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (!flow->frxq[i].ibv_attr)
				continue;
			flow->frxq[i].hrxq =
				mlx5_priv_hrxq_get(priv, flow->rss_conf.rss_key,
						   flow->rss_conf.rss_key_len,
						   hash_rxq_init[i].hash_fields,
						   (*flow->queues),
						   flow->queues_n);
			if (flow->frxq[i].hrxq)
				goto flow_create;
			flow->frxq[i].hrxq =
				mlx5_priv_hrxq_new(priv, flow->rss_conf.rss_key,
						   flow->rss_conf.rss_key_len,
						   hash_rxq_init[i].hash_fields,
						   (*flow->queues),
						   flow->queues_n);
			if (!flow->frxq[i].hrxq) {
				DEBUG("Flow %p cannot be applied",
				      (void *)flow);
				rte_errno = EINVAL;
				return rte_errno;
			}
flow_create:
			flow->frxq[i].ibv_flow =
				ibv_create_flow(flow->frxq[i].hrxq->qp,
						flow->frxq[i].ibv_attr);
			if (!flow->frxq[i].ibv_flow) {
				DEBUG("Flow %p cannot be applied",
				      (void *)flow);
				rte_errno = EINVAL;
				return rte_errno;
			}
			DEBUG("Flow %p applied", (void *)flow);
		}
		if (!flow->mark)
			continue;
		for (i = 0; i != flow->queues_n; ++i)
			(*priv->rxqs)[(*flow->queues)[i]]->mark = 1;
	}
	return 0;
}

/**
 * Verify the flow list is empty.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   The number of flows not released.
 */
int
priv_flow_verify(struct priv *priv)
{
	struct rte_flow *flow;
	int ret = 0;

	TAILQ_FOREACH(flow, &priv->flows, next) {
		DEBUG("%p: flow %p still referenced", (void *)priv,
		      (void *)flow);
		++ret;
	}
	return ret;
}

/**
 * Enable a control flow configured from the control plane.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param eth_spec
 *   An Ethernet flow spec to apply.
 * @param eth_mask
 *   An Ethernet flow mask to apply.
 * @param vlan_spec
 *   A VLAN flow spec to apply.
 * @param vlan_mask
 *   A VLAN flow mask to apply.
 *
 * @return
 *   0 on success.
 */
int
mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
		    struct rte_flow_item_eth *eth_spec,
		    struct rte_flow_item_eth *eth_mask,
		    struct rte_flow_item_vlan *vlan_spec,
		    struct rte_flow_item_vlan *vlan_mask)
{
	struct priv *priv = dev->data->dev_private;
	const struct rte_flow_attr attr = {
		.ingress = 1,
		.priority = MLX5_CTRL_FLOW_PRIORITY,
	};
	struct rte_flow_item items[] = {
		{
			.type = RTE_FLOW_ITEM_TYPE_ETH,
			.spec = eth_spec,
			.last = NULL,
			.mask = eth_mask,
		},
		{
			.type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
				RTE_FLOW_ITEM_TYPE_END,
			.spec = vlan_spec,
			.last = NULL,
			.mask = vlan_mask,
		},
		{
			.type = RTE_FLOW_ITEM_TYPE_END,
		},
	};
	struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_RSS,
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		},
	};
	struct rte_flow *flow;
	struct rte_flow_error error;
	unsigned int i;
	union {
		struct rte_flow_action_rss rss;
		struct {
			const struct rte_eth_rss_conf *rss_conf;
			uint16_t num;
			uint16_t queue[RTE_MAX_QUEUES_PER_PORT];
		} local;
	} action_rss;

	if (!priv->reta_idx_n)
		return EINVAL;
	for (i = 0; i != priv->reta_idx_n; ++i)
		action_rss.local.queue[i] = (*priv->reta_idx)[i];
	action_rss.local.rss_conf = &priv->rss_conf;
	action_rss.local.num = priv->reta_idx_n;
	actions[0].conf = (const void *)&action_rss.rss;
	flow = priv_flow_create(priv, &priv->ctrl_flows, &attr, items, actions,
				&error);
	if (!flow)
		return rte_errno;
	return 0;
}
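
/*
 * Note: the action_rss union above gives struct rte_flow_action_rss,
 * whose queue[] is a flexible array member, static backing storage for
 * up to RTE_MAX_QUEUES_PER_PORT entries, avoiding a dynamic allocation.
 */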

/**
 * Enable a control flow configured from the control plane.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param eth_spec
 *   An Ethernet flow spec to apply.
 * @param eth_mask
 *   An Ethernet flow mask to apply.
 *
 * @return
 *   0 on success.
 */
int
mlx5_ctrl_flow(struct rte_eth_dev *dev,
	       struct rte_flow_item_eth *eth_spec,
	       struct rte_flow_item_eth *eth_mask)
{
	return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
}

/**
 * Destroy a flow.
 *
 * @see rte_flow_destroy()
 * @see rte_flow_ops
 */
int
mlx5_flow_destroy(struct rte_eth_dev *dev,
		  struct rte_flow *flow,
		  struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	(void)error;
	priv_lock(priv);
	priv_flow_destroy(priv, &priv->flows, flow);
	priv_unlock(priv);
	return 0;
}

/**
 * Destroy all flows.
 *
 * @see rte_flow_flush()
 * @see rte_flow_ops
 */
int
mlx5_flow_flush(struct rte_eth_dev *dev,
		struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	(void)error;
	priv_lock(priv);
	priv_flow_flush(priv, &priv->flows);
	priv_unlock(priv);
	return 0;
}

#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
/**
 * Query flow counter.
 *
 * @param cs
 *   The counter set.
 * @param counter_stats
 *   The counter stats stored so far.
 * @param[out] query_count
 *   Returned data from the counter.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, an errno value otherwise and rte_errno is set.
 */
static int
priv_flow_query_count(struct ibv_counter_set *cs,
		      struct mlx5_flow_counter_stats *counter_stats,
		      struct rte_flow_query_count *query_count,
		      struct rte_flow_error *error)
{
	uint64_t counters[2];
	struct ibv_query_counter_set_attr query_cs_attr = {
		.cs = cs,
		.query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
	};
	struct ibv_counter_set_data query_out = {
		.out = counters,
		.outlen = 2 * sizeof(uint64_t),
	};
	int res = ibv_query_counter_set(&query_cs_attr, &query_out);

	if (res) {
		rte_flow_error_set(error, -res,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "cannot read counter");
		return -res;
	}
	query_count->hits_set = 1;
	query_count->bytes_set = 1;
	query_count->hits = counters[0] - counter_stats->hits;
	query_count->bytes = counters[1] - counter_stats->bytes;
	if (query_count->reset) {
		counter_stats->hits = counters[0];
		counter_stats->bytes = counters[1];
	}
	return 0;
}
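
/*
 * Example (illustrative): if the hardware reports 1000 hits while the
 * stored baseline is 400, the query returns 600. With query_count->reset
 * set, the baseline moves to 1000 so the next query counts from there.
 */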
/**
 * Query an existing flow.
 *
 * @see rte_flow_query()
 * @see rte_flow_ops
 */
int
mlx5_flow_query(struct rte_eth_dev *dev,
		struct rte_flow *flow,
		enum rte_flow_action_type action __rte_unused,
		void *data,
		struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;
	int res = EINVAL;

	priv_lock(priv);
	if (flow->cs) {
		res = priv_flow_query_count(flow->cs,
					    &flow->counter_stats,
					    (struct rte_flow_query_count *)data,
					    error);
	} else {
		rte_flow_error_set(error, res,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "no counter found for flow");
	}
	priv_unlock(priv);
	return -res;
}
#endif /* HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT */
/**
 * Isolated mode.
 *
 * @see rte_flow_isolate()
 * @see rte_flow_ops
 */
int
mlx5_flow_isolate(struct rte_eth_dev *dev,
		  int enable,
		  struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	priv_lock(priv);
	if (dev->data->dev_started) {
		rte_flow_error_set(error, EBUSY,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "port must be stopped first");
		priv_unlock(priv);
		return -rte_errno;
	}
	priv->isolated = !!enable;
	if (enable)
		priv->dev->dev_ops = &mlx5_dev_ops_isolate;
	else
		priv->dev->dev_ops = &mlx5_dev_ops;
	priv_unlock(priv);
	return 0;
}
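#ifdef MLX5_FLOW_EXAMPLES /* Hypothetical guard, never defined. */
/*
 * Sketch: entering isolated mode. The callback above refuses started ports
 * with EBUSY, so the call must happen between configuration and start.
 * The function name is hypothetical.
 */
static int
mlx5_example_isolate(uint16_t port_id)
{
	struct rte_flow_error err;

	if (rte_flow_isolate(port_id, 1, &err))
		return -rte_errno;
	/*
	 * From here on, only flows created through rte_flow receive
	 * traffic; the port can now be started.
	 */
	return rte_eth_dev_start(port_id);
}
#endif /* MLX5_FLOW_EXAMPLES */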
/**
 * Convert a flow director filter to a generic flow.
 *
 * @param priv
 *   Private structure.
 * @param fdir_filter
 *   Flow director filter to add.
 * @param attributes
 *   Generic flow parameters structure.
 *
 * @return
 *   0 on success, errno value on error.
 */
static int
priv_fdir_filter_convert(struct priv *priv,
			 const struct rte_eth_fdir_filter *fdir_filter,
			 struct mlx5_fdir *attributes)
{
	const struct rte_eth_fdir_input *input = &fdir_filter->input;
	/* Validate queue number. */
	if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
		ERROR("invalid queue number %d", fdir_filter->action.rx_queue);
		return EINVAL;
	}
	attributes->attr.ingress = 1;
	attributes->items[0] = (struct rte_flow_item) {
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.spec = &attributes->l2,
		.mask = &attributes->l2_mask,
	};
	switch (fdir_filter->action.behavior) {
	case RTE_ETH_FDIR_ACCEPT:
		attributes->actions[0] = (struct rte_flow_action){
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &attributes->queue,
		};
		break;
	case RTE_ETH_FDIR_REJECT:
		attributes->actions[0] = (struct rte_flow_action){
			.type = RTE_FLOW_ACTION_TYPE_DROP,
		};
		break;
	default:
		ERROR("invalid behavior %d", fdir_filter->action.behavior);
		return ENOTSUP;
	}
	attributes->queue.index = fdir_filter->action.rx_queue;
	switch (fdir_filter->input.flow_type) {
	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = input->flow.udp4_flow.ip.src_ip,
			.dst_addr = input->flow.udp4_flow.ip.dst_ip,
			.time_to_live = input->flow.udp4_flow.ip.ttl,
			.type_of_service = input->flow.udp4_flow.ip.tos,
			.next_proto_id = input->flow.udp4_flow.ip.proto,
		};
		attributes->l4.udp.hdr = (struct udp_hdr){
			.src_port = input->flow.udp4_flow.src_port,
			.dst_port = input->flow.udp4_flow.dst_port,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV4,
			.spec = &attributes->l3,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_UDP,
			.spec = &attributes->l4,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = input->flow.tcp4_flow.ip.src_ip,
			.dst_addr = input->flow.tcp4_flow.ip.dst_ip,
			.time_to_live = input->flow.tcp4_flow.ip.ttl,
			.type_of_service = input->flow.tcp4_flow.ip.tos,
			.next_proto_id = input->flow.tcp4_flow.ip.proto,
		};
		attributes->l4.tcp.hdr = (struct tcp_hdr){
			.src_port = input->flow.tcp4_flow.src_port,
			.dst_port = input->flow.tcp4_flow.dst_port,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV4,
			.spec = &attributes->l3,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_TCP,
			.spec = &attributes->l4,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = input->flow.ip4_flow.src_ip,
			.dst_addr = input->flow.ip4_flow.dst_ip,
			.time_to_live = input->flow.ip4_flow.ttl,
			.type_of_service = input->flow.ip4_flow.tos,
			.next_proto_id = input->flow.ip4_flow.proto,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV4,
			.spec = &attributes->l3,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
			.hop_limits = input->flow.udp6_flow.ip.hop_limits,
			.proto = input->flow.udp6_flow.ip.proto,
		};
		memcpy(attributes->l3.ipv6.hdr.src_addr,
		       input->flow.udp6_flow.ip.src_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
		memcpy(attributes->l3.ipv6.hdr.dst_addr,
		       input->flow.udp6_flow.ip.dst_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
		attributes->l4.udp.hdr = (struct udp_hdr){
			.src_port = input->flow.udp6_flow.src_port,
			.dst_port = input->flow.udp6_flow.dst_port,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV6,
			.spec = &attributes->l3,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_UDP,
			.spec = &attributes->l4,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
			.hop_limits = input->flow.tcp6_flow.ip.hop_limits,
			.proto = input->flow.tcp6_flow.ip.proto,
		};
		memcpy(attributes->l3.ipv6.hdr.src_addr,
		       input->flow.tcp6_flow.ip.src_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
		memcpy(attributes->l3.ipv6.hdr.dst_addr,
		       input->flow.tcp6_flow.ip.dst_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
		attributes->l4.tcp.hdr = (struct tcp_hdr){
			.src_port = input->flow.tcp6_flow.src_port,
			.dst_port = input->flow.tcp6_flow.dst_port,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV6,
			.spec = &attributes->l3,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_TCP,
			.spec = &attributes->l4,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
			.hop_limits = input->flow.ipv6_flow.hop_limits,
			.proto = input->flow.ipv6_flow.proto,
		};
		memcpy(attributes->l3.ipv6.hdr.src_addr,
		       input->flow.ipv6_flow.src_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
		memcpy(attributes->l3.ipv6.hdr.dst_addr,
		       input->flow.ipv6_flow.dst_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV6,
			.spec = &attributes->l3,
		};
		break;
	default:
		ERROR("invalid flow type %d",
		      fdir_filter->input.flow_type);
		return ENOTSUP;
	}
	return 0;
}
/**
 * Add new flow director filter and store it in list.
 *
 * @param priv
 *   Private structure.
 * @param fdir_filter
 *   Flow director filter to add.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_fdir_filter_add(struct priv *priv,
		     const struct rte_eth_fdir_filter *fdir_filter)
{
	struct mlx5_fdir attributes = {
		.attr.group = 0,
		.l2_mask = {
			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.type = 0,
		},
	};
	struct mlx5_flow_parse parser = {
		.layer = HASH_RXQ_ETH,
	};
	struct rte_flow_error error;
	struct rte_flow *flow;
	int ret;

	ret = priv_fdir_filter_convert(priv, fdir_filter, &attributes);
	if (ret)
		return ret;
	ret = priv_flow_convert(priv, &attributes.attr, attributes.items,
				attributes.actions, &error, &parser);
	if (ret)
		return ret;
	flow = priv_flow_create(priv, &priv->flows, &attributes.attr,
				attributes.items, attributes.actions,
				&error);
	if (flow) {
		DEBUG("FDIR created %p", (void *)flow);
		return 0;
	}
	return ENOTSUP;
}
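#ifdef MLX5_FLOW_EXAMPLES /* Hypothetical guard, never defined. */
/*
 * Sketch: a flow director filter accepted by the add/convert path above.
 * It is turned into an ETH/IPV4/UDP pattern with a QUEUE action targeting
 * RX queue 3. All values are arbitrary; flow director expects addresses
 * and ports in network byte order. The function name is hypothetical.
 */
static int
mlx5_example_fdir_add(struct priv *priv)
{
	struct rte_eth_fdir_filter filter = {
		.input = {
			.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
		},
		.action = {
			.rx_queue = 3,
			.behavior = RTE_ETH_FDIR_ACCEPT,
		},
	};

	filter.input.flow.udp4_flow.ip.src_ip =
		rte_cpu_to_be_32(0x0a000001); /* 10.0.0.1 */
	filter.input.flow.udp4_flow.ip.dst_ip =
		rte_cpu_to_be_32(0x0a000002); /* 10.0.0.2 */
	filter.input.flow.udp4_flow.src_port = rte_cpu_to_be_16(1234);
	filter.input.flow.udp4_flow.dst_port = rte_cpu_to_be_16(5678);
	return priv_fdir_filter_add(priv, &filter);
}
#endif /* MLX5_FLOW_EXAMPLES */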
/**
 * Delete specific filter.
 *
 * @param priv
 *   Private structure.
 * @param fdir_filter
 *   Filter to be deleted.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_fdir_filter_delete(struct priv *priv,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	struct mlx5_fdir attributes = {
		.attr.group = 0,
	};
	struct mlx5_flow_parse parser = {
		.create = 1,
		.layer = HASH_RXQ_ETH,
	};
	struct rte_flow_error error;
	struct rte_flow *flow;
	unsigned int i;
	int ret;

	ret = priv_fdir_filter_convert(priv, fdir_filter, &attributes);
	if (ret)
		return ret;
	ret = priv_flow_convert(priv, &attributes.attr, attributes.items,
				attributes.actions, &error, &parser);
	if (ret)
		goto exit;
	/*
	 * Special case for the drop action, which is only set in the
	 * specifications when the flow is created. In this situation the
	 * drop specification is missing.
	 */
	if (parser.drop) {
		struct ibv_flow_spec_action_drop *drop;

		drop = (void *)((uintptr_t)parser.queue[HASH_RXQ_ETH].ibv_attr +
				parser.queue[HASH_RXQ_ETH].offset);
		*drop = (struct ibv_flow_spec_action_drop){
			.type = IBV_FLOW_SPEC_ACTION_DROP,
			.size = sizeof(struct ibv_flow_spec_action_drop),
		};
		parser.queue[HASH_RXQ_ETH].ibv_attr->num_of_specs++;
	}
	TAILQ_FOREACH(flow, &priv->flows, next) {
		struct ibv_flow_attr *attr;
		struct ibv_spec_header *attr_h;
		void *spec;
		struct ibv_flow_attr *flow_attr;
		struct ibv_spec_header *flow_h;
		void *flow_spec;
		unsigned int specs_n;

		attr = parser.queue[HASH_RXQ_ETH].ibv_attr;
		flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr;
		/* Compare first the attributes. */
		if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
			continue;
		if (attr->num_of_specs == 0)
			continue;
		spec = (void *)((uintptr_t)attr +
				sizeof(struct ibv_flow_attr));
		flow_spec = (void *)((uintptr_t)flow_attr +
				     sizeof(struct ibv_flow_attr));
		specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
		/* Then compare the specifications one by one. */
		for (i = 0; i != specs_n; ++i) {
			attr_h = spec;
			flow_h = flow_spec;
			if (memcmp(spec, flow_spec,
				   RTE_MIN(attr_h->size, flow_h->size)))
				goto wrong_flow;
			spec = (void *)((uintptr_t)spec + attr_h->size);
			flow_spec = (void *)((uintptr_t)flow_spec +
					     flow_h->size);
		}
		/* At this point, the flow matches. */
		break;
wrong_flow:
		/* The flow does not match. */
		continue;
	}
	if (flow)
		priv_flow_destroy(priv, &priv->flows, flow);
exit:
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser.queue[i].ibv_attr)
			rte_free(parser.queue[i].ibv_attr);
	}
	return ret;
}
/**
 * Update queue for specific filter.
 *
 * @param priv
 *   Private structure.
 * @param fdir_filter
 *   Filter to be updated.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_fdir_filter_update(struct priv *priv,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	int ret;

	/* Implemented as a delete followed by an add of the same filter. */
	ret = priv_fdir_filter_delete(priv, fdir_filter);
	if (ret)
		return ret;
	return priv_fdir_filter_add(priv, fdir_filter);
}
/**
 * Flush all filters.
 *
 * @param priv
 *   Private structure.
 */
static void
priv_fdir_filter_flush(struct priv *priv)
{
	priv_flow_flush(priv, &priv->flows);
}
/**
 * Get flow director information.
 *
 * @param priv
 *   Private structure.
 * @param[out] fdir_info
 *   Resulting flow director information.
 */
static void
priv_fdir_info_get(struct priv *priv, struct rte_eth_fdir_info *fdir_info)
{
	struct rte_eth_fdir_masks *mask =
		&priv->dev->data->dev_conf.fdir_conf.mask;

	fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
	fdir_info->guarant_spc = 0;
	rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
	fdir_info->max_flexpayload = 0;
	fdir_info->flow_types_mask[0] = 0;
	fdir_info->flex_payload_unit = 0;
	fdir_info->max_flex_payload_segment_num = 0;
	fdir_info->flex_payload_limit = 0;
	memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
}
/**
 * Deal with flow director operations.
 *
 * @param priv
 *   Pointer to private structure.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_fdir_ctrl_func(struct priv *priv, enum rte_filter_op filter_op, void *arg)
{
	enum rte_fdir_mode fdir_mode =
		priv->dev->data->dev_conf.fdir_conf.mode;
	int ret = 0;

	if (filter_op == RTE_ETH_FILTER_NOP)
		return 0;
	if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
	    fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
		ERROR("%p: flow director mode %d not supported",
		      (void *)priv, fdir_mode);
		return EINVAL;
	}
	switch (filter_op) {
	case RTE_ETH_FILTER_ADD:
		ret = priv_fdir_filter_add(priv, arg);
		break;
	case RTE_ETH_FILTER_UPDATE:
		ret = priv_fdir_filter_update(priv, arg);
		break;
	case RTE_ETH_FILTER_DELETE:
		ret = priv_fdir_filter_delete(priv, arg);
		break;
	case RTE_ETH_FILTER_FLUSH:
		priv_fdir_filter_flush(priv);
		break;
	case RTE_ETH_FILTER_INFO:
		priv_fdir_info_get(priv, arg);
		break;
	default:
		DEBUG("%p: unknown operation %u", (void *)priv, filter_op);
		ret = EINVAL;
		break;
	}
	return ret;
}
/**
 * Manage filter operations.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param filter_type
 *   Filter type.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
int
mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
		     enum rte_filter_type filter_type,
		     enum rte_filter_op filter_op,
		     void *arg)
{
	int ret = EINVAL;
	struct priv *priv = dev->data->dev_private;

	switch (filter_type) {
	case RTE_ETH_FILTER_GENERIC:
		if (filter_op != RTE_ETH_FILTER_GET)
			return -EINVAL;
		*(const void **)arg = &mlx5_flow_ops;
		return 0;
	case RTE_ETH_FILTER_FDIR:
		priv_lock(priv);
		ret = priv_fdir_ctrl_func(priv, filter_op, arg);
		priv_unlock(priv);
		break;
	default:
		ERROR("%p: filter type (%d) not supported",
		      (void *)dev, filter_type);
		break;
	}
	return -ret;
}
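#ifdef MLX5_FLOW_EXAMPLES /* Hypothetical guard, never defined. */
/*
 * Sketch: both dispatch paths above seen from the ethdev API.
 * RTE_ETH_FILTER_GENERIC with RTE_ETH_FILTER_GET is how the rte_flow layer
 * retrieves mlx5_flow_ops; flow director requests are forwarded to
 * priv_fdir_ctrl_func() under the private lock. The function name is
 * hypothetical.
 */
static int
mlx5_example_filter_ctrl(uint16_t port_id,
			 const struct rte_eth_fdir_filter *filter)
{
	const struct rte_flow_ops *ops = NULL;
	int ret;

	/* Retrieve the generic flow ops, as rte_flow does internally. */
	ret = rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_GENERIC,
				      RTE_ETH_FILTER_GET, &ops);
	if (ret || !ops)
		return ret ? ret : -ENOTSUP;
	/* Install a flow director filter through the same entry point. */
	return rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
				       RTE_ETH_FILTER_ADD,
				       (void *)(uintptr_t)filter);
}
#endif /* MLX5_FLOW_EXAMPLES */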