/*-
 *   BSD LICENSE
 *
 *   Copyright 2016 6WIND S.A.
 *   Copyright 2016 Mellanox.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of 6WIND S.A. nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/queue.h>
#include <string.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_ethdev.h>
#include <rte_flow.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>

#include "mlx5.h"
#include "mlx5_defs.h"
#include "mlx5_prm.h"

/* Define minimal priority for control plane flows. */
#define MLX5_CTRL_FLOW_PRIORITY 4

/* Internet Protocol versions. */
#define MLX5_IPV4 4
#define MLX5_IPV6 6

#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
struct ibv_counter_set_init_attr {
	int dummy;
};
struct ibv_flow_spec_counter_action {
	int dummy;
};
struct ibv_counter_set {
	int dummy;
};

static inline int
ibv_destroy_counter_set(struct ibv_counter_set *cs)
{
	(void)cs;
	return -ENOTSUP;
}
#endif

/* Dev ops structure defined in mlx5.c. */
extern const struct eth_dev_ops mlx5_dev_ops;
extern const struct eth_dev_ops mlx5_dev_ops_isolate;

static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data);

static int
mlx5_flow_create_vlan(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data);

static int
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data);

static int
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data);

static int
mlx5_flow_create_udp(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data);

static int
mlx5_flow_create_tcp(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data);

static int
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
		       const void *default_mask,
		       void *data);

struct mlx5_flow_parse;

static void
mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
		      unsigned int size);

static int
mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);

static int
mlx5_flow_create_count(struct priv *priv, struct mlx5_flow_parse *parser);

/* Hash RX queue types. */
enum hash_rxq_type {
	HASH_RXQ_TCPV4,
	HASH_RXQ_UDPV4,
	HASH_RXQ_IPV4,
	HASH_RXQ_TCPV6,
	HASH_RXQ_UDPV6,
	HASH_RXQ_IPV6,
	HASH_RXQ_ETH,
};

/* Initialization data for hash RX queue. */
struct hash_rxq_init {
	uint64_t hash_fields; /* Fields that participate in the hash. */
	uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
	unsigned int flow_priority; /* Flow priority to use. */
	unsigned int ip_version; /* Internet protocol. */
};

/* Initialization data for hash RX queues. */
const struct hash_rxq_init hash_rxq_init[] = {
	[HASH_RXQ_TCPV4] = {
		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
				IBV_RX_HASH_DST_IPV4 |
				IBV_RX_HASH_SRC_PORT_TCP |
				IBV_RX_HASH_DST_PORT_TCP),
		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
		.flow_priority = 0,
		.ip_version = MLX5_IPV4,
	},
	[HASH_RXQ_UDPV4] = {
		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
				IBV_RX_HASH_DST_IPV4 |
				IBV_RX_HASH_SRC_PORT_UDP |
				IBV_RX_HASH_DST_PORT_UDP),
		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
		.flow_priority = 0,
		.ip_version = MLX5_IPV4,
	},
	[HASH_RXQ_IPV4] = {
		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
				IBV_RX_HASH_DST_IPV4),
		.dpdk_rss_hf = (ETH_RSS_IPV4 |
				ETH_RSS_FRAG_IPV4),
		.flow_priority = 1,
		.ip_version = MLX5_IPV4,
	},
	[HASH_RXQ_TCPV6] = {
		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
				IBV_RX_HASH_DST_IPV6 |
				IBV_RX_HASH_SRC_PORT_TCP |
				IBV_RX_HASH_DST_PORT_TCP),
		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
		.flow_priority = 0,
		.ip_version = MLX5_IPV6,
	},
	[HASH_RXQ_UDPV6] = {
		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
				IBV_RX_HASH_DST_IPV6 |
				IBV_RX_HASH_SRC_PORT_UDP |
				IBV_RX_HASH_DST_PORT_UDP),
		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
		.flow_priority = 0,
		.ip_version = MLX5_IPV6,
	},
	[HASH_RXQ_IPV6] = {
		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
				IBV_RX_HASH_DST_IPV6),
		.dpdk_rss_hf = (ETH_RSS_IPV6 |
				ETH_RSS_FRAG_IPV6),
		.flow_priority = 1,
		.ip_version = MLX5_IPV6,
	},
	[HASH_RXQ_ETH] = {
		.hash_fields = 0,
		.dpdk_rss_hf = 0,
		.flow_priority = 2,
	},
};

/* Number of entries in hash_rxq_init[]. */
const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
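
/*
 * Illustrative note (not part of the upstream driver): a flow whose RSS
 * configuration includes ETH_RSS_NONFRAG_IPV4_TCP selects the
 * HASH_RXQ_TCPV4 entry above, i.e. a hash over both IPv4 addresses and
 * both TCP ports:
 *
 *	const struct hash_rxq_init *init = &hash_rxq_init[HASH_RXQ_TCPV4];
 *	uint64_t fields = init->hash_fields;
 *	// fields == IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4 |
 *	//	      IBV_RX_HASH_SRC_PORT_TCP | IBV_RX_HASH_DST_PORT_TCP
 *
 * Less specific entries (HASH_RXQ_IPV4, HASH_RXQ_ETH) carry a higher
 * flow_priority value so that more specific rules match first.
 */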

/** Structure for holding counter stats. */
struct mlx5_flow_counter_stats {
	uint64_t hits; /**< Number of packets matched by the rule. */
	uint64_t bytes; /**< Number of bytes matched by the rule. */
};

/** Structure for Drop queue. */
struct mlx5_hrxq_drop {
	struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
	struct ibv_qp *qp; /**< Verbs queue pair. */
	struct ibv_wq *wq; /**< Verbs work queue. */
	struct ibv_cq *cq; /**< Verbs completion queue. */
};

/* Flows structures. */
struct mlx5_flow {
	uint64_t hash_fields; /**< Fields that participate in the hash. */
	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
	struct ibv_flow *ibv_flow; /**< Verbs flow. */
	struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
};

/* Drop flows structures. */
struct mlx5_flow_drop {
	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
	struct ibv_flow *ibv_flow; /**< Verbs flow. */
};

struct rte_flow {
	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
	uint32_t mark:1; /**< Set if the flow is marked. */
	uint32_t drop:1; /**< Drop queue. */
	uint16_t queues_n; /**< Number of entries in queue[]. */
	uint16_t (*queues)[]; /**< Queues indexes to use. */
	struct rte_eth_rss_conf rss_conf; /**< RSS configuration. */
	uint8_t rss_key[40]; /**< Copy of the RSS key. */
	struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
	struct mlx5_flow_counter_stats counter_stats; /**< The counter stats. */
	struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
	/**< Flow with Rx queue. */
};

/** Static initializer for items. */
#define ITEMS(...) \
	(const enum rte_flow_item_type []){ \
		__VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
	}
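
/*
 * For reference, ITEMS(RTE_FLOW_ITEM_TYPE_IPV4, RTE_FLOW_ITEM_TYPE_IPV6)
 * expands to the compound literal:
 *
 *	(const enum rte_flow_item_type []){
 *		RTE_FLOW_ITEM_TYPE_IPV4, RTE_FLOW_ITEM_TYPE_IPV6,
 *		RTE_FLOW_ITEM_TYPE_END,
 *	}
 *
 * so every item list is implicitly terminated by RTE_FLOW_ITEM_TYPE_END.
 */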

/** Structure to generate a simple graph of layers supported by the NIC. */
struct mlx5_flow_items {
	/** List of possible actions for these items. */
	const enum rte_flow_action_type *const actions;
	/** Bit-masks corresponding to the possibilities for the item. */
	const void *mask;
	/**
	 * Default bit-masks to use when item->mask is not provided. When
	 * \default_mask is also NULL, the full supported bit-mask (\mask) is
	 * used instead.
	 */
	const void *default_mask;
	/** Bit-masks size in bytes. */
	const unsigned int mask_sz;
	/**
	 * Conversion function from rte_flow to NIC specific flow.
	 *
	 * @param item
	 *   rte_flow item to convert.
	 * @param default_mask
	 *   Default bit-masks to use when item->mask is not provided.
	 * @param data
	 *   Internal structure to store the conversion.
	 *
	 * @return
	 *   0 on success, negative value otherwise.
	 */
	int (*convert)(const struct rte_flow_item *item,
		       const void *default_mask,
		       void *data);
	/** Size in bytes of the destination structure. */
	const unsigned int dst_sz;
	/** List of possible following items. */
	const enum rte_flow_item_type *const items;
};

/** Valid actions for this PMD. */
static const enum rte_flow_action_type valid_actions[] = {
	RTE_FLOW_ACTION_TYPE_DROP,
	RTE_FLOW_ACTION_TYPE_QUEUE,
	RTE_FLOW_ACTION_TYPE_MARK,
	RTE_FLOW_ACTION_TYPE_FLAG,
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
	RTE_FLOW_ACTION_TYPE_COUNT,
#endif
	RTE_FLOW_ACTION_TYPE_END,
};

/** Graph of supported items and associated actions. */
static const struct mlx5_flow_items mlx5_flow_items[] = {
	[RTE_FLOW_ITEM_TYPE_END] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
			       RTE_FLOW_ITEM_TYPE_VXLAN),
	},
	[RTE_FLOW_ITEM_TYPE_ETH] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
			       RTE_FLOW_ITEM_TYPE_IPV4,
			       RTE_FLOW_ITEM_TYPE_IPV6),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_eth){
			.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
			.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
			.type = -1,
		},
		.default_mask = &rte_flow_item_eth_mask,
		.mask_sz = sizeof(struct rte_flow_item_eth),
		.convert = mlx5_flow_create_eth,
		.dst_sz = sizeof(struct ibv_flow_spec_eth),
	},
	[RTE_FLOW_ITEM_TYPE_VLAN] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
			       RTE_FLOW_ITEM_TYPE_IPV6),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_vlan){
			.tci = -1,
		},
		.default_mask = &rte_flow_item_vlan_mask,
		.mask_sz = sizeof(struct rte_flow_item_vlan),
		.convert = mlx5_flow_create_vlan,
		.dst_sz = 0,
	},
	[RTE_FLOW_ITEM_TYPE_IPV4] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
			       RTE_FLOW_ITEM_TYPE_TCP),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_ipv4){
			.hdr = {
				.src_addr = -1,
				.dst_addr = -1,
				.type_of_service = -1,
				.next_proto_id = -1,
			},
		},
		.default_mask = &rte_flow_item_ipv4_mask,
		.mask_sz = sizeof(struct rte_flow_item_ipv4),
		.convert = mlx5_flow_create_ipv4,
		.dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
	},
	[RTE_FLOW_ITEM_TYPE_IPV6] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
			       RTE_FLOW_ITEM_TYPE_TCP),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_ipv6){
			.hdr = {
				.src_addr = {
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
				},
				.dst_addr = {
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
				},
				.vtc_flow = -1,
				.proto = -1,
				.hop_limits = -1,
			},
		},
		.default_mask = &rte_flow_item_ipv6_mask,
		.mask_sz = sizeof(struct rte_flow_item_ipv6),
		.convert = mlx5_flow_create_ipv6,
		.dst_sz = sizeof(struct ibv_flow_spec_ipv6),
	},
	[RTE_FLOW_ITEM_TYPE_UDP] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_udp){
			.hdr = {
				.src_port = -1,
				.dst_port = -1,
			},
		},
		.default_mask = &rte_flow_item_udp_mask,
		.mask_sz = sizeof(struct rte_flow_item_udp),
		.convert = mlx5_flow_create_udp,
		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
	},
	[RTE_FLOW_ITEM_TYPE_TCP] = {
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_tcp){
			.hdr = {
				.src_port = -1,
				.dst_port = -1,
			},
		},
		.default_mask = &rte_flow_item_tcp_mask,
		.mask_sz = sizeof(struct rte_flow_item_tcp),
		.convert = mlx5_flow_create_tcp,
		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
	},
	[RTE_FLOW_ITEM_TYPE_VXLAN] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_vxlan){
			.vni = "\xff\xff\xff",
		},
		.default_mask = &rte_flow_item_vxlan_mask,
		.mask_sz = sizeof(struct rte_flow_item_vxlan),
		.convert = mlx5_flow_create_vxlan,
		.dst_sz = sizeof(struct ibv_flow_spec_tunnel),
	},
};
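
/*
 * Illustrative walk (not part of the upstream driver): the pattern
 * "eth / ipv4 / udp" is accepted because each item appears in the
 * .items list of its predecessor, starting from the END entry:
 *
 *	mlx5_flow_items[END].items  -> ETH
 *	mlx5_flow_items[ETH].items  -> IPV4
 *	mlx5_flow_items[IPV4].items -> UDP
 *
 * whereas "eth / udp" is rejected since UDP is not reachable from ETH.
 */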

/** Structure to pass to the conversion function. */
struct mlx5_flow_parse {
	uint32_t inner; /**< Set once VXLAN is encountered. */
	uint32_t allmulti:1; /**< Set once allmulti dst MAC is encountered. */
	uint32_t create:1;
	/**< Whether resources should remain after a validate. */
	uint32_t drop:1; /**< Target is a drop queue. */
	uint32_t mark:1; /**< Mark is present in the flow. */
	uint32_t count:1; /**< Count is present in the flow. */
	uint32_t mark_id; /**< Mark identifier. */
	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use. */
	uint16_t queues_n; /**< Number of entries in queue[]. */
	struct rte_eth_rss_conf rss_conf; /**< RSS configuration. */
	uint8_t rss_key[40]; /**< Copy of the RSS key. */
	enum hash_rxq_type layer; /**< Last pattern layer detected. */
	struct ibv_counter_set *cs; /**< Holds the counter set for the rule. */
	struct {
		struct ibv_flow_attr *ibv_attr;
		/**< Pointer to Verbs attributes. */
		unsigned int offset;
		/**< Current position or total size of the attribute. */
	} queue[RTE_DIM(hash_rxq_init)];
};

static const struct rte_flow_ops mlx5_flow_ops = {
	.validate = mlx5_flow_validate,
	.create = mlx5_flow_create,
	.destroy = mlx5_flow_destroy,
	.flush = mlx5_flow_flush,
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
	.query = mlx5_flow_query,
#else
	.query = NULL,
#endif
	.isolate = mlx5_flow_isolate,
};

/* Convert FDIR request to Generic flow. */
struct mlx5_fdir {
	struct rte_flow_attr attr;
	struct rte_flow_action actions[2];
	struct rte_flow_item items[4];
	struct rte_flow_item_eth l2;
	struct rte_flow_item_eth l2_mask;
	union {
		struct rte_flow_item_ipv4 ipv4;
		struct rte_flow_item_ipv6 ipv6;
	} l3;
	union {
		struct rte_flow_item_udp udp;
		struct rte_flow_item_tcp tcp;
	} l4;
	struct rte_flow_action_queue queue;
};

/* Verbs specification header. */
struct ibv_spec_header {
	enum ibv_flow_spec_type type;
	uint16_t size;
};

/**
 * Check support for a given item.
 *
 * @param item[in]
 *   Item specification.
 * @param mask[in]
 *   Bit-masks covering supported fields to compare with spec, last and mask in
 *   \item.
 * @param size
 *   Bit-Mask size in bytes.
 *
 * @return
 *   0 on success.
 */
static int
mlx5_flow_item_validate(const struct rte_flow_item *item,
			const uint8_t *mask, unsigned int size)
{
	if (!item->spec && (item->mask || item->last))
		return -1;
	if (item->spec && !item->mask) {
		unsigned int i;
		const uint8_t *spec = item->spec;

		for (i = 0; i < size; ++i)
			if ((spec[i] | mask[i]) != mask[i])
				return -1;
	}
	if (item->last && !item->mask) {
		unsigned int i;
		const uint8_t *spec = item->last;

		for (i = 0; i < size; ++i)
			if ((spec[i] | mask[i]) != mask[i])
				return -1;
	}
	if (item->mask) {
		unsigned int i;
		const uint8_t *spec = item->mask;

		for (i = 0; i < size; ++i)
			if ((spec[i] | mask[i]) != mask[i])
				return -1;
	}
	if (item->spec && item->last) {
		uint8_t spec[size];
		uint8_t last[size];
		const uint8_t *apply = mask;
		unsigned int i;
		int ret;

		if (item->mask)
			apply = item->mask;
		for (i = 0; i < size; ++i) {
			spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
			last[i] = ((const uint8_t *)item->last)[i] & apply[i];
		}
		ret = memcmp(spec, last, size);
		return ret;
	}
	return 0;
}
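
/*
 * Illustrative usage (not part of the upstream driver): validating a TCP
 * item against the capabilities declared in mlx5_flow_items[]:
 *
 *	const struct mlx5_flow_items *cap =
 *		&mlx5_flow_items[RTE_FLOW_ITEM_TYPE_TCP];
 *	int ret = mlx5_flow_item_validate(item,
 *					  (const uint8_t *)cap->mask,
 *					  cap->mask_sz);
 *
 * A non-zero return means the item requests fields (or a spec/last range)
 * that the supported bit-mask cannot honour.
 */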

/**
 * Copy the user RSS configuration into the parser.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in, out] parser
 *   Internal parser structure.
 * @param rss_conf
 *   User RSS configuration to save.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_flow_convert_rss_conf(struct priv *priv,
			   struct mlx5_flow_parse *parser,
			   const struct rte_eth_rss_conf *rss_conf)
{
	const struct rte_eth_rss_conf *rss;

	if (rss_conf) {
		/* Reject unsupported hash functions. */
		if (rss_conf->rss_hf & MLX5_RSS_HF_MASK)
			return EINVAL;
		rss = rss_conf;
	} else {
		rss = &priv->rss_conf;
	}
	if (rss->rss_key_len > 40)
		return EINVAL;
	parser->rss_conf.rss_key_len = rss->rss_key_len;
	parser->rss_conf.rss_hf = rss->rss_hf;
	memcpy(parser->rss_key, rss->rss_key, rss->rss_key_len);
	parser->rss_conf.rss_key = parser->rss_key;
	return 0;
}

/**
 * Extract attributes into the parser.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_convert_attributes(struct priv *priv,
			     const struct rte_flow_attr *attr,
			     struct rte_flow_error *error,
			     struct mlx5_flow_parse *parser)
{
	(void)priv;
	(void)parser;
	if (attr->group) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
				   NULL,
				   "groups are not supported");
		return -rte_errno;
	}
	if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
				   NULL,
				   "priorities are not supported");
		return -rte_errno;
	}
	if (attr->egress) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
				   NULL,
				   "egress is not supported");
		return -rte_errno;
	}
	if (!attr->ingress) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
				   NULL,
				   "only ingress is supported");
		return -rte_errno;
	}
	return 0;
}

/**
 * Extract actions request to the parser.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_convert_actions(struct priv *priv,
			  const struct rte_flow_action actions[],
			  struct rte_flow_error *error,
			  struct mlx5_flow_parse *parser)
{
	/*
	 * Add default RSS configuration necessary for Verbs to create QP even
	 * if no RSS is necessary.
	 */
	priv_flow_convert_rss_conf(priv, parser,
				   (const struct rte_eth_rss_conf *)
				   &priv->rss_conf);
	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
			continue;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
			parser->drop = 1;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
			const struct rte_flow_action_queue *queue =
				(const struct rte_flow_action_queue *)
				actions->conf;
			uint16_t n;
			uint16_t found = 0;

			if (!queue || (queue->index > (priv->rxqs_n - 1)))
				goto exit_action_not_supported;
			for (n = 0; n < parser->queues_n; ++n) {
				if (parser->queues[n] == queue->index) {
					found = 1;
					break;
				}
			}
			if (parser->queues_n > 1 && !found) {
				rte_flow_error_set(error, ENOTSUP,
					   RTE_FLOW_ERROR_TYPE_ACTION,
					   actions,
					   "queue action not in RSS queues");
				return -rte_errno;
			}
			if (!found) {
				parser->queues_n = 1;
				parser->queues[0] = queue->index;
			}
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
			const struct rte_flow_action_rss *rss =
				(const struct rte_flow_action_rss *)
				actions->conf;
			uint16_t n;

			if (!rss || !rss->num) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "no valid queues");
				return -rte_errno;
			}
			if (parser->queues_n == 1) {
				uint16_t found = 0;

				assert(parser->queues_n);
				for (n = 0; n < rss->num; ++n) {
					if (parser->queues[0] ==
					    rss->queue[n]) {
						found = 1;
						break;
					}
				}
				if (!found) {
					rte_flow_error_set(error, ENOTSUP,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "queue action not in RSS"
						   " queues");
					return -rte_errno;
				}
			}
			for (n = 0; n < rss->num; ++n) {
				if (rss->queue[n] >= priv->rxqs_n) {
					rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "queue id > number of"
						   " queues");
					return -rte_errno;
				}
			}
			for (n = 0; n < rss->num; ++n)
				parser->queues[n] = rss->queue[n];
			parser->queues_n = rss->num;
			if (priv_flow_convert_rss_conf(priv, parser,
						       rss->rss_conf)) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "wrong RSS configuration");
				return -rte_errno;
			}
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
			const struct rte_flow_action_mark *mark =
				(const struct rte_flow_action_mark *)
				actions->conf;

			if (!mark) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "mark must be defined");
				return -rte_errno;
			} else if (mark->id >= MLX5_FLOW_MARK_MAX) {
				rte_flow_error_set(error, ENOTSUP,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "mark must be between 0"
						   " and 16777199");
				return -rte_errno;
			}
			parser->mark = 1;
			parser->mark_id = mark->id;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
			parser->mark = 1;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
			   priv->config.flow_counter_en) {
			parser->count = 1;
		} else {
			goto exit_action_not_supported;
		}
	}
	/* A mark is useless on a drop flow. */
	if (parser->drop && parser->mark)
		parser->mark = 0;
	if (!parser->queues_n && !parser->drop) {
		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "no valid action");
		return -rte_errno;
	}
	return 0;
exit_action_not_supported:
	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
			   actions, "action not supported");
	return -rte_errno;
}

/**
 * Validate items.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] items
 *   Pattern specification (list terminated by the END pattern item).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_convert_items_validate(struct priv *priv,
				 const struct rte_flow_item items[],
				 struct rte_flow_error *error,
				 struct mlx5_flow_parse *parser)
{
	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
	unsigned int i;

	(void)priv;
	/* Initialise the offsets to start after verbs attribute. */
	for (i = 0; i != hash_rxq_init_n; ++i)
		parser->queue[i].offset = sizeof(struct ibv_flow_attr);
	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
		const struct mlx5_flow_items *token = NULL;
		unsigned int n;
		int err;

		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
			continue;
		for (i = 0;
		     cur_item->items &&
		     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
		     ++i) {
			if (cur_item->items[i] == items->type) {
				token = &mlx5_flow_items[items->type];
				break;
			}
		}
		if (!token)
			goto exit_item_not_supported;
		cur_item = token;
		err = mlx5_flow_item_validate(items,
					      (const uint8_t *)cur_item->mask,
					      cur_item->mask_sz);
		if (err)
			goto exit_item_not_supported;
		if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
			if (parser->inner) {
				rte_flow_error_set(error, ENOTSUP,
						   RTE_FLOW_ERROR_TYPE_ITEM,
						   items,
						   "cannot recognize multiple"
						   " VXLAN encapsulations");
				return -rte_errno;
			}
			parser->inner = IBV_FLOW_SPEC_INNER;
		}
		if (parser->drop || parser->queues_n == 1) {
			parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
		} else {
			for (n = 0; n != hash_rxq_init_n; ++n)
				parser->queue[n].offset += cur_item->dst_sz;
		}
	}
	if (parser->drop) {
		parser->queue[HASH_RXQ_ETH].offset +=
			sizeof(struct ibv_flow_spec_action_drop);
	}
	if (parser->mark)
		for (i = 0; i != hash_rxq_init_n; ++i)
			parser->queue[i].offset +=
				sizeof(struct ibv_flow_spec_action_tag);
	if (parser->count) {
		unsigned int size = sizeof(struct ibv_flow_spec_counter_action);

		for (i = 0; i != hash_rxq_init_n; ++i)
			parser->queue[i].offset += size;
	}
	return 0;
exit_item_not_supported:
	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
			   items, "item not supported");
	return -rte_errno;
}

/**
 * Allocate memory space to store verbs flow attributes.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] priority
 *   Flow priority.
 * @param[in] size
 *   Number of bytes to allocate.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A verbs flow attribute on success, NULL otherwise.
 */
static struct ibv_flow_attr *
priv_flow_convert_allocate(struct priv *priv,
			   unsigned int priority,
			   unsigned int size,
			   struct rte_flow_error *error)
{
	struct ibv_flow_attr *ibv_attr;

	(void)priv;
	ibv_attr = rte_calloc(__func__, 1, size, 0);
	if (!ibv_attr) {
		rte_flow_error_set(error, ENOMEM,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "cannot allocate verbs spec attributes.");
		return NULL;
	}
	ibv_attr->priority = priority;
	return ibv_attr;
}

/**
 * Finalise verbs flow attributes.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in, out] parser
 *   Internal parser structure.
 */
static void
priv_flow_convert_finalise(struct priv *priv, struct mlx5_flow_parse *parser)
{
	const unsigned int ipv4 =
		hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
	const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 : HASH_RXQ_TCPV6;
	const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
	const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 : HASH_RXQ_TCPV4;
	const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 : HASH_RXQ_IPV4;
	const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
	unsigned int i;

	(void)priv;
	if (parser->layer == HASH_RXQ_ETH) {
		goto fill;
	} else {
		/*
		 * This layer becomes useless as the pattern defines deeper
		 * layers.
		 */
		rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
		parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
	}
	/* Remove opposite kind of layer e.g. IPv6 if the pattern is IPv4. */
	for (i = ohmin; i != (ohmax + 1); ++i) {
		if (!parser->queue[i].ibv_attr)
			continue;
		rte_free(parser->queue[i].ibv_attr);
		parser->queue[i].ibv_attr = NULL;
	}
	/* Remove impossible flow according to the RSS configuration. */
	if (hash_rxq_init[parser->layer].dpdk_rss_hf &
	    parser->rss_conf.rss_hf) {
		/* Remove any other flow. */
		for (i = hmin; i != (hmax + 1); ++i) {
			if ((i == parser->layer) ||
			    (!parser->queue[i].ibv_attr))
				continue;
			rte_free(parser->queue[i].ibv_attr);
			parser->queue[i].ibv_attr = NULL;
		}
	} else if (!parser->queue[ip].ibv_attr) {
		/* No RSS possible with the current configuration. */
		parser->queues_n = 1;
		return;
	}
fill:
	/*
	 * Fill missing layers in verbs specifications, or compute the correct
	 * offset to allocate the memory space for the attributes and
	 * specifications.
	 */
	for (i = 0; i != hash_rxq_init_n - 1; ++i) {
		union {
			struct ibv_flow_spec_ipv4_ext ipv4;
			struct ibv_flow_spec_ipv6 ipv6;
			struct ibv_flow_spec_tcp_udp udp_tcp;
		} specs;
		void *dst;
		uint16_t size;

		if (i == parser->layer)
			continue;
		if (parser->layer == HASH_RXQ_ETH) {
			if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
				size = sizeof(struct ibv_flow_spec_ipv4_ext);
				specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
					.type = IBV_FLOW_SPEC_IPV4_EXT,
					.size = size,
				};
			} else {
				size = sizeof(struct ibv_flow_spec_ipv6);
				specs.ipv6 = (struct ibv_flow_spec_ipv6){
					.type = IBV_FLOW_SPEC_IPV6,
					.size = size,
				};
			}
			if (parser->queue[i].ibv_attr) {
				dst = (void *)((uintptr_t)
					       parser->queue[i].ibv_attr +
					       parser->queue[i].offset);
				memcpy(dst, &specs, size);
				++parser->queue[i].ibv_attr->num_of_specs;
			}
			parser->queue[i].offset += size;
		}
		if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
		    (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
			size = sizeof(struct ibv_flow_spec_tcp_udp);
			specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
				.type = ((i == HASH_RXQ_UDPV4 ||
					  i == HASH_RXQ_UDPV6) ?
					 IBV_FLOW_SPEC_UDP :
					 IBV_FLOW_SPEC_TCP),
				.size = size,
			};
			if (parser->queue[i].ibv_attr) {
				dst = (void *)((uintptr_t)
					       parser->queue[i].ibv_attr +
					       parser->queue[i].offset);
				memcpy(dst, &specs, size);
				++parser->queue[i].ibv_attr->num_of_specs;
			}
			parser->queue[i].offset += size;
		}
	}
}

/**
 * Validate and convert a flow supported by the NIC.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_convert(struct priv *priv,
		  const struct rte_flow_attr *attr,
		  const struct rte_flow_item items[],
		  const struct rte_flow_action actions[],
		  struct rte_flow_error *error,
		  struct mlx5_flow_parse *parser)
{
	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
	unsigned int i;
	int ret;

	/* First step. Validate the attributes, items and actions. */
	*parser = (struct mlx5_flow_parse){
		.create = parser->create,
		.layer = HASH_RXQ_ETH,
		.mark_id = MLX5_FLOW_MARK_DEFAULT,
	};
	ret = priv_flow_convert_attributes(priv, attr, error, parser);
	if (ret)
		return ret;
	ret = priv_flow_convert_actions(priv, actions, error, parser);
	if (ret)
		return ret;
	ret = priv_flow_convert_items_validate(priv, items, error, parser);
	if (ret)
		return ret;
	priv_flow_convert_finalise(priv, parser);
	/*
	 * Second step.
	 * Allocate the memory space to store verbs specifications.
	 */
	if (parser->drop || parser->queues_n == 1) {
		unsigned int priority =
			attr->priority +
			hash_rxq_init[HASH_RXQ_ETH].flow_priority;
		unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;

		parser->queue[HASH_RXQ_ETH].ibv_attr =
			priv_flow_convert_allocate(priv, priority,
						   offset, error);
		if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
			return ENOMEM;
		parser->queue[HASH_RXQ_ETH].offset =
			sizeof(struct ibv_flow_attr);
	} else {
		for (i = 0; i != hash_rxq_init_n; ++i) {
			unsigned int priority =
				attr->priority +
				hash_rxq_init[i].flow_priority;
			unsigned int offset;

			if (!(parser->rss_conf.rss_hf &
			      hash_rxq_init[i].dpdk_rss_hf) &&
			    (i != HASH_RXQ_ETH))
				continue;
			offset = parser->queue[i].offset;
			parser->queue[i].ibv_attr =
				priv_flow_convert_allocate(priv, priority,
							   offset, error);
			if (!parser->queue[i].ibv_attr)
				goto exit_enomem;
			parser->queue[i].offset = sizeof(struct ibv_flow_attr);
		}
	}
	/* Third step. Conversion parse, fill the specifications. */
	parser->inner = 0;
	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
			continue;
		cur_item = &mlx5_flow_items[items->type];
		ret = cur_item->convert(items,
					(cur_item->default_mask ?
					 cur_item->default_mask :
					 cur_item->mask),
					parser);
		if (ret) {
			rte_flow_error_set(error, ret,
					   RTE_FLOW_ERROR_TYPE_ITEM,
					   items, "item not supported");
			goto exit_free;
		}
	}
	if (parser->mark)
		mlx5_flow_create_flag_mark(parser, parser->mark_id);
	if (parser->count && parser->create) {
		mlx5_flow_create_count(priv, parser);
		if (!parser->cs)
			goto exit_count_error;
	}
	/*
	 * Last step. Complete missing specification to reach the RSS
	 * configuration.
	 */
	if (parser->queues_n > 1) {
		priv_flow_convert_finalise(priv, parser);
	} else {
		/*
		 * Action queues have their priority overridden with the
		 * Ethernet priority; it needs to be adjusted to the most
		 * specific layer priority.
		 */
		parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
			attr->priority +
			hash_rxq_init[parser->layer].flow_priority;
	}
exit_free:
	/* Only verification is expected, all resources should be released. */
	if (!parser->create) {
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (parser->queue[i].ibv_attr) {
				rte_free(parser->queue[i].ibv_attr);
				parser->queue[i].ibv_attr = NULL;
			}
		}
	}
	if (parser->allmulti &&
	    parser->layer == HASH_RXQ_ETH) {
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (!parser->queue[i].ibv_attr)
				continue;
			if (parser->queue[i].ibv_attr->num_of_specs != 1)
				break;
			parser->queue[i].ibv_attr->type =
						IBV_FLOW_ATTR_MC_DEFAULT;
		}
	}
	return ret;
exit_enomem:
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser->queue[i].ibv_attr) {
			rte_free(parser->queue[i].ibv_attr);
			parser->queue[i].ibv_attr = NULL;
		}
	}
	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
			   NULL, "cannot allocate verbs spec attributes.");
	return ret;
exit_count_error:
	rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
			   NULL, "cannot create counter.");
	return rte_errno;
}

/**
 * Copy the specification created into the flow.
 *
 * @param parser
 *   Internal parser structure.
 * @param src
 *   Specification to copy.
 * @param size
 *   Size in bytes of the specification to copy.
 */
static void
mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
		      unsigned int size)
{
	unsigned int i;
	void *dst;

	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (!parser->queue[i].ibv_attr)
			continue;
		/* Specification must be the same L3 type or none. */
		if (parser->layer == HASH_RXQ_ETH ||
		    (hash_rxq_init[parser->layer].ip_version ==
		     hash_rxq_init[i].ip_version) ||
		    (hash_rxq_init[i].ip_version == 0)) {
			dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
					parser->queue[i].offset);
			memcpy(dst, src, size);
			++parser->queue[i].ibv_attr->num_of_specs;
			parser->queue[i].offset += size;
		}
	}
}
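
/*
 * Illustrative layout (not part of the upstream driver): each
 * parser->queue[i].ibv_attr buffer is a struct ibv_flow_attr immediately
 * followed by its specifications, so each copy above lands at the running
 * offset:
 *
 *	+---------------+---------+---------+----
 *	| ibv_flow_attr | spec #0 | spec #1 | ...
 *	+---------------+---------+---------+----
 *	                ^ offset advances by "size" after every copy.
 */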

/**
 * Convert Ethernet item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data)
{
	const struct rte_flow_item_eth *spec = item->spec;
	const struct rte_flow_item_eth *mask = item->mask;
	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
	struct ibv_flow_spec_eth eth = {
		.type = parser->inner | IBV_FLOW_SPEC_ETH,
		.size = eth_size,
	};

	/* Don't update layer for the inner pattern. */
	if (!parser->inner)
		parser->layer = HASH_RXQ_ETH;
	if (spec) {
		unsigned int i;

		if (!mask)
			mask = default_mask;
		memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
		memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
		eth.val.ether_type = spec->type;
		memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
		memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
		eth.mask.ether_type = mask->type;
		/* Remove unwanted bits from values. */
		for (i = 0; i < ETHER_ADDR_LEN; ++i) {
			eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
			eth.val.src_mac[i] &= eth.mask.src_mac[i];
		}
		eth.val.ether_type &= eth.mask.ether_type;
	}
	mlx5_flow_create_copy(parser, &eth, eth_size);
	parser->allmulti = eth.val.dst_mac[0] & 1;
	return 0;
}

/**
 * Convert VLAN item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_vlan(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data)
{
	const struct rte_flow_item_vlan *spec = item->spec;
	const struct rte_flow_item_vlan *mask = item->mask;
	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
	struct ibv_flow_spec_eth *eth;
	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);

	if (spec) {
		unsigned int i;

		if (!mask)
			mask = default_mask;
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (!parser->queue[i].ibv_attr)
				continue;
			/* Update the preceding Ethernet specification. */
			eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
				       parser->queue[i].offset - eth_size);
			eth->val.vlan_tag = spec->tci;
			eth->mask.vlan_tag = mask->tci;
			eth->val.vlan_tag &= eth->mask.vlan_tag;
		}
	}
	return 0;
}

/**
 * Convert IPv4 item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data)
{
	const struct rte_flow_item_ipv4 *spec = item->spec;
	const struct rte_flow_item_ipv4 *mask = item->mask;
	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
	unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
	struct ibv_flow_spec_ipv4_ext ipv4 = {
		.type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
		.size = ipv4_size,
	};

	/* Don't update layer for the inner pattern. */
	if (!parser->inner)
		parser->layer = HASH_RXQ_IPV4;
	if (spec) {
		if (!mask)
			mask = default_mask;
		ipv4.val = (struct ibv_flow_ipv4_ext_filter){
			.src_ip = spec->hdr.src_addr,
			.dst_ip = spec->hdr.dst_addr,
			.proto = spec->hdr.next_proto_id,
			.tos = spec->hdr.type_of_service,
		};
		ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
			.src_ip = mask->hdr.src_addr,
			.dst_ip = mask->hdr.dst_addr,
			.proto = mask->hdr.next_proto_id,
			.tos = mask->hdr.type_of_service,
		};
		/* Remove unwanted bits from values. */
		ipv4.val.src_ip &= ipv4.mask.src_ip;
		ipv4.val.dst_ip &= ipv4.mask.dst_ip;
		ipv4.val.proto &= ipv4.mask.proto;
		ipv4.val.tos &= ipv4.mask.tos;
	}
	mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
	return 0;
}

/**
 * Convert IPv6 item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data)
{
	const struct rte_flow_item_ipv6 *spec = item->spec;
	const struct rte_flow_item_ipv6 *mask = item->mask;
	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
	unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
	struct ibv_flow_spec_ipv6 ipv6 = {
		.type = parser->inner | IBV_FLOW_SPEC_IPV6,
		.size = ipv6_size,
	};

	/* Don't update layer for the inner pattern. */
	if (!parser->inner)
		parser->layer = HASH_RXQ_IPV6;
	if (spec) {
		unsigned int i;

		if (!mask)
			mask = default_mask;
		memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
		       RTE_DIM(ipv6.val.src_ip));
		memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
		       RTE_DIM(ipv6.val.dst_ip));
		memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
		       RTE_DIM(ipv6.mask.src_ip));
		memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
		       RTE_DIM(ipv6.mask.dst_ip));
		ipv6.val.flow_label = spec->hdr.vtc_flow;
		ipv6.val.next_hdr = spec->hdr.proto;
		ipv6.val.hop_limit = spec->hdr.hop_limits;
		ipv6.mask.flow_label = mask->hdr.vtc_flow;
		ipv6.mask.next_hdr = mask->hdr.proto;
		ipv6.mask.hop_limit = mask->hdr.hop_limits;
		/* Remove unwanted bits from values. */
		for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
			ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
			ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
		}
		ipv6.val.flow_label &= ipv6.mask.flow_label;
		ipv6.val.next_hdr &= ipv6.mask.next_hdr;
		ipv6.val.hop_limit &= ipv6.mask.hop_limit;
	}
	mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
	return 0;
}

/**
 * Convert UDP item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_udp(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data)
{
	const struct rte_flow_item_udp *spec = item->spec;
	const struct rte_flow_item_udp *mask = item->mask;
	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
	unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
	struct ibv_flow_spec_tcp_udp udp = {
		.type = parser->inner | IBV_FLOW_SPEC_UDP,
		.size = udp_size,
	};

	/* Don't update layer for the inner pattern. */
	if (!parser->inner) {
		if (parser->layer == HASH_RXQ_IPV4)
			parser->layer = HASH_RXQ_UDPV4;
		else
			parser->layer = HASH_RXQ_UDPV6;
	}
	if (spec) {
		if (!mask)
			mask = default_mask;
		udp.val.dst_port = spec->hdr.dst_port;
		udp.val.src_port = spec->hdr.src_port;
		udp.mask.dst_port = mask->hdr.dst_port;
		udp.mask.src_port = mask->hdr.src_port;
		/* Remove unwanted bits from values. */
		udp.val.src_port &= udp.mask.src_port;
		udp.val.dst_port &= udp.mask.dst_port;
	}
	mlx5_flow_create_copy(parser, &udp, udp_size);
	return 0;
}

/**
 * Convert TCP item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_tcp(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data)
{
	const struct rte_flow_item_tcp *spec = item->spec;
	const struct rte_flow_item_tcp *mask = item->mask;
	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
	unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
	struct ibv_flow_spec_tcp_udp tcp = {
		.type = parser->inner | IBV_FLOW_SPEC_TCP,
		.size = tcp_size,
	};

	/* Don't update layer for the inner pattern. */
	if (!parser->inner) {
		if (parser->layer == HASH_RXQ_IPV4)
			parser->layer = HASH_RXQ_TCPV4;
		else
			parser->layer = HASH_RXQ_TCPV6;
	}
	if (spec) {
		if (!mask)
			mask = default_mask;
		tcp.val.dst_port = spec->hdr.dst_port;
		tcp.val.src_port = spec->hdr.src_port;
		tcp.mask.dst_port = mask->hdr.dst_port;
		tcp.mask.src_port = mask->hdr.src_port;
		/* Remove unwanted bits from values. */
		tcp.val.src_port &= tcp.mask.src_port;
		tcp.val.dst_port &= tcp.mask.dst_port;
	}
	mlx5_flow_create_copy(parser, &tcp, tcp_size);
	return 0;
}

/**
 * Convert VXLAN item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
		       const void *default_mask,
		       void *data)
{
	const struct rte_flow_item_vxlan *spec = item->spec;
	const struct rte_flow_item_vxlan *mask = item->mask;
	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
	struct ibv_flow_spec_tunnel vxlan = {
		.type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
		.size = size,
	};
	union vni {
		uint32_t vlan_id;
		uint8_t vni[4];
	} id;

	id.vni[0] = 0;
	parser->inner = IBV_FLOW_SPEC_INNER;
	if (spec) {
		if (!mask)
			mask = default_mask;
		memcpy(&id.vni[1], spec->vni, 3);
		vxlan.val.tunnel_id = id.vlan_id;
		memcpy(&id.vni[1], mask->vni, 3);
		vxlan.mask.tunnel_id = id.vlan_id;
		/* Remove unwanted bits from values. */
		vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
	}
	/*
	 * A tunnel ID of 0 is equivalent to not adding a VXLAN layer: if only
	 * this layer is defined in the Verbs specification, it is interpreted
	 * as a wildcard and all packets will match the rule. If it follows a
	 * full stack layer (e.g. eth / ipv4 / udp), all packets matching the
	 * preceding layers will also match the rule.
	 * To avoid such a situation, VNI 0 is currently refused.
	 */
	if (!vxlan.val.tunnel_id)
		return EINVAL;
	mlx5_flow_create_copy(parser, &vxlan, size);
	return 0;
}
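
/*
 * Illustrative note (not part of the upstream driver): the 24-bit VNI is
 * copied into bytes 1-3 of the union so that byte 0 of the resulting
 * 32-bit tunnel ID stays zero:
 *
 *	union vni id;
 *
 *	id.vni[0] = 0;
 *	memcpy(&id.vni[1], spec->vni, 3);	// e.g. VNI bytes 12 34 56
 *	// id.vlan_id now holds the tunnel ID in the byte order the
 *	// device expects for ibv_flow_spec_tunnel.
 */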

/**
 * Convert mark/flag action to Verbs specification.
 *
 * @param parser
 *   Internal parser structure.
 * @param mark_id
 *   Mark identifier.
 */
static int
mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
{
	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
	struct ibv_flow_spec_action_tag tag = {
		.type = IBV_FLOW_SPEC_ACTION_TAG,
		.size = size,
		.tag_id = mlx5_flow_mark_set(mark_id),
	};

	assert(parser->mark);
	mlx5_flow_create_copy(parser, &tag, size);
	return 0;
}

/**
 * Convert count action to Verbs specification.
 *
 * @param priv
 *   Pointer to private structure.
 * @param parser
 *   Pointer to MLX5 flow parser structure.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
mlx5_flow_create_count(struct priv *priv __rte_unused,
		       struct mlx5_flow_parse *parser __rte_unused)
{
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
	unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
	struct ibv_counter_set_init_attr init_attr = {0};
	struct ibv_flow_spec_counter_action counter = {
		.type = IBV_FLOW_SPEC_ACTION_COUNT,
		.size = size,
		.counter_set_handle = 0,
	};

	init_attr.counter_set_id = 0;
	parser->cs = ibv_create_counter_set(priv->ctx, &init_attr);
	if (!parser->cs)
		return EINVAL;
	counter.counter_set_handle = parser->cs->handle;
	mlx5_flow_create_copy(parser, &counter, size);
#endif
	return 0;
}

/**
 * Complete flow rule creation with a drop queue.
 *
 * @param priv
 *   Pointer to private structure.
 * @param parser
 *   Internal parser structure.
 * @param flow
 *   Pointer to the rte_flow.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_flow_create_action_queue_drop(struct priv *priv,
				   struct mlx5_flow_parse *parser,
				   struct rte_flow *flow,
				   struct rte_flow_error *error)
{
	struct ibv_flow_spec_action_drop *drop;
	unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
	int err = 0;

	assert(priv->pd);
	assert(priv->ctx);
	flow->drop = 1;
	drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
			parser->queue[HASH_RXQ_ETH].offset);
	*drop = (struct ibv_flow_spec_action_drop){
			.type = IBV_FLOW_SPEC_ACTION_DROP,
			.size = size,
	};
	++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
	parser->queue[HASH_RXQ_ETH].offset += size;
	flow->frxq[HASH_RXQ_ETH].ibv_attr =
		parser->queue[HASH_RXQ_ETH].ibv_attr;
	if (parser->count)
		flow->cs = parser->cs;
	if (!priv->dev->data->dev_started)
		return 0;
	parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
	flow->frxq[HASH_RXQ_ETH].ibv_flow =
		ibv_create_flow(priv->flow_drop_queue->qp,
				flow->frxq[HASH_RXQ_ETH].ibv_attr);
	if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "flow rule creation failure");
		err = ENOMEM;
		goto error;
	}
	return 0;
error:
	assert(flow);
	if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
		claim_zero(ibv_destroy_flow(flow->frxq[HASH_RXQ_ETH].ibv_flow));
		flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
	}
	if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
		rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
		flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
	}
	if (flow->cs) {
		claim_zero(ibv_destroy_counter_set(flow->cs));
		flow->cs = NULL;
		parser->cs = NULL;
	}
	return err;
}

/**
 * Create hash Rx queues when RSS is enabled.
 *
 * @param priv
 *   Pointer to private structure.
 * @param parser
 *   Internal parser structure.
 * @param flow
 *   Pointer to the rte_flow.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, an errno value otherwise and rte_errno is set.
 */
static int
priv_flow_create_action_queue_rss(struct priv *priv,
				  struct mlx5_flow_parse *parser,
				  struct rte_flow *flow,
				  struct rte_flow_error *error)
{
	unsigned int i;

	for (i = 0; i != hash_rxq_init_n; ++i) {
		uint64_t hash_fields;

		if (!parser->queue[i].ibv_attr)
			continue;
		flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
		parser->queue[i].ibv_attr = NULL;
		hash_fields = hash_rxq_init[i].hash_fields;
		if (!priv->dev->data->dev_started)
			continue;
		flow->frxq[i].hrxq =
			mlx5_priv_hrxq_get(priv,
					   parser->rss_conf.rss_key,
					   parser->rss_conf.rss_key_len,
					   hash_fields,
					   parser->queues,
					   parser->queues_n);
		if (flow->frxq[i].hrxq)
			continue;
		flow->frxq[i].hrxq =
			mlx5_priv_hrxq_new(priv,
					   parser->rss_conf.rss_key,
					   parser->rss_conf.rss_key_len,
					   hash_fields,
					   parser->queues,
					   parser->queues_n);
		if (!flow->frxq[i].hrxq) {
			rte_flow_error_set(error, ENOMEM,
					   RTE_FLOW_ERROR_TYPE_HANDLE,
					   NULL, "cannot create hash rxq");
			return ENOMEM;
		}
	}
	return 0;
}

/**
 * Complete flow rule creation.
 *
 * @param priv
 *   Pointer to private structure.
 * @param parser
 *   Internal parser structure.
 * @param flow
 *   Pointer to the rte_flow.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, an errno value otherwise and rte_errno is set.
 */
static int
priv_flow_create_action_queue(struct priv *priv,
			      struct mlx5_flow_parse *parser,
			      struct rte_flow *flow,
			      struct rte_flow_error *error)
{
	int err = 0;
	unsigned int i;

	assert(priv->pd);
	assert(priv->ctx);
	assert(!parser->drop);
	err = priv_flow_create_action_queue_rss(priv, parser, flow, error);
	if (err)
		goto error;
	if (parser->count)
		flow->cs = parser->cs;
	if (!priv->dev->data->dev_started)
		return 0;
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (!flow->frxq[i].hrxq)
			continue;
		flow->frxq[i].ibv_flow =
			ibv_create_flow(flow->frxq[i].hrxq->qp,
					flow->frxq[i].ibv_attr);
		if (!flow->frxq[i].ibv_flow) {
			rte_flow_error_set(error, ENOMEM,
					   RTE_FLOW_ERROR_TYPE_HANDLE,
					   NULL, "flow rule creation failure");
			err = ENOMEM;
			goto error;
		}
		DEBUG("%p type %d QP %p ibv_flow %p",
		      (void *)flow, i,
		      (void *)flow->frxq[i].hrxq,
		      (void *)flow->frxq[i].ibv_flow);
	}
	for (i = 0; i != parser->queues_n; ++i) {
		struct mlx5_rxq_data *q =
			(*priv->rxqs)[parser->queues[i]];

		q->mark |= parser->mark;
	}
	return 0;
error:
	assert(flow);
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (flow->frxq[i].ibv_flow) {
			struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;

			claim_zero(ibv_destroy_flow(ibv_flow));
		}
		if (flow->frxq[i].hrxq)
			mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq);
		if (flow->frxq[i].ibv_attr)
			rte_free(flow->frxq[i].ibv_attr);
	}
	if (flow->cs) {
		claim_zero(ibv_destroy_counter_set(flow->cs));
		flow->cs = NULL;
		parser->cs = NULL;
	}
	return err;
}

/**
 * Convert a flow.
 *
 * @param priv
 *   Pointer to private structure.
 * @param list
 *   Pointer to a TAILQ flow list.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A flow on success, NULL otherwise.
 */
static struct rte_flow *
priv_flow_create(struct priv *priv,
		 struct mlx5_flows *list,
		 const struct rte_flow_attr *attr,
		 const struct rte_flow_item items[],
		 const struct rte_flow_action actions[],
		 struct rte_flow_error *error)
{
	struct mlx5_flow_parse parser = { .create = 1, };
	struct rte_flow *flow = NULL;
	unsigned int i;
	int err;

	err = priv_flow_convert(priv, attr, items, actions, error, &parser);
	if (err)
		goto exit;
	flow = rte_calloc(__func__, 1,
			  sizeof(*flow) + parser.queues_n * sizeof(uint16_t),
			  0);
	if (!flow) {
		rte_flow_error_set(error, ENOMEM,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "cannot allocate flow memory");
		goto exit;
	}
	/* Copy queues configuration. */
	flow->queues = (uint16_t (*)[])(flow + 1);
	memcpy(flow->queues, parser.queues, parser.queues_n * sizeof(uint16_t));
	flow->queues_n = parser.queues_n;
	flow->mark = parser.mark;
	/* Copy RSS configuration. */
	flow->rss_conf = parser.rss_conf;
	flow->rss_conf.rss_key = flow->rss_key;
	memcpy(flow->rss_key, parser.rss_key, parser.rss_conf.rss_key_len);
	/* Finalise the flow. */
	if (parser.drop)
		err = priv_flow_create_action_queue_drop(priv, &parser, flow,
							 error);
	else
		err = priv_flow_create_action_queue(priv, &parser, flow, error);
	if (err)
		goto exit;
	TAILQ_INSERT_TAIL(list, flow, next);
	DEBUG("Flow created %p", (void *)flow);
	return flow;
exit:
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser.queue[i].ibv_attr)
			rte_free(parser.queue[i].ibv_attr);
	}
	rte_free(flow);
	return NULL;
}

/**
 * Validate a flow supported by the NIC.
 *
 * @see rte_flow_validate()
 * @see rte_flow_ops
 */
int
mlx5_flow_validate(struct rte_eth_dev *dev,
		   const struct rte_flow_attr *attr,
		   const struct rte_flow_item items[],
		   const struct rte_flow_action actions[],
		   struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;
	int ret;
	struct mlx5_flow_parse parser = { .create = 0, };

	priv_lock(priv);
	ret = priv_flow_convert(priv, attr, items, actions, error, &parser);
	priv_unlock(priv);
	return ret;
}

/**
 * Create a flow.
 *
 * @see rte_flow_create()
 * @see rte_flow_ops
 */
struct rte_flow *
mlx5_flow_create(struct rte_eth_dev *dev,
		 const struct rte_flow_attr *attr,
		 const struct rte_flow_item items[],
		 const struct rte_flow_action actions[],
		 struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;
	struct rte_flow *flow;

	priv_lock(priv);
	flow = priv_flow_create(priv, &priv->flows, attr, items, actions,
				error);
	priv_unlock(priv);
	return flow;
}

/**
 * Destroy a flow.
 *
 * @param priv
 *   Pointer to private structure.
 * @param list
 *   Pointer to a TAILQ flow list.
 * @param[in] flow
 *   Flow to destroy.
 */
static void
priv_flow_destroy(struct priv *priv,
		  struct mlx5_flows *list,
		  struct rte_flow *flow)
{
	unsigned int i;

	if (flow->drop || !flow->mark)
		goto free;
	for (i = 0; i != flow->queues_n; ++i) {
		struct rte_flow *tmp;
		int mark = 0;

		/*
		 * To remove the mark from the queue, the queue must not be
		 * present in any other marked flow (RSS or not).
		 */
		TAILQ_FOREACH(tmp, list, next) {
			unsigned int j;
			uint16_t *tqs = NULL;
			uint16_t tq_n = 0;

			if (!tmp->mark)
				continue;
			for (j = 0; j != hash_rxq_init_n; ++j) {
				if (!tmp->frxq[j].hrxq)
					continue;
				tqs = tmp->frxq[j].hrxq->ind_table->queues;
				tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
			}
			if (!tq_n)
				continue;
			for (j = 0; (j != tq_n) && !mark; j++)
				if (tqs[j] == (*flow->queues)[i])
					mark = 1;
		}
		(*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
	}
free:
	if (flow->drop) {
		if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
			claim_zero(ibv_destroy_flow
				   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
		rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
	} else {
		for (i = 0; i != hash_rxq_init_n; ++i) {
			struct mlx5_flow *frxq = &flow->frxq[i];

			if (frxq->ibv_flow)
				claim_zero(ibv_destroy_flow(frxq->ibv_flow));
			if (frxq->hrxq)
				mlx5_priv_hrxq_release(priv, frxq->hrxq);
			if (frxq->ibv_attr)
				rte_free(frxq->ibv_attr);
		}
	}
	if (flow->cs) {
		claim_zero(ibv_destroy_counter_set(flow->cs));
		flow->cs = NULL;
	}
	TAILQ_REMOVE(list, flow, next);
	DEBUG("Flow destroyed %p", (void *)flow);
	rte_free(flow);
}

/**
 * Destroy all flows.
 *
 * @param priv
 *   Pointer to private structure.
 * @param list
 *   Pointer to a TAILQ flow list.
 */
void
priv_flow_flush(struct priv *priv, struct mlx5_flows *list)
{
	while (!TAILQ_EMPTY(list)) {
		struct rte_flow *flow;

		flow = TAILQ_FIRST(list);
		priv_flow_destroy(priv, list, flow);
	}
}

/**
 * Create drop queue.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   0 on success.
 */
int
priv_flow_create_drop_queue(struct priv *priv)
{
	struct mlx5_hrxq_drop *fdq = NULL;

	assert(priv->pd);
	assert(priv->ctx);
	fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
	if (!fdq) {
		WARN("cannot allocate memory for drop queue");
		goto error;
	}
	fdq->cq = ibv_create_cq(priv->ctx, 1, NULL, NULL, 0);
	if (!fdq->cq) {
		WARN("cannot allocate CQ for drop queue");
		goto error;
	}
	fdq->wq = ibv_create_wq(priv->ctx,
			&(struct ibv_wq_init_attr){
			.wq_type = IBV_WQT_RQ,
			.max_wr = 1,
			.max_sge = 1,
			.pd = priv->pd,
			.cq = fdq->cq,
			});
	if (!fdq->wq) {
		WARN("cannot allocate WQ for drop queue");
		goto error;
	}
	fdq->ind_table = ibv_create_rwq_ind_table(priv->ctx,
			&(struct ibv_rwq_ind_table_init_attr){
			.log_ind_tbl_size = 0,
			.ind_tbl = &fdq->wq,
			.comp_mask = 0,
			});
	if (!fdq->ind_table) {
		WARN("cannot allocate indirection table for drop queue");
		goto error;
	}
	fdq->qp = ibv_create_qp_ex(priv->ctx,
		&(struct ibv_qp_init_attr_ex){
			.qp_type = IBV_QPT_RAW_PACKET,
			.comp_mask =
				IBV_QP_INIT_ATTR_PD |
				IBV_QP_INIT_ATTR_IND_TABLE |
				IBV_QP_INIT_ATTR_RX_HASH,
			.rx_hash_conf = (struct ibv_rx_hash_conf){
				.rx_hash_function =
					IBV_RX_HASH_FUNC_TOEPLITZ,
				.rx_hash_key_len = rss_hash_default_key_len,
				.rx_hash_key = rss_hash_default_key,
				.rx_hash_fields_mask = 0,
				},
			.rwq_ind_tbl = fdq->ind_table,
			.pd = priv->pd,
		});
	if (!fdq->qp) {
		WARN("cannot allocate QP for drop queue");
		goto error;
	}
	priv->flow_drop_queue = fdq;
	return 0;
error:
	/* fdq may be NULL when its own allocation failed. */
	if (fdq) {
		if (fdq->qp)
			claim_zero(ibv_destroy_qp(fdq->qp));
		if (fdq->ind_table)
			claim_zero(ibv_destroy_rwq_ind_table(fdq->ind_table));
		if (fdq->wq)
			claim_zero(ibv_destroy_wq(fdq->wq));
		if (fdq->cq)
			claim_zero(ibv_destroy_cq(fdq->cq));
		rte_free(fdq);
	}
	priv->flow_drop_queue = NULL;
	return -1;
}

/**
 * Delete drop queue.
 *
 * @param priv
 *   Pointer to private structure.
 */
void
priv_flow_delete_drop_queue(struct priv *priv)
{
	struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;

	if (!fdq)
		return;
	if (fdq->qp)
		claim_zero(ibv_destroy_qp(fdq->qp));
	if (fdq->ind_table)
		claim_zero(ibv_destroy_rwq_ind_table(fdq->ind_table));
	if (fdq->wq)
		claim_zero(ibv_destroy_wq(fdq->wq));
	if (fdq->cq)
		claim_zero(ibv_destroy_cq(fdq->cq));
	rte_free(fdq);
	priv->flow_drop_queue = NULL;
}

/**
 * Remove all flows.
 *
 * @param priv
 *   Pointer to private structure.
 * @param list
 *   Pointer to a TAILQ flow list.
 */
void
priv_flow_stop(struct priv *priv, struct mlx5_flows *list)
{
	struct rte_flow *flow;

	TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
		unsigned int i;

		if (flow->drop) {
			if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
				continue;
			claim_zero(ibv_destroy_flow
				   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
			flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
			/* Next flow. */
			continue;
		}
		if (flow->mark) {
			struct mlx5_ind_table_ibv *ind_tbl = NULL;

			for (i = 0; i != hash_rxq_init_n; ++i) {
				if (!flow->frxq[i].hrxq)
					continue;
				ind_tbl = flow->frxq[i].hrxq->ind_table;
			}
			assert(ind_tbl);
			for (i = 0; i != ind_tbl->queues_n; ++i)
				(*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
		}
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (!flow->frxq[i].ibv_flow)
				continue;
			claim_zero(ibv_destroy_flow(flow->frxq[i].ibv_flow));
			flow->frxq[i].ibv_flow = NULL;
			mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq);
			flow->frxq[i].hrxq = NULL;
		}
		DEBUG("Flow %p removed", (void *)flow);
	}
}

/**
 * Add all flows.
 *
 * @param priv
 *   Pointer to private structure.
 * @param list
 *   Pointer to a TAILQ flow list.
 *
 * @return
 *   0 on success, an errno value otherwise and rte_errno is set.
 */
int
priv_flow_start(struct priv *priv, struct mlx5_flows *list)
{
	struct rte_flow *flow;

	TAILQ_FOREACH(flow, list, next) {
		unsigned int i;

		if (flow->drop) {
			flow->frxq[HASH_RXQ_ETH].ibv_flow =
				ibv_create_flow
				(priv->flow_drop_queue->qp,
				 flow->frxq[HASH_RXQ_ETH].ibv_attr);
			if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
				DEBUG("Flow %p cannot be applied",
				      (void *)flow);
				rte_errno = EINVAL;
				return rte_errno;
			}
			DEBUG("Flow %p applied", (void *)flow);
			/* Next flow. */
			continue;
		}
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (!flow->frxq[i].ibv_attr)
				continue;
			flow->frxq[i].hrxq =
				mlx5_priv_hrxq_get(priv, flow->rss_conf.rss_key,
						   flow->rss_conf.rss_key_len,
						   hash_rxq_init[i].hash_fields,
						   (*flow->queues),
						   flow->queues_n);
			if (flow->frxq[i].hrxq)
				goto flow_create;
			flow->frxq[i].hrxq =
				mlx5_priv_hrxq_new(priv, flow->rss_conf.rss_key,
						   flow->rss_conf.rss_key_len,
						   hash_rxq_init[i].hash_fields,
						   (*flow->queues),
						   flow->queues_n);
			if (!flow->frxq[i].hrxq) {
				DEBUG("Flow %p cannot be applied",
				      (void *)flow);
				rte_errno = EINVAL;
				return rte_errno;
			}
flow_create:
			flow->frxq[i].ibv_flow =
				ibv_create_flow(flow->frxq[i].hrxq->qp,
						flow->frxq[i].ibv_attr);
			if (!flow->frxq[i].ibv_flow) {
				DEBUG("Flow %p cannot be applied",
				      (void *)flow);
				rte_errno = EINVAL;
				return rte_errno;
			}
			DEBUG("Flow %p applied", (void *)flow);
		}
		if (!flow->mark)
			continue;
		for (i = 0; i != flow->queues_n; ++i)
			(*priv->rxqs)[(*flow->queues)[i]]->mark = 1;
	}
	return 0;
}

/**
 * Verify the flow list is empty.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   The number of flows not released.
 */
int
priv_flow_verify(struct priv *priv)
{
	struct rte_flow *flow;
	int ret = 0;

	TAILQ_FOREACH(flow, &priv->flows, next) {
		DEBUG("%p: flow %p still referenced", (void *)priv,
		      (void *)flow);
		++ret;
	}
	return ret;
}

/**
 * Enable a control flow configured from the control plane.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param eth_spec
 *   An Ethernet flow spec to apply.
 * @param eth_mask
 *   An Ethernet flow mask to apply.
 * @param vlan_spec
 *   A VLAN flow spec to apply.
 * @param vlan_mask
 *   A VLAN flow mask to apply.
 *
 * @return
 *   0 on success.
 */
int
mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
		    struct rte_flow_item_eth *eth_spec,
		    struct rte_flow_item_eth *eth_mask,
		    struct rte_flow_item_vlan *vlan_spec,
		    struct rte_flow_item_vlan *vlan_mask)
{
	struct priv *priv = dev->data->dev_private;
	const struct rte_flow_attr attr = {
		.ingress = 1,
		.priority = MLX5_CTRL_FLOW_PRIORITY,
	};
	struct rte_flow_item items[] = {
		{
			.type = RTE_FLOW_ITEM_TYPE_ETH,
			.spec = eth_spec,
			.last = NULL,
			.mask = eth_mask,
		},
		{
			.type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
				RTE_FLOW_ITEM_TYPE_END,
			.spec = vlan_spec,
			.last = NULL,
			.mask = vlan_mask,
		},
		{
			.type = RTE_FLOW_ITEM_TYPE_END,
		},
	};
	struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_RSS,
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		},
	};
	struct rte_flow *flow;
	struct rte_flow_error error;
	unsigned int i;
	union {
		struct rte_flow_action_rss rss;
		struct {
			const struct rte_eth_rss_conf *rss_conf;
			uint16_t num;
			uint16_t queue[RTE_MAX_QUEUES_PER_PORT];
		} local;
	} action_rss;

	if (!priv->reta_idx_n)
		return EINVAL;
	for (i = 0; i != priv->reta_idx_n; ++i)
		action_rss.local.queue[i] = (*priv->reta_idx)[i];
	action_rss.local.rss_conf = &priv->rss_conf;
	action_rss.local.num = priv->reta_idx_n;
	actions[0].conf = (const void *)&action_rss.rss;
	flow = priv_flow_create(priv, &priv->ctrl_flows, &attr, items, actions,
				&error);
	if (!flow)
		return rte_errno;
	return 0;
}

/**
 * Enable a control flow configured from the control plane.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param eth_spec
 *   An Ethernet flow spec to apply.
 * @param eth_mask
 *   An Ethernet flow mask to apply.
 *
 * @return
 *   0 on success.
 */
int
mlx5_ctrl_flow(struct rte_eth_dev *dev,
	       struct rte_flow_item_eth *eth_spec,
	       struct rte_flow_item_eth *eth_mask)
{
	return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
}

/**
 * Destroy a flow.
 *
 * @see rte_flow_destroy()
 * @see rte_flow_ops
 */
int
mlx5_flow_destroy(struct rte_eth_dev *dev,
		  struct rte_flow *flow,
		  struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	(void)error;
	priv_lock(priv);
	priv_flow_destroy(priv, &priv->flows, flow);
	priv_unlock(priv);
	return 0;
}

/**
 * Destroy all flows.
 *
 * @see rte_flow_flush()
 * @see rte_flow_ops
 */
int
mlx5_flow_flush(struct rte_eth_dev *dev,
		struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	(void)error;
	priv_lock(priv);
	priv_flow_flush(priv, &priv->flows);
	priv_unlock(priv);
	return 0;
}

#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
/**
 * Query flow counter.
 *
 * @param cs
 *   The counter set.
 * @param counter_stats
 *   Last counter values, used to compute deltas.
 * @param[out] query_count
 *   Returned data from the counter.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, an errno value otherwise and rte_errno is set.
 */
static int
priv_flow_query_count(struct ibv_counter_set *cs,
		      struct mlx5_flow_counter_stats *counter_stats,
		      struct rte_flow_query_count *query_count,
		      struct rte_flow_error *error)
{
	uint64_t counters[2];
	struct ibv_query_counter_set_attr query_cs_attr = {
		.cs = cs,
		.query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
	};
	struct ibv_counter_set_data query_out = {
		.out = counters,
		.outlen = 2 * sizeof(uint64_t),
	};
	int res = ibv_query_counter_set(&query_cs_attr, &query_out);

	if (res) {
		rte_flow_error_set(error, -res,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "cannot read counter");
		return -res;
	}
	query_count->hits_set = 1;
	query_count->bytes_set = 1;
	query_count->hits = counters[0] - counter_stats->hits;
	query_count->bytes = counters[1] - counter_stats->bytes;
	/* Reset the baseline on demand. */
	if (query_count->reset) {
		counter_stats->hits = counters[0];
		counter_stats->bytes = counters[1];
	}
	return 0;
}
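
/*
 * Illustrative sketch (not part of the upstream driver): counter_stats
 * keeps the last raw readings, so a query with .reset = 1 returns deltas
 * and restarts the baseline:
 *
 *	struct rte_flow_query_count qc = { .reset = 1 };
 *
 *	// 1st call: qc.hits = raw - 0, baseline <- raw
 *	// 2nd call: qc.hits = raw - baseline, baseline <- new raw
 */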
/**
 * Query a flow.
 *
 * @see rte_flow_query()
 * @see rte_flow_ops
 */
int
mlx5_flow_query(struct rte_eth_dev *dev,
		struct rte_flow *flow,
		enum rte_flow_action_type action __rte_unused,
		void *data,
		struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;
	int res = EINVAL;

	priv_lock(priv);
	if (flow->cs) {
		res = priv_flow_query_count(flow->cs,
					    &flow->counter_stats,
					    (struct rte_flow_query_count *)data,
					    error);
	} else {
		rte_flow_error_set(error, res,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "no counter found for flow");
	}
	priv_unlock(priv);
	return -res;
}
#endif

/**
 * Isolated mode.
 *
 * @see rte_flow_isolate()
 * @see rte_flow_ops
 */
int
mlx5_flow_isolate(struct rte_eth_dev *dev,
		  int enable,
		  struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	priv_lock(priv);
	if (dev->data->dev_started) {
		rte_flow_error_set(error, EBUSY,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "port must be stopped first");
		priv_unlock(priv);
		return -rte_errno;
	}
	priv->isolated = !!enable;
	if (enable)
		priv->dev->dev_ops = &mlx5_dev_ops_isolate;
	else
		priv->dev->dev_ops = &mlx5_dev_ops;
	priv_unlock(priv);
	return 0;
}

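/*
 * Illustrative call order on the application side (assumed identifiers):
 * isolated mode must be selected while the port is stopped, i.e. before
 * rte_eth_dev_start():
 *
 *	rte_flow_isolate(port_id, 1, &err);
 *	rte_eth_dev_start(port_id);
 */
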
/**
 * Convert a flow director filter to a generic flow.
 *
 * @param priv
 *   Private structure.
 * @param fdir_filter
 *   Flow director filter to add.
 * @param attributes
 *   Generic flow parameters structure.
 *
 * @return
 *   0 on success, errno value on error.
 */
static int
priv_fdir_filter_convert(struct priv *priv,
			 const struct rte_eth_fdir_filter *fdir_filter,
			 struct mlx5_fdir *attributes)
{
	const struct rte_eth_fdir_input *input = &fdir_filter->input;

	/* Validate queue number. */
	if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
		ERROR("invalid queue number %d", fdir_filter->action.rx_queue);
		return EINVAL;
	}
	attributes->attr.ingress = 1;
	attributes->items[0] = (struct rte_flow_item) {
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.spec = &attributes->l2,
		.mask = &attributes->l2_mask,
	};
	switch (fdir_filter->action.behavior) {
	case RTE_ETH_FDIR_ACCEPT:
		attributes->actions[0] = (struct rte_flow_action){
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &attributes->queue,
		};
		break;
	case RTE_ETH_FDIR_REJECT:
		attributes->actions[0] = (struct rte_flow_action){
			.type = RTE_FLOW_ACTION_TYPE_DROP,
		};
		break;
	default:
		ERROR("invalid behavior %d", fdir_filter->action.behavior);
		return ENOTSUP;
	}
	attributes->queue.index = fdir_filter->action.rx_queue;
	switch (fdir_filter->input.flow_type) {
	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = input->flow.udp4_flow.ip.src_ip,
			.dst_addr = input->flow.udp4_flow.ip.dst_ip,
			.time_to_live = input->flow.udp4_flow.ip.ttl,
			.type_of_service = input->flow.udp4_flow.ip.tos,
			.next_proto_id = input->flow.udp4_flow.ip.proto,
		};
		attributes->l4.udp.hdr = (struct udp_hdr){
			.src_port = input->flow.udp4_flow.src_port,
			.dst_port = input->flow.udp4_flow.dst_port,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV4,
			.spec = &attributes->l3,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_UDP,
			.spec = &attributes->l4,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = input->flow.tcp4_flow.ip.src_ip,
			.dst_addr = input->flow.tcp4_flow.ip.dst_ip,
			.time_to_live = input->flow.tcp4_flow.ip.ttl,
			.type_of_service = input->flow.tcp4_flow.ip.tos,
			.next_proto_id = input->flow.tcp4_flow.ip.proto,
		};
		attributes->l4.tcp.hdr = (struct tcp_hdr){
			.src_port = input->flow.tcp4_flow.src_port,
			.dst_port = input->flow.tcp4_flow.dst_port,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV4,
			.spec = &attributes->l3,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_TCP,
			.spec = &attributes->l4,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = input->flow.ip4_flow.src_ip,
			.dst_addr = input->flow.ip4_flow.dst_ip,
			.time_to_live = input->flow.ip4_flow.ttl,
			.type_of_service = input->flow.ip4_flow.tos,
			.next_proto_id = input->flow.ip4_flow.proto,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV4,
			.spec = &attributes->l3,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
			.hop_limits = input->flow.udp6_flow.ip.hop_limits,
			.proto = input->flow.udp6_flow.ip.proto,
		};
		memcpy(attributes->l3.ipv6.hdr.src_addr,
		       input->flow.udp6_flow.ip.src_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
		memcpy(attributes->l3.ipv6.hdr.dst_addr,
		       input->flow.udp6_flow.ip.dst_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
		attributes->l4.udp.hdr = (struct udp_hdr){
			.src_port = input->flow.udp6_flow.src_port,
			.dst_port = input->flow.udp6_flow.dst_port,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV6,
			.spec = &attributes->l3,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_UDP,
			.spec = &attributes->l4,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
			.hop_limits = input->flow.tcp6_flow.ip.hop_limits,
			.proto = input->flow.tcp6_flow.ip.proto,
		};
		memcpy(attributes->l3.ipv6.hdr.src_addr,
		       input->flow.tcp6_flow.ip.src_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
		memcpy(attributes->l3.ipv6.hdr.dst_addr,
		       input->flow.tcp6_flow.ip.dst_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
		attributes->l4.tcp.hdr = (struct tcp_hdr){
			.src_port = input->flow.tcp6_flow.src_port,
			.dst_port = input->flow.tcp6_flow.dst_port,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV6,
			.spec = &attributes->l3,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_TCP,
			.spec = &attributes->l4,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
			.hop_limits = input->flow.ipv6_flow.hop_limits,
			.proto = input->flow.ipv6_flow.proto,
		};
		memcpy(attributes->l3.ipv6.hdr.src_addr,
		       input->flow.ipv6_flow.src_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
		memcpy(attributes->l3.ipv6.hdr.dst_addr,
		       input->flow.ipv6_flow.dst_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV6,
			.spec = &attributes->l3,
		};
		break;
	default:
		ERROR("invalid flow type %d",
		      fdir_filter->input.flow_type);
		return ENOTSUP;
	}
	return 0;
}

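/*
 * Conversion sketch (illustrative): an IPv4/UDP perfect filter steering
 * to queue 3 ends up as the equivalent of this rte_flow rule:
 *
 *	pattern: ETH / IPV4 (addresses, TTL, ToS, proto) / UDP (ports) / END
 *	actions: QUEUE index 3 / END
 */
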
/**
 * Add new flow director filter and store it in list.
 *
 * @param priv
 *   Private structure.
 * @param fdir_filter
 *   Flow director filter to add.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_fdir_filter_add(struct priv *priv,
		     const struct rte_eth_fdir_filter *fdir_filter)
{
	struct mlx5_fdir attributes = {
		.attr.group = 0,
		.l2_mask = {
			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.type = 0,
		},
	};
	struct mlx5_flow_parse parser = {
		.layer = HASH_RXQ_ETH,
	};
	struct rte_flow_error error;
	struct rte_flow *flow;
	int ret;

	ret = priv_fdir_filter_convert(priv, fdir_filter, &attributes);
	if (ret)
		return -ret;
	ret = priv_flow_convert(priv, &attributes.attr, attributes.items,
				attributes.actions, &error, &parser);
	if (ret)
		return -ret;
	flow = priv_flow_create(priv, &priv->flows, &attributes.attr,
				attributes.items, attributes.actions, &error);
	if (flow) {
		DEBUG("FDIR created %p", (void *)flow);
		return 0;
	}
	return ENOTSUP;
}

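/*
 * Application-side sketch (port_id is an assumption): filters reach this
 * function through the legacy filter API:
 *
 *	struct rte_eth_fdir_filter f = {
 *		.input.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
 *		.action.rx_queue = 3,
 *	};
 *
 *	rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
 *				RTE_ETH_FILTER_ADD, &f);
 */
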
/**
 * Delete specific filter.
 *
 * @param priv
 *   Private structure.
 * @param fdir_filter
 *   Filter to be deleted.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_fdir_filter_delete(struct priv *priv,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	struct mlx5_fdir attributes = {
		.attr.group = 0,
	};
	struct mlx5_flow_parse parser = {
		.create = 1,
		.layer = HASH_RXQ_ETH,
	};
	struct rte_flow_error error;
	struct rte_flow *flow;
	unsigned int i;
	int ret;

	ret = priv_fdir_filter_convert(priv, fdir_filter, &attributes);
	if (ret)
		return -ret;
	ret = priv_flow_convert(priv, &attributes.attr, attributes.items,
				attributes.actions, &error, &parser);
	if (ret)
		goto exit;
	/*
	 * Special case for drop action which is only set in the
	 * specifications when the flow is created.  In this situation the
	 * drop specification is missing.
	 */
	if (parser.drop) {
		struct ibv_flow_spec_action_drop *drop;

		drop = (void *)((uintptr_t)parser.queue[HASH_RXQ_ETH].ibv_attr +
				parser.queue[HASH_RXQ_ETH].offset);
		*drop = (struct ibv_flow_spec_action_drop){
			.type = IBV_FLOW_SPEC_ACTION_DROP,
			.size = sizeof(struct ibv_flow_spec_action_drop),
		};
		parser.queue[HASH_RXQ_ETH].ibv_attr->num_of_specs++;
	}
	TAILQ_FOREACH(flow, &priv->flows, next) {
		struct ibv_flow_attr *attr;
		struct ibv_spec_header *attr_h;
		void *spec;
		struct ibv_flow_attr *flow_attr;
		struct ibv_spec_header *flow_h;
		void *flow_spec;
		unsigned int specs_n;

		attr = parser.queue[HASH_RXQ_ETH].ibv_attr;
		flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr;
		/* Compare first the attributes. */
		if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
			continue;
		if (attr->num_of_specs == 0)
			continue;
		spec = (void *)((uintptr_t)attr +
				sizeof(struct ibv_flow_attr));
		flow_spec = (void *)((uintptr_t)flow_attr +
				     sizeof(struct ibv_flow_attr));
		specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
		for (i = 0; i != specs_n; ++i) {
			attr_h = spec;
			flow_h = flow_spec;
			if (memcmp(spec, flow_spec,
				   RTE_MIN(attr_h->size, flow_h->size)))
				goto wrong_flow;
			spec = (void *)((uintptr_t)spec + attr_h->size);
			flow_spec = (void *)((uintptr_t)flow_spec +
					     flow_h->size);
		}
		/* At this point, the flow matches. */
		break;
wrong_flow:
		/* The flow does not match. */
		continue;
	}
	if (flow)
		priv_flow_destroy(priv, &priv->flows, flow);
exit:
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser.queue[i].ibv_attr)
			rte_free(parser.queue[i].ibv_attr);
	}
	return -ret;
}

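/*
 * Memory layout walked by the matching loop above (illustrative): each
 * Verbs attribute is followed by packed specifications, each starting
 * with an ibv_spec_header giving its type and size:
 *
 *	[ibv_flow_attr][spec 0: header|data][spec 1: header|data]...
 */
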
/**
 * Update queue for specific filter.
 *
 * @param priv
 *   Private structure.
 * @param fdir_filter
 *   Filter to be updated.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_fdir_filter_update(struct priv *priv,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	int ret;

	ret = priv_fdir_filter_delete(priv, fdir_filter);
	if (ret)
		return ret;
	ret = priv_fdir_filter_add(priv, fdir_filter);
	return ret;
}

/**
 * Flush all filters.
 *
 * @param priv
 *   Private structure.
 */
static void
priv_fdir_filter_flush(struct priv *priv)
{
	priv_flow_flush(priv, &priv->flows);
}

/**
 * Get flow director information.
 *
 * @param priv
 *   Private structure.
 * @param[out] fdir_info
 *   Resulting flow director information.
 */
static void
priv_fdir_info_get(struct priv *priv, struct rte_eth_fdir_info *fdir_info)
{
	struct rte_eth_fdir_masks *mask =
		&priv->dev->data->dev_conf.fdir_conf.mask;

	fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
	fdir_info->guarant_spc = 0;
	rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
	fdir_info->max_flexpayload = 0;
	fdir_info->flow_types_mask[0] = 0;
	fdir_info->flex_payload_unit = 0;
	fdir_info->max_flex_payload_segment_num = 0;
	fdir_info->flex_payload_limit = 0;
	memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
}

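/*
 * Application-side sketch (port_id is an assumption): the zeroed
 * capability fields above advertise that flexible payload matching is
 * not supported:
 *
 *	struct rte_eth_fdir_info info;
 *
 *	rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
 *				RTE_ETH_FILTER_INFO, &info);
 */
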
/**
 * Deal with flow director operations.
 *
 * @param priv
 *   Pointer to private structure.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_fdir_ctrl_func(struct priv *priv, enum rte_filter_op filter_op, void *arg)
{
	enum rte_fdir_mode fdir_mode =
		priv->dev->data->dev_conf.fdir_conf.mode;
	int ret = 0;

	if (filter_op == RTE_ETH_FILTER_NOP)
		return 0;
	if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
	    fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
		ERROR("%p: flow director mode %d not supported",
		      (void *)priv, fdir_mode);
		return EINVAL;
	}
	switch (filter_op) {
	case RTE_ETH_FILTER_ADD:
		ret = priv_fdir_filter_add(priv, arg);
		break;
	case RTE_ETH_FILTER_UPDATE:
		ret = priv_fdir_filter_update(priv, arg);
		break;
	case RTE_ETH_FILTER_DELETE:
		ret = priv_fdir_filter_delete(priv, arg);
		break;
	case RTE_ETH_FILTER_FLUSH:
		priv_fdir_filter_flush(priv);
		break;
	case RTE_ETH_FILTER_INFO:
		priv_fdir_info_get(priv, arg);
		break;
	default:
		DEBUG("%p: unknown operation %u", (void *)priv,
		      filter_op);
		ret = EINVAL;
		break;
	}
	return ret;
}

/**
 * Manage filter operations.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param filter_type
 *   Filter type.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
int
mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
		     enum rte_filter_type filter_type,
		     enum rte_filter_op filter_op,
		     void *arg)
{
	int ret = EINVAL;
	struct priv *priv = dev->data->dev_private;

	switch (filter_type) {
	case RTE_ETH_FILTER_GENERIC:
		if (filter_op != RTE_ETH_FILTER_GET)
			return -EINVAL;
		*(const void **)arg = &mlx5_flow_ops;
		return 0;
	case RTE_ETH_FILTER_FDIR:
		priv_lock(priv);
		ret = priv_fdir_ctrl_func(priv, filter_op, arg);
		priv_unlock(priv);
		break;
	default:
		ERROR("%p: filter type (%d) not supported",
		      (void *)dev, filter_type);
		break;
	}
	return -ret;
}

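/*
 * Retrieval sketch (illustrative): the rte_flow layer resolves the mlx5
 * callbacks through the generic filter type, roughly:
 *
 *	const struct rte_flow_ops *ops;
 *
 *	rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_GENERIC,
 *				RTE_ETH_FILTER_GET, &ops);
 */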