4 * Copyright 2016 6WIND S.A.
5 * Copyright 2016 Mellanox.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of 6WIND S.A. nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 #include <sys/queue.h>
38 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
40 #pragma GCC diagnostic ignored "-Wpedantic"
42 #include <infiniband/verbs.h>
44 #pragma GCC diagnostic error "-Wpedantic"
47 #include <rte_ethdev.h>
49 #include <rte_flow_driver.h>
50 #include <rte_malloc.h>
55 /* Define minimal priority for control plane flows. */
56 #define MLX5_CTRL_FLOW_PRIORITY 4
58 /* Internet Protocol versions. */
62 #ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
63 struct ibv_counter_set_init_attr {
66 struct ibv_flow_spec_counter_action {
69 struct ibv_counter_set {
74 ibv_destroy_counter_set(struct ibv_counter_set *cs)
81 /* Dev ops structure defined in mlx5.c */
82 extern const struct eth_dev_ops mlx5_dev_ops;
83 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
86 mlx5_flow_create_eth(const struct rte_flow_item *item,
87 const void *default_mask,
91 mlx5_flow_create_vlan(const struct rte_flow_item *item,
92 const void *default_mask,
96 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
97 const void *default_mask,
101 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
102 const void *default_mask,
106 mlx5_flow_create_udp(const struct rte_flow_item *item,
107 const void *default_mask,
111 mlx5_flow_create_tcp(const struct rte_flow_item *item,
112 const void *default_mask,
116 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
117 const void *default_mask,
120 struct mlx5_flow_parse;
123 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
127 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);
130 mlx5_flow_create_count(struct priv *priv, struct mlx5_flow_parse *parser);
132 /* Hash RX queue types. */
143 /* Initialization data for hash RX queue. */
144 struct hash_rxq_init {
145 uint64_t hash_fields; /* Fields that participate in the hash. */
146 uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
147 unsigned int flow_priority; /* Flow priority to use. */
148 unsigned int ip_version; /* Internet protocol. */
151 /* Initialization data for hash RX queues. */
/*
 * Per hash-Rx-queue-type initialization data: the Verbs hash fields used
 * for RSS and the matching DPDK RSS hash flags.
 * NOTE(review): the [HASH_RXQ_*] index designators, the flow_priority
 * fields and the closing braces of each entry are not visible in this
 * excerpt of the file.
 */
152 const struct hash_rxq_init hash_rxq_init[] = {
/* TCP over IPv4. */
154 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
155 IBV_RX_HASH_DST_IPV4 |
156 IBV_RX_HASH_SRC_PORT_TCP |
157 IBV_RX_HASH_DST_PORT_TCP),
158 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
160 .ip_version = MLX5_IPV4,
/* UDP over IPv4. */
163 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
164 IBV_RX_HASH_DST_IPV4 |
165 IBV_RX_HASH_SRC_PORT_UDP |
166 IBV_RX_HASH_DST_PORT_UDP),
167 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
169 .ip_version = MLX5_IPV4,
/* Other IPv4 traffic (no L4 ports in the hash). */
172 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
173 IBV_RX_HASH_DST_IPV4),
174 .dpdk_rss_hf = (ETH_RSS_IPV4 |
177 .ip_version = MLX5_IPV4,
/* TCP over IPv6. */
180 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
181 IBV_RX_HASH_DST_IPV6 |
182 IBV_RX_HASH_SRC_PORT_TCP |
183 IBV_RX_HASH_DST_PORT_TCP),
184 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
186 .ip_version = MLX5_IPV6,
/* UDP over IPv6. */
189 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
190 IBV_RX_HASH_DST_IPV6 |
191 IBV_RX_HASH_SRC_PORT_UDP |
192 IBV_RX_HASH_DST_PORT_UDP),
193 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
195 .ip_version = MLX5_IPV6,
/* Other IPv6 traffic (no L4 ports in the hash). */
198 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
199 IBV_RX_HASH_DST_IPV6),
200 .dpdk_rss_hf = (ETH_RSS_IPV6 |
203 .ip_version = MLX5_IPV6,
212 /* Number of entries in hash_rxq_init[]. */
213 const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
215 /** Structure for holding counter stats. */
216 struct mlx5_flow_counter_stats {
217 uint64_t hits; /**< Number of packets matched by the rule. */
218 uint64_t bytes; /**< Number of bytes matched by the rule. */
221 /** Structure for Drop queue. */
222 struct mlx5_hrxq_drop {
223 struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
224 struct ibv_qp *qp; /**< Verbs queue pair. */
225 struct ibv_wq *wq; /**< Verbs work queue. */
226 struct ibv_cq *cq; /**< Verbs completion queue. */
229 /* Flows structures. */
231 uint64_t hash_fields; /**< Fields that participate in the hash. */
232 struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
233 struct ibv_flow *ibv_flow; /**< Verbs flow. */
234 struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
237 /* Drop flows structures. */
238 struct mlx5_flow_drop {
239 struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
240 struct ibv_flow *ibv_flow; /**< Verbs flow. */
244 TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
245 uint32_t mark:1; /**< Set if the flow is marked. */
246 uint32_t drop:1; /**< Drop queue. */
247 uint16_t queues_n; /**< Number of entries in queue[]. */
248 uint16_t (*queues)[]; /**< Queues indexes to use. */
249 struct rte_eth_rss_conf rss_conf; /**< RSS configuration */
250 uint8_t rss_key[40]; /**< copy of the RSS key. */
251 struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
252 struct mlx5_flow_counter_stats counter_stats;/**<The counter stats. */
254 struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
255 /**< Flow with Rx queue. */
256 struct mlx5_flow_drop drxq; /**< Flow with drop Rx queue. */
260 /** Static initializer for items. */
262 (const enum rte_flow_item_type []){ \
263 __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
266 /** Structure to generate a simple graph of layers supported by the NIC. */
267 struct mlx5_flow_items {
268 /** List of possible actions for these items. */
269 const enum rte_flow_action_type *const actions;
270 /** Bit-masks corresponding to the possibilities for the item. */
/* NOTE(review): the `mask` member declaration itself is not visible in
 * this excerpt; only its documentation comment above is. */
273 * Default bit-masks to use when item->mask is not provided. When
274 * \default_mask is also NULL, the full supported bit-mask (\mask) is
277 const void *default_mask;
278 /** Bit-masks size in bytes. */
279 const unsigned int mask_sz;
281 * Conversion function from rte_flow to NIC specific flow.
284 * rte_flow item to convert.
285 * @param default_mask
286 * Default bit-masks to use when item->mask is not provided.
288 * Internal structure to store the conversion.
291 * 0 on success, negative value otherwise.
293 int (*convert)(const struct rte_flow_item *item,
294 const void *default_mask,
296 /** Size in bytes of the destination structure. */
297 const unsigned int dst_sz;
298 /** List of possible following items. */
299 const enum rte_flow_item_type *const items;
302 /** Valid action for this PMD. */
303 static const enum rte_flow_action_type valid_actions[] = {
304 RTE_FLOW_ACTION_TYPE_DROP,
305 RTE_FLOW_ACTION_TYPE_QUEUE,
306 RTE_FLOW_ACTION_TYPE_MARK,
307 RTE_FLOW_ACTION_TYPE_FLAG,
308 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
/* COUNT is only advertised when the Verbs counter-set API is available.
 * NOTE(review): the matching #endif is not visible in this excerpt. */
309 RTE_FLOW_ACTION_TYPE_COUNT,
311 RTE_FLOW_ACTION_TYPE_END,
314 /** Graph of supported items and associated actions. */
/*
 * Each entry is indexed by an rte_flow item type and describes which
 * items may follow it, which actions are valid, the supported/default
 * bit-masks, and the conversion callback producing the Verbs spec.
 * NOTE(review): several mask-field initializers and closing braces are
 * not visible in this excerpt of the file.
 */
315 static const struct mlx5_flow_items mlx5_flow_items[] = {
/* END doubles as the graph entry point: a pattern starts with ETH or VXLAN. */
316 [RTE_FLOW_ITEM_TYPE_END] = {
317 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
318 RTE_FLOW_ITEM_TYPE_VXLAN),
320 [RTE_FLOW_ITEM_TYPE_ETH] = {
321 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
322 RTE_FLOW_ITEM_TYPE_IPV4,
323 RTE_FLOW_ITEM_TYPE_IPV6),
324 .actions = valid_actions,
325 .mask = &(const struct rte_flow_item_eth){
326 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
327 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
330 .default_mask = &rte_flow_item_eth_mask,
331 .mask_sz = sizeof(struct rte_flow_item_eth),
332 .convert = mlx5_flow_create_eth,
333 .dst_sz = sizeof(struct ibv_flow_spec_eth),
/* VLAN is folded into the preceding ETH Verbs spec (dst_sz 0). */
335 [RTE_FLOW_ITEM_TYPE_VLAN] = {
336 .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
337 RTE_FLOW_ITEM_TYPE_IPV6),
338 .actions = valid_actions,
339 .mask = &(const struct rte_flow_item_vlan){
342 .default_mask = &rte_flow_item_vlan_mask,
343 .mask_sz = sizeof(struct rte_flow_item_vlan),
344 .convert = mlx5_flow_create_vlan,
347 [RTE_FLOW_ITEM_TYPE_IPV4] = {
348 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
349 RTE_FLOW_ITEM_TYPE_TCP),
350 .actions = valid_actions,
351 .mask = &(const struct rte_flow_item_ipv4){
355 .type_of_service = -1,
359 .default_mask = &rte_flow_item_ipv4_mask,
360 .mask_sz = sizeof(struct rte_flow_item_ipv4),
361 .convert = mlx5_flow_create_ipv4,
362 .dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
364 [RTE_FLOW_ITEM_TYPE_IPV6] = {
365 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
366 RTE_FLOW_ITEM_TYPE_TCP),
367 .actions = valid_actions,
368 .mask = &(const struct rte_flow_item_ipv6){
371 0xff, 0xff, 0xff, 0xff,
372 0xff, 0xff, 0xff, 0xff,
373 0xff, 0xff, 0xff, 0xff,
374 0xff, 0xff, 0xff, 0xff,
377 0xff, 0xff, 0xff, 0xff,
378 0xff, 0xff, 0xff, 0xff,
379 0xff, 0xff, 0xff, 0xff,
380 0xff, 0xff, 0xff, 0xff,
387 .default_mask = &rte_flow_item_ipv6_mask,
388 .mask_sz = sizeof(struct rte_flow_item_ipv6),
389 .convert = mlx5_flow_create_ipv6,
390 .dst_sz = sizeof(struct ibv_flow_spec_ipv6),
392 [RTE_FLOW_ITEM_TYPE_UDP] = {
393 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
394 .actions = valid_actions,
395 .mask = &(const struct rte_flow_item_udp){
401 .default_mask = &rte_flow_item_udp_mask,
402 .mask_sz = sizeof(struct rte_flow_item_udp),
403 .convert = mlx5_flow_create_udp,
404 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
/* TCP is a terminal layer: no .items, nothing may follow it. */
406 [RTE_FLOW_ITEM_TYPE_TCP] = {
407 .actions = valid_actions,
408 .mask = &(const struct rte_flow_item_tcp){
414 .default_mask = &rte_flow_item_tcp_mask,
415 .mask_sz = sizeof(struct rte_flow_item_tcp),
416 .convert = mlx5_flow_create_tcp,
417 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
/* VXLAN restarts the graph with an inner ETH layer. */
419 [RTE_FLOW_ITEM_TYPE_VXLAN] = {
420 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
421 .actions = valid_actions,
422 .mask = &(const struct rte_flow_item_vxlan){
423 .vni = "\xff\xff\xff",
425 .default_mask = &rte_flow_item_vxlan_mask,
426 .mask_sz = sizeof(struct rte_flow_item_vxlan),
427 .convert = mlx5_flow_create_vxlan,
428 .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
432 /** Structure to pass to the conversion function. */
433 struct mlx5_flow_parse {
434 uint32_t inner; /**< Set once VXLAN is encountered. */
436 /**< Whether resources should remain after a validate. */
437 uint32_t drop:1; /**< Target is a drop queue. */
438 uint32_t mark:1; /**< Mark is present in the flow. */
439 uint32_t count:1; /**< Count is present in the flow. */
440 uint32_t mark_id; /**< Mark identifier. */
441 uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use. */
442 uint16_t queues_n; /**< Number of entries in queue[]. */
443 struct rte_eth_rss_conf rss_conf; /**< RSS configuration */
444 uint8_t rss_key[40]; /**< copy of the RSS key. */
445 enum hash_rxq_type layer; /**< Last pattern layer detected. */
446 struct ibv_counter_set *cs; /**< Holds the counter set for the rule */
/* Per hash-Rx-queue-type pending Verbs attribute being built.
 * NOTE(review): the surrounding union/struct keywords and the drop_q
 * member name are not visible in this excerpt. */
449 struct ibv_flow_attr *ibv_attr;
450 /**< Pointer to Verbs attributes. */
452 /**< Current position or total size of the attribute. */
453 } queue[RTE_DIM(hash_rxq_init)];
/* Single pending Verbs attribute used when the target is a drop queue
 * (referenced elsewhere in this file as parser->drop_q). */
455 struct ibv_flow_attr *ibv_attr;
456 /**< Pointer to Verbs attributes. */
458 /**< Current position or total size of the attribute. */
/* rte_flow operation callbacks exported by this PMD. */
463 static const struct rte_flow_ops mlx5_flow_ops = {
464 .validate = mlx5_flow_validate,
465 .create = mlx5_flow_create,
466 .destroy = mlx5_flow_destroy,
467 .flush = mlx5_flow_flush,
468 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
/* Counter query is only available with the Verbs counter-set API. */
469 .query = mlx5_flow_query,
473 .isolate = mlx5_flow_isolate,
476 /* Convert FDIR request to Generic flow. */
478 struct rte_flow_attr attr;
479 struct rte_flow_action actions[2];
480 struct rte_flow_item items[4];
481 struct rte_flow_item_eth l2;
482 struct rte_flow_item_eth l2_mask;
484 struct rte_flow_item_ipv4 ipv4;
485 struct rte_flow_item_ipv6 ipv6;
488 struct rte_flow_item_udp udp;
489 struct rte_flow_item_tcp tcp;
491 struct rte_flow_action_queue queue;
494 /* Verbs specification header. */
495 struct ibv_spec_header {
496 enum ibv_flow_spec_type type;
501 * Check support for a given item.
504 * Item specification.
506 * Bit-masks covering supported fields to compare with spec, last and mask in
509 * Bit-Mask size in bytes.
/*
 * Check an item's spec/last/mask against the PMD-supported bit-mask:
 * every bit set by the user must also be set in @mask.  When both spec
 * and last are given, the masked range endpoints are compared with
 * memcmp (result handling not visible in this excerpt).
 * NOTE(review): error-return statements between the checks are missing
 * from this excerpt; comments describe only the visible logic.
 */
515 mlx5_flow_item_validate(const struct rte_flow_item *item,
516 const uint8_t *mask, unsigned int size)
/* A mask or last without a spec is meaningless. */
520 if (!item->spec && (item->mask || item->last))
522 if (item->spec && !item->mask) {
524 const uint8_t *spec = item->spec;
/* spec must be a subset of the supported mask. */
526 for (i = 0; i < size; ++i)
527 if ((spec[i] | mask[i]) != mask[i])
530 if (item->last && !item->mask) {
532 const uint8_t *spec = item->last;
/* last must also be a subset of the supported mask. */
534 for (i = 0; i < size; ++i)
535 if ((spec[i] | mask[i]) != mask[i])
540 const uint8_t *spec = item->mask;
/* the user mask must not exceed the supported mask. */
542 for (i = 0; i < size; ++i)
543 if ((spec[i] | mask[i]) != mask[i])
546 if (item->spec && item->last) {
549 const uint8_t *apply = mask;
/* Compare spec and last after applying the effective mask. */
554 for (i = 0; i < size; ++i) {
555 spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
556 last[i] = ((const uint8_t *)item->last)[i] & apply[i];
558 ret = memcmp(spec, last, size);
564 * Copy the RSS configuration from the user ones.
567 * Pointer to private structure.
569 * Internal parser structure.
571 * User RSS configuration to save.
574 * 0 on success, errno value on failure.
/*
 * Copy the user RSS configuration (or the device default when
 * @rss_conf is NULL) into the parser, bounding the key length by the
 * 40-byte parser->rss_key buffer.
 */
577 priv_flow_convert_rss_conf(struct priv *priv,
578 struct mlx5_flow_parse *parser,
579 const struct rte_eth_rss_conf *rss_conf)
/* Fall back to the device-wide RSS configuration. */
581 const struct rte_eth_rss_conf *rss =
582 rss_conf ? rss_conf : &priv->rss_conf;
/* Reject keys longer than the parser's 40-byte storage. */
584 if (rss->rss_key_len > 40)
586 parser->rss_conf.rss_key_len = rss->rss_key_len;
587 parser->rss_conf.rss_hf = rss->rss_hf;
/* Keep a private copy so the caller's buffer may go away. */
588 memcpy(parser->rss_key, rss->rss_key, rss->rss_key_len);
589 parser->rss_conf.rss_key = parser->rss_key;
594 * Extract attribute to the parser.
597 * Pointer to private structure.
599 * Flow rule attributes.
601 * Perform verbose error reporting if not NULL.
602 * @param[in, out] parser
603 * Internal parser structure.
606 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Validate flow rule attributes: only ingress rules with no group and
 * either no priority or the control-flow priority are supported; each
 * rejected case reports an ENOTSUP rte_flow error.
 * NOTE(review): the `if (attr->group)` / `if (attr->egress)` guards and
 * return statements are missing from this excerpt.
 */
609 priv_flow_convert_attributes(struct priv *priv,
610 const struct rte_flow_attr *attr,
611 struct rte_flow_error *error,
612 struct mlx5_flow_parse *parser)
/* Flow groups are not supported by this PMD. */
617 rte_flow_error_set(error, ENOTSUP,
618 RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
620 "groups are not supported");
/* Only the dedicated control-flow priority is accepted. */
623 if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
624 rte_flow_error_set(error, ENOTSUP,
625 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
627 "priorities are not supported");
/* Egress rules are not supported. */
631 rte_flow_error_set(error, ENOTSUP,
632 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
634 "egress is not supported");
/* A rule must be explicitly ingress. */
637 if (!attr->ingress) {
638 rte_flow_error_set(error, ENOTSUP,
639 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
641 "only ingress is supported");
648 * Extract actions request to the parser.
651 * Pointer to private structure.
653 * Associated actions (list terminated by the END action).
655 * Perform verbose error reporting if not NULL.
656 * @param[in, out] parser
657 * Internal parser structure.
660 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Walk the action list and fill the parser: DROP/QUEUE/RSS/MARK/FLAG
 * and (when supported) COUNT are accepted; anything else reports an
 * "action not supported" error.  QUEUE and RSS must agree on the same
 * queue set, and DROP cannot be combined with MARK.
 * NOTE(review): many statements (returns, flag assignments, closing
 * braces) are missing from this excerpt; comments describe only the
 * visible logic.
 */
663 priv_flow_convert_actions(struct priv *priv,
664 const struct rte_flow_action actions[],
665 struct rte_flow_error *error,
666 struct mlx5_flow_parse *parser)
669 * Add default RSS configuration necessary for Verbs to create QP even
670 * if no RSS is necessary.
672 priv_flow_convert_rss_conf(priv, parser,
673 (const struct rte_eth_rss_conf *)
675 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
676 if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
678 } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
680 } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
681 const struct rte_flow_action_queue *queue =
682 (const struct rte_flow_action_queue *)
/* Queue index must exist on the device. */
687 if (!queue || (queue->index > (priv->rxqs_n - 1)))
688 goto exit_action_not_supported;
/* Check whether the queue is already part of the RSS set. */
689 for (n = 0; n < parser->queues_n; ++n) {
690 if (parser->queues[n] == queue->index) {
695 if (parser->queues_n > 1 && !found) {
696 rte_flow_error_set(error, ENOTSUP,
697 RTE_FLOW_ERROR_TYPE_ACTION,
699 "queue action not in RSS queues");
/* A plain QUEUE action targets a single queue. */
703 parser->queues_n = 1;
704 parser->queues[0] = queue->index;
706 } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
707 const struct rte_flow_action_rss *rss =
708 (const struct rte_flow_action_rss *)
/* RSS needs at least one queue. */
712 if (!rss || !rss->num) {
713 rte_flow_error_set(error, EINVAL,
714 RTE_FLOW_ERROR_TYPE_ACTION,
/* A previous QUEUE action must reference one of the RSS queues. */
719 if (parser->queues_n == 1) {
722 assert(parser->queues_n);
723 for (n = 0; n < rss->num; ++n) {
724 if (parser->queues[0] ==
731 rte_flow_error_set(error, ENOTSUP,
732 RTE_FLOW_ERROR_TYPE_ACTION,
734 "queue action not in RSS"
/* Validate every RSS queue index against the device. */
739 for (n = 0; n < rss->num; ++n) {
740 if (rss->queue[n] >= priv->rxqs_n) {
741 rte_flow_error_set(error, EINVAL,
742 RTE_FLOW_ERROR_TYPE_ACTION,
744 "queue id > number of"
749 for (n = 0; n < rss->num; ++n)
750 parser->queues[n] = rss->queue[n];
751 parser->queues_n = rss->num;
752 if (priv_flow_convert_rss_conf(priv, parser,
754 rte_flow_error_set(error, EINVAL,
755 RTE_FLOW_ERROR_TYPE_ACTION,
757 "wrong RSS configuration");
760 } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
761 const struct rte_flow_action_mark *mark =
762 (const struct rte_flow_action_mark *)
/* MARK requires a configuration and a bounded identifier. */
766 rte_flow_error_set(error, EINVAL,
767 RTE_FLOW_ERROR_TYPE_ACTION,
769 "mark must be defined");
771 } else if (mark->id >= MLX5_FLOW_MARK_MAX) {
772 rte_flow_error_set(error, ENOTSUP,
773 RTE_FLOW_ERROR_TYPE_ACTION,
775 "mark must be between 0"
780 parser->mark_id = mark->id;
781 } else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
/* COUNT is only accepted when the device supports counter sets. */
783 } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
784 priv->counter_set_supported) {
787 goto exit_action_not_supported;
/* DROP combined with MARK is rejected. */
790 if (parser->drop && parser->mark)
792 if (!parser->queues_n && !parser->drop) {
793 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
794 NULL, "no valid action");
798 exit_action_not_supported:
799 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
800 actions, "action not supported");
808 * Pointer to private structure.
810 * Pattern specification (list terminated by the END pattern item).
812 * Perform verbose error reporting if not NULL.
813 * @param[in, out] parser
814 * Internal parser structure.
817 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Validate the pattern items against the mlx5_flow_items graph and
 * accumulate, per hash Rx queue type (or for the drop queue), the byte
 * size the final Verbs attribute will need.
 * NOTE(review): several statements (loop headers, returns, the
 * drop/mark/count conditionals) are missing from this excerpt.
 */
820 priv_flow_convert_items_validate(struct priv *priv,
821 const struct rte_flow_item items[],
822 struct rte_flow_error *error,
823 struct mlx5_flow_parse *parser)
825 const struct mlx5_flow_items *cur_item = mlx5_flow_items;
829 /* Initialise the offsets to start after verbs attribute. */
831 parser->drop_q.offset = sizeof(struct ibv_flow_attr);
833 for (i = 0; i != hash_rxq_init_n; ++i)
834 parser->queue[i].offset = sizeof(struct ibv_flow_attr);
836 for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
837 const struct mlx5_flow_items *token = NULL;
841 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
/* The item must be a legal successor of the previous one. */
845 cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
847 if (cur_item->items[i] == items->type) {
848 token = &mlx5_flow_items[items->type];
853 goto exit_item_not_supported;
/* Check spec/last/mask against the supported bit-mask. */
855 err = mlx5_flow_item_validate(items,
856 (const uint8_t *)cur_item->mask,
859 goto exit_item_not_supported;
860 if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
/* Only a single level of VXLAN encapsulation is supported. */
862 rte_flow_error_set(error, ENOTSUP,
863 RTE_FLOW_ERROR_TYPE_ITEM,
865 "cannot recognize multiple"
866 " VXLAN encapsulations");
/* Items after VXLAN describe the inner packet. */
869 parser->inner = IBV_FLOW_SPEC_INNER;
/* Account for this item's Verbs spec size. */
872 parser->drop_q.offset += cur_item->dst_sz;
873 } else if (parser->queues_n == 1) {
874 parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
876 for (n = 0; n != hash_rxq_init_n; ++n)
877 parser->queue[n].offset += cur_item->dst_sz;
/* MARK/FLAG adds one action-tag spec per hash Rx queue type. */
881 for (i = 0; i != hash_rxq_init_n; ++i)
882 parser->queue[i].offset +=
883 sizeof(struct ibv_flow_spec_action_tag);
/* COUNT adds one counter-action spec. */
886 unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
889 parser->drop_q.offset += size;
891 for (i = 0; i != hash_rxq_init_n; ++i)
892 parser->queue[i].offset += size;
896 exit_item_not_supported:
897 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
898 items, "item not supported");
903 * Allocate memory space to store verbs flow attributes.
906 * Pointer to private structure.
907 * @param[in] priority
910 * Amount of byte to allocate.
912 * Perform verbose error reporting if not NULL.
915 * A verbs flow attribute on success, NULL otherwise.
/*
 * Zero-allocate @size bytes for a Verbs flow attribute and preset its
 * priority; on allocation failure an ENOMEM rte_flow error is reported
 * (NULL return path not visible in this excerpt).
 */
917 static struct ibv_flow_attr*
918 priv_flow_convert_allocate(struct priv *priv,
919 unsigned int priority,
921 struct rte_flow_error *error)
923 struct ibv_flow_attr *ibv_attr;
/* rte_calloc zero-fills, so all spec fields start cleared. */
926 ibv_attr = rte_calloc(__func__, 1, size, 0);
928 rte_flow_error_set(error, ENOMEM,
929 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
931 "cannot allocate verbs spec attributes.");
934 ibv_attr->priority = priority;
939 * Finalise verbs flow attributes.
942 * Pointer to private structure.
943 * @param[in, out] parser
944 * Internal parser structure.
/*
 * Finalise the per-hash-Rx-queue Verbs attributes: drop attributes for
 * the opposite IP family and for layers excluded by the RSS
 * configuration, then complete the remaining attributes with the IP/L4
 * specs implied by each hash Rx queue type.
 * NOTE(review): several lines (the `specs` union head, `continue`
 * statements, closing braces) are missing from this excerpt.
 */
947 priv_flow_convert_finalise(struct priv *priv, struct mlx5_flow_parse *parser)
/* Select the working/opposite layer ranges for the detected family. */
949 const unsigned int ipv4 =
950 hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
951 const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 : HASH_RXQ_TCPV6;
952 const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
953 const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 : HASH_RXQ_TCPV4;
954 const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 : HASH_RXQ_IPV4;
955 const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
959 if (parser->layer == HASH_RXQ_ETH) {
963 * This layer becomes useless as the pattern define under
966 rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
967 parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
969 /* Remove opposite kind of layer e.g. IPv6 if the pattern is IPv4. */
970 for (i = ohmin; i != (ohmax + 1); ++i) {
971 if (!parser->queue[i].ibv_attr)
973 rte_free(parser->queue[i].ibv_attr);
974 parser->queue[i].ibv_attr = NULL;
976 /* Remove impossible flow according to the RSS configuration. */
977 if (hash_rxq_init[parser->layer].dpdk_rss_hf &
978 parser->rss_conf.rss_hf) {
979 /* Remove any other flow. */
980 for (i = hmin; i != (hmax + 1); ++i) {
981 if ((i == parser->layer) ||
982 (!parser->queue[i].ibv_attr))
984 rte_free(parser->queue[i].ibv_attr);
985 parser->queue[i].ibv_attr = NULL;
987 } else if (!parser->queue[ip].ibv_attr) {
988 /* no RSS possible with the current configuration. */
989 parser->queues_n = 1;
994 * Fill missing layers in verbs specifications, or compute the correct
995 * offset to allocate the memory space for the attributes and
998 for (i = 0; i != hash_rxq_init_n - 1; ++i) {
1000 struct ibv_flow_spec_ipv4_ext ipv4;
1001 struct ibv_flow_spec_ipv6 ipv6;
1002 struct ibv_flow_spec_tcp_udp udp_tcp;
1007 if (i == parser->layer)
/* Pattern stopped at L2: synthesize the L3 spec for this type. */
1009 if (parser->layer == HASH_RXQ_ETH) {
1010 if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
1011 size = sizeof(struct ibv_flow_spec_ipv4_ext);
1012 specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
1013 .type = IBV_FLOW_SPEC_IPV4_EXT,
1017 size = sizeof(struct ibv_flow_spec_ipv6);
1018 specs.ipv6 = (struct ibv_flow_spec_ipv6){
1019 .type = IBV_FLOW_SPEC_IPV6,
1023 if (parser->queue[i].ibv_attr) {
1024 dst = (void *)((uintptr_t)
1025 parser->queue[i].ibv_attr +
1026 parser->queue[i].offset);
1027 memcpy(dst, &specs, size);
1028 ++parser->queue[i].ibv_attr->num_of_specs;
1030 parser->queue[i].offset += size;
/* Synthesize the L4 spec for the UDP/TCP hash types. */
1032 if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
1033 (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
1034 size = sizeof(struct ibv_flow_spec_tcp_udp);
1035 specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
1036 .type = ((i == HASH_RXQ_UDPV4 ||
1037 i == HASH_RXQ_UDPV6) ?
1042 if (parser->queue[i].ibv_attr) {
1043 dst = (void *)((uintptr_t)
1044 parser->queue[i].ibv_attr +
1045 parser->queue[i].offset);
1046 memcpy(dst, &specs, size);
1047 ++parser->queue[i].ibv_attr->num_of_specs;
1049 parser->queue[i].offset += size;
1055 * Validate and convert a flow supported by the NIC.
1058 * Pointer to private structure.
1060 * Flow rule attributes.
1061 * @param[in] pattern
1062 * Pattern specification (list terminated by the END pattern item).
1063 * @param[in] actions
1064 * Associated actions (list terminated by the END action).
1066 * Perform verbose error reporting if not NULL.
1067 * @param[in, out] parser
1068 * Internal parser structure.
1071 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Top-level conversion driver: (1) validate attributes, actions and
 * items while sizing the needed Verbs attributes, (2) allocate them,
 * (3) run each item's convert() callback to fill the specs, then apply
 * mark/count actions, finalise priorities and, on validate-only calls
 * or on error, release everything.
 * NOTE(review): several statements (error checks after each step,
 * `exit_enomem:` label, returns) are missing from this excerpt.
 */
1074 priv_flow_convert(struct priv *priv,
1075 const struct rte_flow_attr *attr,
1076 const struct rte_flow_item items[],
1077 const struct rte_flow_action actions[],
1078 struct rte_flow_error *error,
1079 struct mlx5_flow_parse *parser)
1081 const struct mlx5_flow_items *cur_item = mlx5_flow_items;
1085 /* First step. Validate the attributes, items and actions. */
/* Reset the parser, keeping only the caller-provided create flag. */
1086 *parser = (struct mlx5_flow_parse){
1087 .create = parser->create,
1088 .layer = HASH_RXQ_ETH,
1089 .mark_id = MLX5_FLOW_MARK_DEFAULT,
1091 ret = priv_flow_convert_attributes(priv, attr, error, parser);
1094 ret = priv_flow_convert_actions(priv, actions, error, parser);
1097 ret = priv_flow_convert_items_validate(priv, items, error, parser);
1100 priv_flow_convert_finalise(priv, parser);
1103 * Allocate the memory space to store verbs specifications.
/* Drop rules need a single attribute for the drop queue. */
1106 parser->drop_q.ibv_attr =
1107 priv_flow_convert_allocate(priv, attr->priority,
1108 parser->drop_q.offset,
1110 if (!parser->drop_q.ibv_attr)
/* Reuse offset as the write cursor, starting after the header. */
1112 parser->drop_q.offset = sizeof(struct ibv_flow_attr)
1113 } else if (parser->queues_n == 1) {
/* Single-queue rules only need the Ethernet-level attribute. */
1114 unsigned int priority =
1116 hash_rxq_init[HASH_RXQ_ETH].flow_priority;
1117 unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;
1119 parser->queue[HASH_RXQ_ETH].ibv_attr =
1120 priv_flow_convert_allocate(priv, priority,
1122 if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
1124 parser->queue[HASH_RXQ_ETH].offset =
1125 sizeof(struct ibv_flow_attr);
/* RSS rules need one attribute per enabled hash Rx queue type. */
1127 for (i = 0; i != hash_rxq_init_n; ++i) {
1128 unsigned int priority =
1130 hash_rxq_init[i].flow_priority;
1131 unsigned int offset;
1133 if (!(parser->rss_conf.rss_hf &
1134 hash_rxq_init[i].dpdk_rss_hf) &&
1135 (i != HASH_RXQ_ETH))
1137 offset = parser->queue[i].offset;
1138 parser->queue[i].ibv_attr =
1139 priv_flow_convert_allocate(priv, priority,
1141 if (!parser->queue[i].ibv_attr)
1143 parser->queue[i].offset = sizeof(struct ibv_flow_attr);
1146 /* Third step. Conversion parse, fill the specifications. */
1148 for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
1149 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
1151 cur_item = &mlx5_flow_items[items->type];
1152 ret = cur_item->convert(items,
1153 (cur_item->default_mask ?
1154 cur_item->default_mask :
1158 rte_flow_error_set(error, ret,
1159 RTE_FLOW_ERROR_TYPE_ITEM,
1160 items, "item not supported");
/* Append mark/flag and counter specs requested by the actions. */
1165 mlx5_flow_create_flag_mark(parser, parser->mark_id);
1166 if (parser->count && parser->create) {
1167 mlx5_flow_create_count(priv, parser);
1169 goto exit_count_error;
1172 * Last step. Complete missing specification to reach the RSS
1177 * Drop queue priority needs to be adjusted to
1178 * their most specific layer priority.
1180 parser->drop_q.ibv_attr->priority =
1182 hash_rxq_init[parser->layer].flow_priority;
1183 } else if (parser->queues_n > 1) {
1184 priv_flow_convert_finalise(priv, parser);
1187 * Action queue have their priority overridden with
1188 * Ethernet priority, this priority needs to be adjusted to
1189 * their most specific layer priority.
1191 parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
1193 hash_rxq_init[parser->layer].flow_priority;
1196 /* Only verification is expected, all resources should be released. */
1197 if (!parser->create) {
1199 rte_free(parser->drop_q.ibv_attr);
1200 parser->drop_q.ibv_attr = NULL;
1202 for (i = 0; i != hash_rxq_init_n; ++i) {
1203 if (parser->queue[i].ibv_attr) {
1204 rte_free(parser->queue[i].ibv_attr);
1205 parser->queue[i].ibv_attr = NULL;
/* Error path: release every allocated attribute before reporting. */
1211 for (i = 0; i != hash_rxq_init_n; ++i) {
1212 if (parser->queue[i].ibv_attr) {
1213 rte_free(parser->queue[i].ibv_attr);
1214 parser->queue[i].ibv_attr = NULL;
1217 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1218 NULL, "cannot allocate verbs spec attributes.");
1221 rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1222 NULL, "cannot create counter.");
1227 * Copy the specification created into the flow.
1230 * Internal parser structure.
1232 * Create specification.
1234 * Size in bytes of the specification to copy.
/*
 * Append a Verbs specification of @size bytes to every pending
 * attribute: the drop-queue attribute for drop rules, otherwise each
 * allocated per-hash-Rx-queue attribute whose L3 family matches the
 * current layer (or has none).
 */
1237 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
/* Drop rules: a single destination attribute. */
1244 dst = (void *)((uintptr_t)parser->drop_q.ibv_attr +
1245 parser->drop_q.offset);
1246 memcpy(dst, src, size);
1247 ++parser->drop_q.ibv_attr->num_of_specs;
1248 parser->drop_q.offset += size;
1251 for (i = 0; i != hash_rxq_init_n; ++i) {
1252 if (!parser->queue[i].ibv_attr)
1254 /* Specification must be the same l3 type or none. */
1255 if (parser->layer == HASH_RXQ_ETH ||
1256 (hash_rxq_init[parser->layer].ip_version ==
1257 hash_rxq_init[i].ip_version) ||
1258 (hash_rxq_init[i].ip_version == 0)) {
1259 dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1260 parser->queue[i].offset);
1261 memcpy(dst, src, size);
1262 ++parser->queue[i].ibv_attr->num_of_specs;
1263 parser->queue[i].offset += size;
1269 * Convert Ethernet item to Verbs specification.
1272 * Item specification.
1273 * @param default_mask[in]
1274 * Default bit-masks to use when item->mask is not provided.
1275 * @param data[in, out]
/*
 * Convert an Ethernet pattern item into a Verbs Ethernet flow
 * specification (MAC addresses and EtherType, masked) and append it to
 * every pending attribute via mlx5_flow_create_copy().
 *
 * Fix: restore "&eth" on the memcpy/copy lines where HTML-entity
 * decoding had corrupted the text into the single character "ð"
 * (mojibake), which is not valid C.
 */
1279 mlx5_flow_create_eth(const struct rte_flow_item *item,
1280 const void *default_mask,
1283 const struct rte_flow_item_eth *spec = item->spec;
1284 const struct rte_flow_item_eth *mask = item->mask;
1285 struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1286 const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1287 struct ibv_flow_spec_eth eth = {
1288 .type = parser->inner | IBV_FLOW_SPEC_ETH,
1292 /* Don't update layer for the inner pattern. */
1294 parser->layer = HASH_RXQ_ETH;
/* Fall back to the item's default mask when none is provided. */
1299 mask = default_mask;
1300 memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
1301 memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
1302 eth.val.ether_type = spec->type;
1303 memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
1304 memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
1305 eth.mask.ether_type = mask->type;
1306 /* Remove unwanted bits from values. */
1307 for (i = 0; i < ETHER_ADDR_LEN; ++i) {
1308 eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
1309 eth.val.src_mac[i] &= eth.mask.src_mac[i];
1311 eth.val.ether_type &= eth.mask.ether_type;
1313 mlx5_flow_create_copy(parser, &eth, eth_size);
1318 * Convert VLAN item to Verbs specification.
1321 * Item specification.
1322 * @param default_mask[in]
1323 * Default bit-masks to use when item->mask is not provided.
1324 * @param data[in, out]
/*
 * Convert a VLAN pattern item: instead of emitting a new spec, patch
 * the TCI into the Ethernet spec previously appended at the tail of
 * each pending attribute (hence the "- eth_size" offsets).
 * NOTE(review): the guards around the drop/queue branches and the
 * final return are missing from this excerpt.
 */
1328 mlx5_flow_create_vlan(const struct rte_flow_item *item,
1329 const void *default_mask,
1332 const struct rte_flow_item_vlan *spec = item->spec;
1333 const struct rte_flow_item_vlan *mask = item->mask;
1334 struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1335 struct ibv_flow_spec_eth *eth;
1336 const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
/* Fall back to the item's default mask when none is provided. */
1341 mask = default_mask;
/* Drop rules: patch the drop-queue attribute's last ETH spec. */
1344 eth = (void *)((uintptr_t)parser->drop_q.ibv_attr +
1345 parser->drop_q.offset - eth_size);
1346 eth->val.vlan_tag = spec->tci;
1347 eth->mask.vlan_tag = mask->tci;
1348 eth->val.vlan_tag &= eth->mask.vlan_tag;
/* Otherwise patch every allocated per-hash-type attribute. */
1351 for (i = 0; i != hash_rxq_init_n; ++i) {
1352 if (!parser->queue[i].ibv_attr)
1355 eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1356 parser->queue[i].offset - eth_size);
1357 eth->val.vlan_tag = spec->tci;
1358 eth->mask.vlan_tag = mask->tci;
1359 eth->val.vlan_tag &= eth->mask.vlan_tag;
1366 * Convert IPv4 item to Verbs specification.
1369 * Item specification.
1370 * @param default_mask[in]
1371 * Default bit-masks to use when item->mask is not provided.
1372 * @param data[in, out]
/*
 * Convert an IPv4 pattern item to a Verbs IPV4_EXT spec (addresses, proto,
 * TOS) and append it via mlx5_flow_create_copy().  Sets the parser's hash
 * layer to HASH_RXQ_IPV4 for outer patterns.
 * NOTE(review): partial extract — if (!parser->inner)/if (spec)/if (!mask)
 * guards and closing braces are missing lines; code kept verbatim.
 */
1376 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
1377 const void *default_mask,
1380 const struct rte_flow_item_ipv4 *spec = item->spec;
1381 const struct rte_flow_item_ipv4 *mask = item->mask;
1382 struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1383 unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
1384 struct ibv_flow_spec_ipv4_ext ipv4 = {
1385 .type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
1389 /* Don't update layer for the inner pattern. */
1391 parser->layer = HASH_RXQ_IPV4;
1394 mask = default_mask;
1395 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
1396 .src_ip = spec->hdr.src_addr,
1397 .dst_ip = spec->hdr.dst_addr,
1398 .proto = spec->hdr.next_proto_id,
1399 .tos = spec->hdr.type_of_service,
1401 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
1402 .src_ip = mask->hdr.src_addr,
1403 .dst_ip = mask->hdr.dst_addr,
1404 .proto = mask->hdr.next_proto_id,
1405 .tos = mask->hdr.type_of_service,
1407 /* Remove unwanted bits from values. */
1408 ipv4.val.src_ip &= ipv4.mask.src_ip;
1409 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
1410 ipv4.val.proto &= ipv4.mask.proto;
1411 ipv4.val.tos &= ipv4.mask.tos;
1413 mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
1418 * Convert IPv6 item to Verbs specification.
1421 * Item specification.
1422 * @param default_mask[in]
1423 * Default bit-masks to use when item->mask is not provided.
1424 * @param data[in, out]
/*
 * Convert an IPv6 pattern item to a Verbs IPV6 spec and append it.
 * NOTE(review): RTE_DIM() is used as a memcpy byte count — this is only
 * correct if src_ip/dst_ip are byte arrays (uint8_t[16]); confirm against
 * the verbs headers.  Partial extract: guards and braces are missing lines.
 */
1428 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
1429 const void *default_mask,
1432 const struct rte_flow_item_ipv6 *spec = item->spec;
1433 const struct rte_flow_item_ipv6 *mask = item->mask;
1434 struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1435 unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
1436 struct ibv_flow_spec_ipv6 ipv6 = {
1437 .type = parser->inner | IBV_FLOW_SPEC_IPV6,
1441 /* Don't update layer for the inner pattern. */
1443 parser->layer = HASH_RXQ_IPV6;
1448 mask = default_mask;
1449 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
1450 RTE_DIM(ipv6.val.src_ip));
1451 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
1452 RTE_DIM(ipv6.val.dst_ip));
1453 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
1454 RTE_DIM(ipv6.mask.src_ip));
1455 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
1456 RTE_DIM(ipv6.mask.dst_ip));
1457 ipv6.mask.flow_label = mask->hdr.vtc_flow;
1458 ipv6.mask.next_hdr = mask->hdr.proto;
1459 ipv6.mask.hop_limit = mask->hdr.hop_limits;
1460 /* Remove unwanted bits from values. */
1461 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
1462 ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
1463 ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
1465 ipv6.val.flow_label &= ipv6.mask.flow_label;
1466 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
1467 ipv6.val.hop_limit &= ipv6.mask.hop_limit;
1469 mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
1474 * Convert UDP item to Verbs specification.
1477 * Item specification.
1478 * @param default_mask[in]
1479 * Default bit-masks to use when item->mask is not provided.
1480 * @param data[in, out]
/*
 * Convert a UDP pattern item to a Verbs TCP_UDP spec (UDP type) and append
 * it.  For outer patterns, refines the hash layer from IPV4/IPV6 to
 * UDPV4/UDPV6 so the right hash RXQ is selected.
 * NOTE(review): partial extract — 'else', if (spec)/if (!mask) guards and
 * braces are missing lines; code kept verbatim.
 */
1484 mlx5_flow_create_udp(const struct rte_flow_item *item,
1485 const void *default_mask,
1488 const struct rte_flow_item_udp *spec = item->spec;
1489 const struct rte_flow_item_udp *mask = item->mask;
1490 struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1491 unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1492 struct ibv_flow_spec_tcp_udp udp = {
1493 .type = parser->inner | IBV_FLOW_SPEC_UDP,
1497 /* Don't update layer for the inner pattern. */
1498 if (!parser->inner) {
1499 if (parser->layer == HASH_RXQ_IPV4)
1500 parser->layer = HASH_RXQ_UDPV4;
1502 parser->layer = HASH_RXQ_UDPV6;
1506 mask = default_mask;
1507 udp.val.dst_port = spec->hdr.dst_port;
1508 udp.val.src_port = spec->hdr.src_port;
1509 udp.mask.dst_port = mask->hdr.dst_port;
1510 udp.mask.src_port = mask->hdr.src_port;
1511 /* Remove unwanted bits from values. */
1512 udp.val.src_port &= udp.mask.src_port;
1513 udp.val.dst_port &= udp.mask.dst_port;
1515 mlx5_flow_create_copy(parser, &udp, udp_size);
1520 * Convert TCP item to Verbs specification.
1523 * Item specification.
1524 * @param default_mask[in]
1525 * Default bit-masks to use when item->mask is not provided.
1526 * @param data[in, out]
/*
 * Convert a TCP pattern item to a Verbs TCP_UDP spec (TCP type) and append
 * it.  Mirrors mlx5_flow_create_udp(), refining the hash layer to
 * TCPV4/TCPV6 for outer patterns.
 * NOTE(review): partial extract — guards and braces are missing lines.
 */
1530 mlx5_flow_create_tcp(const struct rte_flow_item *item,
1531 const void *default_mask,
1534 const struct rte_flow_item_tcp *spec = item->spec;
1535 const struct rte_flow_item_tcp *mask = item->mask;
1536 struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1537 unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1538 struct ibv_flow_spec_tcp_udp tcp = {
1539 .type = parser->inner | IBV_FLOW_SPEC_TCP,
1543 /* Don't update layer for the inner pattern. */
1544 if (!parser->inner) {
1545 if (parser->layer == HASH_RXQ_IPV4)
1546 parser->layer = HASH_RXQ_TCPV4;
1548 parser->layer = HASH_RXQ_TCPV6;
1552 mask = default_mask;
1553 tcp.val.dst_port = spec->hdr.dst_port;
1554 tcp.val.src_port = spec->hdr.src_port;
1555 tcp.mask.dst_port = mask->hdr.dst_port;
1556 tcp.mask.src_port = mask->hdr.src_port;
1557 /* Remove unwanted bits from values. */
1558 tcp.val.src_port &= tcp.mask.src_port;
1559 tcp.val.dst_port &= tcp.mask.dst_port;
1561 mlx5_flow_create_copy(parser, &tcp, tcp_size);
1566 * Convert VXLAN item to Verbs specification.
1569 * Item specification.
1570 * @param default_mask[in]
1571 * Default bit-masks to use when item->mask is not provided.
1572 * @param data[in, out]
/*
 * Convert a VXLAN pattern item to a Verbs tunnel spec.  Marks the parser as
 * "inner" so subsequent items apply to the encapsulated packet.  The 24-bit
 * VNI is copied into bytes 1..3 of a 32-bit id ('id' is declared in lines
 * missing from this extract — presumably a union of vlan_id/vni[4]).
 * VNI 0 is refused (see the in-code rationale below).
 */
1576 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
1577 const void *default_mask,
1580 const struct rte_flow_item_vxlan *spec = item->spec;
1581 const struct rte_flow_item_vxlan *mask = item->mask;
1582 struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1583 unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1584 struct ibv_flow_spec_tunnel vxlan = {
1585 .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1594 parser->inner = IBV_FLOW_SPEC_INNER;
1597 mask = default_mask;
1598 memcpy(&id.vni[1], spec->vni, 3);
1599 vxlan.val.tunnel_id = id.vlan_id;
1600 memcpy(&id.vni[1], mask->vni, 3);
1601 vxlan.mask.tunnel_id = id.vlan_id;
1602 /* Remove unwanted bits from values. */
1603 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1606 * Tunnel id 0 is equivalent as not adding a VXLAN layer, if only this
1607 * layer is defined in the Verbs specification it is interpreted as
1608 * wildcard and all packets will match this rule, if it follows a full
1609 * stack layer (ex: eth / ipv4 / udp), all packets matching the layers
1610 * before will also match this rule.
1611 * To avoid such situation, VNI 0 is currently refused.
1613 if (!vxlan.val.tunnel_id)
1615 mlx5_flow_create_copy(parser, &vxlan, size);
1620 * Convert mark/flag action to Verbs specification.
1623 * Internal parser structure.
/*
 * Append a Verbs ACTION_TAG spec carrying the (encoded) mark value.
 * Caller must have set parser->mark beforehand (asserted).
 */
1628 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
1630 unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1631 struct ibv_flow_spec_action_tag tag = {
1632 .type = IBV_FLOW_SPEC_ACTION_TAG,
/* mark_id is encoded before being written to the completion queue entry. */
1634 .tag_id = mlx5_flow_mark_set(mark_id),
1637 assert(parser->mark);
1638 mlx5_flow_create_copy(parser, &tag, size);
1643 * Convert count action to Verbs specification.
1646 * Pointer to private structure.
1648 * Pointer to MLX5 flow parser structure.
1651 * 0 on success, errno value on failure.
/*
 * Create a Verbs counter set for the flow and append an ACTION_COUNT spec
 * referencing it.  Compiled to a no-op when the verbs library lacks
 * counter-set support (__rte_unused parameters).
 * NOTE(review): the error check after ibv_create_counter_set() is a
 * missing line in this extract.
 */
1654 mlx5_flow_create_count(struct priv *priv __rte_unused,
1655 struct mlx5_flow_parse *parser __rte_unused)
1657 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
1658 unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
1659 struct ibv_counter_set_init_attr init_attr = {0};
1660 struct ibv_flow_spec_counter_action counter = {
1661 .type = IBV_FLOW_SPEC_ACTION_COUNT,
1663 .counter_set_handle = 0,
1666 init_attr.counter_set_id = 0;
1667 parser->cs = ibv_create_counter_set(priv->ctx, &init_attr);
1670 counter.counter_set_handle = parser->cs->handle;
1671 mlx5_flow_create_copy(parser, &counter, size);
1677 * Complete flow rule creation with a drop queue.
1680 * Pointer to private structure.
1682 * Internal parser structure.
1684 * Pointer to the rte_flow.
1686 * Perform verbose error reporting if not NULL.
1689 * 0 on success, errno value on failure.
/*
 * Finalize a drop rule: append an ACTION_DROP spec to the drop-queue
 * attribute, transfer ownership of the attribute to the flow, and (if the
 * device is started) install it on the drop queue QP via ibv_create_flow().
 * The trailing statements are the error-unwind path: destroy/free whatever
 * was created, then release the counter set if any.
 */
1692 priv_flow_create_action_queue_drop(struct priv *priv,
1693 struct mlx5_flow_parse *parser,
1694 struct rte_flow *flow,
1695 struct rte_flow_error *error)
1697 struct ibv_flow_spec_action_drop *drop;
1698 unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
1704 drop = (void *)((uintptr_t)parser->drop_q.ibv_attr +
1705 parser->drop_q.offset);
1706 *drop = (struct ibv_flow_spec_action_drop){
1707 .type = IBV_FLOW_SPEC_ACTION_DROP,
1710 ++parser->drop_q.ibv_attr->num_of_specs;
1711 parser->drop_q.offset += size;
/* Ownership of the attribute moves from the parser to the flow. */
1712 flow->drxq.ibv_attr = parser->drop_q.ibv_attr;
1714 flow->cs = parser->cs;
1715 if (!priv->dev->data->dev_started)
1717 parser->drop_q.ibv_attr = NULL;
1718 flow->drxq.ibv_flow = ibv_create_flow(priv->flow_drop_queue->qp,
1719 flow->drxq.ibv_attr);
1720 if (!flow->drxq.ibv_flow) {
1721 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1722 NULL, "flow rule creation failure");
/* Error path: undo partial creation. */
1729 if (flow->drxq.ibv_flow) {
1730 claim_zero(ibv_destroy_flow(flow->drxq.ibv_flow));
1731 flow->drxq.ibv_flow = NULL;
1733 if (flow->drxq.ibv_attr) {
1734 rte_free(flow->drxq.ibv_attr);
1735 flow->drxq.ibv_attr = NULL;
1738 claim_zero(ibv_destroy_counter_set(flow->cs));
1746 * Create hash Rx queues when RSS is enabled.
1749 * Pointer to private structure.
1751 * Internal parser structure.
1753 * Pointer to the rte_flow.
1755 * Perform verbose error reporting if not NULL.
1758 * 0 on success, a errno value otherwise and rte_errno is set.
/*
 * For every hash-RXQ attribute produced by the parser, take ownership of
 * the attribute and acquire a matching hash RXQ: reuse an existing one
 * (mlx5_priv_hrxq_get) or create it (mlx5_priv_hrxq_new).  Skipped when
 * the device is not started.  A zero hash_fields entry degenerates to a
 * single-queue (non-RSS) hrxq.
 */
1761 priv_flow_create_action_queue_rss(struct priv *priv,
1762 struct mlx5_flow_parse *parser,
1763 struct rte_flow *flow,
1764 struct rte_flow_error *error)
1768 for (i = 0; i != hash_rxq_init_n; ++i) {
1769 uint64_t hash_fields;
1771 if (!parser->queue[i].ibv_attr)
1773 flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
1774 parser->queue[i].ibv_attr = NULL;
1775 hash_fields = hash_rxq_init[i].hash_fields;
1776 if (!priv->dev->data->dev_started)
1778 flow->frxq[i].hrxq =
1779 mlx5_priv_hrxq_get(priv,
1780 parser->rss_conf.rss_key,
1781 parser->rss_conf.rss_key_len,
1784 hash_fields ? parser->queues_n : 1);
1785 if (flow->frxq[i].hrxq)
1787 flow->frxq[i].hrxq =
1788 mlx5_priv_hrxq_new(priv,
1789 parser->rss_conf.rss_key,
1790 parser->rss_conf.rss_key_len,
1793 hash_fields ? parser->queues_n : 1);
1794 if (!flow->frxq[i].hrxq) {
1795 rte_flow_error_set(error, ENOMEM,
1796 RTE_FLOW_ERROR_TYPE_HANDLE,
1797 NULL, "cannot create hash rxq");
1805 * Complete flow rule creation.
1808 * Pointer to private structure.
1810 * Internal parser structure.
1812 * Pointer to the rte_flow.
1814 * Perform verbose error reporting if not NULL.
1817 * 0 on success, a errno value otherwise and rte_errno is set.
/*
 * Finalize a (non-drop) queue/RSS rule: acquire the hash RXQs, install one
 * ibv_flow per populated hash RXQ, then propagate the mark flag to every
 * target RX queue.  The trailing loop is the error-unwind path releasing
 * flows, hrxqs, attributes and the counter set.
 */
1820 priv_flow_create_action_queue(struct priv *priv,
1821 struct mlx5_flow_parse *parser,
1822 struct rte_flow *flow,
1823 struct rte_flow_error *error)
1830 assert(!parser->drop);
1831 err = priv_flow_create_action_queue_rss(priv, parser, flow, error);
1835 flow->cs = parser->cs;
/* Rules are only installed on a started device; otherwise deferred. */
1836 if (!priv->dev->data->dev_started)
1838 for (i = 0; i != hash_rxq_init_n; ++i) {
1839 if (!flow->frxq[i].hrxq)
1841 flow->frxq[i].ibv_flow =
1842 ibv_create_flow(flow->frxq[i].hrxq->qp,
1843 flow->frxq[i].ibv_attr);
1844 if (!flow->frxq[i].ibv_flow) {
1845 rte_flow_error_set(error, ENOMEM,
1846 RTE_FLOW_ERROR_TYPE_HANDLE,
1847 NULL, "flow rule creation failure");
1851 DEBUG("%p type %d QP %p ibv_flow %p",
1853 (void *)flow->frxq[i].hrxq,
1854 (void *)flow->frxq[i].ibv_flow);
/* Propagate the mark flag to every RX queue the flow targets. */
1856 for (i = 0; i != parser->queues_n; ++i) {
1857 struct mlx5_rxq_data *q =
1858 (*priv->rxqs)[parser->queues[i]];
1860 q->mark |= parser->mark;
/* Error path: undo partial creation. */
1865 for (i = 0; i != hash_rxq_init_n; ++i) {
1866 if (flow->frxq[i].ibv_flow) {
1867 struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;
1869 claim_zero(ibv_destroy_flow(ibv_flow));
1871 if (flow->frxq[i].hrxq)
1872 mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq);
1873 if (flow->frxq[i].ibv_attr)
1874 rte_free(flow->frxq[i].ibv_attr);
1877 claim_zero(ibv_destroy_counter_set(flow->cs));
1888 * Pointer to private structure.
1890 * Pointer to a TAILQ flow list.
1892 * Flow rule attributes.
1893 * @param[in] pattern
1894 * Pattern specification (list terminated by the END pattern item).
1895 * @param[in] actions
1896 * Associated actions (list terminated by the END action).
1898 * Perform verbose error reporting if not NULL.
1901 * A flow on success, NULL otherwise.
/*
 * Convert (attr, items, actions) with priv_flow_convert(), allocate the
 * rte_flow with its per-flow queue array and RSS key appended in one
 * rte_calloc() block, finalize via the drop or queue path, and insert the
 * flow into 'list'.  Returns the flow or NULL on error; the trailing
 * statements free any attribute buffers still owned by the parser.
 */
1903 static struct rte_flow *
1904 priv_flow_create(struct priv *priv,
1905 struct mlx5_flows *list,
1906 const struct rte_flow_attr *attr,
1907 const struct rte_flow_item items[],
1908 const struct rte_flow_action actions[],
1909 struct rte_flow_error *error)
1911 struct mlx5_flow_parse parser = { .create = 1, };
1912 struct rte_flow *flow = NULL;
1916 err = priv_flow_convert(priv, attr, items, actions, error, &parser);
/* Queue array is carved out of the same allocation, right after *flow. */
1919 flow = rte_calloc(__func__, 1,
1920 sizeof(*flow) + parser.queues_n * sizeof(uint16_t),
1923 rte_flow_error_set(error, ENOMEM,
1924 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1926 "cannot allocate flow memory");
1929 /* Copy queues configuration. */
1930 flow->queues = (uint16_t (*)[])(flow + 1);
1931 memcpy(flow->queues, parser.queues, parser.queues_n * sizeof(uint16_t));
1932 flow->queues_n = parser.queues_n;
1933 /* Copy RSS configuration. */
1934 flow->rss_conf = parser.rss_conf;
1935 flow->rss_conf.rss_key = flow->rss_key;
1936 memcpy(flow->rss_key, parser.rss_key, parser.rss_conf.rss_key_len);
1937 /* finalise the flow. */
1939 err = priv_flow_create_action_queue_drop(priv, &parser, flow,
1942 err = priv_flow_create_action_queue(priv, &parser, flow, error);
1945 TAILQ_INSERT_TAIL(list, flow, next);
1946 DEBUG("Flow created %p", (void *)flow);
/* Error path: free attributes the parser still owns. */
1950 rte_free(parser.drop_q.ibv_attr);
1952 for (i = 0; i != hash_rxq_init_n; ++i) {
1953 if (parser.queue[i].ibv_attr)
1954 rte_free(parser.queue[i].ibv_attr);
1962 * Validate a flow supported by the NIC.
1964 * @see rte_flow_validate()
/*
 * rte_flow validate callback: run the converter in dry-run mode
 * (.create = 0) and report whether the rule is supported.
 */
1968 mlx5_flow_validate(struct rte_eth_dev *dev,
1969 const struct rte_flow_attr *attr,
1970 const struct rte_flow_item items[],
1971 const struct rte_flow_action actions[],
1972 struct rte_flow_error *error)
1974 struct priv *priv = dev->data->dev_private;
1976 struct mlx5_flow_parse parser = { .create = 0, };
1979 ret = priv_flow_convert(priv, attr, items, actions, error, &parser);
1987 * @see rte_flow_create()
/*
 * rte_flow create callback: thin wrapper delegating to priv_flow_create()
 * with the device's main flow list.
 */
1991 mlx5_flow_create(struct rte_eth_dev *dev,
1992 const struct rte_flow_attr *attr,
1993 const struct rte_flow_item items[],
1994 const struct rte_flow_action actions[],
1995 struct rte_flow_error *error)
1997 struct priv *priv = dev->data->dev_private;
1998 struct rte_flow *flow;
2001 flow = priv_flow_create(priv, &priv->flows, attr, items, actions,
2011 * Pointer to private structure.
2013 * Pointer to a TAILQ flow list.
/*
 * Destroy one flow: first recompute the 'mark' flag of each RX queue the
 * flow used (a queue stays marked only if some *other* flow in 'list'
 * still marks it), then tear down verbs flows/hrxqs/attributes, release
 * the counter set and unlink the flow from the list.
 */
2018 priv_flow_destroy(struct priv *priv,
2019 struct mlx5_flows *list,
2020 struct rte_flow *flow)
2024 if (flow->drop || !flow->mark)
2026 for (i = 0; i != flow->queues_n; ++i) {
2027 struct rte_flow *tmp;
2031 * To remove the mark from the queue, the queue must not be
2032 * present in any other marked flow (RSS or not).
2034 TAILQ_FOREACH(tmp, list, next) {
2036 uint16_t *tqs = NULL;
2041 for (j = 0; j != hash_rxq_init_n; ++j) {
2042 if (!tmp->frxq[j].hrxq)
2044 tqs = tmp->frxq[j].hrxq->ind_table->queues;
2045 tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
2049 for (j = 0; (j != tq_n) && !mark; j++)
2050 if (tqs[j] == (*flow->queues)[i])
2053 (*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
/* Release verbs resources. */
2057 if (flow->drxq.ibv_flow)
2058 claim_zero(ibv_destroy_flow(flow->drxq.ibv_flow))
2059 rte_free(flow->drxq.ibv_attr);
2061 for (i = 0; i != hash_rxq_init_n; ++i) {
2062 struct mlx5_flow *frxq = &flow->frxq[i];
2065 claim_zero(ibv_destroy_flow(frxq->ibv_flow));
2067 mlx5_priv_hrxq_release(priv, frxq->hrxq);
2069 rte_free(frxq->ibv_attr);
2073 claim_zero(ibv_destroy_counter_set(flow->cs));
2076 TAILQ_REMOVE(list, flow, next);
2077 DEBUG("Flow destroyed %p", (void *)flow);
2082 * Destroy all flows.
2085 * Pointer to private structure.
2087 * Pointer to a TAILQ flow list.
/* Destroy every flow in 'list', head first, until the list is empty. */
2090 priv_flow_flush(struct priv *priv, struct mlx5_flows *list)
2092 while (!TAILQ_EMPTY(list)) {
2093 struct rte_flow *flow;
2095 flow = TAILQ_FIRST(list);
2096 priv_flow_destroy(priv, list, flow);
2101 * Create drop queue.
2104 * Pointer to private structure.
/*
 * Allocate the shared drop queue: CQ -> WQ -> single-entry indirection
 * table -> hashless RSS QP, stored in priv->flow_drop_queue.  Resources
 * are created in dependency order; the trailing claim_zero() calls are the
 * error-unwind path tearing them down in reverse order.
 */
2110 priv_flow_create_drop_queue(struct priv *priv)
2112 struct mlx5_hrxq_drop *fdq = NULL;
2116 fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
2118 WARN("cannot allocate memory for drop queue");
2121 fdq->cq = ibv_create_cq(priv->ctx, 1, NULL, NULL, 0);
2123 WARN("cannot allocate CQ for drop queue");
2126 fdq->wq = ibv_create_wq(priv->ctx,
2127 &(struct ibv_wq_init_attr){
2128 .wq_type = IBV_WQT_RQ,
2135 WARN("cannot allocate WQ for drop queue");
2138 fdq->ind_table = ibv_create_rwq_ind_table(priv->ctx,
2139 &(struct ibv_rwq_ind_table_init_attr){
2140 .log_ind_tbl_size = 0,
2141 .ind_tbl = &fdq->wq,
2144 if (!fdq->ind_table) {
2145 WARN("cannot allocate indirection table for drop queue");
2148 fdq->qp = ibv_create_qp_ex(priv->ctx,
2149 &(struct ibv_qp_init_attr_ex){
2150 .qp_type = IBV_QPT_RAW_PACKET,
2152 IBV_QP_INIT_ATTR_PD |
2153 IBV_QP_INIT_ATTR_IND_TABLE |
2154 IBV_QP_INIT_ATTR_RX_HASH,
2155 .rx_hash_conf = (struct ibv_rx_hash_conf){
2157 IBV_RX_HASH_FUNC_TOEPLITZ,
2158 .rx_hash_key_len = rss_hash_default_key_len,
2159 .rx_hash_key = rss_hash_default_key,
/* No fields hashed: everything lands on the single drop WQ. */
2160 .rx_hash_fields_mask = 0,
2162 .rwq_ind_tbl = fdq->ind_table,
2166 WARN("cannot allocate QP for drop queue");
2169 priv->flow_drop_queue = fdq;
/* Error path: destroy in reverse creation order. */
2173 claim_zero(ibv_destroy_qp(fdq->qp));
2175 claim_zero(ibv_destroy_rwq_ind_table(fdq->ind_table));
2177 claim_zero(ibv_destroy_wq(fdq->wq));
2179 claim_zero(ibv_destroy_cq(fdq->cq));
2182 priv->flow_drop_queue = NULL;
2187 * Delete drop queue.
2190 * Pointer to private structure.
/*
 * Release the shared drop queue: QP, indirection table, WQ and CQ in
 * reverse creation order, then clear priv->flow_drop_queue.
 */
2193 priv_flow_delete_drop_queue(struct priv *priv)
2195 struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
2200 claim_zero(ibv_destroy_qp(fdq->qp));
2202 claim_zero(ibv_destroy_rwq_ind_table(fdq->ind_table));
2204 claim_zero(ibv_destroy_wq(fdq->wq));
2206 claim_zero(ibv_destroy_cq(fdq->cq));
2208 priv->flow_drop_queue = NULL;
2215 * Pointer to private structure.
2217 * Pointer to a TAILQ flow list.
/*
 * Remove all installed verbs flows (reverse order) while keeping the
 * rte_flow objects and their attributes, so priv_flow_start() can
 * re-apply them.  Also clears the 'mark' flag on queues referenced via
 * any of the flow's indirection tables.
 */
2220 priv_flow_stop(struct priv *priv, struct mlx5_flows *list)
2222 struct rte_flow *flow;
2224 TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
2228 if (!flow->drxq.ibv_flow)
2230 claim_zero(ibv_destroy_flow(flow->drxq.ibv_flow));
2231 flow->drxq.ibv_flow = NULL;
2236 struct mlx5_ind_table_ibv *ind_tbl = NULL;
2238 for (i = 0; i != hash_rxq_init_n; ++i) {
2239 if (!flow->frxq[i].hrxq)
2241 ind_tbl = flow->frxq[i].hrxq->ind_table;
2244 for (i = 0; i != ind_tbl->queues_n; ++i)
2245 (*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
2247 for (i = 0; i != hash_rxq_init_n; ++i) {
2248 if (!flow->frxq[i].ibv_flow)
2250 claim_zero(ibv_destroy_flow(flow->frxq[i].ibv_flow));
2251 flow->frxq[i].ibv_flow = NULL;
2252 mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq);
2253 flow->frxq[i].hrxq = NULL;
2255 DEBUG("Flow %p removed", (void *)flow);
2263 * Pointer to private structure.
2265 * Pointer to a TAILQ flow list.
2268 * 0 on success, a errno value otherwise and rte_errno is set.
/*
 * Re-apply every flow in 'list' after a device (re)start: drop rules go to
 * the drop queue QP; queue/RSS rules re-acquire (get-or-new) their hash
 * RXQs and are re-installed, then the 'mark' flag is restored on the
 * target RX queues.
 */
2271 priv_flow_start(struct priv *priv, struct mlx5_flows *list)
2273 struct rte_flow *flow;
2275 TAILQ_FOREACH(flow, list, next) {
2279 flow->drxq.ibv_flow =
2280 ibv_create_flow(priv->flow_drop_queue->qp,
2281 flow->drxq.ibv_attr);
2282 if (!flow->drxq.ibv_flow) {
2283 DEBUG("Flow %p cannot be applied",
2288 DEBUG("Flow %p applied", (void *)flow);
2292 for (i = 0; i != hash_rxq_init_n; ++i) {
2293 if (!flow->frxq[i].ibv_attr)
2295 flow->frxq[i].hrxq =
2296 mlx5_priv_hrxq_get(priv, flow->rss_conf.rss_key,
2297 flow->rss_conf.rss_key_len,
2298 hash_rxq_init[i].hash_fields,
2301 if (flow->frxq[i].hrxq)
2303 flow->frxq[i].hrxq =
2304 mlx5_priv_hrxq_new(priv, flow->rss_conf.rss_key,
2305 flow->rss_conf.rss_key_len,
2306 hash_rxq_init[i].hash_fields,
2309 if (!flow->frxq[i].hrxq) {
2310 DEBUG("Flow %p cannot be applied",
2316 flow->frxq[i].ibv_flow =
2317 ibv_create_flow(flow->frxq[i].hrxq->qp,
2318 flow->frxq[i].ibv_attr);
2319 if (!flow->frxq[i].ibv_flow) {
2320 DEBUG("Flow %p cannot be applied",
2325 DEBUG("Flow %p applied", (void *)flow);
2329 for (i = 0; i != flow->queues_n; ++i)
2330 (*priv->rxqs)[(*flow->queues)[i]]->mark = 1;
2336 * Verify the flow list is empty
2339 * Pointer to private structure.
2341 * @return the number of flows not released.
/*
 * Leak check at teardown: log every flow still present in priv->flows and
 * (per the header above) return how many were found.
 */
2344 priv_flow_verify(struct priv *priv)
2346 struct rte_flow *flow;
2349 TAILQ_FOREACH(flow, &priv->flows, next) {
2350 DEBUG("%p: flow %p still referenced", (void *)priv,
2358 * Enable a control flow configured from the control plane.
2361 * Pointer to Ethernet device.
2363 * An Ethernet flow spec to apply.
2365 * An Ethernet flow mask to apply.
2367 * A VLAN flow spec to apply.
2369 * A VLAN flow mask to apply.
/*
 * Install a control-plane flow (e.g. promisc/allmulti/MAC) at the lowest
 * control priority: ETH (+ optional VLAN) pattern with an RSS action
 * spreading over the current RETA.  'action_rss' is declared in lines
 * missing from this extract — presumably a union wrapping
 * rte_flow_action_rss with inline storage for the queue array.
 * Returns 0 on success (per the flow != NULL check elided here).
 */
2375 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
2376 struct rte_flow_item_eth *eth_spec,
2377 struct rte_flow_item_eth *eth_mask,
2378 struct rte_flow_item_vlan *vlan_spec,
2379 struct rte_flow_item_vlan *vlan_mask)
2381 struct priv *priv = dev->data->dev_private;
2382 const struct rte_flow_attr attr = {
2384 .priority = MLX5_CTRL_FLOW_PRIORITY,
2386 struct rte_flow_item items[] = {
2388 .type = RTE_FLOW_ITEM_TYPE_ETH,
/* VLAN item collapses to END when no VLAN spec is requested. */
2394 .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
2395 RTE_FLOW_ITEM_TYPE_END,
2401 .type = RTE_FLOW_ITEM_TYPE_END,
2404 struct rte_flow_action actions[] = {
2406 .type = RTE_FLOW_ACTION_TYPE_RSS,
2409 .type = RTE_FLOW_ACTION_TYPE_END,
2412 struct rte_flow *flow;
2413 struct rte_flow_error error;
2416 struct rte_flow_action_rss rss;
2418 const struct rte_eth_rss_conf *rss_conf;
2420 uint16_t queue[RTE_MAX_QUEUES_PER_PORT];
2424 if (!priv->reta_idx_n)
2426 for (i = 0; i != priv->reta_idx_n; ++i)
2427 action_rss.local.queue[i] = (*priv->reta_idx)[i];
2428 action_rss.local.rss_conf = &priv->rss_conf;
2429 action_rss.local.num = priv->reta_idx_n;
2430 actions[0].conf = (const void *)&action_rss.rss;
2431 flow = priv_flow_create(priv, &priv->ctrl_flows, &attr, items, actions,
2439 * Enable a flow control configured from the control plane.
2442 * Pointer to Ethernet device.
2444 * An Ethernet flow spec to apply.
2446 * An Ethernet flow mask to apply.
/* VLAN-less convenience wrapper around mlx5_ctrl_flow_vlan(). */
2452 mlx5_ctrl_flow(struct rte_eth_dev *dev,
2453 struct rte_flow_item_eth *eth_spec,
2454 struct rte_flow_item_eth *eth_mask)
2456 return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
2462 * @see rte_flow_destroy()
/* rte_flow destroy callback: delegate to priv_flow_destroy(). */
2466 mlx5_flow_destroy(struct rte_eth_dev *dev,
2467 struct rte_flow *flow,
2468 struct rte_flow_error *error)
2470 struct priv *priv = dev->data->dev_private;
2474 priv_flow_destroy(priv, &priv->flows, flow);
2480 * Destroy all flows.
2482 * @see rte_flow_flush()
/* rte_flow flush callback: destroy every flow on the main list. */
2486 mlx5_flow_flush(struct rte_eth_dev *dev,
2487 struct rte_flow_error *error)
2489 struct priv *priv = dev->data->dev_private;
2493 priv_flow_flush(priv, &priv->flows);
2498 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
2500 * Query flow counter.
2504 * @param counter_value
2505 * returned data from the counter.
2508 * 0 on success, a errno value otherwise and rte_errno is set.
/*
 * Read hits/bytes from a verbs counter set.  Values reported to the user
 * are deltas against the stats snapshot taken at the last reset; when the
 * caller requests a reset, the snapshot is advanced to the raw readings.
 * NOTE(review): 'counters' is presumably wired into query_out in the
 * elided initializer lines — confirm against upstream.
 */
2511 priv_flow_query_count(struct ibv_counter_set *cs,
2512 struct mlx5_flow_counter_stats *counter_stats,
2513 struct rte_flow_query_count *query_count,
2514 struct rte_flow_error *error)
2516 uint64_t counters[2];
2517 struct ibv_query_counter_set_attr query_cs_attr = {
2519 .query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
2521 struct ibv_counter_set_data query_out = {
2523 .outlen = 2 * sizeof(uint64_t),
2525 int res = ibv_query_counter_set(&query_cs_attr, &query_out);
2528 rte_flow_error_set(error, -res,
2529 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2531 "cannot read counter");
2534 query_count->hits_set = 1;
2535 query_count->bytes_set = 1;
2536 query_count->hits = counters[0] - counter_stats->hits;
2537 query_count->bytes = counters[1] - counter_stats->bytes;
2538 if (query_count->reset) {
2539 counter_stats->hits = counters[0];
2540 counter_stats->bytes = counters[1];
2548 * @see rte_flow_query()
/*
 * rte_flow query callback (COUNT only): forward to priv_flow_query_count()
 * when the flow has a counter set; otherwise report "no counter found".
 */
2552 mlx5_flow_query(struct rte_eth_dev *dev,
2553 struct rte_flow *flow,
2554 enum rte_flow_action_type action __rte_unused,
2556 struct rte_flow_error *error)
2558 struct priv *priv = dev->data->dev_private;
2563 res = priv_flow_query_count(flow->cs,
2564 &flow->counter_stats,
2565 (struct rte_flow_query_count *)data,
2568 rte_flow_error_set(error, res,
2569 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2571 "no counter found for flow");
2581 * @see rte_flow_isolate()
/*
 * rte_flow isolate callback: toggling is only allowed while the port is
 * stopped (EBUSY otherwise).  Switches the whole dev_ops table so that
 * isolated mode bypasses the default control flows.
 */
2585 mlx5_flow_isolate(struct rte_eth_dev *dev,
2587 struct rte_flow_error *error)
2589 struct priv *priv = dev->data->dev_private;
2592 if (dev->data->dev_started) {
2593 rte_flow_error_set(error, EBUSY,
2594 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2596 "port must be stopped first");
2600 priv->isolated = !!enable;
2602 priv->dev->dev_ops = &mlx5_dev_ops_isolate;
2604 priv->dev->dev_ops = &mlx5_dev_ops;
2610 * Convert a flow director filter to a generic flow.
2613 * Private structure.
2614 * @param fdir_filter
2615 * Flow director filter to add.
2617 * Generic flow parameters structure.
2620 * 0 on success, errno value on error.
2623 priv_fdir_filter_convert(struct priv *priv,
2624 const struct rte_eth_fdir_filter *fdir_filter,
2625 struct mlx5_fdir *attributes)
2627 const struct rte_eth_fdir_input *input = &fdir_filter->input;
2629 /* Validate queue number. */
2630 if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
2631 ERROR("invalid queue number %d", fdir_filter->action.rx_queue);
2634 attributes->attr.ingress = 1;
2635 attributes->items[0] = (struct rte_flow_item) {
2636 .type = RTE_FLOW_ITEM_TYPE_ETH,
2637 .spec = &attributes->l2,
2638 .mask = &attributes->l2_mask,
2640 switch (fdir_filter->action.behavior) {
2641 case RTE_ETH_FDIR_ACCEPT:
2642 attributes->actions[0] = (struct rte_flow_action){
2643 .type = RTE_FLOW_ACTION_TYPE_QUEUE,
2644 .conf = &attributes->queue,
2647 case RTE_ETH_FDIR_REJECT:
2648 attributes->actions[0] = (struct rte_flow_action){
2649 .type = RTE_FLOW_ACTION_TYPE_DROP,
2653 ERROR("invalid behavior %d", fdir_filter->action.behavior);
2656 attributes->queue.index = fdir_filter->action.rx_queue;
2657 switch (fdir_filter->input.flow_type) {
2658 case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
2659 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2660 .src_addr = input->flow.udp4_flow.ip.src_ip,
2661 .dst_addr = input->flow.udp4_flow.ip.dst_ip,
2662 .time_to_live = input->flow.udp4_flow.ip.ttl,
2663 .type_of_service = input->flow.udp4_flow.ip.tos,
2664 .next_proto_id = input->flow.udp4_flow.ip.proto,
2666 attributes->l4.udp.hdr = (struct udp_hdr){
2667 .src_port = input->flow.udp4_flow.src_port,
2668 .dst_port = input->flow.udp4_flow.dst_port,
2670 attributes->items[1] = (struct rte_flow_item){
2671 .type = RTE_FLOW_ITEM_TYPE_IPV4,
2672 .spec = &attributes->l3,
2674 attributes->items[2] = (struct rte_flow_item){
2675 .type = RTE_FLOW_ITEM_TYPE_UDP,
2676 .spec = &attributes->l4,
2679 case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
2680 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2681 .src_addr = input->flow.tcp4_flow.ip.src_ip,
2682 .dst_addr = input->flow.tcp4_flow.ip.dst_ip,
2683 .time_to_live = input->flow.tcp4_flow.ip.ttl,
2684 .type_of_service = input->flow.tcp4_flow.ip.tos,
2685 .next_proto_id = input->flow.tcp4_flow.ip.proto,
2687 attributes->l4.tcp.hdr = (struct tcp_hdr){
2688 .src_port = input->flow.tcp4_flow.src_port,
2689 .dst_port = input->flow.tcp4_flow.dst_port,
2691 attributes->items[1] = (struct rte_flow_item){
2692 .type = RTE_FLOW_ITEM_TYPE_IPV4,
2693 .spec = &attributes->l3,
2695 attributes->items[2] = (struct rte_flow_item){
2696 .type = RTE_FLOW_ITEM_TYPE_TCP,
2697 .spec = &attributes->l4,
2700 case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
2701 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2702 .src_addr = input->flow.ip4_flow.src_ip,
2703 .dst_addr = input->flow.ip4_flow.dst_ip,
2704 .time_to_live = input->flow.ip4_flow.ttl,
2705 .type_of_service = input->flow.ip4_flow.tos,
2706 .next_proto_id = input->flow.ip4_flow.proto,
2708 attributes->items[1] = (struct rte_flow_item){
2709 .type = RTE_FLOW_ITEM_TYPE_IPV4,
2710 .spec = &attributes->l3,
2713 case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
2714 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2715 .hop_limits = input->flow.udp6_flow.ip.hop_limits,
2716 .proto = input->flow.udp6_flow.ip.proto,
2718 memcpy(attributes->l3.ipv6.hdr.src_addr,
2719 input->flow.udp6_flow.ip.src_ip,
2720 RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2721 memcpy(attributes->l3.ipv6.hdr.dst_addr,
2722 input->flow.udp6_flow.ip.dst_ip,
2723 RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2724 attributes->l4.udp.hdr = (struct udp_hdr){
2725 .src_port = input->flow.udp6_flow.src_port,
2726 .dst_port = input->flow.udp6_flow.dst_port,
2728 attributes->items[1] = (struct rte_flow_item){
2729 .type = RTE_FLOW_ITEM_TYPE_IPV6,
2730 .spec = &attributes->l3,
2732 attributes->items[2] = (struct rte_flow_item){
2733 .type = RTE_FLOW_ITEM_TYPE_UDP,
2734 .spec = &attributes->l4,
2737 case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
2738 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2739 .hop_limits = input->flow.tcp6_flow.ip.hop_limits,
2740 .proto = input->flow.tcp6_flow.ip.proto,
2742 memcpy(attributes->l3.ipv6.hdr.src_addr,
2743 input->flow.tcp6_flow.ip.src_ip,
2744 RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2745 memcpy(attributes->l3.ipv6.hdr.dst_addr,
2746 input->flow.tcp6_flow.ip.dst_ip,
2747 RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2748 attributes->l4.tcp.hdr = (struct tcp_hdr){
2749 .src_port = input->flow.tcp6_flow.src_port,
2750 .dst_port = input->flow.tcp6_flow.dst_port,
2752 attributes->items[1] = (struct rte_flow_item){
2753 .type = RTE_FLOW_ITEM_TYPE_IPV6,
2754 .spec = &attributes->l3,
2756 attributes->items[2] = (struct rte_flow_item){
2757 .type = RTE_FLOW_ITEM_TYPE_UDP,
2758 .spec = &attributes->l4,
2761 case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
2762 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2763 .hop_limits = input->flow.ipv6_flow.hop_limits,
2764 .proto = input->flow.ipv6_flow.proto,
2766 memcpy(attributes->l3.ipv6.hdr.src_addr,
2767 input->flow.ipv6_flow.src_ip,
2768 RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2769 memcpy(attributes->l3.ipv6.hdr.dst_addr,
2770 input->flow.ipv6_flow.dst_ip,
2771 RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2772 attributes->items[1] = (struct rte_flow_item){
2773 .type = RTE_FLOW_ITEM_TYPE_IPV6,
2774 .spec = &attributes->l3,
2778 ERROR("invalid flow type%d",
2779 fdir_filter->input.flow_type);
2786 * Add new flow director filter and store it in list.
2789 * Private structure.
2790 * @param fdir_filter
2791 * Flow director filter to add.
2794 * 0 on success, errno value on failure.
/*
 * Add a flow director filter: convert it to generic attr/items/actions,
 * validate through priv_flow_convert(), then create the flow on the main
 * list.  The all-zero L2 mask means FDIR rules never match on MAC.
 * NOTE(review): partial extract — error checks after each 'ret' and the
 * return statements are missing lines.
 */
2797 priv_fdir_filter_add(struct priv *priv,
2798 const struct rte_eth_fdir_filter *fdir_filter)
2800 struct mlx5_fdir attributes = {
2803 .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
2804 .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
2808 struct mlx5_flow_parse parser = {
2809 .layer = HASH_RXQ_ETH,
2811 struct rte_flow_error error;
2812 struct rte_flow *flow;
2815 ret = priv_fdir_filter_convert(priv, fdir_filter, &attributes);
2818 ret = priv_flow_convert(priv, &attributes.attr, attributes.items,
2819 attributes.actions, &error, &parser);
2822 flow = priv_flow_create(priv,
2829 DEBUG("FDIR created %p", (void *)flow);
2836 * Delete specific filter.
2839 * Private structure.
2840 * @param fdir_filter
2841 * Filter to be deleted.
2844 * 0 on success, errno value on failure.
2847 priv_fdir_filter_delete(struct priv *priv,
2848 const struct rte_eth_fdir_filter *fdir_filter)
2850 struct mlx5_fdir attributes = {
2853 struct mlx5_flow_parse parser = {
2855 .layer = HASH_RXQ_ETH,
2857 struct rte_flow_error error;
2858 struct rte_flow *flow;
2862 ret = priv_fdir_filter_convert(priv, fdir_filter, &attributes);
2865 ret = priv_flow_convert(priv, &attributes.attr, attributes.items,
2866 attributes.actions, &error, &parser);
2869 TAILQ_FOREACH(flow, &priv->flows, next) {
2870 struct ibv_flow_attr *attr;
2871 struct ibv_spec_header *attr_h;
2873 struct ibv_flow_attr *flow_attr;
2874 struct ibv_spec_header *flow_h;
2876 unsigned int specs_n;
2879 attr = parser.drop_q.ibv_attr;
2881 attr = parser.queue[HASH_RXQ_ETH].ibv_attr;
2883 flow_attr = flow->drxq.ibv_attr;
2885 flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr;
2886 /* Compare first the attributes. */
2887 if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
2889 if (attr->num_of_specs == 0)
2891 spec = (void *)((uintptr_t)attr +
2892 sizeof(struct ibv_flow_attr));
2893 flow_spec = (void *)((uintptr_t)flow_attr +
2894 sizeof(struct ibv_flow_attr));
2895 specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
2896 for (i = 0; i != specs_n; ++i) {
2899 if (memcmp(spec, flow_spec,
2900 RTE_MIN(attr_h->size, flow_h->size)))
2902 spec = (void *)((uintptr_t)attr + attr_h->size);
2903 flow_spec = (void *)((uintptr_t)flow_attr +
2906 /* At this point, the flow match. */
2910 priv_flow_destroy(priv, &priv->flows, flow);
2913 rte_free(parser.drop_q.ibv_attr);
2915 for (i = 0; i != hash_rxq_init_n; ++i) {
2916 if (parser.queue[i].ibv_attr)
2917 rte_free(parser.queue[i].ibv_attr);
/**
 * Update queue for specific filter.
 *
 * Implemented as delete-then-add: the old rule is removed before the
 * updated one is inserted.
 *
 * @param priv
 *   Private structure.
 * @param fdir_filter
 *   Filter to be updated.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_fdir_filter_update(struct priv *priv,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	int ret;

	ret = priv_fdir_filter_delete(priv, fdir_filter);
	if (ret)
		return ret;
	ret = priv_fdir_filter_add(priv, fdir_filter);
	return ret;
}
2948 * Flush all filters.
2951 * Private structure.
2954 priv_fdir_filter_flush(struct priv *priv)
2956 priv_flow_flush(priv, &priv->flows);
2960 * Get flow director information.
2963 * Private structure.
2964 * @param[out] fdir_info
2965 * Resulting flow director information.
2968 priv_fdir_info_get(struct priv *priv, struct rte_eth_fdir_info *fdir_info)
2970 struct rte_eth_fdir_masks *mask =
2971 &priv->dev->data->dev_conf.fdir_conf.mask;
2973 fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
2974 fdir_info->guarant_spc = 0;
2975 rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
2976 fdir_info->max_flexpayload = 0;
2977 fdir_info->flow_types_mask[0] = 0;
2978 fdir_info->flex_payload_unit = 0;
2979 fdir_info->max_flex_payload_segment_num = 0;
2980 fdir_info->flex_payload_limit = 0;
2981 memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
2985 * Deal with flow director operations.
2988 * Pointer to private structure.
2990 * Operation to perform.
2992 * Pointer to operation-specific structure.
2995 * 0 on success, errno value on failure.
2998 priv_fdir_ctrl_func(struct priv *priv, enum rte_filter_op filter_op, void *arg)
3000 enum rte_fdir_mode fdir_mode =
3001 priv->dev->data->dev_conf.fdir_conf.mode;
3004 if (filter_op == RTE_ETH_FILTER_NOP)
3006 if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
3007 fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
3008 ERROR("%p: flow director mode %d not supported",
3009 (void *)priv, fdir_mode);
3012 switch (filter_op) {
3013 case RTE_ETH_FILTER_ADD:
3014 ret = priv_fdir_filter_add(priv, arg);
3016 case RTE_ETH_FILTER_UPDATE:
3017 ret = priv_fdir_filter_update(priv, arg);
3019 case RTE_ETH_FILTER_DELETE:
3020 ret = priv_fdir_filter_delete(priv, arg);
3022 case RTE_ETH_FILTER_FLUSH:
3023 priv_fdir_filter_flush(priv);
3025 case RTE_ETH_FILTER_INFO:
3026 priv_fdir_info_get(priv, arg);
3029 DEBUG("%p: unknown operation %u", (void *)priv,
/*
 * NOTE(review): fragment — mlx5_dev_filter_ctrl() continues past this chunk
 * (the trailing return and closing brace are not visible here); code kept
 * byte-identical, comments only.
 */
3038 * Manage filter operations.
3041 * Pointer to Ethernet device structure.
3042 * @param filter_type
3045 * Operation to perform.
3047 * Pointer to operation-specific structure.
3050 * 0 on success, negative errno value on failure.
3053 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
3054 enum rte_filter_type filter_type,
3055 enum rte_filter_op filter_op,
3059 struct priv *priv = dev->data->dev_private;
3061 switch (filter_type) {
3062 case RTE_ETH_FILTER_GENERIC:
/* Generic (rte_flow) API: only GET is valid and it hands back the ops table. */
3063 if (filter_op != RTE_ETH_FILTER_GET)
3065 *(const void **)arg = &mlx5_flow_ops;
3067 case RTE_ETH_FILTER_FDIR:
/* Legacy flow director requests are translated into rte_flow rules. */
3069 ret = priv_fdir_ctrl_func(priv, filter_op, arg);
3073 ERROR("%p: filter type (%d) not supported",
3074 (void *)dev, filter_type);