* Copyright 2016 6WIND S.A.
* Copyright 2016 Mellanox.
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* * Neither the name of 6WIND S.A. nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <sys/queue.h>
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#pragma GCC diagnostic ignored "-Wpedantic"
#include <infiniband/verbs.h>
#pragma GCC diagnostic error "-Wpedantic"
#include <rte_ethdev_driver.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>
#include "mlx5_defs.h"
/* Define minimal priority for control plane flows. */
#define MLX5_CTRL_FLOW_PRIORITY 4
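/*
 * This priority is used verbatim as attr.priority by the control flows
 * built in mlx5_ctrl_flow_vlan() below; rules requesting any other
 * non-zero priority are rejected by priv_flow_convert_attributes().
 */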
/* Internet Protocol versions. */
#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
struct ibv_counter_set_init_attr {
struct ibv_flow_spec_counter_action {
struct ibv_counter_set {
ibv_destroy_counter_set(struct ibv_counter_set *cs)
/* Dev ops structure defined in mlx5.c */
extern const struct eth_dev_ops mlx5_dev_ops;
extern const struct eth_dev_ops mlx5_dev_ops_isolate;
mlx5_flow_create_eth(const struct rte_flow_item *item,
const void *default_mask,
mlx5_flow_create_vlan(const struct rte_flow_item *item,
const void *default_mask,
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
const void *default_mask,
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
const void *default_mask,
mlx5_flow_create_udp(const struct rte_flow_item *item,
const void *default_mask,
mlx5_flow_create_tcp(const struct rte_flow_item *item,
const void *default_mask,
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
const void *default_mask,
struct mlx5_flow_parse;
mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);
mlx5_flow_create_count(struct priv *priv, struct mlx5_flow_parse *parser);
/* Hash RX queue types. */
/* Initialization data for hash RX queue. */
struct hash_rxq_init {
uint64_t hash_fields; /* Fields that participate in the hash. */
uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
unsigned int flow_priority; /* Flow priority to use. */
unsigned int ip_version; /* Internet protocol. */
/* Initialization data for hash RX queues. */
const struct hash_rxq_init hash_rxq_init[] = {
.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
IBV_RX_HASH_DST_IPV4 |
IBV_RX_HASH_SRC_PORT_TCP |
IBV_RX_HASH_DST_PORT_TCP),
.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
.ip_version = MLX5_IPV4,
.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
IBV_RX_HASH_DST_IPV4 |
IBV_RX_HASH_SRC_PORT_UDP |
IBV_RX_HASH_DST_PORT_UDP),
.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
.ip_version = MLX5_IPV4,
.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
IBV_RX_HASH_DST_IPV4),
.dpdk_rss_hf = (ETH_RSS_IPV4 |
.ip_version = MLX5_IPV4,
.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
IBV_RX_HASH_DST_IPV6 |
IBV_RX_HASH_SRC_PORT_TCP |
IBV_RX_HASH_DST_PORT_TCP),
.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
.ip_version = MLX5_IPV6,
.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
IBV_RX_HASH_DST_IPV6 |
IBV_RX_HASH_SRC_PORT_UDP |
IBV_RX_HASH_DST_PORT_UDP),
.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
.ip_version = MLX5_IPV6,
.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
IBV_RX_HASH_DST_IPV6),
.dpdk_rss_hf = (ETH_RSS_IPV6 |
.ip_version = MLX5_IPV6,
/* Number of entries in hash_rxq_init[]. */
const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
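/*
 * Example (illustrative): a rule whose RSS configuration includes
 * ETH_RSS_NONFRAG_IPV4_TCP matches the first entry above and hashes on
 * IPv4 addresses plus TCP ports, while a rule configured with plain
 * ETH_RSS_IPV4 can only use the address-only IPv4 entry.
 */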
/** Structure for holding counter stats. */
struct mlx5_flow_counter_stats {
uint64_t hits; /**< Number of packets matched by the rule. */
uint64_t bytes; /**< Number of bytes matched by the rule. */
/** Structure for Drop queue. */
struct mlx5_hrxq_drop {
struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
struct ibv_qp *qp; /**< Verbs queue pair. */
struct ibv_wq *wq; /**< Verbs work queue. */
struct ibv_cq *cq; /**< Verbs completion queue. */
/* Flow structures. */
uint64_t hash_fields; /**< Fields that participate in the hash. */
struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
struct ibv_flow *ibv_flow; /**< Verbs flow. */
struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
/* Drop flow structures. */
struct mlx5_flow_drop {
struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
struct ibv_flow *ibv_flow; /**< Verbs flow. */
TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
uint32_t mark:1; /**< Set if the flow is marked. */
uint32_t drop:1; /**< Drop queue. */
uint16_t queues_n; /**< Number of entries in queue[]. */
uint16_t (*queues)[]; /**< Queue indexes to use. */
struct rte_eth_rss_conf rss_conf; /**< RSS configuration. */
uint8_t rss_key[40]; /**< Copy of the RSS key. */
struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
struct mlx5_flow_counter_stats counter_stats; /**< The counter stats. */
struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
/**< Flow with Rx queue. */
/** Static initializer for items. */
(const enum rte_flow_item_type []){ \
__VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
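/*
 * For instance, ITEMS(RTE_FLOW_ITEM_TYPE_UDP, RTE_FLOW_ITEM_TYPE_TCP)
 * expands to the compound literal:
 *
 * (const enum rte_flow_item_type []){
 *         RTE_FLOW_ITEM_TYPE_UDP, RTE_FLOW_ITEM_TYPE_TCP,
 *         RTE_FLOW_ITEM_TYPE_END,
 * }
 */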
/** Structure to generate a simple graph of layers supported by the NIC. */
struct mlx5_flow_items {
/** List of possible actions for these items. */
const enum rte_flow_action_type *const actions;
/** Bit-masks corresponding to the possibilities for the item. */
* Default bit-masks to use when item->mask is not provided. When
* \default_mask is also NULL, the full supported bit-mask (\mask) is
const void *default_mask;
/** Bit-masks size in bytes. */
const unsigned int mask_sz;
* Conversion function from rte_flow to NIC-specific flow.
* rte_flow item to convert.
* @param default_mask
* Default bit-masks to use when item->mask is not provided.
* Internal structure to store the conversion.
* 0 on success, negative value otherwise.
int (*convert)(const struct rte_flow_item *item,
const void *default_mask,
/** Size in bytes of the destination structure. */
const unsigned int dst_sz;
/** List of possible following items. */
const enum rte_flow_item_type *const items;
/** Valid actions for this PMD. */
static const enum rte_flow_action_type valid_actions[] = {
RTE_FLOW_ACTION_TYPE_DROP,
RTE_FLOW_ACTION_TYPE_QUEUE,
RTE_FLOW_ACTION_TYPE_MARK,
RTE_FLOW_ACTION_TYPE_FLAG,
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
RTE_FLOW_ACTION_TYPE_COUNT,
RTE_FLOW_ACTION_TYPE_END,
/** Graph of supported items and associated actions. */
static const struct mlx5_flow_items mlx5_flow_items[] = {
[RTE_FLOW_ITEM_TYPE_END] = {
.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
RTE_FLOW_ITEM_TYPE_VXLAN),
[RTE_FLOW_ITEM_TYPE_ETH] = {
.items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
RTE_FLOW_ITEM_TYPE_IPV4,
RTE_FLOW_ITEM_TYPE_IPV6),
.actions = valid_actions,
.mask = &(const struct rte_flow_item_eth){
.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
.default_mask = &rte_flow_item_eth_mask,
.mask_sz = sizeof(struct rte_flow_item_eth),
.convert = mlx5_flow_create_eth,
.dst_sz = sizeof(struct ibv_flow_spec_eth),
[RTE_FLOW_ITEM_TYPE_VLAN] = {
.items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
RTE_FLOW_ITEM_TYPE_IPV6),
.actions = valid_actions,
.mask = &(const struct rte_flow_item_vlan){
.default_mask = &rte_flow_item_vlan_mask,
.mask_sz = sizeof(struct rte_flow_item_vlan),
.convert = mlx5_flow_create_vlan,
[RTE_FLOW_ITEM_TYPE_IPV4] = {
.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
RTE_FLOW_ITEM_TYPE_TCP),
.actions = valid_actions,
.mask = &(const struct rte_flow_item_ipv4){
.type_of_service = -1,
.default_mask = &rte_flow_item_ipv4_mask,
.mask_sz = sizeof(struct rte_flow_item_ipv4),
.convert = mlx5_flow_create_ipv4,
.dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
[RTE_FLOW_ITEM_TYPE_IPV6] = {
.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
RTE_FLOW_ITEM_TYPE_TCP),
.actions = valid_actions,
.mask = &(const struct rte_flow_item_ipv6){
0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff,
.default_mask = &rte_flow_item_ipv6_mask,
.mask_sz = sizeof(struct rte_flow_item_ipv6),
.convert = mlx5_flow_create_ipv6,
.dst_sz = sizeof(struct ibv_flow_spec_ipv6),
[RTE_FLOW_ITEM_TYPE_UDP] = {
.items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
.actions = valid_actions,
.mask = &(const struct rte_flow_item_udp){
.default_mask = &rte_flow_item_udp_mask,
.mask_sz = sizeof(struct rte_flow_item_udp),
.convert = mlx5_flow_create_udp,
.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
[RTE_FLOW_ITEM_TYPE_TCP] = {
.actions = valid_actions,
.mask = &(const struct rte_flow_item_tcp){
.default_mask = &rte_flow_item_tcp_mask,
.mask_sz = sizeof(struct rte_flow_item_tcp),
.convert = mlx5_flow_create_tcp,
.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
[RTE_FLOW_ITEM_TYPE_VXLAN] = {
.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
.actions = valid_actions,
.mask = &(const struct rte_flow_item_vxlan){
.vni = "\xff\xff\xff",
.default_mask = &rte_flow_item_vxlan_mask,
.mask_sz = sizeof(struct rte_flow_item_vxlan),
.convert = mlx5_flow_create_vxlan,
.dst_sz = sizeof(struct ibv_flow_spec_tunnel),
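/*
 * Reading the graph above: a pattern is accepted when each item appears in
 * the .items list of its predecessor, starting from RTE_FLOW_ITEM_TYPE_END.
 * E.g. eth / ipv4 / udp / vxlan / eth chains correctly, while eth / tcp
 * does not, since TCP is not listed among the items following ETH.
 */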
/** Structure to pass to the conversion function. */
struct mlx5_flow_parse {
uint32_t inner; /**< Set once VXLAN is encountered. */
uint32_t allmulti:1; /**< Set once allmulti dst MAC is encountered. */
/**< Whether resources should remain after a validate. */
uint32_t drop:1; /**< Target is a drop queue. */
uint32_t mark:1; /**< Mark is present in the flow. */
uint32_t count:1; /**< Count is present in the flow. */
uint32_t mark_id; /**< Mark identifier. */
uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indexes to use. */
uint16_t queues_n; /**< Number of entries in queue[]. */
struct rte_eth_rss_conf rss_conf; /**< RSS configuration. */
uint8_t rss_key[40]; /**< Copy of the RSS key. */
enum hash_rxq_type layer; /**< Last pattern layer detected. */
struct ibv_counter_set *cs; /**< Holds the counter set for the rule. */
struct ibv_flow_attr *ibv_attr;
/**< Pointer to Verbs attributes. */
/**< Current position or total size of the attribute. */
} queue[RTE_DIM(hash_rxq_init)];
static const struct rte_flow_ops mlx5_flow_ops = {
.validate = mlx5_flow_validate,
.create = mlx5_flow_create,
.destroy = mlx5_flow_destroy,
.flush = mlx5_flow_flush,
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
.query = mlx5_flow_query,
.isolate = mlx5_flow_isolate,
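/*
 * These callbacks are reached through the generic rte_flow API. A minimal
 * application-side sketch (illustrative, assuming an initialized port_id
 * and filled attr/pattern/actions arrays):
 *
 * struct rte_flow_error err;
 * struct rte_flow *f;
 *
 * if (rte_flow_validate(port_id, &attr, pattern, actions, &err) == 0)
 *         f = rte_flow_create(port_id, &attr, pattern, actions, &err);
 * ...
 * rte_flow_destroy(port_id, f, &err);
 */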
/* Convert FDIR request to a generic flow. */
struct rte_flow_attr attr;
struct rte_flow_action actions[2];
struct rte_flow_item items[4];
struct rte_flow_item_eth l2;
struct rte_flow_item_eth l2_mask;
struct rte_flow_item_ipv4 ipv4;
struct rte_flow_item_ipv6 ipv6;
struct rte_flow_item_udp udp;
struct rte_flow_item_tcp tcp;
struct rte_flow_action_queue queue;
/* Verbs specification header. */
struct ibv_spec_header {
enum ibv_flow_spec_type type;
* Check support for a given item.
* Item specification.
* Bit-masks covering supported fields to compare with spec, last and mask in
* Bit-mask size in bytes.
mlx5_flow_item_validate(const struct rte_flow_item *item,
const uint8_t *mask, unsigned int size)
if (!item->spec && (item->mask || item->last))
if (item->spec && !item->mask) {
const uint8_t *spec = item->spec;
for (i = 0; i < size; ++i)
if ((spec[i] | mask[i]) != mask[i])
if (item->last && !item->mask) {
const uint8_t *spec = item->last;
for (i = 0; i < size; ++i)
if ((spec[i] | mask[i]) != mask[i])
const uint8_t *spec = item->spec;
for (i = 0; i < size; ++i)
if ((spec[i] | mask[i]) != mask[i])
if (item->spec && item->last) {
const uint8_t *apply = mask;
for (i = 0; i < size; ++i) {
spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
last[i] = ((const uint8_t *)item->last)[i] & apply[i];
ret = memcmp(spec, last, size);
* Copy the RSS configuration from the user-provided one.
* Pointer to private structure.
* Internal parser structure.
* User RSS configuration to save.
* 0 on success, errno value on failure.
priv_flow_convert_rss_conf(struct priv *priv,
struct mlx5_flow_parse *parser,
const struct rte_eth_rss_conf *rss_conf)
const struct rte_eth_rss_conf *rss;
if (rss_conf->rss_hf & MLX5_RSS_HF_MASK)
rss = &priv->rss_conf;
if (rss->rss_key_len > 40)
parser->rss_conf.rss_key_len = rss->rss_key_len;
parser->rss_conf.rss_hf = rss->rss_hf;
memcpy(parser->rss_key, rss->rss_key, rss->rss_key_len);
parser->rss_conf.rss_key = parser->rss_key;
* Extract attribute to the parser.
* Pointer to private structure.
* Flow rule attributes.
* Perform verbose error reporting if not NULL.
* @param[in, out] parser
* Internal parser structure.
* 0 on success, a negative errno value otherwise and rte_errno is set.
priv_flow_convert_attributes(struct priv *priv,
const struct rte_flow_attr *attr,
struct rte_flow_error *error,
struct mlx5_flow_parse *parser)
rte_flow_error_set(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
"groups are not supported");
if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
rte_flow_error_set(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
"priorities are not supported");
rte_flow_error_set(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
"egress is not supported");
if (!attr->ingress) {
rte_flow_error_set(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
"only ingress is supported");
* Extract actions request to the parser.
* Pointer to private structure.
* Associated actions (list terminated by the END action).
* Perform verbose error reporting if not NULL.
* @param[in, out] parser
* Internal parser structure.
* 0 on success, a negative errno value otherwise and rte_errno is set.
priv_flow_convert_actions(struct priv *priv,
const struct rte_flow_action actions[],
struct rte_flow_error *error,
struct mlx5_flow_parse *parser)
* Add default RSS configuration necessary for Verbs to create a QP even
* if no RSS is requested.
priv_flow_convert_rss_conf(priv, parser,
(const struct rte_eth_rss_conf *)
for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
const struct rte_flow_action_queue *queue =
(const struct rte_flow_action_queue *)
if (!queue || (queue->index > (priv->rxqs_n - 1)))
goto exit_action_not_supported;
for (n = 0; n < parser->queues_n; ++n) {
if (parser->queues[n] == queue->index) {
if (parser->queues_n > 1 && !found) {
rte_flow_error_set(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ACTION,
"queue action not in RSS queues");
parser->queues_n = 1;
parser->queues[0] = queue->index;
} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
const struct rte_flow_action_rss *rss =
(const struct rte_flow_action_rss *)
if (!rss || !rss->num) {
rte_flow_error_set(error, EINVAL,
RTE_FLOW_ERROR_TYPE_ACTION,
if (parser->queues_n == 1) {
assert(parser->queues_n);
for (n = 0; n < rss->num; ++n) {
if (parser->queues[0] ==
rte_flow_error_set(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ACTION,
"queue action not in RSS"
for (n = 0; n < rss->num; ++n) {
if (rss->queue[n] >= priv->rxqs_n) {
rte_flow_error_set(error, EINVAL,
RTE_FLOW_ERROR_TYPE_ACTION,
"queue id > number of"
for (n = 0; n < rss->num; ++n)
parser->queues[n] = rss->queue[n];
parser->queues_n = rss->num;
if (priv_flow_convert_rss_conf(priv, parser,
rte_flow_error_set(error, EINVAL,
RTE_FLOW_ERROR_TYPE_ACTION,
"wrong RSS configuration");
} else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
const struct rte_flow_action_mark *mark =
(const struct rte_flow_action_mark *)
rte_flow_error_set(error, EINVAL,
RTE_FLOW_ERROR_TYPE_ACTION,
"mark must be defined");
} else if (mark->id >= MLX5_FLOW_MARK_MAX) {
rte_flow_error_set(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ACTION,
"mark must be between 0"
parser->mark_id = mark->id;
} else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
} else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
priv->config.flow_counter_en) {
goto exit_action_not_supported;
if (parser->drop && parser->mark)
if (!parser->queues_n && !parser->drop) {
rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
NULL, "no valid action");
exit_action_not_supported:
rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
actions, "action not supported");
* Pointer to private structure.
* Pattern specification (list terminated by the END pattern item).
* Perform verbose error reporting if not NULL.
* @param[in, out] parser
* Internal parser structure.
* 0 on success, a negative errno value otherwise and rte_errno is set.
priv_flow_convert_items_validate(struct priv *priv,
const struct rte_flow_item items[],
struct rte_flow_error *error,
struct mlx5_flow_parse *parser)
const struct mlx5_flow_items *cur_item = mlx5_flow_items;
/* Initialise the offsets to start after verbs attribute. */
for (i = 0; i != hash_rxq_init_n; ++i)
parser->queue[i].offset = sizeof(struct ibv_flow_attr);
for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
const struct mlx5_flow_items *token = NULL;
if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
if (cur_item->items[i] == items->type) {
token = &mlx5_flow_items[items->type];
goto exit_item_not_supported;
err = mlx5_flow_item_validate(items,
(const uint8_t *)cur_item->mask,
goto exit_item_not_supported;
if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
rte_flow_error_set(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ITEM,
"cannot recognize multiple"
" VXLAN encapsulations");
parser->inner = IBV_FLOW_SPEC_INNER;
if (parser->drop || parser->queues_n == 1) {
parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
for (n = 0; n != hash_rxq_init_n; ++n)
parser->queue[n].offset += cur_item->dst_sz;
parser->queue[HASH_RXQ_ETH].offset +=
sizeof(struct ibv_flow_spec_action_drop);
for (i = 0; i != hash_rxq_init_n; ++i)
parser->queue[i].offset +=
sizeof(struct ibv_flow_spec_action_tag);
unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
for (i = 0; i != hash_rxq_init_n; ++i)
parser->queue[i].offset += size;
exit_item_not_supported:
rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
items, "item not supported");
* Allocate memory space to store verbs flow attributes.
* Pointer to private structure.
* @param[in] priority
* Amount of bytes to allocate.
* Perform verbose error reporting if not NULL.
* A verbs flow attribute on success, NULL otherwise.
static struct ibv_flow_attr *
priv_flow_convert_allocate(struct priv *priv,
unsigned int priority,
struct rte_flow_error *error)
struct ibv_flow_attr *ibv_attr;
ibv_attr = rte_calloc(__func__, 1, size, 0);
rte_flow_error_set(error, ENOMEM,
RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
"cannot allocate verbs spec attributes.");
ibv_attr->priority = priority;
* Finalise verbs flow attributes.
* Pointer to private structure.
* @param[in, out] parser
* Internal parser structure.
priv_flow_convert_finalise(struct priv *priv, struct mlx5_flow_parse *parser)
const unsigned int ipv4 =
hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 : HASH_RXQ_TCPV6;
const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 : HASH_RXQ_TCPV4;
const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 : HASH_RXQ_IPV4;
const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
if (parser->layer == HASH_RXQ_ETH) {
* This layer becomes useless as the pattern defined under
rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
/* Remove opposite kind of layer, e.g. IPv6 if the pattern is IPv4. */
for (i = ohmin; i != (ohmax + 1); ++i) {
if (!parser->queue[i].ibv_attr)
rte_free(parser->queue[i].ibv_attr);
parser->queue[i].ibv_attr = NULL;
/* Remove impossible flow according to the RSS configuration. */
if (hash_rxq_init[parser->layer].dpdk_rss_hf &
parser->rss_conf.rss_hf) {
/* Remove any other flow. */
for (i = hmin; i != (hmax + 1); ++i) {
if ((i == parser->layer) ||
(!parser->queue[i].ibv_attr))
rte_free(parser->queue[i].ibv_attr);
parser->queue[i].ibv_attr = NULL;
} else if (!parser->queue[ip].ibv_attr) {
/* No RSS possible with the current configuration. */
parser->queues_n = 1;
* Fill missing layers in verbs specifications, or compute the correct
* offset to allocate the memory space for the attributes and
for (i = 0; i != hash_rxq_init_n - 1; ++i) {
struct ibv_flow_spec_ipv4_ext ipv4;
struct ibv_flow_spec_ipv6 ipv6;
struct ibv_flow_spec_tcp_udp udp_tcp;
if (i == parser->layer)
if (parser->layer == HASH_RXQ_ETH) {
if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
size = sizeof(struct ibv_flow_spec_ipv4_ext);
specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
.type = IBV_FLOW_SPEC_IPV4_EXT,
size = sizeof(struct ibv_flow_spec_ipv6);
specs.ipv6 = (struct ibv_flow_spec_ipv6){
.type = IBV_FLOW_SPEC_IPV6,
if (parser->queue[i].ibv_attr) {
dst = (void *)((uintptr_t)
parser->queue[i].ibv_attr +
parser->queue[i].offset);
memcpy(dst, &specs, size);
++parser->queue[i].ibv_attr->num_of_specs;
parser->queue[i].offset += size;
if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
(i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
size = sizeof(struct ibv_flow_spec_tcp_udp);
specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
.type = ((i == HASH_RXQ_UDPV4 ||
i == HASH_RXQ_UDPV6) ?
if (parser->queue[i].ibv_attr) {
dst = (void *)((uintptr_t)
parser->queue[i].ibv_attr +
parser->queue[i].offset);
memcpy(dst, &specs, size);
++parser->queue[i].ibv_attr->num_of_specs;
parser->queue[i].offset += size;
* Validate and convert a flow supported by the NIC.
* Pointer to private structure.
* Flow rule attributes.
* @param[in] pattern
* Pattern specification (list terminated by the END pattern item).
* @param[in] actions
* Associated actions (list terminated by the END action).
* Perform verbose error reporting if not NULL.
* @param[in, out] parser
* Internal parser structure.
* 0 on success, a negative errno value otherwise and rte_errno is set.
priv_flow_convert(struct priv *priv,
const struct rte_flow_attr *attr,
const struct rte_flow_item items[],
const struct rte_flow_action actions[],
struct rte_flow_error *error,
struct mlx5_flow_parse *parser)
const struct mlx5_flow_items *cur_item = mlx5_flow_items;
/* First step. Validate the attributes, items and actions. */
*parser = (struct mlx5_flow_parse){
.create = parser->create,
.layer = HASH_RXQ_ETH,
.mark_id = MLX5_FLOW_MARK_DEFAULT,
ret = priv_flow_convert_attributes(priv, attr, error, parser);
ret = priv_flow_convert_actions(priv, actions, error, parser);
ret = priv_flow_convert_items_validate(priv, items, error, parser);
priv_flow_convert_finalise(priv, parser);
* Allocate the memory space to store verbs specifications.
if (parser->drop || parser->queues_n == 1) {
unsigned int priority =
hash_rxq_init[HASH_RXQ_ETH].flow_priority;
unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;
parser->queue[HASH_RXQ_ETH].ibv_attr =
priv_flow_convert_allocate(priv, priority,
if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
parser->queue[HASH_RXQ_ETH].offset =
sizeof(struct ibv_flow_attr);
for (i = 0; i != hash_rxq_init_n; ++i) {
unsigned int priority =
hash_rxq_init[i].flow_priority;
unsigned int offset;
if (!(parser->rss_conf.rss_hf &
hash_rxq_init[i].dpdk_rss_hf) &&
(i != HASH_RXQ_ETH))
offset = parser->queue[i].offset;
parser->queue[i].ibv_attr =
priv_flow_convert_allocate(priv, priority,
if (!parser->queue[i].ibv_attr)
parser->queue[i].offset = sizeof(struct ibv_flow_attr);
/* Third step. Conversion parse, fill the specifications. */
for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
cur_item = &mlx5_flow_items[items->type];
ret = cur_item->convert(items,
(cur_item->default_mask ?
cur_item->default_mask :
rte_flow_error_set(error, ret,
RTE_FLOW_ERROR_TYPE_ITEM,
items, "item not supported");
mlx5_flow_create_flag_mark(parser, parser->mark_id);
if (parser->count && parser->create) {
mlx5_flow_create_count(priv, parser);
goto exit_count_error;
* Last step. Complete missing specification to reach the RSS
if (parser->queues_n > 1) {
priv_flow_convert_finalise(priv, parser);
* Flows with a queue action have their priority overridden with the
* Ethernet layer priority; this needs to be adjusted to their most
* specific layer priority.
parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
hash_rxq_init[parser->layer].flow_priority;
if (parser->allmulti &&
parser->layer == HASH_RXQ_ETH) {
for (i = 0; i != hash_rxq_init_n; ++i) {
if (!parser->queue[i].ibv_attr)
if (parser->queue[i].ibv_attr->num_of_specs != 1)
parser->queue[i].ibv_attr->type =
IBV_FLOW_ATTR_MC_DEFAULT;
/* Only verification is expected, all resources should be released. */
if (!parser->create) {
for (i = 0; i != hash_rxq_init_n; ++i) {
if (parser->queue[i].ibv_attr) {
rte_free(parser->queue[i].ibv_attr);
parser->queue[i].ibv_attr = NULL;
for (i = 0; i != hash_rxq_init_n; ++i) {
if (parser->queue[i].ibv_attr) {
rte_free(parser->queue[i].ibv_attr);
parser->queue[i].ibv_attr = NULL;
rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
NULL, "cannot allocate verbs spec attributes.");
rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
NULL, "cannot create counter.");
* Copy the specification created into the flow.
* Internal parser structure.
* Create specification.
* Size in bytes of the specification to copy.
mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
for (i = 0; i != hash_rxq_init_n; ++i) {
if (!parser->queue[i].ibv_attr)
/* Specification must be the same L3 type or none. */
if (parser->layer == HASH_RXQ_ETH ||
(hash_rxq_init[parser->layer].ip_version ==
hash_rxq_init[i].ip_version) ||
(hash_rxq_init[i].ip_version == 0)) {
dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
parser->queue[i].offset);
memcpy(dst, src, size);
++parser->queue[i].ibv_attr->num_of_specs;
parser->queue[i].offset += size;
* Convert Ethernet item to Verbs specification.
* Item specification.
* @param[in] default_mask
* Default bit-masks to use when item->mask is not provided.
* @param[in, out] data
mlx5_flow_create_eth(const struct rte_flow_item *item,
const void *default_mask,
const struct rte_flow_item_eth *spec = item->spec;
const struct rte_flow_item_eth *mask = item->mask;
struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
struct ibv_flow_spec_eth eth = {
.type = parser->inner | IBV_FLOW_SPEC_ETH,
/* Don't update layer for the inner pattern. */
parser->layer = HASH_RXQ_ETH;
mask = default_mask;
memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
eth.val.ether_type = spec->type;
memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
eth.mask.ether_type = mask->type;
/* Remove unwanted bits from values. */
for (i = 0; i < ETHER_ADDR_LEN; ++i) {
eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
eth.val.src_mac[i] &= eth.mask.src_mac[i];
eth.val.ether_type &= eth.mask.ether_type;
mlx5_flow_create_copy(parser, &eth, eth_size);
parser->allmulti = eth.val.dst_mac[0] & 1;
* Convert VLAN item to Verbs specification.
* Item specification.
* @param[in] default_mask
* Default bit-masks to use when item->mask is not provided.
* @param[in, out] data
mlx5_flow_create_vlan(const struct rte_flow_item *item,
const void *default_mask,
const struct rte_flow_item_vlan *spec = item->spec;
const struct rte_flow_item_vlan *mask = item->mask;
struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
struct ibv_flow_spec_eth *eth;
const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
mask = default_mask;
for (i = 0; i != hash_rxq_init_n; ++i) {
if (!parser->queue[i].ibv_attr)
eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
parser->queue[i].offset - eth_size);
eth->val.vlan_tag = spec->tci;
eth->mask.vlan_tag = mask->tci;
eth->val.vlan_tag &= eth->mask.vlan_tag;
* Convert IPv4 item to Verbs specification.
* Item specification.
* @param[in] default_mask
* Default bit-masks to use when item->mask is not provided.
* @param[in, out] data
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
const void *default_mask,
const struct rte_flow_item_ipv4 *spec = item->spec;
const struct rte_flow_item_ipv4 *mask = item->mask;
struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
struct ibv_flow_spec_ipv4_ext ipv4 = {
.type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
/* Don't update layer for the inner pattern. */
parser->layer = HASH_RXQ_IPV4;
mask = default_mask;
ipv4.val = (struct ibv_flow_ipv4_ext_filter){
.src_ip = spec->hdr.src_addr,
.dst_ip = spec->hdr.dst_addr,
.proto = spec->hdr.next_proto_id,
.tos = spec->hdr.type_of_service,
ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
.src_ip = mask->hdr.src_addr,
.dst_ip = mask->hdr.dst_addr,
.proto = mask->hdr.next_proto_id,
.tos = mask->hdr.type_of_service,
/* Remove unwanted bits from values. */
ipv4.val.src_ip &= ipv4.mask.src_ip;
ipv4.val.dst_ip &= ipv4.mask.dst_ip;
ipv4.val.proto &= ipv4.mask.proto;
ipv4.val.tos &= ipv4.mask.tos;
mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
* Convert IPv6 item to Verbs specification.
* Item specification.
* @param[in] default_mask
* Default bit-masks to use when item->mask is not provided.
* @param[in, out] data
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
const void *default_mask,
const struct rte_flow_item_ipv6 *spec = item->spec;
const struct rte_flow_item_ipv6 *mask = item->mask;
struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
struct ibv_flow_spec_ipv6 ipv6 = {
.type = parser->inner | IBV_FLOW_SPEC_IPV6,
/* Don't update layer for the inner pattern. */
parser->layer = HASH_RXQ_IPV6;
uint32_t vtc_flow_val;
uint32_t vtc_flow_mask;
mask = default_mask;
memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
RTE_DIM(ipv6.val.src_ip));
memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
RTE_DIM(ipv6.val.dst_ip));
memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
RTE_DIM(ipv6.mask.src_ip));
memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
RTE_DIM(ipv6.mask.dst_ip));
vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
ipv6.val.flow_label =
rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
ipv6.val.next_hdr = spec->hdr.proto;
ipv6.val.hop_limit = spec->hdr.hop_limits;
ipv6.mask.flow_label =
rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
ipv6.mask.next_hdr = mask->hdr.proto;
ipv6.mask.hop_limit = mask->hdr.hop_limits;
/* Remove unwanted bits from values. */
for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
ipv6.val.flow_label &= ipv6.mask.flow_label;
ipv6.val.traffic_class &= ipv6.mask.traffic_class;
ipv6.val.next_hdr &= ipv6.mask.next_hdr;
ipv6.val.hop_limit &= ipv6.mask.hop_limit;
mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
* Convert UDP item to Verbs specification.
* Item specification.
* @param[in] default_mask
* Default bit-masks to use when item->mask is not provided.
* @param[in, out] data
mlx5_flow_create_udp(const struct rte_flow_item *item,
const void *default_mask,
const struct rte_flow_item_udp *spec = item->spec;
const struct rte_flow_item_udp *mask = item->mask;
struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
struct ibv_flow_spec_tcp_udp udp = {
.type = parser->inner | IBV_FLOW_SPEC_UDP,
/* Don't update layer for the inner pattern. */
if (!parser->inner) {
if (parser->layer == HASH_RXQ_IPV4)
parser->layer = HASH_RXQ_UDPV4;
parser->layer = HASH_RXQ_UDPV6;
mask = default_mask;
udp.val.dst_port = spec->hdr.dst_port;
udp.val.src_port = spec->hdr.src_port;
udp.mask.dst_port = mask->hdr.dst_port;
udp.mask.src_port = mask->hdr.src_port;
/* Remove unwanted bits from values. */
udp.val.src_port &= udp.mask.src_port;
udp.val.dst_port &= udp.mask.dst_port;
mlx5_flow_create_copy(parser, &udp, udp_size);
* Convert TCP item to Verbs specification.
* Item specification.
* @param[in] default_mask
* Default bit-masks to use when item->mask is not provided.
* @param[in, out] data
mlx5_flow_create_tcp(const struct rte_flow_item *item,
const void *default_mask,
const struct rte_flow_item_tcp *spec = item->spec;
const struct rte_flow_item_tcp *mask = item->mask;
struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
struct ibv_flow_spec_tcp_udp tcp = {
.type = parser->inner | IBV_FLOW_SPEC_TCP,
/* Don't update layer for the inner pattern. */
if (!parser->inner) {
if (parser->layer == HASH_RXQ_IPV4)
parser->layer = HASH_RXQ_TCPV4;
parser->layer = HASH_RXQ_TCPV6;
mask = default_mask;
tcp.val.dst_port = spec->hdr.dst_port;
tcp.val.src_port = spec->hdr.src_port;
tcp.mask.dst_port = mask->hdr.dst_port;
tcp.mask.src_port = mask->hdr.src_port;
/* Remove unwanted bits from values. */
tcp.val.src_port &= tcp.mask.src_port;
tcp.val.dst_port &= tcp.mask.dst_port;
mlx5_flow_create_copy(parser, &tcp, tcp_size);
* Convert VXLAN item to Verbs specification.
* Item specification.
* @param[in] default_mask
* Default bit-masks to use when item->mask is not provided.
* @param[in, out] data
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
const void *default_mask,
const struct rte_flow_item_vxlan *spec = item->spec;
const struct rte_flow_item_vxlan *mask = item->mask;
struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
struct ibv_flow_spec_tunnel vxlan = {
.type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
parser->inner = IBV_FLOW_SPEC_INNER;
mask = default_mask;
memcpy(&id.vni[1], spec->vni, 3);
vxlan.val.tunnel_id = id.vlan_id;
memcpy(&id.vni[1], mask->vni, 3);
vxlan.mask.tunnel_id = id.vlan_id;
/* Remove unwanted bits from values. */
vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
* Tunnel id 0 is equivalent to not adding a VXLAN layer: if only this
* layer is defined in the Verbs specification, it is interpreted as a
* wildcard and all packets will match this rule. If it follows a full
* stack layer (e.g. eth / ipv4 / udp), all packets matching the layers
* before will also match this rule.
* To avoid such a situation, VNI 0 is currently refused.
if (!vxlan.val.tunnel_id)
mlx5_flow_create_copy(parser, &vxlan, size);
* Convert mark/flag action to Verbs specification.
* Internal parser structure.
mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
struct ibv_flow_spec_action_tag tag = {
.type = IBV_FLOW_SPEC_ACTION_TAG,
.tag_id = mlx5_flow_mark_set(mark_id),
assert(parser->mark);
mlx5_flow_create_copy(parser, &tag, size);
* Convert count action to Verbs specification.
* Pointer to private structure.
* Pointer to MLX5 flow parser structure.
* 0 on success, errno value on failure.
mlx5_flow_create_count(struct priv *priv __rte_unused,
struct mlx5_flow_parse *parser __rte_unused)
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
struct ibv_counter_set_init_attr init_attr = {0};
struct ibv_flow_spec_counter_action counter = {
.type = IBV_FLOW_SPEC_ACTION_COUNT,
.counter_set_handle = 0,
init_attr.counter_set_id = 0;
parser->cs = ibv_create_counter_set(priv->ctx, &init_attr);
counter.counter_set_handle = parser->cs->handle;
mlx5_flow_create_copy(parser, &counter, size);
* Complete flow rule creation with a drop queue.
* Pointer to private structure.
* Internal parser structure.
* Pointer to the rte_flow.
* Perform verbose error reporting if not NULL.
* 0 on success, errno value on failure.
priv_flow_create_action_queue_drop(struct priv *priv,
struct mlx5_flow_parse *parser,
struct rte_flow *flow,
struct rte_flow_error *error)
struct ibv_flow_spec_action_drop *drop;
unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
parser->queue[HASH_RXQ_ETH].offset);
*drop = (struct ibv_flow_spec_action_drop){
.type = IBV_FLOW_SPEC_ACTION_DROP,
++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
parser->queue[HASH_RXQ_ETH].offset += size;
flow->frxq[HASH_RXQ_ETH].ibv_attr =
parser->queue[HASH_RXQ_ETH].ibv_attr;
flow->cs = parser->cs;
if (!priv->dev->data->dev_started)
parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
flow->frxq[HASH_RXQ_ETH].ibv_flow =
ibv_create_flow(priv->flow_drop_queue->qp,
flow->frxq[HASH_RXQ_ETH].ibv_attr);
if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
NULL, "flow rule creation failure");
if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
claim_zero(ibv_destroy_flow(flow->frxq[HASH_RXQ_ETH].ibv_flow));
flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
claim_zero(ibv_destroy_counter_set(flow->cs));
* Create hash Rx queues when RSS is enabled.
* Pointer to private structure.
* Internal parser structure.
* Pointer to the rte_flow.
* Perform verbose error reporting if not NULL.
* 0 on success, an errno value otherwise and rte_errno is set.
priv_flow_create_action_queue_rss(struct priv *priv,
struct mlx5_flow_parse *parser,
struct rte_flow *flow,
struct rte_flow_error *error)
for (i = 0; i != hash_rxq_init_n; ++i) {
uint64_t hash_fields;
if (!parser->queue[i].ibv_attr)
flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
parser->queue[i].ibv_attr = NULL;
hash_fields = hash_rxq_init[i].hash_fields;
if (!priv->dev->data->dev_started)
flow->frxq[i].hrxq =
mlx5_priv_hrxq_get(priv,
parser->rss_conf.rss_key,
parser->rss_conf.rss_key_len,
if (flow->frxq[i].hrxq)
flow->frxq[i].hrxq =
mlx5_priv_hrxq_new(priv,
parser->rss_conf.rss_key,
parser->rss_conf.rss_key_len,
if (!flow->frxq[i].hrxq) {
rte_flow_error_set(error, ENOMEM,
RTE_FLOW_ERROR_TYPE_HANDLE,
NULL, "cannot create hash rxq");
* Complete flow rule creation.
* Pointer to private structure.
* Internal parser structure.
* Pointer to the rte_flow.
* Perform verbose error reporting if not NULL.
* 0 on success, an errno value otherwise and rte_errno is set.
priv_flow_create_action_queue(struct priv *priv,
struct mlx5_flow_parse *parser,
struct rte_flow *flow,
struct rte_flow_error *error)
assert(!parser->drop);
err = priv_flow_create_action_queue_rss(priv, parser, flow, error);
flow->cs = parser->cs;
if (!priv->dev->data->dev_started)
for (i = 0; i != hash_rxq_init_n; ++i) {
if (!flow->frxq[i].hrxq)
flow->frxq[i].ibv_flow =
ibv_create_flow(flow->frxq[i].hrxq->qp,
flow->frxq[i].ibv_attr);
if (!flow->frxq[i].ibv_flow) {
rte_flow_error_set(error, ENOMEM,
RTE_FLOW_ERROR_TYPE_HANDLE,
NULL, "flow rule creation failure");
DEBUG("%p type %d QP %p ibv_flow %p",
(void *)flow->frxq[i].hrxq,
(void *)flow->frxq[i].ibv_flow);
for (i = 0; i != parser->queues_n; ++i) {
struct mlx5_rxq_data *q =
(*priv->rxqs)[parser->queues[i]];
q->mark |= parser->mark;
for (i = 0; i != hash_rxq_init_n; ++i) {
if (flow->frxq[i].ibv_flow) {
struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;
claim_zero(ibv_destroy_flow(ibv_flow));
if (flow->frxq[i].hrxq)
mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq);
if (flow->frxq[i].ibv_attr)
rte_free(flow->frxq[i].ibv_attr);
claim_zero(ibv_destroy_counter_set(flow->cs));
* Pointer to private structure.
* Pointer to a TAILQ flow list.
* Flow rule attributes.
* @param[in] pattern
* Pattern specification (list terminated by the END pattern item).
* @param[in] actions
* Associated actions (list terminated by the END action).
* Perform verbose error reporting if not NULL.
* A flow on success, NULL otherwise.
static struct rte_flow *
priv_flow_create(struct priv *priv,
struct mlx5_flows *list,
const struct rte_flow_attr *attr,
const struct rte_flow_item items[],
const struct rte_flow_action actions[],
struct rte_flow_error *error)
struct mlx5_flow_parse parser = { .create = 1, };
struct rte_flow *flow = NULL;
err = priv_flow_convert(priv, attr, items, actions, error, &parser);
flow = rte_calloc(__func__, 1,
sizeof(*flow) + parser.queues_n * sizeof(uint16_t),
rte_flow_error_set(error, ENOMEM,
RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
"cannot allocate flow memory");
/* Copy queues configuration. */
flow->queues = (uint16_t (*)[])(flow + 1);
memcpy(flow->queues, parser.queues, parser.queues_n * sizeof(uint16_t));
flow->queues_n = parser.queues_n;
flow->mark = parser.mark;
/* Copy RSS configuration. */
flow->rss_conf = parser.rss_conf;
flow->rss_conf.rss_key = flow->rss_key;
memcpy(flow->rss_key, parser.rss_key, parser.rss_conf.rss_key_len);
/* Finalise the flow. */
err = priv_flow_create_action_queue_drop(priv, &parser, flow,
err = priv_flow_create_action_queue(priv, &parser, flow, error);
TAILQ_INSERT_TAIL(list, flow, next);
DEBUG("Flow created %p", (void *)flow);
for (i = 0; i != hash_rxq_init_n; ++i) {
if (parser.queue[i].ibv_attr)
rte_free(parser.queue[i].ibv_attr);
* Validate a flow supported by the NIC.
* @see rte_flow_validate()
mlx5_flow_validate(struct rte_eth_dev *dev,
const struct rte_flow_attr *attr,
const struct rte_flow_item items[],
const struct rte_flow_action actions[],
struct rte_flow_error *error)
struct priv *priv = dev->data->dev_private;
struct mlx5_flow_parse parser = { .create = 0, };
ret = priv_flow_convert(priv, attr, items, actions, error, &parser);
* @see rte_flow_create()
mlx5_flow_create(struct rte_eth_dev *dev,
const struct rte_flow_attr *attr,
const struct rte_flow_item items[],
const struct rte_flow_action actions[],
struct rte_flow_error *error)
struct priv *priv = dev->data->dev_private;
struct rte_flow *flow;
flow = priv_flow_create(priv, &priv->flows, attr, items, actions,
* Pointer to private structure.
* Pointer to a TAILQ flow list.
priv_flow_destroy(struct priv *priv,
struct mlx5_flows *list,
struct rte_flow *flow)
if (flow->drop || !flow->mark)
for (i = 0; i != flow->queues_n; ++i) {
struct rte_flow *tmp;
* To remove the mark from the queue, the queue must not be
* present in any other marked flow (RSS or not).
TAILQ_FOREACH(tmp, list, next) {
uint16_t *tqs = NULL;
for (j = 0; j != hash_rxq_init_n; ++j) {
if (!tmp->frxq[j].hrxq)
tqs = tmp->frxq[j].hrxq->ind_table->queues;
tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
for (j = 0; (j != tq_n) && !mark; j++)
if (tqs[j] == (*flow->queues)[i])
(*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
claim_zero(ibv_destroy_flow
(flow->frxq[HASH_RXQ_ETH].ibv_flow));
rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
for (i = 0; i != hash_rxq_init_n; ++i) {
struct mlx5_flow *frxq = &flow->frxq[i];
claim_zero(ibv_destroy_flow(frxq->ibv_flow));
mlx5_priv_hrxq_release(priv, frxq->hrxq);
rte_free(frxq->ibv_attr);
claim_zero(ibv_destroy_counter_set(flow->cs));
TAILQ_REMOVE(list, flow, next);
DEBUG("Flow destroyed %p", (void *)flow);
* Destroy all flows.
* Pointer to private structure.
* Pointer to a TAILQ flow list.
priv_flow_flush(struct priv *priv, struct mlx5_flows *list)
while (!TAILQ_EMPTY(list)) {
struct rte_flow *flow;
flow = TAILQ_FIRST(list);
priv_flow_destroy(priv, list, flow);
* Create drop queue.
* Pointer to private structure.
priv_flow_create_drop_queue(struct priv *priv)
struct mlx5_hrxq_drop *fdq = NULL;
fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
WARN("cannot allocate memory for drop queue");
fdq->cq = ibv_create_cq(priv->ctx, 1, NULL, NULL, 0);
WARN("cannot allocate CQ for drop queue");
fdq->wq = ibv_create_wq(priv->ctx,
&(struct ibv_wq_init_attr){
.wq_type = IBV_WQT_RQ,
WARN("cannot allocate WQ for drop queue");
fdq->ind_table = ibv_create_rwq_ind_table(priv->ctx,
&(struct ibv_rwq_ind_table_init_attr){
.log_ind_tbl_size = 0,
.ind_tbl = &fdq->wq,
if (!fdq->ind_table) {
WARN("cannot allocate indirection table for drop queue");
fdq->qp = ibv_create_qp_ex(priv->ctx,
&(struct ibv_qp_init_attr_ex){
.qp_type = IBV_QPT_RAW_PACKET,
IBV_QP_INIT_ATTR_PD |
IBV_QP_INIT_ATTR_IND_TABLE |
IBV_QP_INIT_ATTR_RX_HASH,
.rx_hash_conf = (struct ibv_rx_hash_conf){
IBV_RX_HASH_FUNC_TOEPLITZ,
.rx_hash_key_len = rss_hash_default_key_len,
.rx_hash_key = rss_hash_default_key,
.rx_hash_fields_mask = 0,
.rwq_ind_tbl = fdq->ind_table,
WARN("cannot allocate QP for drop queue");
priv->flow_drop_queue = fdq;
claim_zero(ibv_destroy_qp(fdq->qp));
claim_zero(ibv_destroy_rwq_ind_table(fdq->ind_table));
claim_zero(ibv_destroy_wq(fdq->wq));
claim_zero(ibv_destroy_cq(fdq->cq));
priv->flow_drop_queue = NULL;
* Delete drop queue.
* Pointer to private structure.
priv_flow_delete_drop_queue(struct priv *priv)
struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
claim_zero(ibv_destroy_qp(fdq->qp));
claim_zero(ibv_destroy_rwq_ind_table(fdq->ind_table));
claim_zero(ibv_destroy_wq(fdq->wq));
claim_zero(ibv_destroy_cq(fdq->cq));
priv->flow_drop_queue = NULL;
* Pointer to private structure.
* Pointer to a TAILQ flow list.
priv_flow_stop(struct priv *priv, struct mlx5_flows *list)
struct rte_flow *flow;
TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
claim_zero(ibv_destroy_flow
(flow->frxq[HASH_RXQ_ETH].ibv_flow));
flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
struct mlx5_ind_table_ibv *ind_tbl = NULL;
for (i = 0; i != hash_rxq_init_n; ++i) {
if (!flow->frxq[i].hrxq)
ind_tbl = flow->frxq[i].hrxq->ind_table;
for (i = 0; i != ind_tbl->queues_n; ++i)
(*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
for (i = 0; i != hash_rxq_init_n; ++i) {
if (!flow->frxq[i].ibv_flow)
claim_zero(ibv_destroy_flow(flow->frxq[i].ibv_flow));
flow->frxq[i].ibv_flow = NULL;
mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq);
flow->frxq[i].hrxq = NULL;
DEBUG("Flow %p removed", (void *)flow);
* Pointer to private structure.
* Pointer to a TAILQ flow list.
* 0 on success, an errno value otherwise and rte_errno is set.
priv_flow_start(struct priv *priv, struct mlx5_flows *list)
struct rte_flow *flow;
TAILQ_FOREACH(flow, list, next) {
flow->frxq[HASH_RXQ_ETH].ibv_flow =
(priv->flow_drop_queue->qp,
flow->frxq[HASH_RXQ_ETH].ibv_attr);
if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
DEBUG("Flow %p cannot be applied",
DEBUG("Flow %p applied", (void *)flow);
for (i = 0; i != hash_rxq_init_n; ++i) {
if (!flow->frxq[i].ibv_attr)
flow->frxq[i].hrxq =
mlx5_priv_hrxq_get(priv, flow->rss_conf.rss_key,
flow->rss_conf.rss_key_len,
hash_rxq_init[i].hash_fields,
if (flow->frxq[i].hrxq)
flow->frxq[i].hrxq =
mlx5_priv_hrxq_new(priv, flow->rss_conf.rss_key,
flow->rss_conf.rss_key_len,
hash_rxq_init[i].hash_fields,
if (!flow->frxq[i].hrxq) {
DEBUG("Flow %p cannot be applied",
flow->frxq[i].ibv_flow =
ibv_create_flow(flow->frxq[i].hrxq->qp,
flow->frxq[i].ibv_attr);
if (!flow->frxq[i].ibv_flow) {
DEBUG("Flow %p cannot be applied",
DEBUG("Flow %p applied", (void *)flow);
for (i = 0; i != flow->queues_n; ++i)
(*priv->rxqs)[(*flow->queues)[i]]->mark = 1;
* Verify the flow list is empty.
* Pointer to private structure.
* @return The number of flows not released.
priv_flow_verify(struct priv *priv)
struct rte_flow *flow;
TAILQ_FOREACH(flow, &priv->flows, next) {
DEBUG("%p: flow %p still referenced", (void *)priv,
* Enable a control flow configured from the control plane.
* Pointer to Ethernet device.
* An Ethernet flow spec to apply.
* An Ethernet flow mask to apply.
* A VLAN flow spec to apply.
* A VLAN flow mask to apply.
mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
struct rte_flow_item_eth *eth_spec,
struct rte_flow_item_eth *eth_mask,
struct rte_flow_item_vlan *vlan_spec,
struct rte_flow_item_vlan *vlan_mask)
struct priv *priv = dev->data->dev_private;
const struct rte_flow_attr attr = {
.priority = MLX5_CTRL_FLOW_PRIORITY,
struct rte_flow_item items[] = {
.type = RTE_FLOW_ITEM_TYPE_ETH,
.type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
RTE_FLOW_ITEM_TYPE_END,
.type = RTE_FLOW_ITEM_TYPE_END,
struct rte_flow_action actions[] = {
.type = RTE_FLOW_ACTION_TYPE_RSS,
.type = RTE_FLOW_ACTION_TYPE_END,
struct rte_flow *flow;
struct rte_flow_error error;
struct rte_flow_action_rss rss;
const struct rte_eth_rss_conf *rss_conf;
uint16_t queue[RTE_MAX_QUEUES_PER_PORT];
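/*
 * The action_rss "local" and "rss" views alias the same storage (an
 * unnamed union), so filling the local view below also initializes the
 * rte_flow_action_rss handed to the RSS action.
 */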
if (!priv->reta_idx_n)
for (i = 0; i != priv->reta_idx_n; ++i)
action_rss.local.queue[i] = (*priv->reta_idx)[i];
action_rss.local.rss_conf = &priv->rss_conf;
action_rss.local.num = priv->reta_idx_n;
actions[0].conf = (const void *)&action_rss.rss;
flow = priv_flow_create(priv, &priv->ctrl_flows, &attr, items, actions,
* Enable a control flow configured from the control plane.
* Pointer to Ethernet device.
* An Ethernet flow spec to apply.
* An Ethernet flow mask to apply.
mlx5_ctrl_flow(struct rte_eth_dev *dev,
struct rte_flow_item_eth *eth_spec,
struct rte_flow_item_eth *eth_mask)
return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
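/*
 * Illustrative use (an assumption: this mirrors how device start installs
 * MAC control flows), enabling reception of broadcast frames:
 *
 * struct rte_flow_item_eth bcast = {
 *         .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 * };
 *
 * mlx5_ctrl_flow(dev, &bcast, &bcast);
 */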
2448 * @see rte_flow_destroy()
2452 mlx5_flow_destroy(struct rte_eth_dev *dev,
2453 struct rte_flow *flow,
2454 struct rte_flow_error *error)
2456 struct priv *priv = dev->data->dev_private;
2460 priv_flow_destroy(priv, &priv->flows, flow);
2466 * Destroy all flows.
2468 * @see rte_flow_flush()
2472 mlx5_flow_flush(struct rte_eth_dev *dev,
2473 struct rte_flow_error *error)
2475 struct priv *priv = dev->data->dev_private;
2479 priv_flow_flush(priv, &priv->flows);

#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
/**
 * Query flow counter.
 *
 * @param cs
 *   The counter set.
 * @param counter_stats
 *   Last raw counter values, used to compute deltas and handle resets.
 * @param[out] query_count
 *   Returned data from the counter.
 * @param error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, an errno value otherwise and rte_errno is set.
 */
static int
priv_flow_query_count(struct ibv_counter_set *cs,
		      struct mlx5_flow_counter_stats *counter_stats,
		      struct rte_flow_query_count *query_count,
		      struct rte_flow_error *error)
{
	uint64_t counters[2];
	struct ibv_query_counter_set_attr query_cs_attr = {
		.cs = cs,
		.query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
	};
	struct ibv_counter_set_data query_out = {
		.out = counters,
		.outlen = 2 * sizeof(uint64_t),
	};
	int res = ibv_query_counter_set(&query_cs_attr, &query_out);

	if (res) {
		rte_flow_error_set(error, -res,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "cannot read counter");
		return -res;
	}
	query_count->hits_set = 1;
	query_count->bytes_set = 1;
	query_count->hits = counters[0] - counter_stats->hits;
	query_count->bytes = counters[1] - counter_stats->bytes;
	/* On reset, latch raw values so later queries report deltas. */
	if (query_count->reset) {
		counter_stats->hits = counters[0];
		counter_stats->bytes = counters[1];
	}
	return 0;
}

/**
 * Query a flow.
 *
 * @see rte_flow_query()
 * @see rte_flow_ops
 */
int
mlx5_flow_query(struct rte_eth_dev *dev,
		struct rte_flow *flow,
		enum rte_flow_action_type action __rte_unused,
		void *data,
		struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;
	int res = EINVAL;

	priv_lock(priv);
	if (flow->cs) {
		res = priv_flow_query_count(flow->cs,
					    &flow->counter_stats,
					    (struct rte_flow_query_count *)data,
					    error);
	} else {
		rte_flow_error_set(error, res,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "no counter found for flow");
	}
	priv_unlock(priv);
	return -res;
}
#endif

/**
 * Isolated mode.
 *
 * @see rte_flow_isolate()
 * @see rte_flow_ops
 */
int
mlx5_flow_isolate(struct rte_eth_dev *dev,
		  int enable,
		  struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	priv_lock(priv);
	if (dev->data->dev_started) {
		rte_flow_error_set(error, EBUSY,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "port must be stopped first");
		priv_unlock(priv);
		return -rte_errno;
	}
	priv->isolated = !!enable;
	/* Isolated mode swaps the whole dev_ops table. */
	if (enable)
		priv->dev->dev_ops = &mlx5_dev_ops_isolate;
	else
		priv->dev->dev_ops = &mlx5_dev_ops;
	priv_unlock(priv);
	return 0;
}
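
/*
 * Usage sketch, not part of the driver: isolated mode must be selected
 * while the port is stopped, typically right after configuration and
 * before rte_eth_dev_start().  The public API dispatches here:
 *
 *	rte_eth_dev_configure(port_id, rxqs_n, txqs_n, &dev_conf);
 *	rte_flow_isolate(port_id, 1, &error);
 *	rte_eth_dev_start(port_id);
 */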

/**
 * Convert a flow director filter to a generic flow.
 *
 * @param priv
 *   Private structure.
 * @param fdir_filter
 *   Flow director filter to add.
 * @param attributes
 *   Generic flow parameters structure.
 *
 * @return
 *   0 on success, errno value on error.
 */
static int
priv_fdir_filter_convert(struct priv *priv,
			 const struct rte_eth_fdir_filter *fdir_filter,
			 struct mlx5_fdir *attributes)
{
	const struct rte_eth_fdir_input *input = &fdir_filter->input;
	/* Validate queue number. */
	if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
		ERROR("invalid queue number %d", fdir_filter->action.rx_queue);
		return EINVAL;
	}
	attributes->attr.ingress = 1;
	attributes->items[0] = (struct rte_flow_item) {
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.spec = &attributes->l2,
		.mask = &attributes->l2_mask,
	};
	switch (fdir_filter->action.behavior) {
	case RTE_ETH_FDIR_ACCEPT:
		attributes->actions[0] = (struct rte_flow_action){
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &attributes->queue,
		};
		break;
	case RTE_ETH_FDIR_REJECT:
		attributes->actions[0] = (struct rte_flow_action){
			.type = RTE_FLOW_ACTION_TYPE_DROP,
		};
		break;
	default:
		ERROR("invalid behavior %d", fdir_filter->action.behavior);
		return ENOTSUP;
	}
	attributes->queue.index = fdir_filter->action.rx_queue;
	switch (fdir_filter->input.flow_type) {
	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = input->flow.udp4_flow.ip.src_ip,
			.dst_addr = input->flow.udp4_flow.ip.dst_ip,
			.time_to_live = input->flow.udp4_flow.ip.ttl,
			.type_of_service = input->flow.udp4_flow.ip.tos,
			.next_proto_id = input->flow.udp4_flow.ip.proto,
		};
		attributes->l4.udp.hdr = (struct udp_hdr){
			.src_port = input->flow.udp4_flow.src_port,
			.dst_port = input->flow.udp4_flow.dst_port,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV4,
			.spec = &attributes->l3,
			.mask = &attributes->l3,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_UDP,
			.spec = &attributes->l4,
			.mask = &attributes->l4,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = input->flow.tcp4_flow.ip.src_ip,
			.dst_addr = input->flow.tcp4_flow.ip.dst_ip,
			.time_to_live = input->flow.tcp4_flow.ip.ttl,
			.type_of_service = input->flow.tcp4_flow.ip.tos,
			.next_proto_id = input->flow.tcp4_flow.ip.proto,
		};
		attributes->l4.tcp.hdr = (struct tcp_hdr){
			.src_port = input->flow.tcp4_flow.src_port,
			.dst_port = input->flow.tcp4_flow.dst_port,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV4,
			.spec = &attributes->l3,
			.mask = &attributes->l3,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_TCP,
			.spec = &attributes->l4,
			.mask = &attributes->l4,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = input->flow.ip4_flow.src_ip,
			.dst_addr = input->flow.ip4_flow.dst_ip,
			.time_to_live = input->flow.ip4_flow.ttl,
			.type_of_service = input->flow.ip4_flow.tos,
			.next_proto_id = input->flow.ip4_flow.proto,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV4,
			.spec = &attributes->l3,
			.mask = &attributes->l3,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
			.hop_limits = input->flow.udp6_flow.ip.hop_limits,
			.proto = input->flow.udp6_flow.ip.proto,
		};
		memcpy(attributes->l3.ipv6.hdr.src_addr,
		       input->flow.udp6_flow.ip.src_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
		memcpy(attributes->l3.ipv6.hdr.dst_addr,
		       input->flow.udp6_flow.ip.dst_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
		attributes->l4.udp.hdr = (struct udp_hdr){
			.src_port = input->flow.udp6_flow.src_port,
			.dst_port = input->flow.udp6_flow.dst_port,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV6,
			.spec = &attributes->l3,
			.mask = &attributes->l3,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_UDP,
			.spec = &attributes->l4,
			.mask = &attributes->l4,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
			.hop_limits = input->flow.tcp6_flow.ip.hop_limits,
			.proto = input->flow.tcp6_flow.ip.proto,
		};
		memcpy(attributes->l3.ipv6.hdr.src_addr,
		       input->flow.tcp6_flow.ip.src_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
		memcpy(attributes->l3.ipv6.hdr.dst_addr,
		       input->flow.tcp6_flow.ip.dst_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
		attributes->l4.tcp.hdr = (struct tcp_hdr){
			.src_port = input->flow.tcp6_flow.src_port,
			.dst_port = input->flow.tcp6_flow.dst_port,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV6,
			.spec = &attributes->l3,
			.mask = &attributes->l3,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_TCP,
			.spec = &attributes->l4,
			.mask = &attributes->l4,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
			.hop_limits = input->flow.ipv6_flow.hop_limits,
			.proto = input->flow.ipv6_flow.proto,
		};
		memcpy(attributes->l3.ipv6.hdr.src_addr,
		       input->flow.ipv6_flow.src_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
		memcpy(attributes->l3.ipv6.hdr.dst_addr,
		       input->flow.ipv6_flow.dst_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV6,
			.spec = &attributes->l3,
			.mask = &attributes->l3,
		};
		break;
	default:
		ERROR("invalid flow type %d",
		      fdir_filter->input.flow_type);
		return ENOTSUP;
	}
	return 0;
}
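
/*
 * Worked example (values hypothetical): a perfect-mode filter with
 * flow_type RTE_ETH_FLOW_NONFRAG_IPV4_UDP, behavior RTE_ETH_FDIR_ACCEPT
 * and rx_queue 3 converts to the generic pattern
 *
 *	ETH (l2/l2_mask) / IPV4 (l3) / UDP (l4) -> QUEUE index 3
 *
 * where each item's spec points into *attributes and, for the L3/L4
 * items, also doubles as the mask, i.e. only the fields copied above
 * participate in matching.
 */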

/**
 * Add new flow director filter and store it in list.
 *
 * @param priv
 *   Private structure.
 * @param fdir_filter
 *   Flow director filter to add.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_fdir_filter_add(struct priv *priv,
		     const struct rte_eth_fdir_filter *fdir_filter)
{
	struct mlx5_fdir attributes = {
		.attr.group = 0,
		.l2_mask = {
			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.type = 0,
		},
	};
	struct mlx5_flow_parse parser = {
		.layer = HASH_RXQ_ETH,
	};
	struct rte_flow_error error;
	struct rte_flow *flow;
	int ret;

	ret = priv_fdir_filter_convert(priv, fdir_filter, &attributes);
	if (ret)
		return ret;
	ret = priv_flow_convert(priv, &attributes.attr, attributes.items,
				attributes.actions, &error, &parser);
	if (ret)
		return ret;
	flow = priv_flow_create(priv,
				&priv->flows,
				&attributes.attr,
				attributes.items,
				attributes.actions,
				&error);
	if (flow) {
		DEBUG("FDIR created %p", (void *)flow);
		return 0;
	}
	return ENOTSUP;
}
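
/*
 * Sketch, not part of the driver: drop all IPv4/UDP traffic to
 * destination port 9 (values hypothetical) by feeding a flow director
 * filter straight to priv_fdir_filter_add().  The function name is
 * hypothetical.
 */
static int __rte_unused
example_fdir_drop_udp(struct priv *priv)
{
	struct rte_eth_fdir_filter f = {
		.input = {
			.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
			.flow.udp4_flow.dst_port = rte_cpu_to_be_16(9),
		},
		.action.behavior = RTE_ETH_FDIR_REJECT,
	};

	return priv_fdir_filter_add(priv, &f);
}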

/**
 * Delete specific filter.
 *
 * @param priv
 *   Private structure.
 * @param fdir_filter
 *   Filter to be deleted.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_fdir_filter_delete(struct priv *priv,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	struct mlx5_fdir attributes = {
		.attr.group = 0,
	};
	struct mlx5_flow_parse parser = {
		.create = 1,
		.layer = HASH_RXQ_ETH,
	};
	struct rte_flow_error error;
	struct rte_flow *flow;
	unsigned int i;
	int ret;

	ret = priv_fdir_filter_convert(priv, fdir_filter, &attributes);
	if (ret)
		return ret;
	ret = priv_flow_convert(priv, &attributes.attr, attributes.items,
				attributes.actions, &error, &parser);
	if (ret)
		goto exit;
	/*
	 * Special case for the drop action, which is only written to the
	 * specifications when the flow is created.  In this situation the
	 * drop specification is missing and must be appended by hand
	 * before comparing against existing flows; it goes right after
	 * the last spec following struct ibv_flow_attr.
	 */
	if (parser.drop) {
		struct ibv_flow_spec_action_drop *drop;

		drop = (void *)((uintptr_t)parser.queue[HASH_RXQ_ETH].ibv_attr +
				parser.queue[HASH_RXQ_ETH].offset);
		*drop = (struct ibv_flow_spec_action_drop){
			.type = IBV_FLOW_SPEC_ACTION_DROP,
			.size = sizeof(struct ibv_flow_spec_action_drop),
		};
		parser.queue[HASH_RXQ_ETH].ibv_attr->num_of_specs++;
	}
	TAILQ_FOREACH(flow, &priv->flows, next) {
		struct ibv_flow_attr *attr;
		struct ibv_spec_header *attr_h;
		void *spec;
		struct ibv_flow_attr *flow_attr;
		struct ibv_spec_header *flow_h;
		void *flow_spec;
		unsigned int specs_n;

		attr = parser.queue[HASH_RXQ_ETH].ibv_attr;
		flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr;
		/* Compare the attributes first. */
		if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
			continue;
		if (attr->num_of_specs == 0)
			continue;
		/* Then compare the specifications one by one. */
		spec = (void *)((uintptr_t)attr +
				sizeof(struct ibv_flow_attr));
		flow_spec = (void *)((uintptr_t)flow_attr +
				     sizeof(struct ibv_flow_attr));
		specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
		for (i = 0; i != specs_n; ++i) {
			attr_h = spec;
			flow_h = flow_spec;
			if (memcmp(spec, flow_spec,
				   RTE_MIN(attr_h->size, flow_h->size)))
				goto wrong_flow;
			spec = (void *)((uintptr_t)spec + attr_h->size);
			flow_spec = (void *)((uintptr_t)flow_spec +
					     flow_h->size);
		}
		/* At this point, the flow matches. */
		break;
wrong_flow:
		/* The flow does not match. */
		continue;
	}
	if (flow)
		priv_flow_destroy(priv, &priv->flows, flow);
exit:
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser.queue[i].ibv_attr)
			rte_free(parser.queue[i].ibv_attr);
	}
	return ret;
}

/**
 * Update queue for specific filter.
 *
 * @param priv
 *   Private structure.
 * @param fdir_filter
 *   Filter to be updated.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_fdir_filter_update(struct priv *priv,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	int ret;

	ret = priv_fdir_filter_delete(priv, fdir_filter);
	if (ret)
		return ret;
	ret = priv_fdir_filter_add(priv, fdir_filter);
	return ret;
}
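
/*
 * Note: the update above is delete-then-add, so it is not atomic;
 * traffic matching the filter may fall through to the default RSS flows
 * between the two steps.
 */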

/**
 * Flush all filters.
 *
 * @param priv
 *   Private structure.
 */
static void
priv_fdir_filter_flush(struct priv *priv)
{
	priv_flow_flush(priv, &priv->flows);
}

/**
 * Get flow director information.
 *
 * @param priv
 *   Private structure.
 * @param[out] fdir_info
 *   Resulting flow director information.
 */
static void
priv_fdir_info_get(struct priv *priv, struct rte_eth_fdir_info *fdir_info)
{
	struct rte_eth_fdir_masks *mask =
		&priv->dev->data->dev_conf.fdir_conf.mask;

	fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
	fdir_info->guarant_spc = 0;
	rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
	fdir_info->max_flexpayload = 0;
	fdir_info->flow_types_mask[0] = 0;
	/* Flexible payload is not supported; report zeroed capabilities. */
	fdir_info->flex_payload_unit = 0;
	fdir_info->max_flex_payload_segment_num = 0;
	fdir_info->flex_payload_limit = 0;
	memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
}

/**
 * Deal with flow director operations.
 *
 * @param priv
 *   Pointer to private structure.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_fdir_ctrl_func(struct priv *priv, enum rte_filter_op filter_op, void *arg)
{
	enum rte_fdir_mode fdir_mode =
		priv->dev->data->dev_conf.fdir_conf.mode;
	int ret = 0;

	if (filter_op == RTE_ETH_FILTER_NOP)
		return 0;
	if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
	    fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
		ERROR("%p: flow director mode %d not supported",
		      (void *)priv, fdir_mode);
		return EINVAL;
	}
	switch (filter_op) {
	case RTE_ETH_FILTER_ADD:
		ret = priv_fdir_filter_add(priv, arg);
		break;
	case RTE_ETH_FILTER_UPDATE:
		ret = priv_fdir_filter_update(priv, arg);
		break;
	case RTE_ETH_FILTER_DELETE:
		ret = priv_fdir_filter_delete(priv, arg);
		break;
	case RTE_ETH_FILTER_FLUSH:
		priv_fdir_filter_flush(priv);
		break;
	case RTE_ETH_FILTER_INFO:
		priv_fdir_info_get(priv, arg);
		break;
	default:
		DEBUG("%p: unknown operation %u", (void *)priv,
		      filter_op);
		ret = EINVAL;
		break;
	}
	return ret;
}
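
/*
 * Usage sketch (values hypothetical): flow director requests reach this
 * function through the public filter-ctrl entry point, e.g.:
 *
 *	struct rte_eth_fdir_filter f = { ... };
 *
 *	rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
 *				RTE_ETH_FILTER_ADD, &f);
 *
 * This requires fdir_conf.mode to be RTE_FDIR_MODE_PERFECT or
 * RTE_FDIR_MODE_PERFECT_MAC_VLAN in the device configuration.
 */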

/**
 * Manage filter operations.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param filter_type
 *   Filter type.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
int
mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
		     enum rte_filter_type filter_type,
		     enum rte_filter_op filter_op,
		     void *arg)
{
	int ret = EINVAL;
	struct priv *priv = dev->data->dev_private;

	switch (filter_type) {
	case RTE_ETH_FILTER_GENERIC:
		if (filter_op != RTE_ETH_FILTER_GET)
			return -EINVAL;
		*(const void **)arg = &mlx5_flow_ops;
		return 0;
	case RTE_ETH_FILTER_FDIR:
		priv_lock(priv);
		ret = priv_fdir_ctrl_func(priv, filter_op, arg);
		priv_unlock(priv);
		break;
	default:
		ERROR("%p: filter type (%d) not supported",
		      (void *)dev, filter_type);
		break;
	}
	return -ret;
}
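
/*
 * Usage sketch: the rte_flow layer fetches the generic flow ops with
 * RTE_ETH_FILTER_GENERIC / RTE_ETH_FILTER_GET, which is how every
 * rte_flow_*() call finds mlx5_flow_ops:
 *
 *	const struct rte_flow_ops *ops;
 *
 *	rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_GENERIC,
 *				RTE_ETH_FILTER_GET, &ops);
 */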