/*-
 *   BSD LICENSE
 *
 * Copyright 2016 6WIND S.A.
 * Copyright 2016 Mellanox.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *     distribution.
 *   * Neither the name of 6WIND S.A. nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <sys/queue.h>

/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#pragma GCC diagnostic ignored "-Wpedantic"
#include <infiniband/verbs.h>
#pragma GCC diagnostic error "-Wpedantic"

#include <rte_ethdev_driver.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>

#include "mlx5_defs.h"

/* Define minimal priority for control plane flows. */
#define MLX5_CTRL_FLOW_PRIORITY 4

/* Internet Protocol versions. */
#define MLX5_IPV4 4
#define MLX5_IPV6 6
#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
struct ibv_counter_set_init_attr {
	int dummy;
};
struct ibv_flow_spec_counter_action {
	int dummy;
};
struct ibv_counter_set {
	int dummy;
};
static inline int
ibv_destroy_counter_set(struct ibv_counter_set *cs)
{
	(void)cs;
	return -ENOTSUP;
}
#endif

/* Dev ops structure defined in mlx5.c. */
extern const struct eth_dev_ops mlx5_dev_ops;
extern const struct eth_dev_ops mlx5_dev_ops_isolate;
static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data);

static int
mlx5_flow_create_vlan(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data);

static int
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data);

static int
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data);

static int
mlx5_flow_create_udp(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data);

static int
mlx5_flow_create_tcp(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data);

static int
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
		       const void *default_mask,
		       void *data);

struct mlx5_flow_parse;

static void
mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
		      unsigned int size);

static int
mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);

static int
mlx5_flow_create_count(struct priv *priv, struct mlx5_flow_parse *parser);
/* Hash RX queue types. */
enum hash_rxq_type {
	HASH_RXQ_TCPV4,
	HASH_RXQ_UDPV4,
	HASH_RXQ_IPV4,
	HASH_RXQ_TCPV6,
	HASH_RXQ_UDPV6,
	HASH_RXQ_IPV6,
	HASH_RXQ_ETH,
};

/* Initialization data for hash RX queue. */
struct hash_rxq_init {
	uint64_t hash_fields; /* Fields that participate in the hash. */
	uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
	unsigned int flow_priority; /* Flow priority to use. */
	unsigned int ip_version; /* Internet protocol. */
};
153 /* Initialization data for hash RX queues. */
154 const struct hash_rxq_init hash_rxq_init[] = {
156 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
157 IBV_RX_HASH_DST_IPV4 |
158 IBV_RX_HASH_SRC_PORT_TCP |
159 IBV_RX_HASH_DST_PORT_TCP),
160 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
162 .ip_version = MLX5_IPV4,
165 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
166 IBV_RX_HASH_DST_IPV4 |
167 IBV_RX_HASH_SRC_PORT_UDP |
168 IBV_RX_HASH_DST_PORT_UDP),
169 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
171 .ip_version = MLX5_IPV4,
174 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
175 IBV_RX_HASH_DST_IPV4),
176 .dpdk_rss_hf = (ETH_RSS_IPV4 |
179 .ip_version = MLX5_IPV4,
182 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
183 IBV_RX_HASH_DST_IPV6 |
184 IBV_RX_HASH_SRC_PORT_TCP |
185 IBV_RX_HASH_DST_PORT_TCP),
186 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
188 .ip_version = MLX5_IPV6,
191 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
192 IBV_RX_HASH_DST_IPV6 |
193 IBV_RX_HASH_SRC_PORT_UDP |
194 IBV_RX_HASH_DST_PORT_UDP),
195 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
197 .ip_version = MLX5_IPV6,
200 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
201 IBV_RX_HASH_DST_IPV6),
202 .dpdk_rss_hf = (ETH_RSS_IPV6 |
205 .ip_version = MLX5_IPV6,
214 /* Number of entries in hash_rxq_init[]. */
215 const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
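
/*
 * Illustrative sketch, not used by the driver itself: hash_rxq_init[] is
 * indexed by enum hash_rxq_type and iterated with hash_rxq_init_n, e.g. to
 * find the queue types matching a given RSS hash field set (rss_hf below
 * is a hypothetical input):
 *
 * @code
 * unsigned int i;
 *
 * for (i = 0; i != hash_rxq_init_n; ++i)
 *	if (hash_rxq_init[i].dpdk_rss_hf & rss_hf)
 *		printf("type %u, flow priority %u\n",
 *		       i, hash_rxq_init[i].flow_priority);
 * @endcode
 */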
/** Structure for holding counter stats. */
struct mlx5_flow_counter_stats {
	uint64_t hits; /**< Number of packets matched by the rule. */
	uint64_t bytes; /**< Number of bytes matched by the rule. */
};

/** Structure for Drop queue. */
struct mlx5_hrxq_drop {
	struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
	struct ibv_qp *qp; /**< Verbs queue pair. */
	struct ibv_wq *wq; /**< Verbs work queue. */
	struct ibv_cq *cq; /**< Verbs completion queue. */
};

/* Flow structures. */
struct mlx5_flow {
	uint64_t hash_fields; /**< Fields that participate in the hash. */
	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
	struct ibv_flow *ibv_flow; /**< Verbs flow. */
	struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
};

/* Drop flow structures. */
struct mlx5_flow_drop {
	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
	struct ibv_flow *ibv_flow; /**< Verbs flow. */
};

struct rte_flow {
	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
	uint32_t mark:1; /**< Set if the flow is marked. */
	uint32_t drop:1; /**< Drop queue. */
	uint16_t queues_n; /**< Number of entries in queue[]. */
	uint16_t (*queues)[]; /**< Queues indexes to use. */
	struct rte_eth_rss_conf rss_conf; /**< RSS configuration. */
	uint8_t rss_key[40]; /**< Copy of the RSS key. */
	struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
	struct mlx5_flow_counter_stats counter_stats;
	/**< The counter stats. */
	struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
	/**< Flow with Rx queue. */
};
/** Static initializer for items. */
#define ITEMS(...) \
	(const enum rte_flow_item_type []){ \
		__VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
	}
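
/*
 * For reference (hedged, the expansion below is written out by hand):
 * ITEMS(RTE_FLOW_ITEM_TYPE_IPV4, RTE_FLOW_ITEM_TYPE_IPV6) expands to a
 * compound literal always terminated by RTE_FLOW_ITEM_TYPE_END:
 *
 * @code
 * (const enum rte_flow_item_type []){
 *	RTE_FLOW_ITEM_TYPE_IPV4, RTE_FLOW_ITEM_TYPE_IPV6,
 *	RTE_FLOW_ITEM_TYPE_END,
 * }
 * @endcode
 */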
/** Structure to generate a simple graph of layers supported by the NIC. */
struct mlx5_flow_items {
	/** List of possible actions for these items. */
	const enum rte_flow_action_type *const actions;
	/** Bit-masks corresponding to the possibilities for the item. */
	const void *mask;
	/**
	 * Default bit-masks to use when item->mask is not provided. When
	 * \default_mask is also NULL, the full supported bit-mask (\mask) is
	 * used instead.
	 */
	const void *default_mask;
	/** Bit-masks size in bytes. */
	const unsigned int mask_sz;
	/**
	 * Conversion function from rte_flow to NIC specific flow.
	 *
	 * @param item
	 *   rte_flow item to convert.
	 * @param default_mask
	 *   Default bit-masks to use when item->mask is not provided.
	 * @param data
	 *   Internal structure to store the conversion.
	 *
	 * @return
	 *   0 on success, negative value otherwise.
	 */
	int (*convert)(const struct rte_flow_item *item,
		       const void *default_mask,
		       void *data);
	/** Size in bytes of the destination structure. */
	const unsigned int dst_sz;
	/** List of possible following items. */
	const enum rte_flow_item_type *const items;
};
/** Valid actions for this PMD. */
static const enum rte_flow_action_type valid_actions[] = {
	RTE_FLOW_ACTION_TYPE_DROP,
	RTE_FLOW_ACTION_TYPE_QUEUE,
	RTE_FLOW_ACTION_TYPE_MARK,
	RTE_FLOW_ACTION_TYPE_FLAG,
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
	RTE_FLOW_ACTION_TYPE_COUNT,
#endif
	RTE_FLOW_ACTION_TYPE_END,
};
/** Graph of supported items and associated actions. */
static const struct mlx5_flow_items mlx5_flow_items[] = {
	[RTE_FLOW_ITEM_TYPE_END] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
			       RTE_FLOW_ITEM_TYPE_VXLAN),
	},
	[RTE_FLOW_ITEM_TYPE_ETH] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
			       RTE_FLOW_ITEM_TYPE_IPV4,
			       RTE_FLOW_ITEM_TYPE_IPV6),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_eth){
			.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
			.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
			.type = -1,
		},
		.default_mask = &rte_flow_item_eth_mask,
		.mask_sz = sizeof(struct rte_flow_item_eth),
		.convert = mlx5_flow_create_eth,
		.dst_sz = sizeof(struct ibv_flow_spec_eth),
	},
	[RTE_FLOW_ITEM_TYPE_VLAN] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
			       RTE_FLOW_ITEM_TYPE_IPV6),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_vlan){
			.tci = -1,
		},
		.default_mask = &rte_flow_item_vlan_mask,
		.mask_sz = sizeof(struct rte_flow_item_vlan),
		.convert = mlx5_flow_create_vlan,
		.dst_sz = 0,
	},
	[RTE_FLOW_ITEM_TYPE_IPV4] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
			       RTE_FLOW_ITEM_TYPE_TCP),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_ipv4){
			.hdr = {
				.src_addr = -1,
				.dst_addr = -1,
				.type_of_service = -1,
				.next_proto_id = -1,
			},
		},
		.default_mask = &rte_flow_item_ipv4_mask,
		.mask_sz = sizeof(struct rte_flow_item_ipv4),
		.convert = mlx5_flow_create_ipv4,
		.dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
	},
	[RTE_FLOW_ITEM_TYPE_IPV6] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
			       RTE_FLOW_ITEM_TYPE_TCP),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_ipv6){
			.hdr = {
				.src_addr = {
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
				},
				.dst_addr = {
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
				},
				.vtc_flow = -1,
				.proto = -1,
				.hop_limits = -1,
			},
		},
		.default_mask = &rte_flow_item_ipv6_mask,
		.mask_sz = sizeof(struct rte_flow_item_ipv6),
		.convert = mlx5_flow_create_ipv6,
		.dst_sz = sizeof(struct ibv_flow_spec_ipv6),
	},
	[RTE_FLOW_ITEM_TYPE_UDP] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_udp){
			.hdr = {
				.src_port = -1,
				.dst_port = -1,
			},
		},
		.default_mask = &rte_flow_item_udp_mask,
		.mask_sz = sizeof(struct rte_flow_item_udp),
		.convert = mlx5_flow_create_udp,
		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
	},
	[RTE_FLOW_ITEM_TYPE_TCP] = {
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_tcp){
			.hdr = {
				.src_port = -1,
				.dst_port = -1,
			},
		},
		.default_mask = &rte_flow_item_tcp_mask,
		.mask_sz = sizeof(struct rte_flow_item_tcp),
		.convert = mlx5_flow_create_tcp,
		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
	},
	[RTE_FLOW_ITEM_TYPE_VXLAN] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_vxlan){
			.vni = "\xff\xff\xff",
		},
		.default_mask = &rte_flow_item_vxlan_mask,
		.mask_sz = sizeof(struct rte_flow_item_vxlan),
		.convert = mlx5_flow_create_vxlan,
		.dst_sz = sizeof(struct ibv_flow_spec_tunnel),
	},
};
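
/*
 * Hedged sketch of how the graph above is walked (the authoritative
 * traversal is in priv_flow_convert_items_validate() below): starting from
 * the END node, a pattern item is accepted only when it is listed in the
 * ->items array of the current node.
 *
 * @code
 * const struct mlx5_flow_items *cur =
 *	&mlx5_flow_items[RTE_FLOW_ITEM_TYPE_END];
 * unsigned int i;
 *
 * for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
 *	for (i = 0; cur->items[i] != RTE_FLOW_ITEM_TYPE_END; ++i)
 *		if (cur->items[i] == items->type)
 *			break;
 *	if (cur->items[i] == RTE_FLOW_ITEM_TYPE_END)
 *		return ENOTSUP; // items->type cannot follow the previous item
 *	cur = &mlx5_flow_items[items->type];
 * }
 * @endcode
 */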
/** Structure to pass to the conversion function. */
struct mlx5_flow_parse {
	uint32_t inner; /**< Set once VXLAN is encountered. */
	uint32_t allmulti:1; /**< Set once allmulti dst MAC is encountered. */
	uint32_t create:1;
	/**< Whether resources should remain after a validate. */
	uint32_t drop:1; /**< Target is a drop queue. */
	uint32_t mark:1; /**< Mark is present in the flow. */
	uint32_t count:1; /**< Count is present in the flow. */
	uint32_t mark_id; /**< Mark identifier. */
	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use. */
	uint16_t queues_n; /**< Number of entries in queue[]. */
	struct rte_eth_rss_conf rss_conf; /**< RSS configuration. */
	uint8_t rss_key[40]; /**< Copy of the RSS key. */
	enum hash_rxq_type layer; /**< Last pattern layer detected. */
	struct ibv_counter_set *cs; /**< Holds the counter set for the rule. */
	struct {
		struct ibv_flow_attr *ibv_attr;
		/**< Pointer to Verbs attributes. */
		unsigned int offset;
		/**< Current position or total size of the attribute. */
	} queue[RTE_DIM(hash_rxq_init)];
};
static const struct rte_flow_ops mlx5_flow_ops = {
	.validate = mlx5_flow_validate,
	.create = mlx5_flow_create,
	.destroy = mlx5_flow_destroy,
	.flush = mlx5_flow_flush,
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
	.query = mlx5_flow_query,
#else
	.query = NULL,
#endif
	.isolate = mlx5_flow_isolate,
};
/* Convert FDIR request to Generic flow. */
struct mlx5_fdir {
	struct rte_flow_attr attr;
	struct rte_flow_action actions[2];
	struct rte_flow_item items[4];
	struct rte_flow_item_eth l2;
	struct rte_flow_item_eth l2_mask;
	union {
		struct rte_flow_item_ipv4 ipv4;
		struct rte_flow_item_ipv6 ipv6;
	} l3;
	union {
		struct rte_flow_item_udp udp;
		struct rte_flow_item_tcp tcp;
	} l4;
	struct rte_flow_action_queue queue;
};

/* Verbs specification header. */
struct ibv_spec_header {
	enum ibv_flow_spec_type type;
	uint16_t size;
};
/**
 * Check support for a given item.
 *
 * @param item[in]
 *   Item specification.
 * @param mask[in]
 *   Bit-masks covering supported fields to compare with spec, last and mask in
 *   \item.
 * @param size
 *   Bit-mask size in bytes.
 *
 * @return
 *   0 on success.
 */
static int
mlx5_flow_item_validate(const struct rte_flow_item *item,
			const uint8_t *mask, unsigned int size)
{
	if (!item->spec && (item->mask || item->last))
		return -1;
	if (item->spec && !item->mask) {
		unsigned int i;
		const uint8_t *spec = item->spec;

		for (i = 0; i < size; ++i)
			if ((spec[i] | mask[i]) != mask[i])
				return -1;
	}
	if (item->last && !item->mask) {
		unsigned int i;
		const uint8_t *spec = item->last;

		for (i = 0; i < size; ++i)
			if ((spec[i] | mask[i]) != mask[i])
				return -1;
	}
	if (item->mask) {
		unsigned int i;
		const uint8_t *spec = item->spec;

		for (i = 0; i < size; ++i)
			if ((spec[i] | mask[i]) != mask[i])
				return -1;
	}
	if (item->spec && item->last) {
		uint8_t spec[size];
		uint8_t last[size];
		const uint8_t *apply = mask;
		unsigned int i;
		int ret;

		if (item->mask)
			apply = item->mask;
		for (i = 0; i < size; ++i) {
			spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
			last[i] = ((const uint8_t *)item->last)[i] & apply[i];
		}
		ret = memcmp(spec, last, size);
		if (ret)
			return -1;
	}
	return 0;
}
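
/*
 * Worked example (illustrative): with a supported mask byte of 0x0f, the
 * (spec | mask) == mask test above behaves as follows:
 *
 * @code
 * (0x03 | 0x0f) == 0x0f   // accepted, only supported bits requested
 * (0x13 | 0x0f) == 0x1f   // rejected, bit 0x10 is not supported
 * @endcode
 */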
/**
 * Copy the RSS configuration from the user-provided one, or the device
 * default when none is given.
 *
 * @param priv
 *   Pointer to private structure.
 * @param parser
 *   Internal parser structure.
 * @param rss_conf
 *   User RSS configuration to save.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_flow_convert_rss_conf(struct priv *priv,
			   struct mlx5_flow_parse *parser,
			   const struct rte_eth_rss_conf *rss_conf)
{
	const struct rte_eth_rss_conf *rss;

	if (rss_conf) {
		if (rss_conf->rss_hf & MLX5_RSS_HF_MASK)
			return EINVAL;
		rss = rss_conf;
	} else {
		rss = &priv->rss_conf;
	}
	if (rss->rss_key_len > 40)
		return EINVAL;
	parser->rss_conf.rss_key_len = rss->rss_key_len;
	parser->rss_conf.rss_hf = rss->rss_hf;
	memcpy(parser->rss_key, rss->rss_key, rss->rss_key_len);
	parser->rss_conf.rss_key = parser->rss_key;
	return 0;
}
/**
 * Extract attributes into the parser.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_convert_attributes(struct priv *priv,
			     const struct rte_flow_attr *attr,
			     struct rte_flow_error *error,
			     struct mlx5_flow_parse *parser)
{
	(void)priv;
	(void)parser;
	if (attr->group) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
				   NULL,
				   "groups are not supported");
		return -rte_errno;
	}
	if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
				   NULL,
				   "priorities are not supported");
		return -rte_errno;
	}
	if (attr->egress) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
				   NULL,
				   "egress is not supported");
		return -rte_errno;
	}
	if (!attr->ingress) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
				   NULL,
				   "only ingress is supported");
		return -rte_errno;
	}
	return 0;
}
/**
 * Extract the requested actions into the parser.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_convert_actions(struct priv *priv,
			  const struct rte_flow_action actions[],
			  struct rte_flow_error *error,
			  struct mlx5_flow_parse *parser)
{
	/*
	 * Add default RSS configuration necessary for Verbs to create QP even
	 * if no RSS is necessary.
	 */
	priv_flow_convert_rss_conf(priv, parser,
				   (const struct rte_eth_rss_conf *)
				   &priv->rss_conf);
	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
			continue;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
			parser->drop = 1;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
			const struct rte_flow_action_queue *queue =
				(const struct rte_flow_action_queue *)
				actions->conf;
			uint16_t n;
			uint16_t found = 0;

			if (!queue || (queue->index > (priv->rxqs_n - 1)))
				goto exit_action_not_supported;
			for (n = 0; n < parser->queues_n; ++n) {
				if (parser->queues[n] == queue->index) {
					found = 1;
					break;
				}
			}
			if (parser->queues_n > 1 && !found) {
				rte_flow_error_set(error, ENOTSUP,
					RTE_FLOW_ERROR_TYPE_ACTION,
					actions,
					"queue action not in RSS queues");
				return -rte_errno;
			}
			if (!found) {
				parser->queues_n = 1;
				parser->queues[0] = queue->index;
			}
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
			const struct rte_flow_action_rss *rss =
				(const struct rte_flow_action_rss *)
				actions->conf;
			uint16_t n;

			if (!rss || !rss->num) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "no valid queues");
				return -rte_errno;
			}
			if (parser->queues_n == 1) {
				uint16_t found = 0;

				assert(parser->queues_n);
				for (n = 0; n < rss->num; ++n) {
					if (parser->queues[0] ==
					    rss->queue[n]) {
						found = 1;
						break;
					}
				}
				if (!found) {
					rte_flow_error_set(error, ENOTSUP,
						RTE_FLOW_ERROR_TYPE_ACTION,
						actions,
						"queue action not in RSS"
						" queues");
					return -rte_errno;
				}
			}
			for (n = 0; n < rss->num; ++n) {
				if (rss->queue[n] >= priv->rxqs_n) {
					rte_flow_error_set(error, EINVAL,
						RTE_FLOW_ERROR_TYPE_ACTION,
						actions,
						"queue id > number of"
						" queues");
					return -rte_errno;
				}
			}
			for (n = 0; n < rss->num; ++n)
				parser->queues[n] = rss->queue[n];
			parser->queues_n = rss->num;
			if (priv_flow_convert_rss_conf(priv, parser,
						       rss->rss_conf)) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "wrong RSS configuration");
				return -rte_errno;
			}
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
			const struct rte_flow_action_mark *mark =
				(const struct rte_flow_action_mark *)
				actions->conf;

			if (!mark) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "mark must be defined");
				return -rte_errno;
			} else if (mark->id >= MLX5_FLOW_MARK_MAX) {
				rte_flow_error_set(error, ENOTSUP,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "mark must be between 0"
						   " and 16777199");
				return -rte_errno;
			}
			parser->mark = 1;
			parser->mark_id = mark->id;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
			parser->mark = 1;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
			   priv->config.flow_counter_en) {
			parser->count = 1;
		} else {
			goto exit_action_not_supported;
		}
	}
	if (parser->drop && parser->mark)
		parser->mark = 0;
	if (!parser->queues_n && !parser->drop) {
		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "no valid action");
		return -rte_errno;
	}
	return 0;
exit_action_not_supported:
	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
			   actions, "action not supported");
	return -rte_errno;
}
/**
 * Validate items.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] items
 *   Pattern specification (list terminated by the END pattern item).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_convert_items_validate(struct priv *priv,
				 const struct rte_flow_item items[],
				 struct rte_flow_error *error,
				 struct mlx5_flow_parse *parser)
{
	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
	unsigned int i;

	(void)priv;
	/* Initialise the offsets to start after verbs attribute. */
	for (i = 0; i != hash_rxq_init_n; ++i)
		parser->queue[i].offset = sizeof(struct ibv_flow_attr);
	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
		const struct mlx5_flow_items *token = NULL;
		unsigned int n;
		int err;

		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
			continue;
		for (i = 0;
		     cur_item->items &&
		     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
		     ++i) {
			if (cur_item->items[i] == items->type) {
				token = &mlx5_flow_items[items->type];
				break;
			}
		}
		if (!token)
			goto exit_item_not_supported;
		cur_item = token;
		err = mlx5_flow_item_validate(items,
					      (const uint8_t *)cur_item->mask,
					      cur_item->mask_sz);
		if (err)
			goto exit_item_not_supported;
		if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
			if (parser->inner) {
				rte_flow_error_set(error, ENOTSUP,
						   RTE_FLOW_ERROR_TYPE_ITEM,
						   items,
						   "cannot recognize multiple"
						   " VXLAN encapsulations");
				return -rte_errno;
			}
			parser->inner = IBV_FLOW_SPEC_INNER;
		}
		if (parser->drop || parser->queues_n == 1) {
			parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
		} else {
			for (n = 0; n != hash_rxq_init_n; ++n)
				parser->queue[n].offset += cur_item->dst_sz;
		}
	}
	if (parser->drop)
		parser->queue[HASH_RXQ_ETH].offset +=
			sizeof(struct ibv_flow_spec_action_drop);
	if (parser->mark)
		for (i = 0; i != hash_rxq_init_n; ++i)
			parser->queue[i].offset +=
				sizeof(struct ibv_flow_spec_action_tag);
	if (parser->count) {
		unsigned int size = sizeof(struct ibv_flow_spec_counter_action);

		for (i = 0; i != hash_rxq_init_n; ++i)
			parser->queue[i].offset += size;
	}
	return 0;
exit_item_not_supported:
	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
			   items, "item not supported");
	return -rte_errno;
}
/**
 * Allocate memory space to store verbs flow attributes.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] priority
 *   Flow priority.
 * @param[in] size
 *   Amount of bytes to allocate.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A verbs flow attribute on success, NULL otherwise.
 */
static struct ibv_flow_attr*
priv_flow_convert_allocate(struct priv *priv,
			   unsigned int priority,
			   unsigned int size,
			   struct rte_flow_error *error)
{
	struct ibv_flow_attr *ibv_attr;

	(void)priv;
	ibv_attr = rte_calloc(__func__, 1, size, 0);
	if (!ibv_attr) {
		rte_flow_error_set(error, ENOMEM,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "cannot allocate verbs spec attributes.");
		return NULL;
	}
	ibv_attr->priority = priority;
	return ibv_attr;
}
/**
 * Finalise verbs flow attributes.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in, out] parser
 *   Internal parser structure.
 */
static void
priv_flow_convert_finalise(struct priv *priv, struct mlx5_flow_parse *parser)
{
	const unsigned int ipv4 =
		hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
	const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 : HASH_RXQ_TCPV6;
	const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
	const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 : HASH_RXQ_TCPV4;
	const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 : HASH_RXQ_IPV4;
	const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
	unsigned int i;

	(void)priv;
	if (parser->layer == HASH_RXQ_ETH) {
		goto fill;
	} else {
		/*
		 * This layer becomes useless as the pattern defines lower
		 * layers.
		 */
		rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
		parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
	}
	/* Remove opposite kind of layer e.g. IPv6 if the pattern is IPv4. */
	for (i = ohmin; i != (ohmax + 1); ++i) {
		if (!parser->queue[i].ibv_attr)
			continue;
		rte_free(parser->queue[i].ibv_attr);
		parser->queue[i].ibv_attr = NULL;
	}
	/* Remove impossible flow according to the RSS configuration. */
	if (hash_rxq_init[parser->layer].dpdk_rss_hf &
	    parser->rss_conf.rss_hf) {
		/* Remove any other flow. */
		for (i = hmin; i != (hmax + 1); ++i) {
			if ((i == parser->layer) ||
			    (!parser->queue[i].ibv_attr))
				continue;
			rte_free(parser->queue[i].ibv_attr);
			parser->queue[i].ibv_attr = NULL;
		}
	} else if (!parser->queue[ip].ibv_attr) {
		/* No RSS possible with the current configuration. */
		parser->queues_n = 1;
		return;
	}
fill:
	/*
	 * Fill missing layers in verbs specifications, or compute the correct
	 * offset to allocate the memory space for the attributes and
	 * specifications.
	 */
	for (i = 0; i != hash_rxq_init_n - 1; ++i) {
		union {
			struct ibv_flow_spec_ipv4_ext ipv4;
			struct ibv_flow_spec_ipv6 ipv6;
			struct ibv_flow_spec_tcp_udp udp_tcp;
		} specs;
		void *dst;
		uint16_t size;

		if (i == parser->layer)
			continue;
		if (parser->layer == HASH_RXQ_ETH) {
			if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
				size = sizeof(struct ibv_flow_spec_ipv4_ext);
				specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
					.type = IBV_FLOW_SPEC_IPV4_EXT,
					.size = size,
				};
			} else {
				size = sizeof(struct ibv_flow_spec_ipv6);
				specs.ipv6 = (struct ibv_flow_spec_ipv6){
					.type = IBV_FLOW_SPEC_IPV6,
					.size = size,
				};
			}
			if (parser->queue[i].ibv_attr) {
				dst = (void *)((uintptr_t)
					parser->queue[i].ibv_attr +
					parser->queue[i].offset);
				memcpy(dst, &specs, size);
				++parser->queue[i].ibv_attr->num_of_specs;
			}
			parser->queue[i].offset += size;
		}
		if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
		    (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
			size = sizeof(struct ibv_flow_spec_tcp_udp);
			specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
				.type = ((i == HASH_RXQ_UDPV4 ||
					  i == HASH_RXQ_UDPV6) ?
					 IBV_FLOW_SPEC_UDP :
					 IBV_FLOW_SPEC_TCP),
				.size = size,
			};
			if (parser->queue[i].ibv_attr) {
				dst = (void *)((uintptr_t)
					parser->queue[i].ibv_attr +
					parser->queue[i].offset);
				memcpy(dst, &specs, size);
				++parser->queue[i].ibv_attr->num_of_specs;
			}
			parser->queue[i].offset += size;
		}
	}
}
/**
 * Validate and convert a flow supported by the NIC.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_convert(struct priv *priv,
		  const struct rte_flow_attr *attr,
		  const struct rte_flow_item items[],
		  const struct rte_flow_action actions[],
		  struct rte_flow_error *error,
		  struct mlx5_flow_parse *parser)
{
	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
	unsigned int i;
	int ret;

	/* First step. Validate the attributes, items and actions. */
	*parser = (struct mlx5_flow_parse){
		.create = parser->create,
		.layer = HASH_RXQ_ETH,
		.mark_id = MLX5_FLOW_MARK_DEFAULT,
	};
	ret = priv_flow_convert_attributes(priv, attr, error, parser);
	if (ret)
		return ret;
	ret = priv_flow_convert_actions(priv, actions, error, parser);
	if (ret)
		return ret;
	ret = priv_flow_convert_items_validate(priv, items, error, parser);
	if (ret)
		return ret;
	priv_flow_convert_finalise(priv, parser);
	/*
	 * Second step.
	 * Allocate the memory space to store verbs specifications.
	 */
	if (parser->drop || parser->queues_n == 1) {
		unsigned int priority =
			attr->priority +
			hash_rxq_init[HASH_RXQ_ETH].flow_priority;
		unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;

		parser->queue[HASH_RXQ_ETH].ibv_attr =
			priv_flow_convert_allocate(priv, priority,
						   offset, error);
		if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
			return -rte_errno;
		parser->queue[HASH_RXQ_ETH].offset =
			sizeof(struct ibv_flow_attr);
	} else {
		for (i = 0; i != hash_rxq_init_n; ++i) {
			unsigned int priority =
				attr->priority +
				hash_rxq_init[i].flow_priority;
			unsigned int offset;

			if (!(parser->rss_conf.rss_hf &
			      hash_rxq_init[i].dpdk_rss_hf) &&
			    (i != HASH_RXQ_ETH))
				continue;
			offset = parser->queue[i].offset;
			parser->queue[i].ibv_attr =
				priv_flow_convert_allocate(priv, priority,
							   offset, error);
			if (!parser->queue[i].ibv_attr)
				goto exit_enomem;
			parser->queue[i].offset = sizeof(struct ibv_flow_attr);
		}
	}
	/* Third step. Conversion parse, fill the specifications. */
	parser->inner = 0;
	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
			continue;
		cur_item = &mlx5_flow_items[items->type];
		ret = cur_item->convert(items,
					(cur_item->default_mask ?
					 cur_item->default_mask :
					 cur_item->mask),
					parser);
		if (ret) {
			rte_flow_error_set(error, ret,
					   RTE_FLOW_ERROR_TYPE_ITEM,
					   items, "item not supported");
			goto exit_free;
		}
	}
	if (parser->mark)
		mlx5_flow_create_flag_mark(parser, parser->mark_id);
	if (parser->count && parser->create) {
		mlx5_flow_create_count(priv, parser);
		if (!parser->cs)
			goto exit_count_error;
	}
	/*
	 * Last step. Complete missing specification to reach the RSS
	 * configuration.
	 */
	if (parser->queues_n > 1) {
		priv_flow_convert_finalise(priv, parser);
	} else {
		/*
		 * A single queue action has its priority overridden with the
		 * Ethernet layer priority; adjust it to the priority of the
		 * most specific layer actually matched.
		 */
		parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
			attr->priority +
			hash_rxq_init[parser->layer].flow_priority;
	}
	if (parser->allmulti &&
	    parser->layer == HASH_RXQ_ETH) {
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (!parser->queue[i].ibv_attr)
				continue;
			if (parser->queue[i].ibv_attr->num_of_specs != 1)
				break;
			parser->queue[i].ibv_attr->type =
				IBV_FLOW_ATTR_MC_DEFAULT;
		}
	}
exit_free:
	/* Only verification is expected, all resources should be released. */
	if (!parser->create) {
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (parser->queue[i].ibv_attr) {
				rte_free(parser->queue[i].ibv_attr);
				parser->queue[i].ibv_attr = NULL;
			}
		}
	}
	return ret;
exit_enomem:
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser->queue[i].ibv_attr) {
			rte_free(parser->queue[i].ibv_attr);
			parser->queue[i].ibv_attr = NULL;
		}
	}
	rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
			   NULL, "cannot allocate verbs spec attributes.");
	return -rte_errno;
exit_count_error:
	rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
			   NULL, "cannot create counter.");
	return -rte_errno;
}
/**
 * Copy the created specification into the flow.
 *
 * @param parser
 *   Internal parser structure.
 * @param src
 *   Created specification.
 * @param size
 *   Size in bytes of the specification to copy.
 */
static void
mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
		      unsigned int size)
{
	unsigned int i;
	void *dst;

	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (!parser->queue[i].ibv_attr)
			continue;
		/* Specification must be the same L3 type or none. */
		if (parser->layer == HASH_RXQ_ETH ||
		    (hash_rxq_init[parser->layer].ip_version ==
		     hash_rxq_init[i].ip_version) ||
		    (hash_rxq_init[i].ip_version == 0)) {
			dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
					parser->queue[i].offset);
			memcpy(dst, src, size);
			++parser->queue[i].ibv_attr->num_of_specs;
			parser->queue[i].offset += size;
		}
	}
}
/**
 * Convert Ethernet item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data)
{
	const struct rte_flow_item_eth *spec = item->spec;
	const struct rte_flow_item_eth *mask = item->mask;
	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
	struct ibv_flow_spec_eth eth = {
		.type = parser->inner | IBV_FLOW_SPEC_ETH,
		.size = eth_size,
	};

	/* Don't update layer for the inner pattern. */
	if (!parser->inner)
		parser->layer = HASH_RXQ_ETH;
	if (spec) {
		unsigned int i;

		if (!mask)
			mask = default_mask;
		memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
		memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
		eth.val.ether_type = spec->type;
		memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
		memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
		eth.mask.ether_type = mask->type;
		/* Remove unwanted bits from values. */
		for (i = 0; i < ETHER_ADDR_LEN; ++i) {
			eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
			eth.val.src_mac[i] &= eth.mask.src_mac[i];
		}
		eth.val.ether_type &= eth.mask.ether_type;
	}
	mlx5_flow_create_copy(parser, &eth, eth_size);
	parser->allmulti = eth.val.dst_mac[0] & 1;
	return 0;
}
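
/*
 * Hypothetical item accepted by this function (values are illustrative
 * only); with .mask left NULL, default_mask is applied instead:
 *
 * @code
 * struct rte_flow_item_eth spec = {
 *	.dst.addr_bytes = "\x00\x11\x22\x33\x44\x55",
 * };
 * struct rte_flow_item item = {
 *	.type = RTE_FLOW_ITEM_TYPE_ETH,
 *	.spec = &spec,
 * };
 * @endcode
 */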
/**
 * Convert VLAN item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_vlan(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data)
{
	const struct rte_flow_item_vlan *spec = item->spec;
	const struct rte_flow_item_vlan *mask = item->mask;
	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
	struct ibv_flow_spec_eth *eth;
	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);

	if (spec) {
		unsigned int i;
		if (!mask)
			mask = default_mask;

		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (!parser->queue[i].ibv_attr)
				continue;

			eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
				       parser->queue[i].offset - eth_size);
			eth->val.vlan_tag = spec->tci;
			eth->mask.vlan_tag = mask->tci;
			eth->val.vlan_tag &= eth->mask.vlan_tag;
		}
	}
	return 0;
}
/**
 * Convert IPv4 item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data)
{
	const struct rte_flow_item_ipv4 *spec = item->spec;
	const struct rte_flow_item_ipv4 *mask = item->mask;
	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
	unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
	struct ibv_flow_spec_ipv4_ext ipv4 = {
		.type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
		.size = ipv4_size,
	};

	/* Don't update layer for the inner pattern. */
	if (!parser->inner)
		parser->layer = HASH_RXQ_IPV4;
	if (spec) {
		if (!mask)
			mask = default_mask;
		ipv4.val = (struct ibv_flow_ipv4_ext_filter){
			.src_ip = spec->hdr.src_addr,
			.dst_ip = spec->hdr.dst_addr,
			.proto = spec->hdr.next_proto_id,
			.tos = spec->hdr.type_of_service,
		};
		ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
			.src_ip = mask->hdr.src_addr,
			.dst_ip = mask->hdr.dst_addr,
			.proto = mask->hdr.next_proto_id,
			.tos = mask->hdr.type_of_service,
		};
		/* Remove unwanted bits from values. */
		ipv4.val.src_ip &= ipv4.mask.src_ip;
		ipv4.val.dst_ip &= ipv4.mask.dst_ip;
		ipv4.val.proto &= ipv4.mask.proto;
		ipv4.val.tos &= ipv4.mask.tos;
	}
	mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
	return 0;
}
/**
 * Convert IPv6 item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data)
{
	const struct rte_flow_item_ipv6 *spec = item->spec;
	const struct rte_flow_item_ipv6 *mask = item->mask;
	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
	unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
	struct ibv_flow_spec_ipv6 ipv6 = {
		.type = parser->inner | IBV_FLOW_SPEC_IPV6,
		.size = ipv6_size,
	};

	/* Don't update layer for the inner pattern. */
	if (!parser->inner)
		parser->layer = HASH_RXQ_IPV6;
	if (spec) {
		unsigned int i;
		uint32_t vtc_flow_val;
		uint32_t vtc_flow_mask;

		if (!mask)
			mask = default_mask;
		memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
		       RTE_DIM(ipv6.val.src_ip));
		memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
		       RTE_DIM(ipv6.val.dst_ip));
		memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
		       RTE_DIM(ipv6.mask.src_ip));
		memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
		       RTE_DIM(ipv6.mask.dst_ip));
		vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
		vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
		ipv6.val.flow_label =
			rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
					 IPV6_HDR_FL_SHIFT);
		ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
					 IPV6_HDR_TC_SHIFT;
		ipv6.val.next_hdr = spec->hdr.proto;
		ipv6.val.hop_limit = spec->hdr.hop_limits;
		ipv6.mask.flow_label =
			rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
					 IPV6_HDR_FL_SHIFT);
		ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
					  IPV6_HDR_TC_SHIFT;
		ipv6.mask.next_hdr = mask->hdr.proto;
		ipv6.mask.hop_limit = mask->hdr.hop_limits;
		/* Remove unwanted bits from values. */
		for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
			ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
			ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
		}
		ipv6.val.flow_label &= ipv6.mask.flow_label;
		ipv6.val.traffic_class &= ipv6.mask.traffic_class;
		ipv6.val.next_hdr &= ipv6.mask.next_hdr;
		ipv6.val.hop_limit &= ipv6.mask.hop_limit;
	}
	mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
	return 0;
}
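
/*
 * For reference, the vtc_flow word handled above packs three IPv6 header
 * fields; IPV6_HDR_TC_MASK selects bits 27-20 and IPV6_HDR_FL_MASK bits
 * 19-0, in line with the shifts used in the conversion:
 *
 * @code
 *  31      28 27      20 19                    0
 * +----------+----------+-----------------------+
 * | version  | traffic  |      flow label       |
 * |          |  class   |                       |
 * +----------+----------+-----------------------+
 * @endcode
 */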
/**
 * Convert UDP item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_udp(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data)
{
	const struct rte_flow_item_udp *spec = item->spec;
	const struct rte_flow_item_udp *mask = item->mask;
	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
	unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
	struct ibv_flow_spec_tcp_udp udp = {
		.type = parser->inner | IBV_FLOW_SPEC_UDP,
		.size = udp_size,
	};

	/* Don't update layer for the inner pattern. */
	if (!parser->inner) {
		if (parser->layer == HASH_RXQ_IPV4)
			parser->layer = HASH_RXQ_UDPV4;
		else
			parser->layer = HASH_RXQ_UDPV6;
	}
	if (spec) {
		if (!mask)
			mask = default_mask;
		udp.val.dst_port = spec->hdr.dst_port;
		udp.val.src_port = spec->hdr.src_port;
		udp.mask.dst_port = mask->hdr.dst_port;
		udp.mask.src_port = mask->hdr.src_port;
		/* Remove unwanted bits from values. */
		udp.val.src_port &= udp.mask.src_port;
		udp.val.dst_port &= udp.mask.dst_port;
	}
	mlx5_flow_create_copy(parser, &udp, udp_size);
	return 0;
}
/**
 * Convert TCP item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_tcp(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data)
{
	const struct rte_flow_item_tcp *spec = item->spec;
	const struct rte_flow_item_tcp *mask = item->mask;
	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
	unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
	struct ibv_flow_spec_tcp_udp tcp = {
		.type = parser->inner | IBV_FLOW_SPEC_TCP,
		.size = tcp_size,
	};

	/* Don't update layer for the inner pattern. */
	if (!parser->inner) {
		if (parser->layer == HASH_RXQ_IPV4)
			parser->layer = HASH_RXQ_TCPV4;
		else
			parser->layer = HASH_RXQ_TCPV6;
	}
	if (spec) {
		if (!mask)
			mask = default_mask;
		tcp.val.dst_port = spec->hdr.dst_port;
		tcp.val.src_port = spec->hdr.src_port;
		tcp.mask.dst_port = mask->hdr.dst_port;
		tcp.mask.src_port = mask->hdr.src_port;
		/* Remove unwanted bits from values. */
		tcp.val.src_port &= tcp.mask.src_port;
		tcp.val.dst_port &= tcp.mask.dst_port;
	}
	mlx5_flow_create_copy(parser, &tcp, tcp_size);
	return 0;
}
/**
 * Convert VXLAN item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
		       const void *default_mask,
		       void *data)
{
	const struct rte_flow_item_vxlan *spec = item->spec;
	const struct rte_flow_item_vxlan *mask = item->mask;
	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
	struct ibv_flow_spec_tunnel vxlan = {
		.type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
		.size = size,
	};
	union vni {
		uint32_t vlan_id;
		uint8_t vni[4];
	} id;

	id.vni[0] = 0;
	parser->inner = IBV_FLOW_SPEC_INNER;
	if (spec) {
		if (!mask)
			mask = default_mask;
		memcpy(&id.vni[1], spec->vni, 3);
		vxlan.val.tunnel_id = id.vlan_id;
		memcpy(&id.vni[1], mask->vni, 3);
		vxlan.mask.tunnel_id = id.vlan_id;
		/* Remove unwanted bits from values. */
		vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
	}
	/*
	 * Tunnel id 0 is equivalent to not adding a VXLAN layer: if this is
	 * the only layer in the Verbs specification, it is interpreted as a
	 * wildcard and all packets will match the rule; if it follows a full
	 * stack of layers (e.g. eth / ipv4 / udp), all packets matching those
	 * layers will also match the rule.
	 * To avoid this situation, VNI 0 is currently refused.
	 */
	if (!vxlan.val.tunnel_id)
		return EINVAL;
	mlx5_flow_create_copy(parser, &vxlan, size);
	return 0;
}
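
/*
 * Illustrative only: the 24-bit VNI is copied into the three low-order
 * bytes of the big-endian tunnel id, so VNI 0x123456 is laid out as the
 * network-order value 0x00123456:
 *
 * @code
 * union { uint32_t vlan_id; uint8_t vni[4]; } id = { .vni = { 0 } };
 * const uint8_t vni[3] = { 0x12, 0x34, 0x56 };
 *
 * memcpy(&id.vni[1], vni, 3);
 * // id.vlan_id now holds RTE_BE32(0x00123456).
 * @endcode
 */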
/**
 * Convert mark/flag action to Verbs specification.
 *
 * @param parser
 *   Internal parser structure.
 * @param mark_id
 *   Mark identifier.
 */
static int
mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
{
	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
	struct ibv_flow_spec_action_tag tag = {
		.type = IBV_FLOW_SPEC_ACTION_TAG,
		.size = size,
		.tag_id = mlx5_flow_mark_set(mark_id),
	};

	assert(parser->mark);
	mlx5_flow_create_copy(parser, &tag, size);
	return 0;
}
/**
 * Convert count action to Verbs specification.
 *
 * @param priv
 *   Pointer to private structure.
 * @param parser
 *   Pointer to MLX5 flow parser structure.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
mlx5_flow_create_count(struct priv *priv __rte_unused,
		       struct mlx5_flow_parse *parser __rte_unused)
{
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
	unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
	struct ibv_counter_set_init_attr init_attr = {0};
	struct ibv_flow_spec_counter_action counter = {
		.type = IBV_FLOW_SPEC_ACTION_COUNT,
		.size = size,
		.counter_set_handle = 0,
	};

	init_attr.counter_set_id = 0;
	parser->cs = ibv_create_counter_set(priv->ctx, &init_attr);
	if (!parser->cs)
		return EINVAL;
	counter.counter_set_handle = parser->cs->handle;
	mlx5_flow_create_copy(parser, &counter, size);
#endif
	return 0;
}
/**
 * Complete flow rule creation with a drop queue.
 *
 * @param priv
 *   Pointer to private structure.
 * @param parser
 *   Internal parser structure.
 * @param flow
 *   Pointer to the rte_flow.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_flow_create_action_queue_drop(struct priv *priv,
				   struct mlx5_flow_parse *parser,
				   struct rte_flow *flow,
				   struct rte_flow_error *error)
{
	struct ibv_flow_spec_action_drop *drop;
	unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
	int err = 0;

	assert(priv->pd);
	assert(priv->ctx);
	flow->drop = 1;
	drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
			parser->queue[HASH_RXQ_ETH].offset);
	*drop = (struct ibv_flow_spec_action_drop){
		.type = IBV_FLOW_SPEC_ACTION_DROP,
		.size = size,
	};
	++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
	parser->queue[HASH_RXQ_ETH].offset += size;
	flow->frxq[HASH_RXQ_ETH].ibv_attr =
		parser->queue[HASH_RXQ_ETH].ibv_attr;
	if (parser->count)
		flow->cs = parser->cs;
	if (!priv->dev->data->dev_started)
		return 0;
	parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
	flow->frxq[HASH_RXQ_ETH].ibv_flow =
		ibv_create_flow(priv->flow_drop_queue->qp,
				flow->frxq[HASH_RXQ_ETH].ibv_attr);
	if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "flow rule creation failure");
		err = ENOMEM;
		goto error;
	}
	return 0;
error:
	assert(flow);
	if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
		claim_zero(ibv_destroy_flow(flow->frxq[HASH_RXQ_ETH].ibv_flow));
		flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
	}
	if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
		rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
		flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
	}
	if (flow->cs) {
		claim_zero(ibv_destroy_counter_set(flow->cs));
		flow->cs = NULL;
		parser->cs = NULL;
	}
	return err;
}
/**
 * Create hash Rx queues when RSS is enabled.
 *
 * @param priv
 *   Pointer to private structure.
 * @param parser
 *   Internal parser structure.
 * @param flow
 *   Pointer to the rte_flow.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, an errno value otherwise and rte_errno is set.
 */
static int
priv_flow_create_action_queue_rss(struct priv *priv,
				  struct mlx5_flow_parse *parser,
				  struct rte_flow *flow,
				  struct rte_flow_error *error)
{
	unsigned int i;

	for (i = 0; i != hash_rxq_init_n; ++i) {
		uint64_t hash_fields;

		if (!parser->queue[i].ibv_attr)
			continue;
		flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
		parser->queue[i].ibv_attr = NULL;
		hash_fields = hash_rxq_init[i].hash_fields;
		if (!priv->dev->data->dev_started)
			continue;
		flow->frxq[i].hrxq =
			mlx5_priv_hrxq_get(priv,
					   parser->rss_conf.rss_key,
					   parser->rss_conf.rss_key_len,
					   hash_fields,
					   parser->queues,
					   parser->queues_n);
		if (flow->frxq[i].hrxq)
			continue;
		flow->frxq[i].hrxq =
			mlx5_priv_hrxq_new(priv,
					   parser->rss_conf.rss_key,
					   parser->rss_conf.rss_key_len,
					   hash_fields,
					   parser->queues,
					   parser->queues_n);
		if (!flow->frxq[i].hrxq) {
			rte_flow_error_set(error, ENOMEM,
					   RTE_FLOW_ERROR_TYPE_HANDLE,
					   NULL, "cannot create hash rxq");
			return ENOMEM;
		}
	}
	return 0;
}
/**
 * Complete flow rule creation.
 *
 * @param priv
 *   Pointer to private structure.
 * @param parser
 *   Internal parser structure.
 * @param flow
 *   Pointer to the rte_flow.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, an errno value otherwise and rte_errno is set.
 */
static int
priv_flow_create_action_queue(struct priv *priv,
			      struct mlx5_flow_parse *parser,
			      struct rte_flow *flow,
			      struct rte_flow_error *error)
{
	int err = 0;
	unsigned int i;

	assert(priv->pd);
	assert(priv->ctx);
	assert(!parser->drop);
	err = priv_flow_create_action_queue_rss(priv, parser, flow, error);
	if (err)
		goto error;
	if (parser->count)
		flow->cs = parser->cs;
	if (!priv->dev->data->dev_started)
		return 0;
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (!flow->frxq[i].hrxq)
			continue;
		flow->frxq[i].ibv_flow =
			ibv_create_flow(flow->frxq[i].hrxq->qp,
					flow->frxq[i].ibv_attr);
		if (!flow->frxq[i].ibv_flow) {
			rte_flow_error_set(error, ENOMEM,
					   RTE_FLOW_ERROR_TYPE_HANDLE,
					   NULL, "flow rule creation failure");
			err = ENOMEM;
			goto error;
		}
		DEBUG("%p type %d QP %p ibv_flow %p",
		      (void *)flow, i,
		      (void *)flow->frxq[i].hrxq,
		      (void *)flow->frxq[i].ibv_flow);
	}
	for (i = 0; i != parser->queues_n; ++i) {
		struct mlx5_rxq_data *q =
			(*priv->rxqs)[parser->queues[i]];

		q->mark |= parser->mark;
	}
	return 0;
error:
	assert(flow);
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (flow->frxq[i].ibv_flow) {
			struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;

			claim_zero(ibv_destroy_flow(ibv_flow));
		}
		if (flow->frxq[i].hrxq)
			mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq);
		if (flow->frxq[i].ibv_attr)
			rte_free(flow->frxq[i].ibv_attr);
	}
	if (flow->cs) {
		claim_zero(ibv_destroy_counter_set(flow->cs));
		flow->cs = NULL;
		parser->cs = NULL;
	}
	return err;
}
/**
 * Convert a flow.
 *
 * @param priv
 *   Pointer to private structure.
 * @param list
 *   Pointer to a TAILQ flow list.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A flow on success, NULL otherwise.
 */
static struct rte_flow *
priv_flow_create(struct priv *priv,
		 struct mlx5_flows *list,
		 const struct rte_flow_attr *attr,
		 const struct rte_flow_item items[],
		 const struct rte_flow_action actions[],
		 struct rte_flow_error *error)
{
	struct mlx5_flow_parse parser = { .create = 1, };
	struct rte_flow *flow = NULL;
	unsigned int i;
	int err;

	err = priv_flow_convert(priv, attr, items, actions, error, &parser);
	if (err)
		goto exit;
	flow = rte_calloc(__func__, 1,
			  sizeof(*flow) + parser.queues_n * sizeof(uint16_t),
			  0);
	if (!flow) {
		rte_flow_error_set(error, ENOMEM,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "cannot allocate flow memory");
		return NULL;
	}
	/* Copy queues configuration. */
	flow->queues = (uint16_t (*)[])(flow + 1);
	memcpy(flow->queues, parser.queues, parser.queues_n * sizeof(uint16_t));
	flow->queues_n = parser.queues_n;
	flow->mark = parser.mark;
	/* Copy RSS configuration. */
	flow->rss_conf = parser.rss_conf;
	flow->rss_conf.rss_key = flow->rss_key;
	memcpy(flow->rss_key, parser.rss_key, parser.rss_conf.rss_key_len);
	/* Finalise the flow. */
	if (parser.drop)
		err = priv_flow_create_action_queue_drop(priv, &parser, flow,
							 error);
	else
		err = priv_flow_create_action_queue(priv, &parser, flow, error);
	if (err)
		goto exit;
	TAILQ_INSERT_TAIL(list, flow, next);
	DEBUG("Flow created %p", (void *)flow);
	return flow;
exit:
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser.queue[i].ibv_attr)
			rte_free(parser.queue[i].ibv_attr);
	}
	rte_free(flow);
	return NULL;
}
/**
 * Validate a flow supported by the NIC.
 *
 * @see rte_flow_validate()
 * @see rte_flow_ops
 */
int
mlx5_flow_validate(struct rte_eth_dev *dev,
		   const struct rte_flow_attr *attr,
		   const struct rte_flow_item items[],
		   const struct rte_flow_action actions[],
		   struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;
	int ret;
	struct mlx5_flow_parse parser = { .create = 0, };

	priv_lock(priv);
	ret = priv_flow_convert(priv, attr, items, actions, error, &parser);
	priv_unlock(priv);
	return ret;
}

/**
 * Create a flow.
 *
 * @see rte_flow_create()
 * @see rte_flow_ops
 */
struct rte_flow *
mlx5_flow_create(struct rte_eth_dev *dev,
		 const struct rte_flow_attr *attr,
		 const struct rte_flow_item items[],
		 const struct rte_flow_action actions[],
		 struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;
	struct rte_flow *flow;

	priv_lock(priv);
	flow = priv_flow_create(priv, &priv->flows, attr, items, actions,
				error);
	priv_unlock(priv);
	return flow;
}
/**
 * Destroy a flow.
 *
 * @param priv
 *   Pointer to private structure.
 * @param list
 *   Pointer to a TAILQ flow list.
 * @param[in] flow
 *   Flow to destroy.
 */
static void
priv_flow_destroy(struct priv *priv,
		  struct mlx5_flows *list,
		  struct rte_flow *flow)
{
	unsigned int i;

	if (flow->drop || !flow->mark)
		goto free;
	for (i = 0; i != flow->queues_n; ++i) {
		struct rte_flow *tmp;
		int mark = 0;

		/*
		 * To remove the mark from the queue, the queue must not be
		 * present in any other marked flow (RSS or not).
		 */
		TAILQ_FOREACH(tmp, list, next) {
			unsigned int j;
			uint16_t *tqs = NULL;
			uint16_t tq_n = 0;

			if (!tmp->mark)
				continue;
			for (j = 0; j != hash_rxq_init_n; ++j) {
				if (!tmp->frxq[j].hrxq)
					continue;
				tqs = tmp->frxq[j].hrxq->ind_table->queues;
				tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
			}
			if (!tq_n)
				continue;
			for (j = 0; (j != tq_n) && !mark; j++)
				if (tqs[j] == (*flow->queues)[i])
					mark = 1;
		}
		(*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
	}
free:
	if (flow->drop) {
		if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
			claim_zero(ibv_destroy_flow
				   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
		rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
	} else {
		for (i = 0; i != hash_rxq_init_n; ++i) {
			struct mlx5_flow *frxq = &flow->frxq[i];

			if (frxq->ibv_flow)
				claim_zero(ibv_destroy_flow(frxq->ibv_flow));
			if (frxq->hrxq)
				mlx5_priv_hrxq_release(priv, frxq->hrxq);
			if (frxq->ibv_attr)
				rte_free(frxq->ibv_attr);
		}
	}
	if (flow->cs) {
		claim_zero(ibv_destroy_counter_set(flow->cs));
		flow->cs = NULL;
	}
	TAILQ_REMOVE(list, flow, next);
	DEBUG("Flow destroyed %p", (void *)flow);
	rte_free(flow);
}
/**
 * Destroy all flows.
 *
 * @param priv
 *   Pointer to private structure.
 * @param list
 *   Pointer to a TAILQ flow list.
 */
void
priv_flow_flush(struct priv *priv, struct mlx5_flows *list)
{
	while (!TAILQ_EMPTY(list)) {
		struct rte_flow *flow;

		flow = TAILQ_FIRST(list);
		priv_flow_destroy(priv, list, flow);
	}
}
/**
 * Create drop queue.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   0 on success.
 */
int
priv_flow_create_drop_queue(struct priv *priv)
{
	struct mlx5_hrxq_drop *fdq = NULL;

	assert(priv->pd);
	assert(priv->ctx);
	fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
	if (!fdq) {
		WARN("cannot allocate memory for drop queue");
		goto error;
	}
	fdq->cq = ibv_create_cq(priv->ctx, 1, NULL, NULL, 0);
	if (!fdq->cq) {
		WARN("cannot allocate CQ for drop queue");
		goto error;
	}
	fdq->wq = ibv_create_wq(priv->ctx,
			&(struct ibv_wq_init_attr){
			.wq_type = IBV_WQT_RQ,
			.max_wr = 1,
			.max_sge = 1,
			.pd = priv->pd,
			.cq = fdq->cq,
			});
	if (!fdq->wq) {
		WARN("cannot allocate WQ for drop queue");
		goto error;
	}
	fdq->ind_table = ibv_create_rwq_ind_table(priv->ctx,
			&(struct ibv_rwq_ind_table_init_attr){
			.log_ind_tbl_size = 0,
			.ind_tbl = &fdq->wq,
			.comp_mask = 0,
			});
	if (!fdq->ind_table) {
		WARN("cannot allocate indirection table for drop queue");
		goto error;
	}
	fdq->qp = ibv_create_qp_ex(priv->ctx,
		&(struct ibv_qp_init_attr_ex){
			.qp_type = IBV_QPT_RAW_PACKET,
			.comp_mask =
				IBV_QP_INIT_ATTR_PD |
				IBV_QP_INIT_ATTR_IND_TABLE |
				IBV_QP_INIT_ATTR_RX_HASH,
			.rx_hash_conf = (struct ibv_rx_hash_conf){
				.rx_hash_function =
					IBV_RX_HASH_FUNC_TOEPLITZ,
				.rx_hash_key_len = rss_hash_default_key_len,
				.rx_hash_key = rss_hash_default_key,
				.rx_hash_fields_mask = 0,
				},
			.rwq_ind_tbl = fdq->ind_table,
			.pd = priv->pd
		});
	if (!fdq->qp) {
		WARN("cannot allocate QP for drop queue");
		goto error;
	}
	priv->flow_drop_queue = fdq;
	return 0;
error:
	if (fdq->qp)
		claim_zero(ibv_destroy_qp(fdq->qp));
	if (fdq->ind_table)
		claim_zero(ibv_destroy_rwq_ind_table(fdq->ind_table));
	if (fdq->wq)
		claim_zero(ibv_destroy_wq(fdq->wq));
	if (fdq->cq)
		claim_zero(ibv_destroy_cq(fdq->cq));
	if (fdq)
		rte_free(fdq);
	priv->flow_drop_queue = NULL;
	return -1;
}
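
/*
 * Hedged usage sketch: the drop queue is created once before flows are
 * applied and released symmetrically, e.g. (error handling elided):
 *
 * @code
 * if (priv_flow_create_drop_queue(priv))
 *	return -1; // resources missing
 * // ... create/apply flows targeting priv->flow_drop_queue->qp ...
 * priv_flow_delete_drop_queue(priv);
 * @endcode
 */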
/**
 * Delete drop queue.
 *
 * @param priv
 *   Pointer to private structure.
 */
void
priv_flow_delete_drop_queue(struct priv *priv)
{
	struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;

	if (!fdq)
		return;
	if (fdq->qp)
		claim_zero(ibv_destroy_qp(fdq->qp));
	if (fdq->ind_table)
		claim_zero(ibv_destroy_rwq_ind_table(fdq->ind_table));
	if (fdq->wq)
		claim_zero(ibv_destroy_wq(fdq->wq));
	if (fdq->cq)
		claim_zero(ibv_destroy_cq(fdq->cq));
	rte_free(fdq);
	priv->flow_drop_queue = NULL;
}
/**
 * Remove all flows.
 *
 * @param priv
 *   Pointer to private structure.
 * @param list
 *   Pointer to a TAILQ flow list.
 */
void
priv_flow_stop(struct priv *priv, struct mlx5_flows *list)
{
	struct rte_flow *flow;

	TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
		unsigned int i;

		if (flow->drop) {
			if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
				continue;
			claim_zero(ibv_destroy_flow
				   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
			flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
			/* Next flow. */
			continue;
		}
		if (flow->mark) {
			struct mlx5_ind_table_ibv *ind_tbl = NULL;

			for (i = 0; i != hash_rxq_init_n; ++i) {
				if (!flow->frxq[i].hrxq)
					continue;
				ind_tbl = flow->frxq[i].hrxq->ind_table;
			}
			assert(ind_tbl);
			for (i = 0; i != ind_tbl->queues_n; ++i)
				(*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
		}
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (!flow->frxq[i].ibv_flow)
				continue;
			claim_zero(ibv_destroy_flow(flow->frxq[i].ibv_flow));
			flow->frxq[i].ibv_flow = NULL;
			mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq);
			flow->frxq[i].hrxq = NULL;
		}
		DEBUG("Flow %p removed", (void *)flow);
	}
}
/**
 * Add all flows.
 *
 * @param priv
 *   Pointer to private structure.
 * @param list
 *   Pointer to a TAILQ flow list.
 *
 * @return
 *   0 on success, an errno value otherwise and rte_errno is set.
 */
int
priv_flow_start(struct priv *priv, struct mlx5_flows *list)
{
	struct rte_flow *flow;

	TAILQ_FOREACH(flow, list, next) {
		unsigned int i;

		if (flow->drop) {
			flow->frxq[HASH_RXQ_ETH].ibv_flow =
				ibv_create_flow
				(priv->flow_drop_queue->qp,
				 flow->frxq[HASH_RXQ_ETH].ibv_attr);
			if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
				DEBUG("Flow %p cannot be applied",
				      (void *)flow);
				rte_errno = EINVAL;
				return rte_errno;
			}
			DEBUG("Flow %p applied", (void *)flow);
			/* Next flow. */
			continue;
		}
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (!flow->frxq[i].ibv_attr)
				continue;
			flow->frxq[i].hrxq =
				mlx5_priv_hrxq_get(priv, flow->rss_conf.rss_key,
						   flow->rss_conf.rss_key_len,
						   hash_rxq_init[i].hash_fields,
						   (*flow->queues),
						   flow->queues_n);
			if (flow->frxq[i].hrxq)
				goto flow_create;
			flow->frxq[i].hrxq =
				mlx5_priv_hrxq_new(priv, flow->rss_conf.rss_key,
						   flow->rss_conf.rss_key_len,
						   hash_rxq_init[i].hash_fields,
						   (*flow->queues),
						   flow->queues_n);
			if (!flow->frxq[i].hrxq) {
				DEBUG("Flow %p cannot be applied",
				      (void *)flow);
				rte_errno = EINVAL;
				return rte_errno;
			}
flow_create:
			flow->frxq[i].ibv_flow =
				ibv_create_flow(flow->frxq[i].hrxq->qp,
						flow->frxq[i].ibv_attr);
			if (!flow->frxq[i].ibv_flow) {
				DEBUG("Flow %p cannot be applied",
				      (void *)flow);
				rte_errno = EINVAL;
				return rte_errno;
			}
			DEBUG("Flow %p applied", (void *)flow);
		}
		if (!flow->mark)
			continue;
		for (i = 0; i != flow->queues_n; ++i)
			(*priv->rxqs)[(*flow->queues)[i]]->mark = 1;
	}
	return 0;
}
/**
 * Verify that the flow list is empty.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   The number of flows not released.
 */
int
priv_flow_verify(struct priv *priv)
{
	struct rte_flow *flow;
	int ret = 0;

	TAILQ_FOREACH(flow, &priv->flows, next) {
		DEBUG("%p: flow %p still referenced", (void *)priv,
		      (void *)flow);
		++ret;
	}
	return ret;
}
/**
 * Enable a control flow configured from the control plane.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param eth_spec
 *   An Ethernet flow spec to apply.
 * @param eth_mask
 *   An Ethernet flow mask to apply.
 * @param vlan_spec
 *   A VLAN flow spec to apply.
 * @param vlan_mask
 *   A VLAN flow mask to apply.
 *
 * @return
 *   0 on success.
 */
int
mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
		    struct rte_flow_item_eth *eth_spec,
		    struct rte_flow_item_eth *eth_mask,
		    struct rte_flow_item_vlan *vlan_spec,
		    struct rte_flow_item_vlan *vlan_mask)
{
	struct priv *priv = dev->data->dev_private;
	const struct rte_flow_attr attr = {
		.ingress = 1,
		.priority = MLX5_CTRL_FLOW_PRIORITY,
	};
	struct rte_flow_item items[] = {
		{
			.type = RTE_FLOW_ITEM_TYPE_ETH,
			.spec = eth_spec,
			.last = NULL,
			.mask = eth_mask,
		},
		{
			.type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
				RTE_FLOW_ITEM_TYPE_END,
			.spec = vlan_spec,
			.last = NULL,
			.mask = vlan_mask,
		},
		{
			.type = RTE_FLOW_ITEM_TYPE_END,
		},
	};
	struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_RSS,
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		},
	};
	struct rte_flow *flow;
	struct rte_flow_error error;
	unsigned int i;
	union {
		struct rte_flow_action_rss rss;
		struct {
			const struct rte_eth_rss_conf *rss_conf;
			uint16_t num;
			uint16_t queue[RTE_MAX_QUEUES_PER_PORT];
		} local;
	} action_rss;

	if (!priv->reta_idx_n)
		return EINVAL;
	for (i = 0; i != priv->reta_idx_n; ++i)
		action_rss.local.queue[i] = (*priv->reta_idx)[i];
	action_rss.local.rss_conf = &priv->rss_conf;
	action_rss.local.num = priv->reta_idx_n;
	actions[0].conf = (const void *)&action_rss.rss;
	flow = priv_flow_create(priv, &priv->ctrl_flows, &attr, items, actions,
				&error);
	if (!flow)
		return rte_errno;
	return 0;
}
/**
 * Enable a control flow configured from the control plane.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param eth_spec
 *   An Ethernet flow spec to apply.
 * @param eth_mask
 *   An Ethernet flow mask to apply.
 *
 * @return
 *   0 on success.
 */
int
mlx5_ctrl_flow(struct rte_eth_dev *dev,
	       struct rte_flow_item_eth *eth_spec,
	       struct rte_flow_item_eth *eth_mask)
{
	return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
}
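
/*
 * Hypothetical caller (illustrative values): enable a control flow
 * matching a single unicast destination MAC address.
 *
 * @code
 * struct rte_flow_item_eth unicast = {
 *	.dst.addr_bytes = "\x00\x11\x22\x33\x44\x55",
 * };
 * struct rte_flow_item_eth unicast_mask = {
 *	.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 * };
 *
 * claim_zero(mlx5_ctrl_flow(dev, &unicast, &unicast_mask));
 * @endcode
 */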
/**
 * Destroy a flow.
 *
 * @see rte_flow_destroy()
 * @see rte_flow_ops
 */
int
mlx5_flow_destroy(struct rte_eth_dev *dev,
		  struct rte_flow *flow,
		  struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	(void)error;
	priv_lock(priv);
	priv_flow_destroy(priv, &priv->flows, flow);
	priv_unlock(priv);
	return 0;
}

/**
 * Destroy all flows.
 *
 * @see rte_flow_flush()
 * @see rte_flow_ops
 */
int
mlx5_flow_flush(struct rte_eth_dev *dev,
		struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	(void)error;
	priv_lock(priv);
	priv_flow_flush(priv, &priv->flows);
	priv_unlock(priv);
	return 0;
}
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
/**
 * Query flow counter.
 *
 * @param cs
 *   The counter set.
 * @param counter_stats
 *   The counter stats kept in the flow.
 * @param query_count
 *   Returned data from the counter.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, an errno value otherwise and rte_errno is set.
 */
static int
priv_flow_query_count(struct ibv_counter_set *cs,
		      struct mlx5_flow_counter_stats *counter_stats,
		      struct rte_flow_query_count *query_count,
		      struct rte_flow_error *error)
{
	uint64_t counters[2];
	struct ibv_query_counter_set_attr query_cs_attr = {
		.cs = cs,
		.query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
	};
	struct ibv_counter_set_data query_out = {
		.out = counters,
		.outlen = 2 * sizeof(uint64_t),
	};
	int res = ibv_query_counter_set(&query_cs_attr, &query_out);

	if (res) {
		rte_flow_error_set(error, -res,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "cannot read counter");
		return -res;
	}
	query_count->hits_set = 1;
	query_count->bytes_set = 1;
	query_count->hits = counters[0] - counter_stats->hits;
	query_count->bytes = counters[1] - counter_stats->bytes;
	if (query_count->reset) {
		counter_stats->hits = counters[0];
		counter_stats->bytes = counters[1];
	}
	return 0;
}

/**
 * Query a flow.
 *
 * @see rte_flow_query()
 * @see rte_flow_ops
 */
int
mlx5_flow_query(struct rte_eth_dev *dev,
		struct rte_flow *flow,
		enum rte_flow_action_type action __rte_unused,
		void *data,
		struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;
	int res = EINVAL;

	priv_lock(priv);
	if (flow->cs) {
		res = priv_flow_query_count(flow->cs,
					    &flow->counter_stats,
					    (struct rte_flow_query_count *)data,
					    error);
	} else {
		rte_flow_error_set(error, res,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "no counter found for flow");
	}
	priv_unlock(priv);
	return -res;
}
#endif

/**
 * Enter or leave isolated mode.
 *
 * @see rte_flow_isolate()
 * @see rte_flow_ops
 */
int
mlx5_flow_isolate(struct rte_eth_dev *dev,
		  int enable,
		  struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	priv_lock(priv);
	if (dev->data->dev_started) {
		rte_flow_error_set(error, EBUSY,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "port must be stopped first");
		priv_unlock(priv);
		return -rte_errno;
	}
	priv->isolated = !!enable;
	/* Isolated mode uses a reduced set of device operations. */
	if (enable)
		priv->dev->dev_ops = &mlx5_dev_ops_isolate;
	else
		priv->dev->dev_ops = &mlx5_dev_ops;
	priv_unlock(priv);
	return 0;
}
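
/*
 * Illustrative usage sketch (application-side, hypothetical helper):
 * isolated mode has to be requested while the port is stopped, typically
 * between configuration and rte_eth_dev_start().
 */
static int __rte_unused
example_start_isolated(uint16_t port_id)
{
	struct rte_flow_error error;
	int ret;

	ret = rte_flow_isolate(port_id, 1, &error);
	if (ret)
		return ret; /* EBUSY when the port is already started. */
	return rte_eth_dev_start(port_id);
}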

/**
 * Convert a flow director filter to a generic flow.
 *
 * @param priv
 *   Private structure.
 * @param fdir_filter
 *   Flow director filter to add.
 * @param attributes
 *   Generic flow parameters structure.
 *
 * @return
 *   0 on success, errno value on error.
 */
static int
priv_fdir_filter_convert(struct priv *priv,
			 const struct rte_eth_fdir_filter *fdir_filter,
			 struct mlx5_fdir *attributes)
{
	const struct rte_eth_fdir_input *input = &fdir_filter->input;

	/* Validate queue number. */
	if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
		ERROR("invalid queue number %d", fdir_filter->action.rx_queue);
		return EINVAL;
	}
	/* All flow director filters are ingress flows starting at L2. */
	attributes->attr.ingress = 1;
	attributes->items[0] = (struct rte_flow_item) {
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.spec = &attributes->l2,
		.mask = &attributes->l2_mask,
	};
	switch (fdir_filter->action.behavior) {
	case RTE_ETH_FDIR_ACCEPT:
		attributes->actions[0] = (struct rte_flow_action){
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &attributes->queue,
		};
		break;
	case RTE_ETH_FDIR_REJECT:
		attributes->actions[0] = (struct rte_flow_action){
			.type = RTE_FLOW_ACTION_TYPE_DROP,
		};
		break;
	default:
		ERROR("invalid behavior %d", fdir_filter->action.behavior);
		return ENOTSUP;
	}
	attributes->queue.index = fdir_filter->action.rx_queue;
	switch (fdir_filter->input.flow_type) {
	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = input->flow.udp4_flow.ip.src_ip,
			.dst_addr = input->flow.udp4_flow.ip.dst_ip,
			.time_to_live = input->flow.udp4_flow.ip.ttl,
			.type_of_service = input->flow.udp4_flow.ip.tos,
			.next_proto_id = input->flow.udp4_flow.ip.proto,
		};
		attributes->l4.udp.hdr = (struct udp_hdr){
			.src_port = input->flow.udp4_flow.src_port,
			.dst_port = input->flow.udp4_flow.dst_port,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV4,
			.spec = &attributes->l3,
			.mask = &attributes->l3,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_UDP,
			.spec = &attributes->l4,
			.mask = &attributes->l4,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = input->flow.tcp4_flow.ip.src_ip,
			.dst_addr = input->flow.tcp4_flow.ip.dst_ip,
			.time_to_live = input->flow.tcp4_flow.ip.ttl,
			.type_of_service = input->flow.tcp4_flow.ip.tos,
			.next_proto_id = input->flow.tcp4_flow.ip.proto,
		};
		attributes->l4.tcp.hdr = (struct tcp_hdr){
			.src_port = input->flow.tcp4_flow.src_port,
			.dst_port = input->flow.tcp4_flow.dst_port,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV4,
			.spec = &attributes->l3,
			.mask = &attributes->l3,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_TCP,
			.spec = &attributes->l4,
			.mask = &attributes->l4,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = input->flow.ip4_flow.src_ip,
			.dst_addr = input->flow.ip4_flow.dst_ip,
			.time_to_live = input->flow.ip4_flow.ttl,
			.type_of_service = input->flow.ip4_flow.tos,
			.next_proto_id = input->flow.ip4_flow.proto,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV4,
			.spec = &attributes->l3,
			.mask = &attributes->l3,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
			.hop_limits = input->flow.udp6_flow.ip.hop_limits,
			.proto = input->flow.udp6_flow.ip.proto,
		};
		memcpy(attributes->l3.ipv6.hdr.src_addr,
		       input->flow.udp6_flow.ip.src_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
		memcpy(attributes->l3.ipv6.hdr.dst_addr,
		       input->flow.udp6_flow.ip.dst_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
		attributes->l4.udp.hdr = (struct udp_hdr){
			.src_port = input->flow.udp6_flow.src_port,
			.dst_port = input->flow.udp6_flow.dst_port,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV6,
			.spec = &attributes->l3,
			.mask = &attributes->l3,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_UDP,
			.spec = &attributes->l4,
			.mask = &attributes->l4,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
			.hop_limits = input->flow.tcp6_flow.ip.hop_limits,
			.proto = input->flow.tcp6_flow.ip.proto,
		};
		memcpy(attributes->l3.ipv6.hdr.src_addr,
		       input->flow.tcp6_flow.ip.src_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
		memcpy(attributes->l3.ipv6.hdr.dst_addr,
		       input->flow.tcp6_flow.ip.dst_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
		attributes->l4.tcp.hdr = (struct tcp_hdr){
			.src_port = input->flow.tcp6_flow.src_port,
			.dst_port = input->flow.tcp6_flow.dst_port,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV6,
			.spec = &attributes->l3,
			.mask = &attributes->l3,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_TCP,
			.spec = &attributes->l4,
			.mask = &attributes->l4,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
			.hop_limits = input->flow.ipv6_flow.hop_limits,
			.proto = input->flow.ipv6_flow.proto,
		};
		memcpy(attributes->l3.ipv6.hdr.src_addr,
		       input->flow.ipv6_flow.src_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
		memcpy(attributes->l3.ipv6.hdr.dst_addr,
		       input->flow.ipv6_flow.dst_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV6,
			.spec = &attributes->l3,
			.mask = &attributes->l3,
		};
		break;
	default:
		ERROR("invalid flow type %d",
		      fdir_filter->input.flow_type);
		return ENOTSUP;
	}
	return 0;
}
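
/*
 * Illustrative example (hypothetical helper with made-up values): a flow
 * director filter steering UDP/IPv4 traffic to Rx queue 3, as an
 * application would pass it through the filter API.  The conversion
 * above turns it into ETH / IPV4 / UDP items with a QUEUE action; flow
 * director addresses and ports are expected in network byte order.
 */
static int __rte_unused
example_fdir_convert_udp4(struct priv *priv)
{
	struct mlx5_fdir attributes = {
		.attr.group = 0,
	};
	struct rte_eth_fdir_filter filter = {
		.input = {
			.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
		},
		.action = {
			.behavior = RTE_ETH_FDIR_ACCEPT,
			.rx_queue = 3,
		},
	};

	filter.input.flow.udp4_flow.ip.dst_ip =
		rte_cpu_to_be_32(IPv4(192, 168, 0, 2));
	filter.input.flow.udp4_flow.dst_port = rte_cpu_to_be_16(4789);
	return priv_fdir_filter_convert(priv, &filter, &attributes);
}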

/**
 * Add a new flow director filter and store it in the flow list.
 *
 * @param priv
 *   Private structure.
 * @param fdir_filter
 *   Flow director filter to add.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_fdir_filter_add(struct priv *priv,
		     const struct rte_eth_fdir_filter *fdir_filter)
{
	struct mlx5_fdir attributes = {
		.attr.group = 0,
		.l2_mask = {
			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.type = 0,
		},
	};
	struct mlx5_flow_parse parser = {
		.layer = HASH_RXQ_ETH,
	};
	struct rte_flow_error error;
	struct rte_flow *flow;
	int ret;

	ret = priv_fdir_filter_convert(priv, fdir_filter, &attributes);
	if (ret)
		return -ret;
	ret = priv_flow_convert(priv, &attributes.attr, attributes.items,
				attributes.actions, &error, &parser);
	if (ret)
		return -ret;
	flow = priv_flow_create(priv,
				&priv->flows,
				&attributes.attr,
				attributes.items,
				attributes.actions,
				&error);
	if (flow) {
		DEBUG("FDIR created %p", (void *)flow);
		return 0;
	}
	return ENOTSUP;
}

/**
 * Delete specific filter.
 *
 * @param priv
 *   Private structure.
 * @param fdir_filter
 *   Filter to be deleted.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_fdir_filter_delete(struct priv *priv,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	struct mlx5_fdir attributes = {
		.attr.group = 0,
	};
	struct mlx5_flow_parse parser = {
		.create = 1,
		.layer = HASH_RXQ_ETH,
	};
	struct rte_flow_error error;
	struct rte_flow *flow;
	unsigned int i;
	int ret;

	ret = priv_fdir_filter_convert(priv, fdir_filter, &attributes);
	if (ret)
		return -ret;
	ret = priv_flow_convert(priv, &attributes.attr, attributes.items,
				attributes.actions, &error, &parser);
	if (ret)
		goto exit;
	/*
	 * Special case for drop action which is only set in the
	 * specifications when the flow is created.  In this situation the
	 * drop specification is missing.
	 */
	if (parser.drop) {
		struct ibv_flow_spec_action_drop *drop;

		drop = (void *)((uintptr_t)parser.queue[HASH_RXQ_ETH].ibv_attr +
				parser.queue[HASH_RXQ_ETH].offset);
		*drop = (struct ibv_flow_spec_action_drop){
			.type = IBV_FLOW_SPEC_ACTION_DROP,
			.size = sizeof(struct ibv_flow_spec_action_drop),
		};
		parser.queue[HASH_RXQ_ETH].ibv_attr->num_of_specs++;
	}
	TAILQ_FOREACH(flow, &priv->flows, next) {
		struct ibv_flow_attr *attr;
		struct ibv_spec_header *attr_h;
		void *spec;
		struct ibv_flow_attr *flow_attr;
		struct ibv_spec_header *flow_h;
		void *flow_spec;
		unsigned int specs_n;

		attr = parser.queue[HASH_RXQ_ETH].ibv_attr;
		flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr;
		/* Compare the attributes first. */
		if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
			continue;
		if (attr->num_of_specs == 0)
			continue;
		/* Then walk and compare the specifications one by one. */
		spec = (void *)((uintptr_t)attr +
				sizeof(struct ibv_flow_attr));
		flow_spec = (void *)((uintptr_t)flow_attr +
				     sizeof(struct ibv_flow_attr));
		specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
		for (i = 0; i != specs_n; ++i) {
			attr_h = spec;
			flow_h = flow_spec;
			if (memcmp(spec, flow_spec,
				   RTE_MIN(attr_h->size, flow_h->size)))
				goto wrong_flow;
			spec = (void *)((uintptr_t)spec + attr_h->size);
			flow_spec = (void *)((uintptr_t)flow_spec +
					     flow_h->size);
		}
		/* At this point, the flow matches. */
		break;
wrong_flow:
		/* The flow does not match. */
		continue;
	}
	if (flow)
		priv_flow_destroy(priv, &priv->flows, flow);
exit:
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser.queue[i].ibv_attr)
			rte_free(parser.queue[i].ibv_attr);
	}
	return -ret;
}

/**
 * Update queue for specific filter.
 *
 * @param priv
 *   Private structure.
 * @param fdir_filter
 *   Filter to be updated.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_fdir_filter_update(struct priv *priv,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	int ret;

	/* An update is a delete followed by an add of the same filter. */
	ret = priv_fdir_filter_delete(priv, fdir_filter);
	if (ret)
		return ret;
	ret = priv_fdir_filter_add(priv, fdir_filter);
	return ret;
}

/**
 * Flush all filters.
 *
 * @param priv
 *   Private structure.
 */
static void
priv_fdir_filter_flush(struct priv *priv)
{
	priv_flow_flush(priv, &priv->flows);
}

/**
 * Get flow director information.
 *
 * @param priv
 *   Private structure.
 * @param[out] fdir_info
 *   Resulting flow director information.
 */
static void
priv_fdir_info_get(struct priv *priv, struct rte_eth_fdir_info *fdir_info)
{
	struct rte_eth_fdir_masks *mask =
		&priv->dev->data->dev_conf.fdir_conf.mask;

	fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
	fdir_info->guarant_spc = 0;
	rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
	/* Flexible payload is not supported by this PMD. */
	fdir_info->max_flexpayload = 0;
	fdir_info->flow_types_mask[0] = 0;
	fdir_info->flex_payload_unit = 0;
	fdir_info->max_flex_payload_segment_num = 0;
	fdir_info->flex_payload_limit = 0;
	memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
}

/**
 * Deal with flow director operations.
 *
 * @param priv
 *   Pointer to private structure.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_fdir_ctrl_func(struct priv *priv, enum rte_filter_op filter_op, void *arg)
{
	enum rte_fdir_mode fdir_mode =
		priv->dev->data->dev_conf.fdir_conf.mode;
	int ret = 0;

	if (filter_op == RTE_ETH_FILTER_NOP)
		return 0;
	if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
	    fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
		ERROR("%p: flow director mode %d not supported",
		      (void *)priv, fdir_mode);
		return EINVAL;
	}
	switch (filter_op) {
	case RTE_ETH_FILTER_ADD:
		ret = priv_fdir_filter_add(priv, arg);
		break;
	case RTE_ETH_FILTER_UPDATE:
		ret = priv_fdir_filter_update(priv, arg);
		break;
	case RTE_ETH_FILTER_DELETE:
		ret = priv_fdir_filter_delete(priv, arg);
		break;
	case RTE_ETH_FILTER_FLUSH:
		priv_fdir_filter_flush(priv);
		break;
	case RTE_ETH_FILTER_INFO:
		priv_fdir_info_get(priv, arg);
		break;
	default:
		DEBUG("%p: unknown operation %u", (void *)priv,
		      filter_op);
		ret = EINVAL;
		break;
	}
	return ret;
}

/**
 * Manage filter operations.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param filter_type
 *   Filter type.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
int
mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
		     enum rte_filter_type filter_type,
		     enum rte_filter_op filter_op,
		     void *arg)
{
	int ret = EINVAL;
	struct priv *priv = dev->data->dev_private;

	switch (filter_type) {
	case RTE_ETH_FILTER_GENERIC:
		if (filter_op != RTE_ETH_FILTER_GET)
			return -EINVAL;
		*(const void **)arg = &mlx5_flow_ops;
		return 0;
	case RTE_ETH_FILTER_FDIR:
		priv_lock(priv);
		ret = priv_fdir_ctrl_func(priv, filter_op, arg);
		priv_unlock(priv);
		break;
	default:
		ERROR("%p: filter type (%d) not supported",
		      (void *)dev, filter_type);
		break;
	}
	return -ret;
}
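
/*
 * Illustrative usage sketch (hypothetical helper): this is how the
 * generic rte_flow layer obtains the driver callbacks declared above;
 * applications normally reach it through rte_flow_validate() and
 * rte_flow_create() rather than calling it directly.
 */
static const struct rte_flow_ops * __rte_unused
example_get_flow_ops(struct rte_eth_dev *dev)
{
	const struct rte_flow_ops *ops = NULL;

	if (mlx5_dev_filter_ctrl(dev, RTE_ETH_FILTER_GENERIC,
				 RTE_ETH_FILTER_GET, &ops))
		return NULL;
	return ops;
}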