/*-
 *   BSD LICENSE
 *
 *   Copyright 2016 6WIND S.A.
 *   Copyright 2016 Mellanox.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of 6WIND S.A. nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <sys/queue.h>
#include <string.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_ethdev.h>
#include <rte_flow.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>

#include "mlx5.h"
#include "mlx5_defs.h"
#include "mlx5_prm.h"
/* Define minimal priority for control plane flows. */
#define MLX5_CTRL_FLOW_PRIORITY 4

/* Internet Protocol versions. */
#define MLX5_IPV4 4
#define MLX5_IPV6 6
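
/*
 * The stub definitions below keep this file compiling when the installed
 * rdma-core lacks counter-set support; the real definitions come from
 * infiniband/verbs.h when HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT is defined.
 */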
#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
struct ibv_counter_set_init_attr {
    int dummy;
};
struct ibv_flow_spec_counter_action {
    int dummy;
};
struct ibv_counter_set {
    int dummy;
};

static inline int
ibv_destroy_counter_set(struct ibv_counter_set *cs)
{
    (void)cs;
    return -ENOTSUP;
}
#endif
/* Dev ops structure defined in mlx5.c */
extern const struct eth_dev_ops mlx5_dev_ops;
extern const struct eth_dev_ops mlx5_dev_ops_isolate;
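
/*
 * Both tables are defined in mlx5.c; mlx5_flow_isolate() below swaps
 * between them when flow isolation mode is toggled.
 */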
static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data);

static int
mlx5_flow_create_vlan(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data);

static int
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data);

static int
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data);

static int
mlx5_flow_create_udp(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data);

static int
mlx5_flow_create_tcp(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data);

static int
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
                       const void *default_mask,
                       void *data);

struct mlx5_flow_parse;

static void
mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
                      unsigned int size);

static int
mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);

static int
mlx5_flow_create_count(struct priv *priv, struct mlx5_flow_parse *parser);
/* Hash RX queue types. */
enum hash_rxq_type {
    HASH_RXQ_TCPV4,
    HASH_RXQ_UDPV4,
    HASH_RXQ_IPV4,
    HASH_RXQ_TCPV6,
    HASH_RXQ_UDPV6,
    HASH_RXQ_IPV6,
    HASH_RXQ_ETH,
};
/* Initialization data for hash RX queue. */
struct hash_rxq_init {
    uint64_t hash_fields; /* Fields that participate in the hash. */
    uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
    unsigned int flow_priority; /* Flow priority to use. */
    unsigned int ip_version; /* Internet protocol. */
};
151 /* Initialization data for hash RX queues. */
152 const struct hash_rxq_init hash_rxq_init[] = {
154 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
155 IBV_RX_HASH_DST_IPV4 |
156 IBV_RX_HASH_SRC_PORT_TCP |
157 IBV_RX_HASH_DST_PORT_TCP),
158 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
160 .ip_version = MLX5_IPV4,
163 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
164 IBV_RX_HASH_DST_IPV4 |
165 IBV_RX_HASH_SRC_PORT_UDP |
166 IBV_RX_HASH_DST_PORT_UDP),
167 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
169 .ip_version = MLX5_IPV4,
172 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
173 IBV_RX_HASH_DST_IPV4),
174 .dpdk_rss_hf = (ETH_RSS_IPV4 |
177 .ip_version = MLX5_IPV4,
180 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
181 IBV_RX_HASH_DST_IPV6 |
182 IBV_RX_HASH_SRC_PORT_TCP |
183 IBV_RX_HASH_DST_PORT_TCP),
184 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
186 .ip_version = MLX5_IPV6,
189 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
190 IBV_RX_HASH_DST_IPV6 |
191 IBV_RX_HASH_SRC_PORT_UDP |
192 IBV_RX_HASH_DST_PORT_UDP),
193 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
195 .ip_version = MLX5_IPV6,
198 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
199 IBV_RX_HASH_DST_IPV6),
200 .dpdk_rss_hf = (ETH_RSS_IPV6 |
203 .ip_version = MLX5_IPV6,
212 /* Number of entries in hash_rxq_init[]. */
213 const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
/** Structure for holding counter stats. */
struct mlx5_flow_counter_stats {
    uint64_t hits; /**< Number of packets matched by the rule. */
    uint64_t bytes; /**< Number of bytes matched by the rule. */
};
/** Structure for Drop queue. */
struct mlx5_hrxq_drop {
    struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
    struct ibv_qp *qp; /**< Verbs queue pair. */
    struct ibv_wq *wq; /**< Verbs work queue. */
    struct ibv_cq *cq; /**< Verbs completion queue. */
};
/* Flows structures. */
struct mlx5_flow {
    uint64_t hash_fields; /**< Fields that participate in the hash. */
    struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
    struct ibv_flow *ibv_flow; /**< Verbs flow. */
    struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
};
/* Drop flows structures. */
struct mlx5_flow_drop {
    struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
    struct ibv_flow *ibv_flow; /**< Verbs flow. */
};
struct rte_flow {
    TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
    uint32_t mark:1; /**< Set if the flow is marked. */
    uint32_t drop:1; /**< Drop queue. */
    uint16_t queues_n; /**< Number of entries in queues[]. */
    uint16_t (*queues)[]; /**< Queue indexes to use. */
    struct rte_eth_rss_conf rss_conf; /**< RSS configuration. */
    uint8_t rss_key[40]; /**< Copy of the RSS key. */
    struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
    struct mlx5_flow_counter_stats counter_stats; /**< The counter stats. */
    struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
    /**< Flow with Rx queue. */
};
/** Static initializer for items. */
#define ITEMS(...) \
    (const enum rte_flow_item_type []){ \
        __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
    }
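
/*
 * e.g. ITEMS(RTE_FLOW_ITEM_TYPE_IPV4, RTE_FLOW_ITEM_TYPE_IPV6) expands to
 * an anonymous item-type array terminated by RTE_FLOW_ITEM_TYPE_END.
 */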
/** Structure to generate a simple graph of layers supported by the NIC. */
struct mlx5_flow_items {
    /** List of possible actions for these items. */
    const enum rte_flow_action_type *const actions;
    /** Bit-masks corresponding to the possibilities for the item. */
    const void *mask;
    /**
     * Default bit-masks to use when item->mask is not provided. When
     * \default_mask is also NULL, the full supported bit-mask (\mask) is
     * used instead.
     */
    const void *default_mask;
    /** Bit-masks size in bytes. */
    const unsigned int mask_sz;
    /**
     * Conversion function from rte_flow to NIC specific flow.
     *
     * @param item
     *   rte_flow item to convert.
     * @param default_mask
     *   Default bit-masks to use when item->mask is not provided.
     * @param data
     *   Internal structure to store the conversion.
     *
     * @return
     *   0 on success, negative value otherwise.
     */
    int (*convert)(const struct rte_flow_item *item,
                   const void *default_mask,
                   void *data);
    /** Size in bytes of the destination structure. */
    const unsigned int dst_sz;
    /** List of possible following items. */
    const enum rte_flow_item_type *const items;
};
/** Valid actions for this PMD. */
static const enum rte_flow_action_type valid_actions[] = {
    RTE_FLOW_ACTION_TYPE_DROP,
    RTE_FLOW_ACTION_TYPE_QUEUE,
    RTE_FLOW_ACTION_TYPE_MARK,
    RTE_FLOW_ACTION_TYPE_FLAG,
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
    RTE_FLOW_ACTION_TYPE_COUNT,
#endif
    RTE_FLOW_ACTION_TYPE_END,
};
/** Graph of supported items and associated actions. */
static const struct mlx5_flow_items mlx5_flow_items[] = {
    [RTE_FLOW_ITEM_TYPE_END] = {
        .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
                       RTE_FLOW_ITEM_TYPE_VXLAN),
    },
    [RTE_FLOW_ITEM_TYPE_ETH] = {
        .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
                       RTE_FLOW_ITEM_TYPE_IPV4,
                       RTE_FLOW_ITEM_TYPE_IPV6),
        .actions = valid_actions,
        .mask = &(const struct rte_flow_item_eth){
            .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
            .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
            .type = -1,
        },
        .default_mask = &rte_flow_item_eth_mask,
        .mask_sz = sizeof(struct rte_flow_item_eth),
        .convert = mlx5_flow_create_eth,
        .dst_sz = sizeof(struct ibv_flow_spec_eth),
    },
    [RTE_FLOW_ITEM_TYPE_VLAN] = {
        .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
                       RTE_FLOW_ITEM_TYPE_IPV6),
        .actions = valid_actions,
        .mask = &(const struct rte_flow_item_vlan){
            .tci = -1,
        },
        .default_mask = &rte_flow_item_vlan_mask,
        .mask_sz = sizeof(struct rte_flow_item_vlan),
        .convert = mlx5_flow_create_vlan,
        .dst_sz = 0,
    },
    [RTE_FLOW_ITEM_TYPE_IPV4] = {
        .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
                       RTE_FLOW_ITEM_TYPE_TCP),
        .actions = valid_actions,
        .mask = &(const struct rte_flow_item_ipv4){
            .hdr = {
                .src_addr = -1,
                .dst_addr = -1,
                .type_of_service = -1,
                .next_proto_id = -1,
            },
        },
        .default_mask = &rte_flow_item_ipv4_mask,
        .mask_sz = sizeof(struct rte_flow_item_ipv4),
        .convert = mlx5_flow_create_ipv4,
        .dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
    },
    [RTE_FLOW_ITEM_TYPE_IPV6] = {
        .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
                       RTE_FLOW_ITEM_TYPE_TCP),
        .actions = valid_actions,
        .mask = &(const struct rte_flow_item_ipv6){
            .hdr = {
                .src_addr = {
                    0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff, 0xff,
                },
                .dst_addr = {
                    0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff, 0xff,
                },
                .vtc_flow = -1,
                .proto = -1,
                .hop_limits = -1,
            },
        },
        .default_mask = &rte_flow_item_ipv6_mask,
        .mask_sz = sizeof(struct rte_flow_item_ipv6),
        .convert = mlx5_flow_create_ipv6,
        .dst_sz = sizeof(struct ibv_flow_spec_ipv6),
    },
    [RTE_FLOW_ITEM_TYPE_UDP] = {
        .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
        .actions = valid_actions,
        .mask = &(const struct rte_flow_item_udp){
            .hdr = {
                .src_port = -1,
                .dst_port = -1,
            },
        },
        .default_mask = &rte_flow_item_udp_mask,
        .mask_sz = sizeof(struct rte_flow_item_udp),
        .convert = mlx5_flow_create_udp,
        .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
    },
    [RTE_FLOW_ITEM_TYPE_TCP] = {
        .actions = valid_actions,
        .mask = &(const struct rte_flow_item_tcp){
            .hdr = {
                .src_port = -1,
                .dst_port = -1,
            },
        },
        .default_mask = &rte_flow_item_tcp_mask,
        .mask_sz = sizeof(struct rte_flow_item_tcp),
        .convert = mlx5_flow_create_tcp,
        .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
    },
    [RTE_FLOW_ITEM_TYPE_VXLAN] = {
        .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
        .actions = valid_actions,
        .mask = &(const struct rte_flow_item_vxlan){
            .vni = "\xff\xff\xff",
        },
        .default_mask = &rte_flow_item_vxlan_mask,
        .mask_sz = sizeof(struct rte_flow_item_vxlan),
        .convert = mlx5_flow_create_vxlan,
        .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
    },
};
/** Structure to pass to the conversion function. */
struct mlx5_flow_parse {
    uint32_t inner; /**< Set once VXLAN is encountered. */
    uint32_t create:1;
    /**< Whether resources should remain after a validate. */
    uint32_t drop:1; /**< Target is a drop queue. */
    uint32_t mark:1; /**< Mark is present in the flow. */
    uint32_t count:1; /**< Count is present in the flow. */
    uint32_t mark_id; /**< Mark identifier. */
    uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indexes to use. */
    uint16_t queues_n; /**< Number of entries in queues[]. */
    struct rte_eth_rss_conf rss_conf; /**< RSS configuration. */
    uint8_t rss_key[40]; /**< Copy of the RSS key. */
    enum hash_rxq_type layer; /**< Last pattern layer detected. */
    struct ibv_counter_set *cs; /**< Holds the counter set for the rule. */
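    /*
     * One entry per hash Rx queue type: each carries its own Verbs
     * attribute so that a single rte_flow can expand into several
     * hardware flows, one per requested RSS protocol combination.
     */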
    struct {
        struct ibv_flow_attr *ibv_attr;
        /**< Pointer to Verbs attributes. */
        unsigned int offset;
        /**< Current position or total size of the attribute. */
    } queue[RTE_DIM(hash_rxq_init)];
};
static const struct rte_flow_ops mlx5_flow_ops = {
    .validate = mlx5_flow_validate,
    .create = mlx5_flow_create,
    .destroy = mlx5_flow_destroy,
    .flush = mlx5_flow_flush,
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
    .query = mlx5_flow_query,
#else
    .query = NULL,
#endif
    .isolate = mlx5_flow_isolate,
};
/* Convert FDIR request to Generic flow. */
struct mlx5_fdir {
    struct rte_flow_attr attr;
    struct rte_flow_action actions[2];
    struct rte_flow_item items[4];
    struct rte_flow_item_eth l2;
    struct rte_flow_item_eth l2_mask;
    union {
        struct rte_flow_item_ipv4 ipv4;
        struct rte_flow_item_ipv6 ipv6;
    } l3;
    union {
        struct rte_flow_item_udp udp;
        struct rte_flow_item_tcp tcp;
    } l4;
    struct rte_flow_action_queue queue;
};
/* Verbs specification header. */
struct ibv_spec_header {
    enum ibv_flow_spec_type type;
    uint16_t size;
};
/**
 * Check support for a given item.
 *
 * @param item[in]
 *   Item specification.
 * @param mask[in]
 *   Bit-masks covering supported fields to compare with spec, last and mask in
 *   \item.
 * @param size
 *   Bit-mask size in bytes.
 *
 * @return
 *   0 on success.
 */
static int
mlx5_flow_item_validate(const struct rte_flow_item *item,
                        const uint8_t *mask, unsigned int size)
{
    int ret = 0;

    if (!item->spec && (item->mask || item->last))
        return -1;
    if (item->spec && !item->mask) {
        unsigned int i;
        const uint8_t *spec = item->spec;

        for (i = 0; i < size; ++i)
            if ((spec[i] | mask[i]) != mask[i])
                return -1;
    }
    if (item->last && !item->mask) {
        unsigned int i;
        const uint8_t *spec = item->last;

        for (i = 0; i < size; ++i)
            if ((spec[i] | mask[i]) != mask[i])
                return -1;
    }
    if (item->mask) {
        unsigned int i;
        const uint8_t *spec = item->mask;

        for (i = 0; i < size; ++i)
            if ((spec[i] | mask[i]) != mask[i])
                return -1;
    }
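    /*
     * A spec/last range is only supported when, once both bounds are
     * reduced by the applicable mask, they collapse to a single value.
     */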
    if (item->spec && item->last) {
        uint8_t spec[size];
        uint8_t last[size];
        const uint8_t *apply = mask;
        unsigned int i;

        if (item->mask)
            apply = item->mask;
        for (i = 0; i < size; ++i) {
            spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
            last[i] = ((const uint8_t *)item->last)[i] & apply[i];
        }
        ret = memcmp(spec, last, size);
    }
    return ret;
}
/**
 * Copy the RSS configuration from the user-provided one.
 *
 * @param priv
 *   Pointer to private structure.
 * @param parser
 *   Internal parser structure.
 * @param rss_conf
 *   User RSS configuration to save.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_flow_convert_rss_conf(struct priv *priv,
                           struct mlx5_flow_parse *parser,
                           const struct rte_eth_rss_conf *rss_conf)
{
    const struct rte_eth_rss_conf *rss =
        rss_conf ? rss_conf : &priv->rss_conf;

    if (rss->rss_key_len > 40)
        return EINVAL;
    parser->rss_conf.rss_key_len = rss->rss_key_len;
    parser->rss_conf.rss_hf = rss->rss_hf;
    memcpy(parser->rss_key, rss->rss_key, rss->rss_key_len);
    parser->rss_conf.rss_key = parser->rss_key;
    return 0;
}
/**
 * Extract attributes to the parser.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_convert_attributes(struct priv *priv,
                             const struct rte_flow_attr *attr,
                             struct rte_flow_error *error,
                             struct mlx5_flow_parse *parser)
{
    (void)priv;
    (void)parser;
    if (attr->group) {
        rte_flow_error_set(error, ENOTSUP,
                           RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
                           NULL,
                           "groups are not supported");
        return -rte_errno;
    }
    if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
        rte_flow_error_set(error, ENOTSUP,
                           RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
                           NULL,
                           "priorities are not supported");
        return -rte_errno;
    }
    if (attr->egress) {
        rte_flow_error_set(error, ENOTSUP,
                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
                           NULL,
                           "egress is not supported");
        return -rte_errno;
    }
    if (!attr->ingress) {
        rte_flow_error_set(error, ENOTSUP,
                           RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
                           NULL,
                           "only ingress is supported");
        return -rte_errno;
    }
    return 0;
}
/**
 * Extract actions to the parser.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_convert_actions(struct priv *priv,
                          const struct rte_flow_action actions[],
                          struct rte_flow_error *error,
                          struct mlx5_flow_parse *parser)
{
    /*
     * Add default RSS configuration necessary for Verbs to create QP even
     * if no RSS is necessary.
     */
    priv_flow_convert_rss_conf(priv, parser,
                               (const struct rte_eth_rss_conf *)
                               &priv->rss_conf);
    for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
        if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
            continue;
        } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
            parser->drop = 1;
        } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
            const struct rte_flow_action_queue *queue =
                (const struct rte_flow_action_queue *)
                actions->conf;
            uint16_t n;
            uint16_t found = 0;

            if (!queue || (queue->index > (priv->rxqs_n - 1)))
                goto exit_action_not_supported;
            for (n = 0; n < parser->queues_n; ++n) {
                if (parser->queues[n] == queue->index) {
                    found = 1;
                    break;
                }
            }
            if (parser->queues_n > 1 && !found) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                   actions,
                                   "queue action not in RSS queues");
                return -rte_errno;
            }
            if (!found) {
                parser->queues_n = 1;
                parser->queues[0] = queue->index;
            }
        } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
            const struct rte_flow_action_rss *rss =
                (const struct rte_flow_action_rss *)
                actions->conf;
            uint16_t n;

            if (!rss || !rss->num) {
                rte_flow_error_set(error, EINVAL,
                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                   actions,
                                   "no valid queues");
                return -rte_errno;
            }
            if (parser->queues_n == 1) {
                uint16_t found = 0;

                assert(parser->queues_n);
                for (n = 0; n < rss->num; ++n) {
                    if (parser->queues[0] ==
                        rss->queue[n]) {
                        found = 1;
                        break;
                    }
                }
                if (!found) {
                    rte_flow_error_set(error, ENOTSUP,
                                       RTE_FLOW_ERROR_TYPE_ACTION,
                                       actions,
                                       "queue action not in RSS"
                                       " queues");
                    return -rte_errno;
                }
            }
            for (n = 0; n < rss->num; ++n) {
                if (rss->queue[n] >= priv->rxqs_n) {
                    rte_flow_error_set(error, EINVAL,
                                       RTE_FLOW_ERROR_TYPE_ACTION,
                                       actions,
                                       "queue id > number of"
                                       " queues");
                    return -rte_errno;
                }
            }
            for (n = 0; n < rss->num; ++n)
                parser->queues[n] = rss->queue[n];
            parser->queues_n = rss->num;
            if (priv_flow_convert_rss_conf(priv, parser,
                                           rss->rss_conf)) {
                rte_flow_error_set(error, EINVAL,
                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                   actions,
                                   "wrong RSS configuration");
                return -rte_errno;
            }
        } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
            const struct rte_flow_action_mark *mark =
                (const struct rte_flow_action_mark *)
                actions->conf;

            if (!mark) {
                rte_flow_error_set(error, EINVAL,
                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                   actions,
                                   "mark must be defined");
                return -rte_errno;
            } else if (mark->id >= MLX5_FLOW_MARK_MAX) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                   actions,
                                   "mark must be between 0"
                                   " and 16777199");
                return -rte_errno;
            }
            parser->mark = 1;
            parser->mark_id = mark->id;
        } else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
            parser->mark = 1;
        } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
                   priv->counter_set_supported) {
            parser->count = 1;
        } else {
            goto exit_action_not_supported;
        }
    }
    if (parser->drop && parser->mark)
        parser->mark = 0;
    if (!parser->queues_n && !parser->drop) {
        rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
                           NULL, "no valid action");
        return -rte_errno;
    }
    return 0;
exit_action_not_supported:
    rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
                       actions, "action not supported");
    return -rte_errno;
}
/**
 * Validate items.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] items
 *   Pattern specification (list terminated by the END pattern item).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_convert_items_validate(struct priv *priv,
                                 const struct rte_flow_item items[],
                                 struct rte_flow_error *error,
                                 struct mlx5_flow_parse *parser)
{
    const struct mlx5_flow_items *cur_item = mlx5_flow_items;
    unsigned int i;

    (void)priv;
    /* Initialise the offsets to start after verbs attribute. */
    for (i = 0; i != hash_rxq_init_n; ++i)
        parser->queue[i].offset = sizeof(struct ibv_flow_attr);
    for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
        const struct mlx5_flow_items *token = NULL;
        unsigned int n;
        int err;

        if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
            continue;
        for (i = 0;
             cur_item->items &&
             cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
             ++i) {
            if (cur_item->items[i] == items->type) {
                token = &mlx5_flow_items[items->type];
                break;
            }
        }
        if (!token)
            goto exit_item_not_supported;
        cur_item = token;
        err = mlx5_flow_item_validate(items,
                                      (const uint8_t *)cur_item->mask,
                                      cur_item->mask_sz);
        if (err)
            goto exit_item_not_supported;
        if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
            if (parser->inner) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ITEM,
                                   items,
                                   "cannot recognize multiple"
                                   " VXLAN encapsulations");
                return -rte_errno;
            }
            parser->inner = IBV_FLOW_SPEC_INNER;
        }
        if (parser->drop || parser->queues_n == 1) {
            parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
        } else {
            for (n = 0; n != hash_rxq_init_n; ++n)
                parser->queue[n].offset += cur_item->dst_sz;
        }
    }
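    /*
     * Mark and count actions each append one extra specification (a tag
     * or a counter) to every Verbs attribute; reserve room for them in
     * the per-queue offsets.
     */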
    if (parser->mark) {
        for (i = 0; i != hash_rxq_init_n; ++i)
            parser->queue[i].offset +=
                sizeof(struct ibv_flow_spec_action_tag);
    }
    if (parser->count) {
        unsigned int size = sizeof(struct ibv_flow_spec_counter_action);

        for (i = 0; i != hash_rxq_init_n; ++i)
            parser->queue[i].offset += size;
    }
    return 0;
exit_item_not_supported:
    rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
                       items, "item not supported");
    return -rte_errno;
}
/**
 * Allocate memory space to store verbs flow attributes.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] priority
 *   Flow priority.
 * @param[in] size
 *   Amount of bytes to allocate.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A verbs flow attribute on success, NULL otherwise.
 */
static struct ibv_flow_attr *
priv_flow_convert_allocate(struct priv *priv,
                           unsigned int priority,
                           unsigned int size,
                           struct rte_flow_error *error)
{
    struct ibv_flow_attr *ibv_attr;

    (void)priv;
    ibv_attr = rte_calloc(__func__, 1, size, 0);
    if (!ibv_attr) {
        rte_flow_error_set(error, ENOMEM,
                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
                           NULL,
                           "cannot allocate verbs spec attributes.");
        return NULL;
    }
    ibv_attr->priority = priority;
    return ibv_attr;
}
/**
 * Finalise verbs flow attributes.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in, out] parser
 *   Internal parser structure.
 */
static void
priv_flow_convert_finalise(struct priv *priv, struct mlx5_flow_parse *parser)
{
    const unsigned int ipv4 =
        hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
    const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 : HASH_RXQ_TCPV6;
    const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
    const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 : HASH_RXQ_TCPV4;
    const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 : HASH_RXQ_IPV4;
    const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
    unsigned int i;

    (void)priv;
    if (parser->layer == HASH_RXQ_ETH) {
        goto fill;
    } else {
        /*
         * The Ethernet layer becomes useless as the pattern defines
         * more specific layers below it.
         */
        rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
        parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
    }
    /* Remove opposite kind of layer e.g. IPv6 if the pattern is IPv4. */
    for (i = ohmin; i != (ohmax + 1); ++i) {
        if (!parser->queue[i].ibv_attr)
            continue;
        rte_free(parser->queue[i].ibv_attr);
        parser->queue[i].ibv_attr = NULL;
    }
    /* Remove impossible flow according to the RSS configuration. */
    if (hash_rxq_init[parser->layer].dpdk_rss_hf &
        parser->rss_conf.rss_hf) {
        /* Remove any other flow. */
        for (i = hmin; i != (hmax + 1); ++i) {
            if ((i == parser->layer) ||
                (!parser->queue[i].ibv_attr))
                continue;
            rte_free(parser->queue[i].ibv_attr);
            parser->queue[i].ibv_attr = NULL;
        }
    } else if (!parser->queue[ip].ibv_attr) {
        /* No RSS possible with the current configuration. */
        parser->queues_n = 1;
        return;
    }
fill:
    /*
     * Fill missing layers in verbs specifications, or compute the correct
     * offset to allocate the memory space for the attributes and
     * specifications.
     */
    for (i = 0; i != hash_rxq_init_n - 1; ++i) {
        union {
            struct ibv_flow_spec_ipv4_ext ipv4;
            struct ibv_flow_spec_ipv6 ipv6;
            struct ibv_flow_spec_tcp_udp udp_tcp;
        } specs;
        void *dst;
        uint16_t size;

        if (i == parser->layer)
            continue;
        if (parser->layer == HASH_RXQ_ETH) {
            if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
                size = sizeof(struct ibv_flow_spec_ipv4_ext);
                specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
                    .type = IBV_FLOW_SPEC_IPV4_EXT,
                    .size = size,
                };
            } else {
                size = sizeof(struct ibv_flow_spec_ipv6);
                specs.ipv6 = (struct ibv_flow_spec_ipv6){
                    .type = IBV_FLOW_SPEC_IPV6,
                    .size = size,
                };
            }
            if (parser->queue[i].ibv_attr) {
                dst = (void *)((uintptr_t)
                               parser->queue[i].ibv_attr +
                               parser->queue[i].offset);
                memcpy(dst, &specs, size);
                ++parser->queue[i].ibv_attr->num_of_specs;
            }
            parser->queue[i].offset += size;
        }
        if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
            (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
            size = sizeof(struct ibv_flow_spec_tcp_udp);
            specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
                .type = ((i == HASH_RXQ_UDPV4 ||
                          i == HASH_RXQ_UDPV6) ?
                         IBV_FLOW_SPEC_UDP :
                         IBV_FLOW_SPEC_TCP),
                .size = size,
            };
            if (parser->queue[i].ibv_attr) {
                dst = (void *)((uintptr_t)
                               parser->queue[i].ibv_attr +
                               parser->queue[i].offset);
                memcpy(dst, &specs, size);
                ++parser->queue[i].ibv_attr->num_of_specs;
            }
            parser->queue[i].offset += size;
        }
    }
}
/**
 * Validate and convert a flow supported by the NIC.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_convert(struct priv *priv,
                  const struct rte_flow_attr *attr,
                  const struct rte_flow_item items[],
                  const struct rte_flow_action actions[],
                  struct rte_flow_error *error,
                  struct mlx5_flow_parse *parser)
{
    const struct mlx5_flow_items *cur_item = mlx5_flow_items;
    unsigned int i;
    int ret;

    /* First step. Validate the attributes, items and actions. */
    *parser = (struct mlx5_flow_parse){
        .create = parser->create,
        .layer = HASH_RXQ_ETH,
        .mark_id = MLX5_FLOW_MARK_DEFAULT,
    };
    ret = priv_flow_convert_attributes(priv, attr, error, parser);
    if (ret)
        return ret;
    ret = priv_flow_convert_actions(priv, actions, error, parser);
    if (ret)
        return ret;
    ret = priv_flow_convert_items_validate(priv, items, error, parser);
    if (ret)
        return ret;
    priv_flow_convert_finalise(priv, parser);
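    /*
     * At this point no Verbs attribute has been allocated yet, so this
     * first finalise pass works on empty attributes and mainly
     * accumulates the per-queue offsets used to size the allocations
     * performed in the second step below.
     */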
    /*
     * Second step.
     * Allocate the memory space to store verbs specifications.
     */
    if (parser->drop || parser->queues_n == 1) {
        unsigned int priority =
            attr->priority +
            hash_rxq_init[HASH_RXQ_ETH].flow_priority;
        unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;

        parser->queue[HASH_RXQ_ETH].ibv_attr =
            priv_flow_convert_allocate(priv, priority,
                                       offset, error);
        if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
            return ENOMEM;
        parser->queue[HASH_RXQ_ETH].offset =
            sizeof(struct ibv_flow_attr);
    } else {
        for (i = 0; i != hash_rxq_init_n; ++i) {
            unsigned int priority =
                attr->priority +
                hash_rxq_init[i].flow_priority;
            unsigned int offset;

            if (!(parser->rss_conf.rss_hf &
                  hash_rxq_init[i].dpdk_rss_hf) &&
                (i != HASH_RXQ_ETH))
                continue;
            offset = parser->queue[i].offset;
            parser->queue[i].ibv_attr =
                priv_flow_convert_allocate(priv, priority,
                                           offset, error);
            if (!parser->queue[i].ibv_attr)
                goto exit_enomem;
            parser->queue[i].offset = sizeof(struct ibv_flow_attr);
        }
    }
    /* Third step. Conversion parse, fill the specifications. */
    parser->inner = 0;
    for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
        if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
            continue;
        cur_item = &mlx5_flow_items[items->type];
        ret = cur_item->convert(items,
                                (cur_item->default_mask ?
                                 cur_item->default_mask :
                                 cur_item->mask),
                                parser);
        if (ret) {
            rte_flow_error_set(error, ret,
                               RTE_FLOW_ERROR_TYPE_ITEM,
                               items, "item not supported");
            return -rte_errno;
        }
    }
    if (parser->mark)
        mlx5_flow_create_flag_mark(parser, parser->mark_id);
    if (parser->count && parser->create) {
        mlx5_flow_create_count(priv, parser);
        if (!parser->cs)
            goto exit_count_error;
    }
    /*
     * Last step. Complete missing specification to reach the RSS
     * configuration.
     */
    if (parser->queues_n > 1) {
        priv_flow_convert_finalise(priv, parser);
    } else {
        /*
         * Action queues have their priority overridden with the
         * Ethernet priority; this priority needs to be adjusted to
         * their most specific layer priority.
         */
        parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
            attr->priority +
            hash_rxq_init[parser->layer].flow_priority;
    }
    /* Only verification is expected, all resources should be released. */
    if (!parser->create) {
        for (i = 0; i != hash_rxq_init_n; ++i) {
            if (parser->queue[i].ibv_attr) {
                rte_free(parser->queue[i].ibv_attr);
                parser->queue[i].ibv_attr = NULL;
            }
        }
    }
    return ret;
exit_enomem:
    for (i = 0; i != hash_rxq_init_n; ++i) {
        if (parser->queue[i].ibv_attr) {
            rte_free(parser->queue[i].ibv_attr);
            parser->queue[i].ibv_attr = NULL;
        }
    }
    rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
                       NULL, "cannot allocate verbs spec attributes.");
    return ret;
exit_count_error:
    rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
                       NULL, "cannot create counter.");
    return rte_errno;
}
/**
 * Copy the specification created into the flow.
 *
 * @param parser
 *   Internal parser structure.
 * @param src
 *   Create specification.
 * @param size
 *   Size in bytes of the specification to copy.
 */
static void
mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
                      unsigned int size)
{
    unsigned int i;
    void *dst;

    for (i = 0; i != hash_rxq_init_n; ++i) {
        if (!parser->queue[i].ibv_attr)
            continue;
        /* Specification must be the same L3 type or none. */
        if (parser->layer == HASH_RXQ_ETH ||
            (hash_rxq_init[parser->layer].ip_version ==
             hash_rxq_init[i].ip_version) ||
            (hash_rxq_init[i].ip_version == 0)) {
            dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
                           parser->queue[i].offset);
            memcpy(dst, src, size);
            ++parser->queue[i].ibv_attr->num_of_specs;
            parser->queue[i].offset += size;
        }
    }
}
/**
 * Convert Ethernet item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data)
{
    const struct rte_flow_item_eth *spec = item->spec;
    const struct rte_flow_item_eth *mask = item->mask;
    struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
    const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
    struct ibv_flow_spec_eth eth = {
        .type = parser->inner | IBV_FLOW_SPEC_ETH,
        .size = eth_size,
    };

    /* Don't update layer for the inner pattern. */
    if (!parser->inner)
        parser->layer = HASH_RXQ_ETH;
    if (spec) {
        unsigned int i;

        if (!mask)
            mask = default_mask;
        memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
        memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
        eth.val.ether_type = spec->type;
        memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
        memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
        eth.mask.ether_type = mask->type;
        /* Remove unwanted bits from values. */
        for (i = 0; i < ETHER_ADDR_LEN; ++i) {
            eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
            eth.val.src_mac[i] &= eth.mask.src_mac[i];
        }
        eth.val.ether_type &= eth.mask.ether_type;
    }
    mlx5_flow_create_copy(parser, &eth, eth_size);
    return 0;
}
/**
 * Convert VLAN item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_vlan(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data)
{
    const struct rte_flow_item_vlan *spec = item->spec;
    const struct rte_flow_item_vlan *mask = item->mask;
    struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
    struct ibv_flow_spec_eth *eth;
    const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);

    if (spec) {
        unsigned int i;

        if (!mask)
            mask = default_mask;
        /* The VLAN TCI is folded into the preceding Ethernet spec. */
        for (i = 0; i != hash_rxq_init_n; ++i) {
            if (!parser->queue[i].ibv_attr)
                continue;
            eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
                           parser->queue[i].offset - eth_size);
            eth->val.vlan_tag = spec->tci;
            eth->mask.vlan_tag = mask->tci;
            eth->val.vlan_tag &= eth->mask.vlan_tag;
        }
    }
    return 0;
}
/**
 * Convert IPv4 item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data)
{
    const struct rte_flow_item_ipv4 *spec = item->spec;
    const struct rte_flow_item_ipv4 *mask = item->mask;
    struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
    unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
    struct ibv_flow_spec_ipv4_ext ipv4 = {
        .type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
        .size = ipv4_size,
    };

    /* Don't update layer for the inner pattern. */
    if (!parser->inner)
        parser->layer = HASH_RXQ_IPV4;
    if (spec) {
        if (!mask)
            mask = default_mask;
        ipv4.val = (struct ibv_flow_ipv4_ext_filter){
            .src_ip = spec->hdr.src_addr,
            .dst_ip = spec->hdr.dst_addr,
            .proto = spec->hdr.next_proto_id,
            .tos = spec->hdr.type_of_service,
        };
        ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
            .src_ip = mask->hdr.src_addr,
            .dst_ip = mask->hdr.dst_addr,
            .proto = mask->hdr.next_proto_id,
            .tos = mask->hdr.type_of_service,
        };
        /* Remove unwanted bits from values. */
        ipv4.val.src_ip &= ipv4.mask.src_ip;
        ipv4.val.dst_ip &= ipv4.mask.dst_ip;
        ipv4.val.proto &= ipv4.mask.proto;
        ipv4.val.tos &= ipv4.mask.tos;
    }
    mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
    return 0;
}
/**
 * Convert IPv6 item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data)
{
    const struct rte_flow_item_ipv6 *spec = item->spec;
    const struct rte_flow_item_ipv6 *mask = item->mask;
    struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
    unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
    struct ibv_flow_spec_ipv6 ipv6 = {
        .type = parser->inner | IBV_FLOW_SPEC_IPV6,
        .size = ipv6_size,
    };

    /* Don't update layer for the inner pattern. */
    if (!parser->inner)
        parser->layer = HASH_RXQ_IPV6;
    if (spec) {
        unsigned int i;

        if (!mask)
            mask = default_mask;
        memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
               RTE_DIM(ipv6.val.src_ip));
        memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
               RTE_DIM(ipv6.val.dst_ip));
        memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
               RTE_DIM(ipv6.mask.src_ip));
        memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
               RTE_DIM(ipv6.mask.dst_ip));
        /* Fill the value fields the mask is applied to below. */
        ipv6.val.flow_label = spec->hdr.vtc_flow;
        ipv6.val.next_hdr = spec->hdr.proto;
        ipv6.val.hop_limit = spec->hdr.hop_limits;
        ipv6.mask.flow_label = mask->hdr.vtc_flow;
        ipv6.mask.next_hdr = mask->hdr.proto;
        ipv6.mask.hop_limit = mask->hdr.hop_limits;
        /* Remove unwanted bits from values. */
        for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
            ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
            ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
        }
        ipv6.val.flow_label &= ipv6.mask.flow_label;
        ipv6.val.next_hdr &= ipv6.mask.next_hdr;
        ipv6.val.hop_limit &= ipv6.mask.hop_limit;
    }
    mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
    return 0;
}
/**
 * Convert UDP item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_udp(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data)
{
    const struct rte_flow_item_udp *spec = item->spec;
    const struct rte_flow_item_udp *mask = item->mask;
    struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
    unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
    struct ibv_flow_spec_tcp_udp udp = {
        .type = parser->inner | IBV_FLOW_SPEC_UDP,
        .size = udp_size,
    };

    /* Don't update layer for the inner pattern. */
    if (!parser->inner) {
        if (parser->layer == HASH_RXQ_IPV4)
            parser->layer = HASH_RXQ_UDPV4;
        else
            parser->layer = HASH_RXQ_UDPV6;
    }
    if (spec) {
        if (!mask)
            mask = default_mask;
        udp.val.dst_port = spec->hdr.dst_port;
        udp.val.src_port = spec->hdr.src_port;
        udp.mask.dst_port = mask->hdr.dst_port;
        udp.mask.src_port = mask->hdr.src_port;
        /* Remove unwanted bits from values. */
        udp.val.src_port &= udp.mask.src_port;
        udp.val.dst_port &= udp.mask.dst_port;
    }
    mlx5_flow_create_copy(parser, &udp, udp_size);
    return 0;
}
/**
 * Convert TCP item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_tcp(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data)
{
    const struct rte_flow_item_tcp *spec = item->spec;
    const struct rte_flow_item_tcp *mask = item->mask;
    struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
    unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
    struct ibv_flow_spec_tcp_udp tcp = {
        .type = parser->inner | IBV_FLOW_SPEC_TCP,
        .size = tcp_size,
    };

    /* Don't update layer for the inner pattern. */
    if (!parser->inner) {
        if (parser->layer == HASH_RXQ_IPV4)
            parser->layer = HASH_RXQ_TCPV4;
        else
            parser->layer = HASH_RXQ_TCPV6;
    }
    if (spec) {
        if (!mask)
            mask = default_mask;
        tcp.val.dst_port = spec->hdr.dst_port;
        tcp.val.src_port = spec->hdr.src_port;
        tcp.mask.dst_port = mask->hdr.dst_port;
        tcp.mask.src_port = mask->hdr.src_port;
        /* Remove unwanted bits from values. */
        tcp.val.src_port &= tcp.mask.src_port;
        tcp.val.dst_port &= tcp.mask.dst_port;
    }
    mlx5_flow_create_copy(parser, &tcp, tcp_size);
    return 0;
}
/**
 * Convert VXLAN item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
                       const void *default_mask,
                       void *data)
{
    const struct rte_flow_item_vxlan *spec = item->spec;
    const struct rte_flow_item_vxlan *mask = item->mask;
    struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
    unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
    struct ibv_flow_spec_tunnel vxlan = {
        .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
        .size = size,
    };
    union vni {
        uint32_t vlan_id;
        uint8_t vni[4];
    } id;

    id.vni[0] = 0;
    parser->inner = IBV_FLOW_SPEC_INNER;
    if (spec) {
        if (!mask)
            mask = default_mask;
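        /*
         * The 24-bit VNI occupies bytes 1..3 of the 32-bit tunnel_id;
         * byte 0 was zeroed above so the unused bits never match
         * stack garbage.
         */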
        memcpy(&id.vni[1], spec->vni, 3);
        vxlan.val.tunnel_id = id.vlan_id;
        memcpy(&id.vni[1], mask->vni, 3);
        vxlan.mask.tunnel_id = id.vlan_id;
        /* Remove unwanted bits from values. */
        vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
    }
    /*
     * Tunnel id 0 is equivalent to not adding a VXLAN layer. If only this
     * layer is defined in the Verbs specification, it is interpreted as a
     * wildcard and all packets will match this rule; if it follows a full
     * stack layer (e.g. eth / ipv4 / udp), all packets matching the
     * layers before will also match this rule.
     * To avoid such a situation, VNI 0 is currently refused.
     */
    if (!vxlan.val.tunnel_id)
        return EINVAL;
    mlx5_flow_create_copy(parser, &vxlan, size);
    return 0;
}
/**
 * Convert mark/flag action to Verbs specification.
 *
 * @param parser
 *   Internal parser structure.
 * @param mark_id
 *   Mark identifier.
 */
static int
mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
{
    unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
    struct ibv_flow_spec_action_tag tag = {
        .type = IBV_FLOW_SPEC_ACTION_TAG,
        .size = size,
        .tag_id = mlx5_flow_mark_set(mark_id),
    };

    assert(parser->mark);
    mlx5_flow_create_copy(parser, &tag, size);
    return 0;
}
/**
 * Convert count action to Verbs specification.
 *
 * @param priv
 *   Pointer to private structure.
 * @param parser
 *   Pointer to MLX5 flow parser structure.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
mlx5_flow_create_count(struct priv *priv __rte_unused,
                       struct mlx5_flow_parse *parser __rte_unused)
{
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
    unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
    struct ibv_counter_set_init_attr init_attr = {0};
    struct ibv_flow_spec_counter_action counter = {
        .type = IBV_FLOW_SPEC_ACTION_COUNT,
        .size = size,
        .counter_set_handle = 0,
    };

    init_attr.counter_set_id = 0;
    parser->cs = ibv_create_counter_set(priv->ctx, &init_attr);
    if (!parser->cs)
        return EINVAL;
    counter.counter_set_handle = parser->cs->handle;
    mlx5_flow_create_copy(parser, &counter, size);
#endif
    return 0;
}
/**
 * Complete flow rule creation with a drop queue.
 *
 * @param priv
 *   Pointer to private structure.
 * @param parser
 *   Internal parser structure.
 * @param flow
 *   Pointer to the rte_flow.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_flow_create_action_queue_drop(struct priv *priv,
                                   struct mlx5_flow_parse *parser,
                                   struct rte_flow *flow,
                                   struct rte_flow_error *error)
{
    struct ibv_flow_spec_action_drop *drop;
    unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
    int err = 0;

    assert(priv->pd);
    assert(priv->ctx);
    flow->drop = 1;
    drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
                    parser->queue[HASH_RXQ_ETH].offset);
    *drop = (struct ibv_flow_spec_action_drop){
        .type = IBV_FLOW_SPEC_ACTION_DROP,
        .size = size,
    };
    ++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
    parser->queue[HASH_RXQ_ETH].offset += size;
    flow->frxq[HASH_RXQ_ETH].ibv_attr =
        parser->queue[HASH_RXQ_ETH].ibv_attr;
    if (parser->count)
        flow->cs = parser->cs;
    if (!priv->dev->data->dev_started)
        return 0;
    parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
    flow->frxq[HASH_RXQ_ETH].ibv_flow =
        ibv_create_flow(priv->flow_drop_queue->qp,
                        flow->frxq[HASH_RXQ_ETH].ibv_attr);
    if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
        rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
                           NULL, "flow rule creation failure");
        err = ENOMEM;
        goto error;
    }
    return 0;
error:
    assert(flow);
    if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
        claim_zero(ibv_destroy_flow(flow->frxq[HASH_RXQ_ETH].ibv_flow));
        flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
    }
    if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
        rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
        flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
    }
    if (flow->cs) {
        claim_zero(ibv_destroy_counter_set(flow->cs));
        flow->cs = NULL;
        parser->cs = NULL;
    }
    return err;
}
/**
 * Create hash Rx queues when RSS is enabled.
 *
 * @param priv
 *   Pointer to private structure.
 * @param parser
 *   Internal parser structure.
 * @param flow
 *   Pointer to the rte_flow.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, an errno value otherwise and rte_errno is set.
 */
static int
priv_flow_create_action_queue_rss(struct priv *priv,
                                  struct mlx5_flow_parse *parser,
                                  struct rte_flow *flow,
                                  struct rte_flow_error *error)
{
    unsigned int i;

    for (i = 0; i != hash_rxq_init_n; ++i) {
        uint64_t hash_fields;

        if (!parser->queue[i].ibv_attr)
            continue;
        flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
        parser->queue[i].ibv_attr = NULL;
        hash_fields = hash_rxq_init[i].hash_fields;
        if (!priv->dev->data->dev_started)
            continue;
        flow->frxq[i].hrxq =
            mlx5_priv_hrxq_get(priv,
                               parser->rss_conf.rss_key,
                               parser->rss_conf.rss_key_len,
                               hash_fields,
                               parser->queues,
                               parser->queues_n);
        if (flow->frxq[i].hrxq)
            continue;
        flow->frxq[i].hrxq =
            mlx5_priv_hrxq_new(priv,
                               parser->rss_conf.rss_key,
                               parser->rss_conf.rss_key_len,
                               hash_fields,
                               parser->queues,
                               parser->queues_n);
        if (!flow->frxq[i].hrxq) {
            rte_flow_error_set(error, ENOMEM,
                               RTE_FLOW_ERROR_TYPE_HANDLE,
                               NULL, "cannot create hash rxq");
            return ENOMEM;
        }
    }
    return 0;
}
/**
 * Complete flow rule creation.
 *
 * @param priv
 *   Pointer to private structure.
 * @param parser
 *   Internal parser structure.
 * @param flow
 *   Pointer to the rte_flow.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, an errno value otherwise and rte_errno is set.
 */
static int
priv_flow_create_action_queue(struct priv *priv,
                              struct mlx5_flow_parse *parser,
                              struct rte_flow *flow,
                              struct rte_flow_error *error)
{
    int err = 0;
    unsigned int i;

    assert(priv->pd);
    assert(priv->ctx);
    assert(!parser->drop);
    err = priv_flow_create_action_queue_rss(priv, parser, flow, error);
    if (err)
        goto error;
    if (parser->count)
        flow->cs = parser->cs;
    if (!priv->dev->data->dev_started)
        return 0;
    for (i = 0; i != hash_rxq_init_n; ++i) {
        if (!flow->frxq[i].hrxq)
            continue;
        flow->frxq[i].ibv_flow =
            ibv_create_flow(flow->frxq[i].hrxq->qp,
                            flow->frxq[i].ibv_attr);
        if (!flow->frxq[i].ibv_flow) {
            rte_flow_error_set(error, ENOMEM,
                               RTE_FLOW_ERROR_TYPE_HANDLE,
                               NULL, "flow rule creation failure");
            err = ENOMEM;
            goto error;
        }
        DEBUG("%p type %d QP %p ibv_flow %p",
              (void *)flow, i,
              (void *)flow->frxq[i].hrxq,
              (void *)flow->frxq[i].ibv_flow);
    }
    for (i = 0; i != parser->queues_n; ++i) {
        struct mlx5_rxq_data *q =
            (*priv->rxqs)[parser->queues[i]];

        q->mark |= parser->mark;
    }
    return 0;
error:
    assert(flow);
    for (i = 0; i != hash_rxq_init_n; ++i) {
        if (flow->frxq[i].ibv_flow) {
            struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;

            claim_zero(ibv_destroy_flow(ibv_flow));
        }
        if (flow->frxq[i].hrxq)
            mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq);
        if (flow->frxq[i].ibv_attr)
            rte_free(flow->frxq[i].ibv_attr);
    }
    if (flow->cs) {
        claim_zero(ibv_destroy_counter_set(flow->cs));
        flow->cs = NULL;
        parser->cs = NULL;
    }
    return err;
}
/**
 * Convert a flow.
 *
 * @param priv
 *   Pointer to private structure.
 * @param list
 *   Pointer to a TAILQ flow list.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A flow on success, NULL otherwise.
 */
static struct rte_flow *
priv_flow_create(struct priv *priv,
                 struct mlx5_flows *list,
                 const struct rte_flow_attr *attr,
                 const struct rte_flow_item items[],
                 const struct rte_flow_action actions[],
                 struct rte_flow_error *error)
{
    struct mlx5_flow_parse parser = { .create = 1, };
    struct rte_flow *flow = NULL;
    unsigned int i;
    int err;

    err = priv_flow_convert(priv, attr, items, actions, error, &parser);
    if (err)
        goto exit;
    flow = rte_calloc(__func__, 1,
                      sizeof(*flow) + parser.queues_n * sizeof(uint16_t),
                      0);
    if (!flow) {
        rte_flow_error_set(error, ENOMEM,
                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
                           NULL,
                           "cannot allocate flow memory");
        return NULL;
    }
    /* Copy queues configuration. */
    flow->queues = (uint16_t (*)[])(flow + 1);
    memcpy(flow->queues, parser.queues, parser.queues_n * sizeof(uint16_t));
    flow->queues_n = parser.queues_n;
    flow->mark = parser.mark;
    /* Copy RSS configuration. */
    flow->rss_conf = parser.rss_conf;
    flow->rss_conf.rss_key = flow->rss_key;
    memcpy(flow->rss_key, parser.rss_key, parser.rss_conf.rss_key_len);
    /* Finalise the flow. */
    if (parser.drop)
        err = priv_flow_create_action_queue_drop(priv, &parser, flow,
                                                 error);
    else
        err = priv_flow_create_action_queue(priv, &parser, flow, error);
    if (err)
        goto exit;
    TAILQ_INSERT_TAIL(list, flow, next);
    DEBUG("Flow created %p", (void *)flow);
    return flow;
exit:
    for (i = 0; i != hash_rxq_init_n; ++i) {
        if (parser.queue[i].ibv_attr)
            rte_free(parser.queue[i].ibv_attr);
    }
    rte_free(flow);
    return NULL;
}
/**
 * Validate a flow supported by the NIC.
 *
 * @see rte_flow_validate()
 * @see rte_flow_ops
 */
int
mlx5_flow_validate(struct rte_eth_dev *dev,
                   const struct rte_flow_attr *attr,
                   const struct rte_flow_item items[],
                   const struct rte_flow_action actions[],
                   struct rte_flow_error *error)
{
    struct priv *priv = dev->data->dev_private;
    int ret;
    struct mlx5_flow_parse parser = { .create = 0, };

    priv_lock(priv);
    ret = priv_flow_convert(priv, attr, items, actions, error, &parser);
    priv_unlock(priv);
    return ret;
}
/**
 * Create a flow.
 *
 * @see rte_flow_create()
 * @see rte_flow_ops
 */
struct rte_flow *
mlx5_flow_create(struct rte_eth_dev *dev,
                 const struct rte_flow_attr *attr,
                 const struct rte_flow_item items[],
                 const struct rte_flow_action actions[],
                 struct rte_flow_error *error)
{
    struct priv *priv = dev->data->dev_private;
    struct rte_flow *flow;

    priv_lock(priv);
    flow = priv_flow_create(priv, &priv->flows, attr, items, actions,
                            error);
    priv_unlock(priv);
    return flow;
}
/**
 * Destroy a flow.
 *
 * @param priv
 *   Pointer to private structure.
 * @param list
 *   Pointer to a TAILQ flow list.
 * @param[in] flow
 *   Flow to destroy.
 */
static void
priv_flow_destroy(struct priv *priv,
                  struct mlx5_flows *list,
                  struct rte_flow *flow)
{
    unsigned int i;

    if (flow->drop || !flow->mark)
        goto free;
    for (i = 0; i != flow->queues_n; ++i) {
        struct rte_flow *tmp;
        int mark = 0;

        /*
         * To remove the mark from the queue, the queue must not be
         * present in any other marked flow (RSS or not).
         */
        TAILQ_FOREACH(tmp, list, next) {
            unsigned int j;
            uint16_t *tqs = NULL;
            uint16_t tq_n = 0;

            if (!tmp->mark)
                continue;
            for (j = 0; j != hash_rxq_init_n; ++j) {
                if (!tmp->frxq[j].hrxq)
                    continue;
                tqs = tmp->frxq[j].hrxq->ind_table->queues;
                tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
            }
            if (!tq_n)
                continue;
            for (j = 0; (j != tq_n) && !mark; j++)
                if (tqs[j] == (*flow->queues)[i])
                    mark = 1;
        }
        (*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
    }
free:
    if (flow->drop) {
        if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
            claim_zero(ibv_destroy_flow
                       (flow->frxq[HASH_RXQ_ETH].ibv_flow));
        rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
    } else {
        for (i = 0; i != hash_rxq_init_n; ++i) {
            struct mlx5_flow *frxq = &flow->frxq[i];

            if (frxq->ibv_flow)
                claim_zero(ibv_destroy_flow(frxq->ibv_flow));
            if (frxq->hrxq)
                mlx5_priv_hrxq_release(priv, frxq->hrxq);
            if (frxq->ibv_attr)
                rte_free(frxq->ibv_attr);
        }
    }
    if (flow->cs) {
        claim_zero(ibv_destroy_counter_set(flow->cs));
        flow->cs = NULL;
    }
    TAILQ_REMOVE(list, flow, next);
    DEBUG("Flow destroyed %p", (void *)flow);
    rte_free(flow);
}
/**
 * Destroy all flows.
 *
 * @param priv
 *   Pointer to private structure.
 * @param list
 *   Pointer to a TAILQ flow list.
 */
void
priv_flow_flush(struct priv *priv, struct mlx5_flows *list)
{
    while (!TAILQ_EMPTY(list)) {
        struct rte_flow *flow;

        flow = TAILQ_FIRST(list);
        priv_flow_destroy(priv, list, flow);
    }
}
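
/*
 * The drop queue is emulated with regular Verbs objects: a CQ that is
 * never polled, a single-entry WQ, an indirection table pointing to that
 * WQ and a hash QP. Flow rules targeting this QP silently discard their
 * packets.
 */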
/**
 * Create drop queue.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   0 on success.
 */
int
priv_flow_create_drop_queue(struct priv *priv)
{
    struct mlx5_hrxq_drop *fdq = NULL;

    assert(priv->pd);
    assert(priv->ctx);
    fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
    if (!fdq) {
        WARN("cannot allocate memory for drop queue");
        goto error;
    }
    fdq->cq = ibv_create_cq(priv->ctx, 1, NULL, NULL, 0);
    if (!fdq->cq) {
        WARN("cannot allocate CQ for drop queue");
        goto error;
    }
    fdq->wq = ibv_create_wq(priv->ctx,
                            &(struct ibv_wq_init_attr){
                                .wq_type = IBV_WQT_RQ,
                                .max_wr = 1,
                                .max_sge = 1,
                                .pd = priv->pd,
                                .cq = fdq->cq,
                            });
    if (!fdq->wq) {
        WARN("cannot allocate WQ for drop queue");
        goto error;
    }
    fdq->ind_table = ibv_create_rwq_ind_table(priv->ctx,
                            &(struct ibv_rwq_ind_table_init_attr){
                                .log_ind_tbl_size = 0,
                                .ind_tbl = &fdq->wq,
                                .comp_mask = 0,
                            });
    if (!fdq->ind_table) {
        WARN("cannot allocate indirection table for drop queue");
        goto error;
    }
    fdq->qp = ibv_create_qp_ex(priv->ctx,
                            &(struct ibv_qp_init_attr_ex){
                                .qp_type = IBV_QPT_RAW_PACKET,
                                .comp_mask =
                                    IBV_QP_INIT_ATTR_PD |
                                    IBV_QP_INIT_ATTR_IND_TABLE |
                                    IBV_QP_INIT_ATTR_RX_HASH,
                                .rx_hash_conf = (struct ibv_rx_hash_conf){
                                    .rx_hash_function =
                                        IBV_RX_HASH_FUNC_TOEPLITZ,
                                    .rx_hash_key_len = rss_hash_default_key_len,
                                    .rx_hash_key = rss_hash_default_key,
                                    .rx_hash_fields_mask = 0,
                                },
                                .rwq_ind_tbl = fdq->ind_table,
                                .pd = priv->pd,
                            });
    if (!fdq->qp) {
        WARN("cannot allocate QP for drop queue");
        goto error;
    }
    priv->flow_drop_queue = fdq;
    return 0;
error:
    if (fdq && fdq->qp)
        claim_zero(ibv_destroy_qp(fdq->qp));
    if (fdq && fdq->ind_table)
        claim_zero(ibv_destroy_rwq_ind_table(fdq->ind_table));
    if (fdq && fdq->wq)
        claim_zero(ibv_destroy_wq(fdq->wq));
    if (fdq && fdq->cq)
        claim_zero(ibv_destroy_cq(fdq->cq));
    if (fdq)
        rte_free(fdq);
    priv->flow_drop_queue = NULL;
    return -1;
}
/**
 * Delete drop queue.
 *
 * @param priv
 *   Pointer to private structure.
 */
void
priv_flow_delete_drop_queue(struct priv *priv)
{
    struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;

    if (!fdq)
        return;
    if (fdq->qp)
        claim_zero(ibv_destroy_qp(fdq->qp));
    if (fdq->ind_table)
        claim_zero(ibv_destroy_rwq_ind_table(fdq->ind_table));
    if (fdq->wq)
        claim_zero(ibv_destroy_wq(fdq->wq));
    if (fdq->cq)
        claim_zero(ibv_destroy_cq(fdq->cq));
    rte_free(fdq);
    priv->flow_drop_queue = NULL;
}
/**
 * Remove all flows.
 *
 * @param priv
 *   Pointer to private structure.
 * @param list
 *   Pointer to a TAILQ flow list.
 */
void
priv_flow_stop(struct priv *priv, struct mlx5_flows *list)
{
    struct rte_flow *flow;

    TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
        unsigned int i;

        if (flow->drop) {
            if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
                continue;
            claim_zero(ibv_destroy_flow
                       (flow->frxq[HASH_RXQ_ETH].ibv_flow));
            flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
            /* Next flow. */
            continue;
        }
        if (flow->mark) {
            struct mlx5_ind_table_ibv *ind_tbl = NULL;

            for (i = 0; i != hash_rxq_init_n; ++i) {
                if (!flow->frxq[i].hrxq)
                    continue;
                ind_tbl = flow->frxq[i].hrxq->ind_table;
            }
            assert(ind_tbl);
            for (i = 0; i != ind_tbl->queues_n; ++i)
                (*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
        }
        for (i = 0; i != hash_rxq_init_n; ++i) {
            if (!flow->frxq[i].ibv_flow)
                continue;
            claim_zero(ibv_destroy_flow(flow->frxq[i].ibv_flow));
            flow->frxq[i].ibv_flow = NULL;
            mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq);
            flow->frxq[i].hrxq = NULL;
        }
        DEBUG("Flow %p removed", (void *)flow);
    }
}
/**
 * Add all flows.
 *
 * @param priv
 *   Pointer to private structure.
 * @param list
 *   Pointer to a TAILQ flow list.
 *
 * @return
 *   0 on success, an errno value otherwise and rte_errno is set.
 */
int
priv_flow_start(struct priv *priv, struct mlx5_flows *list)
{
    struct rte_flow *flow;

    TAILQ_FOREACH(flow, list, next) {
        unsigned int i;

        if (flow->drop) {
            flow->frxq[HASH_RXQ_ETH].ibv_flow =
                ibv_create_flow
                (priv->flow_drop_queue->qp,
                 flow->frxq[HASH_RXQ_ETH].ibv_attr);
            if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
                DEBUG("Flow %p cannot be applied",
                      (void *)flow);
                rte_errno = EINVAL;
                return rte_errno;
            }
            DEBUG("Flow %p applied", (void *)flow);
            /* Next flow. */
            continue;
        }
        for (i = 0; i != hash_rxq_init_n; ++i) {
            if (!flow->frxq[i].ibv_attr)
                continue;
            flow->frxq[i].hrxq =
                mlx5_priv_hrxq_get(priv, flow->rss_conf.rss_key,
                                   flow->rss_conf.rss_key_len,
                                   hash_rxq_init[i].hash_fields,
                                   (*flow->queues),
                                   flow->queues_n);
            if (flow->frxq[i].hrxq)
                goto flow_create;
            flow->frxq[i].hrxq =
                mlx5_priv_hrxq_new(priv, flow->rss_conf.rss_key,
                                   flow->rss_conf.rss_key_len,
                                   hash_rxq_init[i].hash_fields,
                                   (*flow->queues),
                                   flow->queues_n);
            if (!flow->frxq[i].hrxq) {
                DEBUG("Flow %p cannot be applied",
                      (void *)flow);
                rte_errno = EINVAL;
                return rte_errno;
            }
flow_create:
            flow->frxq[i].ibv_flow =
                ibv_create_flow(flow->frxq[i].hrxq->qp,
                                flow->frxq[i].ibv_attr);
            if (!flow->frxq[i].ibv_flow) {
                DEBUG("Flow %p cannot be applied",
                      (void *)flow);
                rte_errno = EINVAL;
                return rte_errno;
            }
            DEBUG("Flow %p applied", (void *)flow);
        }
        if (!flow->mark)
            continue;
        for (i = 0; i != flow->queues_n; ++i)
            (*priv->rxqs)[(*flow->queues)[i]]->mark = 1;
    }
    return 0;
}
/**
 * Verify the flow list is empty.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return the number of flows not released.
 */
int
priv_flow_verify(struct priv *priv)
{
    struct rte_flow *flow;
    int ret = 0;

    TAILQ_FOREACH(flow, &priv->flows, next) {
        DEBUG("%p: flow %p still referenced", (void *)priv,
              (void *)flow);
        ++ret;
    }
    return ret;
}
/**
 * Enable a control flow configured from the control plane.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param eth_spec
 *   An Ethernet flow spec to apply.
 * @param eth_mask
 *   An Ethernet flow mask to apply.
 * @param vlan_spec
 *   A VLAN flow spec to apply.
 * @param vlan_mask
 *   A VLAN flow mask to apply.
 *
 * @return
 *   0 on success.
 */
int
mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
                    struct rte_flow_item_eth *eth_spec,
                    struct rte_flow_item_eth *eth_mask,
                    struct rte_flow_item_vlan *vlan_spec,
                    struct rte_flow_item_vlan *vlan_mask)
{
    struct priv *priv = dev->data->dev_private;
    const struct rte_flow_attr attr = {
        .ingress = 1,
        .priority = MLX5_CTRL_FLOW_PRIORITY,
    };
    struct rte_flow_item items[] = {
        {
            .type = RTE_FLOW_ITEM_TYPE_ETH,
            .spec = eth_spec,
            .last = NULL,
            .mask = eth_mask,
        },
        {
            .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
                    RTE_FLOW_ITEM_TYPE_END,
            .spec = vlan_spec,
            .last = NULL,
            .mask = vlan_mask,
        },
        {
            .type = RTE_FLOW_ITEM_TYPE_END,
        },
    };
    struct rte_flow_action actions[] = {
        {
            .type = RTE_FLOW_ACTION_TYPE_RSS,
        },
        {
            .type = RTE_FLOW_ACTION_TYPE_END,
        },
    };
    struct rte_flow *flow;
    struct rte_flow_error error;
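    /*
     * struct rte_flow_action_rss ends in a flexible queue[] array; the
     * union below overlays it with a fully sized local struct so the
     * queue storage can live on the stack.
     */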
    union {
        struct rte_flow_action_rss rss;
        struct {
            const struct rte_eth_rss_conf *rss_conf;
            uint16_t num;
            uint16_t queue[RTE_MAX_QUEUES_PER_PORT];
        } local;
    } action_rss;
    unsigned int i;

    if (!priv->reta_idx_n)
        return EINVAL;
    for (i = 0; i != priv->reta_idx_n; ++i)
        action_rss.local.queue[i] = (*priv->reta_idx)[i];
    action_rss.local.rss_conf = &priv->rss_conf;
    action_rss.local.num = priv->reta_idx_n;
    actions[0].conf = (const void *)&action_rss.rss;
    flow = priv_flow_create(priv, &priv->ctrl_flows, &attr, items, actions,
                            &error);
    if (!flow)
        return rte_errno;
    return 0;
}
/**
 * Enable a flow control configured from the control plane.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param eth_spec
 *   An Ethernet flow spec to apply.
 * @param eth_mask
 *   An Ethernet flow mask to apply.
 *
 * @return
 *   0 on success.
 */
int
mlx5_ctrl_flow(struct rte_eth_dev *dev,
               struct rte_flow_item_eth *eth_spec,
               struct rte_flow_item_eth *eth_mask)
{
    return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
}
/**
 * Destroy a flow.
 *
 * @see rte_flow_destroy()
 * @see rte_flow_ops
 */
int
mlx5_flow_destroy(struct rte_eth_dev *dev,
                  struct rte_flow *flow,
                  struct rte_flow_error *error)
{
    struct priv *priv = dev->data->dev_private;

    (void)error;
    priv_lock(priv);
    priv_flow_destroy(priv, &priv->flows, flow);
    priv_unlock(priv);
    return 0;
}
/**
 * Destroy all flows.
 *
 * @see rte_flow_flush()
 * @see rte_flow_ops
 */
int
mlx5_flow_flush(struct rte_eth_dev *dev,
                struct rte_flow_error *error)
{
    struct priv *priv = dev->data->dev_private;

    (void)error;
    priv_lock(priv);
    priv_flow_flush(priv, &priv->flows);
    priv_unlock(priv);
    return 0;
}
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
/**
 * Query flow counter.
 *
 * @param cs
 *   The counter set.
 * @param counter_stats
 *   Counter values saved at the last reset, used to compute deltas.
 * @param[in, out] query_count
 *   Returned data from the counter, including the reset request.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, an errno value otherwise and rte_errno is set.
 */
static int
priv_flow_query_count(struct ibv_counter_set *cs,
                      struct mlx5_flow_counter_stats *counter_stats,
                      struct rte_flow_query_count *query_count,
                      struct rte_flow_error *error)
{
    uint64_t counters[2];
    struct ibv_query_counter_set_attr query_cs_attr = {
        .cs = cs,
        .query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
    };
    struct ibv_counter_set_data query_out = {
        .out = counters,
        .outlen = 2 * sizeof(uint64_t),
    };
    int res = ibv_query_counter_set(&query_cs_attr, &query_out);

    if (res) {
        rte_flow_error_set(error, -res,
                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
                           NULL,
                           "cannot read counter");
        return -res;
    }
    query_count->hits_set = 1;
    query_count->bytes_set = 1;
    query_count->hits = counters[0] - counter_stats->hits;
    query_count->bytes = counters[1] - counter_stats->bytes;
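    /*
     * Hardware counters are cumulative: report the delta against the
     * stats saved at the last reset, and rebase them when a reset is
     * requested.
     */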
    if (query_count->reset) {
        counter_stats->hits = counters[0];
        counter_stats->bytes = counters[1];
    }
    return 0;
}
/**
 * Query a flow.
 *
 * @see rte_flow_query()
 * @see rte_flow_ops
 */
int
mlx5_flow_query(struct rte_eth_dev *dev,
                struct rte_flow *flow,
                enum rte_flow_action_type action __rte_unused,
                void *data,
                struct rte_flow_error *error)
{
    struct priv *priv = dev->data->dev_private;
    int res = EINVAL;

    priv_lock(priv);
    if (flow->cs) {
        res = priv_flow_query_count(flow->cs,
                                    &flow->counter_stats,
                                    (struct rte_flow_query_count *)data,
                                    error);
    } else {
        rte_flow_error_set(error, res,
                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
                           NULL,
                           "no counter found for flow");
    }
    priv_unlock(priv);
    return -res;
}
#endif
/**
 * Isolated mode.
 *
 * @see rte_flow_isolate()
 * @see rte_flow_ops
 */
int
mlx5_flow_isolate(struct rte_eth_dev *dev,
                  int enable,
                  struct rte_flow_error *error)
{
    struct priv *priv = dev->data->dev_private;

    priv_lock(priv);
    if (dev->data->dev_started) {
        rte_flow_error_set(error, EBUSY,
                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
                           NULL,
                           "port must be stopped first");
        priv_unlock(priv);
        return -rte_errno;
    }
    priv->isolated = !!enable;
    if (enable)
        priv->dev->dev_ops = &mlx5_dev_ops_isolate;
    else
        priv->dev->dev_ops = &mlx5_dev_ops;
    priv_unlock(priv);
    return 0;
}
/**
 * Convert a flow director filter to a generic flow.
 *
 * @param priv
 *   Private structure.
 * @param fdir_filter
 *   Flow director filter to add.
 * @param attributes
 *   Generic flow parameters structure.
 *
 * @return
 *   0 on success, errno value on error.
 */
static int
priv_fdir_filter_convert(struct priv *priv,
			 const struct rte_eth_fdir_filter *fdir_filter,
			 struct mlx5_fdir *attributes)
{
	const struct rte_eth_fdir_input *input = &fdir_filter->input;

	/* Validate queue number. */
	if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
		ERROR("invalid queue number %d", fdir_filter->action.rx_queue);
		return EINVAL;
	}
	attributes->attr.ingress = 1;
	attributes->items[0] = (struct rte_flow_item) {
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.spec = &attributes->l2,
		.mask = &attributes->l2_mask,
	};
	switch (fdir_filter->action.behavior) {
	case RTE_ETH_FDIR_ACCEPT:
		attributes->actions[0] = (struct rte_flow_action){
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &attributes->queue,
		};
		break;
	case RTE_ETH_FDIR_REJECT:
		attributes->actions[0] = (struct rte_flow_action){
			.type = RTE_FLOW_ACTION_TYPE_DROP,
		};
		break;
	default:
		ERROR("invalid behavior %d", fdir_filter->action.behavior);
		return ENOTSUP;
	}
	attributes->queue.index = fdir_filter->action.rx_queue;
	switch (fdir_filter->input.flow_type) {
	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = input->flow.udp4_flow.ip.src_ip,
			.dst_addr = input->flow.udp4_flow.ip.dst_ip,
			.time_to_live = input->flow.udp4_flow.ip.ttl,
			.type_of_service = input->flow.udp4_flow.ip.tos,
			.next_proto_id = input->flow.udp4_flow.ip.proto,
		};
		attributes->l4.udp.hdr = (struct udp_hdr){
			.src_port = input->flow.udp4_flow.src_port,
			.dst_port = input->flow.udp4_flow.dst_port,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV4,
			.spec = &attributes->l3,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_UDP,
			.spec = &attributes->l4,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = input->flow.tcp4_flow.ip.src_ip,
			.dst_addr = input->flow.tcp4_flow.ip.dst_ip,
			.time_to_live = input->flow.tcp4_flow.ip.ttl,
			.type_of_service = input->flow.tcp4_flow.ip.tos,
			.next_proto_id = input->flow.tcp4_flow.ip.proto,
		};
		attributes->l4.tcp.hdr = (struct tcp_hdr){
			.src_port = input->flow.tcp4_flow.src_port,
			.dst_port = input->flow.tcp4_flow.dst_port,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV4,
			.spec = &attributes->l3,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_TCP,
			.spec = &attributes->l4,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = input->flow.ip4_flow.src_ip,
			.dst_addr = input->flow.ip4_flow.dst_ip,
			.time_to_live = input->flow.ip4_flow.ttl,
			.type_of_service = input->flow.ip4_flow.tos,
			.next_proto_id = input->flow.ip4_flow.proto,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV4,
			.spec = &attributes->l3,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
			.hop_limits = input->flow.udp6_flow.ip.hop_limits,
			.proto = input->flow.udp6_flow.ip.proto,
		};
		memcpy(attributes->l3.ipv6.hdr.src_addr,
		       input->flow.udp6_flow.ip.src_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
		memcpy(attributes->l3.ipv6.hdr.dst_addr,
		       input->flow.udp6_flow.ip.dst_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
		attributes->l4.udp.hdr = (struct udp_hdr){
			.src_port = input->flow.udp6_flow.src_port,
			.dst_port = input->flow.udp6_flow.dst_port,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV6,
			.spec = &attributes->l3,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_UDP,
			.spec = &attributes->l4,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
			.hop_limits = input->flow.tcp6_flow.ip.hop_limits,
			.proto = input->flow.tcp6_flow.ip.proto,
		};
		memcpy(attributes->l3.ipv6.hdr.src_addr,
		       input->flow.tcp6_flow.ip.src_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
		memcpy(attributes->l3.ipv6.hdr.dst_addr,
		       input->flow.tcp6_flow.ip.dst_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
		attributes->l4.tcp.hdr = (struct tcp_hdr){
			.src_port = input->flow.tcp6_flow.src_port,
			.dst_port = input->flow.tcp6_flow.dst_port,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV6,
			.spec = &attributes->l3,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_TCP,
			.spec = &attributes->l4,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
			.hop_limits = input->flow.ipv6_flow.hop_limits,
			.proto = input->flow.ipv6_flow.proto,
		};
		memcpy(attributes->l3.ipv6.hdr.src_addr,
		       input->flow.ipv6_flow.src_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
		memcpy(attributes->l3.ipv6.hdr.dst_addr,
		       input->flow.ipv6_flow.dst_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV6,
			.spec = &attributes->l3,
		};
		break;
	default:
		ERROR("invalid flow type %d",
		      fdir_filter->input.flow_type);
		return ENOTSUP;
	}
	return 0;
}
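/*
 * For instance, an RTE_ETH_FLOW_NONFRAG_IPV4_UDP filter comes out of the
 * conversion above as the generic item chain ETH -> IPV4 -> UDP with
 * either a QUEUE or a DROP action, ready for the regular flow parser.
 */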
/**
 * Add new flow director filter and store it in list.
 *
 * @param priv
 *   Private structure.
 * @param fdir_filter
 *   Flow director filter to add.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_fdir_filter_add(struct priv *priv,
		     const struct rte_eth_fdir_filter *fdir_filter)
{
	struct mlx5_fdir attributes = {
		.attr.group = 0,
		.l2_mask = {
			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.type = 0,
		},
	};
	struct mlx5_flow_parse parser = {
		.layer = HASH_RXQ_ETH,
	};
	struct rte_flow_error error;
	struct rte_flow *flow;
	int ret;

	ret = priv_fdir_filter_convert(priv, fdir_filter, &attributes);
	if (ret)
		return ret;
	ret = priv_flow_convert(priv, &attributes.attr, attributes.items,
				attributes.actions, &error, &parser);
	if (ret)
		return ret;
	flow = priv_flow_create(priv,
				&priv->flows,
				&attributes.attr,
				attributes.items,
				attributes.actions,
				&error);
	if (flow) {
		DEBUG("FDIR created %p", (void *)flow);
		return 0;
	}
	return ENOTSUP;
}
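/*
 * The resulting flow is stored on priv->flows like any rte_flow rule, so
 * it is also destroyed by rte_flow_flush() and by RTE_ETH_FILTER_FLUSH.
 */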
/**
 * Delete specific filter.
 *
 * @param priv
 *   Private structure.
 * @param fdir_filter
 *   Filter to be deleted.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_fdir_filter_delete(struct priv *priv,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	struct mlx5_fdir attributes = {
		.attr.group = 0,
	};
	struct mlx5_flow_parse parser = {
		.create = 1,
		.layer = HASH_RXQ_ETH,
	};
	struct rte_flow_error error;
	struct rte_flow *flow;
	unsigned int i;
	int ret;

	ret = priv_fdir_filter_convert(priv, fdir_filter, &attributes);
	if (ret)
		return ret;
	ret = priv_flow_convert(priv, &attributes.attr, attributes.items,
				attributes.actions, &error, &parser);
	if (ret)
		goto exit;
	/*
	 * Special case for drop action which is only set in the
	 * specifications when the flow is created. In this situation the
	 * drop specification is missing.
	 */
	if (parser.drop) {
		struct ibv_flow_spec_action_drop *drop;

		drop = (void *)((uintptr_t)parser.queue[HASH_RXQ_ETH].ibv_attr +
				parser.queue[HASH_RXQ_ETH].offset);
		*drop = (struct ibv_flow_spec_action_drop){
			.type = IBV_FLOW_SPEC_ACTION_DROP,
			.size = sizeof(struct ibv_flow_spec_action_drop),
		};
		parser.queue[HASH_RXQ_ETH].ibv_attr->num_of_specs++;
	}
	TAILQ_FOREACH(flow, &priv->flows, next) {
		struct ibv_flow_attr *attr;
		struct ibv_spec_header *attr_h;
		void *spec;
		struct ibv_flow_attr *flow_attr;
		struct ibv_spec_header *flow_h;
		void *flow_spec;
		unsigned int specs_n;

		attr = parser.queue[HASH_RXQ_ETH].ibv_attr;
		flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr;
		/* Compare first the attributes. */
		if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
			continue;
		if (attr->num_of_specs == 0)
			continue;
		spec = (void *)((uintptr_t)attr +
				sizeof(struct ibv_flow_attr));
		flow_spec = (void *)((uintptr_t)flow_attr +
				     sizeof(struct ibv_flow_attr));
		specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
		for (i = 0; i != specs_n; ++i) {
			attr_h = spec;
			flow_h = flow_spec;
			if (memcmp(spec, flow_spec,
				   RTE_MIN(attr_h->size, flow_h->size)))
				goto wrong_flow;
			spec = (void *)((uintptr_t)spec + attr_h->size);
			flow_spec = (void *)((uintptr_t)flow_spec +
					     flow_h->size);
		}
		/* At this point, the flow matches. */
		break;
wrong_flow:
		/* The flow does not match. */
		continue;
	}
	if (flow)
		priv_flow_destroy(priv, &priv->flows, flow);
exit:
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser.queue[i].ibv_attr)
			rte_free(parser.queue[i].ibv_attr);
	}
	return ret;
}
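/*
 * Deletion re-parses the filter into Verbs specifications, then linearly
 * scans priv->flows for a flow whose attribute block and specification
 * list match byte for byte; only the first match is destroyed.
 */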
/**
 * Update queue for specific filter.
 *
 * @param priv
 *   Private structure.
 * @param fdir_filter
 *   Filter to be updated.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_fdir_filter_update(struct priv *priv,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	int ret;

	ret = priv_fdir_filter_delete(priv, fdir_filter);
	if (ret)
		return ret;
	ret = priv_fdir_filter_add(priv, fdir_filter);
	return ret;
}
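/*
 * Note that update is delete followed by add and therefore not atomic:
 * if the add step fails, the original filter is already gone.
 */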
/**
 * Flush all filters.
 *
 * @param priv
 *   Private structure.
 */
static void
priv_fdir_filter_flush(struct priv *priv)
{
	priv_flow_flush(priv, &priv->flows);
}
/**
 * Get flow director information.
 *
 * @param priv
 *   Private structure.
 * @param[out] fdir_info
 *   Resulting flow director information.
 */
static void
priv_fdir_info_get(struct priv *priv, struct rte_eth_fdir_info *fdir_info)
{
	struct rte_eth_fdir_masks *mask =
		&priv->dev->data->dev_conf.fdir_conf.mask;

	fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
	fdir_info->guarant_spc = 0;
	rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
	fdir_info->max_flexpayload = 0;
	fdir_info->flow_types_mask[0] = 0;
	fdir_info->flex_payload_unit = 0;
	fdir_info->max_flex_payload_segment_num = 0;
	fdir_info->flex_payload_limit = 0;
	memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
}
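/*
 * Flexible payload and guaranteed space are unsupported by this PMD,
 * which is why every related field is reported as zero; only the mode
 * and the configured input mask carry information.
 */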
/**
 * Deal with flow director operations.
 *
 * @param priv
 *   Pointer to private structure.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_fdir_ctrl_func(struct priv *priv, enum rte_filter_op filter_op, void *arg)
{
	enum rte_fdir_mode fdir_mode =
		priv->dev->data->dev_conf.fdir_conf.mode;
	int ret = 0;

	if (filter_op == RTE_ETH_FILTER_NOP)
		return 0;
	if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
	    fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
		ERROR("%p: flow director mode %d not supported",
		      (void *)priv, fdir_mode);
		return EINVAL;
	}
	switch (filter_op) {
	case RTE_ETH_FILTER_ADD:
		ret = priv_fdir_filter_add(priv, arg);
		break;
	case RTE_ETH_FILTER_UPDATE:
		ret = priv_fdir_filter_update(priv, arg);
		break;
	case RTE_ETH_FILTER_DELETE:
		ret = priv_fdir_filter_delete(priv, arg);
		break;
	case RTE_ETH_FILTER_FLUSH:
		priv_fdir_filter_flush(priv);
		break;
	case RTE_ETH_FILTER_INFO:
		priv_fdir_info_get(priv, arg);
		break;
	default:
		DEBUG("%p: unknown operation %u", (void *)priv,
		      filter_op);
		ret = EINVAL;
		break;
	}
	return ret;
}
/**
 * Manage filter operations.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param filter_type
 *   Filter type.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
int
mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
		     enum rte_filter_type filter_type,
		     enum rte_filter_op filter_op,
		     void *arg)
{
	int ret = EINVAL;
	struct priv *priv = dev->data->dev_private;

	switch (filter_type) {
	case RTE_ETH_FILTER_GENERIC:
		if (filter_op != RTE_ETH_FILTER_GET)
			return -EINVAL;
		*(const void **)arg = &mlx5_flow_ops;
		return 0;
	case RTE_ETH_FILTER_FDIR:
		priv_lock(priv);
		ret = priv_fdir_ctrl_func(priv, filter_op, arg);
		priv_unlock(priv);
		break;
	default:
		ERROR("%p: filter type (%d) not supported",
		      (void *)dev, filter_type);
		break;
	}
	return -ret;
}
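/*
 * Usage sketch (hypothetical application code): the generic flow ops are
 * retrieved through the RTE_ETH_FILTER_GENERIC branch above.
 *
 *	const struct rte_flow_ops *ops;
 *
 *	rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_GENERIC,
 *				RTE_ETH_FILTER_GET, &ops);
 */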