1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2016 6WIND S.A.
 * Copyright 2016 Mellanox Technologies, Ltd
 */
11 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
13 #pragma GCC diagnostic ignored "-Wpedantic"
15 #include <infiniband/verbs.h>
17 #pragma GCC diagnostic error "-Wpedantic"
20 #include <rte_common.h>
21 #include <rte_ether.h>
22 #include <rte_eth_ctrl.h>
23 #include <rte_ethdev_driver.h>
25 #include <rte_flow_driver.h>
26 #include <rte_malloc.h>
30 #include "mlx5_defs.h"
32 #include "mlx5_glue.h"
34 /* Flow priority for control plane flows. */
35 #define MLX5_CTRL_FLOW_PRIORITY 1
/* Internet Protocol versions. */
#define MLX5_IPV4 4
#define MLX5_IPV6 6
42 #ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
struct ibv_flow_spec_counter_action {
	int dummy;
};
#endif
48 /* Dev ops structure defined in mlx5.c */
49 extern const struct eth_dev_ops mlx5_dev_ops;
50 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
/** Structure given to the conversion functions. */
53 struct mlx5_flow_data {
54 struct rte_eth_dev *dev; /** Ethernet device. */
55 struct mlx5_flow_parse *parser; /** Parser context. */
56 struct rte_flow_error *error; /** Error context. */
60 mlx5_flow_create_eth(const struct rte_flow_item *item,
61 const void *default_mask,
62 struct mlx5_flow_data *data);
65 mlx5_flow_create_vlan(const struct rte_flow_item *item,
66 const void *default_mask,
67 struct mlx5_flow_data *data);
70 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
71 const void *default_mask,
72 struct mlx5_flow_data *data);
75 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
76 const void *default_mask,
77 struct mlx5_flow_data *data);
80 mlx5_flow_create_udp(const struct rte_flow_item *item,
81 const void *default_mask,
82 struct mlx5_flow_data *data);
85 mlx5_flow_create_tcp(const struct rte_flow_item *item,
86 const void *default_mask,
87 struct mlx5_flow_data *data);
90 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
91 const void *default_mask,
92 struct mlx5_flow_data *data);
95 mlx5_flow_create_vxlan_gpe(const struct rte_flow_item *item,
96 const void *default_mask,
97 struct mlx5_flow_data *data);
100 mlx5_flow_create_gre(const struct rte_flow_item *item,
101 const void *default_mask,
102 struct mlx5_flow_data *data);
104 struct mlx5_flow_parse;
107 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
111 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);
114 mlx5_flow_create_count(struct rte_eth_dev *dev, struct mlx5_flow_parse *parser);
/* Hash RX queue types. */
enum hash_rxq_type {
	HASH_RXQ_TCPV4,
	HASH_RXQ_UDPV4,
	HASH_RXQ_IPV4,
	HASH_RXQ_TCPV6,
	HASH_RXQ_UDPV6,
	HASH_RXQ_IPV6,
	HASH_RXQ_ETH,
	HASH_RXQ_TUNNEL,
};
128 /* Initialization data for hash RX queue. */
129 struct hash_rxq_init {
130 uint64_t hash_fields; /* Fields that participate in the hash. */
131 uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
132 unsigned int flow_priority; /* Flow priority to use. */
133 unsigned int ip_version; /* Internet protocol. */
136 /* Initialization data for hash RX queues. */
137 const struct hash_rxq_init hash_rxq_init[] = {
139 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
140 IBV_RX_HASH_DST_IPV4 |
141 IBV_RX_HASH_SRC_PORT_TCP |
142 IBV_RX_HASH_DST_PORT_TCP),
143 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
145 .ip_version = MLX5_IPV4,
148 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
149 IBV_RX_HASH_DST_IPV4 |
150 IBV_RX_HASH_SRC_PORT_UDP |
151 IBV_RX_HASH_DST_PORT_UDP),
152 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
154 .ip_version = MLX5_IPV4,
157 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
158 IBV_RX_HASH_DST_IPV4),
159 .dpdk_rss_hf = (ETH_RSS_IPV4 |
162 .ip_version = MLX5_IPV4,
165 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
166 IBV_RX_HASH_DST_IPV6 |
167 IBV_RX_HASH_SRC_PORT_TCP |
168 IBV_RX_HASH_DST_PORT_TCP),
169 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
171 .ip_version = MLX5_IPV6,
174 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
175 IBV_RX_HASH_DST_IPV6 |
176 IBV_RX_HASH_SRC_PORT_UDP |
177 IBV_RX_HASH_DST_PORT_UDP),
178 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
180 .ip_version = MLX5_IPV6,
183 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
184 IBV_RX_HASH_DST_IPV6),
185 .dpdk_rss_hf = (ETH_RSS_IPV6 |
		.ip_version = MLX5_IPV6,
	},
	[HASH_RXQ_ETH] = {
		.hash_fields = 0,
		.dpdk_rss_hf = 0,
		.flow_priority = 2,
	},
};
197 /* Number of entries in hash_rxq_init[]. */
198 const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
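/*
 * Note: hash_rxq_init[] is indexed by enum hash_rxq_type and drives how a
 * single rte_flow rule is expanded into one Verbs flow per hash Rx queue
 * type.  A minimal sketch of how a DPDK RSS request maps to Verbs hash
 * fields through this table (only names defined above are used):
 *
 *	uint64_t rss_types = ETH_RSS_NONFRAG_IPV4_UDP;
 *	unsigned int i;
 *
 *	for (i = 0; i != hash_rxq_init_n; ++i)
 *		if (hash_rxq_init[i].dpdk_rss_hf & rss_types)
 *			break;
 *
 * which stops at the HASH_RXQ_UDPV4 entry, i.e. hashing on IPv4 addresses
 * plus UDP ports.
 */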
200 /** Structure for holding counter stats. */
201 struct mlx5_flow_counter_stats {
202 uint64_t hits; /**< Number of packets matched by the rule. */
203 uint64_t bytes; /**< Number of bytes matched by the rule. */
206 /** Structure for Drop queue. */
207 struct mlx5_hrxq_drop {
208 struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
209 struct ibv_qp *qp; /**< Verbs queue pair. */
210 struct ibv_wq *wq; /**< Verbs work queue. */
211 struct ibv_cq *cq; /**< Verbs completion queue. */
/* Flow structures. */
struct mlx5_flow {
216 uint64_t hash_fields; /**< Fields that participate in the hash. */
217 struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
218 struct ibv_flow *ibv_flow; /**< Verbs flow. */
219 struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
222 /* Drop flows structures. */
223 struct mlx5_flow_drop {
224 struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
	struct ibv_flow *ibv_flow; /**< Verbs flow. */
};

struct rte_flow {
	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
230 uint32_t mark:1; /**< Set if the flow is marked. */
231 uint32_t drop:1; /**< Drop queue. */
232 struct rte_flow_action_rss rss_conf; /**< RSS configuration */
233 uint16_t (*queues)[]; /**< Queues indexes to use. */
234 uint8_t rss_key[40]; /**< copy of the RSS key. */
235 uint32_t tunnel; /**< Tunnel type of RTE_PTYPE_TUNNEL_XXX. */
236 struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
	struct mlx5_flow_counter_stats counter_stats; /**< The counter stats. */
238 struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
239 /**< Flow with Rx queue. */
/** Static initializer for items. */
#define ITEMS(...) \
	(const enum rte_flow_item_type []){ \
		__VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
	}
248 #define IS_TUNNEL(type) ( \
249 (type) == RTE_FLOW_ITEM_TYPE_VXLAN || \
250 (type) == RTE_FLOW_ITEM_TYPE_VXLAN_GPE || \
251 (type) == RTE_FLOW_ITEM_TYPE_GRE)
253 const uint32_t flow_ptype[] = {
254 [RTE_FLOW_ITEM_TYPE_VXLAN] = RTE_PTYPE_TUNNEL_VXLAN,
255 [RTE_FLOW_ITEM_TYPE_VXLAN_GPE] = RTE_PTYPE_TUNNEL_VXLAN_GPE,
256 [RTE_FLOW_ITEM_TYPE_GRE] = RTE_PTYPE_TUNNEL_GRE,
259 #define PTYPE_IDX(t) ((RTE_PTYPE_TUNNEL_MASK & (t)) >> 12)
261 const uint32_t ptype_ext[] = {
	[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] = RTE_PTYPE_TUNNEL_VXLAN |
					      RTE_PTYPE_L4_UDP,
	[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN_GPE)] = RTE_PTYPE_TUNNEL_VXLAN_GPE |
						   RTE_PTYPE_L4_UDP,
	[PTYPE_IDX(RTE_PTYPE_TUNNEL_GRE)] = RTE_PTYPE_TUNNEL_GRE,
};
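/*
 * Note: PTYPE_IDX() keeps only the tunnel code of an RTE_PTYPE_TUNNEL_XXX
 * value (RTE_PTYPE_TUNNEL_MASK covers a 4-bit field starting at bit 12),
 * so flow_ptype[] and ptype_ext[] stay small lookup tables of at most 16
 * entries.  For instance ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)]
 * yields the tunnel type with the outer UDP L4 bit already set.
 */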
269 /** Structure to generate a simple graph of layers supported by the NIC. */
270 struct mlx5_flow_items {
271 /** List of possible actions for these items. */
272 const enum rte_flow_action_type *const actions;
	/** Bit-masks corresponding to the possibilities for the item. */
	const void *mask;
	/**
	 * Default bit-masks to use when item->mask is not provided. When
	 * \default_mask is also NULL, the full supported bit-mask (\mask) is
	 * used instead.
	 */
	const void *default_mask;
281 /** Bit-masks size in bytes. */
282 const unsigned int mask_sz;
	/**
	 * Conversion function from rte_flow to NIC specific flow.
	 *
	 * @param item
	 *   rte_flow item to convert.
	 * @param default_mask
	 *   Default bit-masks to use when item->mask is not provided.
	 * @param data
	 *   Internal structure to store the conversion.
	 *
	 * @return
	 *   0 on success, a negative errno value otherwise and rte_errno is
	 *   set.
	 */
	int (*convert)(const struct rte_flow_item *item,
		       const void *default_mask,
		       struct mlx5_flow_data *data);
300 /** Size in bytes of the destination structure. */
301 const unsigned int dst_sz;
302 /** List of possible following items. */
303 const enum rte_flow_item_type *const items;
/** Valid actions for this PMD. */
307 static const enum rte_flow_action_type valid_actions[] = {
308 RTE_FLOW_ACTION_TYPE_DROP,
309 RTE_FLOW_ACTION_TYPE_QUEUE,
310 RTE_FLOW_ACTION_TYPE_MARK,
311 RTE_FLOW_ACTION_TYPE_FLAG,
312 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
313 RTE_FLOW_ACTION_TYPE_COUNT,
315 RTE_FLOW_ACTION_TYPE_END,
318 /** Graph of supported items and associated actions. */
319 static const struct mlx5_flow_items mlx5_flow_items[] = {
320 [RTE_FLOW_ITEM_TYPE_END] = {
321 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
322 RTE_FLOW_ITEM_TYPE_VXLAN,
323 RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
324 RTE_FLOW_ITEM_TYPE_GRE),
326 [RTE_FLOW_ITEM_TYPE_ETH] = {
327 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
328 RTE_FLOW_ITEM_TYPE_IPV4,
329 RTE_FLOW_ITEM_TYPE_IPV6),
330 .actions = valid_actions,
331 .mask = &(const struct rte_flow_item_eth){
332 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
333 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
336 .default_mask = &rte_flow_item_eth_mask,
337 .mask_sz = sizeof(struct rte_flow_item_eth),
338 .convert = mlx5_flow_create_eth,
339 .dst_sz = sizeof(struct ibv_flow_spec_eth),
341 [RTE_FLOW_ITEM_TYPE_VLAN] = {
342 .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
343 RTE_FLOW_ITEM_TYPE_IPV6),
344 .actions = valid_actions,
345 .mask = &(const struct rte_flow_item_vlan){
349 .default_mask = &rte_flow_item_vlan_mask,
350 .mask_sz = sizeof(struct rte_flow_item_vlan),
351 .convert = mlx5_flow_create_vlan,
354 [RTE_FLOW_ITEM_TYPE_IPV4] = {
355 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
356 RTE_FLOW_ITEM_TYPE_TCP,
357 RTE_FLOW_ITEM_TYPE_GRE),
358 .actions = valid_actions,
359 .mask = &(const struct rte_flow_item_ipv4){
363 .type_of_service = -1,
367 .default_mask = &rte_flow_item_ipv4_mask,
368 .mask_sz = sizeof(struct rte_flow_item_ipv4),
369 .convert = mlx5_flow_create_ipv4,
370 .dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
372 [RTE_FLOW_ITEM_TYPE_IPV6] = {
373 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
374 RTE_FLOW_ITEM_TYPE_TCP,
375 RTE_FLOW_ITEM_TYPE_GRE),
376 .actions = valid_actions,
377 .mask = &(const struct rte_flow_item_ipv6){
380 0xff, 0xff, 0xff, 0xff,
381 0xff, 0xff, 0xff, 0xff,
382 0xff, 0xff, 0xff, 0xff,
383 0xff, 0xff, 0xff, 0xff,
386 0xff, 0xff, 0xff, 0xff,
387 0xff, 0xff, 0xff, 0xff,
388 0xff, 0xff, 0xff, 0xff,
389 0xff, 0xff, 0xff, 0xff,
396 .default_mask = &rte_flow_item_ipv6_mask,
397 .mask_sz = sizeof(struct rte_flow_item_ipv6),
398 .convert = mlx5_flow_create_ipv6,
399 .dst_sz = sizeof(struct ibv_flow_spec_ipv6),
401 [RTE_FLOW_ITEM_TYPE_UDP] = {
402 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN,
403 RTE_FLOW_ITEM_TYPE_VXLAN_GPE),
404 .actions = valid_actions,
405 .mask = &(const struct rte_flow_item_udp){
411 .default_mask = &rte_flow_item_udp_mask,
412 .mask_sz = sizeof(struct rte_flow_item_udp),
413 .convert = mlx5_flow_create_udp,
414 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
416 [RTE_FLOW_ITEM_TYPE_TCP] = {
417 .actions = valid_actions,
418 .mask = &(const struct rte_flow_item_tcp){
424 .default_mask = &rte_flow_item_tcp_mask,
425 .mask_sz = sizeof(struct rte_flow_item_tcp),
426 .convert = mlx5_flow_create_tcp,
427 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
429 [RTE_FLOW_ITEM_TYPE_GRE] = {
430 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
431 RTE_FLOW_ITEM_TYPE_IPV4,
432 RTE_FLOW_ITEM_TYPE_IPV6),
433 .actions = valid_actions,
434 .mask = &(const struct rte_flow_item_gre){
437 .default_mask = &rte_flow_item_gre_mask,
438 .mask_sz = sizeof(struct rte_flow_item_gre),
439 .convert = mlx5_flow_create_gre,
440 .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
442 [RTE_FLOW_ITEM_TYPE_VXLAN] = {
443 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
444 RTE_FLOW_ITEM_TYPE_IPV4, /* For L3 VXLAN. */
445 RTE_FLOW_ITEM_TYPE_IPV6), /* For L3 VXLAN. */
446 .actions = valid_actions,
447 .mask = &(const struct rte_flow_item_vxlan){
448 .vni = "\xff\xff\xff",
450 .default_mask = &rte_flow_item_vxlan_mask,
451 .mask_sz = sizeof(struct rte_flow_item_vxlan),
452 .convert = mlx5_flow_create_vxlan,
453 .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
455 [RTE_FLOW_ITEM_TYPE_VXLAN_GPE] = {
456 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
457 RTE_FLOW_ITEM_TYPE_IPV4,
458 RTE_FLOW_ITEM_TYPE_IPV6),
459 .actions = valid_actions,
460 .mask = &(const struct rte_flow_item_vxlan_gpe){
461 .vni = "\xff\xff\xff",
463 .default_mask = &rte_flow_item_vxlan_gpe_mask,
464 .mask_sz = sizeof(struct rte_flow_item_vxlan_gpe),
465 .convert = mlx5_flow_create_vxlan_gpe,
		.dst_sz = sizeof(struct ibv_flow_spec_tunnel),
	},
};
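/*
 * Note: validation walks this graph starting from the
 * RTE_FLOW_ITEM_TYPE_END node: each pattern item must appear in the .items
 * list of the previous node, its .mask/.convert callbacks are then used and
 * .dst_sz is accumulated to size the Verbs attribute buffer.  For example
 * the pattern
 *
 *	eth / ipv4 / udp / vxlan / eth / ipv4
 *
 * is accepted, while "ipv4 / eth" is rejected because
 * RTE_FLOW_ITEM_TYPE_ETH is not listed among the IPv4 node's next items.
 */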
470 /** Structure to pass to the conversion function. */
471 struct mlx5_flow_parse {
472 uint32_t inner; /**< Verbs value, set once tunnel is encountered. */
	uint32_t create:1;
	/**< Whether resources should remain after a validate. */
475 uint32_t drop:1; /**< Target is a drop queue. */
476 uint32_t mark:1; /**< Mark is present in the flow. */
477 uint32_t count:1; /**< Count is present in the flow. */
478 uint32_t mark_id; /**< Mark identifier. */
479 struct rte_flow_action_rss rss_conf; /**< RSS configuration */
480 uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use. */
481 uint8_t rss_key[40]; /**< copy of the RSS key. */
482 enum hash_rxq_type layer; /**< Last pattern layer detected. */
483 enum hash_rxq_type out_layer; /**< Last outer pattern layer detected. */
484 uint32_t tunnel; /**< Tunnel type of RTE_PTYPE_TUNNEL_XXX. */
485 struct ibv_counter_set *cs; /**< Holds the counter set for the rule */
487 struct ibv_flow_attr *ibv_attr;
488 /**< Pointer to Verbs attributes. */
		unsigned int offset;
		/**< Current position or total size of the attribute. */
491 uint64_t hash_fields; /**< Verbs hash fields. */
492 } queue[RTE_DIM(hash_rxq_init)];
495 static const struct rte_flow_ops mlx5_flow_ops = {
496 .validate = mlx5_flow_validate,
497 .create = mlx5_flow_create,
498 .destroy = mlx5_flow_destroy,
499 .flush = mlx5_flow_flush,
500 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
	.query = mlx5_flow_query,
#else
	.query = NULL,
#endif
	.isolate = mlx5_flow_isolate,
};
/* Convert FDIR request to Generic flow. */
struct mlx5_fdir {
510 struct rte_flow_attr attr;
511 struct rte_flow_action actions[2];
512 struct rte_flow_item items[4];
513 struct rte_flow_item_eth l2;
514 struct rte_flow_item_eth l2_mask;
516 struct rte_flow_item_ipv4 ipv4;
517 struct rte_flow_item_ipv6 ipv6;
520 struct rte_flow_item_ipv4 ipv4;
521 struct rte_flow_item_ipv6 ipv6;
524 struct rte_flow_item_udp udp;
525 struct rte_flow_item_tcp tcp;
528 struct rte_flow_item_udp udp;
529 struct rte_flow_item_tcp tcp;
531 struct rte_flow_action_queue queue;
534 /* Verbs specification header. */
535 struct ibv_spec_header {
	enum ibv_flow_spec_type type;
	uint16_t size;
};

/**
 * Check whether an item is fully supported by the NIC matching capability.
544 * Item specification.
 *   Bit-masks covering supported fields to compare with spec, last and mask in
 *   addition to NIC capabilities.
549 * Bit-Mask size in bytes.
552 * 0 on success, a negative errno value otherwise and rte_errno is set.
555 mlx5_flow_item_validate(const struct rte_flow_item *item,
556 const uint8_t *mask, unsigned int size)
559 const uint8_t *spec = item->spec;
560 const uint8_t *last = item->last;
561 const uint8_t *m = item->mask ? item->mask : mask;
563 if (!spec && (item->mask || last))
568 * Single-pass check to make sure that:
569 * - item->mask is supported, no bits are set outside mask.
	 * - Both masked item->spec and item->last are equal (no range
	 *   supported).
573 for (i = 0; i < size; i++) {
576 if ((m[i] | mask[i]) != mask[i])
578 if (last && ((spec[i] & m[i]) != (last[i] & m[i])))
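	/*
	 * Note: sketch of the check above with a hypothetical one-byte field:
	 * given a supported mask of 0x0f and a user mask m of 0x3f,
	 * (m | mask) != mask, so the item is rejected because bits 0x30
	 * cannot be matched by the NIC.  The second test rejects ranges:
	 * when item->last is given, spec and last must be identical under
	 * the effective mask since Verbs specifications cannot express them.
	 */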
588 * Extract attribute to the parser.
591 * Flow rule attributes.
593 * Perform verbose error reporting if not NULL.
596 * 0 on success, a negative errno value otherwise and rte_errno is set.
599 mlx5_flow_convert_attributes(const struct rte_flow_attr *attr,
600 struct rte_flow_error *error)
603 rte_flow_error_set(error, ENOTSUP,
604 RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
606 "groups are not supported");
609 if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
610 rte_flow_error_set(error, ENOTSUP,
611 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
613 "priorities are not supported");
617 rte_flow_error_set(error, ENOTSUP,
618 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
620 "egress is not supported");
623 if (attr->transfer) {
624 rte_flow_error_set(error, ENOTSUP,
625 RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
627 "transfer is not supported");
630 if (!attr->ingress) {
631 rte_flow_error_set(error, ENOTSUP,
632 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
634 "only ingress is supported");
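/*
 * Note: the net effect of the checks above is that the only attribute
 * layout this PMD accepts is roughly:
 *
 *	struct rte_flow_attr attr = {
 *		.ingress = 1,
 *		.priority = 0,
 *	};
 *
 * with .priority optionally set to MLX5_CTRL_FLOW_PRIORITY, while group,
 * egress and transfer must remain 0.
 */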
641 * Extract actions request to the parser.
644 * Pointer to Ethernet device.
646 * Associated actions (list terminated by the END action).
648 * Perform verbose error reporting if not NULL.
649 * @param[in, out] parser
650 * Internal parser structure.
653 * 0 on success, a negative errno value otherwise and rte_errno is set.
656 mlx5_flow_convert_actions(struct rte_eth_dev *dev,
657 const struct rte_flow_action actions[],
658 struct rte_flow_error *error,
659 struct mlx5_flow_parse *parser)
661 enum { FATE = 1, MARK = 2, COUNT = 4, };
662 uint32_t overlap = 0;
663 struct priv *priv = dev->data->dev_private;
665 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
666 if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
668 } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
670 goto exit_action_overlap;
673 } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
674 const struct rte_flow_action_queue *queue =
675 (const struct rte_flow_action_queue *)
679 goto exit_action_overlap;
681 if (!queue || (queue->index > (priv->rxqs_n - 1)))
682 goto exit_action_not_supported;
683 parser->queues[0] = queue->index;
684 parser->rss_conf = (struct rte_flow_action_rss){
686 .queue = parser->queues,
688 } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
689 const struct rte_flow_action_rss *rss =
690 (const struct rte_flow_action_rss *)
692 const uint8_t *rss_key;
693 uint32_t rss_key_len;
697 goto exit_action_overlap;
700 rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ) {
701 rte_flow_error_set(error, EINVAL,
702 RTE_FLOW_ERROR_TYPE_ACTION,
704 "the only supported RSS hash"
705 " function is Toeplitz");
708 #ifndef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
709 if (parser->rss_conf.level > 1) {
710 rte_flow_error_set(error, EINVAL,
711 RTE_FLOW_ERROR_TYPE_ACTION,
713 "a nonzero RSS encapsulation"
714 " level is not supported");
718 if (parser->rss_conf.level > 2) {
719 rte_flow_error_set(error, EINVAL,
720 RTE_FLOW_ERROR_TYPE_ACTION,
722 "RSS encapsulation level"
723 " > 1 is not supported");
726 if (rss->types & MLX5_RSS_HF_MASK) {
727 rte_flow_error_set(error, EINVAL,
728 RTE_FLOW_ERROR_TYPE_ACTION,
730 "unsupported RSS type"
735 rss_key_len = rss->key_len;
738 rss_key_len = rss_hash_default_key_len;
739 rss_key = rss_hash_default_key;
741 if (rss_key_len != RTE_DIM(parser->rss_key)) {
742 rte_flow_error_set(error, EINVAL,
743 RTE_FLOW_ERROR_TYPE_ACTION,
745 "RSS hash key must be"
746 " exactly 40 bytes long");
749 if (!rss->queue_num) {
750 rte_flow_error_set(error, EINVAL,
751 RTE_FLOW_ERROR_TYPE_ACTION,
756 if (rss->queue_num > RTE_DIM(parser->queues)) {
757 rte_flow_error_set(error, EINVAL,
758 RTE_FLOW_ERROR_TYPE_ACTION,
760 "too many queues for RSS"
764 for (n = 0; n < rss->queue_num; ++n) {
765 if (rss->queue[n] >= priv->rxqs_n) {
766 rte_flow_error_set(error, EINVAL,
767 RTE_FLOW_ERROR_TYPE_ACTION,
769 "queue id > number of"
774 parser->rss_conf = (struct rte_flow_action_rss){
775 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
778 .key_len = rss_key_len,
779 .queue_num = rss->queue_num,
780 .key = memcpy(parser->rss_key, rss_key,
781 sizeof(*rss_key) * rss_key_len),
782 .queue = memcpy(parser->queues, rss->queue,
783 sizeof(*rss->queue) *
786 } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
787 const struct rte_flow_action_mark *mark =
788 (const struct rte_flow_action_mark *)
792 goto exit_action_overlap;
795 rte_flow_error_set(error, EINVAL,
796 RTE_FLOW_ERROR_TYPE_ACTION,
798 "mark must be defined");
800 } else if (mark->id >= MLX5_FLOW_MARK_MAX) {
801 rte_flow_error_set(error, ENOTSUP,
802 RTE_FLOW_ERROR_TYPE_ACTION,
804 "mark must be between 0"
809 parser->mark_id = mark->id;
810 } else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
812 goto exit_action_overlap;
815 } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
816 priv->config.flow_counter_en) {
818 goto exit_action_overlap;
822 goto exit_action_not_supported;
	/* When fate is unknown, drop traffic. */
	if (!(overlap & FATE))
		parser->drop = 1;
	if (parser->drop && parser->mark)
		parser->mark = 0;
830 if (!parser->rss_conf.queue_num && !parser->drop) {
831 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
832 NULL, "no valid action");
836 exit_action_not_supported:
837 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
838 actions, "action not supported");
841 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
842 actions, "overlapping actions are not supported");
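/*
 * Note: "overlap" is a small FATE/MARK/COUNT bitmask used to reject two
 * actions of the same class in one rule; QUEUE, RSS and DROP all claim
 * FATE, so a hypothetical action list such as
 *
 *	queue index 0 / rss queues 0 1 end / end
 *
 * ends up in exit_action_overlap above.
 */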
850 * Pattern specification (list terminated by the END pattern item).
852 * Perform verbose error reporting if not NULL.
853 * @param[in, out] parser
854 * Internal parser structure.
857 * 0 on success, a negative errno value otherwise and rte_errno is set.
860 mlx5_flow_convert_items_validate(struct rte_eth_dev *dev,
861 const struct rte_flow_item items[],
862 struct rte_flow_error *error,
863 struct mlx5_flow_parse *parser)
865 struct priv *priv = dev->data->dev_private;
866 const struct mlx5_flow_items *cur_item = mlx5_flow_items;
870 /* Initialise the offsets to start after verbs attribute. */
871 for (i = 0; i != hash_rxq_init_n; ++i)
872 parser->queue[i].offset = sizeof(struct ibv_flow_attr);
873 for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
874 const struct mlx5_flow_items *token = NULL;
877 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
881 cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
883 if (cur_item->items[i] == items->type) {
884 token = &mlx5_flow_items[items->type];
890 goto exit_item_not_supported;
893 ret = mlx5_flow_item_validate(items,
894 (const uint8_t *)cur_item->mask,
897 goto exit_item_not_supported;
898 if (IS_TUNNEL(items->type)) {
899 if (parser->tunnel) {
900 rte_flow_error_set(error, ENOTSUP,
901 RTE_FLOW_ERROR_TYPE_ITEM,
903 "Cannot recognize multiple"
904 " tunnel encapsulations.");
907 if (!priv->config.tunnel_en &&
908 parser->rss_conf.level > 1) {
909 rte_flow_error_set(error, ENOTSUP,
910 RTE_FLOW_ERROR_TYPE_ITEM,
912 "RSS on tunnel is not supported");
915 parser->inner = IBV_FLOW_SPEC_INNER;
916 parser->tunnel = flow_ptype[items->type];
919 parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
921 for (n = 0; n != hash_rxq_init_n; ++n)
922 parser->queue[n].offset += cur_item->dst_sz;
926 parser->queue[HASH_RXQ_ETH].offset +=
927 sizeof(struct ibv_flow_spec_action_drop);
930 for (i = 0; i != hash_rxq_init_n; ++i)
931 parser->queue[i].offset +=
932 sizeof(struct ibv_flow_spec_action_tag);
935 unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
937 for (i = 0; i != hash_rxq_init_n; ++i)
938 parser->queue[i].offset += size;
941 exit_item_not_supported:
942 return rte_flow_error_set(error, -ret, RTE_FLOW_ERROR_TYPE_ITEM,
943 items, "item not supported");
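/*
 * Note: on success parser->queue[i].offset now holds, for every hash Rx
 * queue type, the total size in bytes the Verbs specification will need:
 * the ibv_flow_attr header plus one spec per pattern item plus the optional
 * tag/count/drop specs.  The allocation pass in mlx5_flow_convert() uses
 * this size and then rewinds offset to sizeof(struct ibv_flow_attr) before
 * the specs are actually written.
 */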
947 * Allocate memory space to store verbs flow attributes.
 *   Number of bytes to allocate.
952 * Perform verbose error reporting if not NULL.
955 * A verbs flow attribute on success, NULL otherwise and rte_errno is set.
957 static struct ibv_flow_attr *
958 mlx5_flow_convert_allocate(unsigned int size, struct rte_flow_error *error)
960 struct ibv_flow_attr *ibv_attr;
962 ibv_attr = rte_calloc(__func__, 1, size, 0);
964 rte_flow_error_set(error, ENOMEM,
965 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
967 "cannot allocate verbs spec attributes");
 * Give inner packet matching a higher priority than non-inner (outer)
 * matching.
978 * Pointer to Ethernet device.
979 * @param[in, out] parser
980 * Internal parser structure.
982 * User flow attribute.
985 mlx5_flow_update_priority(struct rte_eth_dev *dev,
986 struct mlx5_flow_parse *parser,
987 const struct rte_flow_attr *attr)
989 struct priv *priv = dev->data->dev_private;
993 /* 8 priorities >= 16 priorities
994 * Control flow: 4-7 8-15
995 * User normal flow: 1-3 4-7
996 * User tunnel flow: 0-2 0-3
	priority = attr->priority * MLX5_VERBS_FLOW_PRIO_8;
	if (priv->config.max_verbs_prio == MLX5_VERBS_FLOW_PRIO_8)
		priority /= 2;
	/*
	 * Lower non-tunnel flows by 1 Verbs priority level if only 8 Verbs
	 * priorities are supported, by 4 otherwise.
	 */
	if (!parser->inner) {
		if (priv->config.max_verbs_prio == MLX5_VERBS_FLOW_PRIO_8)
			priority += 1;
		else
			priority += MLX5_VERBS_FLOW_PRIO_8 / 2;
	}
1012 parser->queue[HASH_RXQ_ETH].ibv_attr->priority = priority +
1013 hash_rxq_init[HASH_RXQ_ETH].flow_priority;
1016 for (i = 0; i != hash_rxq_init_n; ++i) {
1017 if (!parser->queue[i].ibv_attr)
1019 parser->queue[i].ibv_attr->priority = priority +
1020 hash_rxq_init[i].flow_priority;
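/*
 * Note: a worked example of the table above, assuming a device exposing
 * 16 Verbs priorities: a normal user flow (attr->priority == 0) without
 * tunnel items gets 0 * 8 + 4 = 4 as its base, and each hash Rx queue type
 * then adds its own hash_rxq_init[].flow_priority, which lands it in the
 * 4-7 "user normal flow" range.
 */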
1025 * Finalise verbs flow attributes.
1027 * @param[in, out] parser
1028 * Internal parser structure.
1031 mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
1034 uint32_t inner = parser->inner;
	/* Don't create extra flows for outer RSS. */
	if (parser->tunnel && parser->rss_conf.level < 2)
		return;
	/*
	 * Fill missing layers in verbs specifications, or compute the correct
	 * offset to allocate the memory space for the attributes and
	 * specifications.
	 */
1044 for (i = 0; i != hash_rxq_init_n - 1; ++i) {
1046 struct ibv_flow_spec_ipv4_ext ipv4;
1047 struct ibv_flow_spec_ipv6 ipv6;
1048 struct ibv_flow_spec_tcp_udp udp_tcp;
1049 struct ibv_flow_spec_eth eth;
1054 if (i == parser->layer)
1056 if (parser->layer == HASH_RXQ_ETH ||
1057 parser->layer == HASH_RXQ_TUNNEL) {
1058 if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
1059 size = sizeof(struct ibv_flow_spec_ipv4_ext);
1060 specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
1061 .type = inner | IBV_FLOW_SPEC_IPV4_EXT,
1065 size = sizeof(struct ibv_flow_spec_ipv6);
1066 specs.ipv6 = (struct ibv_flow_spec_ipv6){
1067 .type = inner | IBV_FLOW_SPEC_IPV6,
1071 if (parser->queue[i].ibv_attr) {
1072 dst = (void *)((uintptr_t)
1073 parser->queue[i].ibv_attr +
1074 parser->queue[i].offset);
1075 memcpy(dst, &specs, size);
1076 ++parser->queue[i].ibv_attr->num_of_specs;
1078 parser->queue[i].offset += size;
1080 if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
1081 (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
1082 size = sizeof(struct ibv_flow_spec_tcp_udp);
1083 specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
1084 .type = inner | ((i == HASH_RXQ_UDPV4 ||
1085 i == HASH_RXQ_UDPV6) ?
1090 if (parser->queue[i].ibv_attr) {
1091 dst = (void *)((uintptr_t)
1092 parser->queue[i].ibv_attr +
1093 parser->queue[i].offset);
1094 memcpy(dst, &specs, size);
1095 ++parser->queue[i].ibv_attr->num_of_specs;
1097 parser->queue[i].offset += size;
1103 * Update flows according to pattern and RSS hash fields.
1105 * @param[in, out] parser
1106 * Internal parser structure.
1109 * 0 on success, a negative errno value otherwise and rte_errno is set.
1112 mlx5_flow_convert_rss(struct mlx5_flow_parse *parser)
1115 enum hash_rxq_type start;
1116 enum hash_rxq_type layer;
1117 int outer = parser->tunnel && parser->rss_conf.level < 2;
1118 uint64_t rss = parser->rss_conf.types;
1120 /* Default to outer RSS. */
1121 if (!parser->rss_conf.level)
1122 parser->rss_conf.level = 1;
1123 layer = outer ? parser->out_layer : parser->layer;
1124 if (layer == HASH_RXQ_TUNNEL)
1125 layer = HASH_RXQ_ETH;
1127 /* Only one hash type for outer RSS. */
1128 if (rss && layer == HASH_RXQ_ETH) {
1129 start = HASH_RXQ_TCPV4;
1130 } else if (rss && layer != HASH_RXQ_ETH &&
1131 !(rss & hash_rxq_init[layer].dpdk_rss_hf)) {
		/* If RSS does not match the L4 pattern, try L3 RSS. */
1133 if (layer < HASH_RXQ_IPV4)
1134 layer = HASH_RXQ_IPV4;
1135 else if (layer > HASH_RXQ_IPV4 && layer < HASH_RXQ_IPV6)
1136 layer = HASH_RXQ_IPV6;
1141 /* Scan first valid hash type. */
1142 for (i = start; rss && i <= layer; ++i) {
1143 if (!parser->queue[i].ibv_attr)
1145 if (hash_rxq_init[i].dpdk_rss_hf & rss)
1148 if (rss && i <= layer)
1149 parser->queue[layer].hash_fields =
1150 hash_rxq_init[i].hash_fields;
1151 /* Trim unused hash types. */
1152 for (i = 0; i != hash_rxq_init_n; ++i) {
1153 if (parser->queue[i].ibv_attr && i != layer) {
1154 rte_free(parser->queue[i].ibv_attr);
1155 parser->queue[i].ibv_attr = NULL;
1159 /* Expand for inner or normal RSS. */
1160 if (rss && (layer == HASH_RXQ_ETH || layer == HASH_RXQ_IPV4))
1161 start = HASH_RXQ_TCPV4;
1162 else if (rss && layer == HASH_RXQ_IPV6)
1163 start = HASH_RXQ_TCPV6;
1166 /* For L4 pattern, try L3 RSS if no L4 RSS. */
1167 /* Trim unused hash types. */
1168 for (i = 0; i != hash_rxq_init_n; ++i) {
1169 if (!parser->queue[i].ibv_attr)
1171 if (i < start || i > layer) {
1172 rte_free(parser->queue[i].ibv_attr);
1173 parser->queue[i].ibv_attr = NULL;
1178 if (hash_rxq_init[i].dpdk_rss_hf & rss) {
1179 parser->queue[i].hash_fields =
1180 hash_rxq_init[i].hash_fields;
1181 } else if (i != layer) {
1182 /* Remove unused RSS expansion. */
1183 rte_free(parser->queue[i].ibv_attr);
1184 parser->queue[i].ibv_attr = NULL;
1185 } else if (layer < HASH_RXQ_IPV4 &&
1186 (hash_rxq_init[HASH_RXQ_IPV4].dpdk_rss_hf &
1188 /* Allow IPv4 RSS on L4 pattern. */
1189 parser->queue[i].hash_fields =
1190 hash_rxq_init[HASH_RXQ_IPV4]
1192 } else if (i > HASH_RXQ_IPV4 && i < HASH_RXQ_IPV6 &&
1193 (hash_rxq_init[HASH_RXQ_IPV6].dpdk_rss_hf &
			/* Allow IPv6 RSS on L4 pattern. */
			parser->queue[i].hash_fields =
				hash_rxq_init[HASH_RXQ_IPV6]
					.hash_fields;
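/*
 * Note: a concrete example of the trimming above.  For the pattern
 * "eth / ipv4 / udp", only the HASH_RXQ_UDPV4 specification survives;
 * with ETH_RSS_NONFRAG_IPV4_UDP requested it keeps the UDP hash fields,
 * while with plain ETH_RSS_IPV4 it falls back to the IPv4 hash fields, so
 * packets are still matched on UDP but spread by IP addresses only.
 */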
1206 * Validate and convert a flow supported by the NIC.
1209 * Pointer to Ethernet device.
1211 * Flow rule attributes.
1212 * @param[in] pattern
1213 * Pattern specification (list terminated by the END pattern item).
1214 * @param[in] actions
1215 * Associated actions (list terminated by the END action).
1217 * Perform verbose error reporting if not NULL.
1218 * @param[in, out] parser
1219 * Internal parser structure.
1222 * 0 on success, a negative errno value otherwise and rte_errno is set.
1225 mlx5_flow_convert(struct rte_eth_dev *dev,
1226 const struct rte_flow_attr *attr,
1227 const struct rte_flow_item items[],
1228 const struct rte_flow_action actions[],
1229 struct rte_flow_error *error,
1230 struct mlx5_flow_parse *parser)
1232 const struct mlx5_flow_items *cur_item = mlx5_flow_items;
1236 /* First step. Validate the attributes, items and actions. */
1237 *parser = (struct mlx5_flow_parse){
1238 .create = parser->create,
1239 .layer = HASH_RXQ_ETH,
1240 .mark_id = MLX5_FLOW_MARK_DEFAULT,
1242 ret = mlx5_flow_convert_attributes(attr, error);
1245 ret = mlx5_flow_convert_actions(dev, actions, error, parser);
1248 ret = mlx5_flow_convert_items_validate(dev, items, error, parser);
1251 mlx5_flow_convert_finalise(parser);
1254 * Allocate the memory space to store verbs specifications.
1257 unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;
1259 parser->queue[HASH_RXQ_ETH].ibv_attr =
1260 mlx5_flow_convert_allocate(offset, error);
1261 if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
1263 parser->queue[HASH_RXQ_ETH].offset =
1264 sizeof(struct ibv_flow_attr);
1266 for (i = 0; i != hash_rxq_init_n; ++i) {
1267 unsigned int offset;
1269 offset = parser->queue[i].offset;
1270 parser->queue[i].ibv_attr =
1271 mlx5_flow_convert_allocate(offset, error);
1272 if (!parser->queue[i].ibv_attr)
1274 parser->queue[i].offset = sizeof(struct ibv_flow_attr);
	/* Third step. Run the item conversion callbacks to fill the specifications. */
1280 parser->layer = HASH_RXQ_ETH;
1281 for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
1282 struct mlx5_flow_data data = {
1288 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
1290 cur_item = &mlx5_flow_items[items->type];
1291 ret = cur_item->convert(items,
1292 (cur_item->default_mask ?
1293 cur_item->default_mask :
1299 if (!parser->drop) {
1300 /* RSS check, remove unused hash types. */
1301 ret = mlx5_flow_convert_rss(parser);
1304 /* Complete missing specification. */
1305 mlx5_flow_convert_finalise(parser);
1307 mlx5_flow_update_priority(dev, parser, attr);
1309 mlx5_flow_create_flag_mark(parser, parser->mark_id);
1310 if (parser->count && parser->create) {
1311 mlx5_flow_create_count(dev, parser);
1313 goto exit_count_error;
1316 /* Only verification is expected, all resources should be released. */
1317 if (!parser->create) {
1318 for (i = 0; i != hash_rxq_init_n; ++i) {
1319 if (parser->queue[i].ibv_attr) {
1320 rte_free(parser->queue[i].ibv_attr);
1321 parser->queue[i].ibv_attr = NULL;
1327 for (i = 0; i != hash_rxq_init_n; ++i) {
1328 if (parser->queue[i].ibv_attr) {
1329 rte_free(parser->queue[i].ibv_attr);
1330 parser->queue[i].ibv_attr = NULL;
1333 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1334 NULL, "cannot allocate verbs spec attributes");
1337 rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1338 NULL, "cannot create counter");
 * Copy the created specification into the flow.
1346 * Internal parser structure.
1348 * Create specification.
1350 * Size in bytes of the specification to copy.
1353 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
1359 for (i = 0; i != hash_rxq_init_n; ++i) {
1360 if (!parser->queue[i].ibv_attr)
1362 dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1363 parser->queue[i].offset);
1364 memcpy(dst, src, size);
1365 ++parser->queue[i].ibv_attr->num_of_specs;
1366 parser->queue[i].offset += size;
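/*
 * Note: every item conversion callback below ends with a call to
 * mlx5_flow_create_copy(); the same Verbs spec is appended to each
 * per-hash-queue attribute still allocated, and num_of_specs/offset are
 * advanced so that the next spec lands right behind it.
 */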
1371 * Convert Ethernet item to Verbs specification.
1374 * Item specification.
1375 * @param default_mask[in]
1376 * Default bit-masks to use when item->mask is not provided.
1377 * @param data[in, out]
1381 * 0 on success, a negative errno value otherwise and rte_errno is set.
1384 mlx5_flow_create_eth(const struct rte_flow_item *item,
1385 const void *default_mask,
1386 struct mlx5_flow_data *data)
1388 const struct rte_flow_item_eth *spec = item->spec;
1389 const struct rte_flow_item_eth *mask = item->mask;
1390 struct mlx5_flow_parse *parser = data->parser;
1391 const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1392 struct ibv_flow_spec_eth eth = {
1393 .type = parser->inner | IBV_FLOW_SPEC_ETH,
1397 parser->layer = HASH_RXQ_ETH;
1402 mask = default_mask;
1403 memcpy(ð.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
1404 memcpy(ð.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
1405 eth.val.ether_type = spec->type;
1406 memcpy(ð.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
1407 memcpy(ð.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
1408 eth.mask.ether_type = mask->type;
1409 /* Remove unwanted bits from values. */
1410 for (i = 0; i < ETHER_ADDR_LEN; ++i) {
1411 eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
1412 eth.val.src_mac[i] &= eth.mask.src_mac[i];
1414 eth.val.ether_type &= eth.mask.ether_type;
1416 mlx5_flow_create_copy(parser, ð, eth_size);
1421 * Convert VLAN item to Verbs specification.
1424 * Item specification.
1425 * @param default_mask[in]
1426 * Default bit-masks to use when item->mask is not provided.
1427 * @param data[in, out]
1431 * 0 on success, a negative errno value otherwise and rte_errno is set.
1434 mlx5_flow_create_vlan(const struct rte_flow_item *item,
1435 const void *default_mask,
1436 struct mlx5_flow_data *data)
1438 const struct rte_flow_item_vlan *spec = item->spec;
1439 const struct rte_flow_item_vlan *mask = item->mask;
1440 struct mlx5_flow_parse *parser = data->parser;
1441 struct ibv_flow_spec_eth *eth;
1442 const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1443 const char *msg = "VLAN cannot be empty";
1448 mask = default_mask;
1450 for (i = 0; i != hash_rxq_init_n; ++i) {
1451 if (!parser->queue[i].ibv_attr)
1454 eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1455 parser->queue[i].offset - eth_size);
1456 eth->val.vlan_tag = spec->tci;
1457 eth->mask.vlan_tag = mask->tci;
1458 eth->val.vlan_tag &= eth->mask.vlan_tag;
		 * From the Verbs perspective an empty VLAN is equivalent
		 * to a packet without a VLAN layer.
1463 if (!eth->mask.vlan_tag)
1465 /* Outer TPID cannot be matched. */
1466 if (eth->mask.ether_type) {
1467 msg = "VLAN TPID matching is not supported";
1470 eth->val.ether_type = spec->inner_type;
1471 eth->mask.ether_type = mask->inner_type;
1472 eth->val.ether_type &= eth->mask.ether_type;
1477 return rte_flow_error_set(data->error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM,
1482 * Convert IPv4 item to Verbs specification.
1485 * Item specification.
1486 * @param default_mask[in]
1487 * Default bit-masks to use when item->mask is not provided.
1488 * @param data[in, out]
1492 * 0 on success, a negative errno value otherwise and rte_errno is set.
1495 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
1496 const void *default_mask,
1497 struct mlx5_flow_data *data)
1499 struct priv *priv = data->dev->data->dev_private;
1500 const struct rte_flow_item_ipv4 *spec = item->spec;
1501 const struct rte_flow_item_ipv4 *mask = item->mask;
1502 struct mlx5_flow_parse *parser = data->parser;
1503 unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
1504 struct ibv_flow_spec_ipv4_ext ipv4 = {
1505 .type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
1509 if (parser->layer == HASH_RXQ_TUNNEL &&
1510 parser->tunnel == ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] &&
1511 !priv->config.l3_vxlan_en)
1512 return rte_flow_error_set(data->error, EINVAL,
1513 RTE_FLOW_ERROR_TYPE_ITEM,
1515 "L3 VXLAN not enabled by device"
1516 " parameter and/or not configured"
1518 parser->layer = HASH_RXQ_IPV4;
1521 mask = default_mask;
1522 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
1523 .src_ip = spec->hdr.src_addr,
1524 .dst_ip = spec->hdr.dst_addr,
1525 .proto = spec->hdr.next_proto_id,
1526 .tos = spec->hdr.type_of_service,
1528 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
1529 .src_ip = mask->hdr.src_addr,
1530 .dst_ip = mask->hdr.dst_addr,
1531 .proto = mask->hdr.next_proto_id,
1532 .tos = mask->hdr.type_of_service,
1534 /* Remove unwanted bits from values. */
1535 ipv4.val.src_ip &= ipv4.mask.src_ip;
1536 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
1537 ipv4.val.proto &= ipv4.mask.proto;
1538 ipv4.val.tos &= ipv4.mask.tos;
1540 mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
1545 * Convert IPv6 item to Verbs specification.
1548 * Item specification.
1549 * @param default_mask[in]
1550 * Default bit-masks to use when item->mask is not provided.
1551 * @param data[in, out]
1555 * 0 on success, a negative errno value otherwise and rte_errno is set.
1558 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
1559 const void *default_mask,
1560 struct mlx5_flow_data *data)
1562 struct priv *priv = data->dev->data->dev_private;
1563 const struct rte_flow_item_ipv6 *spec = item->spec;
1564 const struct rte_flow_item_ipv6 *mask = item->mask;
1565 struct mlx5_flow_parse *parser = data->parser;
1566 unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
1567 struct ibv_flow_spec_ipv6 ipv6 = {
1568 .type = parser->inner | IBV_FLOW_SPEC_IPV6,
1572 if (parser->layer == HASH_RXQ_TUNNEL &&
1573 parser->tunnel == ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] &&
1574 !priv->config.l3_vxlan_en)
1575 return rte_flow_error_set(data->error, EINVAL,
1576 RTE_FLOW_ERROR_TYPE_ITEM,
1578 "L3 VXLAN not enabled by device"
1579 " parameter and/or not configured"
1581 parser->layer = HASH_RXQ_IPV6;
1584 uint32_t vtc_flow_val;
1585 uint32_t vtc_flow_mask;
1588 mask = default_mask;
1589 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
1590 RTE_DIM(ipv6.val.src_ip));
1591 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
1592 RTE_DIM(ipv6.val.dst_ip));
1593 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
1594 RTE_DIM(ipv6.mask.src_ip));
1595 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
1596 RTE_DIM(ipv6.mask.dst_ip));
1597 vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
1598 vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
1599 ipv6.val.flow_label =
1600 rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
1602 ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
1604 ipv6.val.next_hdr = spec->hdr.proto;
1605 ipv6.val.hop_limit = spec->hdr.hop_limits;
1606 ipv6.mask.flow_label =
1607 rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
1609 ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
1611 ipv6.mask.next_hdr = mask->hdr.proto;
1612 ipv6.mask.hop_limit = mask->hdr.hop_limits;
1613 /* Remove unwanted bits from values. */
1614 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
1615 ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
1616 ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
1618 ipv6.val.flow_label &= ipv6.mask.flow_label;
1619 ipv6.val.traffic_class &= ipv6.mask.traffic_class;
1620 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
1621 ipv6.val.hop_limit &= ipv6.mask.hop_limit;
1623 mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
1628 * Convert UDP item to Verbs specification.
1631 * Item specification.
1632 * @param default_mask[in]
1633 * Default bit-masks to use when item->mask is not provided.
1634 * @param data[in, out]
1638 * 0 on success, a negative errno value otherwise and rte_errno is set.
1641 mlx5_flow_create_udp(const struct rte_flow_item *item,
1642 const void *default_mask,
1643 struct mlx5_flow_data *data)
1645 const struct rte_flow_item_udp *spec = item->spec;
1646 const struct rte_flow_item_udp *mask = item->mask;
1647 struct mlx5_flow_parse *parser = data->parser;
1648 unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1649 struct ibv_flow_spec_tcp_udp udp = {
1650 .type = parser->inner | IBV_FLOW_SPEC_UDP,
1654 if (parser->layer == HASH_RXQ_IPV4)
1655 parser->layer = HASH_RXQ_UDPV4;
1657 parser->layer = HASH_RXQ_UDPV6;
1660 mask = default_mask;
1661 udp.val.dst_port = spec->hdr.dst_port;
1662 udp.val.src_port = spec->hdr.src_port;
1663 udp.mask.dst_port = mask->hdr.dst_port;
1664 udp.mask.src_port = mask->hdr.src_port;
1665 /* Remove unwanted bits from values. */
1666 udp.val.src_port &= udp.mask.src_port;
1667 udp.val.dst_port &= udp.mask.dst_port;
1669 mlx5_flow_create_copy(parser, &udp, udp_size);
1674 * Convert TCP item to Verbs specification.
1677 * Item specification.
1678 * @param default_mask[in]
1679 * Default bit-masks to use when item->mask is not provided.
1680 * @param data[in, out]
1684 * 0 on success, a negative errno value otherwise and rte_errno is set.
1687 mlx5_flow_create_tcp(const struct rte_flow_item *item,
1688 const void *default_mask,
1689 struct mlx5_flow_data *data)
1691 const struct rte_flow_item_tcp *spec = item->spec;
1692 const struct rte_flow_item_tcp *mask = item->mask;
1693 struct mlx5_flow_parse *parser = data->parser;
1694 unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1695 struct ibv_flow_spec_tcp_udp tcp = {
1696 .type = parser->inner | IBV_FLOW_SPEC_TCP,
1700 if (parser->layer == HASH_RXQ_IPV4)
1701 parser->layer = HASH_RXQ_TCPV4;
1703 parser->layer = HASH_RXQ_TCPV6;
1706 mask = default_mask;
1707 tcp.val.dst_port = spec->hdr.dst_port;
1708 tcp.val.src_port = spec->hdr.src_port;
1709 tcp.mask.dst_port = mask->hdr.dst_port;
1710 tcp.mask.src_port = mask->hdr.src_port;
1711 /* Remove unwanted bits from values. */
1712 tcp.val.src_port &= tcp.mask.src_port;
1713 tcp.val.dst_port &= tcp.mask.dst_port;
1715 mlx5_flow_create_copy(parser, &tcp, tcp_size);
1720 * Convert VXLAN item to Verbs specification.
1723 * Item specification.
1724 * @param default_mask[in]
1725 * Default bit-masks to use when item->mask is not provided.
1726 * @param data[in, out]
1730 * 0 on success, a negative errno value otherwise and rte_errno is set.
1733 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
1734 const void *default_mask,
1735 struct mlx5_flow_data *data)
1737 const struct rte_flow_item_vxlan *spec = item->spec;
1738 const struct rte_flow_item_vxlan *mask = item->mask;
1739 struct mlx5_flow_parse *parser = data->parser;
1740 unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1741 struct ibv_flow_spec_tunnel vxlan = {
1742 .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
	union vni {
		uint32_t vlan_id;
		uint8_t vni[4];
	} id = { .vlan_id = 0, };

	parser->inner = IBV_FLOW_SPEC_INNER;
1752 parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)];
1753 parser->out_layer = parser->layer;
1754 parser->layer = HASH_RXQ_TUNNEL;
1755 /* Default VXLAN to outer RSS. */
1756 if (!parser->rss_conf.level)
1757 parser->rss_conf.level = 1;
1760 mask = default_mask;
1761 memcpy(&id.vni[1], spec->vni, 3);
1762 vxlan.val.tunnel_id = id.vlan_id;
1763 memcpy(&id.vni[1], mask->vni, 3);
1764 vxlan.mask.tunnel_id = id.vlan_id;
1765 /* Remove unwanted bits from values. */
1766 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
	/*
	 * Tunnel id 0 is equivalent to not adding a VXLAN layer: if only this
	 * layer is defined in the Verbs specification, it is interpreted as a
	 * wildcard and all packets will match this rule. If it follows a full
	 * stack layer (ex: eth / ipv4 / udp), all packets matching the layers
	 * before will also match this rule.
	 * To avoid such a situation, VNI 0 is currently refused.
	 */
1776 /* Only allow tunnel w/o tunnel id pattern after proper outer spec. */
1777 if (parser->out_layer == HASH_RXQ_ETH && !vxlan.val.tunnel_id)
1778 return rte_flow_error_set(data->error, EINVAL,
1779 RTE_FLOW_ERROR_TYPE_ITEM,
1781 "VxLAN vni cannot be 0");
1782 mlx5_flow_create_copy(parser, &vxlan, size);
1787 * Convert VXLAN-GPE item to Verbs specification.
1790 * Item specification.
1791 * @param default_mask[in]
1792 * Default bit-masks to use when item->mask is not provided.
1793 * @param data[in, out]
1797 * 0 on success, a negative errno value otherwise and rte_errno is set.
1800 mlx5_flow_create_vxlan_gpe(const struct rte_flow_item *item,
1801 const void *default_mask,
1802 struct mlx5_flow_data *data)
1804 struct priv *priv = data->dev->data->dev_private;
1805 const struct rte_flow_item_vxlan_gpe *spec = item->spec;
1806 const struct rte_flow_item_vxlan_gpe *mask = item->mask;
1807 struct mlx5_flow_parse *parser = data->parser;
1808 unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1809 struct ibv_flow_spec_tunnel vxlan = {
1810 .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
	union vni {
		uint32_t vlan_id;
		uint8_t vni[4];
	} id = { .vlan_id = 0, };

	if (!priv->config.l3_vxlan_en)
1819 return rte_flow_error_set(data->error, EINVAL,
1820 RTE_FLOW_ERROR_TYPE_ITEM,
1822 "L3 VXLAN not enabled by device"
1823 " parameter and/or not configured"
1826 parser->inner = IBV_FLOW_SPEC_INNER;
1827 parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN_GPE)];
1828 parser->out_layer = parser->layer;
1829 parser->layer = HASH_RXQ_TUNNEL;
1830 /* Default VXLAN-GPE to outer RSS. */
1831 if (!parser->rss_conf.level)
1832 parser->rss_conf.level = 1;
1835 mask = default_mask;
1836 memcpy(&id.vni[1], spec->vni, 3);
1837 vxlan.val.tunnel_id = id.vlan_id;
1838 memcpy(&id.vni[1], mask->vni, 3);
1839 vxlan.mask.tunnel_id = id.vlan_id;
1841 return rte_flow_error_set(data->error, EINVAL,
1842 RTE_FLOW_ERROR_TYPE_ITEM,
1844 "VxLAN-GPE protocol not"
1846 /* Remove unwanted bits from values. */
1847 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
	/*
	 * Tunnel id 0 is equivalent to not adding a VXLAN layer: if only this
	 * layer is defined in the Verbs specification, it is interpreted as a
	 * wildcard and all packets will match this rule. If it follows a full
	 * stack layer (ex: eth / ipv4 / udp), all packets matching the layers
	 * before will also match this rule.
	 * To avoid such a situation, VNI 0 is currently refused.
	 */
1857 /* Only allow tunnel w/o tunnel id pattern after proper outer spec. */
1858 if (parser->out_layer == HASH_RXQ_ETH && !vxlan.val.tunnel_id)
1859 return rte_flow_error_set(data->error, EINVAL,
1860 RTE_FLOW_ERROR_TYPE_ITEM,
1862 "VxLAN-GPE vni cannot be 0");
1863 mlx5_flow_create_copy(parser, &vxlan, size);
1868 * Convert GRE item to Verbs specification.
1871 * Item specification.
1872 * @param default_mask[in]
1873 * Default bit-masks to use when item->mask is not provided.
1874 * @param data[in, out]
1878 * 0 on success, a negative errno value otherwise and rte_errno is set.
1881 mlx5_flow_create_gre(const struct rte_flow_item *item __rte_unused,
1882 const void *default_mask __rte_unused,
1883 struct mlx5_flow_data *data)
1885 struct mlx5_flow_parse *parser = data->parser;
1886 unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1887 struct ibv_flow_spec_tunnel tunnel = {
1888 .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1891 struct ibv_flow_spec_ipv4_ext *ipv4;
1892 struct ibv_flow_spec_ipv6 *ipv6;
1895 parser->inner = IBV_FLOW_SPEC_INNER;
1896 parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_GRE)];
1897 parser->out_layer = parser->layer;
1898 parser->layer = HASH_RXQ_TUNNEL;
1899 /* Default GRE to inner RSS. */
1900 if (!parser->rss_conf.level)
1901 parser->rss_conf.level = 2;
1902 /* Update encapsulation IP layer protocol. */
1903 for (i = 0; i != hash_rxq_init_n; ++i) {
1904 if (!parser->queue[i].ibv_attr)
1906 if (parser->out_layer == HASH_RXQ_IPV4) {
1907 ipv4 = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1908 parser->queue[i].offset -
1909 sizeof(struct ibv_flow_spec_ipv4_ext));
1910 if (ipv4->mask.proto && ipv4->val.proto != MLX5_GRE)
1912 ipv4->val.proto = MLX5_GRE;
1913 ipv4->mask.proto = 0xff;
1914 } else if (parser->out_layer == HASH_RXQ_IPV6) {
1915 ipv6 = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1916 parser->queue[i].offset -
1917 sizeof(struct ibv_flow_spec_ipv6));
1918 if (ipv6->mask.next_hdr &&
1919 ipv6->val.next_hdr != MLX5_GRE)
1921 ipv6->val.next_hdr = MLX5_GRE;
1922 ipv6->mask.next_hdr = 0xff;
1925 if (i != hash_rxq_init_n)
1926 return rte_flow_error_set(data->error, EINVAL,
1927 RTE_FLOW_ERROR_TYPE_ITEM,
1929 "IP protocol of GRE must be 47");
1930 mlx5_flow_create_copy(parser, &tunnel, size);
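	/*
	 * Note: GRE has no VNI-like field in this Verbs tunnel spec, so the
	 * spec itself stays empty; instead the outer IPv4/IPv6 spec already
	 * written to the attribute buffer is patched above so its
	 * protocol/next-header field matches MLX5_GRE (IP protocol 47).  A
	 * pattern that pins a different protocol, e.g. "ipv4 proto is 17 /
	 * gre", is rejected with the error just above.
	 */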
1935 * Convert mark/flag action to Verbs specification.
1938 * Internal parser structure.
1943 * 0 on success, a negative errno value otherwise and rte_errno is set.
1946 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
1948 unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1949 struct ibv_flow_spec_action_tag tag = {
1950 .type = IBV_FLOW_SPEC_ACTION_TAG,
1952 .tag_id = mlx5_flow_mark_set(mark_id),
1955 assert(parser->mark);
1956 mlx5_flow_create_copy(parser, &tag, size);
1961 * Convert count action to Verbs specification.
1964 * Pointer to Ethernet device.
1966 * Pointer to MLX5 flow parser structure.
1969 * 0 on success, a negative errno value otherwise and rte_errno is set.
1972 mlx5_flow_create_count(struct rte_eth_dev *dev __rte_unused,
1973 struct mlx5_flow_parse *parser __rte_unused)
1975 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
1976 struct priv *priv = dev->data->dev_private;
1977 unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
1978 struct ibv_counter_set_init_attr init_attr = {0};
1979 struct ibv_flow_spec_counter_action counter = {
1980 .type = IBV_FLOW_SPEC_ACTION_COUNT,
1982 .counter_set_handle = 0,
1985 init_attr.counter_set_id = 0;
1986 parser->cs = mlx5_glue->create_counter_set(priv->ctx, &init_attr);
1991 counter.counter_set_handle = parser->cs->handle;
1992 mlx5_flow_create_copy(parser, &counter, size);
1998 * Complete flow rule creation with a drop queue.
2001 * Pointer to Ethernet device.
2003 * Internal parser structure.
2005 * Pointer to the rte_flow.
2007 * Perform verbose error reporting if not NULL.
2010 * 0 on success, a negative errno value otherwise and rte_errno is set.
2013 mlx5_flow_create_action_queue_drop(struct rte_eth_dev *dev,
2014 struct mlx5_flow_parse *parser,
2015 struct rte_flow *flow,
2016 struct rte_flow_error *error)
2018 struct priv *priv = dev->data->dev_private;
2019 struct ibv_flow_spec_action_drop *drop;
2020 unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
2025 drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
2026 parser->queue[HASH_RXQ_ETH].offset);
2027 *drop = (struct ibv_flow_spec_action_drop){
2028 .type = IBV_FLOW_SPEC_ACTION_DROP,
2031 ++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
2032 parser->queue[HASH_RXQ_ETH].offset += size;
2033 flow->frxq[HASH_RXQ_ETH].ibv_attr =
2034 parser->queue[HASH_RXQ_ETH].ibv_attr;
2036 flow->cs = parser->cs;
2037 if (!priv->dev->data->dev_started)
2039 parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
2040 flow->frxq[HASH_RXQ_ETH].ibv_flow =
2041 mlx5_glue->create_flow(priv->flow_drop_queue->qp,
2042 flow->frxq[HASH_RXQ_ETH].ibv_attr);
2043 if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2044 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
2045 NULL, "flow rule creation failure");
2051 if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2052 claim_zero(mlx5_glue->destroy_flow
2053 (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2054 flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
2056 if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
2057 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
2058 flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
2061 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2069 * Create hash Rx queues when RSS is enabled.
2072 * Pointer to Ethernet device.
2074 * Internal parser structure.
2076 * Pointer to the rte_flow.
2078 * Perform verbose error reporting if not NULL.
2081 * 0 on success, a negative errno value otherwise and rte_errno is set.
2084 mlx5_flow_create_action_queue_rss(struct rte_eth_dev *dev,
2085 struct mlx5_flow_parse *parser,
2086 struct rte_flow *flow,
2087 struct rte_flow_error *error)
2089 struct priv *priv = dev->data->dev_private;
2092 for (i = 0; i != hash_rxq_init_n; ++i) {
2093 if (!parser->queue[i].ibv_attr)
2095 flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
2096 parser->queue[i].ibv_attr = NULL;
2097 flow->frxq[i].hash_fields = parser->queue[i].hash_fields;
2098 if (!priv->dev->data->dev_started)
2100 flow->frxq[i].hrxq =
2102 parser->rss_conf.key,
2103 parser->rss_conf.key_len,
2104 flow->frxq[i].hash_fields,
2105 parser->rss_conf.queue,
2106 parser->rss_conf.queue_num,
2108 parser->rss_conf.level);
2109 if (flow->frxq[i].hrxq)
2111 flow->frxq[i].hrxq =
2113 parser->rss_conf.key,
2114 parser->rss_conf.key_len,
2115 flow->frxq[i].hash_fields,
2116 parser->rss_conf.queue,
2117 parser->rss_conf.queue_num,
2119 parser->rss_conf.level);
2120 if (!flow->frxq[i].hrxq) {
2121 return rte_flow_error_set(error, ENOMEM,
2122 RTE_FLOW_ERROR_TYPE_HANDLE,
2124 "cannot create hash rxq");
2131 * RXQ update after flow rule creation.
2134 * Pointer to Ethernet device.
2136 * Pointer to the flow rule.
2139 mlx5_flow_create_update_rxqs(struct rte_eth_dev *dev, struct rte_flow *flow)
2141 struct priv *priv = dev->data->dev_private;
2145 if (!dev->data->dev_started)
2147 for (i = 0; i != flow->rss_conf.queue_num; ++i) {
2148 struct mlx5_rxq_data *rxq_data = (*priv->rxqs)
2149 [(*flow->queues)[i]];
2150 struct mlx5_rxq_ctrl *rxq_ctrl =
2151 container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
2152 uint8_t tunnel = PTYPE_IDX(flow->tunnel);
2154 rxq_data->mark |= flow->mark;
2157 rxq_ctrl->tunnel_types[tunnel] += 1;
		/* Clear the tunnel type if more than one tunnel type is set. */
2159 for (j = 0; j != RTE_DIM(rxq_ctrl->tunnel_types); ++j) {
2162 if (rxq_ctrl->tunnel_types[j] > 0) {
2163 rxq_data->tunnel = 0;
2167 if (j == RTE_DIM(rxq_ctrl->tunnel_types))
2168 rxq_data->tunnel = flow->tunnel;
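	/*
	 * Note: rxq_ctrl->tunnel_types[] counts, per tunnel kind, how many
	 * flows direct that tunnel type to the queue.  rxq_data->tunnel is
	 * only kept when a single kind is present so the Rx burst code can
	 * report an exact packet type; as soon as a second kind shows up it
	 * is cleared back to 0.
	 */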
2173 * Dump flow hash RX queue detail.
2176 * Pointer to Ethernet device.
2178 * Pointer to the rte_flow.
2180 * Hash RX queue index.
2183 mlx5_flow_dump(struct rte_eth_dev *dev __rte_unused,
2184 struct rte_flow *flow __rte_unused,
2185 unsigned int hrxq_idx __rte_unused)
2193 spec_ptr = (uintptr_t)(flow->frxq[hrxq_idx].ibv_attr + 1);
2194 for (j = 0, off = 0; j < flow->frxq[hrxq_idx].ibv_attr->num_of_specs;
2196 struct ibv_flow_spec *spec = (void *)spec_ptr;
2197 off += sprintf(buf + off, " %x(%hu)", spec->hdr.type,
2199 spec_ptr += spec->hdr.size;
2202 "port %u Verbs flow %p type %u: hrxq:%p qp:%p ind:%p,"
2203 " hash:%" PRIx64 "/%u specs:%hhu(%hu), priority:%hu, type:%d,"
2204 " flags:%x, comp_mask:%x specs:%s",
2205 dev->data->port_id, (void *)flow, hrxq_idx,
2206 (void *)flow->frxq[hrxq_idx].hrxq,
2207 (void *)flow->frxq[hrxq_idx].hrxq->qp,
2208 (void *)flow->frxq[hrxq_idx].hrxq->ind_table,
2209 flow->frxq[hrxq_idx].hash_fields |
2211 flow->rss_conf.level > 1 ? (uint32_t)IBV_RX_HASH_INNER : 0),
2212 flow->rss_conf.queue_num,
2213 flow->frxq[hrxq_idx].ibv_attr->num_of_specs,
2214 flow->frxq[hrxq_idx].ibv_attr->size,
2215 flow->frxq[hrxq_idx].ibv_attr->priority,
2216 flow->frxq[hrxq_idx].ibv_attr->type,
2217 flow->frxq[hrxq_idx].ibv_attr->flags,
2218 flow->frxq[hrxq_idx].ibv_attr->comp_mask,
2224 * Complete flow rule creation.
2227 * Pointer to Ethernet device.
2229 * Internal parser structure.
2231 * Pointer to the rte_flow.
2233 * Perform verbose error reporting if not NULL.
2236 * 0 on success, a negative errno value otherwise and rte_errno is set.
2239 mlx5_flow_create_action_queue(struct rte_eth_dev *dev,
2240 struct mlx5_flow_parse *parser,
2241 struct rte_flow *flow,
2242 struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;
	int ret;
	unsigned int i;
	unsigned int flows_n = 0;

	assert(priv->pd);
	assert(priv->ctx);
	assert(!parser->drop);
	ret = mlx5_flow_create_action_queue_rss(dev, parser, flow, error);
	if (ret)
		goto error;
	if (parser->count)
		flow->cs = parser->cs;
	if (!priv->dev->data->dev_started)
		return 0;
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (!flow->frxq[i].hrxq)
			continue;
		flow->frxq[i].ibv_flow =
			mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
					       flow->frxq[i].ibv_attr);
		mlx5_flow_dump(dev, flow, i);
		if (!flow->frxq[i].ibv_flow) {
			rte_flow_error_set(error, ENOMEM,
					   RTE_FLOW_ERROR_TYPE_HANDLE,
					   NULL, "flow rule creation failure");
			goto error;
		}
		++flows_n;
	}
	if (!flows_n) {
		rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "internal error in flow creation");
		goto error;
	}
	mlx5_flow_create_update_rxqs(dev, flow);
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	assert(flow);
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (flow->frxq[i].ibv_flow) {
			struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;

			claim_zero(mlx5_glue->destroy_flow(ibv_flow));
		}
		if (flow->frxq[i].hrxq)
			mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
		if (flow->frxq[i].ibv_attr)
			rte_free(flow->frxq[i].ibv_attr);
	}
	if (flow->cs) {
		claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
		flow->cs = NULL;
		parser->cs = NULL;
	}
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}
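/*
 * Illustrative sketch, not driver code: the error paths above rely on two
 * conventions worth spelling out.  First, rte_errno is saved before
 * releasing partially created resources (the release helpers may clobber
 * it) and restored before returning.  Second, rte_flow_error_set() both
 * fills the caller-provided error structure and returns the negative errno
 * value, which is why helpers can simply "return rte_flow_error_set(...)".
 * Minimal usage under those assumptions:
 */
#if 0
static int
example_fail_with_context(struct rte_flow_error *error)
{
	/* Sets rte_errno = ENOTSUP, fills *error, returns -ENOTSUP. */
	return rte_flow_error_set(error, ENOTSUP,
				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				  NULL, "feature not supported");
}
#endif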
/**
 * Convert a flow.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param list
 *   Pointer to a TAILQ flow list.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A flow on success, NULL otherwise and rte_errno is set.
 */
static struct rte_flow *
mlx5_flow_list_create(struct rte_eth_dev *dev,
		      struct mlx5_flows *list,
		      const struct rte_flow_attr *attr,
		      const struct rte_flow_item items[],
		      const struct rte_flow_action actions[],
		      struct rte_flow_error *error)
{
	struct mlx5_flow_parse parser = { .create = 1, };
	struct rte_flow *flow = NULL;
	unsigned int i;
	int ret;

	ret = mlx5_flow_convert(dev, attr, items, actions, error, &parser);
	if (ret)
		goto exit;
	flow = rte_calloc(__func__, 1,
			  sizeof(*flow) +
			  parser.rss_conf.queue_num * sizeof(uint16_t),
			  0);
	if (!flow) {
		rte_flow_error_set(error, ENOMEM,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "cannot allocate flow memory");
		return NULL;
	}
	/* Copy configuration. */
	flow->queues = (uint16_t (*)[])(flow + 1);
	flow->tunnel = parser.tunnel;
	flow->rss_conf = (struct rte_flow_action_rss){
		.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
		.level = 0,
		.types = parser.rss_conf.types,
		.key_len = parser.rss_conf.key_len,
		.queue_num = parser.rss_conf.queue_num,
		.key = memcpy(flow->rss_key, parser.rss_conf.key,
			      sizeof(*parser.rss_conf.key) *
			      parser.rss_conf.key_len),
		.queue = memcpy(flow->queues, parser.rss_conf.queue,
				sizeof(*parser.rss_conf.queue) *
				parser.rss_conf.queue_num),
	};
	flow->mark = parser.mark;
	/* Finalize the flow. */
	if (parser.drop)
		ret = mlx5_flow_create_action_queue_drop(dev, &parser, flow,
							 error);
	else
		ret = mlx5_flow_create_action_queue(dev, &parser, flow, error);
	if (ret)
		goto exit;
	TAILQ_INSERT_TAIL(list, flow, next);
	DRV_LOG(DEBUG, "port %u flow created %p", dev->data->port_id,
		(void *)flow);
	return flow;
exit:
	DRV_LOG(ERR, "port %u flow creation error: %s", dev->data->port_id,
		error->message);
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser.queue[i].ibv_attr)
			rte_free(parser.queue[i].ibv_attr);
	}
	rte_free(flow);
	return NULL;
}

/**
 * Validate a flow supported by the NIC.
 *
 * @see rte_flow_validate()
 * @see rte_flow_ops
 */
int
mlx5_flow_validate(struct rte_eth_dev *dev,
		   const struct rte_flow_attr *attr,
		   const struct rte_flow_item items[],
		   const struct rte_flow_action actions[],
		   struct rte_flow_error *error)
{
	struct mlx5_flow_parse parser = { .create = 0, };

	return mlx5_flow_convert(dev, attr, items, actions, error, &parser);
}

/**
 * Create a flow.
 *
 * @see rte_flow_create()
 * @see rte_flow_ops
 */
struct rte_flow *
mlx5_flow_create(struct rte_eth_dev *dev,
		 const struct rte_flow_attr *attr,
		 const struct rte_flow_item items[],
		 const struct rte_flow_action actions[],
		 struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	return mlx5_flow_list_create(dev, &priv->flows, attr, items, actions,
				     error);
}
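/*
 * Illustrative sketch, not driver code: how an application reaches the two
 * entry points above through the public rte_flow API (which dispatches to
 * mlx5_flow_validate() and mlx5_flow_create() on mlx5 ports).  The port_id
 * and queue index below are hypothetical; the port is assumed to be
 * configured with at least two Rx queues.
 */
#if 0
static struct rte_flow *
example_create_udp_to_queue(uint16_t port_id)
{
	struct rte_flow_error error;
	const struct rte_flow_attr attr = { .ingress = 1 };
	const struct rte_flow_item pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
		{ .type = RTE_FLOW_ITEM_TYPE_UDP },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	const struct rte_flow_action_queue queue = { .index = 1 };
	const struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};

	/* Validation reports unsupported items/actions without side effects. */
	if (rte_flow_validate(port_id, &attr, pattern, actions, &error))
		return NULL;
	return rte_flow_create(port_id, &attr, pattern, actions, &error);
}
#endif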
2428 * Destroy a flow in a list.
2431 * Pointer to Ethernet device.
2433 * Pointer to a TAILQ flow list.
2438 mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
2439 struct rte_flow *flow)
2441 struct priv *priv = dev->data->dev_private;
	if (flow->drop || !dev->data->dev_started)
		goto free;
2446 for (i = 0; flow->tunnel && i != flow->rss_conf.queue_num; ++i) {
2447 /* Update queue tunnel type. */
2448 struct mlx5_rxq_data *rxq_data = (*priv->rxqs)
2449 [(*flow->queues)[i]];
2450 struct mlx5_rxq_ctrl *rxq_ctrl =
2451 container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
2452 uint8_t tunnel = PTYPE_IDX(flow->tunnel);
2454 assert(rxq_ctrl->tunnel_types[tunnel] > 0);
2455 rxq_ctrl->tunnel_types[tunnel] -= 1;
2456 if (!rxq_ctrl->tunnel_types[tunnel]) {
2457 /* Update tunnel type. */
2462 for (j = 0; j < RTE_DIM(rxq_ctrl->tunnel_types); j++)
2463 if (rxq_ctrl->tunnel_types[j]) {
2467 /* Keep same if more than one tunnel types left. */
2469 rxq_data->tunnel = ptype_ext[last];
2470 else if (types == 0)
2471 /* No tunnel type left. */
2472 rxq_data->tunnel = 0;
2475 for (i = 0; flow->mark && i != flow->rss_conf.queue_num; ++i) {
2476 struct rte_flow *tmp;
2480 * To remove the mark from the queue, the queue must not be
2481 * present in any other marked flow (RSS or not).
2483 TAILQ_FOREACH(tmp, list, next) {
2485 uint16_t *tqs = NULL;
2490 for (j = 0; j != hash_rxq_init_n; ++j) {
2491 if (!tmp->frxq[j].hrxq)
2493 tqs = tmp->frxq[j].hrxq->ind_table->queues;
2494 tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
			for (j = 0; (j != tq_n) && !mark; j++)
				if (tqs[j] == (*flow->queues)[i])
					mark = 1;
		}
		(*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
	}
free:
	if (flow->drop) {
		if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
			claim_zero(mlx5_glue->destroy_flow
				   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
		rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
	} else {
		for (i = 0; i != hash_rxq_init_n; ++i) {
			struct mlx5_flow *frxq = &flow->frxq[i];

			if (frxq->ibv_flow)
				claim_zero(mlx5_glue->destroy_flow
					   (frxq->ibv_flow));
			if (frxq->hrxq)
				mlx5_hrxq_release(dev, frxq->hrxq);
			if (frxq->ibv_attr)
				rte_free(frxq->ibv_attr);
		}
	}
	if (flow->cs) {
		claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
		flow->cs = NULL;
	}
	TAILQ_REMOVE(list, flow, next);
	DRV_LOG(DEBUG, "port %u flow destroyed %p", dev->data->port_id,
		(void *)flow);
	rte_free(flow);
}
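/*
 * Illustrative sketch, not driver code: the per-queue "mark" flag handled
 * above controls whether a MARK/FLAG action value is copied into received
 * mbufs.  On the application side the result is read as below (public mbuf
 * API; printf() formatting assumes <inttypes.h>).
 */
#if 0
static void
example_read_mark(const struct rte_mbuf *m)
{
	if (!(m->ol_flags & PKT_RX_FDIR))
		return; /* No FLAG/MARK action matched this packet. */
	if (m->ol_flags & PKT_RX_FDIR_ID)
		printf("MARK value: %" PRIu32 "\n", m->hash.fdir.hi);
	else
		printf("FLAG action matched\n");
}
#endif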
2534 * Destroy all flows.
2537 * Pointer to Ethernet device.
2539 * Pointer to a TAILQ flow list.
2542 mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
2544 while (!TAILQ_EMPTY(list)) {
2545 struct rte_flow *flow;
2547 flow = TAILQ_FIRST(list);
2548 mlx5_flow_list_destroy(dev, list, flow);
2553 * Create drop queue.
2556 * Pointer to Ethernet device.
2559 * 0 on success, a negative errno value otherwise and rte_errno is set.
2562 mlx5_flow_create_drop_queue(struct rte_eth_dev *dev)
2564 struct priv *priv = dev->data->dev_private;
2565 struct mlx5_hrxq_drop *fdq = NULL;
2569 fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
2572 "port %u cannot allocate memory for drop queue",
2573 dev->data->port_id);
2577 fdq->cq = mlx5_glue->create_cq(priv->ctx, 1, NULL, NULL, 0);
2579 DRV_LOG(WARNING, "port %u cannot allocate CQ for drop queue",
2580 dev->data->port_id);
2584 fdq->wq = mlx5_glue->create_wq
2586 &(struct ibv_wq_init_attr){
2587 .wq_type = IBV_WQT_RQ,
2594 DRV_LOG(WARNING, "port %u cannot allocate WQ for drop queue",
2595 dev->data->port_id);
2599 fdq->ind_table = mlx5_glue->create_rwq_ind_table
2601 &(struct ibv_rwq_ind_table_init_attr){
2602 .log_ind_tbl_size = 0,
2603 .ind_tbl = &fdq->wq,
2606 if (!fdq->ind_table) {
2608 "port %u cannot allocate indirection table for drop"
2610 dev->data->port_id);
2614 fdq->qp = mlx5_glue->create_qp_ex
2616 &(struct ibv_qp_init_attr_ex){
2617 .qp_type = IBV_QPT_RAW_PACKET,
2619 IBV_QP_INIT_ATTR_PD |
2620 IBV_QP_INIT_ATTR_IND_TABLE |
2621 IBV_QP_INIT_ATTR_RX_HASH,
2622 .rx_hash_conf = (struct ibv_rx_hash_conf){
2624 IBV_RX_HASH_FUNC_TOEPLITZ,
2625 .rx_hash_key_len = rss_hash_default_key_len,
2626 .rx_hash_key = rss_hash_default_key,
2627 .rx_hash_fields_mask = 0,
2629 .rwq_ind_tbl = fdq->ind_table,
2633 DRV_LOG(WARNING, "port %u cannot allocate QP for drop queue",
2634 dev->data->port_id);
2638 priv->flow_drop_queue = fdq;
2642 claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2644 claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2646 claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2648 claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2651 priv->flow_drop_queue = NULL;
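/*
 * Illustrative sketch, not driver code: the drop queue built above is the
 * Verbs backend for RTE_FLOW_ACTION_TYPE_DROP.  An application requests a
 * drop rule through the public API as below (hypothetical port_id).
 */
#if 0
static struct rte_flow *
example_drop_tcp(uint16_t port_id)
{
	struct rte_flow_error error;
	const struct rte_flow_attr attr = { .ingress = 1 };
	const struct rte_flow_item pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
		{ .type = RTE_FLOW_ITEM_TYPE_TCP },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	const struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_DROP },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};

	return rte_flow_create(port_id, &attr, pattern, actions, &error);
}
#endif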
2656 * Delete drop queue.
2659 * Pointer to Ethernet device.
2662 mlx5_flow_delete_drop_queue(struct rte_eth_dev *dev)
2664 struct priv *priv = dev->data->dev_private;
2665 struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
2670 claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2672 claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2674 claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2676 claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2678 priv->flow_drop_queue = NULL;
2685 * Pointer to Ethernet device.
2687 * Pointer to a TAILQ flow list.
2690 mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
2692 struct priv *priv = dev->data->dev_private;
2693 struct rte_flow *flow;
2696 TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
2697 struct mlx5_ind_table_ibv *ind_tbl = NULL;
2700 if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
2702 claim_zero(mlx5_glue->destroy_flow
2703 (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2704 flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
2705 DRV_LOG(DEBUG, "port %u flow %p removed",
2706 dev->data->port_id, (void *)flow);
2710 /* Verify the flow has not already been cleaned. */
2711 for (i = 0; i != hash_rxq_init_n; ++i) {
2712 if (!flow->frxq[i].ibv_flow)
2715 * Indirection table may be necessary to remove the
2716 * flags in the Rx queues.
2717 * This helps to speed-up the process by avoiding
2720 ind_tbl = flow->frxq[i].hrxq->ind_table;
2723 if (i == hash_rxq_init_n)
2727 for (i = 0; i != ind_tbl->queues_n; ++i)
2728 (*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
2730 for (i = 0; i != hash_rxq_init_n; ++i) {
2731 if (!flow->frxq[i].ibv_flow)
2733 claim_zero(mlx5_glue->destroy_flow
2734 (flow->frxq[i].ibv_flow));
2735 flow->frxq[i].ibv_flow = NULL;
2736 mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
2737 flow->frxq[i].hrxq = NULL;
2739 DRV_LOG(DEBUG, "port %u flow %p removed", dev->data->port_id,
2742 /* Cleanup Rx queue tunnel info. */
2743 for (i = 0; i != priv->rxqs_n; ++i) {
2744 struct mlx5_rxq_data *q = (*priv->rxqs)[i];
2745 struct mlx5_rxq_ctrl *rxq_ctrl =
2746 container_of(q, struct mlx5_rxq_ctrl, rxq);
2750 memset((void *)rxq_ctrl->tunnel_types, 0,
2751 sizeof(rxq_ctrl->tunnel_types));
2760 * Pointer to Ethernet device.
2762 * Pointer to a TAILQ flow list.
2765 * 0 on success, a negative errno value otherwise and rte_errno is set.
2768 mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
2770 struct priv *priv = dev->data->dev_private;
2771 struct rte_flow *flow;
2773 TAILQ_FOREACH(flow, list, next) {
2777 flow->frxq[HASH_RXQ_ETH].ibv_flow =
2778 mlx5_glue->create_flow
2779 (priv->flow_drop_queue->qp,
2780 flow->frxq[HASH_RXQ_ETH].ibv_attr);
2781 if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2783 "port %u flow %p cannot be applied",
2784 dev->data->port_id, (void *)flow);
2788 DRV_LOG(DEBUG, "port %u flow %p applied",
2789 dev->data->port_id, (void *)flow);
2793 for (i = 0; i != hash_rxq_init_n; ++i) {
2794 if (!flow->frxq[i].ibv_attr)
2796 flow->frxq[i].hrxq =
2797 mlx5_hrxq_get(dev, flow->rss_conf.key,
2798 flow->rss_conf.key_len,
2799 flow->frxq[i].hash_fields,
2800 flow->rss_conf.queue,
2801 flow->rss_conf.queue_num,
2803 flow->rss_conf.level);
2804 if (flow->frxq[i].hrxq)
2806 flow->frxq[i].hrxq =
2807 mlx5_hrxq_new(dev, flow->rss_conf.key,
2808 flow->rss_conf.key_len,
2809 flow->frxq[i].hash_fields,
2810 flow->rss_conf.queue,
2811 flow->rss_conf.queue_num,
2813 flow->rss_conf.level);
2814 if (!flow->frxq[i].hrxq) {
2816 "port %u flow %p cannot create hash"
2818 dev->data->port_id, (void *)flow);
2823 mlx5_flow_dump(dev, flow, i);
2824 flow->frxq[i].ibv_flow =
2825 mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
2826 flow->frxq[i].ibv_attr);
2827 if (!flow->frxq[i].ibv_flow) {
2829 "port %u flow %p type %u cannot be"
2831 dev->data->port_id, (void *)flow, i);
2836 mlx5_flow_create_update_rxqs(dev, flow);
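/*
 * Illustrative sketch, not driver code: mlx5_flow_stop() and
 * mlx5_flow_start() are driven by device stop/start.  In this PMD the
 * rte_flow handles stay valid across the transition; only their Verbs
 * objects are destroyed and re-created.  Typical application sequence
 * (public ethdev API, hypothetical port_id):
 */
#if 0
static int
example_restart_port(uint16_t port_id)
{
	/* Removes the Verbs flows but keeps every rte_flow handle. */
	rte_eth_dev_stop(port_id);
	/* Re-applies all flows kept in the private flow list. */
	return rte_eth_dev_start(port_id);
}
#endif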
2842 * Verify the flow list is empty
2845 * Pointer to Ethernet device.
2847 * @return the number of flows not released.
2850 mlx5_flow_verify(struct rte_eth_dev *dev)
2852 struct priv *priv = dev->data->dev_private;
2853 struct rte_flow *flow;
2856 TAILQ_FOREACH(flow, &priv->flows, next) {
2857 DRV_LOG(DEBUG, "port %u flow %p still referenced",
2858 dev->data->port_id, (void *)flow);
2865 * Enable a control flow configured from the control plane.
2868 * Pointer to Ethernet device.
2870 * An Ethernet flow spec to apply.
2872 * An Ethernet flow mask to apply.
2874 * A VLAN flow spec to apply.
2876 * A VLAN flow mask to apply.
2879 * 0 on success, a negative errno value otherwise and rte_errno is set.
2882 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
2883 struct rte_flow_item_eth *eth_spec,
2884 struct rte_flow_item_eth *eth_mask,
2885 struct rte_flow_item_vlan *vlan_spec,
2886 struct rte_flow_item_vlan *vlan_mask)
2888 struct priv *priv = dev->data->dev_private;
2889 const struct rte_flow_attr attr = {
2891 .priority = MLX5_CTRL_FLOW_PRIORITY,
2893 struct rte_flow_item items[] = {
2895 .type = RTE_FLOW_ITEM_TYPE_ETH,
2901 .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
2902 RTE_FLOW_ITEM_TYPE_END,
2908 .type = RTE_FLOW_ITEM_TYPE_END,
2911 uint16_t queue[priv->reta_idx_n];
2912 struct rte_flow_action_rss action_rss = {
2913 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
2915 .types = priv->rss_conf.rss_hf,
2916 .key_len = priv->rss_conf.rss_key_len,
2917 .queue_num = priv->reta_idx_n,
2918 .key = priv->rss_conf.rss_key,
2921 struct rte_flow_action actions[] = {
2923 .type = RTE_FLOW_ACTION_TYPE_RSS,
2924 .conf = &action_rss,
2927 .type = RTE_FLOW_ACTION_TYPE_END,
2930 struct rte_flow *flow;
2931 struct rte_flow_error error;
2934 if (!priv->reta_idx_n) {
2938 for (i = 0; i != priv->reta_idx_n; ++i)
2939 queue[i] = (*priv->reta_idx)[i];
2940 flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
 * Enable a control flow configured from the control plane.
2951 * Pointer to Ethernet device.
2953 * An Ethernet flow spec to apply.
2955 * An Ethernet flow mask to apply.
2958 * 0 on success, a negative errno value otherwise and rte_errno is set.
2961 mlx5_ctrl_flow(struct rte_eth_dev *dev,
2962 struct rte_flow_item_eth *eth_spec,
2963 struct rte_flow_item_eth *eth_mask)
2965 return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
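/*
 * Illustrative sketch, not driver code: the traffic-enable path uses
 * mlx5_ctrl_flow() to install control rules such as the broadcast one
 * below, passing the same Ethernet description as both spec and mask.
 */
#if 0
static int
example_enable_broadcast(struct rte_eth_dev *dev)
{
	struct rte_flow_item_eth bcast = {
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
	};

	return mlx5_ctrl_flow(dev, &bcast, &bcast);
}
#endif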
2971 * @see rte_flow_destroy()
2975 mlx5_flow_destroy(struct rte_eth_dev *dev,
2976 struct rte_flow *flow,
2977 struct rte_flow_error *error __rte_unused)
2979 struct priv *priv = dev->data->dev_private;
2981 mlx5_flow_list_destroy(dev, &priv->flows, flow);
2986 * Destroy all flows.
2988 * @see rte_flow_flush()
2992 mlx5_flow_flush(struct rte_eth_dev *dev,
2993 struct rte_flow_error *error __rte_unused)
2995 struct priv *priv = dev->data->dev_private;
2997 mlx5_flow_list_flush(dev, &priv->flows);
3001 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
3003 * Query flow counter.
3007 * @param counter_value
3008 * returned data from the counter.
3011 * 0 on success, a negative errno value otherwise and rte_errno is set.
3014 mlx5_flow_query_count(struct ibv_counter_set *cs,
3015 struct mlx5_flow_counter_stats *counter_stats,
3016 struct rte_flow_query_count *query_count,
3017 struct rte_flow_error *error)
3019 uint64_t counters[2];
3020 struct ibv_query_counter_set_attr query_cs_attr = {
3022 .query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
3024 struct ibv_counter_set_data query_out = {
3026 .outlen = 2 * sizeof(uint64_t),
3028 int err = mlx5_glue->query_counter_set(&query_cs_attr, &query_out);
3031 return rte_flow_error_set(error, err,
3032 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3034 "cannot read counter");
3035 query_count->hits_set = 1;
3036 query_count->bytes_set = 1;
3037 query_count->hits = counters[0] - counter_stats->hits;
3038 query_count->bytes = counters[1] - counter_stats->bytes;
3039 if (query_count->reset) {
3040 counter_stats->hits = counters[0];
3041 counter_stats->bytes = counters[1];
3049 * @see rte_flow_query()
3053 mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
3054 struct rte_flow *flow,
3055 const struct rte_flow_action *action __rte_unused,
3057 struct rte_flow_error *error)
3062 ret = mlx5_flow_query_count(flow->cs,
3063 &flow->counter_stats,
3064 (struct rte_flow_query_count *)data,
3069 return rte_flow_error_set(error, EINVAL,
3070 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3072 "no counter found for flow");
3081 * @see rte_flow_isolate()
3085 mlx5_flow_isolate(struct rte_eth_dev *dev,
3087 struct rte_flow_error *error)
3089 struct priv *priv = dev->data->dev_private;
3091 if (dev->data->dev_started) {
3092 rte_flow_error_set(error, EBUSY,
3093 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3095 "port must be stopped first");
3098 priv->isolated = !!enable;
3100 priv->dev->dev_ops = &mlx5_dev_ops_isolate;
3102 priv->dev->dev_ops = &mlx5_dev_ops;
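/*
 * Illustrative sketch, not driver code: isolated mode must be requested
 * while the port is stopped, as enforced above; afterwards only traffic
 * matching explicit flow rules reaches the application.  Public API usage
 * (hypothetical port_id):
 */
#if 0
static int
example_enter_isolated_mode(uint16_t port_id)
{
	struct rte_flow_error error;

	return rte_flow_isolate(port_id, 1, &error);
}
#endif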
3107 * Convert a flow director filter to a generic flow.
3110 * Pointer to Ethernet device.
3111 * @param fdir_filter
3112 * Flow director filter to add.
3114 * Generic flow parameters structure.
3117 * 0 on success, a negative errno value otherwise and rte_errno is set.
3120 mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
3121 const struct rte_eth_fdir_filter *fdir_filter,
3122 struct mlx5_fdir *attributes)
3124 struct priv *priv = dev->data->dev_private;
3125 const struct rte_eth_fdir_input *input = &fdir_filter->input;
3126 const struct rte_eth_fdir_masks *mask =
3127 &dev->data->dev_conf.fdir_conf.mask;
3129 /* Validate queue number. */
3130 if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
3131 DRV_LOG(ERR, "port %u invalid queue number %d",
3132 dev->data->port_id, fdir_filter->action.rx_queue);
3136 attributes->attr.ingress = 1;
3137 attributes->items[0] = (struct rte_flow_item) {
3138 .type = RTE_FLOW_ITEM_TYPE_ETH,
3139 .spec = &attributes->l2,
3140 .mask = &attributes->l2_mask,
3142 switch (fdir_filter->action.behavior) {
3143 case RTE_ETH_FDIR_ACCEPT:
3144 attributes->actions[0] = (struct rte_flow_action){
3145 .type = RTE_FLOW_ACTION_TYPE_QUEUE,
3146 .conf = &attributes->queue,
3149 case RTE_ETH_FDIR_REJECT:
3150 attributes->actions[0] = (struct rte_flow_action){
3151 .type = RTE_FLOW_ACTION_TYPE_DROP,
3155 DRV_LOG(ERR, "port %u invalid behavior %d",
3157 fdir_filter->action.behavior);
3158 rte_errno = ENOTSUP;
3161 attributes->queue.index = fdir_filter->action.rx_queue;
3163 switch (fdir_filter->input.flow_type) {
3164 case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
3165 case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
3166 case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
3167 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
3168 .src_addr = input->flow.ip4_flow.src_ip,
3169 .dst_addr = input->flow.ip4_flow.dst_ip,
3170 .time_to_live = input->flow.ip4_flow.ttl,
3171 .type_of_service = input->flow.ip4_flow.tos,
3172 .next_proto_id = input->flow.ip4_flow.proto,
3174 attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){
3175 .src_addr = mask->ipv4_mask.src_ip,
3176 .dst_addr = mask->ipv4_mask.dst_ip,
3177 .time_to_live = mask->ipv4_mask.ttl,
3178 .type_of_service = mask->ipv4_mask.tos,
3179 .next_proto_id = mask->ipv4_mask.proto,
3181 attributes->items[1] = (struct rte_flow_item){
3182 .type = RTE_FLOW_ITEM_TYPE_IPV4,
3183 .spec = &attributes->l3,
3184 .mask = &attributes->l3_mask,
3187 case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
3188 case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
3189 case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
3190 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
3191 .hop_limits = input->flow.ipv6_flow.hop_limits,
3192 .proto = input->flow.ipv6_flow.proto,
3195 memcpy(attributes->l3.ipv6.hdr.src_addr,
3196 input->flow.ipv6_flow.src_ip,
3197 RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
3198 memcpy(attributes->l3.ipv6.hdr.dst_addr,
3199 input->flow.ipv6_flow.dst_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
3201 memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
3202 mask->ipv6_mask.src_ip,
3203 RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
3204 memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
3205 mask->ipv6_mask.dst_ip,
		       RTE_DIM(attributes->l3_mask.ipv6.hdr.dst_addr));
3207 attributes->items[1] = (struct rte_flow_item){
3208 .type = RTE_FLOW_ITEM_TYPE_IPV6,
3209 .spec = &attributes->l3,
3210 .mask = &attributes->l3_mask,
		DRV_LOG(ERR, "port %u invalid flow type %d",
3215 dev->data->port_id, fdir_filter->input.flow_type);
3216 rte_errno = ENOTSUP;
3220 switch (fdir_filter->input.flow_type) {
3221 case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
3222 attributes->l4.udp.hdr = (struct udp_hdr){
3223 .src_port = input->flow.udp4_flow.src_port,
3224 .dst_port = input->flow.udp4_flow.dst_port,
3226 attributes->l4_mask.udp.hdr = (struct udp_hdr){
3227 .src_port = mask->src_port_mask,
3228 .dst_port = mask->dst_port_mask,
3230 attributes->items[2] = (struct rte_flow_item){
3231 .type = RTE_FLOW_ITEM_TYPE_UDP,
3232 .spec = &attributes->l4,
3233 .mask = &attributes->l4_mask,
3236 case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
3237 attributes->l4.tcp.hdr = (struct tcp_hdr){
3238 .src_port = input->flow.tcp4_flow.src_port,
3239 .dst_port = input->flow.tcp4_flow.dst_port,
3241 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
3242 .src_port = mask->src_port_mask,
3243 .dst_port = mask->dst_port_mask,
3245 attributes->items[2] = (struct rte_flow_item){
3246 .type = RTE_FLOW_ITEM_TYPE_TCP,
3247 .spec = &attributes->l4,
3248 .mask = &attributes->l4_mask,
3251 case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
3252 attributes->l4.udp.hdr = (struct udp_hdr){
3253 .src_port = input->flow.udp6_flow.src_port,
3254 .dst_port = input->flow.udp6_flow.dst_port,
3256 attributes->l4_mask.udp.hdr = (struct udp_hdr){
3257 .src_port = mask->src_port_mask,
3258 .dst_port = mask->dst_port_mask,
3260 attributes->items[2] = (struct rte_flow_item){
3261 .type = RTE_FLOW_ITEM_TYPE_UDP,
3262 .spec = &attributes->l4,
3263 .mask = &attributes->l4_mask,
3266 case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
3267 attributes->l4.tcp.hdr = (struct tcp_hdr){
3268 .src_port = input->flow.tcp6_flow.src_port,
3269 .dst_port = input->flow.tcp6_flow.dst_port,
3271 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
3272 .src_port = mask->src_port_mask,
3273 .dst_port = mask->dst_port_mask,
3275 attributes->items[2] = (struct rte_flow_item){
3276 .type = RTE_FLOW_ITEM_TYPE_TCP,
3277 .spec = &attributes->l4,
3278 .mask = &attributes->l4_mask,
3281 case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
3282 case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
		DRV_LOG(ERR, "port %u invalid flow type %d",
3286 dev->data->port_id, fdir_filter->input.flow_type);
3287 rte_errno = ENOTSUP;
3294 * Add new flow director filter and store it in list.
3297 * Pointer to Ethernet device.
3298 * @param fdir_filter
3299 * Flow director filter to add.
3302 * 0 on success, a negative errno value otherwise and rte_errno is set.
3305 mlx5_fdir_filter_add(struct rte_eth_dev *dev,
3306 const struct rte_eth_fdir_filter *fdir_filter)
3308 struct priv *priv = dev->data->dev_private;
3309 struct mlx5_fdir attributes = {
3312 .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
3313 .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
3317 struct mlx5_flow_parse parser = {
3318 .layer = HASH_RXQ_ETH,
3320 struct rte_flow_error error;
3321 struct rte_flow *flow;
3324 ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
3327 ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
3328 attributes.actions, &error, &parser);
3331 flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
3332 attributes.items, attributes.actions,
3335 DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
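/*
 * Illustrative sketch, not driver code: flow director filters enter this
 * path through the legacy filter API.  A perfect IPv4/UDP filter steering
 * to queue 1 would be submitted as below (public ethdev API, hypothetical
 * port_id and addresses); mlx5_fdir_filter_convert() then turns it into a
 * generic flow as shown above.
 */
#if 0
static int
example_add_fdir_filter(uint16_t port_id)
{
	struct rte_eth_fdir_filter filter = {
		.input = {
			.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
			.flow.udp4_flow = {
				.ip = {
					.src_ip = rte_cpu_to_be_32(IPv4(192, 168, 0, 1)),
					.dst_ip = rte_cpu_to_be_32(IPv4(192, 168, 0, 2)),
				},
				.src_port = rte_cpu_to_be_16(1024),
				.dst_port = rte_cpu_to_be_16(4789),
			},
		},
		.action = {
			.rx_queue = 1,
			.behavior = RTE_ETH_FDIR_ACCEPT,
		},
	};

	return rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
				       RTE_ETH_FILTER_ADD, &filter);
}
#endif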
3343 * Delete specific filter.
3346 * Pointer to Ethernet device.
3347 * @param fdir_filter
3348 * Filter to be deleted.
3351 * 0 on success, a negative errno value otherwise and rte_errno is set.
3354 mlx5_fdir_filter_delete(struct rte_eth_dev *dev,
3355 const struct rte_eth_fdir_filter *fdir_filter)
3357 struct priv *priv = dev->data->dev_private;
3358 struct mlx5_fdir attributes = {
3361 struct mlx5_flow_parse parser = {
3363 .layer = HASH_RXQ_ETH,
3365 struct rte_flow_error error;
3366 struct rte_flow *flow;
3370 ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
3373 ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
3374 attributes.actions, &error, &parser);
3378 * Special case for drop action which is only set in the
3379 * specifications when the flow is created. In this situation the
3380 * drop specification is missing.
3383 struct ibv_flow_spec_action_drop *drop;
3385 drop = (void *)((uintptr_t)parser.queue[parser.layer].ibv_attr +
3386 parser.queue[parser.layer].offset);
3387 *drop = (struct ibv_flow_spec_action_drop){
3388 .type = IBV_FLOW_SPEC_ACTION_DROP,
3389 .size = sizeof(struct ibv_flow_spec_action_drop),
3391 parser.queue[parser.layer].ibv_attr->num_of_specs++;
3393 TAILQ_FOREACH(flow, &priv->flows, next) {
3394 struct ibv_flow_attr *attr;
3395 struct ibv_spec_header *attr_h;
3397 struct ibv_flow_attr *flow_attr;
3398 struct ibv_spec_header *flow_h;
3400 unsigned int specs_n;
3402 attr = parser.queue[parser.layer].ibv_attr;
3403 flow_attr = flow->frxq[parser.layer].ibv_attr;
3404 /* Compare first the attributes. */
3405 if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
3407 if (attr->num_of_specs == 0)
3409 spec = (void *)((uintptr_t)attr +
3410 sizeof(struct ibv_flow_attr));
3411 flow_spec = (void *)((uintptr_t)flow_attr +
3412 sizeof(struct ibv_flow_attr));
3413 specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
		for (i = 0; i != specs_n; ++i) {
			attr_h = spec;
			flow_h = flow_spec;
			if (memcmp(spec, flow_spec,
				   RTE_MIN(attr_h->size, flow_h->size)))
				goto wrong_flow;
			spec = (void *)((uintptr_t)spec + attr_h->size);
			flow_spec = (void *)((uintptr_t)flow_spec +
					     flow_h->size);
		}
		/* At this point, the flow matches. */
		break;
wrong_flow:
		/* The flow does not match. */
		continue;
	}
	ret = rte_errno; /* Save rte_errno before cleanup. */
	if (flow)
		mlx5_flow_list_destroy(dev, &priv->flows, flow);
exit:
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser.queue[i].ibv_attr)
			rte_free(parser.queue[i].ibv_attr);
	}
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}

/**
3443 * Update queue for specific filter.
3446 * Pointer to Ethernet device.
3447 * @param fdir_filter
3448 * Filter to be updated.
3451 * 0 on success, a negative errno value otherwise and rte_errno is set.
3454 mlx5_fdir_filter_update(struct rte_eth_dev *dev,
3455 const struct rte_eth_fdir_filter *fdir_filter)
3459 ret = mlx5_fdir_filter_delete(dev, fdir_filter);
3462 return mlx5_fdir_filter_add(dev, fdir_filter);
3466 * Flush all filters.
3469 * Pointer to Ethernet device.
3472 mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
3474 struct priv *priv = dev->data->dev_private;
3476 mlx5_flow_list_flush(dev, &priv->flows);
3480 * Get flow director information.
3483 * Pointer to Ethernet device.
3484 * @param[out] fdir_info
3485 * Resulting flow director information.
3488 mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
3490 struct priv *priv = dev->data->dev_private;
3491 struct rte_eth_fdir_masks *mask =
3492 &priv->dev->data->dev_conf.fdir_conf.mask;
3494 fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
3495 fdir_info->guarant_spc = 0;
3496 rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
3497 fdir_info->max_flexpayload = 0;
3498 fdir_info->flow_types_mask[0] = 0;
3499 fdir_info->flex_payload_unit = 0;
3500 fdir_info->max_flex_payload_segment_num = 0;
3501 fdir_info->flex_payload_limit = 0;
3502 memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
3506 * Deal with flow director operations.
3509 * Pointer to Ethernet device.
3511 * Operation to perform.
3513 * Pointer to operation-specific structure.
3516 * 0 on success, a negative errno value otherwise and rte_errno is set.
3519 mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
3522 struct priv *priv = dev->data->dev_private;
3523 enum rte_fdir_mode fdir_mode =
3524 priv->dev->data->dev_conf.fdir_conf.mode;
3526 if (filter_op == RTE_ETH_FILTER_NOP)
3528 if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
3529 fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
3530 DRV_LOG(ERR, "port %u flow director mode %d not supported",
3531 dev->data->port_id, fdir_mode);
3535 switch (filter_op) {
3536 case RTE_ETH_FILTER_ADD:
3537 return mlx5_fdir_filter_add(dev, arg);
3538 case RTE_ETH_FILTER_UPDATE:
3539 return mlx5_fdir_filter_update(dev, arg);
3540 case RTE_ETH_FILTER_DELETE:
3541 return mlx5_fdir_filter_delete(dev, arg);
3542 case RTE_ETH_FILTER_FLUSH:
3543 mlx5_fdir_filter_flush(dev);
3545 case RTE_ETH_FILTER_INFO:
3546 mlx5_fdir_info_get(dev, arg);
3549 DRV_LOG(DEBUG, "port %u unknown operation %u",
3550 dev->data->port_id, filter_op);
3558 * Manage filter operations.
3561 * Pointer to Ethernet device structure.
3562 * @param filter_type
3565 * Operation to perform.
3567 * Pointer to operation-specific structure.
3570 * 0 on success, a negative errno value otherwise and rte_errno is set.
3573 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
3574 enum rte_filter_type filter_type,
3575 enum rte_filter_op filter_op,
3578 switch (filter_type) {
3579 case RTE_ETH_FILTER_GENERIC:
3580 if (filter_op != RTE_ETH_FILTER_GET) {
3584 *(const void **)arg = &mlx5_flow_ops;
3586 case RTE_ETH_FILTER_FDIR:
3587 return mlx5_fdir_ctrl_func(dev, filter_op, arg);
3589 DRV_LOG(ERR, "port %u filter type (%d) not supported",
3590 dev->data->port_id, filter_type);
3591 rte_errno = ENOTSUP;
3598 * Detect number of Verbs flow priorities supported.
3601 * Pointer to Ethernet device.
 *   number of supported Verbs flow priorities.
3607 mlx5_get_max_verbs_prio(struct rte_eth_dev *dev)
3609 struct priv *priv = dev->data->dev_private;
3610 unsigned int verb_priorities = MLX5_VERBS_FLOW_PRIO_8;
3612 struct ibv_flow_attr attr;
3613 struct ibv_flow_spec_eth eth;
3614 struct ibv_flow_spec_action_drop drop;
3620 .type = IBV_FLOW_SPEC_ETH,
3621 .size = sizeof(struct ibv_flow_spec_eth),
3624 .size = sizeof(struct ibv_flow_spec_action_drop),
3625 .type = IBV_FLOW_SPEC_ACTION_DROP,
3628 struct ibv_flow *flow;
3631 flow_attr.attr.priority = verb_priorities - 1;
3632 flow = mlx5_glue->create_flow(priv->flow_drop_queue->qp,
3635 claim_zero(mlx5_glue->destroy_flow(flow));
3636 /* Try more priorities. */
3637 verb_priorities *= 2;
3639 /* Failed, restore last right number. */
3640 verb_priorities /= 2;
3644 DRV_LOG(DEBUG, "port %u Verbs flow priorities: %d,"
3645 " user flow priorities: %d",
3646 dev->data->port_id, verb_priorities, MLX5_CTRL_FLOW_PRIORITY);
3647 return verb_priorities;
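/*
 * Illustrative sketch, not driver code: the probe above starts from
 * MLX5_VERBS_FLOW_PRIO_8 and doubles the count while a drop flow can still
 * be created at the highest priority, then steps back once on failure
 * (e.g. 8 -> 16 -> fail at 32 -> report 16).  Stand-alone version with a
 * hypothetical predicate in place of the Verbs flow creation attempt:
 */
#if 0
static unsigned int
example_probe_priorities(int (*prio_supported)(unsigned int prio))
{
	unsigned int n = 8;

	while (prio_supported(n - 1))
		n *= 2;  /* Highest priority still works, try twice as many. */
	return n / 2;    /* Last attempt failed, restore the previous count. */
}
#endif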