1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2016 6WIND S.A.
3 * Copyright 2016 Mellanox Technologies, Ltd
11 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
13 #pragma GCC diagnostic ignored "-Wpedantic"
15 #include <infiniband/verbs.h>
17 #pragma GCC diagnostic error "-Wpedantic"
20 #include <rte_common.h>
21 #include <rte_ether.h>
22 #include <rte_eth_ctrl.h>
23 #include <rte_ethdev_driver.h>
25 #include <rte_flow_driver.h>
26 #include <rte_malloc.h>
30 #include "mlx5_defs.h"
32 #include "mlx5_glue.h"
34 /* Flow priority for control plane flows. */
35 #define MLX5_CTRL_FLOW_PRIORITY 1
37 /* Internet Protocol versions. */
42 #ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
43 struct ibv_flow_spec_counter_action {
44 int dummy;
45 };
46 #endif
48 /* Dev ops structure defined in mlx5.c */
49 extern const struct eth_dev_ops mlx5_dev_ops;
50 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
52 /** Structure passed to the conversion functions. */
53 struct mlx5_flow_data {
54 struct rte_eth_dev *dev; /**< Ethernet device. */
55 struct mlx5_flow_parse *parser; /**< Parser context. */
56 struct rte_flow_error *error; /**< Error context. */
60 mlx5_flow_create_eth(const struct rte_flow_item *item,
61 const void *default_mask,
62 struct mlx5_flow_data *data);
65 mlx5_flow_create_vlan(const struct rte_flow_item *item,
66 const void *default_mask,
67 struct mlx5_flow_data *data);
70 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
71 const void *default_mask,
72 struct mlx5_flow_data *data);
75 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
76 const void *default_mask,
77 struct mlx5_flow_data *data);
80 mlx5_flow_create_udp(const struct rte_flow_item *item,
81 const void *default_mask,
82 struct mlx5_flow_data *data);
85 mlx5_flow_create_tcp(const struct rte_flow_item *item,
86 const void *default_mask,
87 struct mlx5_flow_data *data);
90 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
91 const void *default_mask,
92 struct mlx5_flow_data *data);
95 mlx5_flow_create_gre(const struct rte_flow_item *item,
96 const void *default_mask,
97 struct mlx5_flow_data *data);
99 struct mlx5_flow_parse;
102 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
106 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);
109 mlx5_flow_create_count(struct rte_eth_dev *dev, struct mlx5_flow_parse *parser);
111 /* Hash RX queue types. */
123 /* Initialization data for hash RX queue. */
124 struct hash_rxq_init {
125 uint64_t hash_fields; /* Fields that participate in the hash. */
126 uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
127 unsigned int flow_priority; /* Flow priority to use. */
128 unsigned int ip_version; /* Internet protocol. */
131 /* Initialization data for hash RX queues. */
132 const struct hash_rxq_init hash_rxq_init[] = {
134 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
135 IBV_RX_HASH_DST_IPV4 |
136 IBV_RX_HASH_SRC_PORT_TCP |
137 IBV_RX_HASH_DST_PORT_TCP),
138 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
140 .ip_version = MLX5_IPV4,
143 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
144 IBV_RX_HASH_DST_IPV4 |
145 IBV_RX_HASH_SRC_PORT_UDP |
146 IBV_RX_HASH_DST_PORT_UDP),
147 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
149 .ip_version = MLX5_IPV4,
152 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
153 IBV_RX_HASH_DST_IPV4),
154 .dpdk_rss_hf = (ETH_RSS_IPV4 |
157 .ip_version = MLX5_IPV4,
160 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
161 IBV_RX_HASH_DST_IPV6 |
162 IBV_RX_HASH_SRC_PORT_TCP |
163 IBV_RX_HASH_DST_PORT_TCP),
164 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
166 .ip_version = MLX5_IPV6,
169 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
170 IBV_RX_HASH_DST_IPV6 |
171 IBV_RX_HASH_SRC_PORT_UDP |
172 IBV_RX_HASH_DST_PORT_UDP),
173 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
175 .ip_version = MLX5_IPV6,
178 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
179 IBV_RX_HASH_DST_IPV6),
180 .dpdk_rss_hf = (ETH_RSS_IPV6 |
183 .ip_version = MLX5_IPV6,
192 /* Number of entries in hash_rxq_init[]. */
193 const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
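/*
 * Illustrative sketch (not from the original source): hash_rxq_init[]
 * doubles as a translation table between DPDK RSS hash fields and Verbs
 * hash fields. The helper name below is hypothetical.
 */
#if 0
static uint64_t
example_dpdk_rss_to_verbs(uint64_t dpdk_rss_hf)
{
	unsigned int i;
	uint64_t hash_fields = 0;

	for (i = 0; i != hash_rxq_init_n; ++i)
		if (hash_rxq_init[i].dpdk_rss_hf & dpdk_rss_hf)
			hash_fields |= hash_rxq_init[i].hash_fields;
	return hash_fields;
}
#endif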
195 /** Structure for holding counter stats. */
196 struct mlx5_flow_counter_stats {
197 uint64_t hits; /**< Number of packets matched by the rule. */
198 uint64_t bytes; /**< Number of bytes matched by the rule. */
201 /** Structure for Drop queue. */
202 struct mlx5_hrxq_drop {
203 struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
204 struct ibv_qp *qp; /**< Verbs queue pair. */
205 struct ibv_wq *wq; /**< Verbs work queue. */
206 struct ibv_cq *cq; /**< Verbs completion queue. */
209 /* Flow structures. */
211 uint64_t hash_fields; /**< Fields that participate in the hash. */
212 struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
213 struct ibv_flow *ibv_flow; /**< Verbs flow. */
214 struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
217 /* Drop flow structures. */
218 struct mlx5_flow_drop {
219 struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
220 struct ibv_flow *ibv_flow; /**< Verbs flow. */
224 TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
225 uint32_t mark:1; /**< Set if the flow is marked. */
226 uint32_t drop:1; /**< Drop queue. */
227 struct rte_flow_action_rss rss_conf; /**< RSS configuration. */
228 uint16_t (*queues)[]; /**< Queue indexes to use. */
229 uint8_t rss_key[40]; /**< Copy of the RSS key. */
230 uint32_t tunnel; /**< Tunnel type of RTE_PTYPE_TUNNEL_XXX. */
231 struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
232 struct mlx5_flow_counter_stats counter_stats; /**< The counter stats. */
233 struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
234 /**< Flow with Rx queue. */
237 /** Static initializer for items. */
238 #define ITEMS(...) \
239 (const enum rte_flow_item_type []){ \
240 __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
241 }
243 #define IS_TUNNEL(type) ( \
244 (type) == RTE_FLOW_ITEM_TYPE_VXLAN || \
245 (type) == RTE_FLOW_ITEM_TYPE_GRE)
247 const uint32_t flow_ptype[] = {
248 [RTE_FLOW_ITEM_TYPE_VXLAN] = RTE_PTYPE_TUNNEL_VXLAN,
249 [RTE_FLOW_ITEM_TYPE_GRE] = RTE_PTYPE_TUNNEL_GRE,
252 #define PTYPE_IDX(t) ((RTE_PTYPE_TUNNEL_MASK & (t)) >> 12)
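/*
 * Note (added): RTE_PTYPE_TUNNEL_* values occupy the 0x0000f000 nibble of
 * the packet type, so PTYPE_IDX() yields small array indexes, e.g.
 * PTYPE_IDX(RTE_PTYPE_TUNNEL_GRE) == 2 (0x00002000 >> 12) and
 * PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN) == 3 (0x00003000 >> 12).
 */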
254 const uint32_t ptype_ext[] = {
255 [PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] = RTE_PTYPE_TUNNEL_VXLAN |
257 [PTYPE_IDX(RTE_PTYPE_TUNNEL_GRE)] = RTE_PTYPE_TUNNEL_GRE,
260 /** Structure to generate a simple graph of layers supported by the NIC. */
261 struct mlx5_flow_items {
262 /** List of possible actions for these items. */
263 const enum rte_flow_action_type *const actions;
264 /** Bit-masks corresponding to the possibilities for the item. */
267 * Default bit-masks to use when item->mask is not provided. When
268 * \default_mask is also NULL, the full supported bit-mask (\mask) is
269 * used instead.
271 const void *default_mask;
272 /** Bit-mask size in bytes. */
273 const unsigned int mask_sz;
275 * Conversion function from rte_flow to NIC-specific flow.
278 * rte_flow item to convert.
279 * @param default_mask
280 * Default bit-masks to use when item->mask is not provided.
282 * Internal structure to store the conversion.
285 * 0 on success, a negative errno value otherwise and rte_errno is
286 * set.
288 int (*convert)(const struct rte_flow_item *item,
289 const void *default_mask,
290 struct mlx5_flow_data *data);
291 /** Size in bytes of the destination structure. */
292 const unsigned int dst_sz;
293 /** List of possible following items. */
294 const enum rte_flow_item_type *const items;
297 /** Valid actions for this PMD. */
298 static const enum rte_flow_action_type valid_actions[] = {
299 RTE_FLOW_ACTION_TYPE_DROP,
300 RTE_FLOW_ACTION_TYPE_QUEUE,
301 RTE_FLOW_ACTION_TYPE_MARK,
302 RTE_FLOW_ACTION_TYPE_FLAG,
303 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
304 RTE_FLOW_ACTION_TYPE_COUNT,
306 RTE_FLOW_ACTION_TYPE_END,
309 /** Graph of supported items and associated actions. */
310 static const struct mlx5_flow_items mlx5_flow_items[] = {
311 [RTE_FLOW_ITEM_TYPE_END] = {
312 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
313 RTE_FLOW_ITEM_TYPE_VXLAN,
314 RTE_FLOW_ITEM_TYPE_GRE),
316 [RTE_FLOW_ITEM_TYPE_ETH] = {
317 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
318 RTE_FLOW_ITEM_TYPE_IPV4,
319 RTE_FLOW_ITEM_TYPE_IPV6),
320 .actions = valid_actions,
321 .mask = &(const struct rte_flow_item_eth){
322 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
323 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
326 .default_mask = &rte_flow_item_eth_mask,
327 .mask_sz = sizeof(struct rte_flow_item_eth),
328 .convert = mlx5_flow_create_eth,
329 .dst_sz = sizeof(struct ibv_flow_spec_eth),
331 [RTE_FLOW_ITEM_TYPE_VLAN] = {
332 .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
333 RTE_FLOW_ITEM_TYPE_IPV6),
334 .actions = valid_actions,
335 .mask = &(const struct rte_flow_item_vlan){
339 .default_mask = &rte_flow_item_vlan_mask,
340 .mask_sz = sizeof(struct rte_flow_item_vlan),
341 .convert = mlx5_flow_create_vlan,
344 [RTE_FLOW_ITEM_TYPE_IPV4] = {
345 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
346 RTE_FLOW_ITEM_TYPE_TCP,
347 RTE_FLOW_ITEM_TYPE_GRE),
348 .actions = valid_actions,
349 .mask = &(const struct rte_flow_item_ipv4){
353 .type_of_service = -1,
357 .default_mask = &rte_flow_item_ipv4_mask,
358 .mask_sz = sizeof(struct rte_flow_item_ipv4),
359 .convert = mlx5_flow_create_ipv4,
360 .dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
362 [RTE_FLOW_ITEM_TYPE_IPV6] = {
363 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
364 RTE_FLOW_ITEM_TYPE_TCP,
365 RTE_FLOW_ITEM_TYPE_GRE),
366 .actions = valid_actions,
367 .mask = &(const struct rte_flow_item_ipv6){
370 0xff, 0xff, 0xff, 0xff,
371 0xff, 0xff, 0xff, 0xff,
372 0xff, 0xff, 0xff, 0xff,
373 0xff, 0xff, 0xff, 0xff,
376 0xff, 0xff, 0xff, 0xff,
377 0xff, 0xff, 0xff, 0xff,
378 0xff, 0xff, 0xff, 0xff,
379 0xff, 0xff, 0xff, 0xff,
386 .default_mask = &rte_flow_item_ipv6_mask,
387 .mask_sz = sizeof(struct rte_flow_item_ipv6),
388 .convert = mlx5_flow_create_ipv6,
389 .dst_sz = sizeof(struct ibv_flow_spec_ipv6),
391 [RTE_FLOW_ITEM_TYPE_UDP] = {
392 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
393 .actions = valid_actions,
394 .mask = &(const struct rte_flow_item_udp){
400 .default_mask = &rte_flow_item_udp_mask,
401 .mask_sz = sizeof(struct rte_flow_item_udp),
402 .convert = mlx5_flow_create_udp,
403 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
405 [RTE_FLOW_ITEM_TYPE_TCP] = {
406 .actions = valid_actions,
407 .mask = &(const struct rte_flow_item_tcp){
413 .default_mask = &rte_flow_item_tcp_mask,
414 .mask_sz = sizeof(struct rte_flow_item_tcp),
415 .convert = mlx5_flow_create_tcp,
416 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
418 [RTE_FLOW_ITEM_TYPE_GRE] = {
419 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
420 RTE_FLOW_ITEM_TYPE_IPV4,
421 RTE_FLOW_ITEM_TYPE_IPV6),
422 .actions = valid_actions,
423 .mask = &(const struct rte_flow_item_gre){
426 .default_mask = &rte_flow_item_gre_mask,
427 .mask_sz = sizeof(struct rte_flow_item_gre),
428 .convert = mlx5_flow_create_gre,
429 .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
431 [RTE_FLOW_ITEM_TYPE_VXLAN] = {
432 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
433 RTE_FLOW_ITEM_TYPE_IPV4, /* For L3 VXLAN. */
434 RTE_FLOW_ITEM_TYPE_IPV6), /* For L3 VXLAN. */
435 .actions = valid_actions,
436 .mask = &(const struct rte_flow_item_vxlan){
437 .vni = "\xff\xff\xff",
439 .default_mask = &rte_flow_item_vxlan_mask,
440 .mask_sz = sizeof(struct rte_flow_item_vxlan),
441 .convert = mlx5_flow_create_vxlan,
442 .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
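/*
 * Illustration (not from the original source): validation walks this graph
 * starting from RTE_FLOW_ITEM_TYPE_END; each pattern item must appear in
 * the previous node's "items" list. Variable names here are hypothetical.
 */
#if 0
const struct mlx5_flow_items *cur = &mlx5_flow_items[RTE_FLOW_ITEM_TYPE_END];

for (; item->type != RTE_FLOW_ITEM_TYPE_END; ++item) {
	unsigned int i;

	for (i = 0; cur->items[i] != RTE_FLOW_ITEM_TYPE_END; ++i)
		if (cur->items[i] == item->type)
			break;
	if (cur->items[i] == RTE_FLOW_ITEM_TYPE_END)
		return -ENOTSUP; /* e.g. eth followed directly by tcp */
	cur = &mlx5_flow_items[item->type];
}
#endif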
446 /** Structure to pass to the conversion function. */
447 struct mlx5_flow_parse {
448 uint32_t inner; /**< Verbs value, set once tunnel is encountered. */
450 /**< Whether resources should remain after a validate. */
451 uint32_t drop:1; /**< Target is a drop queue. */
452 uint32_t mark:1; /**< Mark is present in the flow. */
453 uint32_t count:1; /**< Count is present in the flow. */
454 uint32_t mark_id; /**< Mark identifier. */
455 struct rte_flow_action_rss rss_conf; /**< RSS configuration. */
456 uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indexes to use. */
457 uint8_t rss_key[40]; /**< Copy of the RSS key. */
458 enum hash_rxq_type layer; /**< Last pattern layer detected. */
459 enum hash_rxq_type out_layer; /**< Last outer pattern layer detected. */
460 uint32_t tunnel; /**< Tunnel type of RTE_PTYPE_TUNNEL_XXX. */
461 struct ibv_counter_set *cs; /**< Holds the counter set for the rule. */
463 struct ibv_flow_attr *ibv_attr;
464 /**< Pointer to Verbs attributes. */
466 /**< Current position or total size of the attribute. */
467 uint64_t hash_fields; /**< Verbs hash fields. */
468 } queue[RTE_DIM(hash_rxq_init)];
471 static const struct rte_flow_ops mlx5_flow_ops = {
472 .validate = mlx5_flow_validate,
473 .create = mlx5_flow_create,
474 .destroy = mlx5_flow_destroy,
475 .flush = mlx5_flow_flush,
476 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
477 .query = mlx5_flow_query,
481 .isolate = mlx5_flow_isolate,
484 /* Convert FDIR request to Generic flow. */
486 struct rte_flow_attr attr;
487 struct rte_flow_action actions[2];
488 struct rte_flow_item items[4];
489 struct rte_flow_item_eth l2;
490 struct rte_flow_item_eth l2_mask;
492 struct rte_flow_item_ipv4 ipv4;
493 struct rte_flow_item_ipv6 ipv6;
496 struct rte_flow_item_ipv4 ipv4;
497 struct rte_flow_item_ipv6 ipv6;
500 struct rte_flow_item_udp udp;
501 struct rte_flow_item_tcp tcp;
504 struct rte_flow_item_udp udp;
505 struct rte_flow_item_tcp tcp;
507 struct rte_flow_action_queue queue;
510 /* Verbs specification header. */
511 struct ibv_spec_header {
512 enum ibv_flow_spec_type type;
513 uint16_t size;
514 };
517 * Check support for a given item.
520 * Item specification.
522 * Bit-masks covering supported fields to compare with spec, last and
523 * mask in \item.
525 * Bit-mask size in bytes.
528 * 0 on success, a negative errno value otherwise and rte_errno is set.
531 mlx5_flow_item_validate(const struct rte_flow_item *item,
532 const uint8_t *mask, unsigned int size)
534 if (!item->spec && (item->mask || item->last)) {
538 if (item->spec && !item->mask) {
540 const uint8_t *spec = item->spec;
542 for (i = 0; i < size; ++i)
543 if ((spec[i] | mask[i]) != mask[i]) {
548 if (item->last && !item->mask) {
550 const uint8_t *spec = item->last;
552 for (i = 0; i < size; ++i)
553 if ((spec[i] | mask[i]) != mask[i]) {
560 const uint8_t *spec = item->spec;
562 for (i = 0; i < size; ++i)
563 if ((spec[i] | mask[i]) != mask[i]) {
568 if (item->spec && item->last) {
571 const uint8_t *apply = mask;
577 for (i = 0; i < size; ++i) {
578 spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
579 last[i] = ((const uint8_t *)item->last)[i] & apply[i];
581 ret = memcmp(spec, last, size);
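/*
 * Note (added): every branch above enforces the same subset rule: each
 * byte of the user-supplied spec/last/mask may only set bits also present
 * in the PMD-supported mask, i.e. (user[i] | mask[i]) == mask[i]. For
 * example, with a supported mask byte of 0xf0, a spec byte of 0xf8 fails
 * (0xf8 | 0xf0 == 0xf8) and the item is rejected with EINVAL.
 */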
591 * Extract attributes to the parser.
594 * Flow rule attributes.
596 * Perform verbose error reporting if not NULL.
599 * 0 on success, a negative errno value otherwise and rte_errno is set.
602 mlx5_flow_convert_attributes(const struct rte_flow_attr *attr,
603 struct rte_flow_error *error)
606 rte_flow_error_set(error, ENOTSUP,
607 RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
609 "groups are not supported");
612 if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
613 rte_flow_error_set(error, ENOTSUP,
614 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
616 "priorities are not supported");
620 rte_flow_error_set(error, ENOTSUP,
621 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
623 "egress is not supported");
626 if (attr->transfer) {
627 rte_flow_error_set(error, ENOTSUP,
628 RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
630 "transfer is not supported");
633 if (!attr->ingress) {
634 rte_flow_error_set(error, ENOTSUP,
635 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
637 "only ingress is supported");
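/*
 * Illustration (not from the original source): the only attribute
 * combination the checks above accept.
 */
#if 0
const struct rte_flow_attr attr = {
	.group = 0,
	.priority = 0, /* or MLX5_CTRL_FLOW_PRIORITY */
	.ingress = 1,
	.egress = 0,
	.transfer = 0,
};
#endif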
644 * Extract the requested actions into the parser.
647 * Pointer to Ethernet device.
649 * Associated actions (list terminated by the END action).
651 * Perform verbose error reporting if not NULL.
652 * @param[in, out] parser
653 * Internal parser structure.
656 * 0 on success, a negative errno value otherwise and rte_errno is set.
659 mlx5_flow_convert_actions(struct rte_eth_dev *dev,
660 const struct rte_flow_action actions[],
661 struct rte_flow_error *error,
662 struct mlx5_flow_parse *parser)
664 enum { FATE = 1, MARK = 2, COUNT = 4, };
665 uint32_t overlap = 0;
666 struct priv *priv = dev->data->dev_private;
668 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
669 if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
671 } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
673 goto exit_action_overlap;
676 } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
677 const struct rte_flow_action_queue *queue =
678 (const struct rte_flow_action_queue *)
682 goto exit_action_overlap;
684 if (!queue || (queue->index > (priv->rxqs_n - 1)))
685 goto exit_action_not_supported;
686 parser->queues[0] = queue->index;
687 parser->rss_conf = (struct rte_flow_action_rss){
689 .queue = parser->queues,
691 } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
692 const struct rte_flow_action_rss *rss =
693 (const struct rte_flow_action_rss *)
695 const uint8_t *rss_key;
696 uint32_t rss_key_len;
700 goto exit_action_overlap;
703 rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ) {
704 rte_flow_error_set(error, EINVAL,
705 RTE_FLOW_ERROR_TYPE_ACTION,
707 "the only supported RSS hash"
708 " function is Toeplitz");
711 #ifndef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
712 if (parser->rss_conf.level > 1) {
713 rte_flow_error_set(error, EINVAL,
714 RTE_FLOW_ERROR_TYPE_ACTION,
716 "a nonzero RSS encapsulation"
717 " level is not supported");
721 if (parser->rss_conf.level > 2) {
722 rte_flow_error_set(error, EINVAL,
723 RTE_FLOW_ERROR_TYPE_ACTION,
725 "RSS encapsulation level"
726 " > 1 is not supported");
729 if (rss->types & MLX5_RSS_HF_MASK) {
730 rte_flow_error_set(error, EINVAL,
731 RTE_FLOW_ERROR_TYPE_ACTION,
733 "unsupported RSS type"
738 rss_key_len = rss->key_len;
741 rss_key_len = rss_hash_default_key_len;
742 rss_key = rss_hash_default_key;
744 if (rss_key_len != RTE_DIM(parser->rss_key)) {
745 rte_flow_error_set(error, EINVAL,
746 RTE_FLOW_ERROR_TYPE_ACTION,
748 "RSS hash key must be"
749 " exactly 40 bytes long");
752 if (!rss->queue_num) {
753 rte_flow_error_set(error, EINVAL,
754 RTE_FLOW_ERROR_TYPE_ACTION,
759 if (rss->queue_num > RTE_DIM(parser->queues)) {
760 rte_flow_error_set(error, EINVAL,
761 RTE_FLOW_ERROR_TYPE_ACTION,
763 "too many queues for RSS"
767 for (n = 0; n < rss->queue_num; ++n) {
768 if (rss->queue[n] >= priv->rxqs_n) {
769 rte_flow_error_set(error, EINVAL,
770 RTE_FLOW_ERROR_TYPE_ACTION,
772 "queue id > number of"
777 parser->rss_conf = (struct rte_flow_action_rss){
778 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
781 .key_len = rss_key_len,
782 .queue_num = rss->queue_num,
783 .key = memcpy(parser->rss_key, rss_key,
784 sizeof(*rss_key) * rss_key_len),
785 .queue = memcpy(parser->queues, rss->queue,
786 sizeof(*rss->queue) *
789 } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
790 const struct rte_flow_action_mark *mark =
791 (const struct rte_flow_action_mark *)
795 goto exit_action_overlap;
798 rte_flow_error_set(error, EINVAL,
799 RTE_FLOW_ERROR_TYPE_ACTION,
801 "mark must be defined");
803 } else if (mark->id >= MLX5_FLOW_MARK_MAX) {
804 rte_flow_error_set(error, ENOTSUP,
805 RTE_FLOW_ERROR_TYPE_ACTION,
807 "mark must be between 0"
812 parser->mark_id = mark->id;
813 } else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
815 goto exit_action_overlap;
818 } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
819 priv->config.flow_counter_en) {
821 goto exit_action_overlap;
825 goto exit_action_not_supported;
828 /* When fate is unknown, drop traffic. */
829 if (!(overlap & FATE))
831 if (parser->drop && parser->mark)
833 if (!parser->rss_conf.queue_num && !parser->drop) {
834 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
835 NULL, "no valid action");
839 exit_action_not_supported:
840 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
841 actions, "action not supported");
844 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
845 actions, "overlapping actions are not supported");
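/*
 * Illustration (not from the original source): an action list this parser
 * accepts; exactly one fate action (QUEUE, RSS or DROP), plus optional
 * MARK/FLAG and COUNT.
 */
#if 0
const struct rte_flow_action actions[] = {
	{
		.type = RTE_FLOW_ACTION_TYPE_MARK,
		.conf = &(const struct rte_flow_action_mark){ .id = 42 },
	},
	{
		.type = RTE_FLOW_ACTION_TYPE_QUEUE,
		.conf = &(const struct rte_flow_action_queue){ .index = 0 },
	},
	{ .type = RTE_FLOW_ACTION_TYPE_END },
};
#endif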
853 * Pattern specification (list terminated by the END pattern item).
855 * Perform verbose error reporting if not NULL.
856 * @param[in, out] parser
857 * Internal parser structure.
860 * 0 on success, a negative errno value otherwise and rte_errno is set.
863 mlx5_flow_convert_items_validate(struct rte_eth_dev *dev,
864 const struct rte_flow_item items[],
865 struct rte_flow_error *error,
866 struct mlx5_flow_parse *parser)
868 struct priv *priv = dev->data->dev_private;
869 const struct mlx5_flow_items *cur_item = mlx5_flow_items;
873 /* Initialise the offsets to start after the verbs attribute. */
874 for (i = 0; i != hash_rxq_init_n; ++i)
875 parser->queue[i].offset = sizeof(struct ibv_flow_attr);
876 for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
877 const struct mlx5_flow_items *token = NULL;
880 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
884 cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
886 if (cur_item->items[i] == items->type) {
887 token = &mlx5_flow_items[items->type];
893 goto exit_item_not_supported;
896 ret = mlx5_flow_item_validate(items,
897 (const uint8_t *)cur_item->mask,
900 goto exit_item_not_supported;
901 if (IS_TUNNEL(items->type)) {
902 if (parser->tunnel) {
903 rte_flow_error_set(error, ENOTSUP,
904 RTE_FLOW_ERROR_TYPE_ITEM,
906 "Cannot recognize multiple"
907 " tunnel encapsulations.");
910 if (!priv->config.tunnel_en &&
911 parser->rss_conf.level > 1) {
912 rte_flow_error_set(error, ENOTSUP,
913 RTE_FLOW_ERROR_TYPE_ITEM,
915 "RSS on tunnel is not supported");
918 parser->inner = IBV_FLOW_SPEC_INNER;
919 parser->tunnel = flow_ptype[items->type];
922 parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
924 for (n = 0; n != hash_rxq_init_n; ++n)
925 parser->queue[n].offset += cur_item->dst_sz;
929 parser->queue[HASH_RXQ_ETH].offset +=
930 sizeof(struct ibv_flow_spec_action_drop);
933 for (i = 0; i != hash_rxq_init_n; ++i)
934 parser->queue[i].offset +=
935 sizeof(struct ibv_flow_spec_action_tag);
938 unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
940 for (i = 0; i != hash_rxq_init_n; ++i)
941 parser->queue[i].offset += size;
944 exit_item_not_supported:
945 return rte_flow_error_set(error, -ret, RTE_FLOW_ERROR_TYPE_ITEM,
946 items, "item not supported");
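/*
 * Note (added): after this validation pass, each queue[i].offset holds the
 * total attribute size: sizeof(struct ibv_flow_attr) plus the dst_sz of
 * every matched item, plus the drop/tag/counter spec sizes selected above.
 * For eth / ipv4 / udp that is sizeof(struct ibv_flow_attr) +
 * sizeof(struct ibv_flow_spec_eth) + sizeof(struct ibv_flow_spec_ipv4_ext)
 * + sizeof(struct ibv_flow_spec_tcp_udp).
 */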
950 * Allocate memory space to store verbs flow attributes.
953 * Amount of bytes to allocate.
955 * Perform verbose error reporting if not NULL.
958 * A verbs flow attribute on success, NULL otherwise and rte_errno is set.
960 static struct ibv_flow_attr *
961 mlx5_flow_convert_allocate(unsigned int size, struct rte_flow_error *error)
963 struct ibv_flow_attr *ibv_attr;
965 ibv_attr = rte_calloc(__func__, 1, size, 0);
967 rte_flow_error_set(error, ENOMEM,
968 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
970 "cannot allocate verbs spec attributes");
977 * Give inner packet matching a higher priority than non-inner matching.
981 * Pointer to Ethernet device.
982 * @param[in, out] parser
983 * Internal parser structure.
985 * User flow attribute.
988 mlx5_flow_update_priority(struct rte_eth_dev *dev,
989 struct mlx5_flow_parse *parser,
990 const struct rte_flow_attr *attr)
992 struct priv *priv = dev->data->dev_private;
996 /*                    8 priorities   >= 16 priorities
997 * Control flow:      4-7            8-15
998 * User normal flow:  1-3            4-7
999 * User tunnel flow:  0-2            0-3
1001 priority = attr->priority * MLX5_VERBS_FLOW_PRIO_8;
1002 if (priv->config.max_verbs_prio == MLX5_VERBS_FLOW_PRIO_8)
1005 * Lower non-tunnel flows' Verbs priority by 1 if only 8 Verbs
1006 * priorities are supported, by 4 otherwise.
1008 if (!parser->inner) {
1009 if (priv->config.max_verbs_prio == MLX5_VERBS_FLOW_PRIO_8)
1012 priority += MLX5_VERBS_FLOW_PRIO_8 / 2;
1015 parser->queue[HASH_RXQ_ETH].ibv_attr->priority = priority +
1016 hash_rxq_init[HASH_RXQ_ETH].flow_priority;
1019 for (i = 0; i != hash_rxq_init_n; ++i) {
1020 if (!parser->queue[i].ibv_attr)
1022 parser->queue[i].ibv_attr->priority = priority +
1023 hash_rxq_init[i].flow_priority;
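/*
 * Worked example (added, assuming MLX5_VERBS_FLOW_PRIO_8 == 8 and 16 Verbs
 * priorities available): attr->priority == 1 gives priority = 8; a
 * non-tunnel flow then adds 8 / 2 == 4, and hash_rxq_init[i].flow_priority
 * is added per hash queue type in the loop above.
 */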
1028 * Finalise verbs flow attributes.
1030 * @param[in, out] parser
1031 * Internal parser structure.
1034 mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
1037 uint32_t inner = parser->inner;
1039 /* Don't create extra flows for outer RSS. */
1040 if (parser->tunnel && parser->rss_conf.level < 2)
1043 * Fill missing layers in verbs specifications, or compute the correct
1044 * offset to allocate the memory space for the attributes and
1045 * specifications.
1047 for (i = 0; i != hash_rxq_init_n - 1; ++i) {
1049 struct ibv_flow_spec_ipv4_ext ipv4;
1050 struct ibv_flow_spec_ipv6 ipv6;
1051 struct ibv_flow_spec_tcp_udp udp_tcp;
1052 struct ibv_flow_spec_eth eth;
1057 if (i == parser->layer)
1059 if (parser->layer == HASH_RXQ_ETH ||
1060 parser->layer == HASH_RXQ_TUNNEL) {
1061 if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
1062 size = sizeof(struct ibv_flow_spec_ipv4_ext);
1063 specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
1064 .type = inner | IBV_FLOW_SPEC_IPV4_EXT,
1068 size = sizeof(struct ibv_flow_spec_ipv6);
1069 specs.ipv6 = (struct ibv_flow_spec_ipv6){
1070 .type = inner | IBV_FLOW_SPEC_IPV6,
1074 if (parser->queue[i].ibv_attr) {
1075 dst = (void *)((uintptr_t)
1076 parser->queue[i].ibv_attr +
1077 parser->queue[i].offset);
1078 memcpy(dst, &specs, size);
1079 ++parser->queue[i].ibv_attr->num_of_specs;
1081 parser->queue[i].offset += size;
1083 if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
1084 (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
1085 size = sizeof(struct ibv_flow_spec_tcp_udp);
1086 specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
1087 .type = inner | ((i == HASH_RXQ_UDPV4 ||
1088 i == HASH_RXQ_UDPV6) ?
1093 if (parser->queue[i].ibv_attr) {
1094 dst = (void *)((uintptr_t)
1095 parser->queue[i].ibv_attr +
1096 parser->queue[i].offset);
1097 memcpy(dst, &specs, size);
1098 ++parser->queue[i].ibv_attr->num_of_specs;
1100 parser->queue[i].offset += size;
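/*
 * Note (added): for an eth-only pattern expanded for
 * ETH_RSS_NONFRAG_IPV4_TCP, the HASH_RXQ_TCPV4 attribute receives
 * synthetic IPv4 and TCP/UDP specs here, so the resulting Verbs flow still
 * describes the complete layer stack.
 */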
1106 * Update flows according to pattern and RSS hash fields.
1108 * @param[in, out] parser
1109 * Internal parser structure.
1112 * 0 on success, a negative errno value otherwise and rte_errno is set.
1115 mlx5_flow_convert_rss(struct mlx5_flow_parse *parser)
1118 enum hash_rxq_type start;
1119 enum hash_rxq_type layer;
1120 int outer = parser->tunnel && parser->rss_conf.level < 2;
1121 uint64_t rss = parser->rss_conf.types;
1123 /* Default to outer RSS. */
1124 if (!parser->rss_conf.level)
1125 parser->rss_conf.level = 1;
1126 layer = outer ? parser->out_layer : parser->layer;
1127 if (layer == HASH_RXQ_TUNNEL)
1128 layer = HASH_RXQ_ETH;
1130 /* Only one hash type for outer RSS. */
1131 if (rss && layer == HASH_RXQ_ETH) {
1132 start = HASH_RXQ_TCPV4;
1133 } else if (rss && layer != HASH_RXQ_ETH &&
1134 !(rss & hash_rxq_init[layer].dpdk_rss_hf)) {
1135 /* If RSS does not match the L4 pattern, try L3 RSS. */
1136 if (layer < HASH_RXQ_IPV4)
1137 layer = HASH_RXQ_IPV4;
1138 else if (layer > HASH_RXQ_IPV4 && layer < HASH_RXQ_IPV6)
1139 layer = HASH_RXQ_IPV6;
1144 /* Scan first valid hash type. */
1145 for (i = start; rss && i <= layer; ++i) {
1146 if (!parser->queue[i].ibv_attr)
1148 if (hash_rxq_init[i].dpdk_rss_hf & rss)
1151 if (rss && i <= layer)
1152 parser->queue[layer].hash_fields =
1153 hash_rxq_init[i].hash_fields;
1154 /* Trim unused hash types. */
1155 for (i = 0; i != hash_rxq_init_n; ++i) {
1156 if (parser->queue[i].ibv_attr && i != layer) {
1157 rte_free(parser->queue[i].ibv_attr);
1158 parser->queue[i].ibv_attr = NULL;
1162 /* Expand for inner or normal RSS. */
1163 if (rss && (layer == HASH_RXQ_ETH || layer == HASH_RXQ_IPV4))
1164 start = HASH_RXQ_TCPV4;
1165 else if (rss && layer == HASH_RXQ_IPV6)
1166 start = HASH_RXQ_TCPV6;
1169 /* For an L4 pattern, fall back to L3 RSS when no L4 RSS type matches. */
1170 /* Trim unused hash types. */
1171 for (i = 0; i != hash_rxq_init_n; ++i) {
1172 if (!parser->queue[i].ibv_attr)
1174 if (i < start || i > layer) {
1175 rte_free(parser->queue[i].ibv_attr);
1176 parser->queue[i].ibv_attr = NULL;
1181 if (hash_rxq_init[i].dpdk_rss_hf & rss) {
1182 parser->queue[i].hash_fields =
1183 hash_rxq_init[i].hash_fields;
1184 } else if (i != layer) {
1185 /* Remove unused RSS expansion. */
1186 rte_free(parser->queue[i].ibv_attr);
1187 parser->queue[i].ibv_attr = NULL;
1188 } else if (layer < HASH_RXQ_IPV4 &&
1189 (hash_rxq_init[HASH_RXQ_IPV4].dpdk_rss_hf &
1191 /* Allow IPv4 RSS on L4 pattern. */
1192 parser->queue[i].hash_fields =
1193 hash_rxq_init[HASH_RXQ_IPV4]
1195 } else if (i > HASH_RXQ_IPV4 && i < HASH_RXQ_IPV6 &&
1196 (hash_rxq_init[HASH_RXQ_IPV6].dpdk_rss_hf &
1198 /* Allow IPv6 RSS on L4 pattern. */
1199 parser->queue[i].hash_fields =
1200 hash_rxq_init[HASH_RXQ_IPV6]
1209 * Validate and convert a flow supported by the NIC.
1212 * Pointer to Ethernet device.
1214 * Flow rule attributes.
1215 * @param[in] pattern
1216 * Pattern specification (list terminated by the END pattern item).
1217 * @param[in] actions
1218 * Associated actions (list terminated by the END action).
1220 * Perform verbose error reporting if not NULL.
1221 * @param[in, out] parser
1222 * Internal parser structure.
1225 * 0 on success, a negative errno value otherwise and rte_errno is set.
1228 mlx5_flow_convert(struct rte_eth_dev *dev,
1229 const struct rte_flow_attr *attr,
1230 const struct rte_flow_item items[],
1231 const struct rte_flow_action actions[],
1232 struct rte_flow_error *error,
1233 struct mlx5_flow_parse *parser)
1235 const struct mlx5_flow_items *cur_item = mlx5_flow_items;
1239 /* First step. Validate the attributes, items and actions. */
1240 *parser = (struct mlx5_flow_parse){
1241 .create = parser->create,
1242 .layer = HASH_RXQ_ETH,
1243 .mark_id = MLX5_FLOW_MARK_DEFAULT,
1245 ret = mlx5_flow_convert_attributes(attr, error);
1248 ret = mlx5_flow_convert_actions(dev, actions, error, parser);
1251 ret = mlx5_flow_convert_items_validate(dev, items, error, parser);
1254 mlx5_flow_convert_finalise(parser);
1257 * Allocate the memory space to store verbs specifications.
1260 unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;
1262 parser->queue[HASH_RXQ_ETH].ibv_attr =
1263 mlx5_flow_convert_allocate(offset, error);
1264 if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
1266 parser->queue[HASH_RXQ_ETH].offset =
1267 sizeof(struct ibv_flow_attr);
1269 for (i = 0; i != hash_rxq_init_n; ++i) {
1270 unsigned int offset;
1272 offset = parser->queue[i].offset;
1273 parser->queue[i].ibv_attr =
1274 mlx5_flow_convert_allocate(offset, error);
1275 if (!parser->queue[i].ibv_attr)
1277 parser->queue[i].offset = sizeof(struct ibv_flow_attr);
1280 /* Third step. Parse the items and fill the specifications. */
1283 parser->layer = HASH_RXQ_ETH;
1284 for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
1285 struct mlx5_flow_data data = {
1291 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
1293 cur_item = &mlx5_flow_items[items->type];
1294 ret = cur_item->convert(items,
1295 (cur_item->default_mask ?
1296 cur_item->default_mask :
1302 if (!parser->drop) {
1303 /* RSS check, remove unused hash types. */
1304 ret = mlx5_flow_convert_rss(parser);
1307 /* Complete missing specification. */
1308 mlx5_flow_convert_finalise(parser);
1310 mlx5_flow_update_priority(dev, parser, attr);
1312 mlx5_flow_create_flag_mark(parser, parser->mark_id);
1313 if (parser->count && parser->create) {
1314 mlx5_flow_create_count(dev, parser);
1316 goto exit_count_error;
1319 /* Only validation was requested; all resources must be released. */
1320 if (!parser->create) {
1321 for (i = 0; i != hash_rxq_init_n; ++i) {
1322 if (parser->queue[i].ibv_attr) {
1323 rte_free(parser->queue[i].ibv_attr);
1324 parser->queue[i].ibv_attr = NULL;
1330 for (i = 0; i != hash_rxq_init_n; ++i) {
1331 if (parser->queue[i].ibv_attr) {
1332 rte_free(parser->queue[i].ibv_attr);
1333 parser->queue[i].ibv_attr = NULL;
1336 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1337 NULL, "cannot allocate verbs spec attributes");
1340 rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1341 NULL, "cannot create counter");
1346 * Copy the created specification into the flow.
1349 * Internal parser structure.
1351 * Specification to copy.
1353 * Size in bytes of the specification to copy.
1356 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
1362 for (i = 0; i != hash_rxq_init_n; ++i) {
1363 if (!parser->queue[i].ibv_attr)
1365 dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1366 parser->queue[i].offset);
1367 memcpy(dst, src, size);
1368 ++parser->queue[i].ibv_attr->num_of_specs;
1369 parser->queue[i].offset += size;
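/*
 * Note (added): every converter below funnels its Verbs spec through this
 * helper; num_of_specs and offset advance in lockstep so each new spec
 * lands directly behind the previous one in every active attribute.
 */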
1374 * Convert Ethernet item to Verbs specification.
1377 * Item specification.
1378 * @param[in] default_mask
1379 * Default bit-masks to use when item->mask is not provided.
1380 * @param[in, out] data
1384 * 0 on success, a negative errno value otherwise and rte_errno is set.
1387 mlx5_flow_create_eth(const struct rte_flow_item *item,
1388 const void *default_mask,
1389 struct mlx5_flow_data *data)
1391 const struct rte_flow_item_eth *spec = item->spec;
1392 const struct rte_flow_item_eth *mask = item->mask;
1393 struct mlx5_flow_parse *parser = data->parser;
1394 const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1395 struct ibv_flow_spec_eth eth = {
1396 .type = parser->inner | IBV_FLOW_SPEC_ETH,
1400 parser->layer = HASH_RXQ_ETH;
1405 mask = default_mask;
1406 memcpy(ð.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
1407 memcpy(ð.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
1408 eth.val.ether_type = spec->type;
1409 memcpy(ð.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
1410 memcpy(ð.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
1411 eth.mask.ether_type = mask->type;
1412 /* Remove unwanted bits from values. */
1413 for (i = 0; i < ETHER_ADDR_LEN; ++i) {
1414 eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
1415 eth.val.src_mac[i] &= eth.mask.src_mac[i];
1417 eth.val.ether_type &= eth.mask.ether_type;
1419 mlx5_flow_create_copy(parser, ð, eth_size);
1424 * Convert VLAN item to Verbs specification.
1427 * Item specification.
1428 * @param[in] default_mask
1429 * Default bit-masks to use when item->mask is not provided.
1430 * @param[in, out] data
1434 * 0 on success, a negative errno value otherwise and rte_errno is set.
1437 mlx5_flow_create_vlan(const struct rte_flow_item *item,
1438 const void *default_mask,
1439 struct mlx5_flow_data *data)
1441 const struct rte_flow_item_vlan *spec = item->spec;
1442 const struct rte_flow_item_vlan *mask = item->mask;
1443 struct mlx5_flow_parse *parser = data->parser;
1444 struct ibv_flow_spec_eth *eth;
1445 const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1446 const char *msg = "VLAN cannot be empty";
1451 mask = default_mask;
1453 for (i = 0; i != hash_rxq_init_n; ++i) {
1454 if (!parser->queue[i].ibv_attr)
1457 eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1458 parser->queue[i].offset - eth_size);
1459 eth->val.vlan_tag = spec->tci;
1460 eth->mask.vlan_tag = mask->tci;
1461 eth->val.vlan_tag &= eth->mask.vlan_tag;
1463 * From verbs perspective an empty VLAN is equivalent
1464 * to a packet without VLAN layer.
1466 if (!eth->mask.vlan_tag)
1468 /* Outer TPID cannot be matched. */
1469 if (eth->mask.ether_type) {
1470 msg = "VLAN TPID matching is not supported";
1473 eth->val.ether_type = spec->inner_type;
1474 eth->mask.ether_type = mask->inner_type;
1475 eth->val.ether_type &= eth->mask.ether_type;
1480 return rte_flow_error_set(data->error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM,
1485 * Convert IPv4 item to Verbs specification.
1488 * Item specification.
1489 * @param[in] default_mask
1490 * Default bit-masks to use when item->mask is not provided.
1491 * @param[in, out] data
1495 * 0 on success, a negative errno value otherwise and rte_errno is set.
1498 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
1499 const void *default_mask,
1500 struct mlx5_flow_data *data)
1502 struct priv *priv = data->dev->data->dev_private;
1503 const struct rte_flow_item_ipv4 *spec = item->spec;
1504 const struct rte_flow_item_ipv4 *mask = item->mask;
1505 struct mlx5_flow_parse *parser = data->parser;
1506 unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
1507 struct ibv_flow_spec_ipv4_ext ipv4 = {
1508 .type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
1512 if (parser->layer == HASH_RXQ_TUNNEL &&
1513 parser->tunnel == ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] &&
1514 !priv->config.l3_vxlan_en)
1515 return rte_flow_error_set(data->error, EINVAL,
1516 RTE_FLOW_ERROR_TYPE_ITEM,
1518 "L3 VXLAN not enabled by device"
1519 " parameter and/or not configured"
1521 parser->layer = HASH_RXQ_IPV4;
1524 mask = default_mask;
1525 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
1526 .src_ip = spec->hdr.src_addr,
1527 .dst_ip = spec->hdr.dst_addr,
1528 .proto = spec->hdr.next_proto_id,
1529 .tos = spec->hdr.type_of_service,
1531 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
1532 .src_ip = mask->hdr.src_addr,
1533 .dst_ip = mask->hdr.dst_addr,
1534 .proto = mask->hdr.next_proto_id,
1535 .tos = mask->hdr.type_of_service,
1537 /* Remove unwanted bits from values. */
1538 ipv4.val.src_ip &= ipv4.mask.src_ip;
1539 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
1540 ipv4.val.proto &= ipv4.mask.proto;
1541 ipv4.val.tos &= ipv4.mask.tos;
1543 mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
1548 * Convert IPv6 item to Verbs specification.
1551 * Item specification.
1552 * @param[in] default_mask
1553 * Default bit-masks to use when item->mask is not provided.
1554 * @param[in, out] data
1558 * 0 on success, a negative errno value otherwise and rte_errno is set.
1561 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
1562 const void *default_mask,
1563 struct mlx5_flow_data *data)
1565 struct priv *priv = data->dev->data->dev_private;
1566 const struct rte_flow_item_ipv6 *spec = item->spec;
1567 const struct rte_flow_item_ipv6 *mask = item->mask;
1568 struct mlx5_flow_parse *parser = data->parser;
1569 unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
1570 struct ibv_flow_spec_ipv6 ipv6 = {
1571 .type = parser->inner | IBV_FLOW_SPEC_IPV6,
1575 if (parser->layer == HASH_RXQ_TUNNEL &&
1576 parser->tunnel == ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] &&
1577 !priv->config.l3_vxlan_en)
1578 return rte_flow_error_set(data->error, EINVAL,
1579 RTE_FLOW_ERROR_TYPE_ITEM,
1581 "L3 VXLAN not enabled by device"
1582 " parameter and/or not configured"
1584 parser->layer = HASH_RXQ_IPV6;
1587 uint32_t vtc_flow_val;
1588 uint32_t vtc_flow_mask;
1591 mask = default_mask;
1592 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
1593 RTE_DIM(ipv6.val.src_ip));
1594 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
1595 RTE_DIM(ipv6.val.dst_ip));
1596 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
1597 RTE_DIM(ipv6.mask.src_ip));
1598 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
1599 RTE_DIM(ipv6.mask.dst_ip));
1600 vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
1601 vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
1602 ipv6.val.flow_label =
1603 rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
1605 ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
1607 ipv6.val.next_hdr = spec->hdr.proto;
1608 ipv6.val.hop_limit = spec->hdr.hop_limits;
1609 ipv6.mask.flow_label =
1610 rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
1612 ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
1614 ipv6.mask.next_hdr = mask->hdr.proto;
1615 ipv6.mask.hop_limit = mask->hdr.hop_limits;
1616 /* Remove unwanted bits from values. */
1617 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
1618 ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
1619 ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
1621 ipv6.val.flow_label &= ipv6.mask.flow_label;
1622 ipv6.val.traffic_class &= ipv6.mask.traffic_class;
1623 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
1624 ipv6.val.hop_limit &= ipv6.mask.hop_limit;
1626 mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
1631 * Convert UDP item to Verbs specification.
1634 * Item specification.
1635 * @param[in] default_mask
1636 * Default bit-masks to use when item->mask is not provided.
1637 * @param[in, out] data
1641 * 0 on success, a negative errno value otherwise and rte_errno is set.
1644 mlx5_flow_create_udp(const struct rte_flow_item *item,
1645 const void *default_mask,
1646 struct mlx5_flow_data *data)
1648 const struct rte_flow_item_udp *spec = item->spec;
1649 const struct rte_flow_item_udp *mask = item->mask;
1650 struct mlx5_flow_parse *parser = data->parser;
1651 unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1652 struct ibv_flow_spec_tcp_udp udp = {
1653 .type = parser->inner | IBV_FLOW_SPEC_UDP,
1657 if (parser->layer == HASH_RXQ_IPV4)
1658 parser->layer = HASH_RXQ_UDPV4;
1660 parser->layer = HASH_RXQ_UDPV6;
1663 mask = default_mask;
1664 udp.val.dst_port = spec->hdr.dst_port;
1665 udp.val.src_port = spec->hdr.src_port;
1666 udp.mask.dst_port = mask->hdr.dst_port;
1667 udp.mask.src_port = mask->hdr.src_port;
1668 /* Remove unwanted bits from values. */
1669 udp.val.src_port &= udp.mask.src_port;
1670 udp.val.dst_port &= udp.mask.dst_port;
1672 mlx5_flow_create_copy(parser, &udp, udp_size);
1677 * Convert TCP item to Verbs specification.
1680 * Item specification.
1681 * @param[in] default_mask
1682 * Default bit-masks to use when item->mask is not provided.
1683 * @param[in, out] data
1687 * 0 on success, a negative errno value otherwise and rte_errno is set.
1690 mlx5_flow_create_tcp(const struct rte_flow_item *item,
1691 const void *default_mask,
1692 struct mlx5_flow_data *data)
1694 const struct rte_flow_item_tcp *spec = item->spec;
1695 const struct rte_flow_item_tcp *mask = item->mask;
1696 struct mlx5_flow_parse *parser = data->parser;
1697 unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1698 struct ibv_flow_spec_tcp_udp tcp = {
1699 .type = parser->inner | IBV_FLOW_SPEC_TCP,
1703 if (parser->layer == HASH_RXQ_IPV4)
1704 parser->layer = HASH_RXQ_TCPV4;
1706 parser->layer = HASH_RXQ_TCPV6;
1709 mask = default_mask;
1710 tcp.val.dst_port = spec->hdr.dst_port;
1711 tcp.val.src_port = spec->hdr.src_port;
1712 tcp.mask.dst_port = mask->hdr.dst_port;
1713 tcp.mask.src_port = mask->hdr.src_port;
1714 /* Remove unwanted bits from values. */
1715 tcp.val.src_port &= tcp.mask.src_port;
1716 tcp.val.dst_port &= tcp.mask.dst_port;
1718 mlx5_flow_create_copy(parser, &tcp, tcp_size);
1723 * Convert VXLAN item to Verbs specification.
1726 * Item specification.
1727 * @param[in] default_mask
1728 * Default bit-masks to use when item->mask is not provided.
1729 * @param[in, out] data
1733 * 0 on success, a negative errno value otherwise and rte_errno is set.
1736 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
1737 const void *default_mask,
1738 struct mlx5_flow_data *data)
1740 const struct rte_flow_item_vxlan *spec = item->spec;
1741 const struct rte_flow_item_vxlan *mask = item->mask;
1742 struct mlx5_flow_parse *parser = data->parser;
1743 unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1744 struct ibv_flow_spec_tunnel vxlan = {
1745 .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1754 parser->inner = IBV_FLOW_SPEC_INNER;
1755 parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)];
1756 parser->out_layer = parser->layer;
1757 parser->layer = HASH_RXQ_TUNNEL;
1758 /* Default VXLAN to outer RSS. */
1759 if (!parser->rss_conf.level)
1760 parser->rss_conf.level = 1;
1763 mask = default_mask;
1764 memcpy(&id.vni[1], spec->vni, 3);
1765 vxlan.val.tunnel_id = id.vlan_id;
1766 memcpy(&id.vni[1], mask->vni, 3);
1767 vxlan.mask.tunnel_id = id.vlan_id;
1768 /* Remove unwanted bits from values. */
1769 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
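/*
 * Note (added): the copies above write the 3-byte VNI into bytes 1-3 of a
 * zero-initialized 4-byte field, giving {0x00, vni[0], vni[1], vni[2]} in
 * memory, i.e. the 24-bit VNI in the low bits of the big-endian 32-bit
 * tunnel_id (VNI 0x123456 -> bytes 00 12 34 56).
 */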
1772 * Tunnel id 0 is equivalent to not adding a VXLAN layer: if only this
1773 * layer is defined in the Verbs specification, it is interpreted as a
1774 * wildcard and all packets will match this rule; if it follows a full
1775 * stack layer (e.g. eth / ipv4 / udp), all packets matching the layers
1776 * before will also match this rule.
1777 * To avoid such a situation, VNI 0 is currently refused.
1779 if (!vxlan.val.tunnel_id)
1780 return rte_flow_error_set(data->error, EINVAL,
1781 RTE_FLOW_ERROR_TYPE_ITEM,
1783 "VxLAN vni cannot be 0");
1784 mlx5_flow_create_copy(parser, &vxlan, size);
1789 * Convert GRE item to Verbs specification.
1792 * Item specification.
1793 * @param[in] default_mask
1794 * Default bit-masks to use when item->mask is not provided.
1795 * @param[in, out] data
1799 * 0 on success, a negative errno value otherwise and rte_errno is set.
1802 mlx5_flow_create_gre(const struct rte_flow_item *item __rte_unused,
1803 const void *default_mask __rte_unused,
1804 struct mlx5_flow_data *data)
1806 struct mlx5_flow_parse *parser = data->parser;
1807 unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1808 struct ibv_flow_spec_tunnel tunnel = {
1809 .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1812 struct ibv_flow_spec_ipv4_ext *ipv4;
1813 struct ibv_flow_spec_ipv6 *ipv6;
1816 parser->inner = IBV_FLOW_SPEC_INNER;
1817 parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_GRE)];
1818 parser->out_layer = parser->layer;
1819 parser->layer = HASH_RXQ_TUNNEL;
1820 /* Default GRE to inner RSS. */
1821 if (!parser->rss_conf.level)
1822 parser->rss_conf.level = 2;
1823 /* Update encapsulation IP layer protocol. */
1824 for (i = 0; i != hash_rxq_init_n; ++i) {
1825 if (!parser->queue[i].ibv_attr)
1827 if (parser->out_layer == HASH_RXQ_IPV4) {
1828 ipv4 = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1829 parser->queue[i].offset -
1830 sizeof(struct ibv_flow_spec_ipv4_ext));
1831 if (ipv4->mask.proto && ipv4->val.proto != MLX5_GRE)
1833 ipv4->val.proto = MLX5_GRE;
1834 ipv4->mask.proto = 0xff;
1835 } else if (parser->out_layer == HASH_RXQ_IPV6) {
1836 ipv6 = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1837 parser->queue[i].offset -
1838 sizeof(struct ibv_flow_spec_ipv6));
1839 if (ipv6->mask.next_hdr &&
1840 ipv6->val.next_hdr != MLX5_GRE)
1842 ipv6->val.next_hdr = MLX5_GRE;
1843 ipv6->mask.next_hdr = 0xff;
1846 if (i != hash_rxq_init_n)
1847 return rte_flow_error_set(data->error, EINVAL,
1848 RTE_FLOW_ERROR_TYPE_ITEM,
1850 "IP protocol of GRE must be 47");
1851 mlx5_flow_create_copy(parser, &tunnel, size);
1856 * Convert mark/flag action to Verbs specification.
1859 * Internal parser structure.
1864 * 0 on success, a negative errno value otherwise and rte_errno is set.
1867 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
1869 unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1870 struct ibv_flow_spec_action_tag tag = {
1871 .type = IBV_FLOW_SPEC_ACTION_TAG,
1873 .tag_id = mlx5_flow_mark_set(mark_id),
1876 assert(parser->mark);
1877 mlx5_flow_create_copy(parser, &tag, size);
1882 * Convert count action to Verbs specification.
1885 * Pointer to Ethernet device.
1887 * Pointer to MLX5 flow parser structure.
1890 * 0 on success, a negative errno value otherwise and rte_errno is set.
1893 mlx5_flow_create_count(struct rte_eth_dev *dev __rte_unused,
1894 struct mlx5_flow_parse *parser __rte_unused)
1896 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
1897 struct priv *priv = dev->data->dev_private;
1898 unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
1899 struct ibv_counter_set_init_attr init_attr = {0};
1900 struct ibv_flow_spec_counter_action counter = {
1901 .type = IBV_FLOW_SPEC_ACTION_COUNT,
1903 .counter_set_handle = 0,
1906 init_attr.counter_set_id = 0;
1907 parser->cs = mlx5_glue->create_counter_set(priv->ctx, &init_attr);
1912 counter.counter_set_handle = parser->cs->handle;
1913 mlx5_flow_create_copy(parser, &counter, size);
1919 * Complete flow rule creation with a drop queue.
1922 * Pointer to Ethernet device.
1924 * Internal parser structure.
1926 * Pointer to the rte_flow.
1928 * Perform verbose error reporting if not NULL.
1931 * 0 on success, a negative errno value otherwise and rte_errno is set.
1934 mlx5_flow_create_action_queue_drop(struct rte_eth_dev *dev,
1935 struct mlx5_flow_parse *parser,
1936 struct rte_flow *flow,
1937 struct rte_flow_error *error)
1939 struct priv *priv = dev->data->dev_private;
1940 struct ibv_flow_spec_action_drop *drop;
1941 unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
1946 drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
1947 parser->queue[HASH_RXQ_ETH].offset);
1948 *drop = (struct ibv_flow_spec_action_drop){
1949 .type = IBV_FLOW_SPEC_ACTION_DROP,
1952 ++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
1953 parser->queue[HASH_RXQ_ETH].offset += size;
1954 flow->frxq[HASH_RXQ_ETH].ibv_attr =
1955 parser->queue[HASH_RXQ_ETH].ibv_attr;
1957 flow->cs = parser->cs;
1958 if (!priv->dev->data->dev_started)
1960 parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
1961 flow->frxq[HASH_RXQ_ETH].ibv_flow =
1962 mlx5_glue->create_flow(priv->flow_drop_queue->qp,
1963 flow->frxq[HASH_RXQ_ETH].ibv_attr);
1964 if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1965 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1966 NULL, "flow rule creation failure");
1972 if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1973 claim_zero(mlx5_glue->destroy_flow
1974 (flow->frxq[HASH_RXQ_ETH].ibv_flow));
1975 flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
1977 if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
1978 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
1979 flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
1982 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
1990 * Create hash Rx queues when RSS is enabled.
1993 * Pointer to Ethernet device.
1995 * Internal parser structure.
1997 * Pointer to the rte_flow.
1999 * Perform verbose error reporting if not NULL.
2002 * 0 on success, a negative errno value otherwise and rte_errno is set.
2005 mlx5_flow_create_action_queue_rss(struct rte_eth_dev *dev,
2006 struct mlx5_flow_parse *parser,
2007 struct rte_flow *flow,
2008 struct rte_flow_error *error)
2010 struct priv *priv = dev->data->dev_private;
2013 for (i = 0; i != hash_rxq_init_n; ++i) {
2014 if (!parser->queue[i].ibv_attr)
2016 flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
2017 parser->queue[i].ibv_attr = NULL;
2018 flow->frxq[i].hash_fields = parser->queue[i].hash_fields;
2019 if (!priv->dev->data->dev_started)
2021 flow->frxq[i].hrxq =
2023 parser->rss_conf.key,
2024 parser->rss_conf.key_len,
2025 flow->frxq[i].hash_fields,
2026 parser->rss_conf.queue,
2027 parser->rss_conf.queue_num,
2029 parser->rss_conf.level);
2030 if (flow->frxq[i].hrxq)
2032 flow->frxq[i].hrxq =
2034 parser->rss_conf.key,
2035 parser->rss_conf.key_len,
2036 flow->frxq[i].hash_fields,
2037 parser->rss_conf.queue,
2038 parser->rss_conf.queue_num,
2040 parser->rss_conf.level);
2041 if (!flow->frxq[i].hrxq) {
2042 return rte_flow_error_set(error, ENOMEM,
2043 RTE_FLOW_ERROR_TYPE_HANDLE,
2045 "cannot create hash rxq");
2052 * RXQ update after flow rule creation.
2055 * Pointer to Ethernet device.
2057 * Pointer to the flow rule.
2060 mlx5_flow_create_update_rxqs(struct rte_eth_dev *dev, struct rte_flow *flow)
2062 struct priv *priv = dev->data->dev_private;
2066 if (!dev->data->dev_started)
2068 for (i = 0; i != flow->rss_conf.queue_num; ++i) {
2069 struct mlx5_rxq_data *rxq_data = (*priv->rxqs)
2070 [(*flow->queues)[i]];
2071 struct mlx5_rxq_ctrl *rxq_ctrl =
2072 container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
2073 uint8_t tunnel = PTYPE_IDX(flow->tunnel);
2075 rxq_data->mark |= flow->mark;
2078 rxq_ctrl->tunnel_types[tunnel] += 1;
2079 /* Clear the tunnel type if more than one tunnel type is set. */
2080 for (j = 0; j != RTE_DIM(rxq_ctrl->tunnel_types); ++j) {
2083 if (rxq_ctrl->tunnel_types[j] > 0) {
2084 rxq_data->tunnel = 0;
2088 if (j == RTE_DIM(rxq_ctrl->tunnel_types))
2089 rxq_data->tunnel = flow->tunnel;
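/*
 * Note (added): tunnel_types[] counts flows per tunnel kind on each Rx
 * queue. While a queue carries a single kind, its packet-type hint is
 * reported through rxq_data->tunnel; once flows of different kinds share
 * the queue the hint is cleared, as it would be ambiguous.
 */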
2094 * Complete flow rule creation.
2097 * Pointer to Ethernet device.
2099 * Internal parser structure.
2101 * Pointer to the rte_flow.
2103 * Perform verbose error reporting if not NULL.
2106 * 0 on success, a negative errno value otherwise and rte_errno is set.
2109 mlx5_flow_create_action_queue(struct rte_eth_dev *dev,
2110 struct mlx5_flow_parse *parser,
2111 struct rte_flow *flow,
2112 struct rte_flow_error *error)
2114 struct priv *priv = dev->data->dev_private;
2117 unsigned int flows_n = 0;
2121 assert(!parser->drop);
2122 ret = mlx5_flow_create_action_queue_rss(dev, parser, flow, error);
2126 flow->cs = parser->cs;
2127 if (!priv->dev->data->dev_started)
2129 for (i = 0; i != hash_rxq_init_n; ++i) {
2130 if (!flow->frxq[i].hrxq)
2132 flow->frxq[i].ibv_flow =
2133 mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
2134 flow->frxq[i].ibv_attr);
2135 if (!flow->frxq[i].ibv_flow) {
2136 rte_flow_error_set(error, ENOMEM,
2137 RTE_FLOW_ERROR_TYPE_HANDLE,
2138 NULL, "flow rule creation failure");
2142 DRV_LOG(DEBUG, "port %u %p type %d QP %p ibv_flow %p",
2145 (void *)flow->frxq[i].hrxq->qp,
2146 (void *)flow->frxq[i].ibv_flow);
2149 rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE,
2150 NULL, "internal error in flow creation");
2153 mlx5_flow_create_update_rxqs(dev, flow);
2156 ret = rte_errno; /* Save rte_errno before cleanup. */
2158 for (i = 0; i != hash_rxq_init_n; ++i) {
2159 if (flow->frxq[i].ibv_flow) {
2160 struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;
2162 claim_zero(mlx5_glue->destroy_flow(ibv_flow));
2164 if (flow->frxq[i].hrxq)
2165 mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
2166 if (flow->frxq[i].ibv_attr)
2167 rte_free(flow->frxq[i].ibv_attr);
2170 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2174 rte_errno = ret; /* Restore rte_errno. */
2182 * Pointer to Ethernet device.
2184 * Pointer to a TAILQ flow list.
2186 * Flow rule attributes.
2187 * @param[in] pattern
2188 * Pattern specification (list terminated by the END pattern item).
2189 * @param[in] actions
2190 * Associated actions (list terminated by the END action).
2192 * Perform verbose error reporting if not NULL.
2195 * A flow on success, NULL otherwise and rte_errno is set.
2197 static struct rte_flow *
2198 mlx5_flow_list_create(struct rte_eth_dev *dev,
2199 struct mlx5_flows *list,
2200 const struct rte_flow_attr *attr,
2201 const struct rte_flow_item items[],
2202 const struct rte_flow_action actions[],
2203 struct rte_flow_error *error)
2205 struct mlx5_flow_parse parser = { .create = 1, };
2206 struct rte_flow *flow = NULL;
2210 ret = mlx5_flow_convert(dev, attr, items, actions, error, &parser);
2213 flow = rte_calloc(__func__, 1,
2215 parser.rss_conf.queue_num * sizeof(uint16_t),
2218 rte_flow_error_set(error, ENOMEM,
2219 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2221 "cannot allocate flow memory");
2224 /* Copy configuration. */
2225 flow->queues = (uint16_t (*)[])(flow + 1);
2226 flow->tunnel = parser.tunnel;
2227 flow->rss_conf = (struct rte_flow_action_rss){
2228 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
2230 .types = parser.rss_conf.types,
2231 .key_len = parser.rss_conf.key_len,
2232 .queue_num = parser.rss_conf.queue_num,
2233 .key = memcpy(flow->rss_key, parser.rss_conf.key,
2234 sizeof(*parser.rss_conf.key) *
2235 parser.rss_conf.key_len),
2236 .queue = memcpy(flow->queues, parser.rss_conf.queue,
2237 sizeof(*parser.rss_conf.queue) *
2238 parser.rss_conf.queue_num),
2240 flow->mark = parser.mark;
2241 /* Finalise the flow. */
2243 ret = mlx5_flow_create_action_queue_drop(dev, &parser, flow,
2246 ret = mlx5_flow_create_action_queue(dev, &parser, flow, error);
2249 TAILQ_INSERT_TAIL(list, flow, next);
2250 DRV_LOG(DEBUG, "port %u flow created %p", dev->data->port_id,
2254 DRV_LOG(ERR, "port %u flow creation error: %s", dev->data->port_id,
2256 for (i = 0; i != hash_rxq_init_n; ++i) {
2257 if (parser.queue[i].ibv_attr)
2258 rte_free(parser.queue[i].ibv_attr);
2265 * Validate a flow supported by the NIC.
2267 * @see rte_flow_validate()
2271 mlx5_flow_validate(struct rte_eth_dev *dev,
2272 const struct rte_flow_attr *attr,
2273 const struct rte_flow_item items[],
2274 const struct rte_flow_action actions[],
2275 struct rte_flow_error *error)
2277 struct mlx5_flow_parse parser = { .create = 0, };
2279 return mlx5_flow_convert(dev, attr, items, actions, error, &parser);
2285 * @see rte_flow_create()
2289 mlx5_flow_create(struct rte_eth_dev *dev,
2290 const struct rte_flow_attr *attr,
2291 const struct rte_flow_item items[],
2292 const struct rte_flow_action actions[],
2293 struct rte_flow_error *error)
2295 struct priv *priv = dev->data->dev_private;
2297 return mlx5_flow_list_create(dev, &priv->flows, attr, items, actions,
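/*
 * Illustration (not from the original source): applications reach the
 * callbacks above through the generic rte_flow API, e.g.:
 */
#if 0
struct rte_flow_error err;
struct rte_flow *flow;

flow = rte_flow_create(port_id, &attr, pattern, actions, &err);
if (!flow)
	printf("flow creation failed: %s\n",
	       err.message ? err.message : "(no message)");
#endif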
2302 * Destroy a flow in a list.
2305 * Pointer to Ethernet device.
2307 * Pointer to a TAILQ flow list.
2312 mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
2313 struct rte_flow *flow)
2315 struct priv *priv = dev->data->dev_private;
2318 if (flow->drop || !dev->data->dev_started)
2320 for (i = 0; flow->tunnel && i != flow->rss_conf.queue_num; ++i) {
2321 /* Update queue tunnel type. */
2322 struct mlx5_rxq_data *rxq_data = (*priv->rxqs)
2323 [(*flow->queues)[i]];
2324 struct mlx5_rxq_ctrl *rxq_ctrl =
2325 container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
2326 uint8_t tunnel = PTYPE_IDX(flow->tunnel);
2328 assert(rxq_ctrl->tunnel_types[tunnel] > 0);
2329 rxq_ctrl->tunnel_types[tunnel] -= 1;
2330 if (!rxq_ctrl->tunnel_types[tunnel]) {
2331 /* Update tunnel type. */
2336 for (j = 0; j < RTE_DIM(rxq_ctrl->tunnel_types); j++)
2337 if (rxq_ctrl->tunnel_types[j]) {
2341 /* Keep the same if more than one tunnel type is left. */
2342 if (types == 1)
2343 rxq_data->tunnel = ptype_ext[last];
2344 else if (types == 0)
2345 /* No tunnel type left. */
2346 rxq_data->tunnel = 0;
2349 for (i = 0; flow->mark && i != flow->rss_conf.queue_num; ++i) {
2350 struct rte_flow *tmp;
2354 * To remove the mark from the queue, the queue must not be
2355 * present in any other marked flow (RSS or not).
2357 TAILQ_FOREACH(tmp, list, next) {
2359 uint16_t *tqs = NULL;
2364 for (j = 0; j != hash_rxq_init_n; ++j) {
2365 if (!tmp->frxq[j].hrxq)
2367 tqs = tmp->frxq[j].hrxq->ind_table->queues;
2368 tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
2372 for (j = 0; (j != tq_n) && !mark; j++)
2373 if (tqs[j] == (*flow->queues)[i])
2376 (*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
2380 if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
2381 claim_zero(mlx5_glue->destroy_flow
2382 (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2383 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
2385 for (i = 0; i != hash_rxq_init_n; ++i) {
2386 struct mlx5_flow *frxq = &flow->frxq[i];
2389 claim_zero(mlx5_glue->destroy_flow
2392 mlx5_hrxq_release(dev, frxq->hrxq);
2394 rte_free(frxq->ibv_attr);
2398 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2401 TAILQ_REMOVE(list, flow, next);
2402 DRV_LOG(DEBUG, "port %u flow destroyed %p", dev->data->port_id,
2408 * Destroy all flows.
2410 * @param dev
2411 * Pointer to Ethernet device.
2412 * @param list
2413 * Pointer to a TAILQ flow list.
2416 mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
2418 while (!TAILQ_EMPTY(list)) {
2419 struct rte_flow *flow;
2421 flow = TAILQ_FIRST(list);
2422 mlx5_flow_list_destroy(dev, list, flow);
2427 * Create drop queue.
2429 * @param dev
2430 * Pointer to Ethernet device.
2432 * @return
2433 * 0 on success, a negative errno value otherwise and rte_errno is set.
2436 mlx5_flow_create_drop_queue(struct rte_eth_dev *dev)
2438 struct priv *priv = dev->data->dev_private;
2439 struct mlx5_hrxq_drop *fdq = NULL;
2443 fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
2446 "port %u cannot allocate memory for drop queue",
2447 dev->data->port_id);
2451 fdq->cq = mlx5_glue->create_cq(priv->ctx, 1, NULL, NULL, 0);
2453 DRV_LOG(WARNING, "port %u cannot allocate CQ for drop queue",
2454 dev->data->port_id);
2458 fdq->wq = mlx5_glue->create_wq
2460 &(struct ibv_wq_init_attr){
2461 .wq_type = IBV_WQT_RQ,
2468 DRV_LOG(WARNING, "port %u cannot allocate WQ for drop queue",
2469 dev->data->port_id);
2473 fdq->ind_table = mlx5_glue->create_rwq_ind_table
2475 &(struct ibv_rwq_ind_table_init_attr){
2476 .log_ind_tbl_size = 0,
2477 .ind_tbl = &fdq->wq,
2480 if (!fdq->ind_table) {
2482 "port %u cannot allocate indirection table for drop"
2484 dev->data->port_id);
2488 fdq->qp = mlx5_glue->create_qp_ex
2490 &(struct ibv_qp_init_attr_ex){
2491 .qp_type = IBV_QPT_RAW_PACKET,
2493 IBV_QP_INIT_ATTR_PD |
2494 IBV_QP_INIT_ATTR_IND_TABLE |
2495 IBV_QP_INIT_ATTR_RX_HASH,
2496 .rx_hash_conf = (struct ibv_rx_hash_conf){
2498 IBV_RX_HASH_FUNC_TOEPLITZ,
2499 .rx_hash_key_len = rss_hash_default_key_len,
2500 .rx_hash_key = rss_hash_default_key,
2501 .rx_hash_fields_mask = 0,
2503 .rwq_ind_tbl = fdq->ind_table,
2507 DRV_LOG(WARNING, "port %u cannot allocate QP for drop queue",
2508 dev->data->port_id);
2512 priv->flow_drop_queue = fdq;
2516 claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2518 claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2520 claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2522 claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2525 priv->flow_drop_queue = NULL;
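/*
 * Note (editorial sketch of what the unwind path above assumes): the drop
 * queue is a chain of dependent Verbs objects created bottom-up,
 * cq -> wq(cq) -> rwq_ind_table(wq) -> qp(ind_table), so both the error
 * path here and mlx5_flow_delete_drop_queue() below must release them in
 * the reverse order: qp, ind_table, wq, cq.
 */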
2530 * Delete drop queue.
2532 * @param dev
2533 * Pointer to Ethernet device.
2536 mlx5_flow_delete_drop_queue(struct rte_eth_dev *dev)
2538 struct priv *priv = dev->data->dev_private;
2539 struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
2544 claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2546 claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2548 claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2550 claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2552 priv->flow_drop_queue = NULL;
2556 * Remove all flows.
2558 * @param dev
2559 * Pointer to Ethernet device.
2560 * @param list
2561 * Pointer to a TAILQ flow list.
2564 mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
2566 struct priv *priv = dev->data->dev_private;
2567 struct rte_flow *flow;
2570 TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
2571 struct mlx5_ind_table_ibv *ind_tbl = NULL;
2574 if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
2576 claim_zero(mlx5_glue->destroy_flow
2577 (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2578 flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
2579 DRV_LOG(DEBUG, "port %u flow %p removed",
2580 dev->data->port_id, (void *)flow);
2584 /* Verify the flow has not already been cleaned. */
2585 for (i = 0; i != hash_rxq_init_n; ++i) {
2586 if (!flow->frxq[i].ibv_flow)
2589 * Indirection table may be necessary to remove the
2590 * flags in the Rx queues.
2591 * This helps to speed up the process by avoiding
2592 * another loop.
2594 ind_tbl = flow->frxq[i].hrxq->ind_table;
2597 if (i == hash_rxq_init_n)
2601 for (i = 0; i != ind_tbl->queues_n; ++i)
2602 (*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
2604 for (i = 0; i != hash_rxq_init_n; ++i) {
2605 if (!flow->frxq[i].ibv_flow)
2607 claim_zero(mlx5_glue->destroy_flow
2608 (flow->frxq[i].ibv_flow));
2609 flow->frxq[i].ibv_flow = NULL;
2610 mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
2611 flow->frxq[i].hrxq = NULL;
2613 DRV_LOG(DEBUG, "port %u flow %p removed", dev->data->port_id,
2616 /* Clean up Rx queue tunnel info. */
2617 for (i = 0; i != priv->rxqs_n; ++i) {
2618 struct mlx5_rxq_data *q = (*priv->rxqs)[i];
2619 struct mlx5_rxq_ctrl *rxq_ctrl =
2620 container_of(q, struct mlx5_rxq_ctrl, rxq);
2624 memset((void *)rxq_ctrl->tunnel_types, 0,
2625 sizeof(rxq_ctrl->tunnel_types));
2633 * @param dev
2634 * Pointer to Ethernet device.
2635 * @param list
2636 * Pointer to a TAILQ flow list.
2638 * @return
2639 * 0 on success, a negative errno value otherwise and rte_errno is set.
2642 mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
2644 struct priv *priv = dev->data->dev_private;
2645 struct rte_flow *flow;
2647 TAILQ_FOREACH(flow, list, next) {
2651 flow->frxq[HASH_RXQ_ETH].ibv_flow =
2652 mlx5_glue->create_flow
2653 (priv->flow_drop_queue->qp,
2654 flow->frxq[HASH_RXQ_ETH].ibv_attr);
2655 if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2657 "port %u flow %p cannot be applied",
2658 dev->data->port_id, (void *)flow);
2662 DRV_LOG(DEBUG, "port %u flow %p applied",
2663 dev->data->port_id, (void *)flow);
2667 for (i = 0; i != hash_rxq_init_n; ++i) {
2668 if (!flow->frxq[i].ibv_attr)
2670 flow->frxq[i].hrxq =
2671 mlx5_hrxq_get(dev, flow->rss_conf.key,
2672 flow->rss_conf.key_len,
2673 flow->frxq[i].hash_fields,
2674 flow->rss_conf.queue,
2675 flow->rss_conf.queue_num,
2677 flow->rss_conf.level);
2678 if (flow->frxq[i].hrxq)
2680 flow->frxq[i].hrxq =
2681 mlx5_hrxq_new(dev, flow->rss_conf.key,
2682 flow->rss_conf.key_len,
2683 flow->frxq[i].hash_fields,
2684 flow->rss_conf.queue,
2685 flow->rss_conf.queue_num,
2687 flow->rss_conf.level);
2688 if (!flow->frxq[i].hrxq) {
2690 "port %u flow %p cannot be applied",
2691 dev->data->port_id, (void *)flow);
2696 flow->frxq[i].ibv_flow =
2697 mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
2698 flow->frxq[i].ibv_attr);
2699 if (!flow->frxq[i].ibv_flow) {
2701 "port %u flow %p cannot be applied",
2702 dev->data->port_id, (void *)flow);
2706 DRV_LOG(DEBUG, "port %u flow %p applied",
2707 dev->data->port_id, (void *)flow);
2709 mlx5_flow_create_update_rxqs(dev, flow);
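/*
 * Editorial note on the intended pairing (the callers live outside this
 * file, assumed to be the dev_stop/dev_start handlers): mlx5_flow_stop()
 * drops the Verbs flows and hash Rx queue references but keeps the parsed
 * ibv_attr specifications, so mlx5_flow_start() can re-apply every rule
 * across a device restart without re-parsing it.
 */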
2715 * Verify the flow list is empty.
2717 * @param dev
2718 * Pointer to Ethernet device.
2720 * @return
2721 * The number of flows not released.
2723 mlx5_flow_verify(struct rte_eth_dev *dev)
2725 struct priv *priv = dev->data->dev_private;
2726 struct rte_flow *flow;
2729 TAILQ_FOREACH(flow, &priv->flows, next) {
2730 DRV_LOG(DEBUG, "port %u flow %p still referenced",
2731 dev->data->port_id, (void *)flow);
2738 * Enable a control flow configured from the control plane.
2740 * @param dev
2741 * Pointer to Ethernet device.
2742 * @param eth_spec
2743 * An Ethernet flow spec to apply.
2744 * @param eth_mask
2745 * An Ethernet flow mask to apply.
2746 * @param vlan_spec
2747 * A VLAN flow spec to apply.
2748 * @param vlan_mask
2749 * A VLAN flow mask to apply.
2751 * @return
2752 * 0 on success, a negative errno value otherwise and rte_errno is set.
2755 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
2756 struct rte_flow_item_eth *eth_spec,
2757 struct rte_flow_item_eth *eth_mask,
2758 struct rte_flow_item_vlan *vlan_spec,
2759 struct rte_flow_item_vlan *vlan_mask)
2761 struct priv *priv = dev->data->dev_private;
2762 const struct rte_flow_attr attr = {
2764 .priority = MLX5_CTRL_FLOW_PRIORITY,
2766 struct rte_flow_item items[] = {
2768 .type = RTE_FLOW_ITEM_TYPE_ETH,
2774 .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
2775 RTE_FLOW_ITEM_TYPE_END,
2781 .type = RTE_FLOW_ITEM_TYPE_END,
2784 uint16_t queue[priv->reta_idx_n];
2785 struct rte_flow_action_rss action_rss = {
2786 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
2788 .types = priv->rss_conf.rss_hf,
2789 .key_len = priv->rss_conf.rss_key_len,
2790 .queue_num = priv->reta_idx_n,
2791 .key = priv->rss_conf.rss_key,
2794 struct rte_flow_action actions[] = {
2796 .type = RTE_FLOW_ACTION_TYPE_RSS,
2797 .conf = &action_rss,
2800 .type = RTE_FLOW_ACTION_TYPE_END,
2803 struct rte_flow *flow;
2804 struct rte_flow_error error;
2807 if (!priv->reta_idx_n) {
2811 for (i = 0; i != priv->reta_idx_n; ++i)
2812 queue[i] = (*priv->reta_idx)[i];
2813 flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
2821 * Enable a control flow configured from the control plane.
2823 * @param dev
2824 * Pointer to Ethernet device.
2825 * @param eth_spec
2826 * An Ethernet flow spec to apply.
2827 * @param eth_mask
2828 * An Ethernet flow mask to apply.
2830 * @return
2831 * 0 on success, a negative errno value otherwise and rte_errno is set.
2834 mlx5_ctrl_flow(struct rte_eth_dev *dev,
2835 struct rte_flow_item_eth *eth_spec,
2836 struct rte_flow_item_eth *eth_mask)
2838 return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
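/*
 * A short sketch of how a control flow is typically enabled, e.g. to accept
 * broadcast traffic (the actual unicast/multicast callers live outside this
 * file; the values below are illustrative):
 *
 * @code
 * struct rte_flow_item_eth bcast = {
 *         .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 * };
 *
 * // Match on destination MAC only: the spec doubles as its own mask.
 * if (mlx5_ctrl_flow(dev, &bcast, &bcast))
 *         return -rte_errno; // rte_errno set by the failed sub-call
 * @endcode
 */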
2844 * @see rte_flow_destroy()
2848 mlx5_flow_destroy(struct rte_eth_dev *dev,
2849 struct rte_flow *flow,
2850 struct rte_flow_error *error __rte_unused)
2852 struct priv *priv = dev->data->dev_private;
2854 mlx5_flow_list_destroy(dev, &priv->flows, flow);
2859 * Destroy all flows.
2861 * @see rte_flow_flush()
2865 mlx5_flow_flush(struct rte_eth_dev *dev,
2866 struct rte_flow_error *error __rte_unused)
2868 struct priv *priv = dev->data->dev_private;
2870 mlx5_flow_list_flush(dev, &priv->flows);
2874 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
2876 * Query flow counter.
2878 * @param cs
2879 * The counter set to read.
2880 * @param counter_stats
2881 * Stored counter values, used as the baseline for the returned delta.
2882 * @param[out] query_count
2883 * Returned counter data; the reset field is honored.
2884 * @return
2885 * 0 on success, a negative errno value otherwise and rte_errno is set.
2887 mlx5_flow_query_count(struct ibv_counter_set *cs,
2888 struct mlx5_flow_counter_stats *counter_stats,
2889 struct rte_flow_query_count *query_count,
2890 struct rte_flow_error *error)
2892 uint64_t counters[2];
2893 struct ibv_query_counter_set_attr query_cs_attr = {
2895 .query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
2897 struct ibv_counter_set_data query_out = {
2899 .outlen = 2 * sizeof(uint64_t),
2901 int err = mlx5_glue->query_counter_set(&query_cs_attr, &query_out);
2904 return rte_flow_error_set(error, err,
2905 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2907 "cannot read counter");
2908 query_count->hits_set = 1;
2909 query_count->bytes_set = 1;
2910 query_count->hits = counters[0] - counter_stats->hits;
2911 query_count->bytes = counters[1] - counter_stats->bytes;
2912 if (query_count->reset) {
2913 counter_stats->hits = counters[0];
2914 counter_stats->bytes = counters[1];
2922 * @see rte_flow_query()
2926 mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
2927 struct rte_flow *flow,
2928 enum rte_flow_action_type action __rte_unused,
2930 struct rte_flow_error *error)
2935 ret = mlx5_flow_query_count(flow->cs,
2936 &flow->counter_stats,
2937 (struct rte_flow_query_count *)data,
2942 return rte_flow_error_set(error, EINVAL,
2943 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2945 "no counter found for flow");
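/*
 * Application-side usage sketch, assuming the rule was created with a COUNT
 * action and counter support is compiled in (port_id is assumed):
 *
 * @code
 * struct rte_flow_query_count qc = { .reset = 1 };
 * struct rte_flow_error err;
 *
 * if (!rte_flow_query(port_id, flow, RTE_FLOW_ACTION_TYPE_COUNT,
 *                     &qc, &err) && qc.hits_set)
 *         printf("hits: %" PRIu64 " bytes: %" PRIu64 "\n",
 *                qc.hits, qc.bytes);
 * @endcode
 */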
2954 * @see rte_flow_isolate()
2958 mlx5_flow_isolate(struct rte_eth_dev *dev,
2960 struct rte_flow_error *error)
2962 struct priv *priv = dev->data->dev_private;
2964 if (dev->data->dev_started) {
2965 rte_flow_error_set(error, EBUSY,
2966 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2968 "port must be stopped first");
2971 priv->isolated = !!enable;
2973 priv->dev->dev_ops = &mlx5_dev_ops_isolate;
2975 priv->dev->dev_ops = &mlx5_dev_ops;
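/*
 * Isolated mode must be selected while the port is stopped; a minimal
 * application-side sketch (assumed port_id):
 *
 * @code
 * struct rte_flow_error err;
 *
 * // Call before rte_eth_dev_start(), otherwise EBUSY is returned.
 * if (rte_flow_isolate(port_id, 1, &err))
 *         printf("cannot enter isolated mode: %s\n",
 *                err.message ? err.message : strerror(rte_errno));
 * @endcode
 */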
2980 * Convert a flow director filter to a generic flow.
2982 * @param dev
2983 * Pointer to Ethernet device.
2984 * @param fdir_filter
2985 * Flow director filter to convert.
2986 * @param attributes
2987 * Generic flow parameters structure.
2989 * @return
2990 * 0 on success, a negative errno value otherwise and rte_errno is set.
2993 mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
2994 const struct rte_eth_fdir_filter *fdir_filter,
2995 struct mlx5_fdir *attributes)
2997 struct priv *priv = dev->data->dev_private;
2998 const struct rte_eth_fdir_input *input = &fdir_filter->input;
2999 const struct rte_eth_fdir_masks *mask =
3000 &dev->data->dev_conf.fdir_conf.mask;
3002 /* Validate queue number. */
3003 if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
3004 DRV_LOG(ERR, "port %u invalid queue number %d",
3005 dev->data->port_id, fdir_filter->action.rx_queue);
3009 attributes->attr.ingress = 1;
3010 attributes->items[0] = (struct rte_flow_item) {
3011 .type = RTE_FLOW_ITEM_TYPE_ETH,
3012 .spec = &attributes->l2,
3013 .mask = &attributes->l2_mask,
3015 switch (fdir_filter->action.behavior) {
3016 case RTE_ETH_FDIR_ACCEPT:
3017 attributes->actions[0] = (struct rte_flow_action){
3018 .type = RTE_FLOW_ACTION_TYPE_QUEUE,
3019 .conf = &attributes->queue,
3022 case RTE_ETH_FDIR_REJECT:
3023 attributes->actions[0] = (struct rte_flow_action){
3024 .type = RTE_FLOW_ACTION_TYPE_DROP,
3028 DRV_LOG(ERR, "port %u invalid behavior %d",
3030 fdir_filter->action.behavior);
3031 rte_errno = ENOTSUP;
3034 attributes->queue.index = fdir_filter->action.rx_queue;
3036 switch (fdir_filter->input.flow_type) {
3037 case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
3038 case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
3039 case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
3040 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
3041 .src_addr = input->flow.ip4_flow.src_ip,
3042 .dst_addr = input->flow.ip4_flow.dst_ip,
3043 .time_to_live = input->flow.ip4_flow.ttl,
3044 .type_of_service = input->flow.ip4_flow.tos,
3045 .next_proto_id = input->flow.ip4_flow.proto,
3047 attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){
3048 .src_addr = mask->ipv4_mask.src_ip,
3049 .dst_addr = mask->ipv4_mask.dst_ip,
3050 .time_to_live = mask->ipv4_mask.ttl,
3051 .type_of_service = mask->ipv4_mask.tos,
3052 .next_proto_id = mask->ipv4_mask.proto,
3054 attributes->items[1] = (struct rte_flow_item){
3055 .type = RTE_FLOW_ITEM_TYPE_IPV4,
3056 .spec = &attributes->l3,
3057 .mask = &attributes->l3_mask,
3060 case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
3061 case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
3062 case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
3063 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
3064 .hop_limits = input->flow.ipv6_flow.hop_limits,
3065 .proto = input->flow.ipv6_flow.proto,
3068 memcpy(attributes->l3.ipv6.hdr.src_addr,
3069 input->flow.ipv6_flow.src_ip,
3070 RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
3071 memcpy(attributes->l3.ipv6.hdr.dst_addr,
3072 input->flow.ipv6_flow.dst_ip,
3073 RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
3074 memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
3075 mask->ipv6_mask.src_ip,
3076 RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
3077 memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
3078 mask->ipv6_mask.dst_ip,
3079 RTE_DIM(attributes->l3_mask.ipv6.hdr.dst_addr));
3080 attributes->items[1] = (struct rte_flow_item){
3081 .type = RTE_FLOW_ITEM_TYPE_IPV6,
3082 .spec = &attributes->l3,
3083 .mask = &attributes->l3_mask,
3087 DRV_LOG(ERR, "port %u invalid flow type %d",
3088 dev->data->port_id, fdir_filter->input.flow_type);
3089 rte_errno = ENOTSUP;
3093 switch (fdir_filter->input.flow_type) {
3094 case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
3095 attributes->l4.udp.hdr = (struct udp_hdr){
3096 .src_port = input->flow.udp4_flow.src_port,
3097 .dst_port = input->flow.udp4_flow.dst_port,
3099 attributes->l4_mask.udp.hdr = (struct udp_hdr){
3100 .src_port = mask->src_port_mask,
3101 .dst_port = mask->dst_port_mask,
3103 attributes->items[2] = (struct rte_flow_item){
3104 .type = RTE_FLOW_ITEM_TYPE_UDP,
3105 .spec = &attributes->l4,
3106 .mask = &attributes->l4_mask,
3109 case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
3110 attributes->l4.tcp.hdr = (struct tcp_hdr){
3111 .src_port = input->flow.tcp4_flow.src_port,
3112 .dst_port = input->flow.tcp4_flow.dst_port,
3114 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
3115 .src_port = mask->src_port_mask,
3116 .dst_port = mask->dst_port_mask,
3118 attributes->items[2] = (struct rte_flow_item){
3119 .type = RTE_FLOW_ITEM_TYPE_TCP,
3120 .spec = &attributes->l4,
3121 .mask = &attributes->l4_mask,
3124 case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
3125 attributes->l4.udp.hdr = (struct udp_hdr){
3126 .src_port = input->flow.udp6_flow.src_port,
3127 .dst_port = input->flow.udp6_flow.dst_port,
3129 attributes->l4_mask.udp.hdr = (struct udp_hdr){
3130 .src_port = mask->src_port_mask,
3131 .dst_port = mask->dst_port_mask,
3133 attributes->items[2] = (struct rte_flow_item){
3134 .type = RTE_FLOW_ITEM_TYPE_UDP,
3135 .spec = &attributes->l4,
3136 .mask = &attributes->l4_mask,
3139 case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
3140 attributes->l4.tcp.hdr = (struct tcp_hdr){
3141 .src_port = input->flow.tcp6_flow.src_port,
3142 .dst_port = input->flow.tcp6_flow.dst_port,
3144 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
3145 .src_port = mask->src_port_mask,
3146 .dst_port = mask->dst_port_mask,
3148 attributes->items[2] = (struct rte_flow_item){
3149 .type = RTE_FLOW_ITEM_TYPE_TCP,
3150 .spec = &attributes->l4,
3151 .mask = &attributes->l4_mask,
3154 case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
3155 case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
3158 DRV_LOG(ERR, "port %u invalid flow type %d",
3159 dev->data->port_id, fdir_filter->input.flow_type);
3160 rte_errno = ENOTSUP;
3167 * Add a new flow director filter and store it in the list.
3169 * @param dev
3170 * Pointer to Ethernet device.
3171 * @param fdir_filter
3172 * Flow director filter to add.
3174 * @return
3175 * 0 on success, a negative errno value otherwise and rte_errno is set.
3178 mlx5_fdir_filter_add(struct rte_eth_dev *dev,
3179 const struct rte_eth_fdir_filter *fdir_filter)
3181 struct priv *priv = dev->data->dev_private;
3182 struct mlx5_fdir attributes = {
3185 .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
3186 .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
3190 struct mlx5_flow_parse parser = {
3191 .layer = HASH_RXQ_ETH,
3193 struct rte_flow_error error;
3194 struct rte_flow *flow;
3197 ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
3200 ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
3201 attributes.actions, &error, &parser);
3204 flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
3205 attributes.items, attributes.actions,
3208 DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
3216 * Delete a specific filter.
3218 * @param dev
3219 * Pointer to Ethernet device.
3220 * @param fdir_filter
3221 * Filter to be deleted.
3223 * @return
3224 * 0 on success, a negative errno value otherwise and rte_errno is set.
3227 mlx5_fdir_filter_delete(struct rte_eth_dev *dev,
3228 const struct rte_eth_fdir_filter *fdir_filter)
3230 struct priv *priv = dev->data->dev_private;
3231 struct mlx5_fdir attributes = {
3234 struct mlx5_flow_parse parser = {
3236 .layer = HASH_RXQ_ETH,
3238 struct rte_flow_error error;
3239 struct rte_flow *flow;
3243 ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
3246 ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
3247 attributes.actions, &error, &parser);
3251 * Special case for the drop action, which is only set in the
3252 * specifications when the flow is created. In that situation the
3253 * drop specification is missing and must be appended here.
3256 struct ibv_flow_spec_action_drop *drop;
3258 drop = (void *)((uintptr_t)parser.queue[HASH_RXQ_ETH].ibv_attr +
3259 parser.queue[HASH_RXQ_ETH].offset);
3260 *drop = (struct ibv_flow_spec_action_drop){
3261 .type = IBV_FLOW_SPEC_ACTION_DROP,
3262 .size = sizeof(struct ibv_flow_spec_action_drop),
3264 parser.queue[HASH_RXQ_ETH].ibv_attr->num_of_specs++;
3266 TAILQ_FOREACH(flow, &priv->flows, next) {
3267 struct ibv_flow_attr *attr;
3268 struct ibv_spec_header *attr_h;
3270 struct ibv_flow_attr *flow_attr;
3271 struct ibv_spec_header *flow_h;
3273 unsigned int specs_n;
3275 attr = parser.queue[HASH_RXQ_ETH].ibv_attr;
3276 flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr;
3277 /* Compare the attributes first. */
3278 if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
3280 if (attr->num_of_specs == 0)
3282 spec = (void *)((uintptr_t)attr +
3283 sizeof(struct ibv_flow_attr));
3284 flow_spec = (void *)((uintptr_t)flow_attr +
3285 sizeof(struct ibv_flow_attr));
3286 specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
3287 for (i = 0; i != specs_n; ++i) {
3290 if (memcmp(spec, flow_spec,
3291 RTE_MIN(attr_h->size, flow_h->size)))
3293 spec = (void *)((uintptr_t)spec + attr_h->size);
3294 flow_spec = (void *)((uintptr_t)flow_spec +
3297 /* At this point, the flows match. */
3300 /* The flow does not match. */
3303 ret = rte_errno; /* Save rte_errno before cleanup. */
3305 mlx5_flow_list_destroy(dev, &priv->flows, flow);
3307 for (i = 0; i != hash_rxq_init_n; ++i) {
3308 if (parser.queue[i].ibv_attr)
3309 rte_free(parser.queue[i].ibv_attr);
3311 rte_errno = ret; /* Restore rte_errno. */
3316 * Update the queue for a specific filter.
3318 * @param dev
3319 * Pointer to Ethernet device.
3320 * @param fdir_filter
3321 * Filter to be updated.
3323 * @return
3324 * 0 on success, a negative errno value otherwise and rte_errno is set.
3327 mlx5_fdir_filter_update(struct rte_eth_dev *dev,
3328 const struct rte_eth_fdir_filter *fdir_filter)
3332 ret = mlx5_fdir_filter_delete(dev, fdir_filter);
3335 return mlx5_fdir_filter_add(dev, fdir_filter);
3339 * Flush all filters.
3341 * @param dev
3342 * Pointer to Ethernet device.
3345 mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
3347 struct priv *priv = dev->data->dev_private;
3349 mlx5_flow_list_flush(dev, &priv->flows);
3353 * Get flow director information.
3355 * @param dev
3356 * Pointer to Ethernet device.
3357 * @param[out] fdir_info
3358 * Resulting flow director information.
3361 mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
3363 struct priv *priv = dev->data->dev_private;
3364 struct rte_eth_fdir_masks *mask =
3365 &priv->dev->data->dev_conf.fdir_conf.mask;
3367 fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
3368 fdir_info->guarant_spc = 0;
3369 rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
3370 fdir_info->max_flexpayload = 0;
3371 fdir_info->flow_types_mask[0] = 0;
3372 fdir_info->flex_payload_unit = 0;
3373 fdir_info->max_flex_payload_segment_num = 0;
3374 fdir_info->flex_payload_limit = 0;
3375 memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
3379 * Deal with flow director operations.
3381 * @param dev
3382 * Pointer to Ethernet device.
3383 * @param filter_op
3384 * Operation to perform.
3385 * @param arg
3386 * Pointer to operation-specific structure.
3388 * @return
3389 * 0 on success, a negative errno value otherwise and rte_errno is set.
3392 mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
3395 struct priv *priv = dev->data->dev_private;
3396 enum rte_fdir_mode fdir_mode =
3397 priv->dev->data->dev_conf.fdir_conf.mode;
3399 if (filter_op == RTE_ETH_FILTER_NOP)
3401 if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
3402 fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
3403 DRV_LOG(ERR, "port %u flow director mode %d not supported",
3404 dev->data->port_id, fdir_mode);
3408 switch (filter_op) {
3409 case RTE_ETH_FILTER_ADD:
3410 return mlx5_fdir_filter_add(dev, arg);
3411 case RTE_ETH_FILTER_UPDATE:
3412 return mlx5_fdir_filter_update(dev, arg);
3413 case RTE_ETH_FILTER_DELETE:
3414 return mlx5_fdir_filter_delete(dev, arg);
3415 case RTE_ETH_FILTER_FLUSH:
3416 mlx5_fdir_filter_flush(dev);
3418 case RTE_ETH_FILTER_INFO:
3419 mlx5_fdir_info_get(dev, arg);
3422 DRV_LOG(DEBUG, "port %u unknown operation %u",
3423 dev->data->port_id, filter_op);
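/*
 * A hedged example of the legacy filter API path that reaches
 * mlx5_fdir_ctrl_func(); the values are illustrative only (steer IPv4/UDP
 * destination port 4789 to Rx queue 3, assumed port_id):
 *
 * @code
 * struct rte_eth_fdir_filter f = {
 *         .input = {
 *                 .flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
 *                 .flow.udp4_flow.dst_port = rte_cpu_to_be_16(4789),
 *         },
 *         .action = {
 *                 .behavior = RTE_ETH_FDIR_ACCEPT,
 *                 .rx_queue = 3,
 *         },
 * };
 *
 * rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
 *                         RTE_ETH_FILTER_ADD, &f);
 * @endcode
 */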
3431 * Manage filter operations.
3433 * @param dev
3434 * Pointer to Ethernet device structure.
3435 * @param filter_type
3436 * Filter type.
3437 * @param filter_op
3438 * Operation to perform.
3439 * @param arg
3440 * Pointer to operation-specific structure.
3442 * @return
3443 * 0 on success, a negative errno value otherwise and rte_errno is set.
3446 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
3447 enum rte_filter_type filter_type,
3448 enum rte_filter_op filter_op,
3451 switch (filter_type) {
3452 case RTE_ETH_FILTER_GENERIC:
3453 if (filter_op != RTE_ETH_FILTER_GET) {
3457 *(const void **)arg = &mlx5_flow_ops;
3459 case RTE_ETH_FILTER_FDIR:
3460 return mlx5_fdir_ctrl_func(dev, filter_op, arg);
3462 DRV_LOG(ERR, "port %u filter type (%d) not supported",
3463 dev->data->port_id, filter_type);
3464 rte_errno = ENOTSUP;
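/*
 * Editorial note: the RTE_ETH_FILTER_GENERIC / RTE_ETH_FILTER_GET branch
 * above is how the rte_flow layer discovers the driver callbacks; to the
 * best of our understanding, rte_flow_create() and friends fetch
 * &mlx5_flow_ops through this hook before dispatching to the functions in
 * this file.
 */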
3471 * Detect number of Verbs flow priorities supported.
3473 * @param dev
3474 * Pointer to Ethernet device.
3476 * @return
3477 * The number of supported Verbs flow priorities.
3480 mlx5_get_max_verbs_prio(struct rte_eth_dev *dev)
3482 struct priv *priv = dev->data->dev_private;
3483 unsigned int verb_priorities = MLX5_VERBS_FLOW_PRIO_8;
3485 struct ibv_flow_attr attr;
3486 struct ibv_flow_spec_eth eth;
3487 struct ibv_flow_spec_action_drop drop;
3493 .type = IBV_FLOW_SPEC_ETH,
3494 .size = sizeof(struct ibv_flow_spec_eth),
3497 .size = sizeof(struct ibv_flow_spec_action_drop),
3498 .type = IBV_FLOW_SPEC_ACTION_DROP,
3501 struct ibv_flow *flow;
3504 flow_attr.attr.priority = verb_priorities - 1;
3505 flow = mlx5_glue->create_flow(priv->flow_drop_queue->qp,
3508 claim_zero(mlx5_glue->destroy_flow(flow));
3509 /* Try more priorities. */
3510 verb_priorities *= 2;
3512 /* Creation failed, fall back to the last working number. */
3513 verb_priorities /= 2;
3517 DRV_LOG(DEBUG, "port %u Verbs flow priorities: %d,"
3518 " user flow priorities: %d",
3519 dev->data->port_id, verb_priorities, MLX5_CTRL_FLOW_PRIORITY);
3520 return verb_priorities;
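/*
 * A worked trace of the probe above, assuming (for illustration) a device
 * that supports 16 priorities: create at priority 7 succeeds (8 -> 16),
 * create at priority 15 succeeds (16 -> 32), create at priority 31 fails,
 * so verb_priorities is halved back to 16 and returned.
 */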