1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2016 6WIND S.A.
3 * Copyright 2016 Mellanox Technologies, Ltd
11 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
13 #pragma GCC diagnostic ignored "-Wpedantic"
15 #include <infiniband/verbs.h>
17 #pragma GCC diagnostic error "-Wpedantic"
20 #include <rte_common.h>
21 #include <rte_ether.h>
22 #include <rte_eth_ctrl.h>
23 #include <rte_ethdev_driver.h>
25 #include <rte_flow_driver.h>
26 #include <rte_malloc.h>
30 #include "mlx5_defs.h"
32 #include "mlx5_glue.h"
34 /* Flow priority for control plane flows. */
35 #define MLX5_CTRL_FLOW_PRIORITY 1
37 /* Internet Protocol versions. */
42 #ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
43 struct ibv_flow_spec_counter_action {
48 /* Dev ops structure defined in mlx5.c */
49 extern const struct eth_dev_ops mlx5_dev_ops;
50 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
52 /** Context handed to every item -> Verbs-spec conversion callback. */
53 struct mlx5_flow_data {
54 struct rte_eth_dev *dev; /**< Ethernet device. */
55 struct mlx5_flow_parse *parser; /**< Parser context. */
56 struct rte_flow_error *error; /**< Error context. */
/* NOTE(review): closing brace of this struct is missing from this dump. */
/*
 * Forward declarations of the per-item conversion callbacks (one per
 * supported rte_flow pattern item) plus parser helpers used by the
 * mlx5_flow_items graph below.
 * NOTE(review): the return-type lines of these prototypes (presumably
 * `static int`) are missing from this dump — confirm against full file.
 */
60 mlx5_flow_create_eth(const struct rte_flow_item *item,
61 const void *default_mask,
62 struct mlx5_flow_data *data);
65 mlx5_flow_create_vlan(const struct rte_flow_item *item,
66 const void *default_mask,
67 struct mlx5_flow_data *data);
70 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
71 const void *default_mask,
72 struct mlx5_flow_data *data);
75 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
76 const void *default_mask,
77 struct mlx5_flow_data *data);
80 mlx5_flow_create_udp(const struct rte_flow_item *item,
81 const void *default_mask,
82 struct mlx5_flow_data *data);
85 mlx5_flow_create_tcp(const struct rte_flow_item *item,
86 const void *default_mask,
87 struct mlx5_flow_data *data);
90 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
91 const void *default_mask,
92 struct mlx5_flow_data *data);
95 mlx5_flow_create_gre(const struct rte_flow_item *item,
96 const void *default_mask,
97 struct mlx5_flow_data *data);
/* Opaque parser type; full definition appears later in this file. */
99 struct mlx5_flow_parse;
102 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
106 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);
109 mlx5_flow_create_count(struct rte_eth_dev *dev, struct mlx5_flow_parse *parser);
111 /* Hash RX queue types. */
/* NOTE(review): the enum hash_rxq_type definition (original lines
 * 112-121) is missing from this dump. */
123 /* Initialization data for hash RX queue. */
124 struct hash_rxq_init {
125 uint64_t hash_fields; /* Fields that participate in the hash. */
126 uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
127 unsigned int flow_priority; /* Flow priority to use. */
128 unsigned int ip_version; /* Internet protocol. */
/* NOTE(review): closing brace of this struct is missing from this dump. */
/*
 * Template for each hash Rx queue type: the Verbs hash fields that
 * participate in RSS, the matching DPDK ETH_RSS_* bits, and the IP
 * version. Entries appear to be ordered TCPv4, UDPv4, IPv4, TCPv6,
 * UDPv6, IPv6 — TODO confirm; the index designators, per-entry braces
 * and .flow_priority lines are missing from this dump.
 */
132 const struct hash_rxq_init hash_rxq_init[] = {
/* TCP over IPv4. */
134 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
135 IBV_RX_HASH_DST_IPV4 |
136 IBV_RX_HASH_SRC_PORT_TCP |
137 IBV_RX_HASH_DST_PORT_TCP),
138 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
140 .ip_version = MLX5_IPV4,
/* UDP over IPv4. */
143 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
144 IBV_RX_HASH_DST_IPV4 |
145 IBV_RX_HASH_SRC_PORT_UDP |
146 IBV_RX_HASH_DST_PORT_UDP),
147 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
149 .ip_version = MLX5_IPV4,
/* IPv4 only (L3 hash); further dpdk_rss_hf terms truncated here. */
152 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
153 IBV_RX_HASH_DST_IPV4),
154 .dpdk_rss_hf = (ETH_RSS_IPV4 |
157 .ip_version = MLX5_IPV4,
/* TCP over IPv6. */
160 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
161 IBV_RX_HASH_DST_IPV6 |
162 IBV_RX_HASH_SRC_PORT_TCP |
163 IBV_RX_HASH_DST_PORT_TCP),
164 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
166 .ip_version = MLX5_IPV6,
/* UDP over IPv6. */
169 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
170 IBV_RX_HASH_DST_IPV6 |
171 IBV_RX_HASH_SRC_PORT_UDP |
172 IBV_RX_HASH_DST_PORT_UDP),
173 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
175 .ip_version = MLX5_IPV6,
/* IPv6 only (L3 hash); further dpdk_rss_hf terms truncated here. */
178 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
179 IBV_RX_HASH_DST_IPV6),
180 .dpdk_rss_hf = (ETH_RSS_IPV6 |
183 .ip_version = MLX5_IPV6,
192 /* Number of entries in hash_rxq_init[]. */
193 const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
195 /** Structure for holding counter stats. */
196 struct mlx5_flow_counter_stats {
197 uint64_t hits; /**< Number of packets matched by the rule. */
198 uint64_t bytes; /**< Number of bytes matched by the rule. */
201 /** Structure for Drop queue. */
202 struct mlx5_hrxq_drop {
203 struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
204 struct ibv_qp *qp; /**< Verbs queue pair. */
205 struct ibv_wq *wq; /**< Verbs work queue. */
206 struct ibv_cq *cq; /**< Verbs completion queue. */
209 /* Flows structures. */
/* NOTE(review): the `struct mlx5_flow {` header (original line ~210)
 * is missing from this dump; the next four members belong to it. */
211 uint64_t hash_fields; /**< Fields that participate in the hash. */
212 struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
213 struct ibv_flow *ibv_flow; /**< Verbs flow. */
214 struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
217 /* Drop flows structures. */
218 struct mlx5_flow_drop {
219 struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
220 struct ibv_flow *ibv_flow; /**< Verbs flow. */
/* NOTE(review): the `struct rte_flow {` header (original line ~222)
 * is missing; the members below form the PMD's flow handle. */
224 TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
225 uint32_t mark:1; /**< Set if the flow is marked. */
226 uint32_t drop:1; /**< Drop queue. */
227 struct rte_flow_action_rss rss_conf; /**< RSS configuration */
228 uint16_t (*queues)[]; /**< Queues indexes to use. */
229 uint8_t rss_key[40]; /**< copy of the RSS key. */
230 uint32_t tunnel; /**< Tunnel type of RTE_PTYPE_TUNNEL_XXX. */
231 struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
232 struct mlx5_flow_counter_stats counter_stats;/**< The counter stats. */
/* One Verbs flow per hash Rx queue type. */
233 struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
234 /**< Flow with Rx queue. */
237 /** Static initializer for items. */
/* NOTE(review): the `#define ITEMS(...)` line itself (original 238) is
 * missing; these two lines are its compound-literal expansion. */
239 (const enum rte_flow_item_type []){ \
240 __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
/* True when the pattern item type starts a tunnel encapsulation. */
243 #define IS_TUNNEL(type) ( \
244 (type) == RTE_FLOW_ITEM_TYPE_VXLAN || \
245 (type) == RTE_FLOW_ITEM_TYPE_GRE)
/* Map pattern item type -> RTE_PTYPE tunnel bit (0 for non-tunnel). */
247 const uint32_t flow_ptype[] = {
248 [RTE_FLOW_ITEM_TYPE_VXLAN] = RTE_PTYPE_TUNNEL_VXLAN,
249 [RTE_FLOW_ITEM_TYPE_GRE] = RTE_PTYPE_TUNNEL_GRE,
/* Compress an RTE_PTYPE_TUNNEL_* value into a small array index. */
252 #define PTYPE_IDX(t) ((RTE_PTYPE_TUNNEL_MASK & (t)) >> 12)
254 const uint32_t ptype_ext[] = {
/* VXLAN entry continues on a missing line (extra L4 ptype bits). */
255 [PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] = RTE_PTYPE_TUNNEL_VXLAN |
257 [PTYPE_IDX(RTE_PTYPE_TUNNEL_GRE)] = RTE_PTYPE_TUNNEL_GRE,
260 /** Structure to generate a simple graph of layers supported by the NIC. */
261 struct mlx5_flow_items {
262 /** List of possible actions for these items. */
263 const enum rte_flow_action_type *const actions;
264 /** Bit-masks corresponding to the possibilities for the item. */
/* NOTE(review): the `const void *mask;` member declaration is missing
 * from this dump (original line ~265). */
267 * Default bit-masks to use when item->mask is not provided. When
268 * \default_mask is also NULL, the full supported bit-mask (\mask) is
271 const void *default_mask;
272 /** Bit-masks size in bytes. */
273 const unsigned int mask_sz;
275 * Conversion function from rte_flow to NIC specific flow.
278 * rte_flow item to convert.
279 * @param default_mask
280 * Default bit-masks to use when item->mask is not provided.
282 * Internal structure to store the conversion.
285 * 0 on success, a negative errno value otherwise and rte_errno is
288 int (*convert)(const struct rte_flow_item *item,
289 const void *default_mask,
290 struct mlx5_flow_data *data);
291 /** Size in bytes of the destination structure. */
292 const unsigned int dst_sz;
293 /** List of possible following items. */
294 const enum rte_flow_item_type *const items;
297 /** Valid action for this PMD. */
298 static const enum rte_flow_action_type valid_actions[] = {
299 RTE_FLOW_ACTION_TYPE_DROP,
300 RTE_FLOW_ACTION_TYPE_QUEUE,
301 RTE_FLOW_ACTION_TYPE_MARK,
302 RTE_FLOW_ACTION_TYPE_FLAG,
/* COUNT is only offered when the Verbs counter-set API is available. */
303 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
304 RTE_FLOW_ACTION_TYPE_COUNT,
/* NOTE(review): the matching #endif (original line 305) is missing
 * from this dump. */
306 RTE_FLOW_ACTION_TYPE_END,
309 /** Graph of supported items and associated actions. */
/*
 * Each entry describes one pattern item: which items may follow it
 * (.items), the widest supported match mask (.mask), the default mask
 * applied when the user gives none, the conversion callback and the
 * size of the resulting Verbs specification (.dst_sz).
 * NOTE(review): several closing braces and mask fields are missing
 * from this dump — entry boundaries below are approximate.
 */
310 static const struct mlx5_flow_items mlx5_flow_items[] = {
311 [RTE_FLOW_ITEM_TYPE_END] = {
312 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
313 RTE_FLOW_ITEM_TYPE_VXLAN,
314 RTE_FLOW_ITEM_TYPE_GRE),
316 [RTE_FLOW_ITEM_TYPE_ETH] = {
317 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
318 RTE_FLOW_ITEM_TYPE_IPV4,
319 RTE_FLOW_ITEM_TYPE_IPV6),
320 .actions = valid_actions,
321 .mask = &(const struct rte_flow_item_eth){
322 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
323 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
326 .default_mask = &rte_flow_item_eth_mask,
327 .mask_sz = sizeof(struct rte_flow_item_eth),
328 .convert = mlx5_flow_create_eth,
329 .dst_sz = sizeof(struct ibv_flow_spec_eth),
331 [RTE_FLOW_ITEM_TYPE_VLAN] = {
332 .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
333 RTE_FLOW_ITEM_TYPE_IPV6),
334 .actions = valid_actions,
335 .mask = &(const struct rte_flow_item_vlan){
339 .default_mask = &rte_flow_item_vlan_mask,
340 .mask_sz = sizeof(struct rte_flow_item_vlan),
341 .convert = mlx5_flow_create_vlan,
344 [RTE_FLOW_ITEM_TYPE_IPV4] = {
345 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
346 RTE_FLOW_ITEM_TYPE_TCP,
347 RTE_FLOW_ITEM_TYPE_GRE),
348 .actions = valid_actions,
349 .mask = &(const struct rte_flow_item_ipv4){
/* -1 here means "all bits set" for the field's unsigned type. */
353 .type_of_service = -1,
357 .default_mask = &rte_flow_item_ipv4_mask,
358 .mask_sz = sizeof(struct rte_flow_item_ipv4),
359 .convert = mlx5_flow_create_ipv4,
360 .dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
362 [RTE_FLOW_ITEM_TYPE_IPV6] = {
363 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
364 RTE_FLOW_ITEM_TYPE_TCP,
365 RTE_FLOW_ITEM_TYPE_GRE),
366 .actions = valid_actions,
367 .mask = &(const struct rte_flow_item_ipv6){
/* Full /128 masks for both source and destination addresses. */
370 0xff, 0xff, 0xff, 0xff,
371 0xff, 0xff, 0xff, 0xff,
372 0xff, 0xff, 0xff, 0xff,
373 0xff, 0xff, 0xff, 0xff,
376 0xff, 0xff, 0xff, 0xff,
377 0xff, 0xff, 0xff, 0xff,
378 0xff, 0xff, 0xff, 0xff,
379 0xff, 0xff, 0xff, 0xff,
386 .default_mask = &rte_flow_item_ipv6_mask,
387 .mask_sz = sizeof(struct rte_flow_item_ipv6),
388 .convert = mlx5_flow_create_ipv6,
389 .dst_sz = sizeof(struct ibv_flow_spec_ipv6),
391 [RTE_FLOW_ITEM_TYPE_UDP] = {
392 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
393 .actions = valid_actions,
394 .mask = &(const struct rte_flow_item_udp){
400 .default_mask = &rte_flow_item_udp_mask,
401 .mask_sz = sizeof(struct rte_flow_item_udp),
402 .convert = mlx5_flow_create_udp,
403 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
405 [RTE_FLOW_ITEM_TYPE_TCP] = {
406 .actions = valid_actions,
407 .mask = &(const struct rte_flow_item_tcp){
413 .default_mask = &rte_flow_item_tcp_mask,
414 .mask_sz = sizeof(struct rte_flow_item_tcp),
415 .convert = mlx5_flow_create_tcp,
416 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
418 [RTE_FLOW_ITEM_TYPE_GRE] = {
419 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
420 RTE_FLOW_ITEM_TYPE_IPV4,
421 RTE_FLOW_ITEM_TYPE_IPV6),
422 .actions = valid_actions,
423 .mask = &(const struct rte_flow_item_gre){
426 .default_mask = &rte_flow_item_gre_mask,
427 .mask_sz = sizeof(struct rte_flow_item_gre),
428 .convert = mlx5_flow_create_gre,
429 .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
431 [RTE_FLOW_ITEM_TYPE_VXLAN] = {
432 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
433 RTE_FLOW_ITEM_TYPE_IPV4, /* For L3 VXLAN. */
434 RTE_FLOW_ITEM_TYPE_IPV6), /* For L3 VXLAN. */
435 .actions = valid_actions,
436 .mask = &(const struct rte_flow_item_vxlan){
437 .vni = "\xff\xff\xff",
439 .default_mask = &rte_flow_item_vxlan_mask,
440 .mask_sz = sizeof(struct rte_flow_item_vxlan),
441 .convert = mlx5_flow_create_vxlan,
442 .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
446 /** Structure to pass to the conversion function. */
447 struct mlx5_flow_parse {
448 uint32_t inner; /**< Verbs value, set once tunnel is encountered. */
/* NOTE(review): the `uint32_t create:1;` bit-field (original ~449)
 * is missing from this dump; the doc line below belongs to it. */
450 /**< Whether resources should remain after a validate. */
451 uint32_t drop:1; /**< Target is a drop queue. */
452 uint32_t mark:1; /**< Mark is present in the flow. */
453 uint32_t count:1; /**< Count is present in the flow. */
454 uint32_t mark_id; /**< Mark identifier. */
455 struct rte_flow_action_rss rss_conf; /**< RSS configuration */
456 uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use. */
457 uint8_t rss_key[40]; /**< copy of the RSS key. */
458 enum hash_rxq_type layer; /**< Last pattern layer detected. */
459 enum hash_rxq_type out_layer; /**< Last outer pattern layer detected. */
460 uint32_t tunnel; /**< Tunnel type of RTE_PTYPE_TUNNEL_XXX. */
461 struct ibv_counter_set *cs; /**< Holds the counter set for the rule */
/* Per-hash-Rx-queue-type conversion state (anonymous struct array). */
463 struct ibv_flow_attr *ibv_attr;
464 /**< Pointer to Verbs attributes. */
/* NOTE(review): the `unsigned int offset;` member is missing here. */
466 /**< Current position or total size of the attribute. */
467 uint64_t hash_fields; /**< Verbs hash fields. */
468 } queue[RTE_DIM(hash_rxq_init)];
/* rte_flow operations exposed by this PMD (registered via filter_ctrl). */
471 static const struct rte_flow_ops mlx5_flow_ops = {
472 .validate = mlx5_flow_validate,
473 .create = mlx5_flow_create,
474 .destroy = mlx5_flow_destroy,
475 .flush = mlx5_flow_flush,
/* Query is only meaningful when counter sets are supported. */
476 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
477 .query = mlx5_flow_query,
/* NOTE(review): #endif (original ~478) is missing from this dump. */
481 .isolate = mlx5_flow_isolate,
484 /* Convert FDIR request to Generic flow. */
/* NOTE(review): the `struct mlx5_fdir {` header (original ~485) is
 * missing; the members below stage a flow-director request as an
 * equivalent rte_flow (attr + items + actions + per-layer unions). */
486 struct rte_flow_attr attr;
487 struct rte_flow_action actions[2];
488 struct rte_flow_item items[4];
489 struct rte_flow_item_eth l2;
490 struct rte_flow_item_eth l2_mask;
/* L3 spec and mask (unions of IPv4/IPv6; union keywords truncated). */
492 struct rte_flow_item_ipv4 ipv4;
493 struct rte_flow_item_ipv6 ipv6;
496 struct rte_flow_item_ipv4 ipv4;
497 struct rte_flow_item_ipv6 ipv6;
/* L4 spec and mask (unions of UDP/TCP; union keywords truncated). */
500 struct rte_flow_item_udp udp;
501 struct rte_flow_item_tcp tcp;
504 struct rte_flow_item_udp udp;
505 struct rte_flow_item_tcp tcp;
507 struct rte_flow_action_queue queue;
510 /* Verbs specification header. */
/* Common prefix of every ibv_flow_spec_* — used to walk spec lists. */
511 struct ibv_spec_header {
512 enum ibv_flow_spec_type type;
517 * Check support for a given item.
520 * Item specification.
522 * Bit-masks covering supported fields to compare with spec, last and mask in
525 * Bit-Mask size in bytes.
528 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Validates that item->spec/last/mask only touch bits the PMD supports
 * (every set bit must also be set in the supported mask), and that a
 * spec..last range is coherent.
 * NOTE(review): braces, returns and some declarations are missing from
 * this dump; logic below is only partially visible.
 */
531 mlx5_flow_item_validate(const struct rte_flow_item *item,
532 const uint8_t *mask, unsigned int size)
/* mask/last without a spec is meaningless. */
534 if (!item->spec && (item->mask || item->last)) {
/* spec without user mask: check spec against the supported mask. */
538 if (item->spec && !item->mask) {
540 const uint8_t *spec = item->spec;
542 for (i = 0; i < size; ++i)
543 if ((spec[i] | mask[i]) != mask[i]) {
/* last without user mask: same containment check on item->last. */
548 if (item->last && !item->mask) {
550 const uint8_t *spec = item->last;
552 for (i = 0; i < size; ++i)
553 if ((spec[i] | mask[i]) != mask[i]) {
/* user-provided mask must itself be within the supported mask. */
560 const uint8_t *spec = item->spec;
562 for (i = 0; i < size; ++i)
563 if ((spec[i] | mask[i]) != mask[i]) {
/* Range match: masked spec and masked last must agree. */
568 if (item->spec && item->last) {
571 const uint8_t *apply = mask;
577 for (i = 0; i < size; ++i) {
578 spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
579 last[i] = ((const uint8_t *)item->last)[i] & apply[i];
581 ret = memcmp(spec, last, size);
591 * Extract attribute to the parser.
594 * Flow rule attributes.
596 * Perform verbose error reporting if not NULL.
599 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Rejects every rte_flow attribute this PMD cannot honour: non-zero
 * groups, priorities other than 0/MLX5_CTRL_FLOW_PRIORITY, egress,
 * transfer, and non-ingress rules.
 * NOTE(review): the `if (attr->group)`, `if (attr->egress)` guards and
 * the return statements are on lines missing from this dump.
 */
602 mlx5_flow_convert_attributes(const struct rte_flow_attr *attr,
603 struct rte_flow_error *error)
606 rte_flow_error_set(error, ENOTSUP,
607 RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
609 "groups are not supported");
612 if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
613 rte_flow_error_set(error, ENOTSUP,
614 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
616 "priorities are not supported");
620 rte_flow_error_set(error, ENOTSUP,
621 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
623 "egress is not supported");
626 if (attr->transfer) {
627 rte_flow_error_set(error, ENOTSUP,
628 RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
630 "transfer is not supported");
633 if (!attr->ingress) {
634 rte_flow_error_set(error, ENOTSUP,
635 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
637 "only ingress is supported");
644 * Extract actions request to the parser.
647 * Pointer to Ethernet device.
649 * Associated actions (list terminated by the END action).
651 * Perform verbose error reporting if not NULL.
652 * @param[in, out] parser
653 * Internal parser structure.
656 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Walks the action list once, recording DROP/QUEUE/RSS/MARK/FLAG/COUNT
 * into the parser while using the `overlap` bit set to reject actions
 * of the same class appearing twice (e.g. two fate actions).
 * NOTE(review): many lines (overlap updates, returns, closing braces)
 * are missing from this dump; only the visible checks are annotated.
 */
659 mlx5_flow_convert_actions(struct rte_eth_dev *dev,
660 const struct rte_flow_action actions[],
661 struct rte_flow_error *error,
662 struct mlx5_flow_parse *parser)
/* Action classes for overlap detection: fate, mark/flag, count. */
664 enum { FATE = 1, MARK = 2, COUNT = 4, };
665 uint32_t overlap = 0;
666 struct priv *priv = dev->data->dev_private;
668 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
669 if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
671 } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
673 goto exit_action_overlap;
676 } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
677 const struct rte_flow_action_queue *queue =
678 (const struct rte_flow_action_queue *)
682 goto exit_action_overlap;
/* Queue index must reference an existing Rx queue. */
684 if (!queue || (queue->index > (priv->rxqs_n - 1)))
685 goto exit_action_not_supported;
/* Single-queue action modelled as a one-entry RSS set. */
686 parser->queues[0] = queue->index;
687 parser->rss_conf = (struct rte_flow_action_rss){
689 .queue = parser->queues,
691 } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
692 const struct rte_flow_action_rss *rss =
693 (const struct rte_flow_action_rss *)
695 const uint8_t *rss_key;
696 uint32_t rss_key_len;
700 goto exit_action_overlap;
/* Only the default and Toeplitz hash functions are accepted. */
703 rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ) {
704 rte_flow_error_set(error, EINVAL,
705 RTE_FLOW_ERROR_TYPE_ACTION,
707 "the only supported RSS hash"
708 " function is Toeplitz");
/* Encapsulation level limits depend on Verbs tunnel support. */
711 #ifndef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
712 if (parser->rss_conf.level > 1) {
713 rte_flow_error_set(error, EINVAL,
714 RTE_FLOW_ERROR_TYPE_ACTION,
716 "a nonzero RSS encapsulation"
717 " level is not supported");
721 if (parser->rss_conf.level > 2) {
722 rte_flow_error_set(error, EINVAL,
723 RTE_FLOW_ERROR_TYPE_ACTION,
725 "RSS encapsulation level"
726 " > 1 is not supported");
729 if (rss->types & MLX5_RSS_HF_MASK) {
730 rte_flow_error_set(error, EINVAL,
731 RTE_FLOW_ERROR_TYPE_ACTION,
733 "unsupported RSS type"
/* Fall back to the driver default key when none is given. */
738 rss_key_len = rss->key_len;
741 rss_key_len = rss_hash_default_key_len;
742 rss_key = rss_hash_default_key;
744 if (rss_key_len != RTE_DIM(parser->rss_key)) {
745 rte_flow_error_set(error, EINVAL,
746 RTE_FLOW_ERROR_TYPE_ACTION,
748 "RSS hash key must be"
749 " exactly 40 bytes long");
752 if (!rss->queue_num) {
753 rte_flow_error_set(error, EINVAL,
754 RTE_FLOW_ERROR_TYPE_ACTION,
759 if (rss->queue_num > RTE_DIM(parser->queues)) {
760 rte_flow_error_set(error, EINVAL,
761 RTE_FLOW_ERROR_TYPE_ACTION,
763 "too many queues for RSS"
767 for (n = 0; n < rss->queue_num; ++n) {
768 if (rss->queue[n] >= priv->rxqs_n) {
769 rte_flow_error_set(error, EINVAL,
770 RTE_FLOW_ERROR_TYPE_ACTION,
772 "queue id > number of"
/* Copy key and queue list into parser-owned storage. */
777 parser->rss_conf = (struct rte_flow_action_rss){
778 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
781 .key_len = rss_key_len,
782 .queue_num = rss->queue_num,
783 .key = memcpy(parser->rss_key, rss_key,
784 sizeof(*rss_key) * rss_key_len),
785 .queue = memcpy(parser->queues, rss->queue,
786 sizeof(*rss->queue) *
789 } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
790 const struct rte_flow_action_mark *mark =
791 (const struct rte_flow_action_mark *)
795 goto exit_action_overlap;
798 rte_flow_error_set(error, EINVAL,
799 RTE_FLOW_ERROR_TYPE_ACTION,
801 "mark must be defined");
803 } else if (mark->id >= MLX5_FLOW_MARK_MAX) {
804 rte_flow_error_set(error, ENOTSUP,
805 RTE_FLOW_ERROR_TYPE_ACTION,
807 "mark must be between 0"
812 parser->mark_id = mark->id;
813 } else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
815 goto exit_action_overlap;
/* COUNT only accepted when counters are enabled in the config. */
818 } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
819 priv->config.flow_counter_en) {
821 goto exit_action_overlap;
825 goto exit_action_not_supported;
828 /* When fate is unknown, drop traffic. */
829 if (!(overlap & FATE))
831 if (parser->drop && parser->mark)
833 if (!parser->rss_conf.queue_num && !parser->drop) {
834 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
835 NULL, "no valid action");
839 exit_action_not_supported:
840 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
841 actions, "action not supported");
/* NOTE(review): the `exit_action_overlap:` label line is missing. */
844 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
845 actions, "overlapping actions are not supported");
853 * Pattern specification (list terminated by the END pattern item).
855 * Perform verbose error reporting if not NULL.
856 * @param[in, out] parser
857 * Internal parser structure.
860 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Walks the pattern once through the mlx5_flow_items graph: checks each
 * item is a legal successor of the previous one, validates its mask,
 * tracks tunnel encapsulation, and accumulates per-queue Verbs spec
 * sizes in parser->queue[i].offset for the later allocation pass.
 * NOTE(review): several branch headers/returns are missing from dump.
 */
863 mlx5_flow_convert_items_validate(struct rte_eth_dev *dev,
864 const struct rte_flow_item items[],
865 struct rte_flow_error *error,
866 struct mlx5_flow_parse *parser)
868 struct priv *priv = dev->data->dev_private;
869 const struct mlx5_flow_items *cur_item = mlx5_flow_items;
873 /* Initialise the offsets to start after verbs attribute. */
874 for (i = 0; i != hash_rxq_init_n; ++i)
875 parser->queue[i].offset = sizeof(struct ibv_flow_attr);
876 for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
877 const struct mlx5_flow_items *token = NULL;
880 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
/* Accept only items listed as successors of the current node. */
884 cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
886 if (cur_item->items[i] == items->type) {
887 token = &mlx5_flow_items[items->type];
893 goto exit_item_not_supported;
896 ret = mlx5_flow_item_validate(items,
897 (const uint8_t *)cur_item->mask,
900 goto exit_item_not_supported;
901 if (IS_TUNNEL(items->type)) {
/* At most one tunnel encapsulation per flow. */
902 if (parser->tunnel) {
903 rte_flow_error_set(error, ENOTSUP,
904 RTE_FLOW_ERROR_TYPE_ITEM,
906 "Cannot recognize multiple"
907 " tunnel encapsulations.");
910 if (!priv->config.tunnel_en &&
911 parser->rss_conf.level > 1) {
912 rte_flow_error_set(error, ENOTSUP,
913 RTE_FLOW_ERROR_TYPE_ITEM,
915 "RSS on tunnel is not supported");
/* Subsequent specs are inner-packet specs. */
918 parser->inner = IBV_FLOW_SPEC_INNER;
919 parser->tunnel = flow_ptype[items->type];
/* Accumulate spec sizes (drop path uses only the ETH queue). */
922 parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
924 for (n = 0; n != hash_rxq_init_n; ++n)
925 parser->queue[n].offset += cur_item->dst_sz;
/* Reserve room for the fate/mark/count specs appended later. */
929 parser->queue[HASH_RXQ_ETH].offset +=
930 sizeof(struct ibv_flow_spec_action_drop);
933 for (i = 0; i != hash_rxq_init_n; ++i)
934 parser->queue[i].offset +=
935 sizeof(struct ibv_flow_spec_action_tag);
938 unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
940 for (i = 0; i != hash_rxq_init_n; ++i)
941 parser->queue[i].offset += size;
944 exit_item_not_supported:
945 return rte_flow_error_set(error, -ret, RTE_FLOW_ERROR_TYPE_ITEM,
946 items, "item not supported");
950 * Allocate memory space to store verbs flow attributes.
953 * Amount of byte to allocate.
955 * Perform verbose error reporting if not NULL.
958 * A verbs flow attribute on success, NULL otherwise and rte_errno is set.
960 static struct ibv_flow_attr *
961 mlx5_flow_convert_allocate(unsigned int size, struct rte_flow_error *error)
963 struct ibv_flow_attr *ibv_attr;
/* Zeroed allocation; caller owns the buffer and frees with rte_free. */
965 ibv_attr = rte_calloc(__func__, 1, size, 0);
/* NOTE(review): the NULL check and return lines are missing here. */
967 rte_flow_error_set(error, ENOMEM,
968 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
970 "cannot allocate verbs spec attributes");
977 * Make inner packet matching with an higher priority from the non Inner
981 * Pointer to Ethernet device.
982 * @param[in, out] parser
983 * Internal parser structure.
985 * User flow attribute.
/*
 * Computes the final Verbs flow priority: base from the user attribute
 * scaled by the available priority range, then biased so tunnel (inner)
 * flows match before non-tunnel flows of the same rule priority.
 */
988 mlx5_flow_update_priority(struct rte_eth_dev *dev,
989 struct mlx5_flow_parse *parser,
990 const struct rte_flow_attr *attr)
992 struct priv *priv = dev->data->dev_private;
996 /* 8 priorities >= 16 priorities
997 * Control flow: 4-7 8-15
998 * User normal flow: 1-3 4-7
999 * User tunnel flow: 0-2 0-3
1001 priority = attr->priority * MLX5_VERBS_FLOW_PRIO_8;
1002 if (priv->config.max_verbs_prio == MLX5_VERBS_FLOW_PRIO_8)
1005 * Lower non-tunnel flow Verbs priority 1 if only support 8 Verbs
1006 * priorities, lower 4 otherwise.
1008 if (!parser->inner) {
1009 if (priv->config.max_verbs_prio == MLX5_VERBS_FLOW_PRIO_8)
1012 priority += MLX5_VERBS_FLOW_PRIO_8 / 2;
/* Drop path only carries the HASH_RXQ_ETH attribute. */
1015 parser->queue[HASH_RXQ_ETH].ibv_attr->priority = priority +
1016 hash_rxq_init[HASH_RXQ_ETH].flow_priority;
1019 for (i = 0; i != hash_rxq_init_n; ++i) {
1020 if (!parser->queue[i].ibv_attr)
1022 parser->queue[i].ibv_attr->priority = priority +
1023 hash_rxq_init[i].flow_priority;
1028 * Finalise verbs flow attributes.
1030 * @param[in, out] parser
1031 * Internal parser structure.
/*
 * For every hash Rx queue type other than the one matched by the
 * pattern, synthesise the missing L3/L4 wildcard specs so each queue's
 * attribute describes a complete header chain. When ibv_attr is not
 * yet allocated this pass only grows the offsets (sizing pass).
 */
1034 mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
1037 uint32_t inner = parser->inner;
1039 /* Don't create extra flows for outer RSS. */
1040 if (parser->tunnel && parser->rss_conf.level < 2)
1043 * Fill missing layers in verbs specifications, or compute the correct
1044 * offset to allocate the memory space for the attributes and
1047 for (i = 0; i != hash_rxq_init_n - 1; ++i) {
/* Scratch union: one possible synthetic spec per layer kind. */
1049 struct ibv_flow_spec_ipv4_ext ipv4;
1050 struct ibv_flow_spec_ipv6 ipv6;
1051 struct ibv_flow_spec_tcp_udp udp_tcp;
1052 struct ibv_flow_spec_eth eth;
/* The queue matching the pattern's own layer needs nothing extra. */
1057 if (i == parser->layer)
1059 if (parser->layer == HASH_RXQ_ETH ||
1060 parser->layer == HASH_RXQ_TUNNEL) {
/* Pattern stopped at L2: add an empty L3 spec (match-all). */
1061 if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
1062 size = sizeof(struct ibv_flow_spec_ipv4_ext);
1063 specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
1064 .type = inner | IBV_FLOW_SPEC_IPV4_EXT,
1068 size = sizeof(struct ibv_flow_spec_ipv6);
1069 specs.ipv6 = (struct ibv_flow_spec_ipv6){
1070 .type = inner | IBV_FLOW_SPEC_IPV6,
1074 if (parser->queue[i].ibv_attr) {
1075 dst = (void *)((uintptr_t)
1076 parser->queue[i].ibv_attr +
1077 parser->queue[i].offset);
1078 memcpy(dst, &specs, size);
1079 ++parser->queue[i].ibv_attr->num_of_specs;
1081 parser->queue[i].offset += size;
/* L4 queue types additionally need an empty TCP/UDP spec. */
1083 if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
1084 (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
1085 size = sizeof(struct ibv_flow_spec_tcp_udp);
1086 specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
1087 .type = inner | ((i == HASH_RXQ_UDPV4 ||
1088 i == HASH_RXQ_UDPV6) ?
1093 if (parser->queue[i].ibv_attr) {
1094 dst = (void *)((uintptr_t)
1095 parser->queue[i].ibv_attr +
1096 parser->queue[i].offset);
1097 memcpy(dst, &specs, size);
1098 ++parser->queue[i].ibv_attr->num_of_specs;
1100 parser->queue[i].offset += size;
1106 * Update flows according to pattern and RSS hash fields.
1108 * @param[in, out] parser
1109 * Internal parser structure.
1112 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Decides which per-hash-Rx-queue attributes survive: picks the hash
 * fields matching the requested RSS types and the deepest pattern
 * layer, then frees the attributes of every unused queue type.
 * NOTE(review): some branches (else arms, returns, loop headers) are
 * on lines missing from this dump.
 */
1115 mlx5_flow_convert_rss(struct mlx5_flow_parse *parser)
1118 enum hash_rxq_type start;
1119 enum hash_rxq_type layer;
/* Outer RSS: tunnel present but hashing requested on outer headers. */
1120 int outer = parser->tunnel && parser->rss_conf.level < 2;
1121 uint64_t rss = parser->rss_conf.types;
1123 /* Default to outer RSS. */
1124 if (!parser->rss_conf.level)
1125 parser->rss_conf.level = 1;
1126 layer = outer ? parser->out_layer : parser->layer;
1127 if (layer == HASH_RXQ_TUNNEL)
1128 layer = HASH_RXQ_ETH;
1130 /* Only one hash type for outer RSS. */
1131 if (rss && layer == HASH_RXQ_ETH) {
1132 start = HASH_RXQ_TCPV4;
1133 } else if (rss && layer != HASH_RXQ_ETH &&
1134 !(rss & hash_rxq_init[layer].dpdk_rss_hf)) {
1135 /* If RSS not match L4 pattern, try L3 RSS. */
1136 if (layer < HASH_RXQ_IPV4)
1137 layer = HASH_RXQ_IPV4;
1138 else if (layer > HASH_RXQ_IPV4 && layer < HASH_RXQ_IPV6)
1139 layer = HASH_RXQ_IPV6;
1144 /* Scan first valid hash type. */
1145 for (i = start; rss && i <= layer; ++i) {
1146 if (!parser->queue[i].ibv_attr)
1148 if (hash_rxq_init[i].dpdk_rss_hf & rss)
1151 if (rss && i <= layer)
1152 parser->queue[layer].hash_fields =
1153 hash_rxq_init[i].hash_fields;
1154 /* Trim unused hash types. */
1155 for (i = 0; i != hash_rxq_init_n; ++i) {
1156 if (parser->queue[i].ibv_attr && i != layer) {
1157 rte_free(parser->queue[i].ibv_attr);
1158 parser->queue[i].ibv_attr = NULL;
1162 /* Expand for inner or normal RSS. */
1163 if (rss && (layer == HASH_RXQ_ETH || layer == HASH_RXQ_IPV4))
1164 start = HASH_RXQ_TCPV4;
1165 else if (rss && layer == HASH_RXQ_IPV6)
1166 start = HASH_RXQ_TCPV6;
1169 /* For L4 pattern, try L3 RSS if no L4 RSS. */
1170 /* Trim unused hash types. */
1171 for (i = 0; i != hash_rxq_init_n; ++i) {
1172 if (!parser->queue[i].ibv_attr)
1174 if (i < start || i > layer) {
1175 rte_free(parser->queue[i].ibv_attr);
1176 parser->queue[i].ibv_attr = NULL;
1181 if (hash_rxq_init[i].dpdk_rss_hf & rss) {
1182 parser->queue[i].hash_fields =
1183 hash_rxq_init[i].hash_fields;
1184 } else if (i != layer) {
1185 /* Remove unused RSS expansion. */
1186 rte_free(parser->queue[i].ibv_attr);
1187 parser->queue[i].ibv_attr = NULL;
1188 } else if (layer < HASH_RXQ_IPV4 &&
1189 (hash_rxq_init[HASH_RXQ_IPV4].dpdk_rss_hf &
1191 /* Allow IPv4 RSS on L4 pattern. */
1192 parser->queue[i].hash_fields =
1193 hash_rxq_init[HASH_RXQ_IPV4]
1195 } else if (i > HASH_RXQ_IPV4 && i < HASH_RXQ_IPV6 &&
1196 (hash_rxq_init[HASH_RXQ_IPV6].dpdk_rss_hf &
/* NOTE(review): comment appears copy-pasted — presumably IPv6 here. */
1198 /* Allow IPv4 RSS on L4 pattern. */
1199 parser->queue[i].hash_fields =
1200 hash_rxq_init[HASH_RXQ_IPV6]
1209 * Validate and convert a flow supported by the NIC.
1212 * Pointer to Ethernet device.
1214 * Flow rule attributes.
1215 * @param[in] pattern
1216 * Pattern specification (list terminated by the END pattern item).
1217 * @param[in] actions
1218 * Associated actions (list terminated by the END action).
1220 * Perform verbose error reporting if not NULL.
1221 * @param[in, out] parser
1222 * Internal parser structure.
1225 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Orchestrates the whole parse: (1) validate attributes, actions and
 * items while sizing the Verbs attributes, (2) allocate them, (3) run
 * the per-item conversion callbacks, then apply RSS trimming, priority
 * update and mark/count specs. On validate-only (parser->create == 0)
 * all allocated attributes are released before returning.
 */
1228 mlx5_flow_convert(struct rte_eth_dev *dev,
1229 const struct rte_flow_attr *attr,
1230 const struct rte_flow_item items[],
1231 const struct rte_flow_action actions[],
1232 struct rte_flow_error *error,
1233 struct mlx5_flow_parse *parser)
1235 const struct mlx5_flow_items *cur_item = mlx5_flow_items;
1239 /* First step. Validate the attributes, items and actions. */
1240 *parser = (struct mlx5_flow_parse){
1241 .create = parser->create,
1242 .layer = HASH_RXQ_ETH,
1243 .mark_id = MLX5_FLOW_MARK_DEFAULT,
1245 ret = mlx5_flow_convert_attributes(attr, error);
1248 ret = mlx5_flow_convert_actions(dev, actions, error, parser);
1251 ret = mlx5_flow_convert_items_validate(dev, items, error, parser);
1254 mlx5_flow_convert_finalise(parser);
1257 * Allocate the memory space to store verbs specifications.
/* Drop flows use only the HASH_RXQ_ETH attribute. */
1260 unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;
1262 parser->queue[HASH_RXQ_ETH].ibv_attr =
1263 mlx5_flow_convert_allocate(offset, error);
1264 if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
1266 parser->queue[HASH_RXQ_ETH].offset =
1267 sizeof(struct ibv_flow_attr);
1269 for (i = 0; i != hash_rxq_init_n; ++i) {
1270 unsigned int offset;
1272 offset = parser->queue[i].offset;
1273 parser->queue[i].ibv_attr =
1274 mlx5_flow_convert_allocate(offset, error);
1275 if (!parser->queue[i].ibv_attr)
/* Reset offsets: conversion restarts right after the attribute. */
1277 parser->queue[i].offset = sizeof(struct ibv_flow_attr);
1280 /* Third step. Conversion parse, fill the specifications. */
1283 parser->layer = HASH_RXQ_ETH;
1284 for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
1285 struct mlx5_flow_data data = {
1291 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
1293 cur_item = &mlx5_flow_items[items->type];
1294 ret = cur_item->convert(items,
1295 (cur_item->default_mask ?
1296 cur_item->default_mask :
1302 if (!parser->drop) {
1303 /* RSS check, remove unused hash types. */
1304 ret = mlx5_flow_convert_rss(parser);
1307 /* Complete missing specification. */
1308 mlx5_flow_convert_finalise(parser);
1310 mlx5_flow_update_priority(dev, parser, attr);
1312 mlx5_flow_create_flag_mark(parser, parser->mark_id);
1313 if (parser->count && parser->create) {
1314 mlx5_flow_create_count(dev, parser);
1316 goto exit_count_error;
1319 /* Only verification is expected, all resources should be released. */
1320 if (!parser->create) {
1321 for (i = 0; i != hash_rxq_init_n; ++i) {
1322 if (parser->queue[i].ibv_attr) {
1323 rte_free(parser->queue[i].ibv_attr);
1324 parser->queue[i].ibv_attr = NULL;
/* NOTE(review): error labels (exit_enomem/exit_count_error) partly
 * missing; cleanup below frees any attributes already allocated. */
1330 for (i = 0; i != hash_rxq_init_n; ++i) {
1331 if (parser->queue[i].ibv_attr) {
1332 rte_free(parser->queue[i].ibv_attr);
1333 parser->queue[i].ibv_attr = NULL;
1336 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1337 NULL, "cannot allocate verbs spec attributes");
1340 rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1341 NULL, "cannot create counter");
1346 * Copy the specification created into the flow.
1349 * Internal parser structure.
1351 * Create specification.
1353 * Size in bytes of the specification to copy.
/*
 * Appends `size` bytes of spec `src` to every allocated per-queue Verbs
 * attribute, bumping num_of_specs and the write offset for each.
 */
1356 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
1362 for (i = 0; i != hash_rxq_init_n; ++i) {
1363 if (!parser->queue[i].ibv_attr)
1365 dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1366 parser->queue[i].offset);
1367 memcpy(dst, src, size);
1368 ++parser->queue[i].ibv_attr->num_of_specs;
1369 parser->queue[i].offset += size;
/**
 * Convert Ethernet item to Verbs specification.
 *
 * Builds an ibv_flow_spec_eth from the item's spec/mask (falling back
 * to @p default_mask), masks the values, and copies the spec into every
 * allocated per-hash-type attribute buffer.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   Parser/device/error context.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
1387 mlx5_flow_create_eth(const struct rte_flow_item *item,
1388 const void *default_mask,
1389 struct mlx5_flow_data *data)
1391 const struct rte_flow_item_eth *spec = item->spec;
1392 const struct rte_flow_item_eth *mask = item->mask;
1393 struct mlx5_flow_parse *parser = data->parser;
1394 const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1395 struct ibv_flow_spec_eth eth = {
1396 .type = parser->inner | IBV_FLOW_SPEC_ETH,
/* L2 is the outermost layer parsed so far. */
1400 parser->layer = HASH_RXQ_ETH;
1405 mask = default_mask;
/* Fixed: "&eth." had been mis-encoded as the HTML entity glyph "ð". */
1406 memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
1407 memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
1408 eth.val.ether_type = spec->type;
1409 memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
1410 memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
1411 eth.mask.ether_type = mask->type;
1412 /* Remove unwanted bits from values. */
1413 for (i = 0; i < ETHER_ADDR_LEN; ++i) {
1414 eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
1415 eth.val.src_mac[i] &= eth.mask.src_mac[i];
1417 eth.val.ether_type &= eth.mask.ether_type;
1419 mlx5_flow_create_copy(parser, &eth, eth_size);
/**
 * Convert VLAN item to Verbs specification.
 *
 * Verbs has no dedicated VLAN spec: the TCI is folded into the
 * ibv_flow_spec_eth already written at the tail of each attribute
 * buffer. An empty VLAN mask or a TPID match is rejected.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   Parser/device/error context.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
1437 mlx5_flow_create_vlan(const struct rte_flow_item *item,
1438 const void *default_mask,
1439 struct mlx5_flow_data *data)
1441 const struct rte_flow_item_vlan *spec = item->spec;
1442 const struct rte_flow_item_vlan *mask = item->mask;
1443 struct mlx5_flow_parse *parser = data->parser;
1444 struct ibv_flow_spec_eth *eth;
1445 const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1446 const char *msg = "VLAN cannot be empty";
1451 mask = default_mask;
1453 for (i = 0; i != hash_rxq_init_n; ++i) {
1454 if (!parser->queue[i].ibv_attr)
/* Patch the Ethernet spec previously appended to this buffer. */
1457 eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1458 parser->queue[i].offset - eth_size);
1459 eth->val.vlan_tag = spec->tci;
1460 eth->mask.vlan_tag = mask->tci;
1461 eth->val.vlan_tag &= eth->mask.vlan_tag;
1463 * From verbs perspective an empty VLAN is equivalent
1464 * to a packet without VLAN layer.
1466 if (!eth->mask.vlan_tag)
1468 /* Outer TPID cannot be matched. */
1469 if (eth->mask.ether_type) {
1470 msg = "VLAN TPID matching is not supported";
/* Inner EtherType replaces the outer one inside the VLAN layer. */
1473 eth->val.ether_type = spec->inner_type;
1474 eth->mask.ether_type = mask->inner_type;
1475 eth->val.ether_type &= eth->mask.ether_type;
1480 return rte_flow_error_set(data->error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM,
/**
 * Convert IPv4 item to Verbs specification.
 *
 * Rejects inner IPv4 of a VXLAN tunnel when the l3_vxlan_en device
 * parameter is off, then builds a masked ibv_flow_spec_ipv4_ext and
 * copies it into every allocated attribute buffer.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   Parser/device/error context.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
1498 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
1499 const void *default_mask,
1500 struct mlx5_flow_data *data)
1502 struct priv *priv = data->dev->data->dev_private;
1503 const struct rte_flow_item_ipv4 *spec = item->spec;
1504 const struct rte_flow_item_ipv4 *mask = item->mask;
1505 struct mlx5_flow_parse *parser = data->parser;
1506 unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
1507 struct ibv_flow_spec_ipv4_ext ipv4 = {
1508 .type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
/* L3 VXLAN (IP-in-VXLAN) needs an explicit device parameter. */
1512 if (parser->layer == HASH_RXQ_TUNNEL &&
1513 parser->tunnel == ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] &&
1514 !priv->config.l3_vxlan_en)
1515 return rte_flow_error_set(data->error, EINVAL,
1516 RTE_FLOW_ERROR_TYPE_ITEM,
1518 "L3 VXLAN not enabled by device"
1519 " parameter and/or not configured"
1521 parser->layer = HASH_RXQ_IPV4;
1524 mask = default_mask;
1525 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
1526 .src_ip = spec->hdr.src_addr,
1527 .dst_ip = spec->hdr.dst_addr,
1528 .proto = spec->hdr.next_proto_id,
1529 .tos = spec->hdr.type_of_service,
1531 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
1532 .src_ip = mask->hdr.src_addr,
1533 .dst_ip = mask->hdr.dst_addr,
1534 .proto = mask->hdr.next_proto_id,
1535 .tos = mask->hdr.type_of_service,
1537 /* Remove unwanted bits from values. */
1538 ipv4.val.src_ip &= ipv4.mask.src_ip;
1539 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
1540 ipv4.val.proto &= ipv4.mask.proto;
1541 ipv4.val.tos &= ipv4.mask.tos;
1543 mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
/**
 * Convert IPv6 item to Verbs specification.
 *
 * Same L3-VXLAN gate as IPv4; extracts flow label and traffic class
 * from the big-endian vtc_flow word, masks all fields and copies the
 * resulting ibv_flow_spec_ipv6 into every allocated attribute buffer.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   Parser/device/error context.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
1561 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
1562 const void *default_mask,
1563 struct mlx5_flow_data *data)
1565 struct priv *priv = data->dev->data->dev_private;
1566 const struct rte_flow_item_ipv6 *spec = item->spec;
1567 const struct rte_flow_item_ipv6 *mask = item->mask;
1568 struct mlx5_flow_parse *parser = data->parser;
1569 unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
1570 struct ibv_flow_spec_ipv6 ipv6 = {
1571 .type = parser->inner | IBV_FLOW_SPEC_IPV6,
/* L3 VXLAN (IP-in-VXLAN) needs an explicit device parameter. */
1575 if (parser->layer == HASH_RXQ_TUNNEL &&
1576 parser->tunnel == ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] &&
1577 !priv->config.l3_vxlan_en)
1578 return rte_flow_error_set(data->error, EINVAL,
1579 RTE_FLOW_ERROR_TYPE_ITEM,
1581 "L3 VXLAN not enabled by device"
1582 " parameter and/or not configured"
1584 parser->layer = HASH_RXQ_IPV6;
1587 uint32_t vtc_flow_val;
1588 uint32_t vtc_flow_mask;
1591 mask = default_mask;
1592 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
1593 RTE_DIM(ipv6.val.src_ip));
1594 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
1595 RTE_DIM(ipv6.val.dst_ip));
1596 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
1597 RTE_DIM(ipv6.mask.src_ip));
1598 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
1599 RTE_DIM(ipv6.mask.dst_ip));
/* vtc_flow packs version, traffic class and flow label (network order). */
1600 vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
1601 vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
1602 ipv6.val.flow_label =
1603 rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
1605 ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
1607 ipv6.val.next_hdr = spec->hdr.proto;
1608 ipv6.val.hop_limit = spec->hdr.hop_limits;
1609 ipv6.mask.flow_label =
1610 rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
1612 ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
1614 ipv6.mask.next_hdr = mask->hdr.proto;
1615 ipv6.mask.hop_limit = mask->hdr.hop_limits;
1616 /* Remove unwanted bits from values. */
1617 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
1618 ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
1619 ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
1621 ipv6.val.flow_label &= ipv6.mask.flow_label;
1622 ipv6.val.traffic_class &= ipv6.mask.traffic_class;
1623 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
1624 ipv6.val.hop_limit &= ipv6.mask.hop_limit;
1626 mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
/**
 * Convert UDP item to Verbs specification.
 *
 * Promotes the parser layer from the current L3 type to the matching
 * UDP hash type, then builds a masked ibv_flow_spec_tcp_udp and copies
 * it into every allocated attribute buffer.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   Parser/device/error context.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
1644 mlx5_flow_create_udp(const struct rte_flow_item *item,
1645 const void *default_mask,
1646 struct mlx5_flow_data *data)
1648 const struct rte_flow_item_udp *spec = item->spec;
1649 const struct rte_flow_item_udp *mask = item->mask;
1650 struct mlx5_flow_parse *parser = data->parser;
1651 unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1652 struct ibv_flow_spec_tcp_udp udp = {
1653 .type = parser->inner | IBV_FLOW_SPEC_UDP,
/* Refine the hash type according to the underlying L3 layer. */
1657 if (parser->layer == HASH_RXQ_IPV4)
1658 parser->layer = HASH_RXQ_UDPV4;
1660 parser->layer = HASH_RXQ_UDPV6;
1663 mask = default_mask;
1664 udp.val.dst_port = spec->hdr.dst_port;
1665 udp.val.src_port = spec->hdr.src_port;
1666 udp.mask.dst_port = mask->hdr.dst_port;
1667 udp.mask.src_port = mask->hdr.src_port;
1668 /* Remove unwanted bits from values. */
1669 udp.val.src_port &= udp.mask.src_port;
1670 udp.val.dst_port &= udp.mask.dst_port;
1672 mlx5_flow_create_copy(parser, &udp, udp_size);
/**
 * Convert TCP item to Verbs specification.
 *
 * Mirror of the UDP conversion: promote the parser layer to the TCP
 * hash type for the current L3 layer, mask the ports and copy the
 * spec into every allocated attribute buffer.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   Parser/device/error context.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
1690 mlx5_flow_create_tcp(const struct rte_flow_item *item,
1691 const void *default_mask,
1692 struct mlx5_flow_data *data)
1694 const struct rte_flow_item_tcp *spec = item->spec;
1695 const struct rte_flow_item_tcp *mask = item->mask;
1696 struct mlx5_flow_parse *parser = data->parser;
1697 unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1698 struct ibv_flow_spec_tcp_udp tcp = {
1699 .type = parser->inner | IBV_FLOW_SPEC_TCP,
/* Refine the hash type according to the underlying L3 layer. */
1703 if (parser->layer == HASH_RXQ_IPV4)
1704 parser->layer = HASH_RXQ_TCPV4;
1706 parser->layer = HASH_RXQ_TCPV6;
1709 mask = default_mask;
1710 tcp.val.dst_port = spec->hdr.dst_port;
1711 tcp.val.src_port = spec->hdr.src_port;
1712 tcp.mask.dst_port = mask->hdr.dst_port;
1713 tcp.mask.src_port = mask->hdr.src_port;
1714 /* Remove unwanted bits from values. */
1715 tcp.val.src_port &= tcp.mask.src_port;
1716 tcp.val.dst_port &= tcp.mask.dst_port;
1718 mlx5_flow_create_copy(parser, &tcp, tcp_size);
/**
 * Convert VXLAN item to Verbs specification.
 *
 * Marks the parser as inside a VXLAN tunnel (subsequent specs become
 * inner specs), records the outer layer for RSS, builds the 24-bit VNI
 * into a tunnel_id and rejects VNI 0 which Verbs would treat as a
 * wildcard match.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   Parser/device/error context.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
1736 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
1737 const void *default_mask,
1738 struct mlx5_flow_data *data)
1740 const struct rte_flow_item_vxlan *spec = item->spec;
1741 const struct rte_flow_item_vxlan *mask = item->mask;
1742 struct mlx5_flow_parse *parser = data->parser;
1743 unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1744 struct ibv_flow_spec_tunnel vxlan = {
1745 .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
/* Everything parsed after this point belongs to the inner packet. */
1754 parser->inner = IBV_FLOW_SPEC_INNER;
1755 parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)];
1756 parser->out_layer = parser->layer;
1757 parser->layer = HASH_RXQ_TUNNEL;
1758 /* Default VXLAN to outer RSS. */
1759 if (!parser->rss_conf.level)
1760 parser->rss_conf.level = 1;
1763 mask = default_mask;
/* Place the 24-bit VNI in the upper bytes of the tunnel id. */
1764 memcpy(&id.vni[1], spec->vni, 3);
1765 vxlan.val.tunnel_id = id.vlan_id;
1766 memcpy(&id.vni[1], mask->vni, 3);
1767 vxlan.mask.tunnel_id = id.vlan_id;
1768 /* Remove unwanted bits from values. */
1769 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1772 * Tunnel id 0 is equivalent as not adding a VXLAN layer, if only this
1773 * layer is defined in the Verbs specification it is interpreted as
1774 * wildcard and all packets will match this rule, if it follows a full
1775 * stack layer (ex: eth / ipv4 / udp), all packets matching the layers
1776 * before will also match this rule.
1777 * To avoid such situation, VNI 0 is currently refused.
1779 if (!vxlan.val.tunnel_id)
1780 return rte_flow_error_set(data->error, EINVAL,
1781 RTE_FLOW_ERROR_TYPE_ITEM,
1783 "VxLAN vni cannot be 0");
1784 mlx5_flow_create_copy(parser, &vxlan, size);
/**
 * Convert GRE item to Verbs specification.
 *
 * Enters tunnel mode (inner specs from now on, inner RSS by default)
 * and forces the outer IP header's protocol/next-header field to 47
 * (GRE), failing if the pattern already pinned it to something else.
 *
 * @param item[in]
 *   Item specification (unused, GRE has no matchable fields here).
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   Parser/device/error context.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
1802 mlx5_flow_create_gre(const struct rte_flow_item *item __rte_unused,
1803 const void *default_mask __rte_unused,
1804 struct mlx5_flow_data *data)
1806 struct mlx5_flow_parse *parser = data->parser;
1807 unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1808 struct ibv_flow_spec_tunnel tunnel = {
1809 .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1812 struct ibv_flow_spec_ipv4_ext *ipv4;
1813 struct ibv_flow_spec_ipv6 *ipv6;
/* Everything parsed after this point belongs to the inner packet. */
1816 parser->inner = IBV_FLOW_SPEC_INNER;
1817 parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_GRE)];
1818 parser->out_layer = parser->layer;
1819 parser->layer = HASH_RXQ_TUNNEL;
1820 /* Default GRE to inner RSS. */
1821 if (!parser->rss_conf.level)
1822 parser->rss_conf.level = 2;
1823 /* Update encapsulation IP layer protocol. */
1824 for (i = 0; i != hash_rxq_init_n; ++i) {
1825 if (!parser->queue[i].ibv_attr)
1827 if (parser->out_layer == HASH_RXQ_IPV4) {
/* Patch the IPv4 spec last appended to this buffer. */
1828 ipv4 = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1829 parser->queue[i].offset -
1830 sizeof(struct ibv_flow_spec_ipv4_ext));
1831 if (ipv4->mask.proto && ipv4->val.proto != MLX5_GRE)
1833 ipv4->val.proto = MLX5_GRE;
1834 ipv4->mask.proto = 0xff;
1835 } else if (parser->out_layer == HASH_RXQ_IPV6) {
1836 ipv6 = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1837 parser->queue[i].offset -
1838 sizeof(struct ibv_flow_spec_ipv6));
1839 if (ipv6->mask.next_hdr &&
1840 ipv6->val.next_hdr != MLX5_GRE)
1842 ipv6->val.next_hdr = MLX5_GRE;
1843 ipv6->mask.next_hdr = 0xff;
/* An early loop exit means a conflicting IP protocol was found. */
1846 if (i != hash_rxq_init_n)
1847 return rte_flow_error_set(data->error, EINVAL,
1848 RTE_FLOW_ERROR_TYPE_ITEM,
1850 "IP protocol of GRE must be 47");
1851 mlx5_flow_create_copy(parser, &tunnel, size);
/**
 * Convert mark/flag action to Verbs specification.
 *
 * Appends an ibv_flow_spec_action_tag carrying the encoded mark id to
 * every allocated attribute buffer. Caller must have set parser->mark.
 *
 * @param parser
 *   Internal parser structure.
 * @param mark_id
 *   Mark identifier to encode into the tag.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
1867 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
1869 unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1870 struct ibv_flow_spec_action_tag tag = {
1871 .type = IBV_FLOW_SPEC_ACTION_TAG,
/* mlx5_flow_mark_set() maps the id into the device tag encoding. */
1873 .tag_id = mlx5_flow_mark_set(mark_id),
1876 assert(parser->mark);
1877 mlx5_flow_create_copy(parser, &tag, size);
/**
 * Convert count action to Verbs specification.
 *
 * When the provider supports counter sets, creates one and appends a
 * counter-action spec referencing it; a no-op stub otherwise.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param parser
 *   Pointer to MLX5 flow parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
1893 mlx5_flow_create_count(struct rte_eth_dev *dev __rte_unused,
1894 struct mlx5_flow_parse *parser __rte_unused)
1896 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
1897 struct priv *priv = dev->data->dev_private;
1898 unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
1899 struct ibv_counter_set_init_attr init_attr = {0};
1900 struct ibv_flow_spec_counter_action counter = {
1901 .type = IBV_FLOW_SPEC_ACTION_COUNT,
1903 .counter_set_handle = 0,
1906 init_attr.counter_set_id = 0;
/* The counter set handle is kept in the parser for later queries. */
1907 parser->cs = mlx5_glue->create_counter_set(priv->ctx, &init_attr);
1912 counter.counter_set_handle = parser->cs->handle;
1913 mlx5_flow_create_copy(parser, &counter, size);
/**
 * Complete flow rule creation with a drop queue.
 *
 * Appends a drop-action spec to the HASH_RXQ_ETH attribute buffer,
 * transfers ownership of that buffer to the flow and, if the port is
 * started, instantiates the Verbs flow on the drop queue QP.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param parser
 *   Internal parser structure.
 * @param flow
 *   Pointer to the rte_flow.
 * @param error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
1934 mlx5_flow_create_action_queue_drop(struct rte_eth_dev *dev,
1935 struct mlx5_flow_parse *parser,
1936 struct rte_flow *flow,
1937 struct rte_flow_error *error)
1939 struct priv *priv = dev->data->dev_private;
1940 struct ibv_flow_spec_action_drop *drop;
1941 unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
/* Write the drop action at the tail of the Ethernet attribute. */
1946 drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
1947 parser->queue[HASH_RXQ_ETH].offset);
1948 *drop = (struct ibv_flow_spec_action_drop){
1949 .type = IBV_FLOW_SPEC_ACTION_DROP,
1952 ++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
1953 parser->queue[HASH_RXQ_ETH].offset += size;
/* Ownership of the attribute buffer moves to the flow. */
1954 flow->frxq[HASH_RXQ_ETH].ibv_attr =
1955 parser->queue[HASH_RXQ_ETH].ibv_attr;
1957 flow->cs = parser->cs;
/* Defer hardware programming until the port is started. */
1958 if (!priv->dev->data->dev_started)
1960 parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
1961 flow->frxq[HASH_RXQ_ETH].ibv_flow =
1962 mlx5_glue->create_flow(priv->flow_drop_queue->qp,
1963 flow->frxq[HASH_RXQ_ETH].ibv_attr);
1964 if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1965 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1966 NULL, "flow rule creation failure");
/* Error path: release everything acquired above. */
1972 if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1973 claim_zero(mlx5_glue->destroy_flow
1974 (flow->frxq[HASH_RXQ_ETH].ibv_flow));
1975 flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
1977 if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
1978 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
1979 flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
1982 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
/**
 * Create hash Rx queues when RSS is enabled.
 *
 * Moves each per-hash-type attribute buffer from the parser to the
 * flow and, when the port is started, acquires (get-or-create) the
 * matching hash Rx queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param parser
 *   Internal parser structure.
 * @param flow
 *   Pointer to the rte_flow.
 * @param error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
2005 mlx5_flow_create_action_queue_rss(struct rte_eth_dev *dev,
2006 struct mlx5_flow_parse *parser,
2007 struct rte_flow *flow,
2008 struct rte_flow_error *error)
2010 struct priv *priv = dev->data->dev_private;
2013 for (i = 0; i != hash_rxq_init_n; ++i) {
2014 if (!parser->queue[i].ibv_attr)
/* Transfer attribute buffer ownership parser -> flow. */
2016 flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
2017 parser->queue[i].ibv_attr = NULL;
2018 flow->frxq[i].hash_fields = parser->queue[i].hash_fields;
/* Hash Rx queues are only needed once the port is started. */
2019 if (!priv->dev->data->dev_started)
/* Try to reuse an existing hash Rx queue first... */
2021 flow->frxq[i].hrxq =
2023 parser->rss_conf.key,
2024 parser->rss_conf.key_len,
2025 flow->frxq[i].hash_fields,
2026 parser->rss_conf.queue,
2027 parser->rss_conf.queue_num,
2029 parser->rss_conf.level);
2030 if (flow->frxq[i].hrxq)
/* ...otherwise create a new one. */
2032 flow->frxq[i].hrxq =
2034 parser->rss_conf.key,
2035 parser->rss_conf.key_len,
2036 flow->frxq[i].hash_fields,
2037 parser->rss_conf.queue,
2038 parser->rss_conf.queue_num,
2040 parser->rss_conf.level);
2041 if (!flow->frxq[i].hrxq) {
2042 return rte_flow_error_set(error, ENOMEM,
2043 RTE_FLOW_ERROR_TYPE_HANDLE,
2045 "cannot create hash rxq");
/**
 * RXQ update after flow rule creation.
 *
 * Propagates the flow's mark flag and tunnel type to each Rx queue it
 * uses. A queue carrying more than one tunnel type has its ptype
 * information cleared since it can no longer be reported reliably.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param flow
 *   Pointer to the flow rule.
 */
2060 mlx5_flow_create_update_rxqs(struct rte_eth_dev *dev, struct rte_flow *flow)
2062 struct priv *priv = dev->data->dev_private;
/* Nothing to propagate while the port is stopped. */
2066 if (!dev->data->dev_started)
2068 for (i = 0; i != flow->rss_conf.queue_num; ++i) {
2069 struct mlx5_rxq_data *rxq_data = (*priv->rxqs)
2070 [(*flow->queues)[i]];
2071 struct mlx5_rxq_ctrl *rxq_ctrl =
2072 container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
2073 uint8_t tunnel = PTYPE_IDX(flow->tunnel);
2075 rxq_data->mark |= flow->mark;
/* Reference-count this tunnel type on the queue. */
2078 rxq_ctrl->tunnel_types[tunnel] += 1;
2079 /* Clear tunnel type if more than one tunnel types set. */
2080 for (j = 0; j != RTE_DIM(rxq_ctrl->tunnel_types); ++j) {
2083 if (rxq_ctrl->tunnel_types[j] > 0) {
2084 rxq_data->tunnel = 0;
/* Only one tunnel type on the queue: it can be reported. */
2088 if (j == RTE_DIM(rxq_ctrl->tunnel_types))
2089 rxq_data->tunnel = flow->tunnel;
/**
 * Dump flow hash RX queue detail.
 *
 * Debug-only helper: formats the Verbs specs of one hash Rx queue of a
 * flow and logs them together with the hrxq/QP/indirection pointers.
 * Compiled out (parameters unused) unless debug logging is enabled.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param flow
 *   Pointer to the rte_flow.
 * @param hrxq_idx
 *   Hash RX queue index.
 */
2104 mlx5_flow_dump(struct rte_eth_dev *dev __rte_unused,
2105 struct rte_flow *flow __rte_unused,
2106 unsigned int hrxq_idx __rte_unused)
/* Walk the spec list that immediately follows the attribute header. */
2114 spec_ptr = (uintptr_t)(flow->frxq[hrxq_idx].ibv_attr + 1);
2115 for (j = 0, off = 0; j < flow->frxq[hrxq_idx].ibv_attr->num_of_specs;
2117 struct ibv_flow_spec *spec = (void *)spec_ptr;
2118 off += sprintf(buf + off, " %x(%hu)", spec->hdr.type,
2120 spec_ptr += spec->hdr.size;
2123 "port %u Verbs flow %p type %u: hrxq:%p qp:%p ind:%p,"
2124 " hash:%" PRIx64 "/%u specs:%hhu(%hu), priority:%hu, type:%d,"
2125 " flags:%x, comp_mask:%x specs:%s",
2126 dev->data->port_id, (void *)flow, hrxq_idx,
2127 (void *)flow->frxq[hrxq_idx].hrxq,
2128 (void *)flow->frxq[hrxq_idx].hrxq->qp,
2129 (void *)flow->frxq[hrxq_idx].hrxq->ind_table,
2130 flow->frxq[hrxq_idx].hash_fields |
2132 flow->rss_conf.level > 1 ? (uint32_t)IBV_RX_HASH_INNER : 0),
2133 flow->rss_conf.queue_num,
2134 flow->frxq[hrxq_idx].ibv_attr->num_of_specs,
2135 flow->frxq[hrxq_idx].ibv_attr->size,
2136 flow->frxq[hrxq_idx].ibv_attr->priority,
2137 flow->frxq[hrxq_idx].ibv_attr->type,
2138 flow->frxq[hrxq_idx].ibv_attr->flags,
2139 flow->frxq[hrxq_idx].ibv_attr->comp_mask,
/**
 * Complete flow rule creation.
 *
 * Non-drop path: acquires the hash Rx queues, instantiates one Verbs
 * flow per hash type once the port is started, then updates the Rx
 * queues (mark/tunnel). On any failure everything acquired so far is
 * released and rte_errno is preserved.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param parser
 *   Internal parser structure.
 * @param flow
 *   Pointer to the rte_flow.
 * @param error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
2160 mlx5_flow_create_action_queue(struct rte_eth_dev *dev,
2161 struct mlx5_flow_parse *parser,
2162 struct rte_flow *flow,
2163 struct rte_flow_error *error)
2165 struct priv *priv = dev->data->dev_private;
2168 unsigned int flows_n = 0;
2172 assert(!parser->drop);
2173 ret = mlx5_flow_create_action_queue_rss(dev, parser, flow, error);
2177 flow->cs = parser->cs;
/* Hardware programming is deferred until the port is started. */
2178 if (!priv->dev->data->dev_started)
2180 for (i = 0; i != hash_rxq_init_n; ++i) {
2181 if (!flow->frxq[i].hrxq)
2183 flow->frxq[i].ibv_flow =
2184 mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
2185 flow->frxq[i].ibv_attr);
2186 mlx5_flow_dump(dev, flow, i);
2187 if (!flow->frxq[i].ibv_flow) {
2188 rte_flow_error_set(error, ENOMEM,
2189 RTE_FLOW_ERROR_TYPE_HANDLE,
2190 NULL, "flow rule creation failure");
/* At least one Verbs flow must have been created. */
2196 rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE,
2197 NULL, "internal error in flow creation");
2200 mlx5_flow_create_update_rxqs(dev, flow);
/* Error path: unwind flows, hrxq references and attributes. */
2203 ret = rte_errno; /* Save rte_errno before cleanup. */
2205 for (i = 0; i != hash_rxq_init_n; ++i) {
2206 if (flow->frxq[i].ibv_flow) {
2207 struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;
2209 claim_zero(mlx5_glue->destroy_flow(ibv_flow));
2211 if (flow->frxq[i].hrxq)
2212 mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
2213 if (flow->frxq[i].ibv_attr)
2214 rte_free(flow->frxq[i].ibv_attr);
2217 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2221 rte_errno = ret; /* Restore rte_errno. */
/**
 * Convert a flow and, on success, create it and insert it into a list.
 *
 * Runs the parser, allocates the rte_flow (queues array and RSS key
 * appended to the same allocation), copies the RSS configuration, then
 * finalises either the drop or the queue/RSS action path.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param list
 *   Pointer to a TAILQ flow list.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A flow on success, NULL otherwise and rte_errno is set.
 */
2244 static struct rte_flow *
2245 mlx5_flow_list_create(struct rte_eth_dev *dev,
2246 struct mlx5_flows *list,
2247 const struct rte_flow_attr *attr,
2248 const struct rte_flow_item items[],
2249 const struct rte_flow_action actions[],
2250 struct rte_flow_error *error)
2252 struct mlx5_flow_parse parser = { .create = 1, };
2253 struct rte_flow *flow = NULL;
2257 ret = mlx5_flow_convert(dev, attr, items, actions, error, &parser);
/* Single allocation: rte_flow followed by the queue indices. */
2260 flow = rte_calloc(__func__, 1,
2262 parser.rss_conf.queue_num * sizeof(uint16_t),
2265 rte_flow_error_set(error, ENOMEM,
2266 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2268 "cannot allocate flow memory");
2271 /* Copy configuration. */
2272 flow->queues = (uint16_t (*)[])(flow + 1);
2273 flow->tunnel = parser.tunnel;
2274 flow->rss_conf = (struct rte_flow_action_rss){
2275 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
2277 .types = parser.rss_conf.types,
2278 .key_len = parser.rss_conf.key_len,
2279 .queue_num = parser.rss_conf.queue_num,
2280 .key = memcpy(flow->rss_key, parser.rss_conf.key,
2281 sizeof(*parser.rss_conf.key) *
2282 parser.rss_conf.key_len),
2283 .queue = memcpy(flow->queues, parser.rss_conf.queue,
2284 sizeof(*parser.rss_conf.queue) *
2285 parser.rss_conf.queue_num),
2287 flow->mark = parser.mark;
2288 /* finalise the flow. */
2290 ret = mlx5_flow_create_action_queue_drop(dev, &parser, flow,
2293 ret = mlx5_flow_create_action_queue(dev, &parser, flow, error);
2296 TAILQ_INSERT_TAIL(list, flow, next);
2297 DRV_LOG(DEBUG, "port %u flow created %p", dev->data->port_id,
/* Error path: free whatever the parser still owns. */
2301 DRV_LOG(ERR, "port %u flow creation error: %s", dev->data->port_id,
2303 for (i = 0; i != hash_rxq_init_n; ++i) {
2304 if (parser.queue[i].ibv_attr)
2305 rte_free(parser.queue[i].ibv_attr);
/**
 * Validate a flow supported by the NIC.
 *
 * Runs the conversion in validation-only mode (create = 0), so no
 * resources are kept.
 *
 * @see rte_flow_validate()
 * @see rte_flow_ops
 */
2318 mlx5_flow_validate(struct rte_eth_dev *dev,
2319 const struct rte_flow_attr *attr,
2320 const struct rte_flow_item items[],
2321 const struct rte_flow_action actions[],
2322 struct rte_flow_error *error)
2324 struct mlx5_flow_parse parser = { .create = 0, };
2326 return mlx5_flow_convert(dev, attr, items, actions, error, &parser);
/**
 * Create a flow on the port's private flow list.
 *
 * Thin rte_flow_ops wrapper around mlx5_flow_list_create().
 *
 * @see rte_flow_create()
 * @see rte_flow_ops
 */
2336 mlx5_flow_create(struct rte_eth_dev *dev,
2337 const struct rte_flow_attr *attr,
2338 const struct rte_flow_item items[],
2339 const struct rte_flow_action actions[],
2340 struct rte_flow_error *error)
2342 struct priv *priv = dev->data->dev_private;
2344 return mlx5_flow_list_create(dev, &priv->flows, attr, items, actions,
/**
 * Destroy a flow in a list.
 *
 * Unwinds everything mlx5_flow_list_create() set up: per-queue tunnel
 * reference counts, the mark flag (only cleared on queues no other
 * marked flow still uses), the Verbs flows and attributes, the hash
 * Rx queue references and the optional counter set.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param list
 *   Pointer to a TAILQ flow list.
 * @param[in] flow
 *   Flow to destroy.
 */
2359 mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
2360 struct rte_flow *flow)
2362 struct priv *priv = dev->data->dev_private;
/* Rx queue state only needs fixing while the port is running. */
2365 if (flow->drop || !dev->data->dev_started)
2367 for (i = 0; flow->tunnel && i != flow->rss_conf.queue_num; ++i) {
2368 /* Update queue tunnel type. */
2369 struct mlx5_rxq_data *rxq_data = (*priv->rxqs)
2370 [(*flow->queues)[i]];
2371 struct mlx5_rxq_ctrl *rxq_ctrl =
2372 container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
2373 uint8_t tunnel = PTYPE_IDX(flow->tunnel);
2375 assert(rxq_ctrl->tunnel_types[tunnel] > 0);
2376 rxq_ctrl->tunnel_types[tunnel] -= 1;
2377 if (!rxq_ctrl->tunnel_types[tunnel]) {
2378 /* Update tunnel type. */
2383 for (j = 0; j < RTE_DIM(rxq_ctrl->tunnel_types); j++)
2384 if (rxq_ctrl->tunnel_types[j]) {
2388 /* Keep same if more than one tunnel types left. */
2390 rxq_data->tunnel = ptype_ext[last];
2391 else if (types == 0)
2392 /* No tunnel type left. */
2393 rxq_data->tunnel = 0;
2396 for (i = 0; flow->mark && i != flow->rss_conf.queue_num; ++i) {
2397 struct rte_flow *tmp;
2401 * To remove the mark from the queue, the queue must not be
2402 * present in any other marked flow (RSS or not).
2404 TAILQ_FOREACH(tmp, list, next) {
2406 uint16_t *tqs = NULL;
/* Collect the queue set of the other flow's hash Rx queue. */
2411 for (j = 0; j != hash_rxq_init_n; ++j) {
2412 if (!tmp->frxq[j].hrxq)
2414 tqs = tmp->frxq[j].hrxq->ind_table->queues;
2415 tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
2419 for (j = 0; (j != tq_n) && !mark; j++)
2420 if (tqs[j] == (*flow->queues)[i])
2423 (*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
/* Release the drop-path Verbs objects. */
2427 if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
2428 claim_zero(mlx5_glue->destroy_flow
2429 (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2430 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
/* Release the per-hash-type Verbs objects and hrxq references. */
2432 for (i = 0; i != hash_rxq_init_n; ++i) {
2433 struct mlx5_flow *frxq = &flow->frxq[i];
2436 claim_zero(mlx5_glue->destroy_flow
2439 mlx5_hrxq_release(dev, frxq->hrxq);
2441 rte_free(frxq->ibv_attr);
2445 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2448 TAILQ_REMOVE(list, flow, next);
2449 DRV_LOG(DEBUG, "port %u flow destroyed %p", dev->data->port_id,
/**
 * Destroy all flows.
 *
 * Pops and destroys flows until the list is empty.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param list
 *   Pointer to a TAILQ flow list.
 */
2463 mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
2465 while (!TAILQ_EMPTY(list)) {
2466 struct rte_flow *flow;
2468 flow = TAILQ_FIRST(list);
2469 mlx5_flow_list_destroy(dev, list, flow);
/**
 * Create drop queue.
 *
 * Builds the minimal Verbs object chain (CQ -> WQ -> indirection
 * table -> hash QP) used as the target of drop flow rules, unwinding
 * in reverse order on any failure.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
2483 mlx5_flow_create_drop_queue(struct rte_eth_dev *dev)
2485 struct priv *priv = dev->data->dev_private;
2486 struct mlx5_hrxq_drop *fdq = NULL;
2490 fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
2493 "port %u cannot allocate memory for drop queue",
2494 dev->data->port_id);
/* A 1-entry CQ is enough: the queue never delivers packets. */
2498 fdq->cq = mlx5_glue->create_cq(priv->ctx, 1, NULL, NULL, 0);
2500 DRV_LOG(WARNING, "port %u cannot allocate CQ for drop queue",
2501 dev->data->port_id);
2505 fdq->wq = mlx5_glue->create_wq
2507 &(struct ibv_wq_init_attr){
2508 .wq_type = IBV_WQT_RQ,
2515 DRV_LOG(WARNING, "port %u cannot allocate WQ for drop queue",
2516 dev->data->port_id);
/* Single-entry indirection table pointing at the drop WQ. */
2520 fdq->ind_table = mlx5_glue->create_rwq_ind_table
2522 &(struct ibv_rwq_ind_table_init_attr){
2523 .log_ind_tbl_size = 0,
2524 .ind_tbl = &fdq->wq,
2527 if (!fdq->ind_table) {
2529 "port %u cannot allocate indirection table for drop"
2531 dev->data->port_id);
2535 fdq->qp = mlx5_glue->create_qp_ex
2537 &(struct ibv_qp_init_attr_ex){
2538 .qp_type = IBV_QPT_RAW_PACKET,
2540 IBV_QP_INIT_ATTR_PD |
2541 IBV_QP_INIT_ATTR_IND_TABLE |
2542 IBV_QP_INIT_ATTR_RX_HASH,
2543 .rx_hash_conf = (struct ibv_rx_hash_conf){
2545 IBV_RX_HASH_FUNC_TOEPLITZ,
2546 .rx_hash_key_len = rss_hash_default_key_len,
2547 .rx_hash_key = rss_hash_default_key,
2548 .rx_hash_fields_mask = 0,
2550 .rwq_ind_tbl = fdq->ind_table,
2554 DRV_LOG(WARNING, "port %u cannot allocate QP for drop queue",
2555 dev->data->port_id);
2559 priv->flow_drop_queue = fdq;
/* Error path: destroy in reverse creation order. */
2563 claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2565 claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2567 claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2569 claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2572 priv->flow_drop_queue = NULL;
/**
 * Delete drop queue.
 *
 * Destroys the drop queue's Verbs objects in reverse creation order
 * (QP, indirection table, WQ, CQ) and clears the reference.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
2583 mlx5_flow_delete_drop_queue(struct rte_eth_dev *dev)
2585 struct priv *priv = dev->data->dev_private;
2586 struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
2591 claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2593 claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2595 claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2597 claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2599 priv->flow_drop_queue = NULL;
2606 * Pointer to Ethernet device.
2608 * Pointer to a TAILQ flow list.
2611 mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
2613 struct priv *priv = dev->data->dev_private;
2614 struct rte_flow *flow;
2617 TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
2618 struct mlx5_ind_table_ibv *ind_tbl = NULL;
2621 if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
2623 claim_zero(mlx5_glue->destroy_flow
2624 (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2625 flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
2626 DRV_LOG(DEBUG, "port %u flow %p removed",
2627 dev->data->port_id, (void *)flow);
2631 /* Verify the flow has not already been cleaned. */
2632 for (i = 0; i != hash_rxq_init_n; ++i) {
2633 if (!flow->frxq[i].ibv_flow)
2636 * Indirection table may be necessary to remove the
2637 * flags in the Rx queues.
2638 * This helps to speed-up the process by avoiding
2641 ind_tbl = flow->frxq[i].hrxq->ind_table;
2644 if (i == hash_rxq_init_n)
2648 for (i = 0; i != ind_tbl->queues_n; ++i)
2649 (*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
2651 for (i = 0; i != hash_rxq_init_n; ++i) {
2652 if (!flow->frxq[i].ibv_flow)
2654 claim_zero(mlx5_glue->destroy_flow
2655 (flow->frxq[i].ibv_flow));
2656 flow->frxq[i].ibv_flow = NULL;
2657 mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
2658 flow->frxq[i].hrxq = NULL;
2660 DRV_LOG(DEBUG, "port %u flow %p removed", dev->data->port_id,
2663 /* Cleanup Rx queue tunnel info. */
2664 for (i = 0; i != priv->rxqs_n; ++i) {
2665 struct mlx5_rxq_data *q = (*priv->rxqs)[i];
2666 struct mlx5_rxq_ctrl *rxq_ctrl =
2667 container_of(q, struct mlx5_rxq_ctrl, rxq);
2671 memset((void *)rxq_ctrl->tunnel_types, 0,
2672 sizeof(rxq_ctrl->tunnel_types));
2681 * Pointer to Ethernet device.
2683 * Pointer to a TAILQ flow list.
2686 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Re-apply every flow of @list to the device: drop flows go to the drop
 * queue QP, others get (or create) a hash Rx queue per active protocol
 * slot and re-create the Verbs flow on it.
 * Returns 0 on success; error paths are partly elided in this dump —
 * NOTE(review): presumably they set rte_errno and unwind, confirm
 * against the full source.
 */
2689 mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
2691 struct priv *priv = dev->data->dev_private;
2692 struct rte_flow *flow;
2694 TAILQ_FOREACH(flow, list, next) {
/* Drop flows attach to the dedicated drop queue QP. */
2698 flow->frxq[HASH_RXQ_ETH].ibv_flow =
2699 mlx5_glue->create_flow
2700 (priv->flow_drop_queue->qp,
2701 flow->frxq[HASH_RXQ_ETH].ibv_attr);
2702 if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2704 "port %u flow %p cannot be applied",
2705 dev->data->port_id, (void *)flow);
2709 DRV_LOG(DEBUG, "port %u flow %p applied",
2710 dev->data->port_id, (void *)flow);
/* Non-drop flows: one hash Rx queue per used protocol slot. */
2714 for (i = 0; i != hash_rxq_init_n; ++i) {
2715 if (!flow->frxq[i].ibv_attr)
/* Try to reuse an existing hrxq with matching RSS parameters first. */
2717 flow->frxq[i].hrxq =
2718 mlx5_hrxq_get(dev, flow->rss_conf.key,
2719 flow->rss_conf.key_len,
2720 flow->frxq[i].hash_fields,
2721 flow->rss_conf.queue,
2722 flow->rss_conf.queue_num,
2724 flow->rss_conf.level);
2725 if (flow->frxq[i].hrxq)
/* No match: create a fresh hash Rx queue. */
2727 flow->frxq[i].hrxq =
2728 mlx5_hrxq_new(dev, flow->rss_conf.key,
2729 flow->rss_conf.key_len,
2730 flow->frxq[i].hash_fields,
2731 flow->rss_conf.queue,
2732 flow->rss_conf.queue_num,
2734 flow->rss_conf.level);
2735 if (!flow->frxq[i].hrxq) {
2737 "port %u flow %p cannot create hash"
2739 dev->data->port_id, (void *)flow);
2744 mlx5_flow_dump(dev, flow, i);
2745 flow->frxq[i].ibv_flow =
2746 mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
2747 flow->frxq[i].ibv_attr);
2748 if (!flow->frxq[i].ibv_flow) {
2750 "port %u flow %p type %u cannot be"
2752 dev->data->port_id, (void *)flow, i);
/* Propagate MARK/tunnel state to the affected Rx queues. */
2757 mlx5_flow_create_update_rxqs(dev, flow);
2763 * Verify the flow list is empty
2766 * Pointer to Ethernet device.
2768 * @return the number of flows not released.
/*
 * Leak check: log every flow still present on priv->flows and return the
 * count of flows not released (return statement elided in this dump).
 */
2771 mlx5_flow_verify(struct rte_eth_dev *dev)
2773 struct priv *priv = dev->data->dev_private;
2774 struct rte_flow *flow;
2777 TAILQ_FOREACH(flow, &priv->flows, next) {
2778 DRV_LOG(DEBUG, "port %u flow %p still referenced",
2779 dev->data->port_id, (void *)flow);
2786 * Enable a control flow configured from the control plane.
2789 * Pointer to Ethernet device.
2791 * An Ethernet flow spec to apply.
2793 * An Ethernet flow mask to apply.
2795 * A VLAN flow spec to apply.
2797 * A VLAN flow mask to apply.
2800 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Build and install a control-plane flow: ETH item (optionally followed
 * by a VLAN item when @vlan_spec is given) with an RSS action spreading
 * over the current RETA queues at MLX5_CTRL_FLOW_PRIORITY.
 * Stored on priv->ctrl_flows, not priv->flows.
 */
2803 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
2804 struct rte_eth_dev *dev placeholder-fix below
2869 * Enable a flow control configured from the control plane.
2872 * Pointer to Ethernet device.
2874 * An Ethernet flow spec to apply.
2876 * An Ethernet flow mask to apply.
2879 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Convenience wrapper: install an Ethernet-only control flow by
 * delegating to mlx5_ctrl_flow_vlan() with no VLAN spec/mask.
 */
2882 mlx5_ctrl_flow(struct rte_eth_dev *dev,
2883 struct rte_flow_item_eth *eth_spec,
2884 struct rte_flow_item_eth *eth_mask)
2886 return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
2892 * @see rte_flow_destroy()
/*
 * rte_flow_ops destroy callback: remove @flow from the per-port flow
 * list. @error is unused; the list helper handles all cleanup.
 */
2896 mlx5_flow_destroy(struct rte_eth_dev *dev,
2897 struct rte_flow *flow,
2898 struct rte_flow_error *error __rte_unused)
2900 struct priv *priv = dev->data->dev_private;
2902 mlx5_flow_list_destroy(dev, &priv->flows, flow);
2907 * Destroy all flows.
2909 * @see rte_flow_flush()
/*
 * rte_flow_ops flush callback: destroy every flow on the port's flow
 * list. Cannot fail; @error is unused.
 */
2913 mlx5_flow_flush(struct rte_eth_dev *dev,
2914 struct rte_flow_error *error __rte_unused)
2916 struct priv *priv = dev->data->dev_private;
2918 mlx5_flow_list_flush(dev, &priv->flows);
2922 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
2924 * Query flow counter.
2928 * @param counter_value
2929 * returned data from the counter.
2932 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Read hits/bytes from a Verbs counter set and report them relative to
 * the last reset baseline stored in @counter_stats.  When the caller
 * requested a reset, the baseline is advanced to the raw readings.
 * Only compiled with HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT.
 */
2935 mlx5_flow_query_count(struct ibv_counter_set *cs,
2936 struct mlx5_flow_counter_stats *counter_stats,
2937 struct rte_flow_query_count *query_count,
2938 struct rte_flow_error *error)
/* counters[0] = raw hit count, counters[1] = raw byte count. */
2940 uint64_t counters[2];
2941 struct ibv_query_counter_set_attr query_cs_attr = {
2943 .query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
2945 struct ibv_counter_set_data query_out = {
2947 .outlen = 2 * sizeof(uint64_t),
2949 int err = mlx5_glue->query_counter_set(&query_cs_attr, &query_out);
2952 return rte_flow_error_set(error, err,
2953 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2955 "cannot read counter");
2956 query_count->hits_set = 1;
2957 query_count->bytes_set = 1;
/* Report deltas since the last reset, not raw hardware values. */
2958 query_count->hits = counters[0] - counter_stats->hits;
2959 query_count->bytes = counters[1] - counter_stats->bytes;
2960 if (query_count->reset) {
2961 counter_stats->hits = counters[0];
2962 counter_stats->bytes = counters[1];
2970 * @see rte_flow_query()
/*
 * rte_flow_ops query callback: forward a COUNT query to
 * mlx5_flow_query_count() when the flow owns a counter set; otherwise
 * fail with EINVAL ("no counter found for flow").
 */
2974 mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
2975 struct rte_flow *flow,
2976 enum rte_flow_action_type action __rte_unused,
2978 struct rte_flow_error *error)
2983 ret = mlx5_flow_query_count(flow->cs,
2984 &flow->counter_stats,
2985 (struct rte_flow_query_count *)data,
2990 return rte_flow_error_set(error, EINVAL,
2991 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2993 "no counter found for flow");
3002 * @see rte_flow_isolate()
/*
 * rte_flow_ops isolate callback: toggle isolated mode.  Refused with
 * EBUSY while the port is started; otherwise records the flag and swaps
 * the whole eth_dev_ops table between normal and isolate variants.
 */
3006 mlx5_flow_isolate(struct rte_eth_dev *dev,
3008 struct rte_flow_error *error)
3010 struct priv *priv = dev->data->dev_private;
/* Mode can only change while the port is stopped. */
3012 if (dev->data->dev_started) {
3013 rte_flow_error_set(error, EBUSY,
3014 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3016 "port must be stopped first");
3019 priv->isolated = !!enable;
3021 priv->dev->dev_ops = &mlx5_dev_ops_isolate;
3023 priv->dev->dev_ops = &mlx5_dev_ops;
3028 * Convert a flow director filter to a generic flow.
3031 * Pointer to Ethernet device.
3032 * @param fdir_filter
3033 * Flow director filter to add.
3035 * Generic flow parameters structure.
3038 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Translate a legacy flow-director filter into a generic rte_flow
 * description inside @attributes: ingress attr, ETH item, optional
 * IPv4/IPv6 item, optional UDP/TCP item, and a QUEUE or DROP action.
 * Masks come from the port-wide fdir_conf, not from the filter itself.
 * Returns 0 or sets rte_errno (EINVAL/ENOTSUP) — error returns are
 * elided in this dump.
 */
3041 mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
3042 const struct rte_eth_fdir_filter *fdir_filter,
3043 struct mlx5_fdir *attributes)
3045 struct priv *priv = dev->data->dev_private;
3046 const struct rte_eth_fdir_input *input = &fdir_filter->input;
3047 const struct rte_eth_fdir_masks *mask =
3048 &dev->data->dev_conf.fdir_conf.mask;
3050 /* Validate queue number. */
3051 if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
3052 DRV_LOG(ERR, "port %u invalid queue number %d",
3053 dev->data->port_id, fdir_filter->action.rx_queue);
/* Item[0] is always the Ethernet layer. */
3057 attributes->attr.ingress = 1;
3058 attributes->items[0] = (struct rte_flow_item) {
3059 .type = RTE_FLOW_ITEM_TYPE_ETH,
3060 .spec = &attributes->l2,
3061 .mask = &attributes->l2_mask,
/* Map the FDIR behavior onto a QUEUE or DROP action. */
3063 switch (fdir_filter->action.behavior) {
3064 case RTE_ETH_FDIR_ACCEPT:
3065 attributes->actions[0] = (struct rte_flow_action){
3066 .type = RTE_FLOW_ACTION_TYPE_QUEUE,
3067 .conf = &attributes->queue,
3070 case RTE_ETH_FDIR_REJECT:
3071 attributes->actions[0] = (struct rte_flow_action){
3072 .type = RTE_FLOW_ACTION_TYPE_DROP,
3076 DRV_LOG(ERR, "port %u invalid behavior %d",
3078 fdir_filter->action.behavior);
3079 rte_errno = ENOTSUP;
3082 attributes->queue.index = fdir_filter->action.rx_queue;
/* L3: fill item[1] from the filter's flow type. */
3084 switch (fdir_filter->input.flow_type) {
3085 case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
3086 case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
3087 case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
3088 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
3089 .src_addr = input->flow.ip4_flow.src_ip,
3090 .dst_addr = input->flow.ip4_flow.dst_ip,
3091 .time_to_live = input->flow.ip4_flow.ttl,
3092 .type_of_service = input->flow.ip4_flow.tos,
3093 .next_proto_id = input->flow.ip4_flow.proto,
3095 attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){
3096 .src_addr = mask->ipv4_mask.src_ip,
3097 .dst_addr = mask->ipv4_mask.dst_ip,
3098 .time_to_live = mask->ipv4_mask.ttl,
3099 .type_of_service = mask->ipv4_mask.tos,
3100 .next_proto_id = mask->ipv4_mask.proto,
3102 attributes->items[1] = (struct rte_flow_item){
3103 .type = RTE_FLOW_ITEM_TYPE_IPV4,
3104 .spec = &attributes->l3,
3105 .mask = &attributes->l3_mask,
3108 case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
3109 case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
3110 case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
3111 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
3112 .hop_limits = input->flow.ipv6_flow.hop_limits,
3113 .proto = input->flow.ipv6_flow.proto,
/*
 * NOTE(review): the dst_addr and the two mask memcpys size themselves
 * with RTE_DIM(...src_addr) instead of their own field — harmless only
 * if src/dst arrays are the same size; confirm against struct ipv6_hdr.
 */
3116 memcpy(attributes->l3.ipv6.hdr.src_addr,
3117 input->flow.ipv6_flow.src_ip,
3118 RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
3119 memcpy(attributes->l3.ipv6.hdr.dst_addr,
3120 input->flow.ipv6_flow.dst_ip,
3121 RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
3122 memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
3123 mask->ipv6_mask.src_ip,
3124 RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
3125 memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
3126 mask->ipv6_mask.dst_ip,
3127 RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
3128 attributes->items[1] = (struct rte_flow_item){
3129 .type = RTE_FLOW_ITEM_TYPE_IPV6,
3130 .spec = &attributes->l3,
3131 .mask = &attributes->l3_mask,
3135 DRV_LOG(ERR, "port %u invalid flow type%d",
3136 dev->data->port_id, fdir_filter->input.flow_type);
3137 rte_errno = ENOTSUP;
/* L4: fill item[2] for UDP/TCP flow types; OTHER types have no L4. */
3141 switch (fdir_filter->input.flow_type) {
3142 case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
3143 attributes->l4.udp.hdr = (struct udp_hdr){
3144 .src_port = input->flow.udp4_flow.src_port,
3145 .dst_port = input->flow.udp4_flow.dst_port,
3147 attributes->l4_mask.udp.hdr = (struct udp_hdr){
3148 .src_port = mask->src_port_mask,
3149 .dst_port = mask->dst_port_mask,
3151 attributes->items[2] = (struct rte_flow_item){
3152 .type = RTE_FLOW_ITEM_TYPE_UDP,
3153 .spec = &attributes->l4,
3154 .mask = &attributes->l4_mask,
3157 case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
3158 attributes->l4.tcp.hdr = (struct tcp_hdr){
3159 .src_port = input->flow.tcp4_flow.src_port,
3160 .dst_port = input->flow.tcp4_flow.dst_port,
3162 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
3163 .src_port = mask->src_port_mask,
3164 .dst_port = mask->dst_port_mask,
3166 attributes->items[2] = (struct rte_flow_item){
3167 .type = RTE_FLOW_ITEM_TYPE_TCP,
3168 .spec = &attributes->l4,
3169 .mask = &attributes->l4_mask,
3172 case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
3173 attributes->l4.udp.hdr = (struct udp_hdr){
3174 .src_port = input->flow.udp6_flow.src_port,
3175 .dst_port = input->flow.udp6_flow.dst_port,
3177 attributes->l4_mask.udp.hdr = (struct udp_hdr){
3178 .src_port = mask->src_port_mask,
3179 .dst_port = mask->dst_port_mask,
3181 attributes->items[2] = (struct rte_flow_item){
3182 .type = RTE_FLOW_ITEM_TYPE_UDP,
3183 .spec = &attributes->l4,
3184 .mask = &attributes->l4_mask,
3187 case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
3188 attributes->l4.tcp.hdr = (struct tcp_hdr){
3189 .src_port = input->flow.tcp6_flow.src_port,
3190 .dst_port = input->flow.tcp6_flow.dst_port,
3192 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
3193 .src_port = mask->src_port_mask,
3194 .dst_port = mask->dst_port_mask,
3196 attributes->items[2] = (struct rte_flow_item){
3197 .type = RTE_FLOW_ITEM_TYPE_TCP,
3198 .spec = &attributes->l4,
3199 .mask = &attributes->l4_mask,
3202 case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
3203 case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
3206 DRV_LOG(ERR, "port %u invalid flow type%d",
3207 dev->data->port_id, fdir_filter->input.flow_type);
3208 rte_errno = ENOTSUP;
3215 * Add new flow director filter and store it in list.
3218 * Pointer to Ethernet device.
3219 * @param fdir_filter
3220 * Flow director filter to add.
3223 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Add a flow-director filter: convert it to rte_flow form with
 * mlx5_fdir_filter_convert(), validate via mlx5_flow_convert(), then
 * create the flow on priv->flows.  The zeroed MAC spec/mask in
 * @attributes match all Ethernet traffic at L2.
 */
3226 mlx5_fdir_filter_add(struct rte_eth_dev *dev,
3227 const struct rte_eth_fdir_filter *fdir_filter)
3229 struct priv *priv = dev->data->dev_private;
3230 struct mlx5_fdir attributes = {
3233 .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
3234 .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
3238 struct mlx5_flow_parse parser = {
3239 .layer = HASH_RXQ_ETH,
3241 struct rte_flow_error error;
3242 struct rte_flow *flow;
3245 ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
3248 ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
3249 attributes.actions, &error, &parser);
3252 flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
3253 attributes.items, attributes.actions,
3256 DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
3264 * Delete specific filter.
3267 * Pointer to Ethernet device.
3268 * @param fdir_filter
3269 * Filter to be deleted.
3272 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Delete a flow-director filter: rebuild its Verbs attribute through the
 * same convert/parse path as add, then linearly scan priv->flows for a
 * flow whose attribute and spec list match, and destroy it.
 * Allocated parser attributes are freed on exit; rte_errno is preserved
 * across cleanup.
 */
3275 mlx5_fdir_filter_delete(struct rte_eth_dev *dev,
3276 const struct rte_eth_fdir_filter *fdir_filter)
3278 struct priv *priv = dev->data->dev_private;
3279 struct mlx5_fdir attributes = {
3282 struct mlx5_flow_parse parser = {
3284 .layer = HASH_RXQ_ETH,
3286 struct rte_flow_error error;
3287 struct rte_flow *flow;
3291 ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
3294 ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
3295 attributes.actions, &error, &parser);
3299 * Special case for drop action which is only set in the
3300 * specifications when the flow is created. In this situation the
3301 * drop specification is missing.
3304 struct ibv_flow_spec_action_drop *drop;
/* Append the drop spec at the current end of the attribute buffer. */
3306 drop = (void *)((uintptr_t)parser.queue[HASH_RXQ_ETH].ibv_attr +
3307 parser.queue[HASH_RXQ_ETH].offset);
3308 *drop = (struct ibv_flow_spec_action_drop){
3309 .type = IBV_FLOW_SPEC_ACTION_DROP,
3310 .size = sizeof(struct ibv_flow_spec_action_drop),
3312 parser.queue[HASH_RXQ_ETH].ibv_attr->num_of_specs++;
/* Search for an installed flow matching attribute + every spec. */
3314 TAILQ_FOREACH(flow, &priv->flows, next) {
3315 struct ibv_flow_attr *attr;
3316 struct ibv_spec_header *attr_h;
3318 struct ibv_flow_attr *flow_attr;
3319 struct ibv_spec_header *flow_h;
3321 unsigned int specs_n;
3323 attr = parser.queue[HASH_RXQ_ETH].ibv_attr;
3324 flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr;
3325 /* Compare first the attributes. */
3326 if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
3328 if (attr->num_of_specs == 0)
/* Specs are packed right after the attribute header. */
3330 spec = (void *)((uintptr_t)attr +
3331 sizeof(struct ibv_flow_attr));
3332 flow_spec = (void *)((uintptr_t)flow_attr +
3333 sizeof(struct ibv_flow_attr));
3334 specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
3335 for (i = 0; i != specs_n; ++i) {
/* Compare only the overlapping prefix of each spec pair. */
3338 if (memcmp(spec, flow_spec,
3339 RTE_MIN(attr_h->size, flow_h->size)))
3341 spec = (void *)((uintptr_t)spec + attr_h->size);
3342 flow_spec = (void *)((uintptr_t)flow_spec +
3345 /* At this point, the flow match. */
3348 /* The flow does not match. */
3351 ret = rte_errno; /* Save rte_errno before cleanup. */
3353 mlx5_flow_list_destroy(dev, &priv->flows, flow);
/* Free the parser-side attribute buffers in every case. */
3355 for (i = 0; i != hash_rxq_init_n; ++i) {
3356 if (parser.queue[i].ibv_attr)
3357 rte_free(parser.queue[i].ibv_attr);
3359 rte_errno = ret; /* Restore rte_errno. */
3364 * Update queue for specific filter.
3367 * Pointer to Ethernet device.
3368 * @param fdir_filter
3369 * Filter to be updated.
3372 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Update a flow-director filter as delete-then-add; the add is only
 * reached when the delete succeeded (early return elided in this dump).
 */
3375 mlx5_fdir_filter_update(struct rte_eth_dev *dev,
3376 const struct rte_eth_fdir_filter *fdir_filter)
3380 ret = mlx5_fdir_filter_delete(dev, fdir_filter);
3383 return mlx5_fdir_filter_add(dev, fdir_filter);
3387 * Flush all filters.
3390 * Pointer to Ethernet device.
/*
 * Flush all flow-director filters by clearing the whole port flow list.
 * NOTE(review): this flushes priv->flows entirely, i.e. rte_flow rules
 * too, not only FDIR-originated ones — confirm this is intended.
 */
3393 mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
3395 struct priv *priv = dev->data->dev_private;
3397 mlx5_flow_list_flush(dev, &priv->flows);
3401 * Get flow director information.
3404 * Pointer to Ethernet device.
3405 * @param[out] fdir_info
3406 * Resulting flow director information.
/*
 * Report flow-director capabilities: current mode and masks are copied
 * from the device configuration; everything flex-payload related is
 * zeroed because the PMD does not support it.
 */
3409 mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
3411 struct priv *priv = dev->data->dev_private;
3412 struct rte_eth_fdir_masks *mask =
3413 &priv->dev->data->dev_conf.fdir_conf.mask;
3415 fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
3416 fdir_info->guarant_spc = 0;
3417 rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
/* No flexible payload support: zero all related capability fields. */
3418 fdir_info->max_flexpayload = 0;
3419 fdir_info->flow_types_mask[0] = 0;
3420 fdir_info->flex_payload_unit = 0;
3421 fdir_info->max_flex_payload_segment_num = 0;
3422 fdir_info->flex_payload_limit = 0;
3423 memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
3427 * Deal with flow director operations.
3430 * Pointer to Ethernet device.
3432 * Operation to perform.
3434 * Pointer to operation-specific structure.
3437 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Dispatch a flow-director control operation.  Only PERFECT and
 * PERFECT_MAC_VLAN modes are accepted; NOP is a no-op, unknown ops are
 * logged and rejected (error returns elided in this dump).
 */
3440 mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
3443 struct priv *priv = dev->data->dev_private;
3444 enum rte_fdir_mode fdir_mode =
3445 priv->dev->data->dev_conf.fdir_conf.mode;
3447 if (filter_op == RTE_ETH_FILTER_NOP)
3449 if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
3450 fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
3451 DRV_LOG(ERR, "port %u flow director mode %d not supported",
3452 dev->data->port_id, fdir_mode);
3456 switch (filter_op) {
3457 case RTE_ETH_FILTER_ADD:
3458 return mlx5_fdir_filter_add(dev, arg);
3459 case RTE_ETH_FILTER_UPDATE:
3460 return mlx5_fdir_filter_update(dev, arg);
3461 case RTE_ETH_FILTER_DELETE:
3462 return mlx5_fdir_filter_delete(dev, arg);
3463 case RTE_ETH_FILTER_FLUSH:
3464 mlx5_fdir_filter_flush(dev);
3466 case RTE_ETH_FILTER_INFO:
3467 mlx5_fdir_info_get(dev, arg);
3470 DRV_LOG(DEBUG, "port %u unknown operation %u",
3471 dev->data->port_id, filter_op);
3479 * Manage filter operations.
3482 * Pointer to Ethernet device structure.
3483 * @param filter_type
3486 * Operation to perform.
3488 * Pointer to operation-specific structure.
3491 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * eth_dev filter_ctrl entry point: GENERIC+GET returns the rte_flow ops
 * table through @arg, FDIR is forwarded to mlx5_fdir_ctrl_func(), every
 * other filter type is rejected with ENOTSUP.
 */
3494 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
3495 enum rte_filter_type filter_type,
3496 enum rte_filter_op filter_op,
3499 switch (filter_type) {
3500 case RTE_ETH_FILTER_GENERIC:
/* Only GET is meaningful for the generic (rte_flow) filter type. */
3501 if (filter_op != RTE_ETH_FILTER_GET) {
3505 *(const void **)arg = &mlx5_flow_ops;
3507 case RTE_ETH_FILTER_FDIR:
3508 return mlx5_fdir_ctrl_func(dev, filter_op, arg);
3510 DRV_LOG(ERR, "port %u filter type (%d) not supported",
3511 dev->data->port_id, filter_type);
3512 rte_errno = ENOTSUP;
3519 * Detect number of Verbs flow priorities supported.
3522 * Pointer to Ethernet device.
3525 * number of supported Verbs flow priority.
/*
 * Probe how many Verbs flow priorities the device supports: repeatedly
 * create a throw-away ETH+DROP flow on the drop queue at priority
 * (verb_priorities - 1), doubling verb_priorities from
 * MLX5_VERBS_FLOW_PRIO_8 while creation succeeds, then halve back to
 * the last working value.  (Loop construct is partly elided in this
 * dump; function end lies past this chunk.)
 */
3528 mlx5_get_max_verbs_prio(struct rte_eth_dev *dev)
3530 struct priv *priv = dev->data->dev_private;
3531 unsigned int verb_priorities = MLX5_VERBS_FLOW_PRIO_8;
/* Minimal attr + ETH spec + DROP action probe flow. */
3533 struct ibv_flow_attr attr;
3534 struct ibv_flow_spec_eth eth;
3535 struct ibv_flow_spec_action_drop drop;
3541 .type = IBV_FLOW_SPEC_ETH,
3542 .size = sizeof(struct ibv_flow_spec_eth),
3545 .size = sizeof(struct ibv_flow_spec_action_drop),
3546 .type = IBV_FLOW_SPEC_ACTION_DROP,
3549 struct ibv_flow *flow;
3552 flow_attr.attr.priority = verb_priorities - 1;
3553 flow = mlx5_glue->create_flow(priv->flow_drop_queue->qp,
/* Success: discard the probe flow and try a deeper priority range. */
3556 claim_zero(mlx5_glue->destroy_flow(flow));
3557 /* Try more priorities. */
3558 verb_priorities *= 2;
3560 /* Failed, restore last right number. */
3561 verb_priorities /= 2;
3565 DRV_LOG(DEBUG, "port %u Verbs flow priorities: %d,"
3566 " user flow priorities: %d",
3567 dev->data->port_id, verb_priorities, MLX5_CTRL_FLOW_PRIORITY);
3568 return verb_priorities;