1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2016 6WIND S.A.
3 * Copyright 2016 Mellanox Technologies, Ltd
11 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
13 #pragma GCC diagnostic ignored "-Wpedantic"
15 #include <infiniband/verbs.h>
17 #pragma GCC diagnostic error "-Wpedantic"
20 #include <rte_common.h>
21 #include <rte_ether.h>
22 #include <rte_eth_ctrl.h>
23 #include <rte_ethdev_driver.h>
25 #include <rte_flow_driver.h>
26 #include <rte_malloc.h>
30 #include "mlx5_defs.h"
32 #include "mlx5_glue.h"
34 /* Flow priority for control plane flows. */
35 #define MLX5_CTRL_FLOW_PRIORITY 1
37 /* Internet Protocol versions. */
42 #ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
43 struct ibv_flow_spec_counter_action {
48 /* Dev ops structure defined in mlx5.c */
49 extern const struct eth_dev_ops mlx5_dev_ops;
50 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
52 /** Structure given to the conversion functions. */
53 struct mlx5_flow_data {
54 struct rte_eth_dev *dev; /**< Ethernet device. */
55 struct mlx5_flow_parse *parser; /**< Parser context. */
56 struct rte_flow_error *error; /**< Error context. */
60 mlx5_flow_create_eth(const struct rte_flow_item *item,
61 const void *default_mask,
62 struct mlx5_flow_data *data);
65 mlx5_flow_create_vlan(const struct rte_flow_item *item,
66 const void *default_mask,
67 struct mlx5_flow_data *data);
70 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
71 const void *default_mask,
72 struct mlx5_flow_data *data);
75 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
76 const void *default_mask,
77 struct mlx5_flow_data *data);
80 mlx5_flow_create_udp(const struct rte_flow_item *item,
81 const void *default_mask,
82 struct mlx5_flow_data *data);
85 mlx5_flow_create_tcp(const struct rte_flow_item *item,
86 const void *default_mask,
87 struct mlx5_flow_data *data);
90 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
91 const void *default_mask,
92 struct mlx5_flow_data *data);
95 mlx5_flow_create_gre(const struct rte_flow_item *item,
96 const void *default_mask,
97 struct mlx5_flow_data *data);
99 struct mlx5_flow_parse;
102 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
106 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);
109 mlx5_flow_create_count(struct rte_eth_dev *dev, struct mlx5_flow_parse *parser);
111 /* Hash RX queue types. */
123 /* Initialization data for hash RX queue. */
124 struct hash_rxq_init {
125 uint64_t hash_fields; /* Fields that participate in the hash. */
126 uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
127 unsigned int flow_priority; /* Flow priority to use. */
128 unsigned int ip_version; /* Internet protocol. */
131 /* Initialization data for hash RX queues. */
132 const struct hash_rxq_init hash_rxq_init[] = {
134 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
135 IBV_RX_HASH_DST_IPV4 |
136 IBV_RX_HASH_SRC_PORT_TCP |
137 IBV_RX_HASH_DST_PORT_TCP),
138 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
140 .ip_version = MLX5_IPV4,
143 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
144 IBV_RX_HASH_DST_IPV4 |
145 IBV_RX_HASH_SRC_PORT_UDP |
146 IBV_RX_HASH_DST_PORT_UDP),
147 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
149 .ip_version = MLX5_IPV4,
152 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
153 IBV_RX_HASH_DST_IPV4),
154 .dpdk_rss_hf = (ETH_RSS_IPV4 |
157 .ip_version = MLX5_IPV4,
160 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
161 IBV_RX_HASH_DST_IPV6 |
162 IBV_RX_HASH_SRC_PORT_TCP |
163 IBV_RX_HASH_DST_PORT_TCP),
164 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
166 .ip_version = MLX5_IPV6,
169 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
170 IBV_RX_HASH_DST_IPV6 |
171 IBV_RX_HASH_SRC_PORT_UDP |
172 IBV_RX_HASH_DST_PORT_UDP),
173 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
175 .ip_version = MLX5_IPV6,
178 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
179 IBV_RX_HASH_DST_IPV6),
180 .dpdk_rss_hf = (ETH_RSS_IPV6 |
183 .ip_version = MLX5_IPV6,
192 /* Number of entries in hash_rxq_init[]. */
193 const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
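/*
 * Illustrative sketch (not part of the upstream file): hash_rxq_init[] can be
 * scanned to translate a DPDK RSS hash field set into the matching Verbs hash
 * fields, much like the parser does when pruning hash Rx queue types. The
 * helper name is hypothetical.
 *
 *	static uint64_t
 *	example_verbs_hash_fields(uint64_t dpdk_rss_hf)
 *	{
 *		unsigned int i;
 *
 *		for (i = 0; i != hash_rxq_init_n; ++i)
 *			if (hash_rxq_init[i].dpdk_rss_hf & dpdk_rss_hf)
 *				return hash_rxq_init[i].hash_fields;
 *		return 0; // No L3/L4 RSS possible.
 *	}
 */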
195 /** Structure for holding counter stats. */
196 struct mlx5_flow_counter_stats {
197 uint64_t hits; /**< Number of packets matched by the rule. */
198 uint64_t bytes; /**< Number of bytes matched by the rule. */
201 /** Structure for Drop queue. */
202 struct mlx5_hrxq_drop {
203 struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
204 struct ibv_qp *qp; /**< Verbs queue pair. */
205 struct ibv_wq *wq; /**< Verbs work queue. */
206 struct ibv_cq *cq; /**< Verbs completion queue. */
209 /* Flow structures. */
211 uint64_t hash_fields; /**< Fields that participate in the hash. */
212 struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
213 struct ibv_flow *ibv_flow; /**< Verbs flow. */
214 struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
217 /* Drop flow structures. */
218 struct mlx5_flow_drop {
219 struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
220 struct ibv_flow *ibv_flow; /**< Verbs flow. */
224 TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
225 uint32_t mark:1; /**< Set if the flow is marked. */
226 uint32_t drop:1; /**< Drop queue. */
227 struct rte_flow_action_rss rss_conf; /**< RSS configuration. */
228 uint16_t (*queues)[]; /**< Queue indexes to use. */
229 uint8_t rss_key[40]; /**< Copy of the RSS key. */
230 uint32_t tunnel; /**< Tunnel type of RTE_PTYPE_TUNNEL_XXX. */
231 struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
232 struct mlx5_flow_counter_stats counter_stats; /**< The counter stats. */
233 struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
234 /**< Flow with Rx queue. */
237 /** Static initializer for items. */
239 (const enum rte_flow_item_type []){ \
240 __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
243 #define IS_TUNNEL(type) ( \
244 (type) == RTE_FLOW_ITEM_TYPE_VXLAN || \
245 (type) == RTE_FLOW_ITEM_TYPE_GRE)
247 const uint32_t flow_ptype[] = {
248 [RTE_FLOW_ITEM_TYPE_VXLAN] = RTE_PTYPE_TUNNEL_VXLAN,
249 [RTE_FLOW_ITEM_TYPE_GRE] = RTE_PTYPE_TUNNEL_GRE,
252 #define PTYPE_IDX(t) ((RTE_PTYPE_TUNNEL_MASK & (t)) >> 12)
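/*
 * Worked example (illustrative; the ptype values below are taken from
 * rte_mbuf_ptype.h at the time of writing, so treat them as an assumption):
 * with RTE_PTYPE_TUNNEL_MASK == 0x0000f000,
 *
 *	PTYPE_IDX(RTE_PTYPE_TUNNEL_GRE)   == (0xf000 & 0x2000) >> 12 == 2
 *	PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN) == (0xf000 & 0x3000) >> 12 == 3
 *
 * which compacts the sparse tunnel ptype bits into small indexes for the
 * flow_ptype[] and ptype_ext[] tables.
 */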
254 const uint32_t ptype_ext[] = {
255 [PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] = RTE_PTYPE_TUNNEL_VXLAN |
257 [PTYPE_IDX(RTE_PTYPE_TUNNEL_GRE)] = RTE_PTYPE_TUNNEL_GRE,
260 /** Structure to generate a simple graph of layers supported by the NIC. */
261 struct mlx5_flow_items {
262 /** List of possible actions for these items. */
263 const enum rte_flow_action_type *const actions;
264 /** Bit-masks corresponding to the possibilities for the item. */
267 * Default bit-masks to use when item->mask is not provided. When
268 * \default_mask is also NULL, the full supported bit-mask (\mask) is
271 const void *default_mask;
272 /** Bit-mask size in bytes. */
273 const unsigned int mask_sz;
275 * Conversion function from rte_flow to NIC specific flow.
278 * rte_flow item to convert.
279 * @param default_mask
280 * Default bit-masks to use when item->mask is not provided.
282 * Internal structure to store the conversion.
285 * 0 on success, a negative errno value otherwise and rte_errno is
288 int (*convert)(const struct rte_flow_item *item,
289 const void *default_mask,
290 struct mlx5_flow_data *data);
291 /** Size in bytes of the destination structure. */
292 const unsigned int dst_sz;
293 /** List of possible following items. */
294 const enum rte_flow_item_type *const items;
297 /** Valid actions for this PMD. */
298 static const enum rte_flow_action_type valid_actions[] = {
299 RTE_FLOW_ACTION_TYPE_DROP,
300 RTE_FLOW_ACTION_TYPE_QUEUE,
301 RTE_FLOW_ACTION_TYPE_MARK,
302 RTE_FLOW_ACTION_TYPE_FLAG,
303 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
304 RTE_FLOW_ACTION_TYPE_COUNT,
306 RTE_FLOW_ACTION_TYPE_END,
309 /** Graph of supported items and associated actions. */
310 static const struct mlx5_flow_items mlx5_flow_items[] = {
311 [RTE_FLOW_ITEM_TYPE_END] = {
312 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
313 RTE_FLOW_ITEM_TYPE_VXLAN,
314 RTE_FLOW_ITEM_TYPE_GRE),
316 [RTE_FLOW_ITEM_TYPE_ETH] = {
317 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
318 RTE_FLOW_ITEM_TYPE_IPV4,
319 RTE_FLOW_ITEM_TYPE_IPV6),
320 .actions = valid_actions,
321 .mask = &(const struct rte_flow_item_eth){
322 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
323 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
326 .default_mask = &rte_flow_item_eth_mask,
327 .mask_sz = sizeof(struct rte_flow_item_eth),
328 .convert = mlx5_flow_create_eth,
329 .dst_sz = sizeof(struct ibv_flow_spec_eth),
331 [RTE_FLOW_ITEM_TYPE_VLAN] = {
332 .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
333 RTE_FLOW_ITEM_TYPE_IPV6),
334 .actions = valid_actions,
335 .mask = &(const struct rte_flow_item_vlan){
339 .default_mask = &rte_flow_item_vlan_mask,
340 .mask_sz = sizeof(struct rte_flow_item_vlan),
341 .convert = mlx5_flow_create_vlan,
344 [RTE_FLOW_ITEM_TYPE_IPV4] = {
345 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
346 RTE_FLOW_ITEM_TYPE_TCP,
347 RTE_FLOW_ITEM_TYPE_GRE),
348 .actions = valid_actions,
349 .mask = &(const struct rte_flow_item_ipv4){
353 .type_of_service = -1,
357 .default_mask = &rte_flow_item_ipv4_mask,
358 .mask_sz = sizeof(struct rte_flow_item_ipv4),
359 .convert = mlx5_flow_create_ipv4,
360 .dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
362 [RTE_FLOW_ITEM_TYPE_IPV6] = {
363 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
364 RTE_FLOW_ITEM_TYPE_TCP,
365 RTE_FLOW_ITEM_TYPE_GRE),
366 .actions = valid_actions,
367 .mask = &(const struct rte_flow_item_ipv6){
370 0xff, 0xff, 0xff, 0xff,
371 0xff, 0xff, 0xff, 0xff,
372 0xff, 0xff, 0xff, 0xff,
373 0xff, 0xff, 0xff, 0xff,
376 0xff, 0xff, 0xff, 0xff,
377 0xff, 0xff, 0xff, 0xff,
378 0xff, 0xff, 0xff, 0xff,
379 0xff, 0xff, 0xff, 0xff,
386 .default_mask = &rte_flow_item_ipv6_mask,
387 .mask_sz = sizeof(struct rte_flow_item_ipv6),
388 .convert = mlx5_flow_create_ipv6,
389 .dst_sz = sizeof(struct ibv_flow_spec_ipv6),
391 [RTE_FLOW_ITEM_TYPE_UDP] = {
392 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
393 .actions = valid_actions,
394 .mask = &(const struct rte_flow_item_udp){
400 .default_mask = &rte_flow_item_udp_mask,
401 .mask_sz = sizeof(struct rte_flow_item_udp),
402 .convert = mlx5_flow_create_udp,
403 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
405 [RTE_FLOW_ITEM_TYPE_TCP] = {
406 .actions = valid_actions,
407 .mask = &(const struct rte_flow_item_tcp){
413 .default_mask = &rte_flow_item_tcp_mask,
414 .mask_sz = sizeof(struct rte_flow_item_tcp),
415 .convert = mlx5_flow_create_tcp,
416 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
418 [RTE_FLOW_ITEM_TYPE_GRE] = {
419 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
420 RTE_FLOW_ITEM_TYPE_IPV4,
421 RTE_FLOW_ITEM_TYPE_IPV6),
422 .actions = valid_actions,
423 .mask = &(const struct rte_flow_item_gre){
426 .default_mask = &rte_flow_item_gre_mask,
427 .mask_sz = sizeof(struct rte_flow_item_gre),
428 .convert = mlx5_flow_create_gre,
429 .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
431 [RTE_FLOW_ITEM_TYPE_VXLAN] = {
432 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
433 RTE_FLOW_ITEM_TYPE_IPV4, /* For L3 VXLAN. */
434 RTE_FLOW_ITEM_TYPE_IPV6), /* For L3 VXLAN. */
435 .actions = valid_actions,
436 .mask = &(const struct rte_flow_item_vxlan){
437 .vni = "\xff\xff\xff",
439 .default_mask = &rte_flow_item_vxlan_mask,
440 .mask_sz = sizeof(struct rte_flow_item_vxlan),
441 .convert = mlx5_flow_create_vxlan,
442 .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
446 /** Structure to pass to the conversion function. */
447 struct mlx5_flow_parse {
448 uint32_t inner; /**< Verbs value, set once tunnel is encountered. */
450 /**< Whether resources should remain after a validate. */
451 uint32_t drop:1; /**< Target is a drop queue. */
452 uint32_t mark:1; /**< Mark is present in the flow. */
453 uint32_t count:1; /**< Count is present in the flow. */
454 uint32_t mark_id; /**< Mark identifier. */
455 struct rte_flow_action_rss rss_conf; /**< RSS configuration. */
456 uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indexes to use. */
457 uint8_t rss_key[40]; /**< Copy of the RSS key. */
458 enum hash_rxq_type layer; /**< Last pattern layer detected. */
459 enum hash_rxq_type out_layer; /**< Last outer pattern layer detected. */
460 uint32_t tunnel; /**< Tunnel type of RTE_PTYPE_TUNNEL_XXX. */
461 struct ibv_counter_set *cs; /**< Holds the counter set for the rule. */
463 struct ibv_flow_attr *ibv_attr;
464 /**< Pointer to Verbs attributes. */
466 /**< Current position or total size of the attribute. */
467 } queue[RTE_DIM(hash_rxq_init)];
470 static const struct rte_flow_ops mlx5_flow_ops = {
471 .validate = mlx5_flow_validate,
472 .create = mlx5_flow_create,
473 .destroy = mlx5_flow_destroy,
474 .flush = mlx5_flow_flush,
475 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
476 .query = mlx5_flow_query,
480 .isolate = mlx5_flow_isolate,
483 /* Convert FDIR request to a generic flow. */
485 struct rte_flow_attr attr;
486 struct rte_flow_action actions[2];
487 struct rte_flow_item items[4];
488 struct rte_flow_item_eth l2;
489 struct rte_flow_item_eth l2_mask;
491 struct rte_flow_item_ipv4 ipv4;
492 struct rte_flow_item_ipv6 ipv6;
495 struct rte_flow_item_ipv4 ipv4;
496 struct rte_flow_item_ipv6 ipv6;
499 struct rte_flow_item_udp udp;
500 struct rte_flow_item_tcp tcp;
503 struct rte_flow_item_udp udp;
504 struct rte_flow_item_tcp tcp;
506 struct rte_flow_action_queue queue;
509 /* Verbs specification header. */
510 struct ibv_spec_header {
511 enum ibv_flow_spec_type type;
516 * Check support for a given item.
519 * Item specification.
521 * Bit-masks covering supported fields to compare with spec, last and mask in
524 * Bit-mask size in bytes.
527 * 0 on success, a negative errno value otherwise and rte_errno is set.
530 mlx5_flow_item_validate(const struct rte_flow_item *item,
531 const uint8_t *mask, unsigned int size)
533 if (!item->spec && (item->mask || item->last)) {
537 if (item->spec && !item->mask) {
539 const uint8_t *spec = item->spec;
541 for (i = 0; i < size; ++i)
542 if ((spec[i] | mask[i]) != mask[i]) {
547 if (item->last && !item->mask) {
549 const uint8_t *spec = item->last;
551 for (i = 0; i < size; ++i)
552 if ((spec[i] | mask[i]) != mask[i]) {
559 const uint8_t *spec = item->spec;
561 for (i = 0; i < size; ++i)
562 if ((spec[i] | mask[i]) != mask[i]) {
567 if (item->spec && item->last) {
570 const uint8_t *apply = mask;
576 for (i = 0; i < size; ++i) {
577 spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
578 last[i] = ((const uint8_t *)item->last)[i] & apply[i];
580 ret = memcmp(spec, last, size);
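/*
 * Worked example (illustrative): with a one-byte field, a supported mask of
 * 0x0f and item->spec pointing at 0x1f, the check
 * (spec[i] | mask[i]) != mask[i] fails because 0x1f | 0x0f == 0x1f != 0x0f:
 * the item asks to match bits the device cannot match. For spec/last ranges,
 * the memcmp() above suggests a range is only accepted once the masked
 * bounds are identical, i.e. when it degenerates to a single value (an
 * assumption based on the visible checks; the tail of the function is
 * elided).
 */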
590 * Extract attributes to the parser.
593 * Flow rule attributes.
595 * Perform verbose error reporting if not NULL.
598 * 0 on success, a negative errno value otherwise and rte_errno is set.
601 mlx5_flow_convert_attributes(const struct rte_flow_attr *attr,
602 struct rte_flow_error *error)
605 rte_flow_error_set(error, ENOTSUP,
606 RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
608 "groups are not supported");
611 if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
612 rte_flow_error_set(error, ENOTSUP,
613 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
615 "priorities are not supported");
619 rte_flow_error_set(error, ENOTSUP,
620 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
622 "egress is not supported");
625 if (attr->transfer) {
626 rte_flow_error_set(error, ENOTSUP,
627 RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
629 "transfer is not supported");
632 if (!attr->ingress) {
633 rte_flow_error_set(error, ENOTSUP,
634 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
636 "only ingress is supported");
643 * Extract the requested actions to the parser.
646 * Pointer to Ethernet device.
648 * Associated actions (list terminated by the END action).
650 * Perform verbose error reporting if not NULL.
651 * @param[in, out] parser
652 * Internal parser structure.
655 * 0 on success, a negative errno value otherwise and rte_errno is set.
658 mlx5_flow_convert_actions(struct rte_eth_dev *dev,
659 const struct rte_flow_action actions[],
660 struct rte_flow_error *error,
661 struct mlx5_flow_parse *parser)
663 enum { FATE = 1, MARK = 2, COUNT = 4, };
664 uint32_t overlap = 0;
665 struct priv *priv = dev->data->dev_private;
667 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
668 if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
670 } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
672 goto exit_action_overlap;
675 } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
676 const struct rte_flow_action_queue *queue =
677 (const struct rte_flow_action_queue *)
681 goto exit_action_overlap;
683 if (!queue || (queue->index > (priv->rxqs_n - 1)))
684 goto exit_action_not_supported;
685 parser->queues[0] = queue->index;
686 parser->rss_conf = (struct rte_flow_action_rss){
688 .queue = parser->queues,
690 } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
691 const struct rte_flow_action_rss *rss =
692 (const struct rte_flow_action_rss *)
694 const uint8_t *rss_key;
695 uint32_t rss_key_len;
699 goto exit_action_overlap;
702 rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ) {
703 rte_flow_error_set(error, EINVAL,
704 RTE_FLOW_ERROR_TYPE_ACTION,
706 "the only supported RSS hash"
707 " function is Toeplitz");
711 rte_flow_error_set(error, EINVAL,
712 RTE_FLOW_ERROR_TYPE_ACTION,
714 "a nonzero RSS encapsulation"
715 " level is not supported");
718 if (rss->types & MLX5_RSS_HF_MASK) {
719 rte_flow_error_set(error, EINVAL,
720 RTE_FLOW_ERROR_TYPE_ACTION,
722 "unsupported RSS type"
727 rss_key_len = rss->key_len;
730 rss_key_len = rss_hash_default_key_len;
731 rss_key = rss_hash_default_key;
733 if (rss_key_len != RTE_DIM(parser->rss_key)) {
734 rte_flow_error_set(error, EINVAL,
735 RTE_FLOW_ERROR_TYPE_ACTION,
737 "RSS hash key must be"
738 " exactly 40 bytes long");
741 if (!rss->queue_num) {
742 rte_flow_error_set(error, EINVAL,
743 RTE_FLOW_ERROR_TYPE_ACTION,
748 if (rss->queue_num > RTE_DIM(parser->queues)) {
749 rte_flow_error_set(error, EINVAL,
750 RTE_FLOW_ERROR_TYPE_ACTION,
752 "too many queues for RSS"
756 for (n = 0; n < rss->queue_num; ++n) {
757 if (rss->queue[n] >= priv->rxqs_n) {
758 rte_flow_error_set(error, EINVAL,
759 RTE_FLOW_ERROR_TYPE_ACTION,
761 "queue id > number of"
766 parser->rss_conf = (struct rte_flow_action_rss){
767 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
770 .key_len = rss_key_len,
771 .queue_num = rss->queue_num,
772 .key = memcpy(parser->rss_key, rss_key,
773 sizeof(*rss_key) * rss_key_len),
774 .queue = memcpy(parser->queues, rss->queue,
775 sizeof(*rss->queue) *
778 } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
779 const struct rte_flow_action_mark *mark =
780 (const struct rte_flow_action_mark *)
784 goto exit_action_overlap;
787 rte_flow_error_set(error, EINVAL,
788 RTE_FLOW_ERROR_TYPE_ACTION,
790 "mark must be defined");
792 } else if (mark->id >= MLX5_FLOW_MARK_MAX) {
793 rte_flow_error_set(error, ENOTSUP,
794 RTE_FLOW_ERROR_TYPE_ACTION,
796 "mark must be between 0"
801 parser->mark_id = mark->id;
802 } else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
804 goto exit_action_overlap;
807 } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
808 priv->config.flow_counter_en) {
810 goto exit_action_overlap;
814 goto exit_action_not_supported;
817 /* When fate is unknown, drop traffic. */
818 if (!(overlap & FATE))
820 if (parser->drop && parser->mark)
822 if (!parser->rss_conf.queue_num && !parser->drop) {
823 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
824 NULL, "no valid action");
828 exit_action_not_supported:
829 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
830 actions, "action not supported");
833 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
834 actions, "overlapping actions are not supported");
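/*
 * Illustrative sketch: a minimal action list this parser accepts, one fate
 * action (QUEUE) plus an optional MARK; two fate actions would trip the
 * overlap check above.
 *
 *	struct rte_flow_action_queue queue = { .index = 0 };
 *	struct rte_flow_action_mark mark = { .id = 42 };
 *	const struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &mark },
 *		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 */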
842 * Pattern specification (list terminated by the END pattern item).
844 * Perform verbose error reporting if not NULL.
845 * @param[in, out] parser
846 * Internal parser structure.
849 * 0 on success, a negative errno value otherwise and rte_errno is set.
852 mlx5_flow_convert_items_validate(const struct rte_flow_item items[],
853 struct rte_flow_error *error,
854 struct mlx5_flow_parse *parser)
856 const struct mlx5_flow_items *cur_item = mlx5_flow_items;
860 /* Initialise the offsets to start after the verbs attribute. */
861 for (i = 0; i != hash_rxq_init_n; ++i)
862 parser->queue[i].offset = sizeof(struct ibv_flow_attr);
863 for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
864 const struct mlx5_flow_items *token = NULL;
867 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
871 cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
873 if (cur_item->items[i] == items->type) {
874 token = &mlx5_flow_items[items->type];
880 goto exit_item_not_supported;
883 ret = mlx5_flow_item_validate(items,
884 (const uint8_t *)cur_item->mask,
887 goto exit_item_not_supported;
888 if (IS_TUNNEL(items->type)) {
889 if (parser->tunnel) {
890 rte_flow_error_set(error, ENOTSUP,
891 RTE_FLOW_ERROR_TYPE_ITEM,
893 "Cannot recognize multiple"
894 " tunnel encapsulations.");
897 parser->inner = IBV_FLOW_SPEC_INNER;
898 parser->tunnel = flow_ptype[items->type];
901 parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
903 for (n = 0; n != hash_rxq_init_n; ++n)
904 parser->queue[n].offset += cur_item->dst_sz;
908 parser->queue[HASH_RXQ_ETH].offset +=
909 sizeof(struct ibv_flow_spec_action_drop);
912 for (i = 0; i != hash_rxq_init_n; ++i)
913 parser->queue[i].offset +=
914 sizeof(struct ibv_flow_spec_action_tag);
917 unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
919 for (i = 0; i != hash_rxq_init_n; ++i)
920 parser->queue[i].offset += size;
923 exit_item_not_supported:
924 return rte_flow_error_set(error, -ret, RTE_FLOW_ERROR_TYPE_ITEM,
925 items, "item not supported");
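/*
 * Worked example (illustrative): for the pattern eth / ipv4 / udp, every
 * queue's offset starts at sizeof(struct ibv_flow_attr) and grows by the
 * dst_sz of each matched item, i.e.
 *
 *	sizeof(struct ibv_flow_attr) +
 *	sizeof(struct ibv_flow_spec_eth) +
 *	sizeof(struct ibv_flow_spec_ipv4_ext) +
 *	sizeof(struct ibv_flow_spec_tcp_udp)
 *
 * which is the buffer size later requested from
 * mlx5_flow_convert_allocate().
 */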
929 * Allocate memory space to store verbs flow attributes.
932 * Amount of byte to allocate.
934 * Perform verbose error reporting if not NULL.
937 * A verbs flow attribute on success, NULL otherwise and rte_errno is set.
939 static struct ibv_flow_attr *
940 mlx5_flow_convert_allocate(unsigned int size, struct rte_flow_error *error)
942 struct ibv_flow_attr *ibv_attr;
944 ibv_attr = rte_calloc(__func__, 1, size, 0);
946 rte_flow_error_set(error, ENOMEM,
947 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
949 "cannot allocate verbs spec attributes");
956 * Give inner packet matching a higher priority than non-inner matching.
960 * Pointer to Ethernet device.
961 * @param[in, out] parser
962 * Internal parser structure.
964 * User flow attribute.
967 mlx5_flow_update_priority(struct rte_eth_dev *dev,
968 struct mlx5_flow_parse *parser,
969 const struct rte_flow_attr *attr)
971 struct priv *priv = dev->data->dev_private;
975 /* 8 priorities >= 16 priorities
976 * Control flow: 4-7 8-15
977 * User normal flow: 1-3 4-7
978 * User tunnel flow: 0-2 0-3
980 priority = attr->priority * MLX5_VERBS_FLOW_PRIO_8;
981 if (priv->config.max_verbs_prio == MLX5_VERBS_FLOW_PRIO_8)
984 * Lower non-tunnel flow Verbs priority by 1 if only 8 Verbs priorities
985 * are supported, by 4 otherwise.
987 if (!parser->inner) {
988 if (priv->config.max_verbs_prio == MLX5_VERBS_FLOW_PRIO_8)
991 priority += MLX5_VERBS_FLOW_PRIO_8 / 2;
994 parser->queue[HASH_RXQ_ETH].ibv_attr->priority = priority +
995 hash_rxq_init[HASH_RXQ_ETH].flow_priority;
998 for (i = 0; i != hash_rxq_init_n; ++i) {
999 if (!parser->queue[i].ibv_attr)
1001 parser->queue[i].ibv_attr->priority = priority +
1002 hash_rxq_init[i].flow_priority;
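/*
 * Worked example (illustrative, assuming MLX5_VERBS_FLOW_PRIO_8 == 8 as the
 * name suggests): with 16 Verbs priorities and attr->priority == 0, a
 * non-tunnel flow gets priority 0 * 8 + 8 / 2 == 4 before the per-layer
 * flow_priority from hash_rxq_init[] is added, while a tunnel (inner) flow
 * keeps priority 0 and is therefore matched first, as the table above shows.
 */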
1007 * Finalise verbs flow attributes.
1009 * @param[in, out] parser
1010 * Internal parser structure.
1013 mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
1018 * Fill missing layers in verbs specifications, or compute the correct
1019 * offset to allocate the memory space for the attributes and
1022 for (i = 0; i != hash_rxq_init_n - 1; ++i) {
1024 struct ibv_flow_spec_ipv4_ext ipv4;
1025 struct ibv_flow_spec_ipv6 ipv6;
1026 struct ibv_flow_spec_tcp_udp udp_tcp;
1031 if (i == parser->layer)
1033 if (parser->layer == HASH_RXQ_ETH) {
1034 if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
1035 size = sizeof(struct ibv_flow_spec_ipv4_ext);
1036 specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
1037 .type = IBV_FLOW_SPEC_IPV4_EXT,
1041 size = sizeof(struct ibv_flow_spec_ipv6);
1042 specs.ipv6 = (struct ibv_flow_spec_ipv6){
1043 .type = IBV_FLOW_SPEC_IPV6,
1047 if (parser->queue[i].ibv_attr) {
1048 dst = (void *)((uintptr_t)
1049 parser->queue[i].ibv_attr +
1050 parser->queue[i].offset);
1051 memcpy(dst, &specs, size);
1052 ++parser->queue[i].ibv_attr->num_of_specs;
1054 parser->queue[i].offset += size;
1056 if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
1057 (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
1058 size = sizeof(struct ibv_flow_spec_tcp_udp);
1059 specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
1060 .type = ((i == HASH_RXQ_UDPV4 ||
1061 i == HASH_RXQ_UDPV6) ?
1066 if (parser->queue[i].ibv_attr) {
1067 dst = (void *)((uintptr_t)
1068 parser->queue[i].ibv_attr +
1069 parser->queue[i].offset);
1070 memcpy(dst, &specs, size);
1071 ++parser->queue[i].ibv_attr->num_of_specs;
1073 parser->queue[i].offset += size;
1079 * Update flows according to pattern and RSS hash fields.
1081 * @param[in, out] parser
1082 * Internal parser structure.
1085 * 0 on success, a negative errno value otherwise and rte_errno is set.
1088 mlx5_flow_convert_rss(struct mlx5_flow_parse *parser)
1090 const unsigned int ipv4 =
1091 hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
1092 const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 : HASH_RXQ_TCPV6;
1093 const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
1094 const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 : HASH_RXQ_TCPV4;
1095 const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 : HASH_RXQ_IPV4;
1096 const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
1099 /* Remove any other flow not matching the pattern. */
1100 if (parser->rss_conf.queue_num == 1 && !parser->rss_conf.types) {
1101 for (i = 0; i != hash_rxq_init_n; ++i) {
1102 if (i == HASH_RXQ_ETH)
1104 rte_free(parser->queue[i].ibv_attr);
1105 parser->queue[i].ibv_attr = NULL;
1109 if (parser->layer == HASH_RXQ_ETH)
1111 /* This layer becomes useless as the pattern defines lower layers. */
1112 rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
1113 parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
1114 /* Remove the opposite kind of layer, e.g. IPv6 if the pattern is IPv4. */
1115 for (i = ohmin; i != (ohmax + 1); ++i) {
1116 if (!parser->queue[i].ibv_attr)
1118 rte_free(parser->queue[i].ibv_attr);
1119 parser->queue[i].ibv_attr = NULL;
1121 /* Remove impossible flows according to the RSS configuration. */
1122 if (hash_rxq_init[parser->layer].dpdk_rss_hf &
1123 parser->rss_conf.types) {
1124 /* Remove any other flow. */
1125 for (i = hmin; i != (hmax + 1); ++i) {
1126 if (i == parser->layer || !parser->queue[i].ibv_attr)
1128 rte_free(parser->queue[i].ibv_attr);
1129 parser->queue[i].ibv_attr = NULL;
1131 } else if (!parser->queue[ip].ibv_attr) {
1132 /* No RSS possible with the current configuration. */
1133 parser->rss_conf.queue_num = 1;
1139 * Validate and convert a flow supported by the NIC.
1142 * Pointer to Ethernet device.
1144 * Flow rule attributes.
1145 * @param[in] pattern
1146 * Pattern specification (list terminated by the END pattern item).
1147 * @param[in] actions
1148 * Associated actions (list terminated by the END action).
1150 * Perform verbose error reporting if not NULL.
1151 * @param[in, out] parser
1152 * Internal parser structure.
1155 * 0 on success, a negative errno value otherwise and rte_errno is set.
1158 mlx5_flow_convert(struct rte_eth_dev *dev,
1159 const struct rte_flow_attr *attr,
1160 const struct rte_flow_item items[],
1161 const struct rte_flow_action actions[],
1162 struct rte_flow_error *error,
1163 struct mlx5_flow_parse *parser)
1165 const struct mlx5_flow_items *cur_item = mlx5_flow_items;
1169 /* First step. Validate the attributes, items and actions. */
1170 *parser = (struct mlx5_flow_parse){
1171 .create = parser->create,
1172 .layer = HASH_RXQ_ETH,
1173 .mark_id = MLX5_FLOW_MARK_DEFAULT,
1175 ret = mlx5_flow_convert_attributes(attr, error);
1178 ret = mlx5_flow_convert_actions(dev, actions, error, parser);
1181 ret = mlx5_flow_convert_items_validate(items, error, parser);
1184 mlx5_flow_convert_finalise(parser);
1187 * Allocate the memory space to store verbs specifications.
1190 unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;
1192 parser->queue[HASH_RXQ_ETH].ibv_attr =
1193 mlx5_flow_convert_allocate(offset, error);
1194 if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
1196 parser->queue[HASH_RXQ_ETH].offset =
1197 sizeof(struct ibv_flow_attr);
1199 for (i = 0; i != hash_rxq_init_n; ++i) {
1200 unsigned int offset;
1202 if (!(parser->rss_conf.types &
1203 hash_rxq_init[i].dpdk_rss_hf) &&
1204 (i != HASH_RXQ_ETH))
1206 offset = parser->queue[i].offset;
1207 parser->queue[i].ibv_attr =
1208 mlx5_flow_convert_allocate(offset, error);
1209 if (!parser->queue[i].ibv_attr)
1211 parser->queue[i].offset = sizeof(struct ibv_flow_attr);
1214 /* Third step. Parse and convert the items, filling in the specifications. */
1217 for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
1218 struct mlx5_flow_data data = {
1224 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
1226 cur_item = &mlx5_flow_items[items->type];
1227 ret = cur_item->convert(items,
1228 (cur_item->default_mask ?
1229 cur_item->default_mask :
1235 if (!parser->drop) {
1236 /* RSS check, remove unused hash types. */
1237 ret = mlx5_flow_convert_rss(parser);
1240 /* Complete the missing specifications. */
1241 mlx5_flow_convert_finalise(parser);
1243 mlx5_flow_update_priority(dev, parser, attr);
1245 mlx5_flow_create_flag_mark(parser, parser->mark_id);
1246 if (parser->count && parser->create) {
1247 mlx5_flow_create_count(dev, parser);
1249 goto exit_count_error;
1252 /* Only verification is expected, all resources should be released. */
1253 if (!parser->create) {
1254 for (i = 0; i != hash_rxq_init_n; ++i) {
1255 if (parser->queue[i].ibv_attr) {
1256 rte_free(parser->queue[i].ibv_attr);
1257 parser->queue[i].ibv_attr = NULL;
1263 for (i = 0; i != hash_rxq_init_n; ++i) {
1264 if (parser->queue[i].ibv_attr) {
1265 rte_free(parser->queue[i].ibv_attr);
1266 parser->queue[i].ibv_attr = NULL;
1269 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1270 NULL, "cannot allocate verbs spec attributes");
1273 rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1274 NULL, "cannot create counter");
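/*
 * Usage sketch (illustrative): the same conversion routine backs both the
 * validate and create paths; only the .create flag decides whether the Verbs
 * attributes survive the call:
 *
 *	struct mlx5_flow_parse parser = { .create = 0 };	// validate only
 *	int ret = mlx5_flow_convert(dev, attr, items, actions, error, &parser);
 *
 * With .create == 1 the queue[i].ibv_attr buffers are kept for the
 * mlx5_flow_create_action_queue*() helpers to consume.
 */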
1279 * Copy the created specification into each queue's Verbs flow attributes.
1282 * Internal parser structure.
1284 * Create specification.
1286 * Size in bytes of the specification to copy.
1289 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
1295 for (i = 0; i != hash_rxq_init_n; ++i) {
1296 if (!parser->queue[i].ibv_attr)
1298 /* Specification must be of the same L3 type or none. */
1299 if (parser->layer == HASH_RXQ_ETH ||
1300 (hash_rxq_init[parser->layer].ip_version ==
1301 hash_rxq_init[i].ip_version) ||
1302 (hash_rxq_init[i].ip_version == 0)) {
1303 dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1304 parser->queue[i].offset);
1305 memcpy(dst, src, size);
1306 ++parser->queue[i].ibv_attr->num_of_specs;
1307 parser->queue[i].offset += size;
1313 * Convert Ethernet item to Verbs specification.
1316 * Item specification.
1317 * @param default_mask[in]
1318 * Default bit-masks to use when item->mask is not provided.
1319 * @param data[in, out]
1323 * 0 on success, a negative errno value otherwise and rte_errno is set.
1326 mlx5_flow_create_eth(const struct rte_flow_item *item,
1327 const void *default_mask,
1328 struct mlx5_flow_data *data)
1330 const struct rte_flow_item_eth *spec = item->spec;
1331 const struct rte_flow_item_eth *mask = item->mask;
1332 struct mlx5_flow_parse *parser = data->parser;
1333 const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1334 struct ibv_flow_spec_eth eth = {
1335 .type = parser->inner | IBV_FLOW_SPEC_ETH,
1339 /* Don't update layer for the inner pattern. */
1341 parser->layer = HASH_RXQ_ETH;
1346 mask = default_mask;
1347 memcpy(ð.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
1348 memcpy(ð.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
1349 eth.val.ether_type = spec->type;
1350 memcpy(ð.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
1351 memcpy(ð.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
1352 eth.mask.ether_type = mask->type;
1353 /* Remove unwanted bits from values. */
1354 for (i = 0; i < ETHER_ADDR_LEN; ++i) {
1355 eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
1356 eth.val.src_mac[i] &= eth.mask.src_mac[i];
1358 eth.val.ether_type &= eth.mask.ether_type;
1360 mlx5_flow_create_copy(parser, ð, eth_size);
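/*
 * Illustrative sketch: an ETH item matching one destination MAC converts
 * into an ibv_flow_spec_eth whose values are pre-masked by the code above:
 *
 *	struct rte_flow_item_eth spec = {
 *		.dst.addr_bytes = "\x00\x11\x22\x33\x44\x55",
 *	};
 *	struct rte_flow_item_eth mask = {
 *		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *	};
 *	// With item->spec = &spec and item->mask = &mask, eth.val.dst_mac
 *	// becomes 00:11:22:33:44:55, eth.mask.dst_mac ff:ff:ff:ff:ff:ff,
 *	// and the zeroed source mask means any source address matches.
 */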
1365 * Convert VLAN item to Verbs specification.
1368 * Item specification.
1369 * @param default_mask[in]
1370 * Default bit-masks to use when item->mask is not provided.
1371 * @param data[in, out]
1375 * 0 on success, a negative errno value otherwise and rte_errno is set.
1378 mlx5_flow_create_vlan(const struct rte_flow_item *item,
1379 const void *default_mask,
1380 struct mlx5_flow_data *data)
1382 const struct rte_flow_item_vlan *spec = item->spec;
1383 const struct rte_flow_item_vlan *mask = item->mask;
1384 struct mlx5_flow_parse *parser = data->parser;
1385 struct ibv_flow_spec_eth *eth;
1386 const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1387 const char *msg = "VLAN cannot be empty";
1392 mask = default_mask;
1394 for (i = 0; i != hash_rxq_init_n; ++i) {
1395 if (!parser->queue[i].ibv_attr)
1398 eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1399 parser->queue[i].offset - eth_size);
1400 eth->val.vlan_tag = spec->tci;
1401 eth->mask.vlan_tag = mask->tci;
1402 eth->val.vlan_tag &= eth->mask.vlan_tag;
1404 * From the verbs perspective, an empty VLAN is equivalent
1405 * to a packet without a VLAN layer.
1407 if (!eth->mask.vlan_tag)
1409 /* Outer TPID cannot be matched. */
1410 if (eth->mask.ether_type) {
1411 msg = "VLAN TPID matching is not supported";
1414 eth->val.ether_type = spec->inner_type;
1415 eth->mask.ether_type = mask->inner_type;
1416 eth->val.ether_type &= eth->mask.ether_type;
1421 return rte_flow_error_set(data->error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM,
1426 * Convert IPv4 item to Verbs specification.
1429 * Item specification.
1430 * @param default_mask[in]
1431 * Default bit-masks to use when item->mask is not provided.
1432 * @param data[in, out]
1436 * 0 on success, a negative errno value otherwise and rte_errno is set.
1439 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
1440 const void *default_mask,
1441 struct mlx5_flow_data *data)
1443 struct priv *priv = data->dev->data->dev_private;
1444 const struct rte_flow_item_ipv4 *spec = item->spec;
1445 const struct rte_flow_item_ipv4 *mask = item->mask;
1446 struct mlx5_flow_parse *parser = data->parser;
1447 unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
1448 struct ibv_flow_spec_ipv4_ext ipv4 = {
1449 .type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
1453 if (parser->layer == HASH_RXQ_TUNNEL &&
1454 parser->tunnel == ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] &&
1455 !priv->config.l3_vxlan_en)
1456 return rte_flow_error_set(data->error, EINVAL,
1457 RTE_FLOW_ERROR_TYPE_ITEM,
1459 "L3 VXLAN not enabled by device"
1460 " parameter and/or not configured"
1462 /* Don't update layer for the inner pattern. */
1464 parser->layer = HASH_RXQ_IPV4;
1467 mask = default_mask;
1468 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
1469 .src_ip = spec->hdr.src_addr,
1470 .dst_ip = spec->hdr.dst_addr,
1471 .proto = spec->hdr.next_proto_id,
1472 .tos = spec->hdr.type_of_service,
1474 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
1475 .src_ip = mask->hdr.src_addr,
1476 .dst_ip = mask->hdr.dst_addr,
1477 .proto = mask->hdr.next_proto_id,
1478 .tos = mask->hdr.type_of_service,
1480 /* Remove unwanted bits from values. */
1481 ipv4.val.src_ip &= ipv4.mask.src_ip;
1482 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
1483 ipv4.val.proto &= ipv4.mask.proto;
1484 ipv4.val.tos &= ipv4.mask.tos;
1486 mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
1491 * Convert IPv6 item to Verbs specification.
1494 * Item specification.
1495 * @param default_mask[in]
1496 * Default bit-masks to use when item->mask is not provided.
1497 * @param data[in, out]
1501 * 0 on success, a negative errno value otherwise and rte_errno is set.
1504 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
1505 const void *default_mask,
1506 struct mlx5_flow_data *data)
1508 struct priv *priv = data->dev->data->dev_private;
1509 const struct rte_flow_item_ipv6 *spec = item->spec;
1510 const struct rte_flow_item_ipv6 *mask = item->mask;
1511 struct mlx5_flow_parse *parser = data->parser;
1512 unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
1513 struct ibv_flow_spec_ipv6 ipv6 = {
1514 .type = parser->inner | IBV_FLOW_SPEC_IPV6,
1518 if (parser->layer == HASH_RXQ_TUNNEL &&
1519 parser->tunnel == ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)] &&
1520 !priv->config.l3_vxlan_en)
1521 return rte_flow_error_set(data->error, EINVAL,
1522 RTE_FLOW_ERROR_TYPE_ITEM,
1524 "L3 VXLAN not enabled by device"
1525 " parameter and/or not configured"
1527 /* Don't update layer for the inner pattern. */
1529 parser->layer = HASH_RXQ_IPV6;
1532 uint32_t vtc_flow_val;
1533 uint32_t vtc_flow_mask;
1536 mask = default_mask;
1537 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
1538 RTE_DIM(ipv6.val.src_ip));
1539 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
1540 RTE_DIM(ipv6.val.dst_ip));
1541 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
1542 RTE_DIM(ipv6.mask.src_ip));
1543 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
1544 RTE_DIM(ipv6.mask.dst_ip));
1545 vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
1546 vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
1547 ipv6.val.flow_label =
1548 rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
1550 ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
1552 ipv6.val.next_hdr = spec->hdr.proto;
1553 ipv6.val.hop_limit = spec->hdr.hop_limits;
1554 ipv6.mask.flow_label =
1555 rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
1557 ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
1559 ipv6.mask.next_hdr = mask->hdr.proto;
1560 ipv6.mask.hop_limit = mask->hdr.hop_limits;
1561 /* Remove unwanted bits from values. */
1562 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
1563 ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
1564 ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
1566 ipv6.val.flow_label &= ipv6.mask.flow_label;
1567 ipv6.val.traffic_class &= ipv6.mask.traffic_class;
1568 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
1569 ipv6.val.hop_limit &= ipv6.mask.hop_limit;
1571 mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
1576 * Convert UDP item to Verbs specification.
1579 * Item specification.
1580 * @param default_mask[in]
1581 * Default bit-masks to use when item->mask is not provided.
1582 * @param data[in, out]
1586 * 0 on success, a negative errno value otherwise and rte_errno is set.
1589 mlx5_flow_create_udp(const struct rte_flow_item *item,
1590 const void *default_mask,
1591 struct mlx5_flow_data *data)
1593 const struct rte_flow_item_udp *spec = item->spec;
1594 const struct rte_flow_item_udp *mask = item->mask;
1595 struct mlx5_flow_parse *parser = data->parser;
1596 unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1597 struct ibv_flow_spec_tcp_udp udp = {
1598 .type = parser->inner | IBV_FLOW_SPEC_UDP,
1602 /* Don't update layer for the inner pattern. */
1603 if (!parser->inner) {
1604 if (parser->layer == HASH_RXQ_IPV4)
1605 parser->layer = HASH_RXQ_UDPV4;
1607 parser->layer = HASH_RXQ_UDPV6;
1611 mask = default_mask;
1612 udp.val.dst_port = spec->hdr.dst_port;
1613 udp.val.src_port = spec->hdr.src_port;
1614 udp.mask.dst_port = mask->hdr.dst_port;
1615 udp.mask.src_port = mask->hdr.src_port;
1616 /* Remove unwanted bits from values. */
1617 udp.val.src_port &= udp.mask.src_port;
1618 udp.val.dst_port &= udp.mask.dst_port;
1620 mlx5_flow_create_copy(parser, &udp, udp_size);
1625 * Convert TCP item to Verbs specification.
1628 * Item specification.
1629 * @param default_mask[in]
1630 * Default bit-masks to use when item->mask is not provided.
1631 * @param data[in, out]
1635 * 0 on success, a negative errno value otherwise and rte_errno is set.
1638 mlx5_flow_create_tcp(const struct rte_flow_item *item,
1639 const void *default_mask,
1640 struct mlx5_flow_data *data)
1642 const struct rte_flow_item_tcp *spec = item->spec;
1643 const struct rte_flow_item_tcp *mask = item->mask;
1644 struct mlx5_flow_parse *parser = data->parser;
1645 unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1646 struct ibv_flow_spec_tcp_udp tcp = {
1647 .type = parser->inner | IBV_FLOW_SPEC_TCP,
1651 /* Don't update layer for the inner pattern. */
1652 if (!parser->inner) {
1653 if (parser->layer == HASH_RXQ_IPV4)
1654 parser->layer = HASH_RXQ_TCPV4;
1656 parser->layer = HASH_RXQ_TCPV6;
1660 mask = default_mask;
1661 tcp.val.dst_port = spec->hdr.dst_port;
1662 tcp.val.src_port = spec->hdr.src_port;
1663 tcp.mask.dst_port = mask->hdr.dst_port;
1664 tcp.mask.src_port = mask->hdr.src_port;
1665 /* Remove unwanted bits from values. */
1666 tcp.val.src_port &= tcp.mask.src_port;
1667 tcp.val.dst_port &= tcp.mask.dst_port;
1669 mlx5_flow_create_copy(parser, &tcp, tcp_size);
1674 * Convert VXLAN item to Verbs specification.
1677 * Item specification.
1678 * @param default_mask[in]
1679 * Default bit-masks to use when item->mask is not provided.
1680 * @param data[in, out]
1684 * 0 on success, a negative errno value otherwise and rte_errno is set.
1687 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
1688 const void *default_mask,
1689 struct mlx5_flow_data *data)
1691 const struct rte_flow_item_vxlan *spec = item->spec;
1692 const struct rte_flow_item_vxlan *mask = item->mask;
1693 struct mlx5_flow_parse *parser = data->parser;
1694 unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1695 struct ibv_flow_spec_tunnel vxlan = {
1696 .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1705 parser->inner = IBV_FLOW_SPEC_INNER;
1706 parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)];
1709 mask = default_mask;
1710 memcpy(&id.vni[1], spec->vni, 3);
1711 vxlan.val.tunnel_id = id.vlan_id;
1712 memcpy(&id.vni[1], mask->vni, 3);
1713 vxlan.mask.tunnel_id = id.vlan_id;
1714 /* Remove unwanted bits from values. */
1715 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1718 * Tunnel id 0 is equivalent to not adding a VXLAN layer: if only this
1719 * layer is defined in the Verbs specification, it is interpreted as a
1720 * wildcard and all packets will match this rule; if it follows a full
1721 * stack layer (e.g. eth / ipv4 / udp), all packets matching the layers
1722 * before will also match this rule.
1723 * To avoid such a situation, VNI 0 is currently refused.
1725 if (!vxlan.val.tunnel_id)
1726 return rte_flow_error_set(data->error, EINVAL,
1727 RTE_FLOW_ERROR_TYPE_ITEM,
1729 "VxLAN vni cannot be 0");
1730 mlx5_flow_create_copy(parser, &vxlan, size);
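/*
 * Worked example (illustrative): the 24-bit VNI lands in the upper three
 * bytes of the 32-bit Verbs tunnel id, hence the copies into &id.vni[1]
 * above. A VNI of 0x123456 yields the byte sequence 00 12 34 56, i.e.
 * 0x00123456 in network order (assuming the elided union overlays vni[4]
 * onto the 32-bit id field).
 */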
1735 * Convert GRE item to Verbs specification.
1738 * Item specification.
1739 * @param default_mask[in]
1740 * Default bit-masks to use when item->mask is not provided.
1741 * @param data[in, out]
1745 * 0 on success, a negative errno value otherwise and rte_errno is set.
1748 mlx5_flow_create_gre(const struct rte_flow_item *item __rte_unused,
1749 const void *default_mask __rte_unused,
1750 struct mlx5_flow_data *data)
1752 struct mlx5_flow_parse *parser = data->parser;
1753 unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1754 struct ibv_flow_spec_tunnel tunnel = {
1755 .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1758 struct ibv_flow_spec_ipv4_ext *ipv4;
1759 struct ibv_flow_spec_ipv6 *ipv6;
1762 parser->inner = IBV_FLOW_SPEC_INNER;
1763 parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_GRE)];
1764 /* Update encapsulation IP layer protocol. */
1765 for (i = 0; i != hash_rxq_init_n; ++i) {
1766 if (!parser->queue[i].ibv_attr)
1768 if (parser->out_layer == HASH_RXQ_IPV4) {
1769 ipv4 = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1770 parser->queue[i].offset -
1771 sizeof(struct ibv_flow_spec_ipv4_ext));
1772 if (ipv4->mask.proto && ipv4->val.proto != MLX5_GRE)
1774 ipv4->val.proto = MLX5_GRE;
1775 ipv4->mask.proto = 0xff;
1776 } else if (parser->out_layer == HASH_RXQ_IPV6) {
1777 ipv6 = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1778 parser->queue[i].offset -
1779 sizeof(struct ibv_flow_spec_ipv6));
1780 if (ipv6->mask.next_hdr &&
1781 ipv6->val.next_hdr != MLX5_GRE)
1783 ipv6->val.next_hdr = MLX5_GRE;
1784 ipv6->mask.next_hdr = 0xff;
1787 if (i != hash_rxq_init_n)
1788 return rte_flow_error_set(data->error, EINVAL,
1789 RTE_FLOW_ERROR_TYPE_ITEM,
1791 "IP protocol of GRE must be 47");
1792 mlx5_flow_create_copy(parser, &tunnel, size);
1797 * Convert mark/flag action to Verbs specification.
1800 * Internal parser structure.
1805 * 0 on success, a negative errno value otherwise and rte_errno is set.
1808 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
1810 unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1811 struct ibv_flow_spec_action_tag tag = {
1812 .type = IBV_FLOW_SPEC_ACTION_TAG,
1814 .tag_id = mlx5_flow_mark_set(mark_id),
1817 assert(parser->mark);
1818 mlx5_flow_create_copy(parser, &tag, size);
1823 * Convert count action to Verbs specification.
1826 * Pointer to Ethernet device.
1828 * Pointer to MLX5 flow parser structure.
1831 * 0 on success, a negative errno value otherwise and rte_errno is set.
1834 mlx5_flow_create_count(struct rte_eth_dev *dev __rte_unused,
1835 struct mlx5_flow_parse *parser __rte_unused)
1837 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
1838 struct priv *priv = dev->data->dev_private;
1839 unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
1840 struct ibv_counter_set_init_attr init_attr = {0};
1841 struct ibv_flow_spec_counter_action counter = {
1842 .type = IBV_FLOW_SPEC_ACTION_COUNT,
1844 .counter_set_handle = 0,
1847 init_attr.counter_set_id = 0;
1848 parser->cs = mlx5_glue->create_counter_set(priv->ctx, &init_attr);
1853 counter.counter_set_handle = parser->cs->handle;
1854 mlx5_flow_create_copy(parser, &counter, size);
1860 * Complete flow rule creation with a drop queue.
1863 * Pointer to Ethernet device.
1865 * Internal parser structure.
1867 * Pointer to the rte_flow.
1869 * Perform verbose error reporting if not NULL.
1872 * 0 on success, a negative errno value otherwise and rte_errno is set.
1875 mlx5_flow_create_action_queue_drop(struct rte_eth_dev *dev,
1876 struct mlx5_flow_parse *parser,
1877 struct rte_flow *flow,
1878 struct rte_flow_error *error)
1880 struct priv *priv = dev->data->dev_private;
1881 struct ibv_flow_spec_action_drop *drop;
1882 unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
1887 drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
1888 parser->queue[HASH_RXQ_ETH].offset);
1889 *drop = (struct ibv_flow_spec_action_drop){
1890 .type = IBV_FLOW_SPEC_ACTION_DROP,
1893 ++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
1894 parser->queue[HASH_RXQ_ETH].offset += size;
1895 flow->frxq[HASH_RXQ_ETH].ibv_attr =
1896 parser->queue[HASH_RXQ_ETH].ibv_attr;
1898 flow->cs = parser->cs;
1899 if (!priv->dev->data->dev_started)
1901 parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
1902 flow->frxq[HASH_RXQ_ETH].ibv_flow =
1903 mlx5_glue->create_flow(priv->flow_drop_queue->qp,
1904 flow->frxq[HASH_RXQ_ETH].ibv_attr);
1905 if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1906 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1907 NULL, "flow rule creation failure");
1913 if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1914 claim_zero(mlx5_glue->destroy_flow
1915 (flow->frxq[HASH_RXQ_ETH].ibv_flow));
1916 flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
1918 if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
1919 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
1920 flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
1923 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
1931 * Create hash Rx queues when RSS is enabled.
1934 * Pointer to Ethernet device.
1936 * Internal parser structure.
1938 * Pointer to the rte_flow.
1940 * Perform verbose error reporting if not NULL.
1943 * 0 on success, a negative errno value otherwise and rte_errno is set.
1946 mlx5_flow_create_action_queue_rss(struct rte_eth_dev *dev,
1947 struct mlx5_flow_parse *parser,
1948 struct rte_flow *flow,
1949 struct rte_flow_error *error)
1951 struct priv *priv = dev->data->dev_private;
1954 for (i = 0; i != hash_rxq_init_n; ++i) {
1955 uint64_t hash_fields;
1957 if (!parser->queue[i].ibv_attr)
1959 flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
1960 parser->queue[i].ibv_attr = NULL;
1961 hash_fields = hash_rxq_init[i].hash_fields;
1962 if (!priv->dev->data->dev_started)
1964 flow->frxq[i].hrxq =
1966 parser->rss_conf.key,
1967 parser->rss_conf.key_len,
1969 parser->rss_conf.queue,
1970 parser->rss_conf.queue_num,
1972 if (flow->frxq[i].hrxq)
1974 flow->frxq[i].hrxq =
1976 parser->rss_conf.key,
1977 parser->rss_conf.key_len,
1979 parser->rss_conf.queue,
1980 parser->rss_conf.queue_num,
1982 if (!flow->frxq[i].hrxq) {
1983 return rte_flow_error_set(error, ENOMEM,
1984 RTE_FLOW_ERROR_TYPE_HANDLE,
1986 "cannot create hash rxq");
1993 * RXQ update after flow rule creation.
1996 * Pointer to Ethernet device.
1998 * Pointer to the flow rule.
2001 mlx5_flow_create_update_rxqs(struct rte_eth_dev *dev, struct rte_flow *flow)
2003 struct priv *priv = dev->data->dev_private;
2007 if (!dev->data->dev_started)
2009 for (i = 0; i != flow->rss_conf.queue_num; ++i) {
2010 struct mlx5_rxq_data *rxq_data = (*priv->rxqs)
2011 [(*flow->queues)[i]];
2012 struct mlx5_rxq_ctrl *rxq_ctrl =
2013 container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
2014 uint8_t tunnel = PTYPE_IDX(flow->tunnel);
2016 rxq_data->mark |= flow->mark;
2019 rxq_ctrl->tunnel_types[tunnel] += 1;
2020 /* Clear the tunnel type if more than one tunnel type is set. */
2021 for (j = 0; j != RTE_DIM(rxq_ctrl->tunnel_types); ++j) {
2024 if (rxq_ctrl->tunnel_types[j] > 0) {
2025 rxq_data->tunnel = 0;
2029 if (j == RTE_DIM(rxq_ctrl->tunnel_types))
2030 rxq_data->tunnel = flow->tunnel;
2035 * Complete flow rule creation.
2038 * Pointer to Ethernet device.
2040 * Internal parser structure.
2042 * Pointer to the rte_flow.
2044 * Perform verbose error reporting if not NULL.
2047 * 0 on success, a negative errno value otherwise and rte_errno is set.
2050 mlx5_flow_create_action_queue(struct rte_eth_dev *dev,
2051 struct mlx5_flow_parse *parser,
2052 struct rte_flow *flow,
2053 struct rte_flow_error *error)
2055 struct priv *priv = dev->data->dev_private;
2058 unsigned int flows_n = 0;
2062 assert(!parser->drop);
2063 ret = mlx5_flow_create_action_queue_rss(dev, parser, flow, error);
2067 flow->cs = parser->cs;
2068 if (!priv->dev->data->dev_started)
2070 for (i = 0; i != hash_rxq_init_n; ++i) {
2071 if (!flow->frxq[i].hrxq)
2073 flow->frxq[i].ibv_flow =
2074 mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
2075 flow->frxq[i].ibv_attr);
2076 if (!flow->frxq[i].ibv_flow) {
2077 rte_flow_error_set(error, ENOMEM,
2078 RTE_FLOW_ERROR_TYPE_HANDLE,
2079 NULL, "flow rule creation failure");
2083 DRV_LOG(DEBUG, "port %u %p type %d QP %p ibv_flow %p",
2086 (void *)flow->frxq[i].hrxq,
2087 (void *)flow->frxq[i].ibv_flow);
2090 rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE,
2091 NULL, "internal error in flow creation");
2094 mlx5_flow_create_update_rxqs(dev, flow);
2097 ret = rte_errno; /* Save rte_errno before cleanup. */
2099 for (i = 0; i != hash_rxq_init_n; ++i) {
2100 if (flow->frxq[i].ibv_flow) {
2101 struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;
2103 claim_zero(mlx5_glue->destroy_flow(ibv_flow));
2105 if (flow->frxq[i].hrxq)
2106 mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
2107 if (flow->frxq[i].ibv_attr)
2108 rte_free(flow->frxq[i].ibv_attr);
2111 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2115 rte_errno = ret; /* Restore rte_errno. */
2123 * Pointer to Ethernet device.
2125 * Pointer to a TAILQ flow list.
2127 * Flow rule attributes.
2128 * @param[in] pattern
2129 * Pattern specification (list terminated by the END pattern item).
2130 * @param[in] actions
2131 * Associated actions (list terminated by the END action).
2133 * Perform verbose error reporting if not NULL.
2136 * A flow on success, NULL otherwise and rte_errno is set.
2138 static struct rte_flow *
2139 mlx5_flow_list_create(struct rte_eth_dev *dev,
2140 struct mlx5_flows *list,
2141 const struct rte_flow_attr *attr,
2142 const struct rte_flow_item items[],
2143 const struct rte_flow_action actions[],
2144 struct rte_flow_error *error)
2146 struct mlx5_flow_parse parser = { .create = 1, };
2147 struct rte_flow *flow = NULL;
2151 ret = mlx5_flow_convert(dev, attr, items, actions, error, &parser);
2154 flow = rte_calloc(__func__, 1,
2156 parser.rss_conf.queue_num * sizeof(uint16_t),
2159 rte_flow_error_set(error, ENOMEM,
2160 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2162 "cannot allocate flow memory");
2165 /* Copy configuration. */
2166 flow->queues = (uint16_t (*)[])(flow + 1);
2167 flow->tunnel = parser.tunnel;
2168 flow->rss_conf = (struct rte_flow_action_rss){
2169 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
2171 .types = parser.rss_conf.types,
2172 .key_len = parser.rss_conf.key_len,
2173 .queue_num = parser.rss_conf.queue_num,
2174 .key = memcpy(flow->rss_key, parser.rss_conf.key,
2175 sizeof(*parser.rss_conf.key) *
2176 parser.rss_conf.key_len),
2177 .queue = memcpy(flow->queues, parser.rss_conf.queue,
2178 sizeof(*parser.rss_conf.queue) *
2179 parser.rss_conf.queue_num),
2181 flow->mark = parser.mark;
2182 /* Finalise the flow. */
2184 ret = mlx5_flow_create_action_queue_drop(dev, &parser, flow,
2187 ret = mlx5_flow_create_action_queue(dev, &parser, flow, error);
2190 TAILQ_INSERT_TAIL(list, flow, next);
2191 DRV_LOG(DEBUG, "port %u flow created %p", dev->data->port_id,
2195 DRV_LOG(ERR, "port %u flow creation error: %s", dev->data->port_id,
2197 for (i = 0; i != hash_rxq_init_n; ++i) {
2198 if (parser.queue[i].ibv_attr)
2199 rte_free(parser.queue[i].ibv_attr);
2206 * Validate a flow supported by the NIC.
2208 * @see rte_flow_validate()
2212 mlx5_flow_validate(struct rte_eth_dev *dev,
2213 const struct rte_flow_attr *attr,
2214 const struct rte_flow_item items[],
2215 const struct rte_flow_action actions[],
2216 struct rte_flow_error *error)
2218 struct mlx5_flow_parse parser = { .create = 0, };
2220 return mlx5_flow_convert(dev, attr, items, actions, error, &parser);
2226 * @see rte_flow_create()
2230 mlx5_flow_create(struct rte_eth_dev *dev,
2231 const struct rte_flow_attr *attr,
2232 const struct rte_flow_item items[],
2233 const struct rte_flow_action actions[],
2234 struct rte_flow_error *error)
2236 struct priv *priv = dev->data->dev_private;
2238 return mlx5_flow_list_create(dev, &priv->flows, attr, items, actions,
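/*
 * Usage sketch (illustrative): a complete rule reaching this entry point
 * through the public API; the port id and queue index are hypothetical.
 *
 *	struct rte_flow_error error;
 *	struct rte_flow_item_ipv4 ip_spec = {
 *		.hdr.dst_addr = RTE_BE32(0xc0a80001),	// 192.168.0.1
 *	};
 *	struct rte_flow_item_ipv4 ip_mask = {
 *		.hdr.dst_addr = RTE_BE32(UINT32_MAX),
 *	};
 *	struct rte_flow_action_queue queue = { .index = 0 };
 *	const struct rte_flow_attr attr = { .ingress = 1 };
 *	const struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4,
 *		  .spec = &ip_spec, .mask = &ip_mask },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 *	const struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 *	struct rte_flow *flow = rte_flow_create(port_id, &attr, pattern,
 *						actions, &error);
 */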
2243 * Destroy a flow in a list.
2246 * Pointer to Ethernet device.
2248 * Pointer to a TAILQ flow list.
2253 mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
2254 struct rte_flow *flow)
2256 struct priv *priv = dev->data->dev_private;
2259 if (flow->drop || !dev->data->dev_started)
2261 for (i = 0; flow->tunnel && i != flow->rss_conf.queue_num; ++i) {
2262 /* Update queue tunnel type. */
2263 struct mlx5_rxq_data *rxq_data = (*priv->rxqs)
2264 [(*flow->queues)[i]];
2265 struct mlx5_rxq_ctrl *rxq_ctrl =
2266 container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
2267 uint8_t tunnel = PTYPE_IDX(flow->tunnel);
2269 assert(rxq_ctrl->tunnel_types[tunnel] > 0);
2270 rxq_ctrl->tunnel_types[tunnel] -= 1;
2271 if (!rxq_ctrl->tunnel_types[tunnel]) {
2272 /* Update tunnel type. */
2277 for (j = 0; j < RTE_DIM(rxq_ctrl->tunnel_types); j++)
2278 if (rxq_ctrl->tunnel_types[j]) {
2282 /* Keep the same tunnel type if more than one is left. */
2284 rxq_data->tunnel = ptype_ext[last];
2285 else if (types == 0)
2286 /* No tunnel type left. */
2287 rxq_data->tunnel = 0;
2290 for (i = 0; flow->mark && i != flow->rss_conf.queue_num; ++i) {
2291 struct rte_flow *tmp;
2295 * To remove the mark from the queue, the queue must not be
2296 * present in any other marked flow (RSS or not).
2298 TAILQ_FOREACH(tmp, list, next) {
2300 uint16_t *tqs = NULL;
2305 for (j = 0; j != hash_rxq_init_n; ++j) {
2306 if (!tmp->frxq[j].hrxq)
2308 tqs = tmp->frxq[j].hrxq->ind_table->queues;
2309 tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
2313 for (j = 0; (j != tq_n) && !mark; j++)
2314 if (tqs[j] == (*flow->queues)[i])
2317 (*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
2321 if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
2322 claim_zero(mlx5_glue->destroy_flow
2323 (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2324 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
2326 for (i = 0; i != hash_rxq_init_n; ++i) {
2327 struct mlx5_flow *frxq = &flow->frxq[i];
2330 claim_zero(mlx5_glue->destroy_flow
2333 mlx5_hrxq_release(dev, frxq->hrxq);
2335 rte_free(frxq->ibv_attr);
2339 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2342 TAILQ_REMOVE(list, flow, next);
2343 DRV_LOG(DEBUG, "port %u flow destroyed %p", dev->data->port_id,
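
/*
 * Note: clearing the mark flag above requires scanning every remaining
 * flow in the list because several flows (RSS or not) may mark packets
 * received on the same Rx queue; the flag may only be cleared once no
 * other marked flow still references that queue.
 */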

/**
 * Destroy all flows.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param list
 *   Pointer to a TAILQ flow list.
 */
void
mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
{
	while (!TAILQ_EMPTY(list)) {
		struct rte_flow *flow;

		flow = TAILQ_FIRST(list);
		mlx5_flow_list_destroy(dev, list, flow);
	}
}

/**
 * Create drop queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_create_drop_queue(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_hrxq_drop *fdq = NULL;

	fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
	if (!fdq) {
		DRV_LOG(WARNING,
			"port %u cannot allocate memory for drop queue",
			dev->data->port_id);
		rte_errno = ENOMEM;
		return -rte_errno;
	}
	fdq->cq = mlx5_glue->create_cq(priv->ctx, 1, NULL, NULL, 0);
	if (!fdq->cq) {
		DRV_LOG(WARNING, "port %u cannot allocate CQ for drop queue",
			dev->data->port_id);
		rte_errno = errno;
		goto error;
	}
	fdq->wq = mlx5_glue->create_wq
		(priv->ctx,
		 &(struct ibv_wq_init_attr){
			.wq_type = IBV_WQT_RQ,
			.max_wr = 1,
			.max_sge = 1,
			.pd = priv->pd,
			.cq = fdq->cq,
		 });
	if (!fdq->wq) {
		DRV_LOG(WARNING, "port %u cannot allocate WQ for drop queue",
			dev->data->port_id);
		rte_errno = errno;
		goto error;
	}
	fdq->ind_table = mlx5_glue->create_rwq_ind_table
		(priv->ctx,
		 &(struct ibv_rwq_ind_table_init_attr){
			.log_ind_tbl_size = 0,
			.ind_tbl = &fdq->wq,
			.comp_mask = 0,
		 });
	if (!fdq->ind_table) {
		DRV_LOG(WARNING,
			"port %u cannot allocate indirection table for drop"
			" queue",
			dev->data->port_id);
		rte_errno = errno;
		goto error;
	}
	fdq->qp = mlx5_glue->create_qp_ex
		(priv->ctx,
		 &(struct ibv_qp_init_attr_ex){
			.qp_type = IBV_QPT_RAW_PACKET,
			.comp_mask =
				IBV_QP_INIT_ATTR_PD |
				IBV_QP_INIT_ATTR_IND_TABLE |
				IBV_QP_INIT_ATTR_RX_HASH,
			.rx_hash_conf = (struct ibv_rx_hash_conf){
				.rx_hash_function =
					IBV_RX_HASH_FUNC_TOEPLITZ,
				.rx_hash_key_len = rss_hash_default_key_len,
				.rx_hash_key = rss_hash_default_key,
				.rx_hash_fields_mask = 0,
				},
			.rwq_ind_tbl = fdq->ind_table,
			.pd = priv->pd,
		 });
	if (!fdq->qp) {
		DRV_LOG(WARNING, "port %u cannot allocate QP for drop queue",
			dev->data->port_id);
		rte_errno = errno;
		goto error;
	}
	priv->flow_drop_queue = fdq;
	return 0;
error:
	if (fdq->qp)
		claim_zero(mlx5_glue->destroy_qp(fdq->qp));
	if (fdq->ind_table)
		claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
	if (fdq->wq)
		claim_zero(mlx5_glue->destroy_wq(fdq->wq));
	if (fdq->cq)
		claim_zero(mlx5_glue->destroy_cq(fdq->cq));
	rte_free(fdq);
	priv->flow_drop_queue = NULL;
	return -rte_errno;
}
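
/*
 * Note: the drop queue is a chain of dependent Verbs objects created in
 * order (CQ, then WQ on that CQ, then a one-entry indirection table over
 * the WQ, then a RAW_PACKET hash QP on top); both the error path above
 * and mlx5_flow_delete_drop_queue() below release them in the exact
 * reverse order.
 */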

/**
 * Delete drop queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
void
mlx5_flow_delete_drop_queue(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;

	if (!fdq)
		return;
	if (fdq->qp)
		claim_zero(mlx5_glue->destroy_qp(fdq->qp));
	if (fdq->ind_table)
		claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
	if (fdq->wq)
		claim_zero(mlx5_glue->destroy_wq(fdq->wq));
	if (fdq->cq)
		claim_zero(mlx5_glue->destroy_cq(fdq->cq));
	rte_free(fdq);
	priv->flow_drop_queue = NULL;
}

/**
 * Remove all flows.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param list
 *   Pointer to a TAILQ flow list.
 */
void
mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
{
	struct priv *priv = dev->data->dev_private;
	struct rte_flow *flow;
	unsigned int i;

	TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
		struct mlx5_ind_table_ibv *ind_tbl = NULL;

		if (flow->drop) {
			if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
				continue;
			claim_zero(mlx5_glue->destroy_flow
				   (flow->frxq[HASH_RXQ_ETH].ibv_flow));
			flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
			DRV_LOG(DEBUG, "port %u flow %p removed",
				dev->data->port_id, (void *)flow);
			/* Next flow. */
			continue;
		}
		/* Verify the flow has not already been cleaned. */
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (!flow->frxq[i].ibv_flow)
				continue;
			/*
			 * Indirection table may be necessary to remove the
			 * flags in the Rx queues.
			 * This helps to speed up the process by avoiding
			 * another loop.
			 */
			ind_tbl = flow->frxq[i].hrxq->ind_table;
			break;
		}
		if (i == hash_rxq_init_n)
			return;
		if (flow->mark) {
			assert(ind_tbl);
			for (i = 0; i != ind_tbl->queues_n; ++i)
				(*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
		}
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (!flow->frxq[i].ibv_flow)
				continue;
			claim_zero(mlx5_glue->destroy_flow
				   (flow->frxq[i].ibv_flow));
			flow->frxq[i].ibv_flow = NULL;
			mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
			flow->frxq[i].hrxq = NULL;
		}
		DRV_LOG(DEBUG, "port %u flow %p removed", dev->data->port_id,
			(void *)flow);
	}
	/* Cleanup Rx queue tunnel info. */
	for (i = 0; i != priv->rxqs_n; ++i) {
		struct mlx5_rxq_data *q = (*priv->rxqs)[i];
		struct mlx5_rxq_ctrl *rxq_ctrl =
			container_of(q, struct mlx5_rxq_ctrl, rxq);

		if (!q)
			continue;
		memset((void *)rxq_ctrl->tunnel_types, 0,
		       sizeof(rxq_ctrl->tunnel_types));
		q->tunnel = 0;
	}
}

/**
 * Add all flows.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param list
 *   Pointer to a TAILQ flow list.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
{
	struct priv *priv = dev->data->dev_private;
	struct rte_flow *flow;

	TAILQ_FOREACH(flow, list, next) {
		unsigned int i;

		if (flow->drop) {
			flow->frxq[HASH_RXQ_ETH].ibv_flow =
				mlx5_glue->create_flow
				(priv->flow_drop_queue->qp,
				 flow->frxq[HASH_RXQ_ETH].ibv_attr);
			if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
				DRV_LOG(DEBUG,
					"port %u flow %p cannot be applied",
					dev->data->port_id, (void *)flow);
				rte_errno = EINVAL;
				return -rte_errno;
			}
			DRV_LOG(DEBUG, "port %u flow %p applied",
				dev->data->port_id, (void *)flow);
			/* Next flow. */
			continue;
		}
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (!flow->frxq[i].ibv_attr)
				continue;
			flow->frxq[i].hrxq =
				mlx5_hrxq_get(dev, flow->rss_conf.key,
					      flow->rss_conf.key_len,
					      hash_rxq_init[i].hash_fields,
					      flow->rss_conf.queue,
					      flow->rss_conf.queue_num,
					      flow->tunnel);
			if (flow->frxq[i].hrxq)
				goto flow_create;
			flow->frxq[i].hrxq =
				mlx5_hrxq_new(dev, flow->rss_conf.key,
					      flow->rss_conf.key_len,
					      hash_rxq_init[i].hash_fields,
					      flow->rss_conf.queue,
					      flow->rss_conf.queue_num,
					      flow->tunnel);
			if (!flow->frxq[i].hrxq) {
				DRV_LOG(DEBUG,
					"port %u flow %p cannot be applied",
					dev->data->port_id, (void *)flow);
				rte_errno = EINVAL;
				return -rte_errno;
			}
flow_create:
			flow->frxq[i].ibv_flow =
				mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
						       flow->frxq[i].ibv_attr);
			if (!flow->frxq[i].ibv_flow) {
				DRV_LOG(DEBUG,
					"port %u flow %p cannot be applied",
					dev->data->port_id, (void *)flow);
				rte_errno = EINVAL;
				return -rte_errno;
			}
			DRV_LOG(DEBUG, "port %u flow %p applied",
				dev->data->port_id, (void *)flow);
		}
		mlx5_flow_create_update_rxqs(dev, flow);
	}
	return 0;
}
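
/*
 * Illustrative sketch (an assumption mirroring how the PMD start/stop
 * handlers use the two helpers above): flows are removed from hardware
 * before the port stops and re-applied from their cached Verbs attributes
 * once the Rx queues exist again.
 *
 * @code
 * mlx5_flow_stop(dev, &priv->flows);
 * if (mlx5_flow_start(dev, &priv->flows))
 *	DRV_LOG(ERR, "port %u cannot re-apply flows", dev->data->port_id);
 * @endcode
 */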

/**
 * Verify the flow list is empty.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return the number of flows not released.
 */
int
mlx5_flow_verify(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	struct rte_flow *flow;
	int ret = 0;

	TAILQ_FOREACH(flow, &priv->flows, next) {
		DRV_LOG(DEBUG, "port %u flow %p still referenced",
			dev->data->port_id, (void *)flow);
		++ret;
	}
	return ret;
}

/**
 * Enable a control flow configured from the control plane.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param eth_spec
 *   An Ethernet flow spec to apply.
 * @param eth_mask
 *   An Ethernet flow mask to apply.
 * @param vlan_spec
 *   A VLAN flow spec to apply.
 * @param vlan_mask
 *   A VLAN flow mask to apply.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
		    struct rte_flow_item_eth *eth_spec,
		    struct rte_flow_item_eth *eth_mask,
		    struct rte_flow_item_vlan *vlan_spec,
		    struct rte_flow_item_vlan *vlan_mask)
{
	struct priv *priv = dev->data->dev_private;
	const struct rte_flow_attr attr = {
		.ingress = 1,
		.priority = MLX5_CTRL_FLOW_PRIORITY,
	};
	struct rte_flow_item items[] = {
		{
			.type = RTE_FLOW_ITEM_TYPE_ETH,
			.spec = eth_spec,
			.last = NULL,
			.mask = eth_mask,
		},
		{
			.type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
					      RTE_FLOW_ITEM_TYPE_END,
			.spec = vlan_spec,
			.last = NULL,
			.mask = vlan_mask,
		},
		{
			.type = RTE_FLOW_ITEM_TYPE_END,
		},
	};
	uint16_t queue[priv->reta_idx_n];
	struct rte_flow_action_rss action_rss = {
		.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
		.level = 0,
		.types = priv->rss_conf.rss_hf,
		.key_len = priv->rss_conf.rss_key_len,
		.queue_num = priv->reta_idx_n,
		.key = priv->rss_conf.rss_key,
		.queue = queue,
	};
	struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_RSS,
			.conf = &action_rss,
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		},
	};
	struct rte_flow *flow;
	struct rte_flow_error error;
	unsigned int i;

	if (!priv->reta_idx_n) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	for (i = 0; i != priv->reta_idx_n; ++i)
		queue[i] = (*priv->reta_idx)[i];
	flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
				     actions, &error);
	if (!flow)
		return -rte_errno;
	return 0;
}

/**
 * Enable a flow control configured from the control plane.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param eth_spec
 *   An Ethernet flow spec to apply.
 * @param eth_mask
 *   An Ethernet flow mask to apply.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_ctrl_flow(struct rte_eth_dev *dev,
	       struct rte_flow_item_eth *eth_spec,
	       struct rte_flow_item_eth *eth_mask)
{
	return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
}
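
/*
 * Usage sketch (illustrative): enabling the unicast control flow for the
 * port primary MAC address, similar to what the PMD traffic-enable path
 * does. The local variables are assumptions made for the example.
 *
 * @code
 * struct rte_flow_item_eth unicast = {
 *	.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
 * };
 * struct rte_flow_item_eth unicast_mask = {
 *	.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 * };
 *
 * memcpy(&unicast.dst.addr_bytes, dev->data->mac_addrs[0].addr_bytes,
 *        ETHER_ADDR_LEN);
 * if (mlx5_ctrl_flow(dev, &unicast, &unicast_mask))
 *	DRV_LOG(ERR, "port %u cannot enable unicast MAC flow",
 *		dev->data->port_id);
 * @endcode
 */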

/**
 * Destroy a flow.
 *
 * @see rte_flow_destroy()
 * @see rte_flow_ops
 */
int
mlx5_flow_destroy(struct rte_eth_dev *dev,
		  struct rte_flow *flow,
		  struct rte_flow_error *error __rte_unused)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_destroy(dev, &priv->flows, flow);
	return 0;
}

/**
 * Destroy all flows.
 *
 * @see rte_flow_flush()
 * @see rte_flow_ops
 */
int
mlx5_flow_flush(struct rte_eth_dev *dev,
		struct rte_flow_error *error __rte_unused)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_flush(dev, &priv->flows);
	return 0;
}

#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
/**
 * Query flow counter.
 *
 * @param cs
 *   The Verbs counter set to query.
 * @param counter_stats
 *   Accumulated statistics the returned deltas are computed against.
 * @param[out] query_count
 *   Returned counter data.
 * @param error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_query_count(struct ibv_counter_set *cs,
		      struct mlx5_flow_counter_stats *counter_stats,
		      struct rte_flow_query_count *query_count,
		      struct rte_flow_error *error)
{
	uint64_t counters[2];
	struct ibv_query_counter_set_attr query_cs_attr = {
		.cs = cs,
		.query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
	};
	struct ibv_counter_set_data query_out = {
		.out = counters,
		.outlen = 2 * sizeof(uint64_t),
	};
	int err = mlx5_glue->query_counter_set(&query_cs_attr, &query_out);

	if (err)
		return rte_flow_error_set(error, err,
					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
					  NULL,
					  "cannot read counter");
	query_count->hits_set = 1;
	query_count->bytes_set = 1;
	query_count->hits = counters[0] - counter_stats->hits;
	query_count->bytes = counters[1] - counter_stats->bytes;
	if (query_count->reset) {
		counter_stats->hits = counters[0];
		counter_stats->bytes = counters[1];
	}
	return 0;
}

/**
 * Query a flow.
 *
 * @see rte_flow_query()
 * @see rte_flow_ops
 */
int
mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
		struct rte_flow *flow,
		enum rte_flow_action_type action __rte_unused,
		void *data,
		struct rte_flow_error *error)
{
	if (flow->cs) {
		int ret;

		ret = mlx5_flow_query_count(flow->cs,
					    &flow->counter_stats,
					    (struct rte_flow_query_count *)data,
					    error);
		if (ret)
			return ret;
	} else {
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
					  NULL,
					  "no counter found for flow");
	}
	return 0;
}
#endif

/**
 * Isolated mode.
 *
 * @see rte_flow_isolate()
 * @see rte_flow_ops
 */
int
mlx5_flow_isolate(struct rte_eth_dev *dev,
		  int enable,
		  struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	if (dev->data->dev_started) {
		rte_flow_error_set(error, EBUSY,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "port must be stopped first");
		return -rte_errno;
	}
	priv->isolated = !!enable;
	if (enable)
		priv->dev->dev_ops = &mlx5_dev_ops_isolate;
	else
		priv->dev->dev_ops = &mlx5_dev_ops;
	return 0;
}
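
/*
 * Usage sketch (illustrative): isolated mode must be requested while the
 * port is stopped, typically right after rte_eth_dev_configure(). Port id
 * 0 is an assumption made for the example.
 *
 * @code
 * struct rte_flow_error error;
 *
 * if (rte_flow_isolate(0, 1, &error))
 *	printf("cannot enter isolated mode: %s\n", error.message);
 * @endcode
 */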

/**
 * Convert a flow director filter to a generic flow.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Flow director filter to add.
 * @param attributes
 *   Generic flow parameters structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
			 const struct rte_eth_fdir_filter *fdir_filter,
			 struct mlx5_fdir *attributes)
{
	struct priv *priv = dev->data->dev_private;
	const struct rte_eth_fdir_input *input = &fdir_filter->input;
	const struct rte_eth_fdir_masks *mask =
		&dev->data->dev_conf.fdir_conf.mask;

	/* Validate queue number. */
	if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
		DRV_LOG(ERR, "port %u invalid queue number %d",
			dev->data->port_id, fdir_filter->action.rx_queue);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	attributes->attr.ingress = 1;
	attributes->items[0] = (struct rte_flow_item) {
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.spec = &attributes->l2,
		.mask = &attributes->l2_mask,
	};
	switch (fdir_filter->action.behavior) {
	case RTE_ETH_FDIR_ACCEPT:
		attributes->actions[0] = (struct rte_flow_action){
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &attributes->queue,
		};
		break;
	case RTE_ETH_FDIR_REJECT:
		attributes->actions[0] = (struct rte_flow_action){
			.type = RTE_FLOW_ACTION_TYPE_DROP,
		};
		break;
	default:
		DRV_LOG(ERR, "port %u invalid behavior %d",
			dev->data->port_id,
			fdir_filter->action.behavior);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	attributes->queue.index = fdir_filter->action.rx_queue;
	switch (fdir_filter->input.flow_type) {
	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = input->flow.ip4_flow.src_ip,
			.dst_addr = input->flow.ip4_flow.dst_ip,
			.time_to_live = input->flow.ip4_flow.ttl,
			.type_of_service = input->flow.ip4_flow.tos,
			.next_proto_id = input->flow.ip4_flow.proto,
		};
		attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = mask->ipv4_mask.src_ip,
			.dst_addr = mask->ipv4_mask.dst_ip,
			.time_to_live = mask->ipv4_mask.ttl,
			.type_of_service = mask->ipv4_mask.tos,
			.next_proto_id = mask->ipv4_mask.proto,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV4,
			.spec = &attributes->l3,
			.mask = &attributes->l3_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
			.hop_limits = input->flow.ipv6_flow.hop_limits,
			.proto = input->flow.ipv6_flow.proto,
		};

		memcpy(attributes->l3.ipv6.hdr.src_addr,
		       input->flow.ipv6_flow.src_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
		memcpy(attributes->l3.ipv6.hdr.dst_addr,
		       input->flow.ipv6_flow.dst_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
		memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
		       mask->ipv6_mask.src_ip,
		       RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
		memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
		       mask->ipv6_mask.dst_ip,
		       RTE_DIM(attributes->l3_mask.ipv6.hdr.dst_addr));
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV6,
			.spec = &attributes->l3,
			.mask = &attributes->l3_mask,
		};
		break;
	default:
		DRV_LOG(ERR, "port %u invalid flow type %d",
			dev->data->port_id, fdir_filter->input.flow_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	switch (fdir_filter->input.flow_type) {
	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
		attributes->l4.udp.hdr = (struct udp_hdr){
			.src_port = input->flow.udp4_flow.src_port,
			.dst_port = input->flow.udp4_flow.dst_port,
		};
		attributes->l4_mask.udp.hdr = (struct udp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_UDP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
		attributes->l4.tcp.hdr = (struct tcp_hdr){
			.src_port = input->flow.tcp4_flow.src_port,
			.dst_port = input->flow.tcp4_flow.dst_port,
		};
		attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_TCP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
		attributes->l4.udp.hdr = (struct udp_hdr){
			.src_port = input->flow.udp6_flow.src_port,
			.dst_port = input->flow.udp6_flow.dst_port,
		};
		attributes->l4_mask.udp.hdr = (struct udp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_UDP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
		attributes->l4.tcp.hdr = (struct tcp_hdr){
			.src_port = input->flow.tcp6_flow.src_port,
			.dst_port = input->flow.tcp6_flow.dst_port,
		};
		attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_TCP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
		break;
	default:
		DRV_LOG(ERR, "port %u invalid flow type %d",
			dev->data->port_id, fdir_filter->input.flow_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	return 0;
}
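
/*
 * Illustrative example (an assumption, not driver code): a flow director
 * filter steering IPv4/UDP traffic with destination port 4789 to queue 3
 * converts to the generic pattern ETH / IPV4 / UDP with a QUEUE action.
 *
 * @code
 * struct rte_eth_fdir_filter f = {
 *	.input = {
 *		.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
 *		.flow.udp4_flow.dst_port = rte_cpu_to_be_16(4789),
 *	},
 *	.action = {
 *		.behavior = RTE_ETH_FDIR_ACCEPT,
 *		.rx_queue = 3,
 *	},
 * };
 * struct mlx5_fdir attributes = { .attr.group = 0, };
 *
 * int ret = mlx5_fdir_filter_convert(dev, &f, &attributes);
 * @endcode
 *
 * On success, attributes.items[] and attributes.actions[] describe the
 * equivalent rte_flow and can be fed to mlx5_flow_convert().
 */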

/**
 * Add new flow director filter and store it in list.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Flow director filter to add.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_add(struct rte_eth_dev *dev,
		     const struct rte_eth_fdir_filter *fdir_filter)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_fdir attributes = {
		.attr.group = 0,
		.l2_mask = {
			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.type = 0,
		},
	};
	struct mlx5_flow_parse parser = {
		.layer = HASH_RXQ_ETH,
	};
	struct rte_flow_error error;
	struct rte_flow *flow;
	int ret;

	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
	if (ret)
		return ret;
	ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
				attributes.actions, &error, &parser);
	if (ret)
		return ret;
	flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
				     attributes.items, attributes.actions,
				     &error);
	if (flow) {
		DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
			(void *)flow);
		return 0;
	}
	return -rte_errno;
}

/**
 * Delete specific filter.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Filter to be deleted.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_delete(struct rte_eth_dev *dev,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_fdir attributes = {
		.attr.group = 0,
	};
	struct mlx5_flow_parse parser = {
		.create = 1,
		.layer = HASH_RXQ_ETH,
	};
	struct rte_flow_error error;
	struct rte_flow *flow;
	unsigned int i;
	int ret;

	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
	if (ret)
		return ret;
	ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
				attributes.actions, &error, &parser);
	if (ret)
		goto exit;
	/*
	 * Special case for drop action which is only set in the
	 * specifications when the flow is created. In this situation the
	 * drop specification is missing.
	 */
	if (parser.drop) {
		struct ibv_flow_spec_action_drop *drop;

		drop = (void *)
			((uintptr_t)parser.queue[HASH_RXQ_ETH].ibv_attr +
			 parser.queue[HASH_RXQ_ETH].offset);
		*drop = (struct ibv_flow_spec_action_drop){
			.type = IBV_FLOW_SPEC_ACTION_DROP,
			.size = sizeof(struct ibv_flow_spec_action_drop),
		};
		parser.queue[HASH_RXQ_ETH].ibv_attr->num_of_specs++;
	}
	TAILQ_FOREACH(flow, &priv->flows, next) {
		struct ibv_flow_attr *attr;
		struct ibv_spec_header *attr_h;
		void *spec;
		struct ibv_flow_attr *flow_attr;
		struct ibv_spec_header *flow_h;
		void *flow_spec;
		unsigned int specs_n;

		attr = parser.queue[HASH_RXQ_ETH].ibv_attr;
		flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr;
		/* Compare the attributes first. */
		if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
			continue;
		if (attr->num_of_specs == 0)
			continue;
		spec = (void *)((uintptr_t)attr +
				sizeof(struct ibv_flow_attr));
		flow_spec = (void *)((uintptr_t)flow_attr +
				     sizeof(struct ibv_flow_attr));
		specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
		for (i = 0; i != specs_n; ++i) {
			attr_h = spec;
			flow_h = flow_spec;
			if (memcmp(spec, flow_spec,
				   RTE_MIN(attr_h->size, flow_h->size)))
				goto wrong_flow;
			spec = (void *)((uintptr_t)spec + attr_h->size);
			flow_spec = (void *)((uintptr_t)flow_spec +
					     flow_h->size);
		}
		/* At this point, the flow matches. */
		break;
wrong_flow:
		/* The flow does not match. */
		continue;
	}
	ret = rte_errno; /* Save rte_errno before cleanup. */
	if (flow)
		mlx5_flow_list_destroy(dev, &priv->flows, flow);
exit:
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser.queue[i].ibv_attr)
			rte_free(parser.queue[i].ibv_attr);
	}
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}

/**
 * Update queue for specific filter.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Filter to be updated.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_update(struct rte_eth_dev *dev,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	int ret;

	ret = mlx5_fdir_filter_delete(dev, fdir_filter);
	if (ret)
		return ret;
	return mlx5_fdir_filter_add(dev, fdir_filter);
}

/**
 * Flush all filters.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
static void
mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_flush(dev, &priv->flows);
}

/**
 * Get flow director information.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[out] fdir_info
 *   Resulting flow director information.
 */
static void
mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
{
	struct priv *priv = dev->data->dev_private;
	struct rte_eth_fdir_masks *mask =
		&priv->dev->data->dev_conf.fdir_conf.mask;

	fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
	fdir_info->guarant_spc = 0;
	rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
	fdir_info->max_flexpayload = 0;
	fdir_info->flow_types_mask[0] = 0;
	fdir_info->flex_payload_unit = 0;
	fdir_info->max_flex_payload_segment_num = 0;
	fdir_info->flex_payload_limit = 0;
	memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
}

/**
 * Deal with flow director operations.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
		    void *arg)
{
	struct priv *priv = dev->data->dev_private;
	enum rte_fdir_mode fdir_mode =
		priv->dev->data->dev_conf.fdir_conf.mode;

	if (filter_op == RTE_ETH_FILTER_NOP)
		return 0;
	if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
	    fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
		DRV_LOG(ERR, "port %u flow director mode %d not supported",
			dev->data->port_id, fdir_mode);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	switch (filter_op) {
	case RTE_ETH_FILTER_ADD:
		return mlx5_fdir_filter_add(dev, arg);
	case RTE_ETH_FILTER_UPDATE:
		return mlx5_fdir_filter_update(dev, arg);
	case RTE_ETH_FILTER_DELETE:
		return mlx5_fdir_filter_delete(dev, arg);
	case RTE_ETH_FILTER_FLUSH:
		mlx5_fdir_filter_flush(dev);
		break;
	case RTE_ETH_FILTER_INFO:
		mlx5_fdir_info_get(dev, arg);
		break;
	default:
		DRV_LOG(DEBUG, "port %u unknown operation %u",
			dev->data->port_id, filter_op);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	return 0;
}
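
/*
 * Usage sketch (illustrative): applications reach the flow director entry
 * points above through the filter-control API; port_id and the filter
 * contents are assumptions made for the example.
 *
 * @code
 * struct rte_eth_fdir_filter f = {
 *	.input.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
 *	.action = {
 *		.behavior = RTE_ETH_FDIR_ACCEPT,
 *		.rx_queue = 0,
 *	},
 * };
 *
 * rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
 *			   RTE_ETH_FILTER_ADD, &f);
 * @endcode
 */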

/**
 * Manage filter operations.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param filter_type
 *   Filter type.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
		     enum rte_filter_type filter_type,
		     enum rte_filter_op filter_op,
		     void *arg)
{
	switch (filter_type) {
	case RTE_ETH_FILTER_GENERIC:
		if (filter_op != RTE_ETH_FILTER_GET) {
			rte_errno = EINVAL;
			return -rte_errno;
		}
		*(const void **)arg = &mlx5_flow_ops;
		return 0;
	case RTE_ETH_FILTER_FDIR:
		return mlx5_fdir_ctrl_func(dev, filter_op, arg);
	default:
		DRV_LOG(ERR, "port %u filter type (%d) not supported",
			dev->data->port_id, filter_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	return 0;
}

/**
 * Detect number of Verbs flow priorities supported.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   Number of supported Verbs flow priorities.
 */
unsigned int
mlx5_get_max_verbs_prio(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	unsigned int verb_priorities = MLX5_VERBS_FLOW_PRIO_8;
	struct {
		struct ibv_flow_attr attr;
		struct ibv_flow_spec_eth eth;
		struct ibv_flow_spec_action_drop drop;
	} flow_attr = {
		.attr = {
			.num_of_specs = 2,
		},
		.eth = {
			.type = IBV_FLOW_SPEC_ETH,
			.size = sizeof(struct ibv_flow_spec_eth),
		},
		.drop = {
			.size = sizeof(struct ibv_flow_spec_action_drop),
			.type = IBV_FLOW_SPEC_ACTION_DROP,
		},
	};
	struct ibv_flow *flow;

	do {
		flow_attr.attr.priority = verb_priorities - 1;
		flow = mlx5_glue->create_flow(priv->flow_drop_queue->qp,
					      &flow_attr.attr);
		if (flow) {
			claim_zero(mlx5_glue->destroy_flow(flow));
			/* Try more priorities. */
			verb_priorities *= 2;
		} else {
			/* Creation failed, fall back to the last working
			 * number. */
			verb_priorities /= 2;
			break;
		}
	} while (verb_priorities > 0);
	DRV_LOG(DEBUG, "port %u Verbs flow priorities: %u,"
		" user flow priorities: %d",
		dev->data->port_id, verb_priorities, MLX5_CTRL_FLOW_PRIORITY);
	return verb_priorities;
}
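
/*
 * Worked example (assuming the device rejects the probe flow at priority
 * 31): starting from MLX5_VERBS_FLOW_PRIO_8, the loop above probes
 * priorities 7, 15 and 31; the first two creations succeed and double
 * verb_priorities to 16 and then 32, the third fails, so verb_priorities
 * falls back to 16, which is returned.
 */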