1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2016 6WIND S.A.
3 * Copyright 2016 Mellanox Technologies, Ltd
11 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
13 #pragma GCC diagnostic ignored "-Wpedantic"
15 #include <infiniband/verbs.h>
17 #pragma GCC diagnostic error "-Wpedantic"
20 #include <rte_common.h>
21 #include <rte_ethdev_driver.h>
23 #include <rte_flow_driver.h>
24 #include <rte_malloc.h>
28 #include "mlx5_defs.h"
30 #include "mlx5_glue.h"
32 /* Define the minimal priority for control plane flows. */
33 #define MLX5_CTRL_FLOW_PRIORITY 4
35 /* Internet Protocol versions. */
39 #ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
40 struct ibv_flow_spec_counter_action {
45 /* Dev ops structures defined in mlx5.c. */
46 extern const struct eth_dev_ops mlx5_dev_ops;
47 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
49 /** Structure given to the conversion functions. */
50 struct mlx5_flow_data {
51 struct mlx5_flow_parse *parser; /**< Parser context. */
52 struct rte_flow_error *error; /**< Error context. */
56 mlx5_flow_create_eth(const struct rte_flow_item *item,
57 const void *default_mask,
58 struct mlx5_flow_data *data);
61 mlx5_flow_create_vlan(const struct rte_flow_item *item,
62 const void *default_mask,
63 struct mlx5_flow_data *data);
66 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
67 const void *default_mask,
68 struct mlx5_flow_data *data);
71 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
72 const void *default_mask,
73 struct mlx5_flow_data *data);
76 mlx5_flow_create_udp(const struct rte_flow_item *item,
77 const void *default_mask,
78 struct mlx5_flow_data *data);
81 mlx5_flow_create_tcp(const struct rte_flow_item *item,
82 const void *default_mask,
83 struct mlx5_flow_data *data);
86 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
87 const void *default_mask,
88 struct mlx5_flow_data *data);
90 struct mlx5_flow_parse;
93 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
97 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);
100 mlx5_flow_create_count(struct rte_eth_dev *dev, struct mlx5_flow_parse *parser);
102 /* Hash Rx queue types. */
113 /* Initialization data for hash Rx queue. */
114 struct hash_rxq_init {
115 uint64_t hash_fields; /* Fields that participate in the hash. */
116 uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
117 unsigned int flow_priority; /* Flow priority to use. */
118 unsigned int ip_version; /* Internet protocol. */
121 /* Initialization data for hash Rx queues. */
122 const struct hash_rxq_init hash_rxq_init[] = {
124 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
125 IBV_RX_HASH_DST_IPV4 |
126 IBV_RX_HASH_SRC_PORT_TCP |
127 IBV_RX_HASH_DST_PORT_TCP),
128 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
130 .ip_version = MLX5_IPV4,
133 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
134 IBV_RX_HASH_DST_IPV4 |
135 IBV_RX_HASH_SRC_PORT_UDP |
136 IBV_RX_HASH_DST_PORT_UDP),
137 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
139 .ip_version = MLX5_IPV4,
142 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
143 IBV_RX_HASH_DST_IPV4),
144 .dpdk_rss_hf = (ETH_RSS_IPV4 |
147 .ip_version = MLX5_IPV4,
150 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
151 IBV_RX_HASH_DST_IPV6 |
152 IBV_RX_HASH_SRC_PORT_TCP |
153 IBV_RX_HASH_DST_PORT_TCP),
154 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
156 .ip_version = MLX5_IPV6,
159 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
160 IBV_RX_HASH_DST_IPV6 |
161 IBV_RX_HASH_SRC_PORT_UDP |
162 IBV_RX_HASH_DST_PORT_UDP),
163 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
165 .ip_version = MLX5_IPV6,
168 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
169 IBV_RX_HASH_DST_IPV6),
170 .dpdk_rss_hf = (ETH_RSS_IPV6 |
173 .ip_version = MLX5_IPV6,
182 /* Number of entries in hash_rxq_init[]. */
183 const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
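/*
 * Illustrative mapping (not exhaustive): a flow whose RSS configuration
 * includes ETH_RSS_NONFRAG_IPV4_UDP uses the entry above with
 * .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP, i.e. Verbs hashing on the
 * IPv4 source/destination addresses and UDP source/destination ports.
 */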
185 /** Structure for holding counter stats. */
186 struct mlx5_flow_counter_stats {
187 uint64_t hits; /**< Number of packets matched by the rule. */
188 uint64_t bytes; /**< Number of bytes matched by the rule. */
191 /** Structure for the drop queue. */
192 struct mlx5_hrxq_drop {
193 struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
194 struct ibv_qp *qp; /**< Verbs queue pair. */
195 struct ibv_wq *wq; /**< Verbs work queue. */
196 struct ibv_cq *cq; /**< Verbs completion queue. */
199 /* Flow structures. */
201 uint64_t hash_fields; /**< Fields that participate in the hash. */
202 struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
203 struct ibv_flow *ibv_flow; /**< Verbs flow. */
204 struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
207 /* Drop flow structures. */
208 struct mlx5_flow_drop {
209 struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
210 struct ibv_flow *ibv_flow; /**< Verbs flow. */
214 TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
215 uint32_t mark:1; /**< Set if the flow is marked. */
216 uint32_t drop:1; /**< Drop queue. */
217 uint16_t queues_n; /**< Number of entries in queues[]. */
218 uint16_t (*queues)[]; /**< Queue indexes to use. */
219 struct rte_eth_rss_conf rss_conf; /**< RSS configuration. */
220 uint8_t rss_key[40]; /**< Copy of the RSS key. */
221 struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
222 struct mlx5_flow_counter_stats counter_stats;/**< The counter stats. */
223 struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
224 /**< Flow with Rx queue. */
227 /** Static initializer for items. */
229 (const enum rte_flow_item_type []){ \
230 __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
233 /** Structure to generate a simple graph of layers supported by the NIC. */
234 struct mlx5_flow_items {
235 /** List of possible actions for these items. */
236 const enum rte_flow_action_type *const actions;
237 /** Bit-masks corresponding to the possibilities for the item. */
240 * Default bit-masks to use when item->mask is not provided. When
241 * \default_mask is also NULL, the full supported bit-mask (\mask) is
244 const void *default_mask;
245 /** Bit-masks size in bytes. */
246 const unsigned int mask_sz;
248 * Conversion function from rte_flow to NIC-specific flow.
251 * rte_flow item to convert.
252 * @param default_mask
253 * Default bit-masks to use when item->mask is not provided.
255 * Internal structure to store the conversion.
258 * 0 on success, a negative errno value otherwise and rte_errno is
261 int (*convert)(const struct rte_flow_item *item,
262 const void *default_mask,
263 struct mlx5_flow_data *data);
264 /** Size in bytes of the destination structure. */
265 const unsigned int dst_sz;
266 /** List of possible following items. */
267 const enum rte_flow_item_type *const items;
270 /** Valid actions for this PMD. */
271 static const enum rte_flow_action_type valid_actions[] = {
272 RTE_FLOW_ACTION_TYPE_DROP,
273 RTE_FLOW_ACTION_TYPE_QUEUE,
274 RTE_FLOW_ACTION_TYPE_MARK,
275 RTE_FLOW_ACTION_TYPE_FLAG,
276 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
277 RTE_FLOW_ACTION_TYPE_COUNT,
279 RTE_FLOW_ACTION_TYPE_END,
282 /** Graph of supported items and associated actions. */
283 static const struct mlx5_flow_items mlx5_flow_items[] = {
284 [RTE_FLOW_ITEM_TYPE_END] = {
285 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
286 RTE_FLOW_ITEM_TYPE_VXLAN),
288 [RTE_FLOW_ITEM_TYPE_ETH] = {
289 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
290 RTE_FLOW_ITEM_TYPE_IPV4,
291 RTE_FLOW_ITEM_TYPE_IPV6),
292 .actions = valid_actions,
293 .mask = &(const struct rte_flow_item_eth){
294 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
295 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
298 .default_mask = &rte_flow_item_eth_mask,
299 .mask_sz = sizeof(struct rte_flow_item_eth),
300 .convert = mlx5_flow_create_eth,
301 .dst_sz = sizeof(struct ibv_flow_spec_eth),
303 [RTE_FLOW_ITEM_TYPE_VLAN] = {
304 .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
305 RTE_FLOW_ITEM_TYPE_IPV6),
306 .actions = valid_actions,
307 .mask = &(const struct rte_flow_item_vlan){
310 .default_mask = &rte_flow_item_vlan_mask,
311 .mask_sz = sizeof(struct rte_flow_item_vlan),
312 .convert = mlx5_flow_create_vlan,
315 [RTE_FLOW_ITEM_TYPE_IPV4] = {
316 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
317 RTE_FLOW_ITEM_TYPE_TCP),
318 .actions = valid_actions,
319 .mask = &(const struct rte_flow_item_ipv4){
323 .type_of_service = -1,
327 .default_mask = &rte_flow_item_ipv4_mask,
328 .mask_sz = sizeof(struct rte_flow_item_ipv4),
329 .convert = mlx5_flow_create_ipv4,
330 .dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
332 [RTE_FLOW_ITEM_TYPE_IPV6] = {
333 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
334 RTE_FLOW_ITEM_TYPE_TCP),
335 .actions = valid_actions,
336 .mask = &(const struct rte_flow_item_ipv6){
339 0xff, 0xff, 0xff, 0xff,
340 0xff, 0xff, 0xff, 0xff,
341 0xff, 0xff, 0xff, 0xff,
342 0xff, 0xff, 0xff, 0xff,
345 0xff, 0xff, 0xff, 0xff,
346 0xff, 0xff, 0xff, 0xff,
347 0xff, 0xff, 0xff, 0xff,
348 0xff, 0xff, 0xff, 0xff,
355 .default_mask = &rte_flow_item_ipv6_mask,
356 .mask_sz = sizeof(struct rte_flow_item_ipv6),
357 .convert = mlx5_flow_create_ipv6,
358 .dst_sz = sizeof(struct ibv_flow_spec_ipv6),
360 [RTE_FLOW_ITEM_TYPE_UDP] = {
361 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
362 .actions = valid_actions,
363 .mask = &(const struct rte_flow_item_udp){
369 .default_mask = &rte_flow_item_udp_mask,
370 .mask_sz = sizeof(struct rte_flow_item_udp),
371 .convert = mlx5_flow_create_udp,
372 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
374 [RTE_FLOW_ITEM_TYPE_TCP] = {
375 .actions = valid_actions,
376 .mask = &(const struct rte_flow_item_tcp){
382 .default_mask = &rte_flow_item_tcp_mask,
383 .mask_sz = sizeof(struct rte_flow_item_tcp),
384 .convert = mlx5_flow_create_tcp,
385 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
387 [RTE_FLOW_ITEM_TYPE_VXLAN] = {
388 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
389 .actions = valid_actions,
390 .mask = &(const struct rte_flow_item_vxlan){
391 .vni = "\xff\xff\xff",
393 .default_mask = &rte_flow_item_vxlan_mask,
394 .mask_sz = sizeof(struct rte_flow_item_vxlan),
395 .convert = mlx5_flow_create_vxlan,
396 .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
400 /** Structure to pass to the conversion function. */
401 struct mlx5_flow_parse {
402 uint32_t inner; /**< Set once VXLAN is encountered. */
404 /**< Whether resources should remain after a validate. */
405 uint32_t drop:1; /**< Target is a drop queue. */
406 uint32_t mark:1; /**< Mark is present in the flow. */
407 uint32_t count:1; /**< Count is present in the flow. */
408 uint32_t mark_id; /**< Mark identifier. */
409 uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indexes to use. */
410 uint16_t queues_n; /**< Number of entries in queues[]. */
411 struct rte_eth_rss_conf rss_conf; /**< RSS configuration. */
412 uint8_t rss_key[40]; /**< Copy of the RSS key. */
413 enum hash_rxq_type layer; /**< Last pattern layer detected. */
414 struct ibv_counter_set *cs; /**< Holds the counter set for the rule. */
416 struct ibv_flow_attr *ibv_attr;
417 /**< Pointer to Verbs attributes. */
419 /**< Current position or total size of the attribute. */
420 } queue[RTE_DIM(hash_rxq_init)];
423 static const struct rte_flow_ops mlx5_flow_ops = {
424 .validate = mlx5_flow_validate,
425 .create = mlx5_flow_create,
426 .destroy = mlx5_flow_destroy,
427 .flush = mlx5_flow_flush,
428 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
429 .query = mlx5_flow_query,
433 .isolate = mlx5_flow_isolate,
436 /* Convert FDIR request to generic flow. */
438 struct rte_flow_attr attr;
439 struct rte_flow_action actions[2];
440 struct rte_flow_item items[4];
441 struct rte_flow_item_eth l2;
442 struct rte_flow_item_eth l2_mask;
444 struct rte_flow_item_ipv4 ipv4;
445 struct rte_flow_item_ipv6 ipv6;
448 struct rte_flow_item_ipv4 ipv4;
449 struct rte_flow_item_ipv6 ipv6;
452 struct rte_flow_item_udp udp;
453 struct rte_flow_item_tcp tcp;
456 struct rte_flow_item_udp udp;
457 struct rte_flow_item_tcp tcp;
459 struct rte_flow_action_queue queue;
462 /* Verbs specification header. */
463 struct ibv_spec_header {
464 enum ibv_flow_spec_type type;
469 * Check support for a given item.
472 * Item specification.
474 * Bit-masks covering supported fields to compare with spec, last and mask in
477 * Bit-mask size in bytes.
480 * 0 on success, a negative errno value otherwise and rte_errno is set.
483 mlx5_flow_item_validate(const struct rte_flow_item *item,
484 const uint8_t *mask, unsigned int size)
486 if (!item->spec && (item->mask || item->last)) {
490 if (item->spec && !item->mask) {
492 const uint8_t *spec = item->spec;
494 for (i = 0; i < size; ++i)
495 if ((spec[i] | mask[i]) != mask[i]) {
500 if (item->last && !item->mask) {
502 const uint8_t *spec = item->last;
504 for (i = 0; i < size; ++i)
505 if ((spec[i] | mask[i]) != mask[i]) {
512 const uint8_t *spec = item->spec;
514 for (i = 0; i < size; ++i)
515 if ((spec[i] | mask[i]) != mask[i]) {
520 if (item->spec && item->last) {
523 const uint8_t *apply = mask;
529 for (i = 0; i < size; ++i) {
530 spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
531 last[i] = ((const uint8_t *)item->last)[i] & apply[i];
533 ret = memcmp(spec, last, size);
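/*
 * Ranges are not supported by this PMD: after applying the mask, spec
 * and last must describe the same value, so a non-zero memcmp() result
 * above is expected to be rejected as an unsupported item.
 */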
543 * Copy the RSS configuration from the user configuration; if rss_conf is
544 * NULL, use the driver default.
547 * Internal parser structure.
549 * User RSS configuration to save.
552 * 0 on success, a negative errno value otherwise and rte_errno is set.
555 mlx5_flow_convert_rss_conf(struct mlx5_flow_parse *parser,
556 const struct rte_eth_rss_conf *rss_conf)
559 * This function is also called at the beginning of
560 * mlx5_flow_convert_actions() to initialize the parser with the
561 * device default RSS configuration.
564 if (rss_conf->rss_hf & MLX5_RSS_HF_MASK) {
568 if (rss_conf->rss_key_len != 40) {
572 if (rss_conf->rss_key_len && rss_conf->rss_key) {
573 parser->rss_conf.rss_key_len = rss_conf->rss_key_len;
574 memcpy(parser->rss_key, rss_conf->rss_key,
575 rss_conf->rss_key_len);
576 parser->rss_conf.rss_key = parser->rss_key;
578 parser->rss_conf.rss_hf = rss_conf->rss_hf;
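/*
 * Illustrative configuration accepted by the checks above (hypothetical
 * values), assuming the mandatory 40-byte Toeplitz key:
 *
 *   static uint8_t key[40] = { 0 };
 *   struct rte_eth_rss_conf conf = {
 *       .rss_key = key,
 *       .rss_key_len = 40,
 *       .rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
 *   };
 */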
584 * Validate flow rule attributes.
587 * Flow rule attributes.
589 * Perform verbose error reporting if not NULL.
592 * 0 on success, a negative errno value otherwise and rte_errno is set.
595 mlx5_flow_convert_attributes(const struct rte_flow_attr *attr,
596 struct rte_flow_error *error)
599 rte_flow_error_set(error, ENOTSUP,
600 RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
602 "groups are not supported");
605 if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
606 rte_flow_error_set(error, ENOTSUP,
607 RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
609 "priorities are not supported");
613 rte_flow_error_set(error, ENOTSUP,
614 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
616 "egress is not supported");
619 if (!attr->ingress) {
620 rte_flow_error_set(error, ENOTSUP,
621 RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
623 "only ingress is supported");
630 * Extract the actions from the request into the parser.
633 * Pointer to Ethernet device.
635 * Associated actions (list terminated by the END action).
637 * Perform verbose error reporting if not NULL.
638 * @param[in, out] parser
639 * Internal parser structure.
642 * 0 on success, a negative errno value otherwise and rte_errno is set.
645 mlx5_flow_convert_actions(struct rte_eth_dev *dev,
646 const struct rte_flow_action actions[],
647 struct rte_flow_error *error,
648 struct mlx5_flow_parse *parser)
650 enum { FATE = 1, MARK = 2, COUNT = 4, };
651 uint32_t overlap = 0;
652 struct priv *priv = dev->data->dev_private;
656 * Add the default RSS configuration: Verbs requires it to create a QP
657 * even when no RSS is requested.
659 ret = mlx5_flow_convert_rss_conf(parser,
660 (const struct rte_eth_rss_conf *)
664 for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
665 if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
667 } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
669 goto exit_action_overlap;
672 } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
673 const struct rte_flow_action_queue *queue =
674 (const struct rte_flow_action_queue *)
678 goto exit_action_overlap;
680 if (!queue || (queue->index >= priv->rxqs_n))
681 goto exit_action_not_supported;
682 parser->queues_n = 1;
683 parser->queues[0] = queue->index;
684 } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
685 const struct rte_flow_action_rss *rss =
686 (const struct rte_flow_action_rss *)
691 goto exit_action_overlap;
693 if (!rss || !rss->num) {
694 rte_flow_error_set(error, EINVAL,
695 RTE_FLOW_ERROR_TYPE_ACTION,
700 if (rss->num > RTE_DIM(parser->queues)) {
701 rte_flow_error_set(error, EINVAL,
702 RTE_FLOW_ERROR_TYPE_ACTION,
704 "too many queues for RSS"
708 for (n = 0; n < rss->num; ++n) {
709 if (rss->queue[n] >= priv->rxqs_n) {
710 rte_flow_error_set(error, EINVAL,
711 RTE_FLOW_ERROR_TYPE_ACTION,
713 "queue id > number of"
718 for (n = 0; n < rss->num; ++n)
719 parser->queues[n] = rss->queue[n];
720 parser->queues_n = rss->num;
721 if (mlx5_flow_convert_rss_conf(parser, rss->rss_conf)) {
722 rte_flow_error_set(error, EINVAL,
723 RTE_FLOW_ERROR_TYPE_ACTION,
725 "wrong RSS configuration");
728 } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
729 const struct rte_flow_action_mark *mark =
730 (const struct rte_flow_action_mark *)
734 goto exit_action_overlap;
737 rte_flow_error_set(error, EINVAL,
738 RTE_FLOW_ERROR_TYPE_ACTION,
740 "mark must be defined");
742 } else if (mark->id >= MLX5_FLOW_MARK_MAX) {
743 rte_flow_error_set(error, ENOTSUP,
744 RTE_FLOW_ERROR_TYPE_ACTION,
746 "mark must be between 0"
751 parser->mark_id = mark->id;
752 } else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
754 goto exit_action_overlap;
757 } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
758 priv->config.flow_counter_en) {
760 goto exit_action_overlap;
764 goto exit_action_not_supported;
767 /* When fate is unknown, drop traffic. */
768 if (!(overlap & FATE))
770 if (parser->drop && parser->mark)
772 if (!parser->queues_n && !parser->drop) {
773 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
774 NULL, "no valid action");
778 exit_action_not_supported:
779 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
780 actions, "action not supported");
783 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
784 actions, "overlapping actions are not supported");
792 * Pattern specification (list terminated by the END pattern item).
794 * Perform verbose error reporting if not NULL.
795 * @param[in, out] parser
796 * Internal parser structure.
799 * 0 on success, a negative errno value otherwise and rte_errno is set.
802 mlx5_flow_convert_items_validate(const struct rte_flow_item items[],
803 struct rte_flow_error *error,
804 struct mlx5_flow_parse *parser)
806 const struct mlx5_flow_items *cur_item = mlx5_flow_items;
810 /* Initialise the offsets to start after the Verbs attribute. */
811 for (i = 0; i != hash_rxq_init_n; ++i)
812 parser->queue[i].offset = sizeof(struct ibv_flow_attr);
813 for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
814 const struct mlx5_flow_items *token = NULL;
817 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
821 cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
823 if (cur_item->items[i] == items->type) {
824 token = &mlx5_flow_items[items->type];
830 goto exit_item_not_supported;
833 ret = mlx5_flow_item_validate(items,
834 (const uint8_t *)cur_item->mask,
837 goto exit_item_not_supported;
838 if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
840 rte_flow_error_set(error, ENOTSUP,
841 RTE_FLOW_ERROR_TYPE_ITEM,
843 "cannot recognize multiple"
844 " VXLAN encapsulations");
847 parser->inner = IBV_FLOW_SPEC_INNER;
850 parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
852 for (n = 0; n != hash_rxq_init_n; ++n)
853 parser->queue[n].offset += cur_item->dst_sz;
857 parser->queue[HASH_RXQ_ETH].offset +=
858 sizeof(struct ibv_flow_spec_action_drop);
861 for (i = 0; i != hash_rxq_init_n; ++i)
862 parser->queue[i].offset +=
863 sizeof(struct ibv_flow_spec_action_tag);
866 unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
868 for (i = 0; i != hash_rxq_init_n; ++i)
869 parser->queue[i].offset += size;
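/*
 * At this point parser->queue[i].offset holds the total number of bytes
 * needed for the ibv_flow_attr plus every specification of that queue;
 * mlx5_flow_convert() allocates exactly this amount before resetting the
 * offsets for use as write positions.
 */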
872 exit_item_not_supported:
873 return rte_flow_error_set(error, -ret, RTE_FLOW_ERROR_TYPE_ITEM,
874 items, "item not supported");
878 * Allocate memory space to store Verbs flow attributes.
881 * Amount of bytes to allocate.
883 * Perform verbose error reporting if not NULL.
886 * A Verbs flow attribute on success, NULL otherwise and rte_errno is set.
888 static struct ibv_flow_attr *
889 mlx5_flow_convert_allocate(unsigned int size, struct rte_flow_error *error)
891 struct ibv_flow_attr *ibv_attr;
893 ibv_attr = rte_calloc(__func__, 1, size, 0);
895 rte_flow_error_set(error, ENOMEM,
896 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
898 "cannot allocate verbs spec attributes");
905 * Give inner packet matching a higher priority than non-inner matching.
908 * @param[in, out] parser
909 * Internal parser structure.
911 * User flow attribute.
914 mlx5_flow_update_priority(struct mlx5_flow_parse *parser,
915 const struct rte_flow_attr *attr)
920 parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
922 hash_rxq_init[HASH_RXQ_ETH].flow_priority;
925 for (i = 0; i != hash_rxq_init_n; ++i) {
926 if (parser->queue[i].ibv_attr) {
927 parser->queue[i].ibv_attr->priority =
929 hash_rxq_init[i].flow_priority -
930 (parser->inner ? 1 : 0);
936 * Finalise Verbs flow attributes.
938 * @param[in, out] parser
939 * Internal parser structure.
942 mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
944 const unsigned int ipv4 =
945 hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
946 const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 : HASH_RXQ_TCPV6;
947 const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
948 const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 : HASH_RXQ_TCPV4;
949 const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 : HASH_RXQ_IPV4;
950 const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
953 /* Remove any other flow not matching the pattern. */
954 if (parser->queues_n == 1 && !parser->rss_conf.rss_hf) {
955 for (i = 0; i != hash_rxq_init_n; ++i) {
956 if (i == HASH_RXQ_ETH)
958 rte_free(parser->queue[i].ibv_attr);
959 parser->queue[i].ibv_attr = NULL;
963 if (parser->layer == HASH_RXQ_ETH) {
967 * This layer becomes useless as the pattern defines deeper layers.
970 rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
971 parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
973 /* Remove the opposite kind of layer, e.g. IPv6 if the pattern is IPv4. */
974 for (i = ohmin; i != (ohmax + 1); ++i) {
975 if (!parser->queue[i].ibv_attr)
977 rte_free(parser->queue[i].ibv_attr);
978 parser->queue[i].ibv_attr = NULL;
980 /* Remove impossible flows according to the RSS configuration. */
981 if (hash_rxq_init[parser->layer].dpdk_rss_hf &
982 parser->rss_conf.rss_hf) {
983 /* Remove any other flow. */
984 for (i = hmin; i != (hmax + 1); ++i) {
985 if ((i == parser->layer) ||
986 (!parser->queue[i].ibv_attr))
988 rte_free(parser->queue[i].ibv_attr);
989 parser->queue[i].ibv_attr = NULL;
991 } else if (!parser->queue[ip].ibv_attr) {
992 /* No RSS is possible with the current configuration. */
993 parser->queues_n = 1;
998 * Fill missing layers in Verbs specifications, or compute the correct
999 * offset to allocate the memory space for the attributes and
1002 for (i = 0; i != hash_rxq_init_n - 1; ++i) {
1004 struct ibv_flow_spec_ipv4_ext ipv4;
1005 struct ibv_flow_spec_ipv6 ipv6;
1006 struct ibv_flow_spec_tcp_udp udp_tcp;
1011 if (i == parser->layer)
1013 if (parser->layer == HASH_RXQ_ETH) {
1014 if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
1015 size = sizeof(struct ibv_flow_spec_ipv4_ext);
1016 specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
1017 .type = IBV_FLOW_SPEC_IPV4_EXT,
1021 size = sizeof(struct ibv_flow_spec_ipv6);
1022 specs.ipv6 = (struct ibv_flow_spec_ipv6){
1023 .type = IBV_FLOW_SPEC_IPV6,
1027 if (parser->queue[i].ibv_attr) {
1028 dst = (void *)((uintptr_t)
1029 parser->queue[i].ibv_attr +
1030 parser->queue[i].offset);
1031 memcpy(dst, &specs, size);
1032 ++parser->queue[i].ibv_attr->num_of_specs;
1034 parser->queue[i].offset += size;
1036 if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
1037 (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
1038 size = sizeof(struct ibv_flow_spec_tcp_udp);
1039 specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
1040 .type = ((i == HASH_RXQ_UDPV4 ||
1041 i == HASH_RXQ_UDPV6) ?
1046 if (parser->queue[i].ibv_attr) {
1047 dst = (void *)((uintptr_t)
1048 parser->queue[i].ibv_attr +
1049 parser->queue[i].offset);
1050 memcpy(dst, &specs, size);
1051 ++parser->queue[i].ibv_attr->num_of_specs;
1053 parser->queue[i].offset += size;
1059 * Validate and convert a flow supported by the NIC.
1062 * Pointer to Ethernet device.
1064 * Flow rule attributes.
1065 * @param[in] pattern
1066 * Pattern specification (list terminated by the END pattern item).
1067 * @param[in] actions
1068 * Associated actions (list terminated by the END action).
1070 * Perform verbose error reporting if not NULL.
1071 * @param[in, out] parser
1072 * Internal parser structure.
1075 * 0 on success, a negative errno value otherwise and rte_errno is set.
1078 mlx5_flow_convert(struct rte_eth_dev *dev,
1079 const struct rte_flow_attr *attr,
1080 const struct rte_flow_item items[],
1081 const struct rte_flow_action actions[],
1082 struct rte_flow_error *error,
1083 struct mlx5_flow_parse *parser)
1085 const struct mlx5_flow_items *cur_item = mlx5_flow_items;
1089 /* First step. Validate the attributes, items and actions. */
1090 *parser = (struct mlx5_flow_parse){
1091 .create = parser->create,
1092 .layer = HASH_RXQ_ETH,
1093 .mark_id = MLX5_FLOW_MARK_DEFAULT,
1095 ret = mlx5_flow_convert_attributes(attr, error);
1098 ret = mlx5_flow_convert_actions(dev, actions, error, parser);
1101 ret = mlx5_flow_convert_items_validate(items, error, parser);
1104 mlx5_flow_convert_finalise(parser);
1107 * Allocate the memory space to store Verbs specifications.
1110 unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;
1112 parser->queue[HASH_RXQ_ETH].ibv_attr =
1113 mlx5_flow_convert_allocate(offset, error);
1114 if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
1116 parser->queue[HASH_RXQ_ETH].offset =
1117 sizeof(struct ibv_flow_attr);
1119 for (i = 0; i != hash_rxq_init_n; ++i) {
1120 unsigned int offset;
1122 if (!(parser->rss_conf.rss_hf &
1123 hash_rxq_init[i].dpdk_rss_hf) &&
1124 (i != HASH_RXQ_ETH))
1126 offset = parser->queue[i].offset;
1127 parser->queue[i].ibv_attr =
1128 mlx5_flow_convert_allocate(offset, error);
1129 if (!parser->queue[i].ibv_attr)
1131 parser->queue[i].offset = sizeof(struct ibv_flow_attr);
1134 /* Third step. Parse and convert the items, filling the specifications. */
1136 for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
1137 struct mlx5_flow_data data = {
1142 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
1144 cur_item = &mlx5_flow_items[items->type];
1145 ret = cur_item->convert(items,
1146 (cur_item->default_mask ?
1147 cur_item->default_mask :
1154 mlx5_flow_create_flag_mark(parser, parser->mark_id);
1155 if (parser->count && parser->create) {
1156 mlx5_flow_create_count(dev, parser);
1158 goto exit_count_error;
1161 * Last step. Complete missing specifications to reach the RSS configuration.
1165 mlx5_flow_convert_finalise(parser);
1166 mlx5_flow_update_priority(parser, attr);
1168 /* Only verification is expected, all resources should be released. */
1169 if (!parser->create) {
1170 for (i = 0; i != hash_rxq_init_n; ++i) {
1171 if (parser->queue[i].ibv_attr) {
1172 rte_free(parser->queue[i].ibv_attr);
1173 parser->queue[i].ibv_attr = NULL;
1179 for (i = 0; i != hash_rxq_init_n; ++i) {
1180 if (parser->queue[i].ibv_attr) {
1181 rte_free(parser->queue[i].ibv_attr);
1182 parser->queue[i].ibv_attr = NULL;
1185 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1186 NULL, "cannot allocate verbs spec attributes");
1189 rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1190 NULL, "cannot create counter");
1195 * Copy the created specification into the flow.
1198 * Internal parser structure.
1200 * Created specification.
1202 * Size in bytes of the specification to copy.
1205 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
1211 for (i = 0; i != hash_rxq_init_n; ++i) {
1212 if (!parser->queue[i].ibv_attr)
1214 /* Specification must be the same L3 type or none. */
1215 if (parser->layer == HASH_RXQ_ETH ||
1216 (hash_rxq_init[parser->layer].ip_version ==
1217 hash_rxq_init[i].ip_version) ||
1218 (hash_rxq_init[i].ip_version == 0)) {
1219 dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1220 parser->queue[i].offset);
1221 memcpy(dst, src, size);
1222 ++parser->queue[i].ibv_attr->num_of_specs;
1223 parser->queue[i].offset += size;
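/*
 * The specification is duplicated into every remaining per-queue
 * attribute with a compatible L3 type, e.g. an IPv4 specification is not
 * copied into the attributes of the IPv6 hash Rx queues.
 */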
1229 * Convert Ethernet item to Verbs specification.
1232 * Item specification.
1233 * @param[in] default_mask
1234 * Default bit-masks to use when item->mask is not provided.
1235 * @param[in, out] data
1239 * 0 on success, a negative errno value otherwise and rte_errno is set.
1242 mlx5_flow_create_eth(const struct rte_flow_item *item,
1243 const void *default_mask,
1244 struct mlx5_flow_data *data)
1246 const struct rte_flow_item_eth *spec = item->spec;
1247 const struct rte_flow_item_eth *mask = item->mask;
1248 struct mlx5_flow_parse *parser = data->parser;
1249 const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1250 struct ibv_flow_spec_eth eth = {
1251 .type = parser->inner | IBV_FLOW_SPEC_ETH,
1255 /* Don't update layer for the inner pattern. */
1257 parser->layer = HASH_RXQ_ETH;
1262 mask = default_mask;
1263 memcpy(ð.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
1264 memcpy(ð.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
1265 eth.val.ether_type = spec->type;
1266 memcpy(ð.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
1267 memcpy(ð.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
1268 eth.mask.ether_type = mask->type;
1269 /* Remove unwanted bits from values. */
1270 for (i = 0; i < ETHER_ADDR_LEN; ++i) {
1271 eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
1272 eth.val.src_mac[i] &= eth.mask.src_mac[i];
1274 eth.val.ether_type &= eth.mask.ether_type;
1276 mlx5_flow_create_copy(parser, ð, eth_size);
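/*
 * Illustrative item handled here (hypothetical values):
 *
 *   struct rte_flow_item_eth spec = {
 *       .dst.addr_bytes = "\x00\x11\x22\x33\x44\x55",
 *   };
 *   struct rte_flow_item item = {
 *       .type = RTE_FLOW_ITEM_TYPE_ETH,
 *       .spec = &spec,
 *       .mask = &rte_flow_item_eth_mask,
 *   };
 */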
1281 * Convert VLAN item to Verbs specification.
1284 * Item specification.
1285 * @param[in] default_mask
1286 * Default bit-masks to use when item->mask is not provided.
1287 * @param[in, out] data
1291 * 0 on success, a negative errno value otherwise and rte_errno is set.
1294 mlx5_flow_create_vlan(const struct rte_flow_item *item,
1295 const void *default_mask,
1296 struct mlx5_flow_data *data)
1298 const struct rte_flow_item_vlan *spec = item->spec;
1299 const struct rte_flow_item_vlan *mask = item->mask;
1300 struct mlx5_flow_parse *parser = data->parser;
1301 struct ibv_flow_spec_eth *eth;
1302 const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1307 mask = default_mask;
1309 for (i = 0; i != hash_rxq_init_n; ++i) {
1310 if (!parser->queue[i].ibv_attr)
1313 eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1314 parser->queue[i].offset - eth_size);
1315 eth->val.vlan_tag = spec->tci;
1316 eth->mask.vlan_tag = mask->tci;
1317 eth->val.vlan_tag &= eth->mask.vlan_tag;
1319 * From the Verbs perspective, an empty VLAN is equivalent
1320 * to a packet without a VLAN layer.
1322 if (!eth->mask.vlan_tag)
1328 return rte_flow_error_set(data->error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM,
1329 item, "VLAN cannot be empty");
1333 * Convert IPv4 item to Verbs specification.
1336 * Item specification.
1337 * @param[in] default_mask
1338 * Default bit-masks to use when item->mask is not provided.
1339 * @param[in, out] data
1343 * 0 on success, a negative errno value otherwise and rte_errno is set.
1346 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
1347 const void *default_mask,
1348 struct mlx5_flow_data *data)
1350 const struct rte_flow_item_ipv4 *spec = item->spec;
1351 const struct rte_flow_item_ipv4 *mask = item->mask;
1352 struct mlx5_flow_parse *parser = data->parser;
1353 unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
1354 struct ibv_flow_spec_ipv4_ext ipv4 = {
1355 .type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
1359 /* Don't update layer for the inner pattern. */
1361 parser->layer = HASH_RXQ_IPV4;
1364 mask = default_mask;
1365 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
1366 .src_ip = spec->hdr.src_addr,
1367 .dst_ip = spec->hdr.dst_addr,
1368 .proto = spec->hdr.next_proto_id,
1369 .tos = spec->hdr.type_of_service,
1371 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
1372 .src_ip = mask->hdr.src_addr,
1373 .dst_ip = mask->hdr.dst_addr,
1374 .proto = mask->hdr.next_proto_id,
1375 .tos = mask->hdr.type_of_service,
1377 /* Remove unwanted bits from values. */
1378 ipv4.val.src_ip &= ipv4.mask.src_ip;
1379 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
1380 ipv4.val.proto &= ipv4.mask.proto;
1381 ipv4.val.tos &= ipv4.mask.tos;
1383 mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
1388 * Convert IPv6 item to Verbs specification.
1391 * Item specification.
1392 * @param[in] default_mask
1393 * Default bit-masks to use when item->mask is not provided.
1394 * @param[in, out] data
1398 * 0 on success, a negative errno value otherwise and rte_errno is set.
1401 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
1402 const void *default_mask,
1403 struct mlx5_flow_data *data)
1405 const struct rte_flow_item_ipv6 *spec = item->spec;
1406 const struct rte_flow_item_ipv6 *mask = item->mask;
1407 struct mlx5_flow_parse *parser = data->parser;
1408 unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
1409 struct ibv_flow_spec_ipv6 ipv6 = {
1410 .type = parser->inner | IBV_FLOW_SPEC_IPV6,
1414 /* Don't update layer for the inner pattern. */
1416 parser->layer = HASH_RXQ_IPV6;
1419 uint32_t vtc_flow_val;
1420 uint32_t vtc_flow_mask;
1423 mask = default_mask;
1424 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
1425 RTE_DIM(ipv6.val.src_ip));
1426 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
1427 RTE_DIM(ipv6.val.dst_ip));
1428 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
1429 RTE_DIM(ipv6.mask.src_ip));
1430 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
1431 RTE_DIM(ipv6.mask.dst_ip));
1432 vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
1433 vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
1434 ipv6.val.flow_label =
1435 rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
1437 ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
1439 ipv6.val.next_hdr = spec->hdr.proto;
1440 ipv6.val.hop_limit = spec->hdr.hop_limits;
1441 ipv6.mask.flow_label =
1442 rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
1444 ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
1446 ipv6.mask.next_hdr = mask->hdr.proto;
1447 ipv6.mask.hop_limit = mask->hdr.hop_limits;
1448 /* Remove unwanted bits from values. */
1449 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
1450 ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
1451 ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
1453 ipv6.val.flow_label &= ipv6.mask.flow_label;
1454 ipv6.val.traffic_class &= ipv6.mask.traffic_class;
1455 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
1456 ipv6.val.hop_limit &= ipv6.mask.hop_limit;
1458 mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
1463 * Convert UDP item to Verbs specification.
1466 * Item specification.
1467 * @param[in] default_mask
1468 * Default bit-masks to use when item->mask is not provided.
1469 * @param[in, out] data
1473 * 0 on success, a negative errno value otherwise and rte_errno is set.
1476 mlx5_flow_create_udp(const struct rte_flow_item *item,
1477 const void *default_mask,
1478 struct mlx5_flow_data *data)
1480 const struct rte_flow_item_udp *spec = item->spec;
1481 const struct rte_flow_item_udp *mask = item->mask;
1482 struct mlx5_flow_parse *parser = data->parser;
1483 unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1484 struct ibv_flow_spec_tcp_udp udp = {
1485 .type = parser->inner | IBV_FLOW_SPEC_UDP,
1489 /* Don't update layer for the inner pattern. */
1490 if (!parser->inner) {
1491 if (parser->layer == HASH_RXQ_IPV4)
1492 parser->layer = HASH_RXQ_UDPV4;
1494 parser->layer = HASH_RXQ_UDPV6;
1498 mask = default_mask;
1499 udp.val.dst_port = spec->hdr.dst_port;
1500 udp.val.src_port = spec->hdr.src_port;
1501 udp.mask.dst_port = mask->hdr.dst_port;
1502 udp.mask.src_port = mask->hdr.src_port;
1503 /* Remove unwanted bits from values. */
1504 udp.val.src_port &= udp.mask.src_port;
1505 udp.val.dst_port &= udp.mask.dst_port;
1507 mlx5_flow_create_copy(parser, &udp, udp_size);
1512 * Convert TCP item to Verbs specification.
1515 * Item specification.
1516 * @param[in] default_mask
1517 * Default bit-masks to use when item->mask is not provided.
1518 * @param[in, out] data
1522 * 0 on success, a negative errno value otherwise and rte_errno is set.
1525 mlx5_flow_create_tcp(const struct rte_flow_item *item,
1526 const void *default_mask,
1527 struct mlx5_flow_data *data)
1529 const struct rte_flow_item_tcp *spec = item->spec;
1530 const struct rte_flow_item_tcp *mask = item->mask;
1531 struct mlx5_flow_parse *parser = data->parser;
1532 unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1533 struct ibv_flow_spec_tcp_udp tcp = {
1534 .type = parser->inner | IBV_FLOW_SPEC_TCP,
1538 /* Don't update layer for the inner pattern. */
1539 if (!parser->inner) {
1540 if (parser->layer == HASH_RXQ_IPV4)
1541 parser->layer = HASH_RXQ_TCPV4;
1543 parser->layer = HASH_RXQ_TCPV6;
1547 mask = default_mask;
1548 tcp.val.dst_port = spec->hdr.dst_port;
1549 tcp.val.src_port = spec->hdr.src_port;
1550 tcp.mask.dst_port = mask->hdr.dst_port;
1551 tcp.mask.src_port = mask->hdr.src_port;
1552 /* Remove unwanted bits from values. */
1553 tcp.val.src_port &= tcp.mask.src_port;
1554 tcp.val.dst_port &= tcp.mask.dst_port;
1556 mlx5_flow_create_copy(parser, &tcp, tcp_size);
1561 * Convert VXLAN item to Verbs specification.
1564 * Item specification.
1565 * @param[in] default_mask
1566 * Default bit-masks to use when item->mask is not provided.
1567 * @param[in, out] data
1571 * 0 on success, a negative errno value otherwise and rte_errno is set.
1574 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
1575 const void *default_mask,
1576 struct mlx5_flow_data *data)
1578 const struct rte_flow_item_vxlan *spec = item->spec;
1579 const struct rte_flow_item_vxlan *mask = item->mask;
1580 struct mlx5_flow_parse *parser = data->parser;
1581 unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1582 struct ibv_flow_spec_tunnel vxlan = {
1583 .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1592 parser->inner = IBV_FLOW_SPEC_INNER;
1595 mask = default_mask;
1596 memcpy(&id.vni[1], spec->vni, 3);
1597 vxlan.val.tunnel_id = id.vlan_id;
1598 memcpy(&id.vni[1], mask->vni, 3);
1599 vxlan.mask.tunnel_id = id.vlan_id;
1600 /* Remove unwanted bits from values. */
1601 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
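/*
 * Note: after the copies above, the 24-bit VNI occupies bytes 1..3 of the
 * 32-bit tunnel id while byte 0 stays clear, i.e. the field holds the VNI
 * in network byte order.
 */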
1604 * Tunnel id 0 is equivalent to not adding a VXLAN layer: if only this
1605 * layer is defined in the Verbs specification, it is interpreted as a
1606 * wildcard and all packets will match this rule; if it follows a full
1607 * stack layer (e.g. eth / ipv4 / udp), all packets matching those
1608 * layers will also match this rule.
1609 * To avoid such a situation, VNI 0 is currently refused.
1611 if (!vxlan.val.tunnel_id)
1612 return rte_flow_error_set(data->error, EINVAL,
1613 RTE_FLOW_ERROR_TYPE_ITEM,
1615 "VxLAN vni cannot be 0");
1616 mlx5_flow_create_copy(parser, &vxlan, size);
1621 * Convert mark/flag action to Verbs specification.
1624 * Internal parser structure.
1629 * 0 on success, a negative errno value otherwise and rte_errno is set.
1632 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
1634 unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1635 struct ibv_flow_spec_action_tag tag = {
1636 .type = IBV_FLOW_SPEC_ACTION_TAG,
1638 .tag_id = mlx5_flow_mark_set(mark_id),
1641 assert(parser->mark);
1642 mlx5_flow_create_copy(parser, &tag, size);
1647 * Convert count action to Verbs specification.
1650 * Pointer to Ethernet device.
1652 * Pointer to MLX5 flow parser structure.
1655 * 0 on success, a negative errno value otherwise and rte_errno is set.
1658 mlx5_flow_create_count(struct rte_eth_dev *dev __rte_unused,
1659 struct mlx5_flow_parse *parser __rte_unused)
1661 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
1662 struct priv *priv = dev->data->dev_private;
1663 unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
1664 struct ibv_counter_set_init_attr init_attr = {0};
1665 struct ibv_flow_spec_counter_action counter = {
1666 .type = IBV_FLOW_SPEC_ACTION_COUNT,
1668 .counter_set_handle = 0,
1671 init_attr.counter_set_id = 0;
1672 parser->cs = mlx5_glue->create_counter_set(priv->ctx, &init_attr);
1677 counter.counter_set_handle = parser->cs->handle;
1678 mlx5_flow_create_copy(parser, &counter, size);
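/*
 * parser->cs is saved into the flow (flow->cs) at creation time and later
 * queried by mlx5_flow_query_count() through
 * mlx5_glue->query_counter_set().
 */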
1684 * Complete flow rule creation with a drop queue.
1687 * Pointer to Ethernet device.
1689 * Internal parser structure.
1691 * Pointer to the rte_flow.
1693 * Perform verbose error reporting if not NULL.
1696 * 0 on success, a negative errno value otherwise and rte_errno is set.
1699 mlx5_flow_create_action_queue_drop(struct rte_eth_dev *dev,
1700 struct mlx5_flow_parse *parser,
1701 struct rte_flow *flow,
1702 struct rte_flow_error *error)
1704 struct priv *priv = dev->data->dev_private;
1705 struct ibv_flow_spec_action_drop *drop;
1706 unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
1711 drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
1712 parser->queue[HASH_RXQ_ETH].offset);
1713 *drop = (struct ibv_flow_spec_action_drop){
1714 .type = IBV_FLOW_SPEC_ACTION_DROP,
1717 ++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
1718 parser->queue[HASH_RXQ_ETH].offset += size;
1719 flow->frxq[HASH_RXQ_ETH].ibv_attr =
1720 parser->queue[HASH_RXQ_ETH].ibv_attr;
1722 flow->cs = parser->cs;
1723 if (!priv->dev->data->dev_started)
1725 parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
1726 flow->frxq[HASH_RXQ_ETH].ibv_flow =
1727 mlx5_glue->create_flow(priv->flow_drop_queue->qp,
1728 flow->frxq[HASH_RXQ_ETH].ibv_attr);
1729 if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1730 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1731 NULL, "flow rule creation failure");
1737 if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1738 claim_zero(mlx5_glue->destroy_flow
1739 (flow->frxq[HASH_RXQ_ETH].ibv_flow));
1740 flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
1742 if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
1743 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
1744 flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
1747 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
1755 * Create hash Rx queues when RSS is enabled.
1758 * Pointer to Ethernet device.
1760 * Internal parser structure.
1762 * Pointer to the rte_flow.
1764 * Perform verbose error reporting if not NULL.
1767 * 0 on success, a negative errno value otherwise and rte_errno is set.
1770 mlx5_flow_create_action_queue_rss(struct rte_eth_dev *dev,
1771 struct mlx5_flow_parse *parser,
1772 struct rte_flow *flow,
1773 struct rte_flow_error *error)
1775 struct priv *priv = dev->data->dev_private;
1778 for (i = 0; i != hash_rxq_init_n; ++i) {
1779 uint64_t hash_fields;
1781 if (!parser->queue[i].ibv_attr)
1783 flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
1784 parser->queue[i].ibv_attr = NULL;
1785 hash_fields = hash_rxq_init[i].hash_fields;
1786 if (!priv->dev->data->dev_started)
1788 flow->frxq[i].hrxq =
1790 parser->rss_conf.rss_key,
1791 parser->rss_conf.rss_key_len,
1795 if (flow->frxq[i].hrxq)
1797 flow->frxq[i].hrxq =
1799 parser->rss_conf.rss_key,
1800 parser->rss_conf.rss_key_len,
1804 if (!flow->frxq[i].hrxq) {
1805 return rte_flow_error_set(error, ENOMEM,
1806 RTE_FLOW_ERROR_TYPE_HANDLE,
1808 "cannot create hash rxq");
1815 * Complete flow rule creation.
1818 * Pointer to Ethernet device.
1820 * Internal parser structure.
1822 * Pointer to the rte_flow.
1824 * Perform verbose error reporting if not NULL.
1827 * 0 on success, a negative errno value otherwise and rte_errno is set.
1830 mlx5_flow_create_action_queue(struct rte_eth_dev *dev,
1831 struct mlx5_flow_parse *parser,
1832 struct rte_flow *flow,
1833 struct rte_flow_error *error)
1835 struct priv *priv = dev->data->dev_private;
1838 unsigned int flows_n = 0;
1842 assert(!parser->drop);
1843 ret = mlx5_flow_create_action_queue_rss(dev, parser, flow, error);
1847 flow->cs = parser->cs;
1848 if (!priv->dev->data->dev_started)
1850 for (i = 0; i != hash_rxq_init_n; ++i) {
1851 if (!flow->frxq[i].hrxq)
1853 flow->frxq[i].ibv_flow =
1854 mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
1855 flow->frxq[i].ibv_attr);
1856 if (!flow->frxq[i].ibv_flow) {
1857 rte_flow_error_set(error, ENOMEM,
1858 RTE_FLOW_ERROR_TYPE_HANDLE,
1859 NULL, "flow rule creation failure");
1863 DRV_LOG(DEBUG, "port %u %p type %d QP %p ibv_flow %p",
1866 (void *)flow->frxq[i].hrxq,
1867 (void *)flow->frxq[i].ibv_flow);
1870 rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE,
1871 NULL, "internal error in flow creation");
1874 for (i = 0; i != parser->queues_n; ++i) {
1875 struct mlx5_rxq_data *q =
1876 (*priv->rxqs)[parser->queues[i]];
1878 q->mark |= parser->mark;
1882 ret = rte_errno; /* Save rte_errno before cleanup. */
1884 for (i = 0; i != hash_rxq_init_n; ++i) {
1885 if (flow->frxq[i].ibv_flow) {
1886 struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;
1888 claim_zero(mlx5_glue->destroy_flow(ibv_flow));
1890 if (flow->frxq[i].hrxq)
1891 mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
1892 if (flow->frxq[i].ibv_attr)
1893 rte_free(flow->frxq[i].ibv_attr);
1896 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
1900 rte_errno = ret; /* Restore rte_errno. */
1908 * Pointer to Ethernet device.
1910 * Pointer to a TAILQ flow list.
1912 * Flow rule attributes.
1913 * @param[in] pattern
1914 * Pattern specification (list terminated by the END pattern item).
1915 * @param[in] actions
1916 * Associated actions (list terminated by the END action).
1918 * Perform verbose error reporting if not NULL.
1921 * A flow on success, NULL otherwise and rte_errno is set.
1923 static struct rte_flow *
1924 mlx5_flow_list_create(struct rte_eth_dev *dev,
1925 struct mlx5_flows *list,
1926 const struct rte_flow_attr *attr,
1927 const struct rte_flow_item items[],
1928 const struct rte_flow_action actions[],
1929 struct rte_flow_error *error)
1931 struct mlx5_flow_parse parser = { .create = 1, };
1932 struct rte_flow *flow = NULL;
1936 ret = mlx5_flow_convert(dev, attr, items, actions, error, &parser);
1939 flow = rte_calloc(__func__, 1,
1940 sizeof(*flow) + parser.queues_n * sizeof(uint16_t),
1943 rte_flow_error_set(error, ENOMEM,
1944 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1946 "cannot allocate flow memory");
1949 /* Copy the queue configuration. */
1950 flow->queues = (uint16_t (*)[])(flow + 1);
1951 memcpy(flow->queues, parser.queues, parser.queues_n * sizeof(uint16_t));
1952 flow->queues_n = parser.queues_n;
1953 flow->mark = parser.mark;
1954 /* Copy RSS configuration. */
1955 flow->rss_conf = parser.rss_conf;
1956 flow->rss_conf.rss_key = flow->rss_key;
1957 memcpy(flow->rss_key, parser.rss_key, parser.rss_conf.rss_key_len);
1958 /* Finalise the flow. */
1960 ret = mlx5_flow_create_action_queue_drop(dev, &parser, flow,
1963 ret = mlx5_flow_create_action_queue(dev, &parser, flow, error);
1966 TAILQ_INSERT_TAIL(list, flow, next);
1967 DRV_LOG(DEBUG, "port %u flow created %p", dev->data->port_id,
1971 DRV_LOG(ERR, "port %u flow creation error: %s", dev->data->port_id,
1973 for (i = 0; i != hash_rxq_init_n; ++i) {
1974 if (parser.queue[i].ibv_attr)
1975 rte_free(parser.queue[i].ibv_attr);
1982 * Validate a flow supported by the NIC.
1984 * @see rte_flow_validate()
1988 mlx5_flow_validate(struct rte_eth_dev *dev,
1989 const struct rte_flow_attr *attr,
1990 const struct rte_flow_item items[],
1991 const struct rte_flow_action actions[],
1992 struct rte_flow_error *error)
1994 struct mlx5_flow_parse parser = { .create = 0, };
1996 return mlx5_flow_convert(dev, attr, items, actions, error, &parser);
2002 * @see rte_flow_create()
2006 mlx5_flow_create(struct rte_eth_dev *dev,
2007 const struct rte_flow_attr *attr,
2008 const struct rte_flow_item items[],
2009 const struct rte_flow_action actions[],
2010 struct rte_flow_error *error)
2012 struct priv *priv = dev->data->dev_private;
2014 return mlx5_flow_list_create(dev, &priv->flows, attr, items, actions,
2019 * Destroy a flow in a list.
2022 * Pointer to Ethernet device.
2024 * Pointer to a TAILQ flow list.
2029 mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
2030 struct rte_flow *flow)
2032 struct priv *priv = dev->data->dev_private;
2035 if (flow->drop || !flow->mark)
2037 for (i = 0; i != flow->queues_n; ++i) {
2038 struct rte_flow *tmp;
2042 * To remove the mark from the queue, the queue must not be
2043 * present in any other marked flow (RSS or not).
2045 TAILQ_FOREACH(tmp, list, next) {
2047 uint16_t *tqs = NULL;
2052 for (j = 0; j != hash_rxq_init_n; ++j) {
2053 if (!tmp->frxq[j].hrxq)
2055 tqs = tmp->frxq[j].hrxq->ind_table->queues;
2056 tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
2060 for (j = 0; (j != tq_n) && !mark; j++)
2061 if (tqs[j] == (*flow->queues)[i])
2064 (*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
2068 if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
2069 claim_zero(mlx5_glue->destroy_flow
2070 (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2071 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
2073 for (i = 0; i != hash_rxq_init_n; ++i) {
2074 struct mlx5_flow *frxq = &flow->frxq[i];
2077 claim_zero(mlx5_glue->destroy_flow
2080 mlx5_hrxq_release(dev, frxq->hrxq);
2082 rte_free(frxq->ibv_attr);
2086 claim_zero(mlx5_glue->destroy_counter_set(flow->cs));
2089 TAILQ_REMOVE(list, flow, next);
2090 DRV_LOG(DEBUG, "port %u flow destroyed %p", dev->data->port_id,
2096 * Destroy all flows.
2099 * Pointer to Ethernet device.
2101 * Pointer to a TAILQ flow list.
2104 mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
2106 while (!TAILQ_EMPTY(list)) {
2107 struct rte_flow *flow;
2109 flow = TAILQ_FIRST(list);
2110 mlx5_flow_list_destroy(dev, list, flow);
2115 * Create drop queue.
2118 * Pointer to Ethernet device.
2121 * 0 on success, a negative errno value otherwise and rte_errno is set.
2124 mlx5_flow_create_drop_queue(struct rte_eth_dev *dev)
2126 struct priv *priv = dev->data->dev_private;
2127 struct mlx5_hrxq_drop *fdq = NULL;
2131 fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
2134 "port %u cannot allocate memory for drop queue",
2135 dev->data->port_id);
2139 fdq->cq = mlx5_glue->create_cq(priv->ctx, 1, NULL, NULL, 0);
2141 DRV_LOG(WARNING, "port %u cannot allocate CQ for drop queue",
2142 dev->data->port_id);
2146 fdq->wq = mlx5_glue->create_wq
2148 &(struct ibv_wq_init_attr){
2149 .wq_type = IBV_WQT_RQ,
2156 DRV_LOG(WARNING, "port %u cannot allocate WQ for drop queue",
2157 dev->data->port_id);
2161 fdq->ind_table = mlx5_glue->create_rwq_ind_table
2163 &(struct ibv_rwq_ind_table_init_attr){
2164 .log_ind_tbl_size = 0,
2165 .ind_tbl = &fdq->wq,
2168 if (!fdq->ind_table) {
2170 "port %u cannot allocate indirection table for drop"
2172 dev->data->port_id);
2176 fdq->qp = mlx5_glue->create_qp_ex
2178 &(struct ibv_qp_init_attr_ex){
2179 .qp_type = IBV_QPT_RAW_PACKET,
2181 IBV_QP_INIT_ATTR_PD |
2182 IBV_QP_INIT_ATTR_IND_TABLE |
2183 IBV_QP_INIT_ATTR_RX_HASH,
2184 .rx_hash_conf = (struct ibv_rx_hash_conf){
2186 IBV_RX_HASH_FUNC_TOEPLITZ,
2187 .rx_hash_key_len = rss_hash_default_key_len,
2188 .rx_hash_key = rss_hash_default_key,
2189 .rx_hash_fields_mask = 0,
2191 .rwq_ind_tbl = fdq->ind_table,
2195 DRV_LOG(WARNING, "port %u cannot allocate QP for drop queue",
2196 dev->data->port_id);
2200 priv->flow_drop_queue = fdq;
2204 claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2206 claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2208 claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2210 claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2213 priv->flow_drop_queue = NULL;
2218 * Delete drop queue.
2221 * Pointer to Ethernet device.
2224 mlx5_flow_delete_drop_queue(struct rte_eth_dev *dev)
2226 struct priv *priv = dev->data->dev_private;
2227 struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
2232 claim_zero(mlx5_glue->destroy_qp(fdq->qp));
2234 claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table));
2236 claim_zero(mlx5_glue->destroy_wq(fdq->wq));
2238 claim_zero(mlx5_glue->destroy_cq(fdq->cq));
2240 priv->flow_drop_queue = NULL;
2247 * Pointer to Ethernet device.
2249 * Pointer to a TAILQ flow list.
2252 mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
2254 struct priv *priv = dev->data->dev_private;
2255 struct rte_flow *flow;
2257 TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
2259 struct mlx5_ind_table_ibv *ind_tbl = NULL;
2262 if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
2264 claim_zero(mlx5_glue->destroy_flow
2265 (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2266 flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
2267 DRV_LOG(DEBUG, "port %u flow %p removed",
2268 dev->data->port_id, (void *)flow);
2272 /* Verify the flow has not already been cleaned. */
2273 for (i = 0; i != hash_rxq_init_n; ++i) {
2274 if (!flow->frxq[i].ibv_flow)
2277 * The indirection table may be necessary to remove the
2278 * flags in the Rx queues.
2279 * This helps to speed up the process by avoiding
2282 ind_tbl = flow->frxq[i].hrxq->ind_table;
2285 if (i == hash_rxq_init_n)
2289 for (i = 0; i != ind_tbl->queues_n; ++i)
2290 (*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
2292 for (i = 0; i != hash_rxq_init_n; ++i) {
2293 if (!flow->frxq[i].ibv_flow)
2295 claim_zero(mlx5_glue->destroy_flow
2296 (flow->frxq[i].ibv_flow));
2297 flow->frxq[i].ibv_flow = NULL;
2298 mlx5_hrxq_release(dev, flow->frxq[i].hrxq);
2299 flow->frxq[i].hrxq = NULL;
2301 DRV_LOG(DEBUG, "port %u flow %p removed", dev->data->port_id,
2310 * Pointer to Ethernet device.
2312 * Pointer to a TAILQ flow list.
2315 * 0 on success, a negative errno value otherwise and rte_errno is set.
2318 mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
2320 struct priv *priv = dev->data->dev_private;
2321 struct rte_flow *flow;
2323 TAILQ_FOREACH(flow, list, next) {
2327 flow->frxq[HASH_RXQ_ETH].ibv_flow =
2328 mlx5_glue->create_flow
2329 (priv->flow_drop_queue->qp,
2330 flow->frxq[HASH_RXQ_ETH].ibv_attr);
2331 if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2333 "port %u flow %p cannot be applied",
2334 dev->data->port_id, (void *)flow);
2338 DRV_LOG(DEBUG, "port %u flow %p applied",
2339 dev->data->port_id, (void *)flow);
2343 for (i = 0; i != hash_rxq_init_n; ++i) {
2344 if (!flow->frxq[i].ibv_attr)
2346 flow->frxq[i].hrxq =
2347 mlx5_hrxq_get(dev, flow->rss_conf.rss_key,
2348 flow->rss_conf.rss_key_len,
2349 hash_rxq_init[i].hash_fields,
2352 if (flow->frxq[i].hrxq)
2354 flow->frxq[i].hrxq =
2355 mlx5_hrxq_new(dev, flow->rss_conf.rss_key,
2356 flow->rss_conf.rss_key_len,
2357 hash_rxq_init[i].hash_fields,
2360 if (!flow->frxq[i].hrxq) {
2362 "port %u flow %p cannot be applied",
2363 dev->data->port_id, (void *)flow);
2368 flow->frxq[i].ibv_flow =
2369 mlx5_glue->create_flow(flow->frxq[i].hrxq->qp,
2370 flow->frxq[i].ibv_attr);
2371 if (!flow->frxq[i].ibv_flow) {
2373 "port %u flow %p cannot be applied",
2374 dev->data->port_id, (void *)flow);
2378 DRV_LOG(DEBUG, "port %u flow %p applied",
2379 dev->data->port_id, (void *)flow);
2383 for (i = 0; i != flow->queues_n; ++i)
2384 (*priv->rxqs)[(*flow->queues)[i]]->mark = 1;
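/*
 * Propagating the mark flag to every Rx queue used by the flow lets the
 * datapath copy the flow mark into the mbuf (hash.fdir.hi) for matching
 * packets.
 */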
2390 * Verify the flow list is empty.
2393 * Pointer to Ethernet device.
2395 * @return the number of flows not released.
2398 mlx5_flow_verify(struct rte_eth_dev *dev)
2400 struct priv *priv = dev->data->dev_private;
2401 struct rte_flow *flow;
2404 TAILQ_FOREACH(flow, &priv->flows, next) {
2405 DRV_LOG(DEBUG, "port %u flow %p still referenced",
2406 dev->data->port_id, (void *)flow);
2413 * Enable a control flow configured from the control plane.
2416 * Pointer to Ethernet device.
2418 * An Ethernet flow spec to apply.
2420 * An Ethernet flow mask to apply.
2422 * A VLAN flow spec to apply.
2424 * A VLAN flow mask to apply.
2427 * 0 on success, a negative errno value otherwise and rte_errno is set.
int
mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
		    struct rte_flow_item_eth *eth_spec,
		    struct rte_flow_item_eth *eth_mask,
		    struct rte_flow_item_vlan *vlan_spec,
		    struct rte_flow_item_vlan *vlan_mask)
{
	struct priv *priv = dev->data->dev_private;
	const struct rte_flow_attr attr = {
		.ingress = 1,
		.priority = MLX5_CTRL_FLOW_PRIORITY,
	};
	struct rte_flow_item items[] = {
		{
			.type = RTE_FLOW_ITEM_TYPE_ETH,
			.spec = eth_spec,
			.last = NULL,
			.mask = eth_mask,
		},
		{
			.type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
				RTE_FLOW_ITEM_TYPE_END,
			.spec = vlan_spec,
			.last = NULL,
			.mask = vlan_mask,
		},
		{
			.type = RTE_FLOW_ITEM_TYPE_END,
		},
	};
	uint16_t queue[priv->reta_idx_n];
	struct rte_flow_action_rss action_rss = {
		.rss_conf = &priv->rss_conf,
		.num = priv->reta_idx_n,
		.queue = queue,
	};
	struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_RSS,
			.conf = &action_rss,
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		},
	};
	struct rte_flow *flow;
	struct rte_flow_error error;
	unsigned int i;

	if (!priv->reta_idx_n) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	for (i = 0; i != priv->reta_idx_n; ++i)
		queue[i] = (*priv->reta_idx)[i];
	flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
				     actions, &error);
	if (!flow)
		return -rte_errno;
	return 0;
}

/**
 * Enable a control flow configured from the control plane.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param eth_spec
 *   An Ethernet flow spec to apply.
 * @param eth_mask
 *   An Ethernet flow mask to apply.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_ctrl_flow(struct rte_eth_dev *dev,
	       struct rte_flow_item_eth *eth_spec,
	       struct rte_flow_item_eth *eth_mask)
{
	return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
}
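
/*
 * Illustrative sketch, not part of the driver: enabling reception of
 * broadcast frames through the control-flow helper above. Spec and mask
 * are identical so the destination MAC is matched exactly; the resulting
 * flow is stored in priv->ctrl_flows and steered through the RSS action
 * built over the current RETA.
 */
static __rte_unused int
mlx5_flow_example_enable_broadcast(struct rte_eth_dev *dev)
{
	struct rte_flow_item_eth bcast = {
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
	};

	return mlx5_ctrl_flow(dev, &bcast, &bcast);
}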

/**
 * Destroy a flow.
 *
 * @see rte_flow_destroy()
 * @see rte_flow_ops
 */
int
mlx5_flow_destroy(struct rte_eth_dev *dev,
		  struct rte_flow *flow,
		  struct rte_flow_error *error __rte_unused)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_destroy(dev, &priv->flows, flow);
	return 0;
}

/**
 * Destroy all flows.
 *
 * @see rte_flow_flush()
 * @see rte_flow_ops
 */
int
mlx5_flow_flush(struct rte_eth_dev *dev,
		struct rte_flow_error *error __rte_unused)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_flush(dev, &priv->flows);
	return 0;
}

#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
/**
 * Query flow counter.
 *
 * @param cs
 *   The counter set to read.
 * @param counter_stats
 *   Last values seen for this counter, used to compute deltas and handle
 *   resets.
 * @param[out] query_count
 *   Returned data from the counter.
 * @param error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_query_count(struct ibv_counter_set *cs,
		      struct mlx5_flow_counter_stats *counter_stats,
		      struct rte_flow_query_count *query_count,
		      struct rte_flow_error *error)
{
	uint64_t counters[2];
	struct ibv_query_counter_set_attr query_cs_attr = {
		.cs = cs,
		.query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
	};
	struct ibv_counter_set_data query_out = {
		.out = counters,
		.outlen = 2 * sizeof(uint64_t),
	};
	int err = mlx5_glue->query_counter_set(&query_cs_attr, &query_out);

	if (err)
		return rte_flow_error_set(error, err,
					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
					  NULL,
					  "cannot read counter");
	query_count->hits_set = 1;
	query_count->bytes_set = 1;
	query_count->hits = counters[0] - counter_stats->hits;
	query_count->bytes = counters[1] - counter_stats->bytes;
	/* Reset counter values when requested. */
	if (query_count->reset) {
		counter_stats->hits = counters[0];
		counter_stats->bytes = counters[1];
	}
	return 0;
}

/**
 * Query a flow.
 *
 * @see rte_flow_query()
 * @see rte_flow_ops
 */
int
mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
		struct rte_flow *flow,
		enum rte_flow_action_type action __rte_unused,
		void *data,
		struct rte_flow_error *error)
{
	if (flow->cs) {
		int ret;

		ret = mlx5_flow_query_count(flow->cs,
					    &flow->counter_stats,
					    (struct rte_flow_query_count *)data,
					    error);
		if (ret)
			return ret;
	} else {
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
					  NULL,
					  "no counter found for flow");
	}
	return 0;
}
#endif

/**
 * Isolated mode.
 *
 * @see rte_flow_isolate()
 * @see rte_flow_ops
 */
int
mlx5_flow_isolate(struct rte_eth_dev *dev,
		  int enable,
		  struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	if (dev->data->dev_started) {
		rte_flow_error_set(error, EBUSY,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "port must be stopped first");
		return -rte_errno;
	}
	priv->isolated = !!enable;
	if (priv->isolated)
		priv->dev->dev_ops = &mlx5_dev_ops_isolate;
	else
		priv->dev->dev_ops = &mlx5_dev_ops;
	return 0;
}
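
/*
 * Illustrative sketch (hypothetical helper): isolated mode must be
 * requested while the port is stopped, e.g. right after
 * rte_eth_dev_configure(); once enabled, only flows created through
 * rte_flow receive traffic.
 */
static __rte_unused int
mlx5_flow_example_isolate(uint16_t port_id)
{
	struct rte_flow_error error;

	/* Fails with EBUSY if the port has already been started. */
	return rte_flow_isolate(port_id, 1, &error);
}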

/**
 * Convert a flow director filter to a generic flow.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Flow director filter to add.
 * @param attributes
 *   Generic flow parameters structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
			 const struct rte_eth_fdir_filter *fdir_filter,
			 struct mlx5_fdir *attributes)
{
	struct priv *priv = dev->data->dev_private;
	const struct rte_eth_fdir_input *input = &fdir_filter->input;
	const struct rte_eth_fdir_masks *mask =
		&dev->data->dev_conf.fdir_conf.mask;

	/* Validate queue number. */
	if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
		DRV_LOG(ERR, "port %u invalid queue number %d",
			dev->data->port_id, fdir_filter->action.rx_queue);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	attributes->attr.ingress = 1;
	attributes->items[0] = (struct rte_flow_item) {
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.spec = &attributes->l2,
		.mask = &attributes->l2_mask,
	};
	switch (fdir_filter->action.behavior) {
	case RTE_ETH_FDIR_ACCEPT:
		attributes->actions[0] = (struct rte_flow_action){
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &attributes->queue,
		};
		break;
	case RTE_ETH_FDIR_REJECT:
		attributes->actions[0] = (struct rte_flow_action){
			.type = RTE_FLOW_ACTION_TYPE_DROP,
		};
		break;
	default:
		DRV_LOG(ERR, "port %u invalid behavior %d",
			dev->data->port_id,
			fdir_filter->action.behavior);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	attributes->queue.index = fdir_filter->action.rx_queue;
	/* Handle L3. */
	switch (fdir_filter->input.flow_type) {
	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
		attributes->l3.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = input->flow.ip4_flow.src_ip,
			.dst_addr = input->flow.ip4_flow.dst_ip,
			.time_to_live = input->flow.ip4_flow.ttl,
			.type_of_service = input->flow.ip4_flow.tos,
			.next_proto_id = input->flow.ip4_flow.proto,
		};
		attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){
			.src_addr = mask->ipv4_mask.src_ip,
			.dst_addr = mask->ipv4_mask.dst_ip,
			.time_to_live = mask->ipv4_mask.ttl,
			.type_of_service = mask->ipv4_mask.tos,
			.next_proto_id = mask->ipv4_mask.proto,
		};
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV4,
			.spec = &attributes->l3,
			.mask = &attributes->l3_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
		attributes->l3.ipv6.hdr = (struct ipv6_hdr){
			.hop_limits = input->flow.ipv6_flow.hop_limits,
			.proto = input->flow.ipv6_flow.proto,
		};
		memcpy(attributes->l3.ipv6.hdr.src_addr,
		       input->flow.ipv6_flow.src_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
		memcpy(attributes->l3.ipv6.hdr.dst_addr,
		       input->flow.ipv6_flow.dst_ip,
		       RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
		memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
		       mask->ipv6_mask.src_ip,
		       RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
		memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
		       mask->ipv6_mask.dst_ip,
		       RTE_DIM(attributes->l3_mask.ipv6.hdr.dst_addr));
		attributes->items[1] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_IPV6,
			.spec = &attributes->l3,
			.mask = &attributes->l3_mask,
		};
		break;
	default:
		DRV_LOG(ERR, "port %u invalid flow type %d",
			dev->data->port_id, fdir_filter->input.flow_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	/* Handle L4. */
	switch (fdir_filter->input.flow_type) {
	case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
		attributes->l4.udp.hdr = (struct udp_hdr){
			.src_port = input->flow.udp4_flow.src_port,
			.dst_port = input->flow.udp4_flow.dst_port,
		};
		attributes->l4_mask.udp.hdr = (struct udp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_UDP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
		attributes->l4.tcp.hdr = (struct tcp_hdr){
			.src_port = input->flow.tcp4_flow.src_port,
			.dst_port = input->flow.tcp4_flow.dst_port,
		};
		attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_TCP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
		attributes->l4.udp.hdr = (struct udp_hdr){
			.src_port = input->flow.udp6_flow.src_port,
			.dst_port = input->flow.udp6_flow.dst_port,
		};
		attributes->l4_mask.udp.hdr = (struct udp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_UDP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
		attributes->l4.tcp.hdr = (struct tcp_hdr){
			.src_port = input->flow.tcp6_flow.src_port,
			.dst_port = input->flow.tcp6_flow.dst_port,
		};
		attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
			.src_port = mask->src_port_mask,
			.dst_port = mask->dst_port_mask,
		};
		attributes->items[2] = (struct rte_flow_item){
			.type = RTE_FLOW_ITEM_TYPE_TCP,
			.spec = &attributes->l4,
			.mask = &attributes->l4_mask,
		};
		break;
	case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
	case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
		break;
	default:
		DRV_LOG(ERR, "port %u invalid flow type %d",
			dev->data->port_id, fdir_filter->input.flow_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	return 0;
}
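
/*
 * Illustrative sketch with hypothetical values: the kind of flow director
 * filter mlx5_fdir_filter_convert() turns into ETH/IPV4/UDP pattern items
 * plus a QUEUE action. Addresses and ports are in network byte order.
 */
static __rte_unused void
mlx5_flow_example_fdir_udp4(struct rte_eth_fdir_filter *f)
{
	memset(f, 0, sizeof(*f));
	f->input.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP;
	f->input.flow.udp4_flow.ip.src_ip = rte_cpu_to_be_32(0xc0a80001);
	f->input.flow.udp4_flow.ip.dst_ip = rte_cpu_to_be_32(0xc0a80002);
	f->input.flow.udp4_flow.src_port = rte_cpu_to_be_16(1234);
	f->input.flow.udp4_flow.dst_port = rte_cpu_to_be_16(5678);
	f->action.behavior = RTE_ETH_FDIR_ACCEPT;
	f->action.rx_queue = 0;
}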

/**
 * Add new flow director filter and store it in list.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Flow director filter to add.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_add(struct rte_eth_dev *dev,
		     const struct rte_eth_fdir_filter *fdir_filter)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_fdir attributes = {
		.attr.group = 0,
		.l2_mask = {
			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.type = 0,
		},
	};
	struct mlx5_flow_parse parser = {
		.layer = HASH_RXQ_ETH,
	};
	struct rte_flow_error error;
	struct rte_flow *flow;
	int ret;

	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
	if (ret)
		return ret;
	ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
				attributes.actions, &error, &parser);
	if (ret)
		return ret;
	flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
				     attributes.items, attributes.actions,
				     &error);
	if (flow) {
		DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
			(void *)flow);
		return 0;
	}
	return -rte_errno;
}

/**
 * Delete specific filter.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Filter to be deleted.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_delete(struct rte_eth_dev *dev,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_fdir attributes = {
		.attr.group = 0,
	};
	struct mlx5_flow_parse parser = {
		.create = 1,
		.layer = HASH_RXQ_ETH,
	};
	struct rte_flow_error error;
	struct rte_flow *flow;
	unsigned int i;
	int ret;

	ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
	if (ret)
		return ret;
	ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items,
				attributes.actions, &error, &parser);
	if (ret)
		goto exit;
	/*
	 * Special case for the drop action, which is only written to the
	 * specifications when the flow is created. At this point the drop
	 * specification is therefore missing and must be appended manually.
	 */
	if (parser.drop) {
		struct ibv_flow_spec_action_drop *drop;

		drop = (void *)((uintptr_t)parser.queue[HASH_RXQ_ETH].ibv_attr +
				parser.queue[HASH_RXQ_ETH].offset);
		*drop = (struct ibv_flow_spec_action_drop){
			.type = IBV_FLOW_SPEC_ACTION_DROP,
			.size = sizeof(struct ibv_flow_spec_action_drop),
		};
		parser.queue[HASH_RXQ_ETH].ibv_attr->num_of_specs++;
	}
	TAILQ_FOREACH(flow, &priv->flows, next) {
		struct ibv_flow_attr *attr;
		struct ibv_spec_header *attr_h;
		void *spec;
		struct ibv_flow_attr *flow_attr;
		struct ibv_spec_header *flow_h;
		void *flow_spec;
		unsigned int specs_n;

		attr = parser.queue[HASH_RXQ_ETH].ibv_attr;
		flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr;
		/* Compare first the attributes. */
		if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
			continue;
		if (attr->num_of_specs == 0)
			continue;
		spec = (void *)((uintptr_t)attr +
				sizeof(struct ibv_flow_attr));
		flow_spec = (void *)((uintptr_t)flow_attr +
				     sizeof(struct ibv_flow_attr));
		specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
		for (i = 0; i != specs_n; ++i) {
			attr_h = spec;
			flow_h = flow_spec;
			if (memcmp(spec, flow_spec,
				   RTE_MIN(attr_h->size, flow_h->size)))
				goto wrong_flow;
			spec = (void *)((uintptr_t)spec + attr_h->size);
			flow_spec = (void *)((uintptr_t)flow_spec +
					     flow_h->size);
		}
		/* At this point, the flow matches. */
		break;
wrong_flow:
		/* The flow does not match. */
		continue;
	}
	ret = rte_errno; /* Save rte_errno before cleanup. */
	if (flow)
		mlx5_flow_list_destroy(dev, &priv->flows, flow);
exit:
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser.queue[i].ibv_attr)
			rte_free(parser.queue[i].ibv_attr);
	}
	rte_errno = ret; /* Restore rte_errno. */
	return -ret;
}
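
/*
 * Design note: deletion re-converts the filter into Verbs specifications
 * and byte-compares them against each flow in the list; flow director
 * filters are not kept in any dedicated lookup structure, so the cost is
 * linear in the number of flows.
 */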

/**
 * Update queue for specific filter.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Filter to be updated.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_update(struct rte_eth_dev *dev,
			const struct rte_eth_fdir_filter *fdir_filter)
{
	int ret;

	ret = mlx5_fdir_filter_delete(dev, fdir_filter);
	if (ret)
		return ret;
	return mlx5_fdir_filter_add(dev, fdir_filter);
}
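
/*
 * Note: the update above is delete-then-add and therefore not atomic;
 * traffic matching the filter may briefly fall through to the default
 * path between the two steps.
 */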

/**
 * Flush all filters.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
static void
mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;

	mlx5_flow_list_flush(dev, &priv->flows);
}

/**
 * Get flow director information.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[out] fdir_info
 *   Resulting flow director information.
 */
static void
mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
{
	struct priv *priv = dev->data->dev_private;
	struct rte_eth_fdir_masks *mask =
		&priv->dev->data->dev_conf.fdir_conf.mask;

	fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
	fdir_info->guarant_spc = 0;
	rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
	fdir_info->max_flexpayload = 0;
	fdir_info->flow_types_mask[0] = 0;
	fdir_info->flex_payload_unit = 0;
	fdir_info->max_flex_payload_segment_num = 0;
	fdir_info->flex_payload_limit = 0;
	memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
}

/**
 * Deal with flow director operations.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
		    void *arg)
{
	struct priv *priv = dev->data->dev_private;
	enum rte_fdir_mode fdir_mode =
		priv->dev->data->dev_conf.fdir_conf.mode;

	if (filter_op == RTE_ETH_FILTER_NOP)
		return 0;
	if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
	    fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
		DRV_LOG(ERR, "port %u flow director mode %d not supported",
			dev->data->port_id, fdir_mode);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	switch (filter_op) {
	case RTE_ETH_FILTER_ADD:
		return mlx5_fdir_filter_add(dev, arg);
	case RTE_ETH_FILTER_UPDATE:
		return mlx5_fdir_filter_update(dev, arg);
	case RTE_ETH_FILTER_DELETE:
		return mlx5_fdir_filter_delete(dev, arg);
	case RTE_ETH_FILTER_FLUSH:
		mlx5_fdir_filter_flush(dev);
		break;
	case RTE_ETH_FILTER_INFO:
		mlx5_fdir_info_get(dev, arg);
		break;
	default:
		DRV_LOG(DEBUG, "port %u unknown operation %u",
			dev->data->port_id, filter_op);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	return 0;
}
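
/*
 * Illustrative sketch (hypothetical helper): applications reach
 * mlx5_fdir_ctrl_func() through the public filter-control API.
 */
static __rte_unused int
mlx5_flow_example_fdir_add(uint16_t port_id,
			   struct rte_eth_fdir_filter *filter)
{
	return rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
				       RTE_ETH_FILTER_ADD, filter);
}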

/**
 * Manage filter operations.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param filter_type
 *   Filter type.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
		     enum rte_filter_type filter_type,
		     enum rte_filter_op filter_op,
		     void *arg)
{
	switch (filter_type) {
	case RTE_ETH_FILTER_GENERIC:
		if (filter_op != RTE_ETH_FILTER_GET) {
			rte_errno = EINVAL;
			return -rte_errno;
		}
		*(const void **)arg = &mlx5_flow_ops;
		return 0;
	case RTE_ETH_FILTER_FDIR:
		return mlx5_fdir_ctrl_func(dev, filter_op, arg);
	default:
		DRV_LOG(ERR, "port %u filter type (%d) not supported",
			dev->data->port_id, filter_type);
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	return 0;
}
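
/*
 * Illustrative sketch (hypothetical helper): how the ethdev layer fetches
 * the generic flow ops exported above; rte_flow_validate(),
 * rte_flow_create() and friends reach mlx5_flow_ops through this path.
 */
static __rte_unused const struct rte_flow_ops *
mlx5_flow_example_get_ops(struct rte_eth_dev *dev)
{
	const struct rte_flow_ops *ops = NULL;

	if (mlx5_dev_filter_ctrl(dev, RTE_ETH_FILTER_GENERIC,
				 RTE_ETH_FILTER_GET, &ops))
		return NULL;
	return ops;
}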