/*-
 *   BSD LICENSE
 *
 *   Copyright 2016 6WIND S.A.
 *   Copyright 2016 Mellanox.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of 6WIND S.A. nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/queue.h>
#include <string.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_ethdev.h>
#include <rte_flow.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>

#include "mlx5.h"
#include "mlx5_prm.h"

/* Define minimal priority for control plane flows. */
#define MLX5_CTRL_FLOW_PRIORITY 4

/* Internet Protocol versions. */
#define MLX5_IPV4 4
#define MLX5_IPV6 6

static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data);

static int
mlx5_flow_create_vlan(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data);

static int
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data);

static int
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data);

static int
mlx5_flow_create_udp(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data);

static int
mlx5_flow_create_tcp(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data);

static int
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
		       const void *default_mask,
		       void *data);

struct mlx5_flow_parse;

static void
mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
		      unsigned int size);

static int
mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);

/* Hash RX queue types. */
enum hash_rxq_type {
	HASH_RXQ_TCPV4,
	HASH_RXQ_UDPV4,
	HASH_RXQ_IPV4,
	HASH_RXQ_TCPV6,
	HASH_RXQ_UDPV6,
	HASH_RXQ_IPV6,
	HASH_RXQ_ETH,
};

/* Initialization data for hash RX queue. */
struct hash_rxq_init {
	uint64_t hash_fields; /* Fields that participate in the hash. */
	uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
	unsigned int flow_priority; /* Flow priority to use. */
	unsigned int ip_version; /* Internet protocol. */
};

/* Initialization data for hash RX queues. */
const struct hash_rxq_init hash_rxq_init[] = {
	[HASH_RXQ_TCPV4] = {
		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
				IBV_RX_HASH_DST_IPV4 |
				IBV_RX_HASH_SRC_PORT_TCP |
				IBV_RX_HASH_DST_PORT_TCP),
		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
		.flow_priority = 0,
		.ip_version = MLX5_IPV4,
	},
	[HASH_RXQ_UDPV4] = {
		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
				IBV_RX_HASH_DST_IPV4 |
				IBV_RX_HASH_SRC_PORT_UDP |
				IBV_RX_HASH_DST_PORT_UDP),
		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
		.flow_priority = 0,
		.ip_version = MLX5_IPV4,
	},
	[HASH_RXQ_IPV4] = {
		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
				IBV_RX_HASH_DST_IPV4),
		.dpdk_rss_hf = (ETH_RSS_IPV4 |
				ETH_RSS_FRAG_IPV4),
		.flow_priority = 1,
		.ip_version = MLX5_IPV4,
	},
	[HASH_RXQ_TCPV6] = {
		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
				IBV_RX_HASH_DST_IPV6 |
				IBV_RX_HASH_SRC_PORT_TCP |
				IBV_RX_HASH_DST_PORT_TCP),
		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
		.flow_priority = 0,
		.ip_version = MLX5_IPV6,
	},
	[HASH_RXQ_UDPV6] = {
		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
				IBV_RX_HASH_DST_IPV6 |
				IBV_RX_HASH_SRC_PORT_UDP |
				IBV_RX_HASH_DST_PORT_UDP),
		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
		.flow_priority = 0,
		.ip_version = MLX5_IPV6,
	},
	[HASH_RXQ_IPV6] = {
		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
				IBV_RX_HASH_DST_IPV6),
		.dpdk_rss_hf = (ETH_RSS_IPV6 |
				ETH_RSS_FRAG_IPV6),
		.flow_priority = 1,
		.ip_version = MLX5_IPV6,
	},
	[HASH_RXQ_ETH] = {
		.hash_fields = 0,
		.dpdk_rss_hf = 0,
		.flow_priority = 2,
	},
};

/* Number of entries in hash_rxq_init[]. */
const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);

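/*
 * Illustrative note (not part of the original sources): hash Rx queue
 * types are selected by intersecting the DPDK RSS hash fields requested
 * by the application with the dpdk_rss_hf value of each entry above.
 * A minimal sketch, assuming a configuration that only enables
 * ETH_RSS_NONFRAG_IPV4_UDP:
 *
 * @code
 * uint64_t rss_hf = ETH_RSS_NONFRAG_IPV4_UDP;
 * unsigned int i;
 *
 * for (i = 0; i != hash_rxq_init_n; ++i)
 *	if (hash_rxq_init[i].dpdk_rss_hf & rss_hf)
 *		printf("hash Rx queue type %u selected\n", i);
 * // Only HASH_RXQ_UDPV4 matches, hashing IPv4 addresses and UDP ports.
 * @endcode
 */
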
/** Structure for drop queue. */
struct mlx5_hrxq_drop {
	struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
	struct ibv_qp *qp; /**< Verbs queue pair. */
	struct ibv_wq *wq; /**< Verbs work queue. */
	struct ibv_cq *cq; /**< Verbs completion queue. */
};

/* Flow structures. */
struct mlx5_flow {
	uint64_t hash_fields; /**< Fields that participate in the hash. */
	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
	struct ibv_flow *ibv_flow; /**< Verbs flow. */
	struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
};

/* Drop flow structure. */
struct mlx5_flow_drop {
	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
	struct ibv_flow *ibv_flow; /**< Verbs flow. */
};

struct rte_flow {
	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
	uint32_t mark:1; /**< Set if the flow is marked. */
	uint32_t drop:1; /**< Drop queue. */
	uint16_t queues_n; /**< Number of entries in queue[]. */
	uint16_t (*queues)[]; /**< Queue indexes to use. */
	struct rte_eth_rss_conf rss_conf; /**< RSS configuration. */
	uint8_t rss_key[40]; /**< Copy of the RSS key. */
	union {
		struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
		/**< Flow with Rx queue. */
		struct mlx5_flow_drop drxq; /**< Flow with drop Rx queue. */
	};
};

/** Static initializer for items. */
#define ITEMS(...) \
	(const enum rte_flow_item_type []){ \
		__VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
	}

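/*
 * Illustrative expansion (not from the original sources):
 * ITEMS(RTE_FLOW_ITEM_TYPE_IPV4, RTE_FLOW_ITEM_TYPE_IPV6) expands to the
 * compound literal
 *
 * (const enum rte_flow_item_type []){
 *	RTE_FLOW_ITEM_TYPE_IPV4, RTE_FLOW_ITEM_TYPE_IPV6,
 *	RTE_FLOW_ITEM_TYPE_END,
 * }
 *
 * i.e. an END-terminated array suitable for the .items fields below.
 */
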
/** Structure to generate a simple graph of layers supported by the NIC. */
struct mlx5_flow_items {
	/** List of possible actions for these items. */
	const enum rte_flow_action_type *const actions;
	/** Bit-masks corresponding to the possibilities for the item. */
	const void *mask;
	/**
	 * Default bit-masks to use when item->mask is not provided. When
	 * \default_mask is also NULL, the full supported bit-mask (\mask) is
	 * used instead.
	 */
	const void *default_mask;
	/** Bit-masks size in bytes. */
	const unsigned int mask_sz;
	/**
	 * Conversion function from rte_flow to NIC specific flow.
	 *
	 * @param item
	 *   rte_flow item to convert.
	 * @param default_mask
	 *   Default bit-masks to use when item->mask is not provided.
	 * @param data
	 *   Internal structure to store the conversion.
	 *
	 * @return
	 *   0 on success, negative value otherwise.
	 */
	int (*convert)(const struct rte_flow_item *item,
		       const void *default_mask,
		       void *data);
	/** Size in bytes of the destination structure. */
	const unsigned int dst_sz;
	/** List of possible following items. */
	const enum rte_flow_item_type *const items;
};

/** Valid actions for this PMD. */
static const enum rte_flow_action_type valid_actions[] = {
	RTE_FLOW_ACTION_TYPE_DROP,
	RTE_FLOW_ACTION_TYPE_QUEUE,
	RTE_FLOW_ACTION_TYPE_MARK,
	RTE_FLOW_ACTION_TYPE_FLAG,
	RTE_FLOW_ACTION_TYPE_END,
};

/** Graph of supported items and associated actions. */
static const struct mlx5_flow_items mlx5_flow_items[] = {
	[RTE_FLOW_ITEM_TYPE_END] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
			       RTE_FLOW_ITEM_TYPE_VXLAN),
	},
	[RTE_FLOW_ITEM_TYPE_ETH] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
			       RTE_FLOW_ITEM_TYPE_IPV4,
			       RTE_FLOW_ITEM_TYPE_IPV6),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_eth){
			.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
			.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
			.type = -1,
		},
		.default_mask = &rte_flow_item_eth_mask,
		.mask_sz = sizeof(struct rte_flow_item_eth),
		.convert = mlx5_flow_create_eth,
		.dst_sz = sizeof(struct ibv_flow_spec_eth),
	},
	[RTE_FLOW_ITEM_TYPE_VLAN] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
			       RTE_FLOW_ITEM_TYPE_IPV6),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_vlan){
			.tci = -1,
		},
		.default_mask = &rte_flow_item_vlan_mask,
		.mask_sz = sizeof(struct rte_flow_item_vlan),
		.convert = mlx5_flow_create_vlan,
		.dst_sz = 0,
	},
	[RTE_FLOW_ITEM_TYPE_IPV4] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
			       RTE_FLOW_ITEM_TYPE_TCP),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_ipv4){
			.hdr = {
				.src_addr = -1,
				.dst_addr = -1,
				.type_of_service = -1,
				.next_proto_id = -1,
			},
		},
		.default_mask = &rte_flow_item_ipv4_mask,
		.mask_sz = sizeof(struct rte_flow_item_ipv4),
		.convert = mlx5_flow_create_ipv4,
		.dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
	},
	[RTE_FLOW_ITEM_TYPE_IPV6] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
			       RTE_FLOW_ITEM_TYPE_TCP),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_ipv6){
			.hdr = {
				.src_addr = {
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
				},
				.dst_addr = {
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
				},
				.vtc_flow = -1,
				.proto = -1,
				.hop_limits = -1,
			},
		},
		.default_mask = &rte_flow_item_ipv6_mask,
		.mask_sz = sizeof(struct rte_flow_item_ipv6),
		.convert = mlx5_flow_create_ipv6,
		.dst_sz = sizeof(struct ibv_flow_spec_ipv6),
	},
	[RTE_FLOW_ITEM_TYPE_UDP] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_udp){
			.hdr = {
				.src_port = -1,
				.dst_port = -1,
			},
		},
		.default_mask = &rte_flow_item_udp_mask,
		.mask_sz = sizeof(struct rte_flow_item_udp),
		.convert = mlx5_flow_create_udp,
		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
	},
	[RTE_FLOW_ITEM_TYPE_TCP] = {
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_tcp){
			.hdr = {
				.src_port = -1,
				.dst_port = -1,
			},
		},
		.default_mask = &rte_flow_item_tcp_mask,
		.mask_sz = sizeof(struct rte_flow_item_tcp),
		.convert = mlx5_flow_create_tcp,
		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
	},
	[RTE_FLOW_ITEM_TYPE_VXLAN] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_vxlan){
			.vni = "\xff\xff\xff",
		},
		.default_mask = &rte_flow_item_vxlan_mask,
		.mask_sz = sizeof(struct rte_flow_item_vxlan),
		.convert = mlx5_flow_create_vxlan,
		.dst_sz = sizeof(struct ibv_flow_spec_tunnel),
	},
};

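/*
 * Example walk of the graph above (illustrative): the pattern
 * END -> ETH -> IPV4 -> UDP -> VXLAN -> ETH is accepted because each item
 * appears in the .items list of its predecessor; a pattern such as
 * END -> UDP is rejected since UDP does not follow END.
 */
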
/** Structure to pass to the conversion function. */
struct mlx5_flow_parse {
	uint32_t inner; /**< Set once VXLAN is encountered. */
	uint32_t create:1;
	/**< Whether resources should remain after a validate. */
	uint32_t drop:1; /**< Target is a drop queue. */
	uint32_t mark:1; /**< Mark is present in the flow. */
	uint32_t mark_id; /**< Mark identifier. */
	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indexes to use. */
	uint16_t queues_n; /**< Number of entries in queue[]. */
	struct rte_eth_rss_conf rss_conf; /**< RSS configuration. */
	uint8_t rss_key[40]; /**< Copy of the RSS key. */
	enum hash_rxq_type layer; /**< Last pattern layer detected. */
	union {
		struct {
			struct ibv_flow_attr *ibv_attr;
			/**< Pointer to Verbs attributes. */
			unsigned int offset;
			/**< Current position or total size of the attribute. */
		} queue[RTE_DIM(hash_rxq_init)];
		struct {
			struct ibv_flow_attr *ibv_attr;
			/**< Pointer to Verbs attributes. */
			unsigned int offset;
			/**< Current position or total size of the attribute. */
		} drop_q;
	};
};

static const struct rte_flow_ops mlx5_flow_ops = {
	.validate = mlx5_flow_validate,
	.create = mlx5_flow_create,
	.destroy = mlx5_flow_destroy,
	.flush = mlx5_flow_flush,
	.query = NULL,
	.isolate = mlx5_flow_isolate,
};

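/*
 * Applications do not call these handlers directly; they reach them
 * through the generic rte_flow API once mlx5_dev_filter_ctrl() below has
 * returned this table. A minimal sketch (hypothetical port and queue
 * numbers):
 *
 * @code
 * struct rte_flow_error flow_err;
 * const struct rte_flow_attr flow_attr = { .ingress = 1 };
 * struct rte_flow_item pattern[] = {
 *	{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *	{ .type = RTE_FLOW_ITEM_TYPE_END },
 * };
 * struct rte_flow_action_queue conf = { .index = 0 };
 * struct rte_flow_action flow_actions[] = {
 *	{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &conf },
 *	{ .type = RTE_FLOW_ACTION_TYPE_END },
 * };
 * struct rte_flow *f = rte_flow_create(0, &flow_attr, pattern,
 *				        flow_actions, &flow_err);
 * @endcode
 */
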
/**
 * Manage filter operations.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param filter_type
 *   Filter type.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
int
mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
		     enum rte_filter_type filter_type,
		     enum rte_filter_op filter_op,
		     void *arg)
{
	int ret = EINVAL;

	if (filter_type == RTE_ETH_FILTER_GENERIC) {
		if (filter_op != RTE_ETH_FILTER_GET)
			return -EINVAL;
		*(const void **)arg = &mlx5_flow_ops;
		return 0;
	}
	ERROR("%p: filter type (%d) not supported",
	      (void *)dev, filter_type);
	return -ret;
}

/**
 * Check support for a given item.
 *
 * @param item[in]
 *   Item specification.
 * @param mask[in]
 *   Bit-masks covering supported fields to compare with spec, last and mask in
 *   \item.
 * @param size
 *   Bit-mask size in bytes.
 *
 * @return
 *   0 on success.
 */
static int
mlx5_flow_item_validate(const struct rte_flow_item *item,
			const uint8_t *mask, unsigned int size)
{
	int ret = 0;

	if (!item->spec && (item->mask || item->last))
		return -1;
	if (item->spec && !item->mask) {
		unsigned int i;
		const uint8_t *spec = item->spec;

		for (i = 0; i < size; ++i)
			if ((spec[i] | mask[i]) != mask[i])
				return -1;
	}
	if (item->last && !item->mask) {
		unsigned int i;
		const uint8_t *spec = item->last;

		for (i = 0; i < size; ++i)
			if ((spec[i] | mask[i]) != mask[i])
				return -1;
	}
	if (item->mask) {
		unsigned int i;
		const uint8_t *spec = item->mask;

		for (i = 0; i < size; ++i)
			if ((spec[i] | mask[i]) != mask[i])
				return -1;
	}
	if (item->spec && item->last) {
		uint8_t spec[size];
		uint8_t last[size];
		const uint8_t *apply = mask;
		unsigned int i;

		if (item->mask)
			apply = item->mask;
		for (i = 0; i < size; ++i) {
			spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
			last[i] = ((const uint8_t *)item->last)[i] & apply[i];
		}
		ret = memcmp(spec, last, size);
	}
	return ret;
}

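/*
 * For instance (illustrative): the Ethernet item above supports dst, src
 * and type only; a spec or mask requesting any other byte fails here
 * because (spec[i] | mask[i]) != mask[i] for at least one position.
 */
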
/**
 * Copy the RSS configuration provided by the user.
 *
 * @param priv
 *   Pointer to private structure.
 * @param parser
 *   Internal parser structure.
 * @param rss_conf
 *   User RSS configuration to save.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_flow_convert_rss_conf(struct priv *priv,
			   struct mlx5_flow_parse *parser,
			   const struct rte_eth_rss_conf *rss_conf)
{
	const struct rte_eth_rss_conf *rss =
		rss_conf ? rss_conf : &priv->rss_conf;

	if (rss->rss_key_len > 40)
		return EINVAL;
	parser->rss_conf.rss_key_len = rss->rss_key_len;
	parser->rss_conf.rss_hf = rss->rss_hf;
	memcpy(parser->rss_key, rss->rss_key, rss->rss_key_len);
	parser->rss_conf.rss_key = parser->rss_key;
	return 0;
}

/**
 * Extract attributes into the parser.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_convert_attributes(struct priv *priv,
			     const struct rte_flow_attr *attr,
			     struct rte_flow_error *error,
			     struct mlx5_flow_parse *parser)
{
	(void)priv;
	(void)parser;
	if (attr->group) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
				   NULL,
				   "groups are not supported");
		return -rte_errno;
	}
	if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
				   NULL,
				   "priorities are not supported");
		return -rte_errno;
	}
	if (attr->egress) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
				   NULL,
				   "egress is not supported");
		return -rte_errno;
	}
	if (!attr->ingress) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
				   NULL,
				   "only ingress is supported");
		return -rte_errno;
	}
	return 0;
}

/**
 * Extract actions into the parser.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_convert_actions(struct priv *priv,
			  const struct rte_flow_action actions[],
			  struct rte_flow_error *error,
			  struct mlx5_flow_parse *parser)
{
	/*
	 * Add default RSS configuration necessary for Verbs to create QP even
	 * if no RSS is necessary.
	 */
	priv_flow_convert_rss_conf(priv, parser,
				   (const struct rte_eth_rss_conf *)
				   &priv->rss_conf);
	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
			continue;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
			parser->drop = 1;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
			const struct rte_flow_action_queue *queue =
				(const struct rte_flow_action_queue *)
				actions->conf;
			uint16_t n;
			uint16_t found = 0;

			if (!queue || (queue->index > (priv->rxqs_n - 1)))
				goto exit_action_not_supported;
			for (n = 0; n < parser->queues_n; ++n) {
				if (parser->queues[n] == queue->index) {
					found = 1;
					break;
				}
			}
			if (parser->queues_n > 1 && !found) {
				rte_flow_error_set(error, ENOTSUP,
					   RTE_FLOW_ERROR_TYPE_ACTION,
					   actions,
					   "queue action not in RSS queues");
				return -rte_errno;
			}
			if (!found) {
				parser->queues_n = 1;
				parser->queues[0] = queue->index;
			}
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
			const struct rte_flow_action_rss *rss =
				(const struct rte_flow_action_rss *)
				actions->conf;
			uint16_t n;

			if (!rss || !rss->num) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "no valid queues");
				return -rte_errno;
			}
			if (parser->queues_n == 1) {
				uint16_t found = 0;

				assert(parser->queues_n);
				for (n = 0; n < rss->num; ++n) {
					if (parser->queues[0] ==
					    rss->queue[n]) {
						found = 1;
						break;
					}
				}
				if (!found) {
					rte_flow_error_set(error, ENOTSUP,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "queue action not in RSS"
						   " queues");
					return -rte_errno;
				}
			}
			for (n = 0; n < rss->num; ++n) {
				if (rss->queue[n] >= priv->rxqs_n) {
					rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "queue id > number of"
						   " queues");
					return -rte_errno;
				}
			}
			for (n = 0; n < rss->num; ++n)
				parser->queues[n] = rss->queue[n];
			parser->queues_n = rss->num;
			if (priv_flow_convert_rss_conf(priv, parser,
						       rss->rss_conf)) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "wrong RSS configuration");
				return -rte_errno;
			}
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
			const struct rte_flow_action_mark *mark =
				(const struct rte_flow_action_mark *)
				actions->conf;

			if (!mark) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "mark must be defined");
				return -rte_errno;
			} else if (mark->id >= MLX5_FLOW_MARK_MAX) {
				rte_flow_error_set(error, ENOTSUP,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "mark must be between 0"
						   " and 16777199");
				return -rte_errno;
			}
			parser->mark = 1;
			parser->mark_id = mark->id;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
			parser->mark = 1;
		} else {
			goto exit_action_not_supported;
		}
	}
	if (!parser->queues_n && !parser->drop) {
		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "no valid action");
		return -rte_errno;
	}
	return 0;
exit_action_not_supported:
	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
			   actions, "action not supported");
	return -rte_errno;
}

/**
 * Validate items.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] items
 *   Pattern specification (list terminated by the END pattern item).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_convert_items_validate(struct priv *priv,
				 const struct rte_flow_item items[],
				 struct rte_flow_error *error,
				 struct mlx5_flow_parse *parser)
{
	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
	unsigned int i;

	(void)priv;
	/* Initialise the offsets to start after verbs attribute. */
	if (parser->drop) {
		parser->drop_q.offset = sizeof(struct ibv_flow_attr);
	} else {
		for (i = 0; i != hash_rxq_init_n; ++i)
			parser->queue[i].offset = sizeof(struct ibv_flow_attr);
	}
	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
		const struct mlx5_flow_items *token = NULL;
		unsigned int n;
		int err;

		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
			continue;
		for (i = 0;
		     cur_item->items &&
		     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
		     ++i) {
			if (cur_item->items[i] == items->type) {
				token = &mlx5_flow_items[items->type];
				break;
			}
		}
		if (!token)
			goto exit_item_not_supported;
		cur_item = token;
		err = mlx5_flow_item_validate(items,
					      (const uint8_t *)cur_item->mask,
					      cur_item->mask_sz);
		if (err)
			goto exit_item_not_supported;
		if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
			if (parser->inner) {
				rte_flow_error_set(error, ENOTSUP,
						   RTE_FLOW_ERROR_TYPE_ITEM,
						   items,
						   "cannot recognize multiple"
						   " VXLAN encapsulations");
				return -rte_errno;
			}
			parser->inner = IBV_FLOW_SPEC_INNER;
		}
		if (parser->drop) {
			parser->drop_q.offset += cur_item->dst_sz;
		} else if (parser->queues_n == 1) {
			parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
		} else {
			for (n = 0; n != hash_rxq_init_n; ++n)
				parser->queue[n].offset += cur_item->dst_sz;
		}
	}
	if (parser->mark) {
		for (i = 0; i != hash_rxq_init_n; ++i)
			parser->queue[i].offset +=
				sizeof(struct ibv_flow_spec_action_tag);
	}
	return 0;
exit_item_not_supported:
	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
			   items, "item not supported");
	return -rte_errno;
}

/**
 * Allocate memory space to store verbs flow attributes.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] priority
 *   Flow priority.
 * @param[in] size
 *   Number of bytes to allocate.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A verbs flow attribute on success, NULL otherwise.
 */
static struct ibv_flow_attr *
priv_flow_convert_allocate(struct priv *priv,
			   unsigned int priority,
			   unsigned int size,
			   struct rte_flow_error *error)
{
	struct ibv_flow_attr *ibv_attr;

	(void)priv;
	ibv_attr = rte_calloc(__func__, 1, size, 0);
	if (!ibv_attr) {
		rte_flow_error_set(error, ENOMEM,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "cannot allocate verbs spec attributes.");
		return NULL;
	}
	ibv_attr->priority = priority;
	return ibv_attr;
}

/**
 * Finalise verbs flow attributes.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in, out] parser
 *   Internal parser structure.
 */
static void
priv_flow_convert_finalise(struct priv *priv, struct mlx5_flow_parse *parser)
{
	const unsigned int ipv4 =
		hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
	const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 : HASH_RXQ_TCPV6;
	const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
	const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 : HASH_RXQ_TCPV4;
	const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 : HASH_RXQ_IPV4;
	const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
	unsigned int i;

	(void)priv;
	if (parser->layer == HASH_RXQ_ETH) {
		goto fill;
	} else {
		/*
		 * This layer becomes useless as the pattern defines deeper
		 * layers.
		 */
		rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
		parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
	}
	/* Remove opposite kind of layer e.g. IPv6 if the pattern is IPv4. */
	for (i = ohmin; i != (ohmax + 1); ++i) {
		if (!parser->queue[i].ibv_attr)
			continue;
		rte_free(parser->queue[i].ibv_attr);
		parser->queue[i].ibv_attr = NULL;
	}
	/* Remove impossible flow according to the RSS configuration. */
	if (hash_rxq_init[parser->layer].dpdk_rss_hf &
	    parser->rss_conf.rss_hf) {
		/* Remove any other flow. */
		for (i = hmin; i != (hmax + 1); ++i) {
			if ((i == parser->layer) ||
			    (!parser->queue[i].ibv_attr))
				continue;
			rte_free(parser->queue[i].ibv_attr);
			parser->queue[i].ibv_attr = NULL;
		}
	} else if (!parser->queue[ip].ibv_attr) {
		/* No RSS possible with the current configuration. */
		parser->queues_n = 1;
		return;
	}
fill:
	/*
	 * Fill missing layers in verbs specifications, or compute the correct
	 * offset to allocate the memory space for the attributes and
	 * specifications.
	 */
	for (i = 0; i != hash_rxq_init_n - 1; ++i) {
		union {
			struct ibv_flow_spec_ipv4_ext ipv4;
			struct ibv_flow_spec_ipv6 ipv6;
			struct ibv_flow_spec_tcp_udp udp_tcp;
		} specs;
		void *dst;
		uint16_t size;

		if (i == parser->layer)
			continue;
		if (parser->layer == HASH_RXQ_ETH) {
			if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
				size = sizeof(struct ibv_flow_spec_ipv4_ext);
				specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
					.type = IBV_FLOW_SPEC_IPV4_EXT |
						parser->inner,
					.size = size,
				};
			} else {
				size = sizeof(struct ibv_flow_spec_ipv6);
				specs.ipv6 = (struct ibv_flow_spec_ipv6){
					.type = IBV_FLOW_SPEC_IPV6 |
						parser->inner,
					.size = size,
				};
			}
			if (parser->queue[i].ibv_attr) {
				dst = (void *)((uintptr_t)
					       parser->queue[i].ibv_attr +
					       parser->queue[i].offset);
				memcpy(dst, &specs, size);
				++parser->queue[i].ibv_attr->num_of_specs;
			}
			parser->queue[i].offset += size;
		}
		if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
		    (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
			size = sizeof(struct ibv_flow_spec_tcp_udp);
			specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
				.type = ((i == HASH_RXQ_UDPV4 ||
					  i == HASH_RXQ_UDPV6) ?
					 IBV_FLOW_SPEC_UDP :
					 IBV_FLOW_SPEC_TCP) |
					parser->inner,
				.size = size,
			};
			if (parser->queue[i].ibv_attr) {
				dst = (void *)((uintptr_t)
					       parser->queue[i].ibv_attr +
					       parser->queue[i].offset);
				memcpy(dst, &specs, size);
				++parser->queue[i].ibv_attr->num_of_specs;
			}
			parser->queue[i].offset += size;
		}
	}
}

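/*
 * Illustrative example (not from the original sources): for a pattern
 * ending at the IPv4 layer with TCP and UDP RSS enabled, the loop above
 * appends a TCP spec to the HASH_RXQ_TCPV4 attribute and a UDP spec to
 * the HASH_RXQ_UDPV4 attribute, so every remaining hash Rx queue type
 * carries a complete L3/L4 specification.
 */
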
/**
 * Validate and convert a flow supported by the NIC.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_convert(struct priv *priv,
		  const struct rte_flow_attr *attr,
		  const struct rte_flow_item items[],
		  const struct rte_flow_action actions[],
		  struct rte_flow_error *error,
		  struct mlx5_flow_parse *parser)
{
	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
	unsigned int i;
	int ret;

	/* First step. Validate the attributes, items and actions. */
	*parser = (struct mlx5_flow_parse){
		.create = parser->create,
		.layer = HASH_RXQ_ETH,
		.mark_id = MLX5_FLOW_MARK_DEFAULT,
	};
	ret = priv_flow_convert_attributes(priv, attr, error, parser);
	if (ret)
		return ret;
	ret = priv_flow_convert_actions(priv, actions, error, parser);
	if (ret)
		return ret;
	ret = priv_flow_convert_items_validate(priv, items, error, parser);
	if (ret)
		return ret;
	priv_flow_convert_finalise(priv, parser);
	/*
	 * Second step.
	 * Allocate the memory space to store verbs specifications.
	 */
	if (parser->drop) {
		parser->drop_q.ibv_attr =
			priv_flow_convert_allocate(priv, attr->priority,
						   parser->drop_q.offset,
						   error);
		if (!parser->drop_q.ibv_attr)
			return -rte_errno;
		parser->drop_q.offset = sizeof(struct ibv_flow_attr);
	} else if (parser->queues_n == 1) {
		unsigned int priority =
			attr->priority +
			hash_rxq_init[HASH_RXQ_ETH].flow_priority;
		unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;

		parser->queue[HASH_RXQ_ETH].ibv_attr =
			priv_flow_convert_allocate(priv, priority,
						   offset, error);
		if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
			return -rte_errno;
		parser->queue[HASH_RXQ_ETH].offset =
			sizeof(struct ibv_flow_attr);
	} else {
		for (i = 0; i != hash_rxq_init_n; ++i) {
			unsigned int priority =
				attr->priority +
				hash_rxq_init[i].flow_priority;
			unsigned int offset;

			if (!(parser->rss_conf.rss_hf &
			      hash_rxq_init[i].dpdk_rss_hf) &&
			    (i != HASH_RXQ_ETH))
				continue;
			offset = parser->queue[i].offset;
			parser->queue[i].ibv_attr =
				priv_flow_convert_allocate(priv, priority,
							   offset, error);
			if (!parser->queue[i].ibv_attr)
				goto exit_enomem;
			parser->queue[i].offset = sizeof(struct ibv_flow_attr);
		}
	}
	/* Third step. Conversion parse, fill the specifications. */
	parser->inner = 0;
	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
			continue;
		cur_item = &mlx5_flow_items[items->type];
		ret = cur_item->convert(items,
					(cur_item->default_mask ?
					 cur_item->default_mask :
					 cur_item->mask),
					parser);
		if (ret) {
			rte_flow_error_set(error, ENOTSUP,
					   RTE_FLOW_ERROR_TYPE_ITEM,
					   items, "item not supported");
			goto exit_free;
		}
	}
	if (parser->mark)
		mlx5_flow_create_flag_mark(parser, parser->mark_id);
	/*
	 * Last step. Complete missing specification to reach the RSS
	 * configuration.
	 */
	if (parser->queues_n > 1)
		priv_flow_convert_finalise(priv, parser);
exit_free:
	/* Only verification is expected, all resources should be released. */
	if (!parser->create) {
		if (parser->drop) {
			rte_free(parser->drop_q.ibv_attr);
			parser->drop_q.ibv_attr = NULL;
		}
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (parser->queue[i].ibv_attr) {
				rte_free(parser->queue[i].ibv_attr);
				parser->queue[i].ibv_attr = NULL;
			}
		}
	}
	return ret;
exit_enomem:
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (parser->queue[i].ibv_attr) {
			rte_free(parser->queue[i].ibv_attr);
			parser->queue[i].ibv_attr = NULL;
		}
	}
	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
			   NULL, "cannot allocate verbs spec attributes.");
	return -rte_errno;
}

/**
 * Copy the specification created into the flow.
 *
 * @param parser
 *   Internal parser structure.
 * @param src
 *   Create specification.
 * @param size
 *   Size in bytes of the specification to copy.
 */
static void
mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
		      unsigned int size)
{
	unsigned int i;
	void *dst;

	if (parser->drop) {
		dst = (void *)((uintptr_t)parser->drop_q.ibv_attr +
			       parser->drop_q.offset);
		memcpy(dst, src, size);
		++parser->drop_q.ibv_attr->num_of_specs;
		parser->drop_q.offset += size;
		return;
	}
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (!parser->queue[i].ibv_attr)
			continue;
		/* Specification must be the same L3 type or none. */
		if (parser->layer == HASH_RXQ_ETH ||
		    (hash_rxq_init[parser->layer].ip_version ==
		     hash_rxq_init[i].ip_version) ||
		    (hash_rxq_init[i].ip_version == 0)) {
			dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
				       parser->queue[i].offset);
			memcpy(dst, src, size);
			++parser->queue[i].ibv_attr->num_of_specs;
			parser->queue[i].offset += size;
		}
	}
}

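/*
 * Offset bookkeeping example (illustrative): right after allocation each
 * offset equals sizeof(struct ibv_flow_attr); copying an Ethernet spec
 * advances it by sizeof(struct ibv_flow_spec_eth), so the next
 * specification lands immediately after it in the same contiguous buffer.
 */
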
/**
 * Convert Ethernet item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data)
{
	const struct rte_flow_item_eth *spec = item->spec;
	const struct rte_flow_item_eth *mask = item->mask;
	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
	struct ibv_flow_spec_eth eth = {
		.type = parser->inner | IBV_FLOW_SPEC_ETH,
		.size = eth_size,
	};

	parser->layer = HASH_RXQ_ETH;
	if (spec) {
		unsigned int i;

		if (!mask)
			mask = default_mask;
		memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
		memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
		eth.val.ether_type = spec->type;
		memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
		memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
		eth.mask.ether_type = mask->type;
		/* Remove unwanted bits from values. */
		for (i = 0; i < ETHER_ADDR_LEN; ++i) {
			eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
			eth.val.src_mac[i] &= eth.mask.src_mac[i];
		}
		eth.val.ether_type &= eth.mask.ether_type;
	}
	mlx5_flow_create_copy(parser, &eth, eth_size);
	return 0;
}

/**
 * Convert VLAN item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_vlan(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data)
{
	const struct rte_flow_item_vlan *spec = item->spec;
	const struct rte_flow_item_vlan *mask = item->mask;
	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
	struct ibv_flow_spec_eth *eth;
	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);

	if (spec) {
		unsigned int i;

		if (!mask)
			mask = default_mask;
		if (parser->drop) {
			eth = (void *)((uintptr_t)parser->drop_q.ibv_attr +
				       parser->drop_q.offset - eth_size);
			eth->val.vlan_tag = spec->tci;
			eth->mask.vlan_tag = mask->tci;
			eth->val.vlan_tag &= eth->mask.vlan_tag;
			return 0;
		}
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (!parser->queue[i].ibv_attr)
				continue;

			eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
				       parser->queue[i].offset - eth_size);
			eth->val.vlan_tag = spec->tci;
			eth->mask.vlan_tag = mask->tci;
			eth->val.vlan_tag &= eth->mask.vlan_tag;
		}
	}
	return 0;
}

/**
 * Convert IPv4 item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data)
{
	const struct rte_flow_item_ipv4 *spec = item->spec;
	const struct rte_flow_item_ipv4 *mask = item->mask;
	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
	unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
	struct ibv_flow_spec_ipv4_ext ipv4 = {
		.type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
		.size = ipv4_size,
	};

	parser->layer = HASH_RXQ_IPV4;
	if (spec) {
		if (!mask)
			mask = default_mask;
		ipv4.val = (struct ibv_flow_ipv4_ext_filter){
			.src_ip = spec->hdr.src_addr,
			.dst_ip = spec->hdr.dst_addr,
			.proto = spec->hdr.next_proto_id,
			.tos = spec->hdr.type_of_service,
		};
		ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
			.src_ip = mask->hdr.src_addr,
			.dst_ip = mask->hdr.dst_addr,
			.proto = mask->hdr.next_proto_id,
			.tos = mask->hdr.type_of_service,
		};
		/* Remove unwanted bits from values. */
		ipv4.val.src_ip &= ipv4.mask.src_ip;
		ipv4.val.dst_ip &= ipv4.mask.dst_ip;
		ipv4.val.proto &= ipv4.mask.proto;
		ipv4.val.tos &= ipv4.mask.tos;
	}
	mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
	return 0;
}

/**
 * Convert IPv6 item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data)
{
	const struct rte_flow_item_ipv6 *spec = item->spec;
	const struct rte_flow_item_ipv6 *mask = item->mask;
	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
	unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
	struct ibv_flow_spec_ipv6 ipv6 = {
		.type = parser->inner | IBV_FLOW_SPEC_IPV6,
		.size = ipv6_size,
	};

	parser->layer = HASH_RXQ_IPV6;
	if (spec) {
		unsigned int i;

		if (!mask)
			mask = default_mask;
		memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
		       RTE_DIM(ipv6.val.src_ip));
		memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
		       RTE_DIM(ipv6.val.dst_ip));
		/* Set the remaining header fields; they are masked below. */
		ipv6.val.flow_label = spec->hdr.vtc_flow;
		ipv6.val.next_hdr = spec->hdr.proto;
		ipv6.val.hop_limit = spec->hdr.hop_limits;
		memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
		       RTE_DIM(ipv6.mask.src_ip));
		memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
		       RTE_DIM(ipv6.mask.dst_ip));
		ipv6.mask.flow_label = mask->hdr.vtc_flow;
		ipv6.mask.next_hdr = mask->hdr.proto;
		ipv6.mask.hop_limit = mask->hdr.hop_limits;
		/* Remove unwanted bits from values. */
		for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
			ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
			ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
		}
		ipv6.val.flow_label &= ipv6.mask.flow_label;
		ipv6.val.next_hdr &= ipv6.mask.next_hdr;
		ipv6.val.hop_limit &= ipv6.mask.hop_limit;
	}
	mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
	return 0;
}

/**
 * Convert UDP item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_udp(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data)
{
	const struct rte_flow_item_udp *spec = item->spec;
	const struct rte_flow_item_udp *mask = item->mask;
	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
	unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
	struct ibv_flow_spec_tcp_udp udp = {
		.type = parser->inner | IBV_FLOW_SPEC_UDP,
		.size = udp_size,
	};

	if (parser->layer == HASH_RXQ_IPV4)
		parser->layer = HASH_RXQ_UDPV4;
	else
		parser->layer = HASH_RXQ_UDPV6;
	if (spec) {
		if (!mask)
			mask = default_mask;
		udp.val.dst_port = spec->hdr.dst_port;
		udp.val.src_port = spec->hdr.src_port;
		udp.mask.dst_port = mask->hdr.dst_port;
		udp.mask.src_port = mask->hdr.src_port;
		/* Remove unwanted bits from values. */
		udp.val.src_port &= udp.mask.src_port;
		udp.val.dst_port &= udp.mask.dst_port;
	}
	mlx5_flow_create_copy(parser, &udp, udp_size);
	return 0;
}

/**
 * Convert TCP item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_tcp(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data)
{
	const struct rte_flow_item_tcp *spec = item->spec;
	const struct rte_flow_item_tcp *mask = item->mask;
	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
	unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
	struct ibv_flow_spec_tcp_udp tcp = {
		.type = parser->inner | IBV_FLOW_SPEC_TCP,
		.size = tcp_size,
	};

	if (parser->layer == HASH_RXQ_IPV4)
		parser->layer = HASH_RXQ_TCPV4;
	else
		parser->layer = HASH_RXQ_TCPV6;
	if (spec) {
		if (!mask)
			mask = default_mask;
		tcp.val.dst_port = spec->hdr.dst_port;
		tcp.val.src_port = spec->hdr.src_port;
		tcp.mask.dst_port = mask->hdr.dst_port;
		tcp.mask.src_port = mask->hdr.src_port;
		/* Remove unwanted bits from values. */
		tcp.val.src_port &= tcp.mask.src_port;
		tcp.val.dst_port &= tcp.mask.dst_port;
	}
	mlx5_flow_create_copy(parser, &tcp, tcp_size);
	return 0;
}

/**
 * Convert VXLAN item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
		       const void *default_mask,
		       void *data)
{
	const struct rte_flow_item_vxlan *spec = item->spec;
	const struct rte_flow_item_vxlan *mask = item->mask;
	struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
	struct ibv_flow_spec_tunnel vxlan = {
		.type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
		.size = size,
	};
	union vni {
		uint32_t vlan_id;
		uint8_t vni[4];
	} id;

	id.vni[0] = 0;
	parser->inner = IBV_FLOW_SPEC_INNER;
	if (spec) {
		if (!mask)
			mask = default_mask;
		memcpy(&id.vni[1], spec->vni, 3);
		vxlan.val.tunnel_id = id.vlan_id;
		memcpy(&id.vni[1], mask->vni, 3);
		vxlan.mask.tunnel_id = id.vlan_id;
		/* Remove unwanted bits from values. */
		vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
	}
	mlx5_flow_create_copy(parser, &vxlan, size);
	return 0;
}

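/*
 * The union above packs the 24-bit VNI into the upper three bytes of the
 * 32-bit tunnel_id. Illustrative example: a VNI of 0x123456 yields the
 * bytes { 0x00, 0x12, 0x34, 0x56 } in memory order, i.e. the VNI in
 * network byte order preceded by a zero byte.
 */
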
/**
 * Convert mark/flag action to Verbs specification.
 *
 * @param parser
 *   Internal parser structure.
 * @param mark_id
 *   Mark identifier.
 */
static int
mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
{
	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
	struct ibv_flow_spec_action_tag tag = {
		.type = IBV_FLOW_SPEC_ACTION_TAG,
		.size = size,
		.tag_id = mlx5_flow_mark_set(mark_id),
	};

	assert(parser->mark);
	mlx5_flow_create_copy(parser, &tag, size);
	return 0;
}

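/*
 * Note: mlx5_flow_mark_set() (defined elsewhere in the PMD) converts the
 * user mark ID to the device representation; to the best of our
 * understanding it also offsets the value so that a mark of 0 remains
 * distinguishable from unmarked traffic. Illustrative use for the FLAG
 * action, which carries no explicit ID:
 *
 * @code
 * mlx5_flow_create_flag_mark(parser, MLX5_FLOW_MARK_DEFAULT);
 * @endcode
 */
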
/**
 * Complete flow rule creation with a drop queue.
 *
 * @param priv
 *   Pointer to private structure.
 * @param parser
 *   Internal parser structure.
 * @param flow
 *   Pointer to the rte_flow.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_flow_create_action_queue_drop(struct priv *priv,
				   struct mlx5_flow_parse *parser,
				   struct rte_flow *flow,
				   struct rte_flow_error *error)
{
	struct ibv_flow_spec_action_drop *drop;
	unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
	int err = 0;

	assert(priv->pd);
	assert(priv->ctx);
	flow->drop = 1;
	drop = (void *)((uintptr_t)parser->drop_q.ibv_attr +
			parser->drop_q.offset);
	*drop = (struct ibv_flow_spec_action_drop){
			.type = IBV_FLOW_SPEC_ACTION_DROP,
			.size = size,
	};
	++parser->drop_q.ibv_attr->num_of_specs;
	parser->drop_q.offset += size;
	/*
	 * The flow must own the Verbs attribute even when the device is
	 * stopped, otherwise priv_flow_start() has nothing to apply.
	 */
	flow->drxq.ibv_attr = parser->drop_q.ibv_attr;
	parser->drop_q.ibv_attr = NULL;
	if (!priv->dev->data->dev_started)
		return 0;
	flow->drxq.ibv_flow = ibv_create_flow(priv->flow_drop_queue->qp,
					      flow->drxq.ibv_attr);
	if (!flow->drxq.ibv_flow) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "flow rule creation failure");
		err = ENOMEM;
		goto error;
	}
	return 0;
error:
	assert(flow);
	if (flow->drxq.ibv_flow) {
		claim_zero(ibv_destroy_flow(flow->drxq.ibv_flow));
		flow->drxq.ibv_flow = NULL;
	}
	if (flow->drxq.ibv_attr) {
		rte_free(flow->drxq.ibv_attr);
		flow->drxq.ibv_attr = NULL;
	}
	return err;
}

/**
 * Create hash Rx queues when RSS is enabled.
 *
 * @param priv
 *   Pointer to private structure.
 * @param parser
 *   Internal parser structure.
 * @param flow
 *   Pointer to the rte_flow.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, an errno value otherwise and rte_errno is set.
 */
static int
priv_flow_create_action_queue_rss(struct priv *priv,
				  struct mlx5_flow_parse *parser,
				  struct rte_flow *flow,
				  struct rte_flow_error *error)
{
	unsigned int i;

	for (i = 0; i != hash_rxq_init_n; ++i) {
		uint64_t hash_fields;

		if (!parser->queue[i].ibv_attr)
			continue;
		flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
		parser->queue[i].ibv_attr = NULL;
		hash_fields = hash_rxq_init[i].hash_fields;
		flow->frxq[i].hrxq =
			mlx5_priv_hrxq_get(priv,
					   parser->rss_conf.rss_key,
					   parser->rss_conf.rss_key_len,
					   hash_fields,
					   parser->queues,
					   hash_fields ? parser->queues_n : 1);
		if (flow->frxq[i].hrxq)
			continue;
		flow->frxq[i].hrxq =
			mlx5_priv_hrxq_new(priv,
					   parser->rss_conf.rss_key,
					   parser->rss_conf.rss_key_len,
					   hash_fields,
					   parser->queues,
					   hash_fields ? parser->queues_n : 1);
		if (!flow->frxq[i].hrxq) {
			rte_flow_error_set(error, ENOMEM,
					   RTE_FLOW_ERROR_TYPE_HANDLE,
					   NULL, "cannot create hash rxq");
			return ENOMEM;
		}
	}
	return 0;
}

/**
 * Complete flow rule creation.
 *
 * @param priv
 *   Pointer to private structure.
 * @param parser
 *   Internal parser structure.
 * @param flow
 *   Pointer to the rte_flow.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, an errno value otherwise and rte_errno is set.
 */
static int
priv_flow_create_action_queue(struct priv *priv,
			      struct mlx5_flow_parse *parser,
			      struct rte_flow *flow,
			      struct rte_flow_error *error)
{
	int err = 0;
	unsigned int i;

	assert(priv->pd);
	assert(priv->ctx);
	assert(!parser->drop);
	err = priv_flow_create_action_queue_rss(priv, parser, flow, error);
	if (err)
		goto error;
	if (!priv->dev->data->dev_started)
		return 0;
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (!flow->frxq[i].hrxq)
			continue;
		flow->frxq[i].ibv_flow =
			ibv_create_flow(flow->frxq[i].hrxq->qp,
					flow->frxq[i].ibv_attr);
		if (!flow->frxq[i].ibv_flow) {
			rte_flow_error_set(error, ENOMEM,
					   RTE_FLOW_ERROR_TYPE_HANDLE,
					   NULL, "flow rule creation failure");
			err = ENOMEM;
			goto error;
		}
		DEBUG("%p type %d QP %p ibv_flow %p",
		      (void *)flow, i,
		      (void *)flow->frxq[i].hrxq,
		      (void *)flow->frxq[i].ibv_flow);
	}
	for (i = 0; i != parser->queues_n; ++i) {
		struct mlx5_rxq_data *q =
			(*priv->rxqs)[parser->queues[i]];

		q->mark |= parser->mark;
	}
	return 0;
error:
	assert(flow);
	for (i = 0; i != hash_rxq_init_n; ++i) {
		if (flow->frxq[i].ibv_flow) {
			struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;

			claim_zero(ibv_destroy_flow(ibv_flow));
		}
		if (flow->frxq[i].hrxq)
			mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq);
		if (flow->frxq[i].ibv_attr)
			rte_free(flow->frxq[i].ibv_attr);
	}
	return err;
}

/**
 * Convert a flow.
 *
 * @param priv
 *   Pointer to private structure.
 * @param list
 *   Pointer to a TAILQ flow list.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A flow on success, NULL otherwise.
 */
static struct rte_flow *
priv_flow_create(struct priv *priv,
		 struct mlx5_flows *list,
		 const struct rte_flow_attr *attr,
		 const struct rte_flow_item items[],
		 const struct rte_flow_action actions[],
		 struct rte_flow_error *error)
{
	struct mlx5_flow_parse parser = { .create = 1, };
	struct rte_flow *flow = NULL;
	unsigned int i;
	int err;

	err = priv_flow_convert(priv, attr, items, actions, error, &parser);
	if (err)
		goto exit;
	flow = rte_calloc(__func__, 1,
			  sizeof(*flow) + parser.queues_n * sizeof(uint16_t),
			  0);
	if (!flow) {
		rte_flow_error_set(error, ENOMEM,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "cannot allocate flow memory");
		return NULL;
	}
	/* Copy queues configuration. */
	flow->queues = (uint16_t (*)[])(flow + 1);
	memcpy(flow->queues, parser.queues, parser.queues_n * sizeof(uint16_t));
	flow->queues_n = parser.queues_n;
	/* Copy RSS configuration. */
	flow->rss_conf = parser.rss_conf;
	flow->rss_conf.rss_key = flow->rss_key;
	memcpy(flow->rss_key, parser.rss_key, parser.rss_conf.rss_key_len);
	/* Finalise the flow. */
	if (parser.drop)
		err = priv_flow_create_action_queue_drop(priv, &parser, flow,
							 error);
	else
		err = priv_flow_create_action_queue(priv, &parser, flow, error);
	if (err)
		goto exit;
	TAILQ_INSERT_TAIL(list, flow, next);
	DEBUG("Flow created %p", (void *)flow);
	return flow;
exit:
	if (parser.drop) {
		rte_free(parser.drop_q.ibv_attr);
	} else {
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (parser.queue[i].ibv_attr)
				rte_free(parser.queue[i].ibv_attr);
		}
	}
	rte_free(flow);
	return NULL;
}

/**
 * Validate a flow supported by the NIC.
 *
 * @see rte_flow_validate()
 * @see rte_flow_ops
 */
int
mlx5_flow_validate(struct rte_eth_dev *dev,
		   const struct rte_flow_attr *attr,
		   const struct rte_flow_item items[],
		   const struct rte_flow_action actions[],
		   struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;
	int ret;
	struct mlx5_flow_parse parser = { .create = 0, };

	priv_lock(priv);
	ret = priv_flow_convert(priv, attr, items, actions, error, &parser);
	priv_unlock(priv);
	return ret;
}

/**
 * Create a flow.
 *
 * @see rte_flow_create()
 * @see rte_flow_ops
 */
struct rte_flow *
mlx5_flow_create(struct rte_eth_dev *dev,
		 const struct rte_flow_attr *attr,
		 const struct rte_flow_item items[],
		 const struct rte_flow_action actions[],
		 struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;
	struct rte_flow *flow;

	priv_lock(priv);
	flow = priv_flow_create(priv, &priv->flows, attr, items, actions,
				error);
	priv_unlock(priv);
	return flow;
}

/**
 * Destroy a flow.
 *
 * @param priv
 *   Pointer to private structure.
 * @param list
 *   Pointer to a TAILQ flow list.
 * @param[in] flow
 *   Flow to destroy.
 */
static void
priv_flow_destroy(struct priv *priv,
		  struct mlx5_flows *list,
		  struct rte_flow *flow)
{
	unsigned int i;

	if (flow->drop || !flow->mark)
		goto free;
	for (i = 0; i != flow->queues_n; ++i) {
		struct rte_flow *tmp;
		int mark = 0;

		/*
		 * To remove the mark from the queue, the queue must not be
		 * present in any other marked flow (RSS or not).
		 */
		TAILQ_FOREACH(tmp, list, next) {
			unsigned int j;
			uint16_t *tqs = NULL;
			uint16_t tq_n = 0;

			if (!tmp->mark)
				continue;
			for (j = 0; j != hash_rxq_init_n; ++j) {
				if (!tmp->frxq[j].hrxq)
					continue;
				tqs = tmp->frxq[j].hrxq->ind_table->queues;
				tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
			}
			if (!tq_n)
				continue;
			for (j = 0; (j != tq_n) && !mark; j++)
				if (tqs[j] == (*flow->queues)[i])
					mark = 1;
		}
		(*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
	}
free:
	if (flow->drop) {
		if (flow->drxq.ibv_flow)
			claim_zero(ibv_destroy_flow(flow->drxq.ibv_flow));
		rte_free(flow->drxq.ibv_attr);
	} else {
		for (i = 0; i != hash_rxq_init_n; ++i) {
			struct mlx5_flow *frxq = &flow->frxq[i];

			if (frxq->ibv_flow)
				claim_zero(ibv_destroy_flow(frxq->ibv_flow));
			if (frxq->hrxq)
				mlx5_priv_hrxq_release(priv, frxq->hrxq);
			if (frxq->ibv_attr)
				rte_free(frxq->ibv_attr);
		}
	}
	TAILQ_REMOVE(list, flow, next);
	DEBUG("Flow destroyed %p", (void *)flow);
	rte_free(flow);
}

/**
 * Destroy all flows.
 *
 * @param priv
 *   Pointer to private structure.
 * @param list
 *   Pointer to a TAILQ flow list.
 */
void
priv_flow_flush(struct priv *priv, struct mlx5_flows *list)
{
	while (!TAILQ_EMPTY(list)) {
		struct rte_flow *flow;

		flow = TAILQ_FIRST(list);
		priv_flow_destroy(priv, list, flow);
	}
}

/**
 * Create drop queue.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   0 on success.
 */
int
priv_flow_create_drop_queue(struct priv *priv)
{
	struct mlx5_hrxq_drop *fdq = NULL;

	assert(priv->pd);
	assert(priv->ctx);
	fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
	if (!fdq) {
		WARN("cannot allocate memory for drop queue");
		goto error;
	}
	fdq->cq = ibv_create_cq(priv->ctx, 1, NULL, NULL, 0);
	if (!fdq->cq) {
		WARN("cannot allocate CQ for drop queue");
		goto error;
	}
	fdq->wq = ibv_create_wq(priv->ctx,
			&(struct ibv_wq_init_attr){
			.wq_type = IBV_WQT_RQ,
			.max_wr = 1,
			.max_sge = 1,
			.pd = priv->pd,
			.cq = fdq->cq,
			});
	if (!fdq->wq) {
		WARN("cannot allocate WQ for drop queue");
		goto error;
	}
	fdq->ind_table = ibv_create_rwq_ind_table(priv->ctx,
			&(struct ibv_rwq_ind_table_init_attr){
			.log_ind_tbl_size = 0,
			.ind_tbl = &fdq->wq,
			.comp_mask = 0,
			});
	if (!fdq->ind_table) {
		WARN("cannot allocate indirection table for drop queue");
		goto error;
	}
	fdq->qp = ibv_create_qp_ex(priv->ctx,
		&(struct ibv_qp_init_attr_ex){
			.qp_type = IBV_QPT_RAW_PACKET,
			.comp_mask =
				IBV_QP_INIT_ATTR_PD |
				IBV_QP_INIT_ATTR_IND_TABLE |
				IBV_QP_INIT_ATTR_RX_HASH,
			.rx_hash_conf = (struct ibv_rx_hash_conf){
				.rx_hash_function =
					IBV_RX_HASH_FUNC_TOEPLITZ,
				.rx_hash_key_len = rss_hash_default_key_len,
				.rx_hash_key = rss_hash_default_key,
				.rx_hash_fields_mask = 0,
				},
			.rwq_ind_tbl = fdq->ind_table,
			.pd = priv->pd,
		});
	if (!fdq->qp) {
		WARN("cannot allocate QP for drop queue");
		goto error;
	}
	priv->flow_drop_queue = fdq;
	return 0;
error:
	/* fdq may be NULL when the initial allocation itself failed. */
	if (fdq) {
		if (fdq->qp)
			claim_zero(ibv_destroy_qp(fdq->qp));
		if (fdq->ind_table)
			claim_zero(ibv_destroy_rwq_ind_table(fdq->ind_table));
		if (fdq->wq)
			claim_zero(ibv_destroy_wq(fdq->wq));
		if (fdq->cq)
			claim_zero(ibv_destroy_cq(fdq->cq));
		rte_free(fdq);
	}
	priv->flow_drop_queue = NULL;
	return -1;
}

/**
 * Delete drop queue.
 *
 * @param priv
 *   Pointer to private structure.
 */
void
priv_flow_delete_drop_queue(struct priv *priv)
{
	struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;

	if (!fdq)
		return;
	if (fdq->qp)
		claim_zero(ibv_destroy_qp(fdq->qp));
	if (fdq->ind_table)
		claim_zero(ibv_destroy_rwq_ind_table(fdq->ind_table));
	if (fdq->wq)
		claim_zero(ibv_destroy_wq(fdq->wq));
	if (fdq->cq)
		claim_zero(ibv_destroy_cq(fdq->cq));
	rte_free(fdq);
	priv->flow_drop_queue = NULL;
}

/**
 * Remove all flows.
 *
 * @param priv
 *   Pointer to private structure.
 * @param list
 *   Pointer to a TAILQ flow list.
 */
void
priv_flow_stop(struct priv *priv, struct mlx5_flows *list)
{
	struct rte_flow *flow;

	TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
		unsigned int i;

		if (flow->drop) {
			if (!flow->drxq.ibv_flow)
				continue;
			claim_zero(ibv_destroy_flow(flow->drxq.ibv_flow));
			flow->drxq.ibv_flow = NULL;
			/* Next flow. */
			continue;
		}
		if (flow->mark) {
			struct mlx5_ind_table_ibv *ind_tbl = NULL;

			for (i = 0; i != hash_rxq_init_n; ++i) {
				if (!flow->frxq[i].hrxq)
					continue;
				ind_tbl = flow->frxq[i].hrxq->ind_table;
			}
			assert(ind_tbl);
			for (i = 0; i != ind_tbl->queues_n; ++i)
				(*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
		}
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (!flow->frxq[i].ibv_flow)
				continue;
			claim_zero(ibv_destroy_flow(flow->frxq[i].ibv_flow));
			flow->frxq[i].ibv_flow = NULL;
			mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq);
			flow->frxq[i].hrxq = NULL;
		}
		DEBUG("Flow %p removed", (void *)flow);
	}
}

/**
 * Add all flows.
 *
 * @param priv
 *   Pointer to private structure.
 * @param list
 *   Pointer to a TAILQ flow list.
 *
 * @return
 *   0 on success, an errno value otherwise and rte_errno is set.
 */
int
priv_flow_start(struct priv *priv, struct mlx5_flows *list)
{
	struct rte_flow *flow;

	TAILQ_FOREACH(flow, list, next) {
		unsigned int i;

		if (flow->drop) {
			flow->drxq.ibv_flow =
				ibv_create_flow(priv->flow_drop_queue->qp,
						flow->drxq.ibv_attr);
			if (!flow->drxq.ibv_flow) {
				DEBUG("Flow %p cannot be applied",
				      (void *)flow);
				rte_errno = EINVAL;
				return rte_errno;
			}
			DEBUG("Flow %p applied", (void *)flow);
			/* Next flow. */
			continue;
		}
		for (i = 0; i != hash_rxq_init_n; ++i) {
			if (!flow->frxq[i].ibv_attr)
				continue;
			flow->frxq[i].hrxq =
				mlx5_priv_hrxq_get(priv, flow->rss_conf.rss_key,
						   flow->rss_conf.rss_key_len,
						   hash_rxq_init[i].hash_fields,
						   (*flow->queues),
						   flow->queues_n);
			if (flow->frxq[i].hrxq)
				goto flow_create;
			flow->frxq[i].hrxq =
				mlx5_priv_hrxq_new(priv, flow->rss_conf.rss_key,
						   flow->rss_conf.rss_key_len,
						   hash_rxq_init[i].hash_fields,
						   (*flow->queues),
						   flow->queues_n);
			if (!flow->frxq[i].hrxq) {
				DEBUG("Flow %p cannot be applied",
				      (void *)flow);
				rte_errno = EINVAL;
				return rte_errno;
			}
flow_create:
			flow->frxq[i].ibv_flow =
				ibv_create_flow(flow->frxq[i].hrxq->qp,
						flow->frxq[i].ibv_attr);
			if (!flow->frxq[i].ibv_flow) {
				DEBUG("Flow %p cannot be applied",
				      (void *)flow);
				rte_errno = EINVAL;
				return rte_errno;
			}
			DEBUG("Flow %p applied", (void *)flow);
		}
		if (!flow->mark)
			continue;
		for (i = 0; i != flow->queues_n; ++i)
			(*priv->rxqs)[(*flow->queues)[i]]->mark = 1;
	}
	return 0;
}

/**
 * Verify the flow list is empty.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   The number of flows not released.
 */
int
priv_flow_verify(struct priv *priv)
{
	struct rte_flow *flow;
	int ret = 0;

	TAILQ_FOREACH(flow, &priv->flows, next) {
		DEBUG("%p: flow %p still referenced", (void *)priv,
		      (void *)flow);
		++ret;
	}
	return ret;
}

/**
 * Enable a control flow configured from the control plane.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param eth_spec
 *   An Ethernet flow spec to apply.
 * @param eth_mask
 *   An Ethernet flow mask to apply.
 * @param vlan_spec
 *   A VLAN flow spec to apply.
 * @param vlan_mask
 *   A VLAN flow mask to apply.
 *
 * @return
 *   0 on success.
 */
int
mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
		    struct rte_flow_item_eth *eth_spec,
		    struct rte_flow_item_eth *eth_mask,
		    struct rte_flow_item_vlan *vlan_spec,
		    struct rte_flow_item_vlan *vlan_mask)
{
	struct priv *priv = dev->data->dev_private;
	const struct rte_flow_attr attr = {
		.ingress = 1,
		.priority = MLX5_CTRL_FLOW_PRIORITY,
	};
	struct rte_flow_item items[] = {
		{
			.type = RTE_FLOW_ITEM_TYPE_ETH,
			.spec = eth_spec,
			.last = NULL,
			.mask = eth_mask,
		},
		{
			.type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
				RTE_FLOW_ITEM_TYPE_END,
			.spec = vlan_spec,
			.last = NULL,
			.mask = vlan_mask,
		},
		{
			.type = RTE_FLOW_ITEM_TYPE_END,
		},
	};
	struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_RSS,
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		},
	};
	struct rte_flow *flow;
	struct rte_flow_error error;
	unsigned int i;
	union {
		struct rte_flow_action_rss rss;
		struct {
			const struct rte_eth_rss_conf *rss_conf;
			uint16_t num;
			uint16_t queue[RTE_MAX_QUEUES_PER_PORT];
		} local;
	} action_rss;

	if (!priv->reta_idx_n)
		return EINVAL;
	for (i = 0; i != priv->reta_idx_n; ++i)
		action_rss.local.queue[i] = (*priv->reta_idx)[i];
	action_rss.local.rss_conf = &priv->rss_conf;
	action_rss.local.num = priv->reta_idx_n;
	actions[0].conf = (const void *)&action_rss.rss;
	flow = priv_flow_create(priv, &priv->ctrl_flows, &attr, items, actions,
				&error);
	if (!flow)
		return rte_errno;
	return 0;
}

/**
 * Enable a control flow configured from the control plane.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param eth_spec
 *   An Ethernet flow spec to apply.
 * @param eth_mask
 *   An Ethernet flow mask to apply.
 *
 * @return
 *   0 on success.
 */
int
mlx5_ctrl_flow(struct rte_eth_dev *dev,
	       struct rte_flow_item_eth *eth_spec,
	       struct rte_flow_item_eth *eth_mask)
{
	return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
}

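/*
 * Typical control-path use (illustrative, mirroring how the PMD enables
 * broadcast traffic elsewhere):
 *
 * @code
 * struct rte_flow_item_eth bcast = {
 *	.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 * };
 *
 * mlx5_ctrl_flow(dev, &bcast, &bcast);
 * @endcode
 */
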
/**
 * Destroy a flow.
 *
 * @see rte_flow_destroy()
 * @see rte_flow_ops
 */
int
mlx5_flow_destroy(struct rte_eth_dev *dev,
		  struct rte_flow *flow,
		  struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	(void)error;
	priv_lock(priv);
	priv_flow_destroy(priv, &priv->flows, flow);
	priv_unlock(priv);
	return 0;
}

/**
 * Destroy all flows.
 *
 * @see rte_flow_flush()
 * @see rte_flow_ops
 */
int
mlx5_flow_flush(struct rte_eth_dev *dev,
		struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	(void)error;
	priv_lock(priv);
	priv_flow_flush(priv, &priv->flows);
	priv_unlock(priv);
	return 0;
}

/**
 * Isolated mode.
 *
 * @see rte_flow_isolate()
 * @see rte_flow_ops
 */
int
mlx5_flow_isolate(struct rte_eth_dev *dev,
		  int enable,
		  struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	priv_lock(priv);
	if (dev->data->dev_started) {
		rte_flow_error_set(error, EBUSY,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "port must be stopped first");
		priv_unlock(priv);
		return -rte_errno;
	}
	priv->isolated = !!enable;
	priv_unlock(priv);
	return 0;
}