4 * Copyright 2015 6WIND S.A.
5 * Copyright 2015 Mellanox.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of 6WIND S.A. nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 #include <sys/queue.h>
43 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
45 #pragma GCC diagnostic ignored "-Wpedantic"
47 #include <infiniband/verbs.h>
48 #include <infiniband/mlx5dv.h>
50 #pragma GCC diagnostic error "-Wpedantic"
54 #include <rte_malloc.h>
55 #include <rte_ethdev.h>
56 #include <rte_common.h>
57 #include <rte_interrupts.h>
58 #include <rte_debug.h>
62 #include "mlx5_rxtx.h"
63 #include "mlx5_utils.h"
64 #include "mlx5_autoconf.h"
65 #include "mlx5_defs.h"
67 /* Initialization data for hash RX queues. */
68 const struct hash_rxq_init hash_rxq_init[] = {
70 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
71 IBV_RX_HASH_DST_IPV4 |
72 IBV_RX_HASH_SRC_PORT_TCP |
73 IBV_RX_HASH_DST_PORT_TCP),
74 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
76 .flow_spec.tcp_udp = {
77 .type = IBV_FLOW_SPEC_TCP,
78 .size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp),
80 .underlayer = &hash_rxq_init[HASH_RXQ_IPV4],
83 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
84 IBV_RX_HASH_DST_IPV4 |
85 IBV_RX_HASH_SRC_PORT_UDP |
86 IBV_RX_HASH_DST_PORT_UDP),
87 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
89 .flow_spec.tcp_udp = {
90 .type = IBV_FLOW_SPEC_UDP,
91 .size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp),
93 .underlayer = &hash_rxq_init[HASH_RXQ_IPV4],
96 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
97 IBV_RX_HASH_DST_IPV4),
98 .dpdk_rss_hf = (ETH_RSS_IPV4 |
102 .type = IBV_FLOW_SPEC_IPV4,
103 .size = sizeof(hash_rxq_init[0].flow_spec.ipv4),
105 .underlayer = &hash_rxq_init[HASH_RXQ_ETH],
108 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
109 IBV_RX_HASH_DST_IPV6 |
110 IBV_RX_HASH_SRC_PORT_TCP |
111 IBV_RX_HASH_DST_PORT_TCP),
112 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
114 .flow_spec.tcp_udp = {
115 .type = IBV_FLOW_SPEC_TCP,
116 .size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp),
118 .underlayer = &hash_rxq_init[HASH_RXQ_IPV6],
121 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
122 IBV_RX_HASH_DST_IPV6 |
123 IBV_RX_HASH_SRC_PORT_UDP |
124 IBV_RX_HASH_DST_PORT_UDP),
125 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
127 .flow_spec.tcp_udp = {
128 .type = IBV_FLOW_SPEC_UDP,
129 .size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp),
131 .underlayer = &hash_rxq_init[HASH_RXQ_IPV6],
134 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
135 IBV_RX_HASH_DST_IPV6),
136 .dpdk_rss_hf = (ETH_RSS_IPV6 |
140 .type = IBV_FLOW_SPEC_IPV6,
141 .size = sizeof(hash_rxq_init[0].flow_spec.ipv6),
143 .underlayer = &hash_rxq_init[HASH_RXQ_ETH],
150 .type = IBV_FLOW_SPEC_ETH,
151 .size = sizeof(hash_rxq_init[0].flow_spec.eth),
157 /* Number of entries in hash_rxq_init[]. */
158 const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
160 /* Initialization data for hash RX queue indirection tables. */
161 static const struct ind_table_init ind_table_init[] = {
163 .max_size = -1u, /* Superseded by HW limitations. */
165 1 << HASH_RXQ_TCPV4 |
166 1 << HASH_RXQ_UDPV4 |
168 1 << HASH_RXQ_TCPV6 |
169 1 << HASH_RXQ_UDPV6 |
176 .hash_types = 1 << HASH_RXQ_ETH,
181 #define IND_TABLE_INIT_N RTE_DIM(ind_table_init)
183 /* Default RSS hash key also used for ConnectX-3. */
184 uint8_t rss_hash_default_key[] = {
185 0x2c, 0xc6, 0x81, 0xd1,
186 0x5b, 0xdb, 0xf4, 0xf7,
187 0xfc, 0xa2, 0x83, 0x19,
188 0xdb, 0x1a, 0x3e, 0x94,
189 0x6b, 0x9e, 0x38, 0xd9,
190 0x2c, 0x9c, 0x03, 0xd1,
191 0xad, 0x99, 0x44, 0xa7,
192 0xd9, 0x56, 0x3d, 0x59,
193 0x06, 0x3c, 0x25, 0xf3,
194 0xfc, 0x1f, 0xdc, 0x2a,
197 /* Length of the default RSS hash key. */
198 const size_t rss_hash_default_key_len = sizeof(rss_hash_default_key);
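/*
 * Illustrative note (not from the original sources): the Toeplitz function
 * selected through IBV_RX_HASH_FUNC_TOEPLITZ below typically takes a
 * 40-byte key, which is exactly what rss_hash_default_key provides. A
 * hypothetical probe-time assertion could make that dependency explicit:
 *
 *   RTE_BUILD_BUG_ON(sizeof(rss_hash_default_key) != 40);
 */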
201 * Populate flow steering rule for a given hash RX queue type using
202 * information from hash_rxq_init[]. Nothing is written to flow_attr when
203 * flow_attr_size is not large enough, but the required size is still returned.
206 * Pointer to private structure.
207 * @param[out] flow_attr
208 * Pointer to flow attribute structure to fill. Note that the allocated
209 * area must be large enough to hold all flow specifications.
210 * @param flow_attr_size
211 * Entire size of flow_attr and trailing room for flow specifications.
213 * Hash RX queue type to use for flow steering rule.
216 * Total size of the flow attribute buffer. No errors are defined.
219 priv_flow_attr(struct priv *priv, struct ibv_flow_attr *flow_attr,
220 size_t flow_attr_size, enum hash_rxq_type type)
222 size_t offset = sizeof(*flow_attr);
223 const struct hash_rxq_init *init = &hash_rxq_init[type];
225 assert(priv != NULL);
226 assert((size_t)type < RTE_DIM(hash_rxq_init));
228 offset += init->flow_spec.hdr.size;
229 init = init->underlayer;
230 } while (init != NULL);
231 if (offset > flow_attr_size)
233 flow_attr_size = offset;
234 init = &hash_rxq_init[type];
235 *flow_attr = (struct ibv_flow_attr){
236 .type = IBV_FLOW_ATTR_NORMAL,
237 /* Priorities < 3 are reserved for flow director. */
238 .priority = init->flow_priority + 3,
244 offset -= init->flow_spec.hdr.size;
245 memcpy((void *)((uintptr_t)flow_attr + offset),
247 init->flow_spec.hdr.size);
248 ++flow_attr->num_of_specs;
249 init = init->underlayer;
250 } while (init != NULL);
251 return flow_attr_size;
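/*
 * Usage sketch for the size negotiation described above (hypothetical
 * caller; "priv" and "hash_rxq" are assumed to come from the caller): a
 * first call with a zero-sized buffer only reports the space required,
 * which is then allocated and filled by a second call.
 *
 *   size_t size = priv_flow_attr(priv, NULL, 0, HASH_RXQ_TCPV4);
 *   struct ibv_flow_attr *attr = rte_zmalloc(__func__, size, 0);
 *   struct ibv_flow *flow = NULL;
 *
 *   if (attr != NULL) {
 *           priv_flow_attr(priv, attr, size, HASH_RXQ_TCPV4);
 *           flow = ibv_create_flow(hash_rxq->qp, attr);
 *           rte_free(attr);
 *   }
 */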
255 * Convert hash type position in indirection table initializer to
256 * hash RX queue type.
259 * Indirection table initializer.
261 * Hash type position.
264 * Hash RX queue type.
266 static enum hash_rxq_type
267 hash_rxq_type_from_pos(const struct ind_table_init *table, unsigned int pos)
269 enum hash_rxq_type type = HASH_RXQ_TCPV4;
271 assert(pos < table->hash_types_n);
273 if ((table->hash_types & (1 << type)) && (pos-- == 0))
281 * Filter out disabled hash RX queue types from ind_table_init[].
284 * Pointer to private structure.
289 * Number of table entries.
292 priv_make_ind_table_init(struct priv *priv,
293 struct ind_table_init (*table)[IND_TABLE_INIT_N])
298 unsigned int table_n = 0;
299 /* Mandatory to receive frames not handled by normal hash RX queues. */
300 unsigned int hash_types_sup = 1 << HASH_RXQ_ETH;
302 rss_hf = priv->rss_hf;
303 /* Process other protocols only if more than one queue. */
304 if (priv->rxqs_n > 1)
305 for (i = 0; (i != hash_rxq_init_n); ++i)
306 if (rss_hf & hash_rxq_init[i].dpdk_rss_hf)
307 hash_types_sup |= (1 << i);
309 /* Filter out entries whose protocols are not in the set. */
310 for (i = 0, j = 0; (i != IND_TABLE_INIT_N); ++i) {
314 /* j is increased only if the table has valid protocols. */
316 (*table)[j] = ind_table_init[i];
317 (*table)[j].hash_types &= hash_types_sup;
318 for (h = 0, nb = 0; (h != hash_rxq_init_n); ++h)
319 if (((*table)[j].hash_types >> h) & 0x1)
321 (*table)[j].hash_types_n = nb;
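/*
 * Worked example (illustrative): with priv->rxqs_n > 1 and
 * priv->rss_hf == ETH_RSS_NONFRAG_IPV4_TCP, hash_types_sup becomes
 * (1 << HASH_RXQ_TCPV4) | (1 << HASH_RXQ_ETH); the first ind_table_init[]
 * entry is kept with only the TCPv4 bit remaining (hash_types_n == 1) and
 * the HASH_RXQ_ETH entry is kept as well, so two tables are returned. With
 * a single Rx queue only the HASH_RXQ_ETH entry survives.
 */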
331 * Initialize hash RX queues and indirection table.
334 * Pointer to private structure.
337 * 0 on success, errno value on failure.
340 priv_create_hash_rxqs(struct priv *priv)
342 struct ibv_wq *wqs[priv->reta_idx_n];
343 struct ind_table_init ind_table_init[IND_TABLE_INIT_N];
344 unsigned int ind_tables_n =
345 priv_make_ind_table_init(priv, &ind_table_init);
346 unsigned int hash_rxqs_n = 0;
347 struct hash_rxq (*hash_rxqs)[] = NULL;
348 struct ibv_rwq_ind_table *(*ind_tables)[] = NULL;
354 assert(priv->ind_tables == NULL);
355 assert(priv->ind_tables_n == 0);
356 assert(priv->hash_rxqs == NULL);
357 assert(priv->hash_rxqs_n == 0);
358 assert(priv->pd != NULL);
359 assert(priv->ctx != NULL);
362 if (priv->rxqs_n == 0)
364 assert(priv->rxqs != NULL);
365 if (ind_tables_n == 0) {
366 ERROR("all hash RX queue types have been filtered out,"
367 " indirection table cannot be created");
370 if (priv->rxqs_n & (priv->rxqs_n - 1)) {
371 INFO("%u RX queues are configured, consider rounding this"
372 " number to the next power of two for better balancing",
374 DEBUG("indirection table extended to assume %u WQs",
377 for (i = 0; (i != priv->reta_idx_n); ++i) {
378 struct mlx5_rxq_ctrl *rxq_ctrl;
380 rxq_ctrl = container_of((*priv->rxqs)[(*priv->reta_idx)[i]],
381 struct mlx5_rxq_ctrl, rxq);
382 wqs[i] = rxq_ctrl->ibv->wq;
384 /* Get number of hash RX queues to configure. */
385 for (i = 0, hash_rxqs_n = 0; (i != ind_tables_n); ++i)
386 hash_rxqs_n += ind_table_init[i].hash_types_n;
387 DEBUG("allocating %u hash RX queues for %u WQs, %u indirection tables",
388 hash_rxqs_n, priv->rxqs_n, ind_tables_n);
389 /* Create indirection tables. */
390 ind_tables = rte_calloc(__func__, ind_tables_n,
391 sizeof((*ind_tables)[0]), 0);
392 if (ind_tables == NULL) {
394 ERROR("cannot allocate indirection tables container: %s",
398 for (i = 0; (i != ind_tables_n); ++i) {
399 struct ibv_rwq_ind_table_init_attr ind_init_attr = {
400 .log_ind_tbl_size = 0, /* Set below. */
404 unsigned int ind_tbl_size = ind_table_init[i].max_size;
405 struct ibv_rwq_ind_table *ind_table;
407 if (priv->reta_idx_n < ind_tbl_size)
408 ind_tbl_size = priv->reta_idx_n;
409 ind_init_attr.log_ind_tbl_size = log2above(ind_tbl_size);
411 ind_table = ibv_create_rwq_ind_table(priv->ctx,
413 if (ind_table != NULL) {
414 (*ind_tables)[i] = ind_table;
417 /* Not clear whether errno is set. */
418 err = (errno ? errno : EINVAL);
419 ERROR("RX indirection table creation failed with error %d: %s",
423 /* Allocate array that holds hash RX queues and related data. */
424 hash_rxqs = rte_calloc(__func__, hash_rxqs_n,
425 sizeof((*hash_rxqs)[0]), 0);
426 if (hash_rxqs == NULL) {
428 ERROR("cannot allocate hash RX queues container: %s",
432 for (i = 0, j = 0, k = 0;
433 ((i != hash_rxqs_n) && (j != ind_tables_n));
435 struct hash_rxq *hash_rxq = &(*hash_rxqs)[i];
436 enum hash_rxq_type type =
437 hash_rxq_type_from_pos(&ind_table_init[j], k);
438 struct rte_eth_rss_conf *priv_rss_conf =
439 (*priv->rss_conf)[type];
440 struct ibv_rx_hash_conf hash_conf = {
441 .rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ,
442 .rx_hash_key_len = (priv_rss_conf ?
443 priv_rss_conf->rss_key_len :
444 rss_hash_default_key_len),
445 .rx_hash_key = (priv_rss_conf ?
446 priv_rss_conf->rss_key :
447 rss_hash_default_key),
448 .rx_hash_fields_mask = hash_rxq_init[type].hash_fields,
450 struct ibv_qp_init_attr_ex qp_init_attr = {
451 .qp_type = IBV_QPT_RAW_PACKET,
452 .comp_mask = (IBV_QP_INIT_ATTR_PD |
453 IBV_QP_INIT_ATTR_IND_TABLE |
454 IBV_QP_INIT_ATTR_RX_HASH),
455 .rx_hash_conf = hash_conf,
456 .rwq_ind_tbl = (*ind_tables)[j],
460 DEBUG("using indirection table %u for hash RX queue %u type %d",
462 *hash_rxq = (struct hash_rxq){
464 .qp = ibv_create_qp_ex(priv->ctx, &qp_init_attr),
467 if (hash_rxq->qp == NULL) {
468 err = (errno ? errno : EINVAL);
469 ERROR("Hash RX QP creation failure: %s",
473 if (++k < ind_table_init[j].hash_types_n)
475 /* Switch to the next indirection table and reset hash RX
476 * queue type array index. */
480 priv->ind_tables = ind_tables;
481 priv->ind_tables_n = ind_tables_n;
482 priv->hash_rxqs = hash_rxqs;
483 priv->hash_rxqs_n = hash_rxqs_n;
487 if (hash_rxqs != NULL) {
488 for (i = 0; (i != hash_rxqs_n); ++i) {
489 struct ibv_qp *qp = (*hash_rxqs)[i].qp;
493 claim_zero(ibv_destroy_qp(qp));
497 if (ind_tables != NULL) {
498 for (j = 0; (j != ind_tables_n); ++j) {
499 struct ibv_rwq_ind_table *ind_table =
502 if (ind_table == NULL)
504 claim_zero(ibv_destroy_rwq_ind_table(ind_table));
506 rte_free(ind_tables);
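/*
 * Sketch of the expected call pattern (hypothetical; the actual call sites
 * live outside this file, in the device start/stop path):
 *
 *   err = priv_create_hash_rxqs(priv);
 *   if (err)
 *           ERROR("cannot create hash Rx queues: %s", strerror(err));
 *   ...
 *   priv_destroy_hash_rxqs(priv);
 */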
512 * Clean up hash RX queues and indirection table.
515 * Pointer to private structure.
518 priv_destroy_hash_rxqs(struct priv *priv)
522 DEBUG("destroying %u hash RX queues", priv->hash_rxqs_n);
523 if (priv->hash_rxqs_n == 0) {
524 assert(priv->hash_rxqs == NULL);
525 assert(priv->ind_tables == NULL);
528 for (i = 0; (i != priv->hash_rxqs_n); ++i) {
529 struct hash_rxq *hash_rxq = &(*priv->hash_rxqs)[i];
532 assert(hash_rxq->priv == priv);
533 assert(hash_rxq->qp != NULL);
534 /* Also check that there are no remaining flows. */
535 for (j = 0; (j != RTE_DIM(hash_rxq->special_flow)); ++j)
537 (k != RTE_DIM(hash_rxq->special_flow[j]));
539 assert(hash_rxq->special_flow[j][k] == NULL);
540 for (j = 0; (j != RTE_DIM(hash_rxq->mac_flow)); ++j)
541 for (k = 0; (k != RTE_DIM(hash_rxq->mac_flow[j])); ++k)
542 assert(hash_rxq->mac_flow[j][k] == NULL);
543 claim_zero(ibv_destroy_qp(hash_rxq->qp));
545 priv->hash_rxqs_n = 0;
546 rte_free(priv->hash_rxqs);
547 priv->hash_rxqs = NULL;
548 for (i = 0; (i != priv->ind_tables_n); ++i) {
549 struct ibv_rwq_ind_table *ind_table =
550 (*priv->ind_tables)[i];
552 assert(ind_table != NULL);
553 claim_zero(ibv_destroy_rwq_ind_table(ind_table));
555 priv->ind_tables_n = 0;
556 rte_free(priv->ind_tables);
557 priv->ind_tables = NULL;
561 * Check whether a given flow type is allowed.
564 * Pointer to private structure.
566 * Flow type to check.
569 * Nonzero if the given flow type is allowed.
572 priv_allow_flow_type(struct priv *priv, enum hash_rxq_flow_type type)
576 case HASH_RXQ_FLOW_TYPE_BROADCAST:
577 case HASH_RXQ_FLOW_TYPE_IPV6MULTI:
578 case HASH_RXQ_FLOW_TYPE_MAC:
582 /* Unsupported flow type is not allowed. */
589 * Automatically enable/disable flows according to configuration.
595 * 0 on success, errno value on failure.
598 priv_rehash_flows(struct priv *priv)
602 for (i = 0; i != RTE_DIM((*priv->hash_rxqs)[0].special_flow); ++i)
603 if (!priv_allow_flow_type(priv, i)) {
604 priv_special_flow_disable(priv, i);
606 int ret = priv_special_flow_enable(priv, i);
611 if (priv_allow_flow_type(priv, HASH_RXQ_FLOW_TYPE_MAC))
612 return priv_mac_addrs_enable(priv);
613 priv_mac_addrs_disable(priv);
618 * Allocate RX queue elements.
621 * Pointer to RX queue structure.
624 * 0 on success, errno value on failure.
627 rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
629 const unsigned int sges_n = 1 << rxq_ctrl->rxq.sges_n;
630 unsigned int elts_n = 1 << rxq_ctrl->rxq.elts_n;
634 /* Iterate on segments. */
635 for (i = 0; (i != elts_n); ++i) {
636 struct rte_mbuf *buf;
638 buf = rte_pktmbuf_alloc(rxq_ctrl->rxq.mp);
640 ERROR("%p: empty mbuf pool", (void *)rxq_ctrl);
644 /* Headroom is reserved by rte_pktmbuf_alloc(). */
645 assert(DATA_OFF(buf) == RTE_PKTMBUF_HEADROOM);
646 /* Buffer is supposed to be empty. */
647 assert(rte_pktmbuf_data_len(buf) == 0);
648 assert(rte_pktmbuf_pkt_len(buf) == 0);
650 /* Only the first segment keeps headroom. */
652 SET_DATA_OFF(buf, 0);
653 PORT(buf) = rxq_ctrl->rxq.port_id;
654 DATA_LEN(buf) = rte_pktmbuf_tailroom(buf);
655 PKT_LEN(buf) = DATA_LEN(buf);
657 (*rxq_ctrl->rxq.elts)[i] = buf;
659 /* If vectorized Rx is enabled, prepare the vPMD-specific fields. */
660 if (rxq_check_vec_support(&rxq_ctrl->rxq) > 0) {
661 struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
662 struct rte_mbuf *mbuf_init = &rxq->fake_mbuf;
665 /* Initialize default rearm_data for vPMD. */
666 mbuf_init->data_off = RTE_PKTMBUF_HEADROOM;
667 rte_mbuf_refcnt_set(mbuf_init, 1);
668 mbuf_init->nb_segs = 1;
669 mbuf_init->port = rxq->port_id;
671 * prevent compiler reordering:
672 * rearm_data covers previous fields.
674 rte_compiler_barrier();
675 rxq->mbuf_initializer =
676 *(uint64_t *)&mbuf_init->rearm_data;
677 /* Padding with a fake mbuf for vectorized Rx. */
678 for (j = 0; j < MLX5_VPMD_DESCS_PER_LOOP; ++j)
679 (*rxq->elts)[elts_n + j] = &rxq->fake_mbuf;
681 DEBUG("%p: allocated and configured %u segments (max %u packets)",
682 (void *)rxq_ctrl, elts_n, elts_n / (1 << rxq_ctrl->rxq.sges_n));
687 for (i = 0; (i != elts_n); ++i) {
688 if ((*rxq_ctrl->rxq.elts)[i] != NULL)
689 rte_pktmbuf_free_seg((*rxq_ctrl->rxq.elts)[i]);
690 (*rxq_ctrl->rxq.elts)[i] = NULL;
692 DEBUG("%p: failed, freed everything", (void *)rxq_ctrl);
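/*
 * Worked example (illustrative): with rxq.elts_n == 9 and rxq.sges_n == 2,
 * the loop above allocates 1 << 9 == 512 mbuf segments and each packet
 * consumes 1 << 2 == 4 of them, so the DEBUG message reports a maximum of
 * 512 / 4 == 128 packets.
 */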
698 * Free RX queue elements.
701 * Pointer to RX queue structure.
704 rxq_free_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
706 struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
707 const uint16_t q_n = (1 << rxq->elts_n);
708 const uint16_t q_mask = q_n - 1;
709 uint16_t used = q_n - (rxq->rq_ci - rxq->rq_pi);
712 DEBUG("%p: freeing WRs", (void *)rxq_ctrl);
713 if (rxq->elts == NULL)
716 * Some mbufs in the ring belong to the application; they cannot be
719 if (rxq_check_vec_support(rxq) > 0) {
720 for (i = 0; i < used; ++i)
721 (*rxq->elts)[(rxq->rq_ci + i) & q_mask] = NULL;
722 rxq->rq_pi = rxq->rq_ci;
724 for (i = 0; (i != (1u << rxq->elts_n)); ++i) {
725 if ((*rxq->elts)[i] != NULL)
726 rte_pktmbuf_free_seg((*rxq->elts)[i]);
727 (*rxq->elts)[i] = NULL;
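/*
 * Note on "used" above (illustrative): rq_ci and rq_pi are free-running
 * indices compared with unsigned wrap-around semantics, so no explicit
 * wrap handling is needed. The vectorized branch clears exactly "used"
 * slots starting at rq_ci & q_mask so that the generic loop below it does
 * not free mbufs that, as stated above, still belong to the application.
 */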
732 * Clean up a RX queue.
734 * Destroy objects, free allocated memory and reset the structure for reuse.
737 * Pointer to RX queue structure.
740 mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *rxq_ctrl)
742 DEBUG("cleaning up %p", (void *)rxq_ctrl);
744 mlx5_priv_rxq_ibv_release(rxq_ctrl->priv, rxq_ctrl->ibv);
745 memset(rxq_ctrl, 0, sizeof(*rxq_ctrl));
751 * Pointer to Ethernet device structure.
755 * Number of descriptors to configure in queue.
757 * NUMA socket on which memory must be allocated.
759 * Thresholds parameters.
761 * Memory pool for buffer allocations.
764 * 0 on success, negative errno value on failure.
767 mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
768 unsigned int socket, const struct rte_eth_rxconf *conf,
769 struct rte_mempool *mp)
771 struct priv *priv = dev->data->dev_private;
772 struct mlx5_rxq_data *rxq = (*priv->rxqs)[idx];
773 struct mlx5_rxq_ctrl *rxq_ctrl =
774 container_of(rxq, struct mlx5_rxq_ctrl, rxq);
778 if (mlx5_is_secondary())
779 return -E_RTE_SECONDARY;
781 if (!rte_is_power_of_2(desc)) {
782 desc = 1 << log2above(desc);
783 WARN("%p: increased number of descriptors in RX queue %u"
784 " to the next power of two (%d)",
785 (void *)dev, idx, desc);
787 DEBUG("%p: configuring queue %u for %u descriptors",
788 (void *)dev, idx, desc);
789 if (idx >= priv->rxqs_n) {
790 ERROR("%p: queue index out of range (%u >= %u)",
791 (void *)dev, idx, priv->rxqs_n);
795 if (!mlx5_priv_rxq_releasable(priv, idx)) {
797 ERROR("%p: unable to release queue index %u",
801 mlx5_priv_rxq_release(priv, idx);
802 rxq_ctrl = mlx5_priv_rxq_new(priv, idx, desc, socket, mp);
804 ERROR("%p: unable to allocate queue index %u",
809 DEBUG("%p: adding RX queue %p to list",
810 (void *)dev, (void *)rxq_ctrl);
811 (*priv->rxqs)[idx] = &rxq_ctrl->rxq;
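/*
 * Hypothetical application-level call (sketch): this function is invoked
 * through rte_eth_rx_queue_setup(); asking for 100 descriptors is legal
 * and is silently rounded up to the next power of two (128) as warned
 * above. Passing NULL selects the default rte_eth_rxconf.
 *
 *   ret = rte_eth_rx_queue_setup(port_id, 0, 100, rte_socket_id(),
 *                                NULL, mp);
 */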
818 * DPDK callback to release a RX queue.
821 * Generic RX queue pointer.
824 mlx5_rx_queue_release(void *dpdk_rxq)
826 struct mlx5_rxq_data *rxq = (struct mlx5_rxq_data *)dpdk_rxq;
827 struct mlx5_rxq_ctrl *rxq_ctrl;
830 if (mlx5_is_secondary())
835 rxq_ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq);
836 priv = rxq_ctrl->priv;
838 if (!mlx5_priv_rxq_releasable(priv, rxq_ctrl->rxq.stats.idx))
839 rte_panic("Rx queue %p is still used by a flow and cannot be"
840 " removed\n", (void *)rxq_ctrl);
841 mlx5_priv_rxq_release(priv, rxq_ctrl->rxq.stats.idx);
846 * Allocate queue vector and fill epoll fd list for Rx interrupts.
849 * Pointer to private structure.
852 * 0 on success, negative on failure.
855 priv_rx_intr_vec_enable(struct priv *priv)
858 unsigned int rxqs_n = priv->rxqs_n;
859 unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
860 unsigned int count = 0;
861 struct rte_intr_handle *intr_handle = priv->dev->intr_handle;
863 assert(!mlx5_is_secondary());
864 if (!priv->dev->data->dev_conf.intr_conf.rxq)
866 priv_rx_intr_vec_disable(priv);
867 intr_handle->intr_vec = malloc(n * sizeof(intr_handle->intr_vec[0]));
868 if (intr_handle->intr_vec == NULL) {
869 ERROR("failed to allocate memory for interrupt vector,"
870 " Rx interrupts will not be supported");
873 intr_handle->type = RTE_INTR_HANDLE_EXT;
874 for (i = 0; i != n; ++i) {
875 /* This rxq ibv must not be released in this function. */
876 struct mlx5_rxq_ibv *rxq_ibv = mlx5_priv_rxq_ibv_get(priv, i);
881 /* Skip queues that cannot request interrupts. */
882 if (!rxq_ibv || !rxq_ibv->channel) {
883 /* Use invalid intr_vec[] index to disable entry. */
884 intr_handle->intr_vec[i] =
885 RTE_INTR_VEC_RXTX_OFFSET +
886 RTE_MAX_RXTX_INTR_VEC_ID;
889 if (count >= RTE_MAX_RXTX_INTR_VEC_ID) {
890 ERROR("too many Rx queues for interrupt vector size"
891 " (%d), Rx interrupts cannot be enabled",
892 RTE_MAX_RXTX_INTR_VEC_ID);
893 priv_rx_intr_vec_disable(priv);
896 fd = rxq_ibv->channel->fd;
897 flags = fcntl(fd, F_GETFL);
898 rc = fcntl(fd, F_SETFL, flags | O_NONBLOCK);
900 ERROR("failed to make Rx interrupt file descriptor"
901 " %d non-blocking for queue index %d", fd, i);
902 priv_rx_intr_vec_disable(priv);
905 intr_handle->intr_vec[i] = RTE_INTR_VEC_RXTX_OFFSET + count;
906 intr_handle->efds[count] = fd;
910 priv_rx_intr_vec_disable(priv);
912 intr_handle->nb_efd = count;
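/*
 * Application-side sketch (hypothetical, l3fwd-power style) showing how
 * the epoll data prepared above is consumed; "port_id" and "queue_id" are
 * placeholders supplied by the application:
 *
 *   struct rte_epoll_event event;
 *
 *   rte_eth_dev_rx_intr_ctl_q(port_id, queue_id, RTE_EPOLL_PER_THREAD,
 *                             RTE_INTR_EVENT_ADD, NULL);
 *   rte_eth_dev_rx_intr_enable(port_id, queue_id);
 *   rte_epoll_wait(RTE_EPOLL_PER_THREAD, &event, 1, -1);
 */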
917 * Clean up Rx interrupts handler.
920 * Pointer to private structure.
923 priv_rx_intr_vec_disable(struct priv *priv)
925 struct rte_intr_handle *intr_handle = priv->dev->intr_handle;
927 unsigned int rxqs_n = priv->rxqs_n;
928 unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
930 if (!priv->dev->data->dev_conf.intr_conf.rxq)
932 for (i = 0; i != n; ++i) {
933 struct mlx5_rxq_ctrl *rxq_ctrl;
934 struct mlx5_rxq_data *rxq_data;
936 if (intr_handle->intr_vec[i] == RTE_INTR_VEC_RXTX_OFFSET +
937 RTE_MAX_RXTX_INTR_VEC_ID)
940 * Need to access the queue directly to release the reference
941 * kept in priv_rx_intr_vec_enable().
943 rxq_data = (*priv->rxqs)[i];
944 rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
945 mlx5_priv_rxq_ibv_release(priv, rxq_ctrl->ibv);
947 rte_intr_free_epoll_fd(intr_handle);
948 free(intr_handle->intr_vec);
949 intr_handle->nb_efd = 0;
950 intr_handle->intr_vec = NULL;
954 * MLX5 CQ arming: request the next completion notification.
957 * Pointer to receive queue structure.
959 * Sequence number per receive queue.
962 mlx5_arm_cq(struct mlx5_rxq_data *rxq, int sq_n_rxq)
965 uint32_t doorbell_hi;
967 void *cq_db_reg = (char *)rxq->cq_uar + MLX5_CQ_DOORBELL;
969 sq_n = sq_n_rxq & MLX5_CQ_SQN_MASK;
970 doorbell_hi = sq_n << MLX5_CQ_SQN_OFFSET | (rxq->cq_ci & MLX5_CI_MASK);
971 doorbell = (uint64_t)doorbell_hi << 32;
972 doorbell |= rxq->cqn;
973 rxq->cq_db[MLX5_CQ_ARM_DB] = rte_cpu_to_be_32(doorbell_hi);
975 rte_write64(rte_cpu_to_be_64(doorbell), cq_db_reg);
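/*
 * Layout of the doorbell written above (derived from the code, for
 * reference): the upper 32 bits hold doorbell_hi, i.e. the arm sequence
 * number shifted by MLX5_CQ_SQN_OFFSET combined with the masked CQ
 * consumer index, while the lower 32 bits hold the CQ number; the value is
 * written big-endian to the UAR register after the CQ doorbell record has
 * been updated.
 */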
979 * DPDK callback for Rx queue interrupt enable.
982 * Pointer to Ethernet device structure.
987 * 0 on success, negative on failure.
990 mlx5_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
992 struct priv *priv = mlx5_get_priv(dev);
993 struct mlx5_rxq_data *rxq_data;
994 struct mlx5_rxq_ctrl *rxq_ctrl;
998 rxq_data = (*priv->rxqs)[rx_queue_id];
1003 rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
1004 if (rxq_ctrl->irq) {
1005 struct mlx5_rxq_ibv *rxq_ibv;
1007 rxq_ibv = mlx5_priv_rxq_ibv_get(priv, rx_queue_id);
1012 mlx5_arm_cq(rxq_data, rxq_data->cq_arm_sn);
1013 mlx5_priv_rxq_ibv_release(priv, rxq_ibv);
1018 WARN("unable to arm interrupt on rx queue %d", rx_queue_id);
1023 * DPDK callback for Rx queue interrupt disable.
1026 * Pointer to Ethernet device structure.
1027 * @param rx_queue_id
1031 * 0 on success, negative on failure.
1034 mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
1036 struct priv *priv = mlx5_get_priv(dev);
1037 struct mlx5_rxq_data *rxq_data;
1038 struct mlx5_rxq_ctrl *rxq_ctrl;
1039 struct mlx5_rxq_ibv *rxq_ibv = NULL;
1040 struct ibv_cq *ev_cq;
1045 rxq_data = (*priv->rxqs)[rx_queue_id];
1050 rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
1053 rxq_ibv = mlx5_priv_rxq_ibv_get(priv, rx_queue_id);
1058 ret = ibv_get_cq_event(rxq_ibv->channel, &ev_cq, &ev_ctx);
1059 if (ret || ev_cq != rxq_ibv->cq) {
1063 rxq_data->cq_arm_sn++;
1064 ibv_ack_cq_events(rxq_ibv->cq, 1);
1067 mlx5_priv_rxq_ibv_release(priv, rxq_ibv);
1070 WARN("unable to disable interrupt on rx queue %d",
1076 * Create the Rx queue Verbs object.
1079 * Pointer to private structure.
1081 * Queue index in the DPDK Rx queue array.
1084 * The Verbs object initialised if it can be created.
1086 struct mlx5_rxq_ibv*
1087 mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx)
1089 struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
1090 struct mlx5_rxq_ctrl *rxq_ctrl =
1091 container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
1092 struct ibv_wq_attr mod;
1094 struct ibv_cq_init_attr_ex cq;
1095 struct ibv_wq_init_attr wq;
1096 struct ibv_cq_ex cq_attr;
1098 unsigned int cqe_n = (1 << rxq_data->elts_n) - 1;
1099 struct mlx5_rxq_ibv *tmpl;
1100 struct mlx5dv_cq cq_info;
1101 struct mlx5dv_rwq rwq;
1104 struct mlx5dv_obj obj;
1107 assert(!rxq_ctrl->ibv);
1108 tmpl = rte_calloc_socket(__func__, 1, sizeof(*tmpl), 0,
1111 ERROR("%p: cannot allocate verbs resources",
1115 tmpl->rxq_ctrl = rxq_ctrl;
1116 /* Use the entire RX mempool as the memory region. */
1117 tmpl->mr = priv_mr_get(priv, rxq_data->mp);
1119 tmpl->mr = priv_mr_new(priv, rxq_data->mp);
1121 ERROR("%p: MR creation failure", (void *)rxq_ctrl);
1125 if (rxq_ctrl->irq) {
1126 tmpl->channel = ibv_create_comp_channel(priv->ctx);
1127 if (!tmpl->channel) {
1128 ERROR("%p: Comp Channel creation failure",
1133 attr.cq = (struct ibv_cq_init_attr_ex){
1136 if (priv->cqe_comp) {
1137 attr.cq.comp_mask |= IBV_CQ_INIT_ATTR_MASK_FLAGS;
1138 attr.cq.flags |= MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE;
1140 * For vectorized Rx, the CQE count must not be doubled so that
1141 * cq_ci and rq_ci stay aligned.
1143 if (rxq_check_vec_support(rxq_data) < 0)
1146 tmpl->cq = ibv_create_cq(priv->ctx, cqe_n, NULL, tmpl->channel, 0);
1147 if (tmpl->cq == NULL) {
1148 ERROR("%p: CQ creation failure", (void *)rxq_ctrl);
1151 DEBUG("priv->device_attr.max_qp_wr is %d",
1152 priv->device_attr.orig_attr.max_qp_wr);
1153 DEBUG("priv->device_attr.max_sge is %d",
1154 priv->device_attr.orig_attr.max_sge);
1155 attr.wq = (struct ibv_wq_init_attr){
1156 .wq_context = NULL, /* Could be useful in the future. */
1157 .wq_type = IBV_WQT_RQ,
1158 /* Max number of outstanding WRs. */
1159 .max_wr = (1 << rxq_data->elts_n) >> rxq_data->sges_n,
1160 /* Max number of scatter/gather elements in a WR. */
1161 .max_sge = 1 << rxq_data->sges_n,
1165 IBV_WQ_FLAGS_CVLAN_STRIPPING |
1167 .create_flags = (rxq_data->vlan_strip ?
1168 IBV_WQ_FLAGS_CVLAN_STRIPPING :
1171 /* By default, FCS (CRC) is stripped by hardware. */
1172 if (rxq_data->crc_present) {
1173 attr.wq.create_flags |= IBV_WQ_FLAGS_SCATTER_FCS;
1174 attr.wq.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
1176 #ifdef HAVE_IBV_WQ_FLAG_RX_END_PADDING
1177 if (priv->hw_padding) {
1178 attr.wq.create_flags |= IBV_WQ_FLAG_RX_END_PADDING;
1179 attr.wq.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
1182 tmpl->wq = ibv_create_wq(priv->ctx, &attr.wq);
1183 if (tmpl->wq == NULL) {
1184 ERROR("%p: WQ creation failure", (void *)rxq_ctrl);
1188 * Make sure the number of WRs*SGEs matches expectations since a queue
1189 * cannot allocate more than "desc" buffers.
1191 if (((int)attr.wq.max_wr !=
1192 ((1 << rxq_data->elts_n) >> rxq_data->sges_n)) ||
1193 ((int)attr.wq.max_sge != (1 << rxq_data->sges_n))) {
1194 ERROR("%p: requested %u*%u but got %u*%u WRs*SGEs",
1196 ((1 << rxq_data->elts_n) >> rxq_data->sges_n),
1197 (1 << rxq_data->sges_n),
1198 attr.wq.max_wr, attr.wq.max_sge);
1201 /* Change queue state to ready. */
1202 mod = (struct ibv_wq_attr){
1203 .attr_mask = IBV_WQ_ATTR_STATE,
1204 .wq_state = IBV_WQS_RDY,
1206 ret = ibv_modify_wq(tmpl->wq, &mod);
1208 ERROR("%p: WQ state to IBV_WQS_RDY failed",
1212 obj.cq.in = tmpl->cq;
1213 obj.cq.out = &cq_info;
1214 obj.rwq.in = tmpl->wq;
1216 ret = mlx5dv_init_obj(&obj, MLX5DV_OBJ_CQ | MLX5DV_OBJ_RWQ);
1219 if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
1220 ERROR("Wrong MLX5_CQE_SIZE environment variable value: "
1221 "it should be set to %u", RTE_CACHE_LINE_SIZE);
1224 /* Fill the rings. */
1225 rxq_data->wqes = (volatile struct mlx5_wqe_data_seg (*)[])
1227 for (i = 0; (i != (unsigned int)(1 << rxq_data->elts_n)); ++i) {
1228 struct rte_mbuf *buf = (*rxq_data->elts)[i];
1229 volatile struct mlx5_wqe_data_seg *scat = &(*rxq_data->wqes)[i];
1231 /* scat->addr must be able to store a pointer. */
1232 assert(sizeof(scat->addr) >= sizeof(uintptr_t));
1233 *scat = (struct mlx5_wqe_data_seg){
1234 .addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf,
1236 .byte_count = rte_cpu_to_be_32(DATA_LEN(buf)),
1237 .lkey = tmpl->mr->lkey,
1240 rxq_data->rq_db = rwq.dbrec;
1241 rxq_data->cqe_n = log2above(cq_info.cqe_cnt);
1242 rxq_data->cq_ci = 0;
1243 rxq_data->rq_ci = 0;
1244 rxq_data->rq_pi = 0;
1245 rxq_data->zip = (struct rxq_zip){
1248 rxq_data->cq_db = cq_info.dbrec;
1249 rxq_data->cqes = (volatile struct mlx5_cqe (*)[])(uintptr_t)cq_info.buf;
1250 /* Update doorbell counter. */
1251 rxq_data->rq_ci = (1 << rxq_data->elts_n) >> rxq_data->sges_n;
1253 *rxq_data->rq_db = rte_cpu_to_be_32(rxq_data->rq_ci);
1254 DEBUG("%p: rxq updated with %p", (void *)rxq_ctrl, (void *)tmpl);
1255 rte_atomic32_inc(&tmpl->refcnt);
1256 DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void *)priv,
1257 (void *)tmpl, rte_atomic32_read(&tmpl->refcnt));
1258 LIST_INSERT_HEAD(&priv->rxqsibv, tmpl, next);
1262 claim_zero(ibv_destroy_wq(tmpl->wq));
1264 claim_zero(ibv_destroy_cq(tmpl->cq));
1266 claim_zero(ibv_destroy_comp_channel(tmpl->channel));
1268 priv_mr_release(priv, tmpl->mr);
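/*
 * Reference-counting sketch (hypothetical caller): users of the Verbs
 * object are expected to pair mlx5_priv_rxq_ibv_get() with
 * mlx5_priv_rxq_ibv_release(), as the Rx interrupt code in this file does.
 *
 *   struct mlx5_rxq_ibv *rxq_ibv = mlx5_priv_rxq_ibv_get(priv, idx);
 *
 *   if (rxq_ibv != NULL) {
 *           ... use rxq_ibv->cq, rxq_ibv->wq or rxq_ibv->channel ...
 *           mlx5_priv_rxq_ibv_release(priv, rxq_ibv);
 *   }
 */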
1273 * Get an Rx queue Verbs object.
1276 * Pointer to private structure.
1278 * Queue index in the DPDK Rx queue array.
1281 * The Verbs object if it exists.
1283 struct mlx5_rxq_ibv*
1284 mlx5_priv_rxq_ibv_get(struct priv *priv, uint16_t idx)
1286 struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
1287 struct mlx5_rxq_ctrl *rxq_ctrl;
1289 if (idx >= priv->rxqs_n)
1293 rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
1294 if (rxq_ctrl->ibv) {
1295 priv_mr_get(priv, rxq_data->mp);
1296 rte_atomic32_inc(&rxq_ctrl->ibv->refcnt);
1297 DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void *)priv,
1298 (void *)rxq_ctrl->ibv,
1299 rte_atomic32_read(&rxq_ctrl->ibv->refcnt));
1301 return rxq_ctrl->ibv;
1305 * Release an Rx queue Verbs object.
1308 * Pointer to private structure.
1310 * Verbs Rx queue object.
1313 * 0 on success, errno value on failure.
1316 mlx5_priv_rxq_ibv_release(struct priv *priv, struct mlx5_rxq_ibv *rxq_ibv)
1321 assert(rxq_ibv->wq);
1322 assert(rxq_ibv->cq);
1323 assert(rxq_ibv->mr);
1324 ret = priv_mr_release(priv, rxq_ibv->mr);
1327 DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void *)priv,
1328 (void *)rxq_ibv, rte_atomic32_read(&rxq_ibv->refcnt));
1329 if (rte_atomic32_dec_and_test(&rxq_ibv->refcnt)) {
1330 rxq_free_elts(rxq_ibv->rxq_ctrl);
1331 claim_zero(ibv_destroy_wq(rxq_ibv->wq));
1332 claim_zero(ibv_destroy_cq(rxq_ibv->cq));
1333 if (rxq_ibv->channel)
1334 claim_zero(ibv_destroy_comp_channel(rxq_ibv->channel));
1335 LIST_REMOVE(rxq_ibv, next);
1343 * Verify the Verbs Rx queue list is empty.
1346 * Pointer to private structure.
1348 * @return the number of objects not released.
1351 mlx5_priv_rxq_ibv_verify(struct priv *priv)
1354 struct mlx5_rxq_ibv *rxq_ibv;
1356 LIST_FOREACH(rxq_ibv, &priv->rxqsibv, next) {
1357 DEBUG("%p: Verbs Rx queue %p still referenced", (void *)priv,
1365 * Return true if a single reference exists on the object.
1368 * Pointer to private structure.
1370 * Verbs Rx queue object.
1373 mlx5_priv_rxq_ibv_releasable(struct priv *priv, struct mlx5_rxq_ibv *rxq_ibv)
1377 return (rte_atomic32_read(&rxq_ibv->refcnt) == 1);
1381 * Create a DPDK Rx queue.
1384 * Pointer to private structure.
1388 * Number of descriptors to configure in queue.
1390 * NUMA socket on which memory must be allocated.
1393 * A DPDK queue object on success.
1395 struct mlx5_rxq_ctrl*
1396 mlx5_priv_rxq_new(struct priv *priv, uint16_t idx, uint16_t desc,
1397 unsigned int socket, struct rte_mempool *mp)
1399 struct rte_eth_dev *dev = priv->dev;
1400 struct mlx5_rxq_ctrl *tmpl;
1401 const uint16_t desc_n =
1402 desc + priv->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP;
1403 unsigned int mb_len = rte_pktmbuf_data_room_size(mp);
1405 tmpl = rte_calloc_socket("RXQ", 1,
1407 desc_n * sizeof(struct rte_mbuf *),
1411 if (priv->dev->data->dev_conf.intr_conf.rxq)
1413 /* Enable scattered packets support for this queue if necessary. */
1414 assert(mb_len >= RTE_PKTMBUF_HEADROOM);
1415 if (dev->data->dev_conf.rxmode.max_rx_pkt_len <=
1416 (mb_len - RTE_PKTMBUF_HEADROOM)) {
1417 tmpl->rxq.sges_n = 0;
1418 } else if (dev->data->dev_conf.rxmode.enable_scatter) {
1420 RTE_PKTMBUF_HEADROOM +
1421 dev->data->dev_conf.rxmode.max_rx_pkt_len;
1422 unsigned int sges_n;
1425 * Determine the number of SGEs needed for a full packet
1426 * and round it to the next power of two.
1428 sges_n = log2above((size / mb_len) + !!(size % mb_len));
1429 tmpl->rxq.sges_n = sges_n;
1430 /* Make sure rxq.sges_n did not overflow. */
1431 size = mb_len * (1 << tmpl->rxq.sges_n);
1432 size -= RTE_PKTMBUF_HEADROOM;
1433 if (size < dev->data->dev_conf.rxmode.max_rx_pkt_len) {
1434 ERROR("%p: too many SGEs (%u) needed to handle"
1435 " requested maximum packet size %u",
1438 dev->data->dev_conf.rxmode.max_rx_pkt_len);
1442 WARN("%p: the requested maximum Rx packet size (%u) is"
1443 " larger than a single mbuf (%u) and scattered"
1444 " mode has not been requested",
1446 dev->data->dev_conf.rxmode.max_rx_pkt_len,
1447 mb_len - RTE_PKTMBUF_HEADROOM);
1449 DEBUG("%p: maximum number of segments per packet: %u",
1450 (void *)dev, 1 << tmpl->rxq.sges_n);
1451 if (desc % (1 << tmpl->rxq.sges_n)) {
1452 ERROR("%p: number of RX queue descriptors (%u) is not a"
1453 " multiple of SGEs per packet (%u)",
1456 1 << tmpl->rxq.sges_n);
1459 /* Toggle RX checksum offload if hardware supports it. */
1461 tmpl->rxq.csum = !!dev->data->dev_conf.rxmode.hw_ip_checksum;
1462 if (priv->hw_csum_l2tun)
1463 tmpl->rxq.csum_l2tun =
1464 !!dev->data->dev_conf.rxmode.hw_ip_checksum;
1465 /* Configure VLAN stripping. */
1466 tmpl->rxq.vlan_strip = (priv->hw_vlan_strip &&
1467 !!dev->data->dev_conf.rxmode.hw_vlan_strip);
1468 /* By default, FCS (CRC) is stripped by hardware. */
1469 if (dev->data->dev_conf.rxmode.hw_strip_crc) {
1470 tmpl->rxq.crc_present = 0;
1471 } else if (priv->hw_fcs_strip) {
1472 tmpl->rxq.crc_present = 1;
1474 WARN("%p: CRC stripping has been disabled but will still"
1475 " be performed by hardware, make sure MLNX_OFED and"
1476 " firmware are up to date",
1478 tmpl->rxq.crc_present = 0;
1480 DEBUG("%p: CRC stripping is %s, %u bytes will be subtracted from"
1481 " incoming frames to hide it",
1483 tmpl->rxq.crc_present ? "disabled" : "enabled",
1484 tmpl->rxq.crc_present << 2);
1486 tmpl->rxq.rss_hash = priv->rxqs_n > 1;
1487 tmpl->rxq.port_id = dev->data->port_id;
1490 tmpl->rxq.stats.idx = idx;
1491 tmpl->rxq.elts_n = log2above(desc);
1493 (struct rte_mbuf *(*)[1 << tmpl->rxq.elts_n])(tmpl + 1);
1494 rte_atomic32_inc(&tmpl->refcnt);
1495 DEBUG("%p: Rx queue %p: refcnt %d", (void *)priv,
1496 (void *)tmpl, rte_atomic32_read(&tmpl->refcnt));
1497 LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next);
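/*
 * Worked example for the SGE computation above (illustrative, assuming the
 * default RTE_PKTMBUF_HEADROOM of 128 bytes): with mb_len == 2176 and
 * max_rx_pkt_len == 9000, size == 128 + 9000 == 9128, hence
 * sges_n == log2above(9128 / 2176 + 1) == log2above(5) == 3 and each
 * packet spreads over 1 << 3 == 8 segments; the overflow check then
 * confirms that 8 * 2176 - 128 == 17280 bytes still cover 9000.
 */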
1508 * Pointer to private structure.
1513 * A pointer to the queue if it exists.
1515 struct mlx5_rxq_ctrl*
1516 mlx5_priv_rxq_get(struct priv *priv, uint16_t idx)
1518 struct mlx5_rxq_ctrl *rxq_ctrl = NULL;
1520 if ((*priv->rxqs)[idx]) {
1521 rxq_ctrl = container_of((*priv->rxqs)[idx],
1522 struct mlx5_rxq_ctrl,
1525 mlx5_priv_rxq_ibv_get(priv, idx);
1526 rte_atomic32_inc(&rxq_ctrl->refcnt);
1527 DEBUG("%p: Rx queue %p: refcnt %d", (void *)priv,
1528 (void *)rxq_ctrl, rte_atomic32_read(&rxq_ctrl->refcnt));
1534 * Release a Rx queue.
1537 * Pointer to private structure.
1542 * 0 on success, errno value on failure.
1545 mlx5_priv_rxq_release(struct priv *priv, uint16_t idx)
1547 struct mlx5_rxq_ctrl *rxq_ctrl;
1549 if (!(*priv->rxqs)[idx])
1551 rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq);
1552 assert(rxq_ctrl->priv);
1553 if (rxq_ctrl->ibv) {
1556 ret = mlx5_priv_rxq_ibv_release(rxq_ctrl->priv, rxq_ctrl->ibv);
1558 rxq_ctrl->ibv = NULL;
1560 DEBUG("%p: Rx queue %p: refcnt %d", (void *)priv,
1561 (void *)rxq_ctrl, rte_atomic32_read(&rxq_ctrl->refcnt));
1562 if (rte_atomic32_dec_and_test(&rxq_ctrl->refcnt)) {
1563 LIST_REMOVE(rxq_ctrl, next);
1565 (*priv->rxqs)[idx] = NULL;
1572 * Verify if the queue can be released.
1575 * Pointer to private structure.
1580 * 1 if the queue can be released.
1583 mlx5_priv_rxq_releasable(struct priv *priv, uint16_t idx)
1585 struct mlx5_rxq_ctrl *rxq_ctrl;
1587 if (!(*priv->rxqs)[idx])
1589 rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq);
1590 return (rte_atomic32_read(&rxq_ctrl->refcnt) == 1);
1594 * Verify the Rx queue list is empty.
1597 * Pointer to private structure.
1599 * @return the number of objects not released.
1602 mlx5_priv_rxq_verify(struct priv *priv)
1604 struct mlx5_rxq_ctrl *rxq_ctrl;
1607 LIST_FOREACH(rxq_ctrl, &priv->rxqsctrl, next) {
1608 DEBUG("%p: Rx Queue %p still referenced", (void *)priv,
1616 * Create an indirection table.
1619 * Pointer to private structure.
1621 * Queues to include in the indirection table.
1623 * Number of queues in the array.
1626 * A new indirection table.
1628 struct mlx5_ind_table_ibv*
1629 mlx5_priv_ind_table_ibv_new(struct priv *priv, uint16_t queues[],
1632 struct mlx5_ind_table_ibv *ind_tbl;
1633 const unsigned int wq_n = rte_is_power_of_2(queues_n) ?
1634 log2above(queues_n) :
1635 log2above(priv->ind_table_max_size);
1636 struct ibv_wq *wq[1 << wq_n];
1640 ind_tbl = rte_calloc(__func__, 1, sizeof(*ind_tbl) +
1641 queues_n * sizeof(uint16_t), 0);
1644 for (i = 0; i != queues_n; ++i) {
1645 struct mlx5_rxq_ctrl *rxq =
1646 mlx5_priv_rxq_get(priv, queues[i]);
1650 wq[i] = rxq->ibv->wq;
1651 ind_tbl->queues[i] = queues[i];
1653 ind_tbl->queues_n = queues_n;
1654 /* Finalise indirection table. */
1655 for (j = 0; i != (unsigned int)(1 << wq_n); ++i, ++j)
1657 ind_tbl->ind_table = ibv_create_rwq_ind_table(
1659 &(struct ibv_rwq_ind_table_init_attr){
1660 .log_ind_tbl_size = wq_n,
1664 if (!ind_tbl->ind_table)
1666 rte_atomic32_inc(&ind_tbl->refcnt);
1667 LIST_INSERT_HEAD(&priv->ind_tbls, ind_tbl, next);
1668 DEBUG("%p: Indirection table %p: refcnt %d", (void *)priv,
1669 (void *)ind_tbl, rte_atomic32_read(&ind_tbl->refcnt));
1673 DEBUG("%p cannot create indirection table", (void *)priv);
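/*
 * Illustrative example: with queues[] == { 0, 1, 2, 3, 4, 5 } (not a power
 * of two), the table is sized to the device maximum rounded up to a power
 * of two and the wrap-around loop above repeats the six WQs over the
 * remaining entries, so traffic is still spread across all queues.
 */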
1678 * Get an indirection table.
1681 * Pointer to private structure.
1683 * Queues to include in the indirection table.
1685 * Number of queues in the array.
1688 * An indirection table if found.
1690 struct mlx5_ind_table_ibv*
1691 mlx5_priv_ind_table_ibv_get(struct priv *priv, uint16_t queues[],
1694 struct mlx5_ind_table_ibv *ind_tbl;
1696 LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) {
1697 if ((ind_tbl->queues_n == queues_n) &&
1698 (memcmp(ind_tbl->queues, queues,
1699 ind_tbl->queues_n * sizeof(ind_tbl->queues[0]))
1706 rte_atomic32_inc(&ind_tbl->refcnt);
1707 DEBUG("%p: Indirection table %p: refcnt %d", (void *)priv,
1708 (void *)ind_tbl, rte_atomic32_read(&ind_tbl->refcnt));
1709 for (i = 0; i != ind_tbl->queues_n; ++i)
1710 mlx5_priv_rxq_get(priv, ind_tbl->queues[i]);
1716 * Release an indirection table.
1719 * Pointer to private structure.
1721 * Indirection table to release.
1724 * 0 on success, errno value on failure.
1727 mlx5_priv_ind_table_ibv_release(struct priv *priv,
1728 struct mlx5_ind_table_ibv *ind_tbl)
1732 DEBUG("%p: Indirection table %p: refcnt %d", (void *)priv,
1733 (void *)ind_tbl, rte_atomic32_read(&ind_tbl->refcnt));
1734 if (rte_atomic32_dec_and_test(&ind_tbl->refcnt))
1735 claim_zero(ibv_destroy_rwq_ind_table(ind_tbl->ind_table));
1736 for (i = 0; i != ind_tbl->queues_n; ++i)
1737 claim_nonzero(mlx5_priv_rxq_release(priv, ind_tbl->queues[i]));
1738 if (!rte_atomic32_read(&ind_tbl->refcnt)) {
1739 LIST_REMOVE(ind_tbl, next);
1747 * Verify the Verbs indirection table list is empty.
1750 * Pointer to private structure.
1752 * @return the number of objects not released.
1755 mlx5_priv_ind_table_ibv_verify(struct priv *priv)
1757 struct mlx5_ind_table_ibv *ind_tbl;
1760 LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) {
1761 DEBUG("%p: Verbs indirection table %p still referenced",
1762 (void *)priv, (void *)ind_tbl);
1769 * Create an Rx Hash queue.
1772 * Pointer to private structure.
1774 * RSS key for the Rx hash queue.
1775 * @param rss_key_len
1777 * @param hash_fields
1778 * Verbs protocol hash fields on which to apply RSS.
1780 * Queues to include in the hash Rx queue.
1785 * A hash Rx queue on success.
1788 mlx5_priv_hrxq_new(struct priv *priv, uint8_t *rss_key, uint8_t rss_key_len,
1789 uint64_t hash_fields, uint16_t queues[], uint16_t queues_n)
1791 struct mlx5_hrxq *hrxq;
1792 struct mlx5_ind_table_ibv *ind_tbl;
1795 ind_tbl = mlx5_priv_ind_table_ibv_get(priv, queues, queues_n);
1797 ind_tbl = mlx5_priv_ind_table_ibv_new(priv, queues, queues_n);
1800 qp = ibv_create_qp_ex(
1802 &(struct ibv_qp_init_attr_ex){
1803 .qp_type = IBV_QPT_RAW_PACKET,
1805 IBV_QP_INIT_ATTR_PD |
1806 IBV_QP_INIT_ATTR_IND_TABLE |
1807 IBV_QP_INIT_ATTR_RX_HASH,
1808 .rx_hash_conf = (struct ibv_rx_hash_conf){
1809 .rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ,
1810 .rx_hash_key_len = rss_key_len,
1811 .rx_hash_key = rss_key,
1812 .rx_hash_fields_mask = hash_fields,
1814 .rwq_ind_tbl = ind_tbl->ind_table,
1819 hrxq = rte_calloc(__func__, 1, sizeof(*hrxq) + rss_key_len, 0);
1822 hrxq->ind_table = ind_tbl;
1824 hrxq->rss_key_len = rss_key_len;
1825 hrxq->hash_fields = hash_fields;
1826 memcpy(hrxq->rss_key, rss_key, rss_key_len);
1827 rte_atomic32_inc(&hrxq->refcnt);
1828 LIST_INSERT_HEAD(&priv->hrxqs, hrxq, next);
1829 DEBUG("%p: Hash Rx queue %p: refcnt %d", (void *)priv,
1830 (void *)hrxq, rte_atomic32_read(&hrxq->refcnt));
1833 mlx5_priv_ind_table_ibv_release(priv, ind_tbl);
1835 claim_zero(ibv_destroy_qp(qp));
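/*
 * Expected call pattern (sketch, e.g. from the flow engine): look the hash
 * Rx queue up first and create it only when no matching one exists.
 *
 *   hrxq = mlx5_priv_hrxq_get(priv, rss_key, rss_key_len, hash_fields,
 *                             queues, queues_n);
 *   if (hrxq == NULL)
 *           hrxq = mlx5_priv_hrxq_new(priv, rss_key, rss_key_len,
 *                                     hash_fields, queues, queues_n);
 */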
1840 * Get an Rx Hash queue.
1843 * Pointer to private structure.
1845 * RSS configuration for the Rx hash queue.
1847 * Queues to include in the hash Rx queue.
1852 * A hash Rx queue on success.
1855 mlx5_priv_hrxq_get(struct priv *priv, uint8_t *rss_key, uint8_t rss_key_len,
1856 uint64_t hash_fields, uint16_t queues[], uint16_t queues_n)
1858 struct mlx5_hrxq *hrxq;
1860 LIST_FOREACH(hrxq, &priv->hrxqs, next) {
1861 struct mlx5_ind_table_ibv *ind_tbl;
1863 if (hrxq->rss_key_len != rss_key_len)
1865 if (memcmp(hrxq->rss_key, rss_key, rss_key_len))
1867 if (hrxq->hash_fields != hash_fields)
1869 ind_tbl = mlx5_priv_ind_table_ibv_get(priv, queues, queues_n);
1872 if (ind_tbl != hrxq->ind_table) {
1873 mlx5_priv_ind_table_ibv_release(priv, ind_tbl);
1876 rte_atomic32_inc(&hrxq->refcnt);
1877 DEBUG("%p: Hash Rx queue %p: refcnt %d", (void *)priv,
1878 (void *)hrxq, rte_atomic32_read(&hrxq->refcnt));
1885 * Release the hash Rx queue.
1888 * Pointer to private structure.
1890 * Pointer to Hash Rx queue to release.
1893 * 0 on success, errno value on failure.
1896 mlx5_priv_hrxq_release(struct priv *priv, struct mlx5_hrxq *hrxq)
1898 DEBUG("%p: Hash Rx queue %p: refcnt %d", (void *)priv,
1899 (void *)hrxq, rte_atomic32_read(&hrxq->refcnt));
1900 if (rte_atomic32_dec_and_test(&hrxq->refcnt)) {
1901 claim_zero(ibv_destroy_qp(hrxq->qp));
1902 mlx5_priv_ind_table_ibv_release(priv, hrxq->ind_table);
1903 LIST_REMOVE(hrxq, next);
1907 claim_nonzero(mlx5_priv_ind_table_ibv_release(priv, hrxq->ind_table));
1912 * Verify the hash Rx queue list is empty.
1915 * Pointer to private structure.
1917 * @return the number of objects not released.
1920 mlx5_priv_hrxq_ibv_verify(struct priv *priv)
1922 struct mlx5_hrxq *hrxq;
1925 LIST_FOREACH(hrxq, &priv->hrxqs, next) {
1926 DEBUG("%p: Verbs Hash Rx queue %p still referenced",
1927 (void *)priv, (void *)hrxq);