4 * Copyright 2015 6WIND S.A.
5 * Copyright 2015 Mellanox.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of 6WIND S.A. nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 #include <sys/queue.h>
43 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
45 #pragma GCC diagnostic ignored "-Wpedantic"
47 #include <infiniband/verbs.h>
48 #include <infiniband/mlx5dv.h>
50 #pragma GCC diagnostic error "-Wpedantic"
54 #include <rte_malloc.h>
55 #include <rte_ethdev.h>
56 #include <rte_common.h>
57 #include <rte_interrupts.h>
58 #include <rte_debug.h>
62 #include "mlx5_rxtx.h"
63 #include "mlx5_utils.h"
64 #include "mlx5_autoconf.h"
65 #include "mlx5_defs.h"
67 /* Initialization data for hash RX queues. */
68 const struct hash_rxq_init hash_rxq_init[] = {
70 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
71 IBV_RX_HASH_DST_IPV4 |
72 IBV_RX_HASH_SRC_PORT_TCP |
73 IBV_RX_HASH_DST_PORT_TCP),
74 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
76 .flow_spec.tcp_udp = {
77 .type = IBV_FLOW_SPEC_TCP,
78 .size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp),
80 .underlayer = &hash_rxq_init[HASH_RXQ_IPV4],
83 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
84 IBV_RX_HASH_DST_IPV4 |
85 IBV_RX_HASH_SRC_PORT_UDP |
86 IBV_RX_HASH_DST_PORT_UDP),
87 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
89 .flow_spec.tcp_udp = {
90 .type = IBV_FLOW_SPEC_UDP,
91 .size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp),
93 .underlayer = &hash_rxq_init[HASH_RXQ_IPV4],
96 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
97 IBV_RX_HASH_DST_IPV4),
98 .dpdk_rss_hf = (ETH_RSS_IPV4 |
102 .type = IBV_FLOW_SPEC_IPV4,
103 .size = sizeof(hash_rxq_init[0].flow_spec.ipv4),
105 .underlayer = &hash_rxq_init[HASH_RXQ_ETH],
108 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
109 IBV_RX_HASH_DST_IPV6 |
110 IBV_RX_HASH_SRC_PORT_TCP |
111 IBV_RX_HASH_DST_PORT_TCP),
112 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
114 .flow_spec.tcp_udp = {
115 .type = IBV_FLOW_SPEC_TCP,
116 .size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp),
118 .underlayer = &hash_rxq_init[HASH_RXQ_IPV6],
121 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
122 IBV_RX_HASH_DST_IPV6 |
123 IBV_RX_HASH_SRC_PORT_UDP |
124 IBV_RX_HASH_DST_PORT_UDP),
125 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
127 .flow_spec.tcp_udp = {
128 .type = IBV_FLOW_SPEC_UDP,
129 .size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp),
131 .underlayer = &hash_rxq_init[HASH_RXQ_IPV6],
134 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
135 IBV_RX_HASH_DST_IPV6),
136 .dpdk_rss_hf = (ETH_RSS_IPV6 |
140 .type = IBV_FLOW_SPEC_IPV6,
141 .size = sizeof(hash_rxq_init[0].flow_spec.ipv6),
143 .underlayer = &hash_rxq_init[HASH_RXQ_ETH],
150 .type = IBV_FLOW_SPEC_ETH,
151 .size = sizeof(hash_rxq_init[0].flow_spec.eth),
157 /* Number of entries in hash_rxq_init[]. */
158 const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
160 /* Initialization data for hash RX queue indirection tables. */
161 static const struct ind_table_init ind_table_init[] = {
163 .max_size = -1u, /* Superseded by HW limitations. */
165 1 << HASH_RXQ_TCPV4 |
166 1 << HASH_RXQ_UDPV4 |
168 1 << HASH_RXQ_TCPV6 |
169 1 << HASH_RXQ_UDPV6 |
176 .hash_types = 1 << HASH_RXQ_ETH,
181 #define IND_TABLE_INIT_N RTE_DIM(ind_table_init)
183 /* Default RSS hash key also used for ConnectX-3. */
184 uint8_t rss_hash_default_key[] = {
185 0x2c, 0xc6, 0x81, 0xd1,
186 0x5b, 0xdb, 0xf4, 0xf7,
187 0xfc, 0xa2, 0x83, 0x19,
188 0xdb, 0x1a, 0x3e, 0x94,
189 0x6b, 0x9e, 0x38, 0xd9,
190 0x2c, 0x9c, 0x03, 0xd1,
191 0xad, 0x99, 0x44, 0xa7,
192 0xd9, 0x56, 0x3d, 0x59,
193 0x06, 0x3c, 0x25, 0xf3,
194 0xfc, 0x1f, 0xdc, 0x2a,
197 /* Length of the default RSS hash key. */
198 const size_t rss_hash_default_key_len = sizeof(rss_hash_default_key);
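/*
 * Illustrative sketch (not part of the driver): an application may supply
 * its own 40-byte Toeplitz key through the regular DPDK configuration API
 * instead of relying on this default. The port_id, queue counts and
 * custom_key contents below are hypothetical.
 *
 *	static uint8_t custom_key[40] = { 0x6d, 0x5a, /* ...38 more bytes... */ };
 *	struct rte_eth_conf conf = {
 *		.rxmode = { .mq_mode = ETH_MQ_RX_RSS },
 *		.rx_adv_conf.rss_conf = {
 *			.rss_key = custom_key,
 *			.rss_key_len = sizeof(custom_key),
 *			.rss_hf = ETH_RSS_IP | ETH_RSS_TCP,
 *		},
 *	};
 *	rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &conf);
 */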
201 * Populate flow steering rule for a given hash RX queue type using
202 * information from hash_rxq_init[]. Nothing is written to flow_attr when
203 * flow_attr_size is not large enough, but the required size is still returned.
206 * Pointer to private structure.
207 * @param[out] flow_attr
208 * Pointer to flow attribute structure to fill. Note that the allocated
209 * area must be large enough to hold all flow specifications.
210 * @param flow_attr_size
211 * Entire size of flow_attr and trailing room for flow specifications.
213 * Hash RX queue type to use for flow steering rule.
216 * Total size of the flow attribute buffer. No errors are defined.
219 priv_flow_attr(struct priv *priv, struct ibv_flow_attr *flow_attr,
220 size_t flow_attr_size, enum hash_rxq_type type)
222 size_t offset = sizeof(*flow_attr);
223 const struct hash_rxq_init *init = &hash_rxq_init[type];
225 assert(priv != NULL);
226 assert((size_t)type < RTE_DIM(hash_rxq_init));
228 offset += init->flow_spec.hdr.size;
229 init = init->underlayer;
230 } while (init != NULL);
231 if (offset > flow_attr_size)
233 flow_attr_size = offset;
234 init = &hash_rxq_init[type];
235 *flow_attr = (struct ibv_flow_attr){
236 .type = IBV_FLOW_ATTR_NORMAL,
237 /* Priorities < 3 are reserved for flow director. */
238 .priority = init->flow_priority + 3,
244 offset -= init->flow_spec.hdr.size;
245 memcpy((void *)((uintptr_t)flow_attr + offset),
247 init->flow_spec.hdr.size);
248 ++flow_attr->num_of_specs;
249 init = init->underlayer;
250 } while (init != NULL);
251 return flow_attr_size;
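/*
 * Usage note (sketch, not in the original sources): because the required
 * size is returned even when nothing is written, a caller can query first
 * and then allocate a buffer of the right size. Names below are only for
 * illustration.
 *
 *	size_t size = priv_flow_attr(priv, NULL, 0, type);
 *	struct ibv_flow_attr *attr = rte_malloc(__func__, size, 0);
 *
 *	if (attr != NULL)
 *		priv_flow_attr(priv, attr, size, type);
 */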
255 * Convert hash type position in indirection table initializer to
256 * hash RX queue type.
259 * Indirection table initializer.
261 * Hash type position.
264 * Hash RX queue type.
266 static enum hash_rxq_type
267 hash_rxq_type_from_pos(const struct ind_table_init *table, unsigned int pos)
269 enum hash_rxq_type type = HASH_RXQ_TCPV4;
271 assert(pos < table->hash_types_n);
273 if ((table->hash_types & (1 << type)) && (pos-- == 0))
281 * Filter out disabled hash RX queue types from ind_table_init[].
284 * Pointer to private structure.
289 * Number of table entries.
292 priv_make_ind_table_init(struct priv *priv,
293 struct ind_table_init (*table)[IND_TABLE_INIT_N])
298 unsigned int table_n = 0;
299 /* Mandatory to receive frames not handled by normal hash RX queues. */
300 unsigned int hash_types_sup = 1 << HASH_RXQ_ETH;
302 rss_hf = priv->rss_hf;
303 /* Process other protocols only if more than one queue. */
304 if (priv->rxqs_n > 1)
305 for (i = 0; (i != hash_rxq_init_n); ++i)
306 if (rss_hf & hash_rxq_init[i].dpdk_rss_hf)
307 hash_types_sup |= (1 << i);
309 /* Filter out entries whose protocols are not in the set. */
310 for (i = 0, j = 0; (i != IND_TABLE_INIT_N); ++i) {
314 /* j is increased only if the table has valid protocols. */
316 (*table)[j] = ind_table_init[i];
317 (*table)[j].hash_types &= hash_types_sup;
318 for (h = 0, nb = 0; (h != hash_rxq_init_n); ++h)
319 if (((*table)[j].hash_types >> h) & 0x1)
321 (*table)[j].hash_types_n = nb;
331 * Initialize hash RX queues and indirection table.
334 * Pointer to private structure.
337 * 0 on success, errno value on failure.
340 priv_create_hash_rxqs(struct priv *priv)
342 struct ibv_wq *wqs[priv->reta_idx_n];
343 struct ind_table_init ind_table_init[IND_TABLE_INIT_N];
344 unsigned int ind_tables_n =
345 priv_make_ind_table_init(priv, &ind_table_init);
346 unsigned int hash_rxqs_n = 0;
347 struct hash_rxq (*hash_rxqs)[] = NULL;
348 struct ibv_rwq_ind_table *(*ind_tables)[] = NULL;
354 assert(priv->ind_tables == NULL);
355 assert(priv->ind_tables_n == 0);
356 assert(priv->hash_rxqs == NULL);
357 assert(priv->hash_rxqs_n == 0);
358 assert(priv->pd != NULL);
359 assert(priv->ctx != NULL);
362 if (priv->rxqs_n == 0)
364 assert(priv->rxqs != NULL);
365 if (ind_tables_n == 0) {
366 ERROR("all hash RX queue types have been filtered out,"
367 " indirection table cannot be created");
370 if (priv->rxqs_n & (priv->rxqs_n - 1)) {
371 INFO("%u RX queues are configured, consider rounding this"
372 " number to the next power of two for better balancing",
374 DEBUG("indirection table extended to assume %u WQs",
377 for (i = 0; (i != priv->reta_idx_n); ++i) {
378 struct mlx5_rxq_ctrl *rxq_ctrl;
380 rxq_ctrl = container_of((*priv->rxqs)[(*priv->reta_idx)[i]],
381 struct mlx5_rxq_ctrl, rxq);
382 wqs[i] = rxq_ctrl->ibv->wq;
384 /* Get number of hash RX queues to configure. */
385 for (i = 0, hash_rxqs_n = 0; (i != ind_tables_n); ++i)
386 hash_rxqs_n += ind_table_init[i].hash_types_n;
387 DEBUG("allocating %u hash RX queues for %u WQs, %u indirection tables",
388 hash_rxqs_n, priv->rxqs_n, ind_tables_n);
389 /* Create indirection tables. */
390 ind_tables = rte_calloc(__func__, ind_tables_n,
391 sizeof((*ind_tables)[0]), 0);
392 if (ind_tables == NULL) {
394 ERROR("cannot allocate indirection tables container: %s",
398 for (i = 0; (i != ind_tables_n); ++i) {
399 struct ibv_rwq_ind_table_init_attr ind_init_attr = {
400 .log_ind_tbl_size = 0, /* Set below. */
404 unsigned int ind_tbl_size = ind_table_init[i].max_size;
405 struct ibv_rwq_ind_table *ind_table;
407 if (priv->reta_idx_n < ind_tbl_size)
408 ind_tbl_size = priv->reta_idx_n;
409 ind_init_attr.log_ind_tbl_size = log2above(ind_tbl_size);
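/*
 * Verbs expects the indirection table size as a log2 value, hence the
 * rounding of the WQ count up to the next power of two.
 */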
411 ind_table = ibv_create_rwq_ind_table(priv->ctx,
413 if (ind_table != NULL) {
414 (*ind_tables)[i] = ind_table;
417 /* Not clear whether errno is set. */
418 err = (errno ? errno : EINVAL);
419 ERROR("RX indirection table creation failed with error %d: %s",
423 /* Allocate array that holds hash RX queues and related data. */
424 hash_rxqs = rte_calloc(__func__, hash_rxqs_n,
425 sizeof((*hash_rxqs)[0]), 0);
426 if (hash_rxqs == NULL) {
428 ERROR("cannot allocate hash RX queues container: %s",
432 for (i = 0, j = 0, k = 0;
433 ((i != hash_rxqs_n) && (j != ind_tables_n));
435 struct hash_rxq *hash_rxq = &(*hash_rxqs)[i];
436 enum hash_rxq_type type =
437 hash_rxq_type_from_pos(&ind_table_init[j], k);
438 struct rte_eth_rss_conf *priv_rss_conf =
439 (*priv->rss_conf)[type];
440 struct ibv_rx_hash_conf hash_conf = {
441 .rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ,
442 .rx_hash_key_len = (priv_rss_conf ?
443 priv_rss_conf->rss_key_len :
444 rss_hash_default_key_len),
445 .rx_hash_key = (priv_rss_conf ?
446 priv_rss_conf->rss_key :
447 rss_hash_default_key),
448 .rx_hash_fields_mask = hash_rxq_init[type].hash_fields,
450 struct ibv_qp_init_attr_ex qp_init_attr = {
451 .qp_type = IBV_QPT_RAW_PACKET,
452 .comp_mask = (IBV_QP_INIT_ATTR_PD |
453 IBV_QP_INIT_ATTR_IND_TABLE |
454 IBV_QP_INIT_ATTR_RX_HASH),
455 .rx_hash_conf = hash_conf,
456 .rwq_ind_tbl = (*ind_tables)[j],
460 DEBUG("using indirection table %u for hash RX queue %u type %d",
462 *hash_rxq = (struct hash_rxq){
464 .qp = ibv_create_qp_ex(priv->ctx, &qp_init_attr),
467 if (hash_rxq->qp == NULL) {
468 err = (errno ? errno : EINVAL);
469 ERROR("Hash RX QP creation failure: %s",
473 if (++k < ind_table_init[j].hash_types_n)
475 /* Switch to the next indirection table and reset hash RX
476 * queue type array index. */
480 priv->ind_tables = ind_tables;
481 priv->ind_tables_n = ind_tables_n;
482 priv->hash_rxqs = hash_rxqs;
483 priv->hash_rxqs_n = hash_rxqs_n;
487 if (hash_rxqs != NULL) {
488 for (i = 0; (i != hash_rxqs_n); ++i) {
489 struct ibv_qp *qp = (*hash_rxqs)[i].qp;
493 claim_zero(ibv_destroy_qp(qp));
497 if (ind_tables != NULL) {
498 for (j = 0; (j != ind_tables_n); ++j) {
499 struct ibv_rwq_ind_table *ind_table =
502 if (ind_table == NULL)
504 claim_zero(ibv_destroy_rwq_ind_table(ind_table));
506 rte_free(ind_tables);
512 * Clean up hash RX queues and indirection table.
515 * Pointer to private structure.
518 priv_destroy_hash_rxqs(struct priv *priv)
522 DEBUG("destroying %u hash RX queues", priv->hash_rxqs_n);
523 if (priv->hash_rxqs_n == 0) {
524 assert(priv->hash_rxqs == NULL);
525 assert(priv->ind_tables == NULL);
528 for (i = 0; (i != priv->hash_rxqs_n); ++i) {
529 struct hash_rxq *hash_rxq = &(*priv->hash_rxqs)[i];
532 assert(hash_rxq->priv == priv);
533 assert(hash_rxq->qp != NULL);
534 for (j = 0; (j != RTE_DIM(hash_rxq->mac_flow)); ++j)
535 for (k = 0; (k != RTE_DIM(hash_rxq->mac_flow[j])); ++k)
536 assert(hash_rxq->mac_flow[j][k] == NULL);
537 claim_zero(ibv_destroy_qp(hash_rxq->qp));
539 priv->hash_rxqs_n = 0;
540 rte_free(priv->hash_rxqs);
541 priv->hash_rxqs = NULL;
542 for (i = 0; (i != priv->ind_tables_n); ++i) {
543 struct ibv_rwq_ind_table *ind_table =
544 (*priv->ind_tables)[i];
546 assert(ind_table != NULL);
547 claim_zero(ibv_destroy_rwq_ind_table(ind_table));
549 priv->ind_tables_n = 0;
550 rte_free(priv->ind_tables);
551 priv->ind_tables = NULL;
555 * Allocate RX queue elements.
558 * Pointer to RX queue structure.
561 * 0 on success, errno value on failure.
564 rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
566 const unsigned int sges_n = 1 << rxq_ctrl->rxq.sges_n;
567 unsigned int elts_n = 1 << rxq_ctrl->rxq.elts_n;
571 /* Iterate on segments. */
572 for (i = 0; (i != elts_n); ++i) {
573 struct rte_mbuf *buf;
575 buf = rte_pktmbuf_alloc(rxq_ctrl->rxq.mp);
577 ERROR("%p: empty mbuf pool", (void *)rxq_ctrl);
581 /* Headroom is reserved by rte_pktmbuf_alloc(). */
582 assert(DATA_OFF(buf) == RTE_PKTMBUF_HEADROOM);
583 /* Buffer is supposed to be empty. */
584 assert(rte_pktmbuf_data_len(buf) == 0);
585 assert(rte_pktmbuf_pkt_len(buf) == 0);
587 /* Only the first segment keeps headroom. */
589 SET_DATA_OFF(buf, 0);
590 PORT(buf) = rxq_ctrl->rxq.port_id;
591 DATA_LEN(buf) = rte_pktmbuf_tailroom(buf);
592 PKT_LEN(buf) = DATA_LEN(buf);
594 (*rxq_ctrl->rxq.elts)[i] = buf;
596 /* Extra initialization when the vectorized Rx function is in use. */
597 if (rxq_check_vec_support(&rxq_ctrl->rxq) > 0) {
598 struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
599 struct rte_mbuf *mbuf_init = &rxq->fake_mbuf;
602 /* Initialize default rearm_data for vPMD. */
603 mbuf_init->data_off = RTE_PKTMBUF_HEADROOM;
604 rte_mbuf_refcnt_set(mbuf_init, 1);
605 mbuf_init->nb_segs = 1;
606 mbuf_init->port = rxq->port_id;
608 * prevent compiler reordering:
609 * rearm_data covers previous fields.
611 rte_compiler_barrier();
612 rxq->mbuf_initializer =
613 *(uint64_t *)&mbuf_init->rearm_data;
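/*
 * rearm_data overlays data_off, refcnt, nb_segs and port, so this 64-bit
 * template lets the vectorized Rx path reset an mbuf header with a single
 * store instead of four separate field writes.
 */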
614 /* Padding with a fake mbuf for vectorized Rx. */
615 for (j = 0; j < MLX5_VPMD_DESCS_PER_LOOP; ++j)
616 (*rxq->elts)[elts_n + j] = &rxq->fake_mbuf;
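/*
 * The vectorized burst may speculatively read a few descriptors past the
 * ring end; pointing these padding slots at the static fake mbuf keeps
 * those accesses harmless.
 */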
618 DEBUG("%p: allocated and configured %u segments (max %u packets)",
619 (void *)rxq_ctrl, elts_n, elts_n / (1 << rxq_ctrl->rxq.sges_n));
624 for (i = 0; (i != elts_n); ++i) {
625 if ((*rxq_ctrl->rxq.elts)[i] != NULL)
626 rte_pktmbuf_free_seg((*rxq_ctrl->rxq.elts)[i]);
627 (*rxq_ctrl->rxq.elts)[i] = NULL;
629 DEBUG("%p: failed, freed everything", (void *)rxq_ctrl);
635 * Free RX queue elements.
638 * Pointer to RX queue structure.
641 rxq_free_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
643 struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
644 const uint16_t q_n = (1 << rxq->elts_n);
645 const uint16_t q_mask = q_n - 1;
646 uint16_t used = q_n - (rxq->rq_ci - rxq->rq_pi);
649 DEBUG("%p: freeing WRs", (void *)rxq_ctrl);
650 if (rxq->elts == NULL)
653 * Some mbufs in the ring belong to the application; they cannot be freed.
656 if (rxq_check_vec_support(rxq) > 0) {
657 for (i = 0; i < used; ++i)
658 (*rxq->elts)[(rxq->rq_ci + i) & q_mask] = NULL;
659 rxq->rq_pi = rxq->rq_ci;
661 for (i = 0; (i != (1u << rxq->elts_n)); ++i) {
662 if ((*rxq->elts)[i] != NULL)
663 rte_pktmbuf_free_seg((*rxq->elts)[i]);
664 (*rxq->elts)[i] = NULL;
669 * Clean up an RX queue.
671 * Destroy objects, free allocated memory and reset the structure for reuse.
674 * Pointer to RX queue structure.
677 mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *rxq_ctrl)
679 DEBUG("cleaning up %p", (void *)rxq_ctrl);
681 mlx5_priv_rxq_ibv_release(rxq_ctrl->priv, rxq_ctrl->ibv);
682 memset(rxq_ctrl, 0, sizeof(*rxq_ctrl));
688 * Pointer to Ethernet device structure.
692 * Number of descriptors to configure in queue.
694 * NUMA socket on which memory must be allocated.
696 * Thresholds parameters.
698 * Memory pool for buffer allocations.
701 * 0 on success, negative errno value on failure.
704 mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
705 unsigned int socket, const struct rte_eth_rxconf *conf,
706 struct rte_mempool *mp)
708 struct priv *priv = dev->data->dev_private;
709 struct mlx5_rxq_data *rxq = (*priv->rxqs)[idx];
710 struct mlx5_rxq_ctrl *rxq_ctrl =
711 container_of(rxq, struct mlx5_rxq_ctrl, rxq);
715 if (mlx5_is_secondary())
716 return -E_RTE_SECONDARY;
718 if (!rte_is_power_of_2(desc)) {
719 desc = 1 << log2above(desc);
720 WARN("%p: increased number of descriptors in RX queue %u"
721 " to the next power of two (%d)",
722 (void *)dev, idx, desc);
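/* For example, a request for 500 descriptors is raised to 512 here. */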
724 DEBUG("%p: configuring queue %u for %u descriptors",
725 (void *)dev, idx, desc);
726 if (idx >= priv->rxqs_n) {
727 ERROR("%p: queue index out of range (%u >= %u)",
728 (void *)dev, idx, priv->rxqs_n);
732 if (!mlx5_priv_rxq_releasable(priv, idx)) {
734 ERROR("%p: unable to release queue index %u",
738 mlx5_priv_rxq_release(priv, idx);
739 rxq_ctrl = mlx5_priv_rxq_new(priv, idx, desc, socket, mp);
741 ERROR("%p: unable to allocate queue index %u",
746 DEBUG("%p: adding RX queue %p to list",
747 (void *)dev, (void *)rxq_ctrl);
748 (*priv->rxqs)[idx] = &rxq_ctrl->rxq;
755 * DPDK callback to release an RX queue.
758 * Generic RX queue pointer.
761 mlx5_rx_queue_release(void *dpdk_rxq)
763 struct mlx5_rxq_data *rxq = (struct mlx5_rxq_data *)dpdk_rxq;
764 struct mlx5_rxq_ctrl *rxq_ctrl;
767 if (mlx5_is_secondary())
772 rxq_ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq);
773 priv = rxq_ctrl->priv;
775 if (!mlx5_priv_rxq_releasable(priv, rxq_ctrl->rxq.stats.idx))
776 rte_panic("Rx queue %p is still used by a flow and cannot be"
777 " removed\n", (void *)rxq_ctrl);
778 mlx5_priv_rxq_release(priv, rxq_ctrl->rxq.stats.idx);
783 * Allocate queue vector and fill epoll fd list for Rx interrupts.
786 * Pointer to private structure.
789 * 0 on success, negative on failure.
792 priv_rx_intr_vec_enable(struct priv *priv)
795 unsigned int rxqs_n = priv->rxqs_n;
796 unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
797 unsigned int count = 0;
798 struct rte_intr_handle *intr_handle = priv->dev->intr_handle;
800 assert(!mlx5_is_secondary());
801 if (!priv->dev->data->dev_conf.intr_conf.rxq)
803 priv_rx_intr_vec_disable(priv);
804 intr_handle->intr_vec = malloc(n * sizeof(intr_handle->intr_vec[0]));
805 if (intr_handle->intr_vec == NULL) {
806 ERROR("failed to allocate memory for interrupt vector,"
807 " Rx interrupts will not be supported");
810 intr_handle->type = RTE_INTR_HANDLE_EXT;
811 for (i = 0; i != n; ++i) {
812 /* This rxq ibv must not be released in this function. */
813 struct mlx5_rxq_ibv *rxq_ibv = mlx5_priv_rxq_ibv_get(priv, i);
818 /* Skip queues that cannot request interrupts. */
819 if (!rxq_ibv || !rxq_ibv->channel) {
820 /* Use invalid intr_vec[] index to disable entry. */
821 intr_handle->intr_vec[i] =
822 RTE_INTR_VEC_RXTX_OFFSET +
823 RTE_MAX_RXTX_INTR_VEC_ID;
826 if (count >= RTE_MAX_RXTX_INTR_VEC_ID) {
827 ERROR("too many Rx queues for interrupt vector size"
828 " (%d), Rx interrupts cannot be enabled",
829 RTE_MAX_RXTX_INTR_VEC_ID);
830 priv_rx_intr_vec_disable(priv);
833 fd = rxq_ibv->channel->fd;
834 flags = fcntl(fd, F_GETFL);
835 rc = fcntl(fd, F_SETFL, flags | O_NONBLOCK);
837 ERROR("failed to make Rx interrupt file descriptor"
838 " %d non-blocking for queue index %d", fd, i);
839 priv_rx_intr_vec_disable(priv);
842 intr_handle->intr_vec[i] = RTE_INTR_VEC_RXTX_OFFSET + count;
843 intr_handle->efds[count] = fd;
847 priv_rx_intr_vec_disable(priv);
849 intr_handle->nb_efd = count;
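/*
 * At this point intr_vec[] maps every Rx queue either to a valid epoll fd
 * slot (RTE_INTR_VEC_RXTX_OFFSET plus its index in efds[]) or to the
 * out-of-range marker set above for queues without an interrupt channel.
 */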
854 * Clean up the Rx interrupt handler.
857 * Pointer to private structure.
860 priv_rx_intr_vec_disable(struct priv *priv)
862 struct rte_intr_handle *intr_handle = priv->dev->intr_handle;
864 unsigned int rxqs_n = priv->rxqs_n;
865 unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
867 if (!priv->dev->data->dev_conf.intr_conf.rxq)
869 for (i = 0; i != n; ++i) {
870 struct mlx5_rxq_ctrl *rxq_ctrl;
871 struct mlx5_rxq_data *rxq_data;
873 if (intr_handle->intr_vec[i] == RTE_INTR_VEC_RXTX_OFFSET +
874 RTE_MAX_RXTX_INTR_VEC_ID)
877 * Need to access the queue directly to release the reference
878 * kept in priv_rx_intr_vec_enable().
880 rxq_data = (*priv->rxqs)[i];
881 rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
882 mlx5_priv_rxq_ibv_release(priv, rxq_ctrl->ibv);
884 rte_intr_free_epoll_fd(intr_handle);
885 free(intr_handle->intr_vec);
886 intr_handle->nb_efd = 0;
887 intr_handle->intr_vec = NULL;
891 * MLX5 CQ notification.
894 * Pointer to receive queue structure.
896 * Sequence number per receive queue.
899 mlx5_arm_cq(struct mlx5_rxq_data *rxq, int sq_n_rxq)
902 uint32_t doorbell_hi;
904 void *cq_db_reg = (char *)rxq->cq_uar + MLX5_CQ_DOORBELL;
906 sq_n = sq_n_rxq & MLX5_CQ_SQN_MASK;
907 doorbell_hi = sq_n << MLX5_CQ_SQN_OFFSET | (rxq->cq_ci & MLX5_CI_MASK);
908 doorbell = (uint64_t)doorbell_hi << 32;
909 doorbell |= rxq->cqn;
910 rxq->cq_db[MLX5_CQ_ARM_DB] = rte_cpu_to_be_32(doorbell_hi);
912 rte_write64(rte_cpu_to_be_64(doorbell), cq_db_reg);
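/*
 * Arming sequence: the doorbell record receives the command sequence
 * number and current consumer index, then the 64-bit doorbell (same value
 * in the high word, CQ number in the low word) is written to the UAR page
 * so the HCA generates an event on the next completion.
 */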
916 * DPDK callback for Rx queue interrupt enable.
919 * Pointer to Ethernet device structure.
924 * 0 on success, negative on failure.
927 mlx5_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
929 struct priv *priv = mlx5_get_priv(dev);
930 struct mlx5_rxq_data *rxq_data;
931 struct mlx5_rxq_ctrl *rxq_ctrl;
935 rxq_data = (*priv->rxqs)[rx_queue_id];
940 rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
942 struct mlx5_rxq_ibv *rxq_ibv;
944 rxq_ibv = mlx5_priv_rxq_ibv_get(priv, rx_queue_id);
949 mlx5_arm_cq(rxq_data, rxq_data->cq_arm_sn);
950 mlx5_priv_rxq_ibv_release(priv, rxq_ibv);
955 WARN("unable to arm interrupt on rx queue %d", rx_queue_id);
960 * DPDK callback for Rx queue interrupt disable.
963 * Pointer to Ethernet device structure.
968 * 0 on success, negative on failure.
971 mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
973 struct priv *priv = mlx5_get_priv(dev);
974 struct mlx5_rxq_data *rxq_data;
975 struct mlx5_rxq_ctrl *rxq_ctrl;
976 struct mlx5_rxq_ibv *rxq_ibv = NULL;
977 struct ibv_cq *ev_cq;
982 rxq_data = (*priv->rxqs)[rx_queue_id];
987 rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
990 rxq_ibv = mlx5_priv_rxq_ibv_get(priv, rx_queue_id);
995 ret = ibv_get_cq_event(rxq_ibv->channel, &ev_cq, &ev_ctx);
996 if (ret || ev_cq != rxq_ibv->cq) {
1000 rxq_data->cq_arm_sn++;
1001 ibv_ack_cq_events(rxq_ibv->cq, 1);
1004 mlx5_priv_rxq_ibv_release(priv, rxq_ibv);
1007 WARN("unable to disable interrupt on rx queue %d",
1013 * Create the Rx queue Verbs object.
1016 * Pointer to private structure.
1018 * Queue index in the DPDK Rx queue array.
1021 * The Verbs object initialised if it can be created.
1023 struct mlx5_rxq_ibv*
1024 mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx)
1026 struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
1027 struct mlx5_rxq_ctrl *rxq_ctrl =
1028 container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
1029 struct ibv_wq_attr mod;
1031 struct ibv_cq_init_attr_ex cq;
1032 struct ibv_wq_init_attr wq;
1033 struct ibv_cq_ex cq_attr;
1035 unsigned int cqe_n = (1 << rxq_data->elts_n) - 1;
1036 struct mlx5_rxq_ibv *tmpl;
1037 struct mlx5dv_cq cq_info;
1038 struct mlx5dv_rwq rwq;
1041 struct mlx5dv_obj obj;
1044 assert(!rxq_ctrl->ibv);
1045 tmpl = rte_calloc_socket(__func__, 1, sizeof(*tmpl), 0,
1048 ERROR("%p: cannot allocate verbs resources",
1052 tmpl->rxq_ctrl = rxq_ctrl;
1053 /* Use the entire RX mempool as the memory region. */
1054 tmpl->mr = priv_mr_get(priv, rxq_data->mp);
1056 tmpl->mr = priv_mr_new(priv, rxq_data->mp);
1058 ERROR("%p: MR creation failure", (void *)rxq_ctrl);
1062 if (rxq_ctrl->irq) {
1063 tmpl->channel = ibv_create_comp_channel(priv->ctx);
1064 if (!tmpl->channel) {
1065 ERROR("%p: Comp Channel creation failure",
1070 attr.cq = (struct ibv_cq_init_attr_ex){
1073 if (priv->cqe_comp) {
1074 attr.cq.comp_mask |= IBV_CQ_INIT_ATTR_MASK_FLAGS;
1075 attr.cq.flags |= MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE;
1077 * For vectorized Rx, the CQE count must not be doubled in order to
1078 * keep cq_ci and rq_ci aligned.
1080 if (rxq_check_vec_support(rxq_data) < 0)
1083 tmpl->cq = ibv_create_cq(priv->ctx, cqe_n, NULL, tmpl->channel, 0);
1084 if (tmpl->cq == NULL) {
1085 ERROR("%p: CQ creation failure", (void *)rxq_ctrl);
1088 DEBUG("priv->device_attr.max_qp_wr is %d",
1089 priv->device_attr.orig_attr.max_qp_wr);
1090 DEBUG("priv->device_attr.max_sge is %d",
1091 priv->device_attr.orig_attr.max_sge);
1092 attr.wq = (struct ibv_wq_init_attr){
1093 .wq_context = NULL, /* Could be useful in the future. */
1094 .wq_type = IBV_WQT_RQ,
1095 /* Max number of outstanding WRs. */
1096 .max_wr = (1 << rxq_data->elts_n) >> rxq_data->sges_n,
1097 /* Max number of scatter/gather elements in a WR. */
1098 .max_sge = 1 << rxq_data->sges_n,
1102 IBV_WQ_FLAGS_CVLAN_STRIPPING |
1104 .create_flags = (rxq_data->vlan_strip ?
1105 IBV_WQ_FLAGS_CVLAN_STRIPPING :
1108 /* By default, FCS (CRC) is stripped by hardware. */
1109 if (rxq_data->crc_present) {
1110 attr.wq.create_flags |= IBV_WQ_FLAGS_SCATTER_FCS;
1111 attr.wq.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
1113 #ifdef HAVE_IBV_WQ_FLAG_RX_END_PADDING
1114 if (priv->hw_padding) {
1115 attr.wq.create_flags |= IBV_WQ_FLAG_RX_END_PADDING;
1116 attr.wq.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
1119 tmpl->wq = ibv_create_wq(priv->ctx, &attr.wq);
1120 if (tmpl->wq == NULL) {
1121 ERROR("%p: WQ creation failure", (void *)rxq_ctrl);
1125 * Make sure the number of WRs*SGEs matches expectations since a queue
1126 * cannot allocate more than "desc" buffers.
1128 if (((int)attr.wq.max_wr !=
1129 ((1 << rxq_data->elts_n) >> rxq_data->sges_n)) ||
1130 ((int)attr.wq.max_sge != (1 << rxq_data->sges_n))) {
1131 ERROR("%p: requested %u*%u but got %u*%u WRs*SGEs",
1133 ((1 << rxq_data->elts_n) >> rxq_data->sges_n),
1134 (1 << rxq_data->sges_n),
1135 attr.wq.max_wr, attr.wq.max_sge);
1138 /* Change queue state to ready. */
1139 mod = (struct ibv_wq_attr){
1140 .attr_mask = IBV_WQ_ATTR_STATE,
1141 .wq_state = IBV_WQS_RDY,
1143 ret = ibv_modify_wq(tmpl->wq, &mod);
1145 ERROR("%p: WQ state to IBV_WQS_RDY failed",
1149 obj.cq.in = tmpl->cq;
1150 obj.cq.out = &cq_info;
1151 obj.rwq.in = tmpl->wq;
1153 ret = mlx5dv_init_obj(&obj, MLX5DV_OBJ_CQ | MLX5DV_OBJ_RWQ);
1156 if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
1157 ERROR("Wrong MLX5_CQE_SIZE environment variable value: "
1158 "it should be set to %u", RTE_CACHE_LINE_SIZE);
1161 /* Fill the rings. */
1162 rxq_data->wqes = (volatile struct mlx5_wqe_data_seg (*)[])
1164 for (i = 0; (i != (unsigned int)(1 << rxq_data->elts_n)); ++i) {
1165 struct rte_mbuf *buf = (*rxq_data->elts)[i];
1166 volatile struct mlx5_wqe_data_seg *scat = &(*rxq_data->wqes)[i];
1168 /* scat->addr must be able to store a pointer. */
1169 assert(sizeof(scat->addr) >= sizeof(uintptr_t));
1170 *scat = (struct mlx5_wqe_data_seg){
1171 .addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf,
1173 .byte_count = rte_cpu_to_be_32(DATA_LEN(buf)),
1174 .lkey = tmpl->mr->lkey,
1177 rxq_data->rq_db = rwq.dbrec;
1178 rxq_data->cqe_n = log2above(cq_info.cqe_cnt);
1179 rxq_data->cq_ci = 0;
1180 rxq_data->rq_ci = 0;
1181 rxq_data->rq_pi = 0;
1182 rxq_data->zip = (struct rxq_zip){
1185 rxq_data->cq_db = cq_info.dbrec;
1186 rxq_data->cqes = (volatile struct mlx5_cqe (*)[])(uintptr_t)cq_info.buf;
1187 /* Update doorbell counter. */
1188 rxq_data->rq_ci = (1 << rxq_data->elts_n) >> rxq_data->sges_n;
1190 *rxq_data->rq_db = rte_cpu_to_be_32(rxq_data->rq_ci);
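/*
 * rq_ci counts WQEs rather than buffers: with several SGEs per WQE the
 * elts_n mbufs posted above occupy (1 << elts_n) >> sges_n work requests,
 * all made visible to hardware by this single doorbell write.
 */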
1191 DEBUG("%p: rxq updated with %p", (void *)rxq_ctrl, (void *)&tmpl);
1192 rte_atomic32_inc(&tmpl->refcnt);
1193 DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void *)priv,
1194 (void *)tmpl, rte_atomic32_read(&tmpl->refcnt));
1195 LIST_INSERT_HEAD(&priv->rxqsibv, tmpl, next);
1199 claim_zero(ibv_destroy_wq(tmpl->wq));
1201 claim_zero(ibv_destroy_cq(tmpl->cq));
1203 claim_zero(ibv_destroy_comp_channel(tmpl->channel));
1205 priv_mr_release(priv, tmpl->mr);
1210 * Get an Rx queue Verbs object.
1213 * Pointer to private structure.
1215 * Queue index in the DPDK Rx queue array.
1218 * The Verbs object if it exists.
1220 struct mlx5_rxq_ibv*
1221 mlx5_priv_rxq_ibv_get(struct priv *priv, uint16_t idx)
1223 struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
1224 struct mlx5_rxq_ctrl *rxq_ctrl;
1226 if (idx >= priv->rxqs_n)
1230 rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
1231 if (rxq_ctrl->ibv) {
1232 priv_mr_get(priv, rxq_data->mp);
1233 rte_atomic32_inc(&rxq_ctrl->ibv->refcnt);
1234 DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void *)priv,
1235 (void *)rxq_ctrl->ibv,
1236 rte_atomic32_read(&rxq_ctrl->ibv->refcnt));
1238 return rxq_ctrl->ibv;
1242 * Release an Rx queue Verbs object.
1245 * Pointer to private structure.
1247 * Verbs Rx queue object.
1250 * 0 on success, errno value on failure.
1253 mlx5_priv_rxq_ibv_release(struct priv *priv, struct mlx5_rxq_ibv *rxq_ibv)
1258 assert(rxq_ibv->wq);
1259 assert(rxq_ibv->cq);
1260 assert(rxq_ibv->mr);
1261 ret = priv_mr_release(priv, rxq_ibv->mr);
1264 DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void *)priv,
1265 (void *)rxq_ibv, rte_atomic32_read(&rxq_ibv->refcnt));
1266 if (rte_atomic32_dec_and_test(&rxq_ibv->refcnt)) {
1267 rxq_free_elts(rxq_ibv->rxq_ctrl);
1268 claim_zero(ibv_destroy_wq(rxq_ibv->wq));
1269 claim_zero(ibv_destroy_cq(rxq_ibv->cq));
1270 if (rxq_ibv->channel)
1271 claim_zero(ibv_destroy_comp_channel(rxq_ibv->channel));
1272 LIST_REMOVE(rxq_ibv, next);
1280 * Verify the Verbs Rx queue list is empty.
1283 * Pointer to private structure.
1285 * @return the number of objects not released.
1288 mlx5_priv_rxq_ibv_verify(struct priv *priv)
1291 struct mlx5_rxq_ibv *rxq_ibv;
1293 LIST_FOREACH(rxq_ibv, &priv->rxqsibv, next) {
1294 DEBUG("%p: Verbs Rx queue %p still referenced", (void *)priv,
1302 * Return true if a single reference exists on the object.
1305 * Pointer to private structure.
1307 * Verbs Rx queue object.
1310 mlx5_priv_rxq_ibv_releasable(struct priv *priv, struct mlx5_rxq_ibv *rxq_ibv)
1314 return (rte_atomic32_read(&rxq_ibv->refcnt) == 1);
1318 * Create a DPDK Rx queue.
1321 * Pointer to private structure.
1325 * Number of descriptors to configure in queue.
1327 * NUMA socket on which memory must be allocated.
1330 * A DPDK queue object on success.
1332 struct mlx5_rxq_ctrl*
1333 mlx5_priv_rxq_new(struct priv *priv, uint16_t idx, uint16_t desc,
1334 unsigned int socket, struct rte_mempool *mp)
1336 struct rte_eth_dev *dev = priv->dev;
1337 struct mlx5_rxq_ctrl *tmpl;
1338 const uint16_t desc_n =
1339 desc + priv->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP;
1340 unsigned int mb_len = rte_pktmbuf_data_room_size(mp);
1342 tmpl = rte_calloc_socket("RXQ", 1,
1344 desc_n * sizeof(struct rte_mbuf *),
1348 if (priv->dev->data->dev_conf.intr_conf.rxq)
1350 /* Enable scattered packets support for this queue if necessary. */
1351 assert(mb_len >= RTE_PKTMBUF_HEADROOM);
1352 if (dev->data->dev_conf.rxmode.max_rx_pkt_len <=
1353 (mb_len - RTE_PKTMBUF_HEADROOM)) {
1354 tmpl->rxq.sges_n = 0;
1355 } else if (dev->data->dev_conf.rxmode.enable_scatter) {
1357 RTE_PKTMBUF_HEADROOM +
1358 dev->data->dev_conf.rxmode.max_rx_pkt_len;
1359 unsigned int sges_n;
1362 * Determine the number of SGEs needed for a full packet
1363 * and round it to the next power of two.
1365 sges_n = log2above((size / mb_len) + !!(size % mb_len));
1366 tmpl->rxq.sges_n = sges_n;
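/*
 * Worked example with hypothetical numbers: for a 2048-byte mbuf data room,
 * a 128-byte headroom and max_rx_pkt_len = 9000, size is 9128 and
 * 9128 / 2048 = 4 with a remainder, so sges_n = log2above(5) = 3 and each
 * packet may span up to 8 SGEs.
 */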
1367 /* Make sure rxq.sges_n did not overflow. */
1368 size = mb_len * (1 << tmpl->rxq.sges_n);
1369 size -= RTE_PKTMBUF_HEADROOM;
1370 if (size < dev->data->dev_conf.rxmode.max_rx_pkt_len) {
1371 ERROR("%p: too many SGEs (%u) needed to handle"
1372 " requested maximum packet size %u",
1375 dev->data->dev_conf.rxmode.max_rx_pkt_len);
1379 WARN("%p: the requested maximum Rx packet size (%u) is"
1380 " larger than a single mbuf (%u) and scattered"
1381 " mode has not been requested",
1383 dev->data->dev_conf.rxmode.max_rx_pkt_len,
1384 mb_len - RTE_PKTMBUF_HEADROOM);
1386 DEBUG("%p: maximum number of segments per packet: %u",
1387 (void *)dev, 1 << tmpl->rxq.sges_n);
1388 if (desc % (1 << tmpl->rxq.sges_n)) {
1389 ERROR("%p: number of RX queue descriptors (%u) is not a"
1390 " multiple of SGEs per packet (%u)",
1393 1 << tmpl->rxq.sges_n);
1396 /* Toggle RX checksum offload if hardware supports it. */
1398 tmpl->rxq.csum = !!dev->data->dev_conf.rxmode.hw_ip_checksum;
1399 if (priv->hw_csum_l2tun)
1400 tmpl->rxq.csum_l2tun =
1401 !!dev->data->dev_conf.rxmode.hw_ip_checksum;
1402 /* Configure VLAN stripping. */
1403 tmpl->rxq.vlan_strip = (priv->hw_vlan_strip &&
1404 !!dev->data->dev_conf.rxmode.hw_vlan_strip);
1405 /* By default, FCS (CRC) is stripped by hardware. */
1406 if (dev->data->dev_conf.rxmode.hw_strip_crc) {
1407 tmpl->rxq.crc_present = 0;
1408 } else if (priv->hw_fcs_strip) {
1409 tmpl->rxq.crc_present = 1;
1411 WARN("%p: CRC stripping has been disabled but will still"
1412 " be performed by hardware, make sure MLNX_OFED and"
1413 " firmware are up to date",
1415 tmpl->rxq.crc_present = 0;
1417 DEBUG("%p: CRC stripping is %s, %u bytes will be subtracted from"
1418 " incoming frames to hide it",
1420 tmpl->rxq.crc_present ? "disabled" : "enabled",
1421 tmpl->rxq.crc_present << 2);
1423 tmpl->rxq.rss_hash = priv->rxqs_n > 1;
1424 tmpl->rxq.port_id = dev->data->port_id;
1427 tmpl->rxq.stats.idx = idx;
1428 tmpl->rxq.elts_n = log2above(desc);
1430 (struct rte_mbuf *(*)[1 << tmpl->rxq.elts_n])(tmpl + 1);
1431 rte_atomic32_inc(&tmpl->refcnt);
1432 DEBUG("%p: Rx queue %p: refcnt %d", (void *)priv,
1433 (void *)tmpl, rte_atomic32_read(&tmpl->refcnt));
1434 LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next);
1445 * Pointer to private structure.
1450 * A pointer to the queue if it exists.
1452 struct mlx5_rxq_ctrl*
1453 mlx5_priv_rxq_get(struct priv *priv, uint16_t idx)
1455 struct mlx5_rxq_ctrl *rxq_ctrl = NULL;
1457 if ((*priv->rxqs)[idx]) {
1458 rxq_ctrl = container_of((*priv->rxqs)[idx],
1459 struct mlx5_rxq_ctrl,
1462 mlx5_priv_rxq_ibv_get(priv, idx);
1463 rte_atomic32_inc(&rxq_ctrl->refcnt);
1464 DEBUG("%p: Rx queue %p: refcnt %d", (void *)priv,
1465 (void *)rxq_ctrl, rte_atomic32_read(&rxq_ctrl->refcnt));
1471 * Release an Rx queue.
1474 * Pointer to private structure.
1479 * 0 on success, errno value on failure.
1482 mlx5_priv_rxq_release(struct priv *priv, uint16_t idx)
1484 struct mlx5_rxq_ctrl *rxq_ctrl;
1486 if (!(*priv->rxqs)[idx])
1488 rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq);
1489 assert(rxq_ctrl->priv);
1490 if (rxq_ctrl->ibv) {
1493 ret = mlx5_priv_rxq_ibv_release(rxq_ctrl->priv, rxq_ctrl->ibv);
1495 rxq_ctrl->ibv = NULL;
1497 DEBUG("%p: Rx queue %p: refcnt %d", (void *)priv,
1498 (void *)rxq_ctrl, rte_atomic32_read(&rxq_ctrl->refcnt));
1499 if (rte_atomic32_dec_and_test(&rxq_ctrl->refcnt)) {
1500 LIST_REMOVE(rxq_ctrl, next);
1502 (*priv->rxqs)[idx] = NULL;
1509 * Verify if the queue can be released.
1512 * Pointer to private structure.
1517 * 1 if the queue can be released.
1520 mlx5_priv_rxq_releasable(struct priv *priv, uint16_t idx)
1522 struct mlx5_rxq_ctrl *rxq_ctrl;
1524 if (!(*priv->rxqs)[idx])
1526 rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq);
1527 return (rte_atomic32_read(&rxq_ctrl->refcnt) == 1);
1531 * Verify the Rx queue list is empty.
1534 * Pointer to private structure.
1536 * @return the number of objects not released.
1539 mlx5_priv_rxq_verify(struct priv *priv)
1541 struct mlx5_rxq_ctrl *rxq_ctrl;
1544 LIST_FOREACH(rxq_ctrl, &priv->rxqsctrl, next) {
1545 DEBUG("%p: Rx Queue %p still referenced", (void *)priv,
1553 * Create an indirection table.
1556 * Pointer to private structure.
1558 * Queues to include in the indirection table.
1560 * Number of queues in the array.
1563 * A new indirection table.
1565 struct mlx5_ind_table_ibv*
1566 mlx5_priv_ind_table_ibv_new(struct priv *priv, uint16_t queues[],
1569 struct mlx5_ind_table_ibv *ind_tbl;
1570 const unsigned int wq_n = rte_is_power_of_2(queues_n) ?
1571 log2above(queues_n) :
1572 log2above(priv->ind_table_max_size);
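/*
 * When the queue count is not a power of two, the table is created at the
 * maximum supported size so that the finalisation loop below can replicate
 * queues until every entry is filled.
 */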
1573 struct ibv_wq *wq[1 << wq_n];
1577 ind_tbl = rte_calloc(__func__, 1, sizeof(*ind_tbl) +
1578 queues_n * sizeof(uint16_t), 0);
1581 for (i = 0; i != queues_n; ++i) {
1582 struct mlx5_rxq_ctrl *rxq =
1583 mlx5_priv_rxq_get(priv, queues[i]);
1587 wq[i] = rxq->ibv->wq;
1588 ind_tbl->queues[i] = queues[i];
1590 ind_tbl->queues_n = queues_n;
1591 /* Finalise indirection table. */
1592 for (j = 0; i != (unsigned int)(1 << wq_n); ++i, ++j)
1594 ind_tbl->ind_table = ibv_create_rwq_ind_table(
1596 &(struct ibv_rwq_ind_table_init_attr){
1597 .log_ind_tbl_size = wq_n,
1601 if (!ind_tbl->ind_table)
1603 rte_atomic32_inc(&ind_tbl->refcnt);
1604 LIST_INSERT_HEAD(&priv->ind_tbls, ind_tbl, next);
1605 DEBUG("%p: Indirection table %p: refcnt %d", (void *)priv,
1606 (void *)ind_tbl, rte_atomic32_read(&ind_tbl->refcnt));
1610 DEBUG("%p cannot create indirection table", (void *)priv);
1615 * Get an indirection table.
1618 * Pointer to private structure.
1620 * Queues to include in the indirection table.
1622 * Number of queues in the array.
1625 * An indirection table if found.
1627 struct mlx5_ind_table_ibv*
1628 mlx5_priv_ind_table_ibv_get(struct priv *priv, uint16_t queues[],
1631 struct mlx5_ind_table_ibv *ind_tbl;
1633 LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) {
1634 if ((ind_tbl->queues_n == queues_n) &&
1635 (memcmp(ind_tbl->queues, queues,
1636 ind_tbl->queues_n * sizeof(ind_tbl->queues[0]))
1643 rte_atomic32_inc(&ind_tbl->refcnt);
1644 DEBUG("%p: Indirection table %p: refcnt %d", (void *)priv,
1645 (void *)ind_tbl, rte_atomic32_read(&ind_tbl->refcnt));
1646 for (i = 0; i != ind_tbl->queues_n; ++i)
1647 mlx5_priv_rxq_get(priv, ind_tbl->queues[i]);
1653 * Release an indirection table.
1656 * Pointer to private structure.
1658 * Indirection table to release.
1661 * 0 on success, errno value on failure.
1664 mlx5_priv_ind_table_ibv_release(struct priv *priv,
1665 struct mlx5_ind_table_ibv *ind_tbl)
1669 DEBUG("%p: Indirection table %p: refcnt %d", (void *)priv,
1670 (void *)ind_tbl, rte_atomic32_read(&ind_tbl->refcnt));
1671 if (rte_atomic32_dec_and_test(&ind_tbl->refcnt))
1672 claim_zero(ibv_destroy_rwq_ind_table(ind_tbl->ind_table));
1673 for (i = 0; i != ind_tbl->queues_n; ++i)
1674 claim_nonzero(mlx5_priv_rxq_release(priv, ind_tbl->queues[i]));
1675 if (!rte_atomic32_read(&ind_tbl->refcnt)) {
1676 LIST_REMOVE(ind_tbl, next);
1684 * Verify the Verbs indirection table list is empty.
1687 * Pointer to private structure.
1689 * @return the number of objects not released.
1692 mlx5_priv_ind_table_ibv_verify(struct priv *priv)
1694 struct mlx5_ind_table_ibv *ind_tbl;
1697 LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) {
1698 DEBUG("%p: Verbs indirection table %p still referenced",
1699 (void *)priv, (void *)ind_tbl);
1706 * Create an Rx Hash queue.
1709 * Pointer to private structure.
1711 * RSS key for the Rx hash queue.
1712 * @param rss_key_len
1714 * @param hash_fields
1715 * Verbs protocol hash field to make the RSS on.
1717 * Queues to include in the hash Rx queue.
1722 * A hash Rx queue on success.
1725 mlx5_priv_hrxq_new(struct priv *priv, uint8_t *rss_key, uint8_t rss_key_len,
1726 uint64_t hash_fields, uint16_t queues[], uint16_t queues_n)
1728 struct mlx5_hrxq *hrxq;
1729 struct mlx5_ind_table_ibv *ind_tbl;
1732 ind_tbl = mlx5_priv_ind_table_ibv_get(priv, queues, queues_n);
1734 ind_tbl = mlx5_priv_ind_table_ibv_new(priv, queues, queues_n);
1737 qp = ibv_create_qp_ex(
1739 &(struct ibv_qp_init_attr_ex){
1740 .qp_type = IBV_QPT_RAW_PACKET,
1742 IBV_QP_INIT_ATTR_PD |
1743 IBV_QP_INIT_ATTR_IND_TABLE |
1744 IBV_QP_INIT_ATTR_RX_HASH,
1745 .rx_hash_conf = (struct ibv_rx_hash_conf){
1746 .rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ,
1747 .rx_hash_key_len = rss_key_len,
1748 .rx_hash_key = rss_key,
1749 .rx_hash_fields_mask = hash_fields,
1751 .rwq_ind_tbl = ind_tbl->ind_table,
1756 hrxq = rte_calloc(__func__, 1, sizeof(*hrxq) + rss_key_len, 0);
1759 hrxq->ind_table = ind_tbl;
1761 hrxq->rss_key_len = rss_key_len;
1762 hrxq->hash_fields = hash_fields;
1763 memcpy(hrxq->rss_key, rss_key, rss_key_len);
1764 rte_atomic32_inc(&hrxq->refcnt);
1765 LIST_INSERT_HEAD(&priv->hrxqs, hrxq, next);
1766 DEBUG("%p: Hash Rx queue %p: refcnt %d", (void *)priv,
1767 (void *)hrxq, rte_atomic32_read(&hrxq->refcnt));
1770 mlx5_priv_ind_table_ibv_release(priv, ind_tbl);
1772 claim_zero(ibv_destroy_qp(qp));
1777 * Get an Rx Hash queue.
1780 * Pointer to private structure.
1782 * RSS configuration for the Rx hash queue.
1784 * Queues to include in the hash Rx queue.
1789 * A hash Rx queue on success.
1792 mlx5_priv_hrxq_get(struct priv *priv, uint8_t *rss_key, uint8_t rss_key_len,
1793 uint64_t hash_fields, uint16_t queues[], uint16_t queues_n)
1795 struct mlx5_hrxq *hrxq;
1797 LIST_FOREACH(hrxq, &priv->hrxqs, next) {
1798 struct mlx5_ind_table_ibv *ind_tbl;
1800 if (hrxq->rss_key_len != rss_key_len)
1802 if (memcmp(hrxq->rss_key, rss_key, rss_key_len))
1804 if (hrxq->hash_fields != hash_fields)
1806 ind_tbl = mlx5_priv_ind_table_ibv_get(priv, queues, queues_n);
1809 if (ind_tbl != hrxq->ind_table) {
1810 mlx5_priv_ind_table_ibv_release(priv, ind_tbl);
1813 rte_atomic32_inc(&hrxq->refcnt);
1814 DEBUG("%p: Hash Rx queue %p: refcnt %d", (void *)priv,
1815 (void *)hrxq, rte_atomic32_read(&hrxq->refcnt));
1822 * Release the hash Rx queue.
1825 * Pointer to private structure.
1827 * Pointer to Hash Rx queue to release.
1830 * 0 on success, errno value on failure.
1833 mlx5_priv_hrxq_release(struct priv *priv, struct mlx5_hrxq *hrxq)
1835 DEBUG("%p: Hash Rx queue %p: refcnt %d", (void *)priv,
1836 (void *)hrxq, rte_atomic32_read(&hrxq->refcnt));
1837 if (rte_atomic32_dec_and_test(&hrxq->refcnt)) {
1838 claim_zero(ibv_destroy_qp(hrxq->qp));
1839 mlx5_priv_ind_table_ibv_release(priv, hrxq->ind_table);
1840 LIST_REMOVE(hrxq, next);
1844 claim_nonzero(mlx5_priv_ind_table_ibv_release(priv, hrxq->ind_table));
1849 * Verify the hash Rx queue list is empty.
1852 * Pointer to private structure.
1854 * @return the number of objects not released.
1857 mlx5_priv_hrxq_ibv_verify(struct priv *priv)
1859 struct mlx5_hrxq *hrxq;
1862 LIST_FOREACH(hrxq, &priv->hrxqs, next) {
1863 DEBUG("%p: Verbs Hash Rx queue %p still referenced",
1864 (void *)priv, (void *)hrxq);