4 * Copyright 2015 6WIND S.A.
5 * Copyright 2015 Mellanox.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of 6WIND S.A. nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 #include <sys/queue.h>
43 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
45 #pragma GCC diagnostic ignored "-Wpedantic"
47 #include <infiniband/verbs.h>
48 #include <infiniband/mlx5dv.h>
50 #pragma GCC diagnostic error "-Wpedantic"
54 #include <rte_malloc.h>
55 #include <rte_ethdev.h>
56 #include <rte_common.h>
57 #include <rte_interrupts.h>
58 #include <rte_debug.h>
62 #include "mlx5_rxtx.h"
63 #include "mlx5_utils.h"
64 #include "mlx5_autoconf.h"
65 #include "mlx5_defs.h"
67 /* Initialization data for hash RX queues. */
68 const struct hash_rxq_init hash_rxq_init[] = {
70 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
71 IBV_RX_HASH_DST_IPV4 |
72 IBV_RX_HASH_SRC_PORT_TCP |
73 IBV_RX_HASH_DST_PORT_TCP),
74 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
76 .flow_spec.tcp_udp = {
77 .type = IBV_FLOW_SPEC_TCP,
78 .size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp),
80 .underlayer = &hash_rxq_init[HASH_RXQ_IPV4],
83 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
84 IBV_RX_HASH_DST_IPV4 |
85 IBV_RX_HASH_SRC_PORT_UDP |
86 IBV_RX_HASH_DST_PORT_UDP),
87 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
89 .flow_spec.tcp_udp = {
90 .type = IBV_FLOW_SPEC_UDP,
91 .size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp),
93 .underlayer = &hash_rxq_init[HASH_RXQ_IPV4],
96 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
97 IBV_RX_HASH_DST_IPV4),
98 .dpdk_rss_hf = (ETH_RSS_IPV4 |
102 .type = IBV_FLOW_SPEC_IPV4,
103 .size = sizeof(hash_rxq_init[0].flow_spec.ipv4),
105 .underlayer = &hash_rxq_init[HASH_RXQ_ETH],
108 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
109 IBV_RX_HASH_DST_IPV6 |
110 IBV_RX_HASH_SRC_PORT_TCP |
111 IBV_RX_HASH_DST_PORT_TCP),
112 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
114 .flow_spec.tcp_udp = {
115 .type = IBV_FLOW_SPEC_TCP,
116 .size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp),
118 .underlayer = &hash_rxq_init[HASH_RXQ_IPV6],
121 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
122 IBV_RX_HASH_DST_IPV6 |
123 IBV_RX_HASH_SRC_PORT_UDP |
124 IBV_RX_HASH_DST_PORT_UDP),
125 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
127 .flow_spec.tcp_udp = {
128 .type = IBV_FLOW_SPEC_UDP,
129 .size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp),
131 .underlayer = &hash_rxq_init[HASH_RXQ_IPV6],
134 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
135 IBV_RX_HASH_DST_IPV6),
136 .dpdk_rss_hf = (ETH_RSS_IPV6 |
140 .type = IBV_FLOW_SPEC_IPV6,
141 .size = sizeof(hash_rxq_init[0].flow_spec.ipv6),
143 .underlayer = &hash_rxq_init[HASH_RXQ_ETH],
150 .type = IBV_FLOW_SPEC_ETH,
151 .size = sizeof(hash_rxq_init[0].flow_spec.eth),
157 /* Number of entries in hash_rxq_init[]. */
158 const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
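/*
 * Note: each entry above chains to its underlayer through .underlayer
 * (TCPv4/UDPv4 -> IPv4 -> ETH), and priv_flow_attr() below walks that chain
 * to size and fill the flow specifications. Illustrative sizing sketch for
 * HASH_RXQ_TCPV4 (derived from the initializers above):
 *
 *   required = sizeof(struct ibv_flow_attr)
 *            + hash_rxq_init[HASH_RXQ_TCPV4].flow_spec.hdr.size
 *            + hash_rxq_init[HASH_RXQ_IPV4].flow_spec.hdr.size
 *            + hash_rxq_init[HASH_RXQ_ETH].flow_spec.hdr.size;
 */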
160 /* Initialization data for hash RX queue indirection tables. */
161 static const struct ind_table_init ind_table_init[] = {
163 .max_size = -1u, /* Superseded by HW limitations. */
165 1 << HASH_RXQ_TCPV4 |
166 1 << HASH_RXQ_UDPV4 |
168 1 << HASH_RXQ_TCPV6 |
169 1 << HASH_RXQ_UDPV6 |
176 .hash_types = 1 << HASH_RXQ_ETH,
181 #define IND_TABLE_INIT_N RTE_DIM(ind_table_init)
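/*
 * Illustrative mapping sketch: ind_table_init[].hash_types is a bit mask
 * indexed by enum hash_rxq_type, and hash_rxq_type_from_pos() below converts
 * a position in that mask back to a queue type by scanning set bits in
 * ascending type order. Assuming an entry whose mask has TCPv4, UDPv4 and
 * IPv4 set (plus the IPv6 counterparts):
 *
 *   pos 0 -> HASH_RXQ_TCPV4
 *   pos 1 -> HASH_RXQ_UDPV4
 *   pos 2 -> HASH_RXQ_IPV4
 *   ...
 */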
183 /* Default RSS hash key also used for ConnectX-3. */
184 uint8_t rss_hash_default_key[] = {
185 0x2c, 0xc6, 0x81, 0xd1,
186 0x5b, 0xdb, 0xf4, 0xf7,
187 0xfc, 0xa2, 0x83, 0x19,
188 0xdb, 0x1a, 0x3e, 0x94,
189 0x6b, 0x9e, 0x38, 0xd9,
190 0x2c, 0x9c, 0x03, 0xd1,
191 0xad, 0x99, 0x44, 0xa7,
192 0xd9, 0x56, 0x3d, 0x59,
193 0x06, 0x3c, 0x25, 0xf3,
194 0xfc, 0x1f, 0xdc, 0x2a,
197 /* Length of the default RSS hash key. */
198 const size_t rss_hash_default_key_len = sizeof(rss_hash_default_key);
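/*
 * Illustrative sketch (not part of the driver): an application may override
 * this default Toeplitz key through the regular ethdev configuration path,
 * e.g. before rte_eth_dev_configure():
 *
 *   static uint8_t app_rss_key[40] = { ... };
 *   struct rte_eth_conf conf = {
 *           .rxmode = { .mq_mode = ETH_MQ_RX_RSS },
 *           .rx_adv_conf.rss_conf = {
 *                   .rss_key = app_rss_key,
 *                   .rss_key_len = sizeof(app_rss_key),
 *                   .rss_hf = ETH_RSS_IP | ETH_RSS_UDP | ETH_RSS_TCP,
 *           },
 *   };
 *
 * When no key is supplied, rss_hash_default_key above is used instead.
 */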
201 * Populate flow steering rule for a given hash RX queue type using
202 * information from hash_rxq_init[]. Nothing is written to flow_attr when
203 * flow_attr_size is not large enough, but the required size is still returned.
206 * Pointer to private structure.
207 * @param[out] flow_attr
208 * Pointer to flow attribute structure to fill. Note that the allocated
209 * area must be large enough to hold all flow specifications.
210 * @param flow_attr_size
211 * Entire size of flow_attr and trailing room for flow specifications.
213 * Hash RX queue type to use for flow steering rule.
216 * Total size of the flow attribute buffer. No errors are defined.
219 priv_flow_attr(struct priv *priv, struct ibv_flow_attr *flow_attr,
220 size_t flow_attr_size, enum hash_rxq_type type)
222 size_t offset = sizeof(*flow_attr);
223 const struct hash_rxq_init *init = &hash_rxq_init[type];
225 assert(priv != NULL);
226 assert((size_t)type < RTE_DIM(hash_rxq_init));
228 offset += init->flow_spec.hdr.size;
229 init = init->underlayer;
230 } while (init != NULL);
231 if (offset > flow_attr_size)
233 flow_attr_size = offset;
234 init = &hash_rxq_init[type];
235 *flow_attr = (struct ibv_flow_attr){
236 .type = IBV_FLOW_ATTR_NORMAL,
237 /* Priorities < 3 are reserved for flow director. */
238 .priority = init->flow_priority + 3,
244 offset -= init->flow_spec.hdr.size;
245 memcpy((void *)((uintptr_t)flow_attr + offset),
247 init->flow_spec.hdr.size);
248 ++flow_attr->num_of_specs;
249 init = init->underlayer;
250 } while (init != NULL);
251 return flow_attr_size;
255 * Convert hash type position in indirection table initializer to
256 * hash RX queue type.
259 * Indirection table initializer.
261 * Hash type position.
264 * Hash RX queue type.
266 static enum hash_rxq_type
267 hash_rxq_type_from_pos(const struct ind_table_init *table, unsigned int pos)
269 enum hash_rxq_type type = HASH_RXQ_TCPV4;
271 assert(pos < table->hash_types_n);
273 if ((table->hash_types & (1 << type)) && (pos-- == 0))
281 * Filter out disabled hash RX queue types from ind_table_init[].
284 * Pointer to private structure.
289 * Number of table entries.
292 priv_make_ind_table_init(struct priv *priv,
293 struct ind_table_init (*table)[IND_TABLE_INIT_N])
298 unsigned int table_n = 0;
299 /* Mandatory to receive frames not handled by normal hash RX queues. */
300 unsigned int hash_types_sup = 1 << HASH_RXQ_ETH;
302 rss_hf = priv->rss_conf.rss_hf;
303 /* Process other protocols only if more than one queue. */
304 if (priv->rxqs_n > 1)
305 for (i = 0; (i != hash_rxq_init_n); ++i)
306 if (rss_hf & hash_rxq_init[i].dpdk_rss_hf)
307 hash_types_sup |= (1 << i);
309 /* Filter out entries whose protocols are not in the set. */
310 for (i = 0, j = 0; (i != IND_TABLE_INIT_N); ++i) {
314 /* j is increased only if the table has valid protocols. */
316 (*table)[j] = ind_table_init[i];
317 (*table)[j].hash_types &= hash_types_sup;
318 for (h = 0, nb = 0; (h != hash_rxq_init_n); ++h)
319 if (((*table)[j].hash_types >> h) & 0x1)
321 (*table)[j].hash_types_n = nb;
331 * Initialize hash RX queues and indirection table.
334 * Pointer to private structure.
337 * 0 on success, errno value on failure.
340 priv_create_hash_rxqs(struct priv *priv)
342 struct ibv_wq *wqs[priv->reta_idx_n];
343 struct ind_table_init ind_table_init[IND_TABLE_INIT_N];
344 unsigned int ind_tables_n =
345 priv_make_ind_table_init(priv, &ind_table_init);
346 unsigned int hash_rxqs_n = 0;
347 struct hash_rxq (*hash_rxqs)[] = NULL;
348 struct ibv_rwq_ind_table *(*ind_tables)[] = NULL;
354 assert(priv->ind_tables == NULL);
355 assert(priv->ind_tables_n == 0);
356 assert(priv->hash_rxqs == NULL);
357 assert(priv->hash_rxqs_n == 0);
358 assert(priv->pd != NULL);
359 assert(priv->ctx != NULL);
362 if (priv->rxqs_n == 0)
364 assert(priv->rxqs != NULL);
365 if (ind_tables_n == 0) {
366 ERROR("all hash RX queue types have been filtered out,"
367 " indirection table cannot be created");
370 if (priv->rxqs_n & (priv->rxqs_n - 1)) {
371 INFO("%u RX queues are configured, consider rounding this"
372 " number to the next power of two for better balancing",
374 DEBUG("indirection table extended to assume %u WQs",
377 for (i = 0; (i != priv->reta_idx_n); ++i) {
378 struct mlx5_rxq_ctrl *rxq_ctrl;
380 rxq_ctrl = container_of((*priv->rxqs)[(*priv->reta_idx)[i]],
381 struct mlx5_rxq_ctrl, rxq);
382 wqs[i] = rxq_ctrl->ibv->wq;
384 /* Get number of hash RX queues to configure. */
385 for (i = 0, hash_rxqs_n = 0; (i != ind_tables_n); ++i)
386 hash_rxqs_n += ind_table_init[i].hash_types_n;
387 DEBUG("allocating %u hash RX queues for %u WQs, %u indirection tables",
388 hash_rxqs_n, priv->rxqs_n, ind_tables_n);
389 /* Create indirection tables. */
390 ind_tables = rte_calloc(__func__, ind_tables_n,
391 sizeof((*ind_tables)[0]), 0);
392 if (ind_tables == NULL) {
394 ERROR("cannot allocate indirection tables container: %s",
398 for (i = 0; (i != ind_tables_n); ++i) {
399 struct ibv_rwq_ind_table_init_attr ind_init_attr = {
400 .log_ind_tbl_size = 0, /* Set below. */
404 unsigned int ind_tbl_size = ind_table_init[i].max_size;
405 struct ibv_rwq_ind_table *ind_table;
407 if (priv->reta_idx_n < ind_tbl_size)
408 ind_tbl_size = priv->reta_idx_n;
409 ind_init_attr.log_ind_tbl_size = log2above(ind_tbl_size);
411 ind_table = ibv_create_rwq_ind_table(priv->ctx,
413 if (ind_table != NULL) {
414 (*ind_tables)[i] = ind_table;
417 /* Not clear whether errno is set. */
418 err = (errno ? errno : EINVAL);
419 ERROR("RX indirection table creation failed with error %d: %s",
423 /* Allocate array that holds hash RX queues and related data. */
424 hash_rxqs = rte_calloc(__func__, hash_rxqs_n,
425 sizeof((*hash_rxqs)[0]), 0);
426 if (hash_rxqs == NULL) {
428 ERROR("cannot allocate hash RX queues container: %s",
432 for (i = 0, j = 0, k = 0;
433 ((i != hash_rxqs_n) && (j != ind_tables_n));
435 struct hash_rxq *hash_rxq = &(*hash_rxqs)[i];
436 enum hash_rxq_type type =
437 hash_rxq_type_from_pos(&ind_table_init[j], k);
438 struct rte_eth_rss_conf *priv_rss_conf = &priv->rss_conf;
439 struct ibv_rx_hash_conf hash_conf = {
440 .rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ,
441 .rx_hash_key_len = (priv_rss_conf ?
442 priv_rss_conf->rss_key_len :
443 rss_hash_default_key_len),
444 .rx_hash_key = (priv_rss_conf ?
445 priv_rss_conf->rss_key :
446 rss_hash_default_key),
447 .rx_hash_fields_mask = hash_rxq_init[type].hash_fields,
449 struct ibv_qp_init_attr_ex qp_init_attr = {
450 .qp_type = IBV_QPT_RAW_PACKET,
451 .comp_mask = (IBV_QP_INIT_ATTR_PD |
452 IBV_QP_INIT_ATTR_IND_TABLE |
453 IBV_QP_INIT_ATTR_RX_HASH),
454 .rx_hash_conf = hash_conf,
455 .rwq_ind_tbl = (*ind_tables)[j],
459 DEBUG("using indirection table %u for hash RX queue %u type %d",
461 *hash_rxq = (struct hash_rxq){
463 .qp = ibv_create_qp_ex(priv->ctx, &qp_init_attr),
466 if (hash_rxq->qp == NULL) {
467 err = (errno ? errno : EINVAL);
468 ERROR("Hash RX QP creation failure: %s",
472 if (++k < ind_table_init[j].hash_types_n)
474 /* Switch to the next indirection table and reset hash RX
475 * queue type array index. */
479 priv->ind_tables = ind_tables;
480 priv->ind_tables_n = ind_tables_n;
481 priv->hash_rxqs = hash_rxqs;
482 priv->hash_rxqs_n = hash_rxqs_n;
486 if (hash_rxqs != NULL) {
487 for (i = 0; (i != hash_rxqs_n); ++i) {
488 struct ibv_qp *qp = (*hash_rxqs)[i].qp;
492 claim_zero(ibv_destroy_qp(qp));
496 if (ind_tables != NULL) {
497 for (j = 0; (j != ind_tables_n); ++j) {
498 struct ibv_rwq_ind_table *ind_table =
501 if (ind_table == NULL)
503 claim_zero(ibv_destroy_rwq_ind_table(ind_table));
505 rte_free(ind_tables);
511 * Clean up hash RX queues and indirection table.
514 * Pointer to private structure.
517 priv_destroy_hash_rxqs(struct priv *priv)
521 DEBUG("destroying %u hash RX queues", priv->hash_rxqs_n);
522 if (priv->hash_rxqs_n == 0) {
523 assert(priv->hash_rxqs == NULL);
524 assert(priv->ind_tables == NULL);
527 for (i = 0; (i != priv->hash_rxqs_n); ++i) {
528 struct hash_rxq *hash_rxq = &(*priv->hash_rxqs)[i];
531 assert(hash_rxq->priv == priv);
532 assert(hash_rxq->qp != NULL);
533 for (j = 0; (j != RTE_DIM(hash_rxq->mac_flow)); ++j)
534 for (k = 0; (k != RTE_DIM(hash_rxq->mac_flow[j])); ++k)
535 assert(hash_rxq->mac_flow[j][k] == NULL);
536 claim_zero(ibv_destroy_qp(hash_rxq->qp));
538 priv->hash_rxqs_n = 0;
539 rte_free(priv->hash_rxqs);
540 priv->hash_rxqs = NULL;
541 for (i = 0; (i != priv->ind_tables_n); ++i) {
542 struct ibv_rwq_ind_table *ind_table =
543 (*priv->ind_tables)[i];
545 assert(ind_table != NULL);
546 claim_zero(ibv_destroy_rwq_ind_table(ind_table));
548 priv->ind_tables_n = 0;
549 rte_free(priv->ind_tables);
550 priv->ind_tables = NULL;
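/*
 * Usage sketch (assumption: the call sites live outside this file, in the
 * device start/stop path):
 *
 *   on start:  err = priv_create_hash_rxqs(priv);
 *   on stop:   priv_destroy_hash_rxqs(priv);
 *
 * The destroy path is a no-op when no hash RX queue has been created.
 */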
554 * Allocate RX queue elements.
557 * Pointer to RX queue structure.
560 * 0 on success, errno value on failure.
563 rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
565 const unsigned int sges_n = 1 << rxq_ctrl->rxq.sges_n;
566 unsigned int elts_n = 1 << rxq_ctrl->rxq.elts_n;
570 /* Iterate on segments. */
571 for (i = 0; (i != elts_n); ++i) {
572 struct rte_mbuf *buf;
574 buf = rte_pktmbuf_alloc(rxq_ctrl->rxq.mp);
576 ERROR("%p: empty mbuf pool", (void *)rxq_ctrl);
580 /* Headroom is reserved by rte_pktmbuf_alloc(). */
581 assert(DATA_OFF(buf) == RTE_PKTMBUF_HEADROOM);
582 /* Buffer is supposed to be empty. */
583 assert(rte_pktmbuf_data_len(buf) == 0);
584 assert(rte_pktmbuf_pkt_len(buf) == 0);
586 /* Only the first segment keeps headroom. */
588 SET_DATA_OFF(buf, 0);
589 PORT(buf) = rxq_ctrl->rxq.port_id;
590 DATA_LEN(buf) = rte_pktmbuf_tailroom(buf);
591 PKT_LEN(buf) = DATA_LEN(buf);
593 (*rxq_ctrl->rxq.elts)[i] = buf;
595 /* If Rx vector is activated. */
596 if (rxq_check_vec_support(&rxq_ctrl->rxq) > 0) {
597 struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
598 struct rte_mbuf *mbuf_init = &rxq->fake_mbuf;
601 /* Initialize default rearm_data for vPMD. */
602 mbuf_init->data_off = RTE_PKTMBUF_HEADROOM;
603 rte_mbuf_refcnt_set(mbuf_init, 1);
604 mbuf_init->nb_segs = 1;
605 mbuf_init->port = rxq->port_id;
607 * prevent compiler reordering:
608 * rearm_data covers previous fields.
610 rte_compiler_barrier();
611 rxq->mbuf_initializer =
612 *(uint64_t *)&mbuf_init->rearm_data;
613 /* Padding with a fake mbuf for vectorized Rx. */
614 for (j = 0; j < MLX5_VPMD_DESCS_PER_LOOP; ++j)
615 (*rxq->elts)[elts_n + j] = &rxq->fake_mbuf;
617 DEBUG("%p: allocated and configured %u segments (max %u packets)",
618 (void *)rxq_ctrl, elts_n, elts_n / (1 << rxq_ctrl->rxq.sges_n));
623 for (i = 0; (i != elts_n); ++i) {
624 if ((*rxq_ctrl->rxq.elts)[i] != NULL)
625 rte_pktmbuf_free_seg((*rxq_ctrl->rxq.elts)[i]);
626 (*rxq_ctrl->rxq.elts)[i] = NULL;
628 DEBUG("%p: failed, freed everything", (void *)rxq_ctrl);
634 * Free RX queue elements.
637 * Pointer to RX queue structure.
640 rxq_free_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
642 struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
643 const uint16_t q_n = (1 << rxq->elts_n);
644 const uint16_t q_mask = q_n - 1;
645 uint16_t used = q_n - (rxq->rq_ci - rxq->rq_pi);
648 DEBUG("%p: freeing WRs", (void *)rxq_ctrl);
649 if (rxq->elts == NULL)
652 * Some mbufs in the ring belong to the application. They cannot be
655 if (rxq_check_vec_support(rxq) > 0) {
656 for (i = 0; i < used; ++i)
657 (*rxq->elts)[(rxq->rq_ci + i) & q_mask] = NULL;
658 rxq->rq_pi = rxq->rq_ci;
660 for (i = 0; (i != (1u << rxq->elts_n)); ++i) {
661 if ((*rxq->elts)[i] != NULL)
662 rte_pktmbuf_free_seg((*rxq->elts)[i]);
663 (*rxq->elts)[i] = NULL;
668 * Clean up an RX queue.
670 * Destroy objects, free allocated memory and reset the structure for reuse.
673 * Pointer to RX queue structure.
676 mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *rxq_ctrl)
678 DEBUG("cleaning up %p", (void *)rxq_ctrl);
680 mlx5_priv_rxq_ibv_release(rxq_ctrl->priv, rxq_ctrl->ibv);
681 memset(rxq_ctrl, 0, sizeof(*rxq_ctrl));
687 * Pointer to Ethernet device structure.
691 * Number of descriptors to configure in queue.
693 * NUMA socket on which memory must be allocated.
695 * Thresholds parameters.
697 * Memory pool for buffer allocations.
700 * 0 on success, negative errno value on failure.
703 mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
704 unsigned int socket, const struct rte_eth_rxconf *conf,
705 struct rte_mempool *mp)
707 struct priv *priv = dev->data->dev_private;
708 struct mlx5_rxq_data *rxq = (*priv->rxqs)[idx];
709 struct mlx5_rxq_ctrl *rxq_ctrl =
710 container_of(rxq, struct mlx5_rxq_ctrl, rxq);
714 if (mlx5_is_secondary())
715 return -E_RTE_SECONDARY;
717 if (!rte_is_power_of_2(desc)) {
718 desc = 1 << log2above(desc);
719 WARN("%p: increased number of descriptors in RX queue %u"
720 " to the next power of two (%d)",
721 (void *)dev, idx, desc);
723 DEBUG("%p: configuring queue %u for %u descriptors",
724 (void *)dev, idx, desc);
725 if (idx >= priv->rxqs_n) {
726 ERROR("%p: queue index out of range (%u >= %u)",
727 (void *)dev, idx, priv->rxqs_n);
731 if (!mlx5_priv_rxq_releasable(priv, idx)) {
733 ERROR("%p: unable to release queue index %u",
737 mlx5_priv_rxq_release(priv, idx);
738 rxq_ctrl = mlx5_priv_rxq_new(priv, idx, desc, socket, mp);
740 ERROR("%p: unable to allocate queue index %u",
745 DEBUG("%p: adding RX queue %p to list",
746 (void *)dev, (void *)rxq_ctrl);
747 (*priv->rxqs)[idx] = &rxq_ctrl->rxq;
754 * DPDK callback to release a RX queue.
757 * Generic RX queue pointer.
760 mlx5_rx_queue_release(void *dpdk_rxq)
762 struct mlx5_rxq_data *rxq = (struct mlx5_rxq_data *)dpdk_rxq;
763 struct mlx5_rxq_ctrl *rxq_ctrl;
766 if (mlx5_is_secondary())
771 rxq_ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq);
772 priv = rxq_ctrl->priv;
774 if (!mlx5_priv_rxq_releasable(priv, rxq_ctrl->rxq.stats.idx))
775 rte_panic("Rx queue %p is still used by a flow and cannot be"
776 " removed\n", (void *)rxq_ctrl);
777 mlx5_priv_rxq_release(priv, rxq_ctrl->rxq.stats.idx);
782 * Allocate queue vector and fill epoll fd list for Rx interrupts.
785 * Pointer to private structure.
788 * 0 on success, negative on failure.
791 priv_rx_intr_vec_enable(struct priv *priv)
794 unsigned int rxqs_n = priv->rxqs_n;
795 unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
796 unsigned int count = 0;
797 struct rte_intr_handle *intr_handle = priv->dev->intr_handle;
799 assert(!mlx5_is_secondary());
800 if (!priv->dev->data->dev_conf.intr_conf.rxq)
802 priv_rx_intr_vec_disable(priv);
803 intr_handle->intr_vec = malloc(n * sizeof(intr_handle->intr_vec[0]));
804 if (intr_handle->intr_vec == NULL) {
805 ERROR("failed to allocate memory for interrupt vector,"
806 " Rx interrupts will not be supported");
809 intr_handle->type = RTE_INTR_HANDLE_EXT;
810 for (i = 0; i != n; ++i) {
811 /* This rxq ibv must not be released in this function. */
812 struct mlx5_rxq_ibv *rxq_ibv = mlx5_priv_rxq_ibv_get(priv, i);
817 /* Skip queues that cannot request interrupts. */
818 if (!rxq_ibv || !rxq_ibv->channel) {
819 /* Use invalid intr_vec[] index to disable entry. */
820 intr_handle->intr_vec[i] =
821 RTE_INTR_VEC_RXTX_OFFSET +
822 RTE_MAX_RXTX_INTR_VEC_ID;
825 if (count >= RTE_MAX_RXTX_INTR_VEC_ID) {
826 ERROR("too many Rx queues for interrupt vector size"
827 " (%d), Rx interrupts cannot be enabled",
828 RTE_MAX_RXTX_INTR_VEC_ID);
829 priv_rx_intr_vec_disable(priv);
832 fd = rxq_ibv->channel->fd;
833 flags = fcntl(fd, F_GETFL);
834 rc = fcntl(fd, F_SETFL, flags | O_NONBLOCK);
836 ERROR("failed to make Rx interrupt file descriptor"
837 " %d non-blocking for queue index %d", fd, i);
838 priv_rx_intr_vec_disable(priv);
841 intr_handle->intr_vec[i] = RTE_INTR_VEC_RXTX_OFFSET + count;
842 intr_handle->efds[count] = fd;
846 priv_rx_intr_vec_disable(priv);
848 intr_handle->nb_efd = count;
853 * Clean up Rx interrupt handler.
856 * Pointer to private structure.
859 priv_rx_intr_vec_disable(struct priv *priv)
861 struct rte_intr_handle *intr_handle = priv->dev->intr_handle;
863 unsigned int rxqs_n = priv->rxqs_n;
864 unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
866 if (!priv->dev->data->dev_conf.intr_conf.rxq)
868 for (i = 0; i != n; ++i) {
869 struct mlx5_rxq_ctrl *rxq_ctrl;
870 struct mlx5_rxq_data *rxq_data;
872 if (intr_handle->intr_vec[i] == RTE_INTR_VEC_RXTX_OFFSET +
873 RTE_MAX_RXTX_INTR_VEC_ID)
876 * Need to access the queue directly to release the reference
877 * kept in priv_rx_intr_vec_enable().
879 rxq_data = (*priv->rxqs)[i];
880 rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
881 mlx5_priv_rxq_ibv_release(priv, rxq_ctrl->ibv);
883 rte_intr_free_epoll_fd(intr_handle);
884 free(intr_handle->intr_vec);
885 intr_handle->nb_efd = 0;
886 intr_handle->intr_vec = NULL;
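/*
 * Application-side sketch (assumption, using the generic ethdev/EAL Rx
 * interrupt API that the vectors above feed into; none of this code lives
 * in this file):
 *
 *   rte_eth_dev_rx_intr_enable(port_id, queue_id);
 *   ... block in rte_epoll_wait() on RTE_EPOLL_PER_THREAD ...
 *   rte_eth_dev_rx_intr_disable(port_id, queue_id);
 *
 * The intr_vec[]/efds[] entries filled in priv_rx_intr_vec_enable() are what
 * rte_eth_dev_rx_intr_ctl_q() uses to register a queue with epoll.
 */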
890 * MLX5 CQ notification.
893 * Pointer to receive queue structure.
895 * Sequence number per receive queue.
898 mlx5_arm_cq(struct mlx5_rxq_data *rxq, int sq_n_rxq)
901 uint32_t doorbell_hi;
903 void *cq_db_reg = (char *)rxq->cq_uar + MLX5_CQ_DOORBELL;
905 sq_n = sq_n_rxq & MLX5_CQ_SQN_MASK;
906 doorbell_hi = sq_n << MLX5_CQ_SQN_OFFSET | (rxq->cq_ci & MLX5_CI_MASK);
907 doorbell = (uint64_t)doorbell_hi << 32;
908 doorbell |= rxq->cqn;
909 rxq->cq_db[MLX5_CQ_ARM_DB] = rte_cpu_to_be_32(doorbell_hi);
911 rte_write64(rte_cpu_to_be_64(doorbell), cq_db_reg);
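/*
 * Layout sketch of the 64-bit arm doorbell built above (derived from the
 * code; field widths come from the MLX5_* masks and offsets):
 *
 *    63                    32 31                     0
 *   +------------------------+------------------------+
 *   |      doorbell_hi       |          cqn           |
 *   +------------------------+------------------------+
 *   doorbell_hi = (sq_n << MLX5_CQ_SQN_OFFSET) | (cq_ci & MLX5_CI_MASK)
 *
 * The same doorbell_hi is stored in the CQ doorbell record
 * (cq_db[MLX5_CQ_ARM_DB]) before the UAR register write.
 */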
915 * DPDK callback for Rx queue interrupt enable.
918 * Pointer to Ethernet device structure.
923 * 0 on success, negative on failure.
926 mlx5_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
928 struct priv *priv = mlx5_get_priv(dev);
929 struct mlx5_rxq_data *rxq_data;
930 struct mlx5_rxq_ctrl *rxq_ctrl;
934 rxq_data = (*priv->rxqs)[rx_queue_id];
939 rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
941 struct mlx5_rxq_ibv *rxq_ibv;
943 rxq_ibv = mlx5_priv_rxq_ibv_get(priv, rx_queue_id);
948 mlx5_arm_cq(rxq_data, rxq_data->cq_arm_sn);
949 mlx5_priv_rxq_ibv_release(priv, rxq_ibv);
954 WARN("unable to arm interrupt on rx queue %d", rx_queue_id);
959 * DPDK callback for Rx queue interrupt disable.
962 * Pointer to Ethernet device structure.
967 * 0 on success, negative on failure.
970 mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
972 struct priv *priv = mlx5_get_priv(dev);
973 struct mlx5_rxq_data *rxq_data;
974 struct mlx5_rxq_ctrl *rxq_ctrl;
975 struct mlx5_rxq_ibv *rxq_ibv = NULL;
976 struct ibv_cq *ev_cq;
981 rxq_data = (*priv->rxqs)[rx_queue_id];
986 rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
989 rxq_ibv = mlx5_priv_rxq_ibv_get(priv, rx_queue_id);
994 ret = ibv_get_cq_event(rxq_ibv->channel, &ev_cq, &ev_ctx);
995 if (ret || ev_cq != rxq_ibv->cq) {
999 rxq_data->cq_arm_sn++;
1000 ibv_ack_cq_events(rxq_ibv->cq, 1);
1003 mlx5_priv_rxq_ibv_release(priv, rxq_ibv);
1006 WARN("unable to disable interrupt on rx queue %d",
1012 * Create the Rx queue Verbs object.
1015 * Pointer to private structure.
1017 * Queue index in DPDK Rx queue array.
1020 * The Verbs object initialised if it can be created.
1022 struct mlx5_rxq_ibv*
1023 mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx)
1025 struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
1026 struct mlx5_rxq_ctrl *rxq_ctrl =
1027 container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
1028 struct ibv_wq_attr mod;
1030 struct ibv_cq_init_attr_ex cq;
1031 struct ibv_wq_init_attr wq;
1032 struct ibv_cq_ex cq_attr;
1034 unsigned int cqe_n = (1 << rxq_data->elts_n) - 1;
1035 struct mlx5_rxq_ibv *tmpl;
1036 struct mlx5dv_cq cq_info;
1037 struct mlx5dv_rwq rwq;
1040 struct mlx5dv_obj obj;
1043 assert(!rxq_ctrl->ibv);
1044 tmpl = rte_calloc_socket(__func__, 1, sizeof(*tmpl), 0,
1047 ERROR("%p: cannot allocate verbs resources",
1051 tmpl->rxq_ctrl = rxq_ctrl;
1052 /* Use the entire RX mempool as the memory region. */
1053 tmpl->mr = priv_mr_get(priv, rxq_data->mp);
1055 tmpl->mr = priv_mr_new(priv, rxq_data->mp);
1057 ERROR("%p: MR creation failure", (void *)rxq_ctrl);
1061 if (rxq_ctrl->irq) {
1062 tmpl->channel = ibv_create_comp_channel(priv->ctx);
1063 if (!tmpl->channel) {
1064 ERROR("%p: Comp Channel creation failure",
1069 attr.cq = (struct ibv_cq_init_attr_ex){
1072 if (priv->cqe_comp) {
1073 attr.cq.comp_mask |= IBV_CQ_INIT_ATTR_MASK_FLAGS;
1074 attr.cq.flags |= MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE;
1076 * For vectorized Rx, it must not be doubled in order to
1077 * make cq_ci and rq_ci aligned.
1079 if (rxq_check_vec_support(rxq_data) < 0)
1082 tmpl->cq = ibv_create_cq(priv->ctx, cqe_n, NULL, tmpl->channel, 0);
1083 if (tmpl->cq == NULL) {
1084 ERROR("%p: CQ creation failure", (void *)rxq_ctrl);
1087 DEBUG("priv->device_attr.max_qp_wr is %d",
1088 priv->device_attr.orig_attr.max_qp_wr);
1089 DEBUG("priv->device_attr.max_sge is %d",
1090 priv->device_attr.orig_attr.max_sge);
1091 attr.wq = (struct ibv_wq_init_attr){
1092 .wq_context = NULL, /* Could be useful in the future. */
1093 .wq_type = IBV_WQT_RQ,
1094 /* Max number of outstanding WRs. */
1095 .max_wr = (1 << rxq_data->elts_n) >> rxq_data->sges_n,
1096 /* Max number of scatter/gather elements in a WR. */
1097 .max_sge = 1 << rxq_data->sges_n,
1101 IBV_WQ_FLAGS_CVLAN_STRIPPING |
1103 .create_flags = (rxq_data->vlan_strip ?
1104 IBV_WQ_FLAGS_CVLAN_STRIPPING :
1107 /* By default, FCS (CRC) is stripped by hardware. */
1108 if (rxq_data->crc_present) {
1109 attr.wq.create_flags |= IBV_WQ_FLAGS_SCATTER_FCS;
1110 attr.wq.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
1112 #ifdef HAVE_IBV_WQ_FLAG_RX_END_PADDING
1113 if (priv->hw_padding) {
1114 attr.wq.create_flags |= IBV_WQ_FLAG_RX_END_PADDING;
1115 attr.wq.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
1118 tmpl->wq = ibv_create_wq(priv->ctx, &attr.wq);
1119 if (tmpl->wq == NULL) {
1120 ERROR("%p: WQ creation failure", (void *)rxq_ctrl);
1124 * Make sure number of WRs*SGEs match expectations since a queue
1125 * cannot allocate more than "desc" buffers.
1127 if (((int)attr.wq.max_wr !=
1128 ((1 << rxq_data->elts_n) >> rxq_data->sges_n)) ||
1129 ((int)attr.wq.max_sge != (1 << rxq_data->sges_n))) {
1130 ERROR("%p: requested %u*%u but got %u*%u WRs*SGEs",
1132 ((1 << rxq_data->elts_n) >> rxq_data->sges_n),
1133 (1 << rxq_data->sges_n),
1134 attr.wq.max_wr, attr.wq.max_sge);
1137 /* Change queue state to ready. */
1138 mod = (struct ibv_wq_attr){
1139 .attr_mask = IBV_WQ_ATTR_STATE,
1140 .wq_state = IBV_WQS_RDY,
1142 ret = ibv_modify_wq(tmpl->wq, &mod);
1144 ERROR("%p: WQ state to IBV_WQS_RDY failed",
1148 obj.cq.in = tmpl->cq;
1149 obj.cq.out = &cq_info;
1150 obj.rwq.in = tmpl->wq;
1152 ret = mlx5dv_init_obj(&obj, MLX5DV_OBJ_CQ | MLX5DV_OBJ_RWQ);
1155 if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
1156 ERROR("Wrong MLX5_CQE_SIZE environment variable value: "
1157 "it should be set to %u", RTE_CACHE_LINE_SIZE);
1160 /* Fill the rings. */
1161 rxq_data->wqes = (volatile struct mlx5_wqe_data_seg (*)[])
1163 for (i = 0; (i != (unsigned int)(1 << rxq_data->elts_n)); ++i) {
1164 struct rte_mbuf *buf = (*rxq_data->elts)[i];
1165 volatile struct mlx5_wqe_data_seg *scat = &(*rxq_data->wqes)[i];
1167 /* scat->addr must be able to store a pointer. */
1168 assert(sizeof(scat->addr) >= sizeof(uintptr_t));
1169 *scat = (struct mlx5_wqe_data_seg){
1170 .addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf,
1172 .byte_count = rte_cpu_to_be_32(DATA_LEN(buf)),
1173 .lkey = tmpl->mr->lkey,
1176 rxq_data->rq_db = rwq.dbrec;
1177 rxq_data->cqe_n = log2above(cq_info.cqe_cnt);
1178 rxq_data->cq_ci = 0;
1179 rxq_data->rq_ci = 0;
1180 rxq_data->rq_pi = 0;
1181 rxq_data->zip = (struct rxq_zip){
1184 rxq_data->cq_db = cq_info.dbrec;
1185 rxq_data->cqes = (volatile struct mlx5_cqe (*)[])(uintptr_t)cq_info.buf;
1186 /* Update doorbell counter. */
1187 rxq_data->rq_ci = (1 << rxq_data->elts_n) >> rxq_data->sges_n;
1189 *rxq_data->rq_db = rte_cpu_to_be_32(rxq_data->rq_ci);
1190 DEBUG("%p: rxq updated with %p", (void *)rxq_ctrl, (void *)&tmpl);
1191 rte_atomic32_inc(&tmpl->refcnt);
1192 DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void *)priv,
1193 (void *)tmpl, rte_atomic32_read(&tmpl->refcnt));
1194 LIST_INSERT_HEAD(&priv->rxqsibv, tmpl, next);
1198 claim_zero(ibv_destroy_wq(tmpl->wq));
1200 claim_zero(ibv_destroy_cq(tmpl->cq));
1202 claim_zero(ibv_destroy_comp_channel(tmpl->channel));
1204 priv_mr_release(priv, tmpl->mr);
1209 * Get an Rx queue Verbs object.
1212 * Pointer to private structure.
1214 * Queue index in DPDK Rx queue array.
1217 * The Verbs object if it exists.
1219 struct mlx5_rxq_ibv*
1220 mlx5_priv_rxq_ibv_get(struct priv *priv, uint16_t idx)
1222 struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
1223 struct mlx5_rxq_ctrl *rxq_ctrl;
1225 if (idx >= priv->rxqs_n)
1229 rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
1230 if (rxq_ctrl->ibv) {
1231 priv_mr_get(priv, rxq_data->mp);
1232 rte_atomic32_inc(&rxq_ctrl->ibv->refcnt);
1233 DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void *)priv,
1234 (void *)rxq_ctrl->ibv,
1235 rte_atomic32_read(&rxq_ctrl->ibv->refcnt));
1237 return rxq_ctrl->ibv;
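/*
 * Usage sketch (assumption, matching the reference counting visible above):
 *
 *   struct mlx5_rxq_ibv *rxq_ibv = mlx5_priv_rxq_ibv_get(priv, idx);
 *
 *   if (rxq_ibv) {
 *           ... use rxq_ibv->cq / rxq_ibv->wq / rxq_ibv->channel ...
 *           mlx5_priv_rxq_ibv_release(priv, rxq_ibv);
 *   }
 *
 * Each successful _get() takes one Verbs object reference plus one MR
 * reference, so every _get() must be balanced by exactly one _release().
 */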
1241 * Release an Rx Verbs queue object.
1244 * Pointer to private structure.
1246 * Verbs Rx queue object.
1249 * 0 on success, errno value on failure.
1252 mlx5_priv_rxq_ibv_release(struct priv *priv, struct mlx5_rxq_ibv *rxq_ibv)
1257 assert(rxq_ibv->wq);
1258 assert(rxq_ibv->cq);
1259 assert(rxq_ibv->mr);
1260 ret = priv_mr_release(priv, rxq_ibv->mr);
1263 DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void *)priv,
1264 (void *)rxq_ibv, rte_atomic32_read(&rxq_ibv->refcnt));
1265 if (rte_atomic32_dec_and_test(&rxq_ibv->refcnt)) {
1266 rxq_free_elts(rxq_ibv->rxq_ctrl);
1267 claim_zero(ibv_destroy_wq(rxq_ibv->wq));
1268 claim_zero(ibv_destroy_cq(rxq_ibv->cq));
1269 if (rxq_ibv->channel)
1270 claim_zero(ibv_destroy_comp_channel(rxq_ibv->channel));
1271 LIST_REMOVE(rxq_ibv, next);
1279 * Verify the Verbs Rx queue list is empty.
1282 * Pointer to private structure.
1284 * @return the number of objects not released.
1287 mlx5_priv_rxq_ibv_verify(struct priv *priv)
1290 struct mlx5_rxq_ibv *rxq_ibv;
1292 LIST_FOREACH(rxq_ibv, &priv->rxqsibv, next) {
1293 DEBUG("%p: Verbs Rx queue %p still referenced", (void *)priv,
1301 * Return true if a single reference exists on the object.
1304 * Pointer to private structure.
1306 * Verbs Rx queue object.
1309 mlx5_priv_rxq_ibv_releasable(struct priv *priv, struct mlx5_rxq_ibv *rxq_ibv)
1313 return (rte_atomic32_read(&rxq_ibv->refcnt) == 1);
1317 * Create a DPDK Rx queue.
1320 * Pointer to private structure.
1324 * Number of descriptors to configure in queue.
1326 * NUMA socket on which memory must be allocated.
1329 * A DPDK queue object on success.
1331 struct mlx5_rxq_ctrl*
1332 mlx5_priv_rxq_new(struct priv *priv, uint16_t idx, uint16_t desc,
1333 unsigned int socket, struct rte_mempool *mp)
1335 struct rte_eth_dev *dev = priv->dev;
1336 struct mlx5_rxq_ctrl *tmpl;
1337 const uint16_t desc_n =
1338 desc + priv->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP;
1339 unsigned int mb_len = rte_pktmbuf_data_room_size(mp);
1341 tmpl = rte_calloc_socket("RXQ", 1,
1343 desc_n * sizeof(struct rte_mbuf *),
1347 if (priv->dev->data->dev_conf.intr_conf.rxq)
1349 /* Enable scattered packets support for this queue if necessary. */
1350 assert(mb_len >= RTE_PKTMBUF_HEADROOM);
1351 if (dev->data->dev_conf.rxmode.max_rx_pkt_len <=
1352 (mb_len - RTE_PKTMBUF_HEADROOM)) {
1353 tmpl->rxq.sges_n = 0;
1354 } else if (dev->data->dev_conf.rxmode.enable_scatter) {
1356 RTE_PKTMBUF_HEADROOM +
1357 dev->data->dev_conf.rxmode.max_rx_pkt_len;
1358 unsigned int sges_n;
1361 * Determine the number of SGEs needed for a full packet
1362 * and round it to the next power of two.
1364 sges_n = log2above((size / mb_len) + !!(size % mb_len));
1365 tmpl->rxq.sges_n = sges_n;
1366 /* Make sure rxq.sges_n did not overflow. */
1367 size = mb_len * (1 << tmpl->rxq.sges_n);
1368 size -= RTE_PKTMBUF_HEADROOM;
1369 if (size < dev->data->dev_conf.rxmode.max_rx_pkt_len) {
1370 ERROR("%p: too many SGEs (%u) needed to handle"
1371 " requested maximum packet size %u",
1374 dev->data->dev_conf.rxmode.max_rx_pkt_len);
1378 WARN("%p: the requested maximum Rx packet size (%u) is"
1379 " larger than a single mbuf (%u) and scattered"
1380 " mode has not been requested",
1382 dev->data->dev_conf.rxmode.max_rx_pkt_len,
1383 mb_len - RTE_PKTMBUF_HEADROOM);
1385 DEBUG("%p: maximum number of segments per packet: %u",
1386 (void *)dev, 1 << tmpl->rxq.sges_n);
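/*
 * Worked example (assumed values: 2048-byte data room plus a 128-byte
 * RTE_PKTMBUF_HEADROOM, i.e. mb_len = 2176, max_rx_pkt_len = 9000 and
 * scatter enabled):
 *
 *   size   = 128 + 9000 = 9128
 *   sges_n = log2above(9128 / 2176 + !!(9128 % 2176)) = log2above(5) = 3
 *
 * so up to 8 SGEs per packet; the first segment keeps the headroom and
 * holds at most 2048 bytes of data.
 */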
1387 if (desc % (1 << tmpl->rxq.sges_n)) {
1388 ERROR("%p: number of RX queue descriptors (%u) is not a"
1389 " multiple of SGEs per packet (%u)",
1392 1 << tmpl->rxq.sges_n);
1395 /* Toggle RX checksum offload if hardware supports it. */
1397 tmpl->rxq.csum = !!dev->data->dev_conf.rxmode.hw_ip_checksum;
1398 if (priv->hw_csum_l2tun)
1399 tmpl->rxq.csum_l2tun =
1400 !!dev->data->dev_conf.rxmode.hw_ip_checksum;
1401 /* Configure VLAN stripping. */
1402 tmpl->rxq.vlan_strip = (priv->hw_vlan_strip &&
1403 !!dev->data->dev_conf.rxmode.hw_vlan_strip);
1404 /* By default, FCS (CRC) is stripped by hardware. */
1405 if (dev->data->dev_conf.rxmode.hw_strip_crc) {
1406 tmpl->rxq.crc_present = 0;
1407 } else if (priv->hw_fcs_strip) {
1408 tmpl->rxq.crc_present = 1;
1410 WARN("%p: CRC stripping has been disabled but will still"
1411 " be performed by hardware, make sure MLNX_OFED and"
1412 " firmware are up to date",
1414 tmpl->rxq.crc_present = 0;
1416 DEBUG("%p: CRC stripping is %s, %u bytes will be subtracted from"
1417 " incoming frames to hide it",
1419 tmpl->rxq.crc_present ? "disabled" : "enabled",
1420 tmpl->rxq.crc_present << 2);
1422 tmpl->rxq.rss_hash = priv->rxqs_n > 1;
1423 tmpl->rxq.port_id = dev->data->port_id;
1426 tmpl->rxq.stats.idx = idx;
1427 tmpl->rxq.elts_n = log2above(desc);
1429 (struct rte_mbuf *(*)[1 << tmpl->rxq.elts_n])(tmpl + 1);
1430 rte_atomic32_inc(&tmpl->refcnt);
1431 DEBUG("%p: Rx queue %p: refcnt %d", (void *)priv,
1432 (void *)tmpl, rte_atomic32_read(&tmpl->refcnt));
1433 LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next);
1444 * Pointer to private structure.
1449 * A pointer to the queue if it exists.
1451 struct mlx5_rxq_ctrl*
1452 mlx5_priv_rxq_get(struct priv *priv, uint16_t idx)
1454 struct mlx5_rxq_ctrl *rxq_ctrl = NULL;
1456 if ((*priv->rxqs)[idx]) {
1457 rxq_ctrl = container_of((*priv->rxqs)[idx],
1458 struct mlx5_rxq_ctrl,
1461 mlx5_priv_rxq_ibv_get(priv, idx);
1462 rte_atomic32_inc(&rxq_ctrl->refcnt);
1463 DEBUG("%p: Rx queue %p: refcnt %d", (void *)priv,
1464 (void *)rxq_ctrl, rte_atomic32_read(&rxq_ctrl->refcnt));
1470 * Release an Rx queue.
1473 * Pointer to private structure.
1478 * 0 on success, errno value on failure.
1481 mlx5_priv_rxq_release(struct priv *priv, uint16_t idx)
1483 struct mlx5_rxq_ctrl *rxq_ctrl;
1485 if (!(*priv->rxqs)[idx])
1487 rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq);
1488 assert(rxq_ctrl->priv);
1489 if (rxq_ctrl->ibv) {
1492 ret = mlx5_priv_rxq_ibv_release(rxq_ctrl->priv, rxq_ctrl->ibv);
1494 rxq_ctrl->ibv = NULL;
1496 DEBUG("%p: Rx queue %p: refcnt %d", (void *)priv,
1497 (void *)rxq_ctrl, rte_atomic32_read(&rxq_ctrl->refcnt));
1498 if (rte_atomic32_dec_and_test(&rxq_ctrl->refcnt)) {
1499 LIST_REMOVE(rxq_ctrl, next);
1501 (*priv->rxqs)[idx] = NULL;
1508 * Verify if the queue can be released.
1511 * Pointer to private structure.
1516 * 1 if the queue can be released.
1519 mlx5_priv_rxq_releasable(struct priv *priv, uint16_t idx)
1521 struct mlx5_rxq_ctrl *rxq_ctrl;
1523 if (!(*priv->rxqs)[idx])
1525 rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq);
1526 return (rte_atomic32_read(&rxq_ctrl->refcnt) == 1);
1530 * Verify the Rx queue list is empty.
1533 * Pointer to private structure.
1535 * @return the number of objects not released.
1538 mlx5_priv_rxq_verify(struct priv *priv)
1540 struct mlx5_rxq_ctrl *rxq_ctrl;
1543 LIST_FOREACH(rxq_ctrl, &priv->rxqsctrl, next) {
1544 DEBUG("%p: Rx Queue %p still referenced", (void *)priv,
1552 * Create an indirection table.
1555 * Pointer to private structure.
1557 * Queues to include in the indirection table.
1559 * Number of queues in the array.
1562 * A new indirection table.
1564 struct mlx5_ind_table_ibv*
1565 mlx5_priv_ind_table_ibv_new(struct priv *priv, uint16_t queues[],
1568 struct mlx5_ind_table_ibv *ind_tbl;
1569 const unsigned int wq_n = rte_is_power_of_2(queues_n) ?
1570 log2above(queues_n) :
1571 log2above(priv->ind_table_max_size);
1572 struct ibv_wq *wq[1 << wq_n];
1576 ind_tbl = rte_calloc(__func__, 1, sizeof(*ind_tbl) +
1577 queues_n * sizeof(uint16_t), 0);
1580 for (i = 0; i != queues_n; ++i) {
1581 struct mlx5_rxq_ctrl *rxq =
1582 mlx5_priv_rxq_get(priv, queues[i]);
1586 wq[i] = rxq->ibv->wq;
1587 ind_tbl->queues[i] = queues[i];
1589 ind_tbl->queues_n = queues_n;
1590 /* Finalise indirection table. */
1591 for (j = 0; i != (unsigned int)(1 << wq_n); ++i, ++j)
1593 ind_tbl->ind_table = ibv_create_rwq_ind_table(
1595 &(struct ibv_rwq_ind_table_init_attr){
1596 .log_ind_tbl_size = wq_n,
1600 if (!ind_tbl->ind_table)
1602 rte_atomic32_inc(&ind_tbl->refcnt);
1603 LIST_INSERT_HEAD(&priv->ind_tbls, ind_tbl, next);
1604 DEBUG("%p: Indirection table %p: refcnt %d", (void *)priv,
1605 (void *)ind_tbl, rte_atomic32_read(&ind_tbl->refcnt));
1609 DEBUG("%p cannot create indirection table", (void *)priv);
1614 * Get an indirection table.
1617 * Pointer to private structure.
1619 * Queues to include in the indirection table.
1621 * Number of queues in the array.
1624 * An indirection table if found.
1626 struct mlx5_ind_table_ibv*
1627 mlx5_priv_ind_table_ibv_get(struct priv *priv, uint16_t queues[],
1630 struct mlx5_ind_table_ibv *ind_tbl;
1632 LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) {
1633 if ((ind_tbl->queues_n == queues_n) &&
1634 (memcmp(ind_tbl->queues, queues,
1635 ind_tbl->queues_n * sizeof(ind_tbl->queues[0]))
1642 rte_atomic32_inc(&ind_tbl->refcnt);
1643 DEBUG("%p: Indirection table %p: refcnt %d", (void *)priv,
1644 (void *)ind_tbl, rte_atomic32_read(&ind_tbl->refcnt));
1645 for (i = 0; i != ind_tbl->queues_n; ++i)
1646 mlx5_priv_rxq_get(priv, ind_tbl->queues[i]);
1652 * Release an indirection table.
1655 * Pointer to private structure.
1657 * Indirection table to release.
1660 * 0 on success, errno value on failure.
1663 mlx5_priv_ind_table_ibv_release(struct priv *priv,
1664 struct mlx5_ind_table_ibv *ind_tbl)
1668 DEBUG("%p: Indirection table %p: refcnt %d", (void *)priv,
1669 (void *)ind_tbl, rte_atomic32_read(&ind_tbl->refcnt));
1670 if (rte_atomic32_dec_and_test(&ind_tbl->refcnt))
1671 claim_zero(ibv_destroy_rwq_ind_table(ind_tbl->ind_table));
1672 for (i = 0; i != ind_tbl->queues_n; ++i)
1673 claim_nonzero(mlx5_priv_rxq_release(priv, ind_tbl->queues[i]));
1674 if (!rte_atomic32_read(&ind_tbl->refcnt)) {
1675 LIST_REMOVE(ind_tbl, next);
1683 * Verify the Verbs indirection table list is empty.
1686 * Pointer to private structure.
1688 * @return the number of objects not released.
1691 mlx5_priv_ind_table_ibv_verify(struct priv *priv)
1693 struct mlx5_ind_table_ibv *ind_tbl;
1696 LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) {
1697 DEBUG("%p: Verbs indirection table %p still referenced",
1698 (void *)priv, (void *)ind_tbl);
1705 * Create an Rx Hash queue.
1708 * Pointer to private structure.
1710 * RSS key for the Rx hash queue.
1711 * @param rss_key_len
1713 * @param hash_fields
1714 * Verbs protocol hash field to make the RSS on.
1716 * Queues to include in the hash queue.
1721 * A hash Rx queue on success.
1724 mlx5_priv_hrxq_new(struct priv *priv, uint8_t *rss_key, uint8_t rss_key_len,
1725 uint64_t hash_fields, uint16_t queues[], uint16_t queues_n)
1727 struct mlx5_hrxq *hrxq;
1728 struct mlx5_ind_table_ibv *ind_tbl;
1731 ind_tbl = mlx5_priv_ind_table_ibv_get(priv, queues, queues_n);
1733 ind_tbl = mlx5_priv_ind_table_ibv_new(priv, queues, queues_n);
1736 qp = ibv_create_qp_ex(
1738 &(struct ibv_qp_init_attr_ex){
1739 .qp_type = IBV_QPT_RAW_PACKET,
1741 IBV_QP_INIT_ATTR_PD |
1742 IBV_QP_INIT_ATTR_IND_TABLE |
1743 IBV_QP_INIT_ATTR_RX_HASH,
1744 .rx_hash_conf = (struct ibv_rx_hash_conf){
1745 .rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ,
1746 .rx_hash_key_len = rss_key_len,
1747 .rx_hash_key = rss_key,
1748 .rx_hash_fields_mask = hash_fields,
1750 .rwq_ind_tbl = ind_tbl->ind_table,
1755 hrxq = rte_calloc(__func__, 1, sizeof(*hrxq) + rss_key_len, 0);
1758 hrxq->ind_table = ind_tbl;
1760 hrxq->rss_key_len = rss_key_len;
1761 hrxq->hash_fields = hash_fields;
1762 memcpy(hrxq->rss_key, rss_key, rss_key_len);
1763 rte_atomic32_inc(&hrxq->refcnt);
1764 LIST_INSERT_HEAD(&priv->hrxqs, hrxq, next);
1765 DEBUG("%p: Hash Rx queue %p: refcnt %d", (void *)priv,
1766 (void *)hrxq, rte_atomic32_read(&hrxq->refcnt));
1769 mlx5_priv_ind_table_ibv_release(priv, ind_tbl);
1771 claim_zero(ibv_destroy_qp(qp));
1776 * Get an Rx Hash queue.
1779 * Pointer to private structure.
1781 * RSS configuration for the Rx hash queue.
1783 * Queues to include in the hash queue.
1788 * A hash Rx queue on success.
1791 mlx5_priv_hrxq_get(struct priv *priv, uint8_t *rss_key, uint8_t rss_key_len,
1792 uint64_t hash_fields, uint16_t queues[], uint16_t queues_n)
1794 struct mlx5_hrxq *hrxq;
1796 LIST_FOREACH(hrxq, &priv->hrxqs, next) {
1797 struct mlx5_ind_table_ibv *ind_tbl;
1799 if (hrxq->rss_key_len != rss_key_len)
1801 if (memcmp(hrxq->rss_key, rss_key, rss_key_len))
1803 if (hrxq->hash_fields != hash_fields)
1805 ind_tbl = mlx5_priv_ind_table_ibv_get(priv, queues, queues_n);
1808 if (ind_tbl != hrxq->ind_table) {
1809 mlx5_priv_ind_table_ibv_release(priv, ind_tbl);
1812 rte_atomic32_inc(&hrxq->refcnt);
1813 DEBUG("%p: Hash Rx queue %p: refcnt %d", (void *)priv,
1814 (void *)hrxq, rte_atomic32_read(&hrxq->refcnt));
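/*
 * Usage sketch (assumption: this mirrors how the flow layer is expected to
 * combine the calls in this file):
 *
 *   struct mlx5_hrxq *hrxq;
 *
 *   hrxq = mlx5_priv_hrxq_get(priv, rss_key, rss_key_len,
 *                             hash_fields, queues, queues_n);
 *   if (!hrxq)
 *           hrxq = mlx5_priv_hrxq_new(priv, rss_key, rss_key_len,
 *                                     hash_fields, queues, queues_n);
 *   ...
 *   mlx5_priv_hrxq_release(priv, hrxq);
 */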
1821 * Release the hash Rx queue.
1824 * Pointer to private structure.
1826 * Pointer to Hash Rx queue to release.
1829 * 0 on success, errno value on failure.
1832 mlx5_priv_hrxq_release(struct priv *priv, struct mlx5_hrxq *hrxq)
1834 DEBUG("%p: Hash Rx queue %p: refcnt %d", (void *)priv,
1835 (void *)hrxq, rte_atomic32_read(&hrxq->refcnt));
1836 if (rte_atomic32_dec_and_test(&hrxq->refcnt)) {
1837 claim_zero(ibv_destroy_qp(hrxq->qp));
1838 mlx5_priv_ind_table_ibv_release(priv, hrxq->ind_table);
1839 LIST_REMOVE(hrxq, next);
1843 claim_nonzero(mlx5_priv_ind_table_ibv_release(priv, hrxq->ind_table));
1848 * Verify the hash Rx queue list is empty.
1851 * Pointer to private structure.
1853 * @return the number of objects not released.
1856 mlx5_priv_hrxq_ibv_verify(struct priv *priv)
1858 struct mlx5_hrxq *hrxq;
1861 LIST_FOREACH(hrxq, &priv->hrxqs, next) {
1862 DEBUG("%p: Verbs Hash Rx queue %p still referenced",
1863 (void *)priv, (void *)hrxq);