4 * Copyright 2015 6WIND S.A.
5 * Copyright 2015 Mellanox.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of 6WIND S.A. nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 #include <sys/queue.h>
43 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
45 #pragma GCC diagnostic ignored "-Wpedantic"
47 #include <infiniband/verbs.h>
48 #include <infiniband/mlx5dv.h>
50 #pragma GCC diagnostic error "-Wpedantic"
54 #include <rte_malloc.h>
55 #include <rte_ethdev.h>
56 #include <rte_common.h>
57 #include <rte_interrupts.h>
58 #include <rte_debug.h>
62 #include "mlx5_rxtx.h"
63 #include "mlx5_utils.h"
64 #include "mlx5_autoconf.h"
65 #include "mlx5_defs.h"
67 /* Initialization data for hash RX queues. */
68 const struct hash_rxq_init hash_rxq_init[] = {
70 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
71 IBV_RX_HASH_DST_IPV4 |
72 IBV_RX_HASH_SRC_PORT_TCP |
73 IBV_RX_HASH_DST_PORT_TCP),
74 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
76 .flow_spec.tcp_udp = {
77 .type = IBV_FLOW_SPEC_TCP,
78 .size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp),
80 .underlayer = &hash_rxq_init[HASH_RXQ_IPV4],
83 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
84 IBV_RX_HASH_DST_IPV4 |
85 IBV_RX_HASH_SRC_PORT_UDP |
86 IBV_RX_HASH_DST_PORT_UDP),
87 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
89 .flow_spec.tcp_udp = {
90 .type = IBV_FLOW_SPEC_UDP,
91 .size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp),
93 .underlayer = &hash_rxq_init[HASH_RXQ_IPV4],
96 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
97 IBV_RX_HASH_DST_IPV4),
98 .dpdk_rss_hf = (ETH_RSS_IPV4 |
102 .type = IBV_FLOW_SPEC_IPV4,
103 .size = sizeof(hash_rxq_init[0].flow_spec.ipv4),
105 .underlayer = &hash_rxq_init[HASH_RXQ_ETH],
108 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
109 IBV_RX_HASH_DST_IPV6 |
110 IBV_RX_HASH_SRC_PORT_TCP |
111 IBV_RX_HASH_DST_PORT_TCP),
112 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
114 .flow_spec.tcp_udp = {
115 .type = IBV_FLOW_SPEC_TCP,
116 .size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp),
118 .underlayer = &hash_rxq_init[HASH_RXQ_IPV6],
121 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
122 IBV_RX_HASH_DST_IPV6 |
123 IBV_RX_HASH_SRC_PORT_UDP |
124 IBV_RX_HASH_DST_PORT_UDP),
125 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
127 .flow_spec.tcp_udp = {
128 .type = IBV_FLOW_SPEC_UDP,
129 .size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp),
131 .underlayer = &hash_rxq_init[HASH_RXQ_IPV6],
134 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
135 IBV_RX_HASH_DST_IPV6),
136 .dpdk_rss_hf = (ETH_RSS_IPV6 |
140 .type = IBV_FLOW_SPEC_IPV6,
141 .size = sizeof(hash_rxq_init[0].flow_spec.ipv6),
143 .underlayer = &hash_rxq_init[HASH_RXQ_ETH],
150 .type = IBV_FLOW_SPEC_ETH,
151 .size = sizeof(hash_rxq_init[0].flow_spec.eth),
157 /* Number of entries in hash_rxq_init[]. */
158 const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
160 /* Initialization data for hash RX queue indirection tables. */
161 static const struct ind_table_init ind_table_init[] = {
163 .max_size = -1u, /* Superseded by HW limitations. */
165 1 << HASH_RXQ_TCPV4 |
166 1 << HASH_RXQ_UDPV4 |
168 1 << HASH_RXQ_TCPV6 |
169 1 << HASH_RXQ_UDPV6 |
176 .hash_types = 1 << HASH_RXQ_ETH,
181 #define IND_TABLE_INIT_N RTE_DIM(ind_table_init)
183 /* Default RSS hash key also used for ConnectX-3. */
184 uint8_t rss_hash_default_key[] = {
185 0x2c, 0xc6, 0x81, 0xd1,
186 0x5b, 0xdb, 0xf4, 0xf7,
187 0xfc, 0xa2, 0x83, 0x19,
188 0xdb, 0x1a, 0x3e, 0x94,
189 0x6b, 0x9e, 0x38, 0xd9,
190 0x2c, 0x9c, 0x03, 0xd1,
191 0xad, 0x99, 0x44, 0xa7,
192 0xd9, 0x56, 0x3d, 0x59,
193 0x06, 0x3c, 0x25, 0xf3,
194 0xfc, 0x1f, 0xdc, 0x2a,
197 /* Length of the default RSS hash key. */
198 const size_t rss_hash_default_key_len = sizeof(rss_hash_default_key);
201 * Populate flow steering rule for a given hash RX queue type using
202 * information from hash_rxq_init[]. Nothing is written to flow_attr when
203 * flow_attr_size is not large enough, but the required size is still returned.
206 * Pointer to private structure.
207 * @param[out] flow_attr
208 * Pointer to flow attribute structure to fill. Note that the allocated
209 * area must be large enough to hold all flow specifications.
210 * @param flow_attr_size
211 * Entire size of flow_attr and trailing room for flow specifications.
213 * Hash RX queue type to use for flow steering rule.
216 * Total size of the flow attribute buffer. No errors are defined.
219 priv_flow_attr(struct priv *priv, struct ibv_flow_attr *flow_attr,
220 size_t flow_attr_size, enum hash_rxq_type type)
222 size_t offset = sizeof(*flow_attr);
223 const struct hash_rxq_init *init = &hash_rxq_init[type];
225 assert(priv != NULL);
226 assert((size_t)type < RTE_DIM(hash_rxq_init));
228 offset += init->flow_spec.hdr.size;
229 init = init->underlayer;
230 } while (init != NULL);
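/*
 * At this point "offset" covers the ibv_flow_attr header plus one flow
 * specification for every layer in the underlayer chain (e.g. a TCPv4 rule
 * needs eth + ipv4 + tcp_udp specs), i.e. the total buffer size the caller
 * must provide.
 */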
231 if (offset > flow_attr_size)
233 flow_attr_size = offset;
234 init = &hash_rxq_init[type];
235 *flow_attr = (struct ibv_flow_attr){
236 .type = IBV_FLOW_ATTR_NORMAL,
237 /* Priorities < 3 are reserved for flow director. */
238 .priority = init->flow_priority + 3,
244 offset -= init->flow_spec.hdr.size;
245 memcpy((void *)((uintptr_t)flow_attr + offset),
247 init->flow_spec.hdr.size);
248 ++flow_attr->num_of_specs;
249 init = init->underlayer;
250 } while (init != NULL);
251 return flow_attr_size;
255 * Convert hash type position in indirection table initializer to
256 * hash RX queue type.
259 * Indirection table initializer.
261 * Hash type position.
264 * Hash RX queue type.
266 static enum hash_rxq_type
267 hash_rxq_type_from_pos(const struct ind_table_init *table, unsigned int pos)
269 enum hash_rxq_type type = HASH_RXQ_TCPV4;
271 assert(pos < table->hash_types_n);
273 if ((table->hash_types & (1 << type)) && (pos-- == 0))
281 * Filter out disabled hash RX queue types from ind_table_init[].
284 * Pointer to private structure.
289 * Number of table entries.
292 priv_make_ind_table_init(struct priv *priv,
293 struct ind_table_init (*table)[IND_TABLE_INIT_N])
298 unsigned int table_n = 0;
299 /* Mandatory to receive frames not handled by normal hash RX queues. */
300 unsigned int hash_types_sup = 1 << HASH_RXQ_ETH;
302 rss_hf = priv->rss_hf;
303 /* Process other protocols only if more than one queue. */
304 if (priv->rxqs_n > 1)
305 for (i = 0; (i != hash_rxq_init_n); ++i)
306 if (rss_hf & hash_rxq_init[i].dpdk_rss_hf)
307 hash_types_sup |= (1 << i);
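/*
 * hash_types_sup is now a bitmap of the hash RX queue types whose DPDK RSS
 * fields were requested; HASH_RXQ_ETH always stays set so frames that match
 * no other hash type still have a destination queue.
 */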
309 /* Filter out entries whose protocols are not in the set. */
310 for (i = 0, j = 0; (i != IND_TABLE_INIT_N); ++i) {
314 /* j is increased only if the table has valid protocols. */
316 (*table)[j] = ind_table_init[i];
317 (*table)[j].hash_types &= hash_types_sup;
318 for (h = 0, nb = 0; (h != hash_rxq_init_n); ++h)
319 if (((*table)[j].hash_types >> h) & 0x1)
321 (*table)[j].hash_types_n = nb;
331 * Initialize hash RX queues and indirection table.
334 * Pointer to private structure.
337 * 0 on success, errno value on failure.
340 priv_create_hash_rxqs(struct priv *priv)
342 struct ibv_wq *wqs[priv->reta_idx_n];
343 struct ind_table_init ind_table_init[IND_TABLE_INIT_N];
344 unsigned int ind_tables_n =
345 priv_make_ind_table_init(priv, &ind_table_init);
346 unsigned int hash_rxqs_n = 0;
347 struct hash_rxq (*hash_rxqs)[] = NULL;
348 struct ibv_rwq_ind_table *(*ind_tables)[] = NULL;
354 assert(priv->ind_tables == NULL);
355 assert(priv->ind_tables_n == 0);
356 assert(priv->hash_rxqs == NULL);
357 assert(priv->hash_rxqs_n == 0);
358 assert(priv->pd != NULL);
359 assert(priv->ctx != NULL);
362 if (priv->rxqs_n == 0)
364 assert(priv->rxqs != NULL);
365 if (ind_tables_n == 0) {
366 ERROR("all hash RX queue types have been filtered out,"
367 " indirection table cannot be created");
370 if (priv->rxqs_n & (priv->rxqs_n - 1)) {
371 INFO("%u RX queues are configured, consider rounding this"
372 " number to the next power of two for better balancing",
374 DEBUG("indirection table extended to assume %u WQs",
377 for (i = 0; (i != priv->reta_idx_n); ++i) {
378 struct mlx5_rxq_ctrl *rxq_ctrl;
380 rxq_ctrl = container_of((*priv->rxqs)[(*priv->reta_idx)[i]],
381 struct mlx5_rxq_ctrl, rxq);
382 wqs[i] = rxq_ctrl->ibv->wq;
384 /* Get number of hash RX queues to configure. */
385 for (i = 0, hash_rxqs_n = 0; (i != ind_tables_n); ++i)
386 hash_rxqs_n += ind_table_init[i].hash_types_n;
387 DEBUG("allocating %u hash RX queues for %u WQs, %u indirection tables",
388 hash_rxqs_n, priv->rxqs_n, ind_tables_n);
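/*
 * Each hash RX queue created below is a RAW_PACKET QP bound to one
 * indirection table and hashing on the fields of its hash_rxq_type;
 * flow rules are attached to these QPs later on.
 */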
389 /* Create indirection tables. */
390 ind_tables = rte_calloc(__func__, ind_tables_n,
391 sizeof((*ind_tables)[0]), 0);
392 if (ind_tables == NULL) {
394 ERROR("cannot allocate indirection tables container: %s",
398 for (i = 0; (i != ind_tables_n); ++i) {
399 struct ibv_rwq_ind_table_init_attr ind_init_attr = {
400 .log_ind_tbl_size = 0, /* Set below. */
404 unsigned int ind_tbl_size = ind_table_init[i].max_size;
405 struct ibv_rwq_ind_table *ind_table;
407 if (priv->reta_idx_n < ind_tbl_size)
408 ind_tbl_size = priv->reta_idx_n;
409 ind_init_attr.log_ind_tbl_size = log2above(ind_tbl_size);
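/*
 * Verbs takes the table size as a log2 value, so the effective size is
 * rounded up to the next power of two.
 */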
411 ind_table = ibv_create_rwq_ind_table(priv->ctx,
413 if (ind_table != NULL) {
414 (*ind_tables)[i] = ind_table;
417 /* Not clear whether errno is set. */
418 err = (errno ? errno : EINVAL);
419 ERROR("RX indirection table creation failed with error %d: %s",
423 /* Allocate array that holds hash RX queues and related data. */
424 hash_rxqs = rte_calloc(__func__, hash_rxqs_n,
425 sizeof((*hash_rxqs)[0]), 0);
426 if (hash_rxqs == NULL) {
428 ERROR("cannot allocate hash RX queues container: %s",
432 for (i = 0, j = 0, k = 0;
433 ((i != hash_rxqs_n) && (j != ind_tables_n));
435 struct hash_rxq *hash_rxq = &(*hash_rxqs)[i];
436 enum hash_rxq_type type =
437 hash_rxq_type_from_pos(&ind_table_init[j], k);
438 struct rte_eth_rss_conf *priv_rss_conf =
439 (*priv->rss_conf)[type];
440 struct ibv_rx_hash_conf hash_conf = {
441 .rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ,
442 .rx_hash_key_len = (priv_rss_conf ?
443 priv_rss_conf->rss_key_len :
444 rss_hash_default_key_len),
445 .rx_hash_key = (priv_rss_conf ?
446 priv_rss_conf->rss_key :
447 rss_hash_default_key),
448 .rx_hash_fields_mask = hash_rxq_init[type].hash_fields,
450 struct ibv_qp_init_attr_ex qp_init_attr = {
451 .qp_type = IBV_QPT_RAW_PACKET,
452 .comp_mask = (IBV_QP_INIT_ATTR_PD |
453 IBV_QP_INIT_ATTR_IND_TABLE |
454 IBV_QP_INIT_ATTR_RX_HASH),
455 .rx_hash_conf = hash_conf,
456 .rwq_ind_tbl = (*ind_tables)[j],
460 DEBUG("using indirection table %u for hash RX queue %u type %d",
462 *hash_rxq = (struct hash_rxq){
464 .qp = ibv_create_qp_ex(priv->ctx, &qp_init_attr),
467 if (hash_rxq->qp == NULL) {
468 err = (errno ? errno : EINVAL);
469 ERROR("Hash RX QP creation failure: %s",
473 if (++k < ind_table_init[j].hash_types_n)
475 /* Switch to the next indirection table and reset hash RX
476 * queue type array index. */
480 priv->ind_tables = ind_tables;
481 priv->ind_tables_n = ind_tables_n;
482 priv->hash_rxqs = hash_rxqs;
483 priv->hash_rxqs_n = hash_rxqs_n;
487 if (hash_rxqs != NULL) {
488 for (i = 0; (i != hash_rxqs_n); ++i) {
489 struct ibv_qp *qp = (*hash_rxqs)[i].qp;
493 claim_zero(ibv_destroy_qp(qp));
497 if (ind_tables != NULL) {
498 for (j = 0; (j != ind_tables_n); ++j) {
499 struct ibv_rwq_ind_table *ind_table =
502 if (ind_table == NULL)
504 claim_zero(ibv_destroy_rwq_ind_table(ind_table));
506 rte_free(ind_tables);
512 * Clean up hash RX queues and indirection table.
515 * Pointer to private structure.
518 priv_destroy_hash_rxqs(struct priv *priv)
522 DEBUG("destroying %u hash RX queues", priv->hash_rxqs_n);
523 if (priv->hash_rxqs_n == 0) {
524 assert(priv->hash_rxqs == NULL);
525 assert(priv->ind_tables == NULL);
528 for (i = 0; (i != priv->hash_rxqs_n); ++i) {
529 struct hash_rxq *hash_rxq = &(*priv->hash_rxqs)[i];
532 assert(hash_rxq->priv == priv);
533 assert(hash_rxq->qp != NULL);
534 /* Also check that there are no remaining flows. */
535 for (j = 0; (j != RTE_DIM(hash_rxq->special_flow)); ++j)
537 (k != RTE_DIM(hash_rxq->special_flow[j]));
539 assert(hash_rxq->special_flow[j][k] == NULL);
540 for (j = 0; (j != RTE_DIM(hash_rxq->mac_flow)); ++j)
541 for (k = 0; (k != RTE_DIM(hash_rxq->mac_flow[j])); ++k)
542 assert(hash_rxq->mac_flow[j][k] == NULL);
543 claim_zero(ibv_destroy_qp(hash_rxq->qp));
545 priv->hash_rxqs_n = 0;
546 rte_free(priv->hash_rxqs);
547 priv->hash_rxqs = NULL;
548 for (i = 0; (i != priv->ind_tables_n); ++i) {
549 struct ibv_rwq_ind_table *ind_table =
550 (*priv->ind_tables)[i];
552 assert(ind_table != NULL);
553 claim_zero(ibv_destroy_rwq_ind_table(ind_table));
555 priv->ind_tables_n = 0;
556 rte_free(priv->ind_tables);
557 priv->ind_tables = NULL;
561 * Check whether a given flow type is allowed.
564 * Pointer to private structure.
566 * Flow type to check.
569 * Nonzero if the given flow type is allowed.
572 priv_allow_flow_type(struct priv *priv, enum hash_rxq_flow_type type)
575 case HASH_RXQ_FLOW_TYPE_ALLMULTI:
576 return !!priv->allmulti_req;
577 case HASH_RXQ_FLOW_TYPE_BROADCAST:
578 case HASH_RXQ_FLOW_TYPE_IPV6MULTI:
579 /* If allmulti is enabled, broadcast and ipv6multi
580 * are unnecessary. */
581 return !priv->allmulti_req;
582 case HASH_RXQ_FLOW_TYPE_MAC:
585 /* Unsupported flow type is not allowed. */
592 * Automatically enable/disable flows according to configuration.
598 * 0 on success, errno value on failure.
601 priv_rehash_flows(struct priv *priv)
605 for (i = 0; i != RTE_DIM((*priv->hash_rxqs)[0].special_flow); ++i)
606 if (!priv_allow_flow_type(priv, i)) {
607 priv_special_flow_disable(priv, i);
609 int ret = priv_special_flow_enable(priv, i);
614 if (priv_allow_flow_type(priv, HASH_RXQ_FLOW_TYPE_MAC))
615 return priv_mac_addrs_enable(priv);
616 priv_mac_addrs_disable(priv);
621 * Allocate RX queue elements.
624 * Pointer to RX queue structure.
627 * 0 on success, errno value on failure.
630 rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
632 const unsigned int sges_n = 1 << rxq_ctrl->rxq.sges_n;
633 unsigned int elts_n = 1 << rxq_ctrl->rxq.elts_n;
637 /* Iterate on segments. */
638 for (i = 0; (i != elts_n); ++i) {
639 struct rte_mbuf *buf;
641 buf = rte_pktmbuf_alloc(rxq_ctrl->rxq.mp);
643 ERROR("%p: empty mbuf pool", (void *)rxq_ctrl);
647 /* Headroom is reserved by rte_pktmbuf_alloc(). */
648 assert(DATA_OFF(buf) == RTE_PKTMBUF_HEADROOM);
649 /* Buffer is supposed to be empty. */
650 assert(rte_pktmbuf_data_len(buf) == 0);
651 assert(rte_pktmbuf_pkt_len(buf) == 0);
653 /* Only the first segment keeps headroom. */
655 SET_DATA_OFF(buf, 0);
656 PORT(buf) = rxq_ctrl->rxq.port_id;
657 DATA_LEN(buf) = rte_pktmbuf_tailroom(buf);
658 PKT_LEN(buf) = DATA_LEN(buf);
660 (*rxq_ctrl->rxq.elts)[i] = buf;
662 /* If the vectorized Rx path is enabled. */
663 if (rxq_check_vec_support(&rxq_ctrl->rxq) > 0) {
664 struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
665 struct rte_mbuf *mbuf_init = &rxq->fake_mbuf;
668 /* Initialize default rearm_data for vPMD. */
669 mbuf_init->data_off = RTE_PKTMBUF_HEADROOM;
670 rte_mbuf_refcnt_set(mbuf_init, 1);
671 mbuf_init->nb_segs = 1;
672 mbuf_init->port = rxq->port_id;
674 * prevent compiler reordering:
675 * rearm_data covers previous fields.
677 rte_compiler_barrier();
678 rxq->mbuf_initializer =
679 *(uint64_t *)&mbuf_init->rearm_data;
680 /* Padding with a fake mbuf for vectorized Rx. */
681 for (j = 0; j < MLX5_VPMD_DESCS_PER_LOOP; ++j)
682 (*rxq->elts)[elts_n + j] = &rxq->fake_mbuf;
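/*
 * The extra MLX5_VPMD_DESCS_PER_LOOP entries let the vectorized burst
 * routine read past the ring end without bounds checks; overshooting
 * accesses hit the dummy mbuf instead of a stale pointer.
 */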
684 DEBUG("%p: allocated and configured %u segments (max %u packets)",
685 (void *)rxq_ctrl, elts_n, elts_n / (1 << rxq_ctrl->rxq.sges_n));
690 for (i = 0; (i != elts_n); ++i) {
691 if ((*rxq_ctrl->rxq.elts)[i] != NULL)
692 rte_pktmbuf_free_seg((*rxq_ctrl->rxq.elts)[i]);
693 (*rxq_ctrl->rxq.elts)[i] = NULL;
695 DEBUG("%p: failed, freed everything", (void *)rxq_ctrl);
701 * Free RX queue elements.
704 * Pointer to RX queue structure.
707 rxq_free_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
709 struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
710 const uint16_t q_n = (1 << rxq->elts_n);
711 const uint16_t q_mask = q_n - 1;
712 uint16_t used = q_n - (rxq->rq_ci - rxq->rq_pi);
715 DEBUG("%p: freeing WRs", (void *)rxq_ctrl);
716 if (rxq->elts == NULL)
719 * Some mbufs in the ring belong to the application; they cannot be freed.
722 if (rxq_check_vec_support(rxq) > 0) {
723 for (i = 0; i < used; ++i)
724 (*rxq->elts)[(rxq->rq_ci + i) & q_mask] = NULL;
725 rxq->rq_pi = rxq->rq_ci;
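/*
 * Slots whose mbufs were already handed to the application are cleared
 * above so the generic loop below only frees buffers still owned by the PMD.
 */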
727 for (i = 0; (i != (1u << rxq->elts_n)); ++i) {
728 if ((*rxq->elts)[i] != NULL)
729 rte_pktmbuf_free_seg((*rxq->elts)[i]);
730 (*rxq->elts)[i] = NULL;
735 * Clean up an RX queue.
737 * Destroy objects, free allocated memory and reset the structure for reuse.
740 * Pointer to RX queue structure.
743 mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *rxq_ctrl)
745 DEBUG("cleaning up %p", (void *)rxq_ctrl);
747 mlx5_priv_rxq_ibv_release(rxq_ctrl->priv, rxq_ctrl->ibv);
748 memset(rxq_ctrl, 0, sizeof(*rxq_ctrl));
754 * Pointer to Ethernet device structure.
758 * Number of descriptors to configure in queue.
760 * NUMA socket on which memory must be allocated.
762 * Thresholds parameters.
764 * Memory pool for buffer allocations.
767 * 0 on success, negative errno value on failure.
770 mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
771 unsigned int socket, const struct rte_eth_rxconf *conf,
772 struct rte_mempool *mp)
774 struct priv *priv = dev->data->dev_private;
775 struct mlx5_rxq_data *rxq = (*priv->rxqs)[idx];
776 struct mlx5_rxq_ctrl *rxq_ctrl =
777 container_of(rxq, struct mlx5_rxq_ctrl, rxq);
781 if (mlx5_is_secondary())
782 return -E_RTE_SECONDARY;
784 if (!rte_is_power_of_2(desc)) {
785 desc = 1 << log2above(desc);
786 WARN("%p: increased number of descriptors in RX queue %u"
787 " to the next power of two (%d)",
788 (void *)dev, idx, desc);
790 DEBUG("%p: configuring queue %u for %u descriptors",
791 (void *)dev, idx, desc);
792 if (idx >= priv->rxqs_n) {
793 ERROR("%p: queue index out of range (%u >= %u)",
794 (void *)dev, idx, priv->rxqs_n);
798 if (!mlx5_priv_rxq_releasable(priv, idx)) {
800 ERROR("%p: unable to release queue index %u",
804 mlx5_priv_rxq_release(priv, idx);
805 rxq_ctrl = mlx5_priv_rxq_new(priv, idx, desc, socket, mp);
807 ERROR("%p: unable to allocate queue index %u",
812 DEBUG("%p: adding RX queue %p to list",
813 (void *)dev, (void *)rxq_ctrl);
814 (*priv->rxqs)[idx] = &rxq_ctrl->rxq;
821 * DPDK callback to release an RX queue.
824 * Generic RX queue pointer.
827 mlx5_rx_queue_release(void *dpdk_rxq)
829 struct mlx5_rxq_data *rxq = (struct mlx5_rxq_data *)dpdk_rxq;
830 struct mlx5_rxq_ctrl *rxq_ctrl;
833 if (mlx5_is_secondary())
838 rxq_ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq);
839 priv = rxq_ctrl->priv;
841 if (!mlx5_priv_rxq_releasable(priv, rxq_ctrl->rxq.stats.idx))
842 rte_panic("Rx queue %p is still used by a flow and cannot be"
843 " removed\n", (void *)rxq_ctrl);
844 mlx5_priv_rxq_release(priv, rxq_ctrl->rxq.stats.idx);
849 * Allocate queue vector and fill epoll fd list for Rx interrupts.
852 * Pointer to private structure.
855 * 0 on success, negative on failure.
858 priv_rx_intr_vec_enable(struct priv *priv)
861 unsigned int rxqs_n = priv->rxqs_n;
862 unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
863 unsigned int count = 0;
864 struct rte_intr_handle *intr_handle = priv->dev->intr_handle;
866 assert(!mlx5_is_secondary());
867 if (!priv->dev->data->dev_conf.intr_conf.rxq)
869 priv_rx_intr_vec_disable(priv);
870 intr_handle->intr_vec = malloc(n * sizeof(intr_handle->intr_vec[0]));
871 if (intr_handle->intr_vec == NULL) {
872 ERROR("failed to allocate memory for interrupt vector,"
873 " Rx interrupts will not be supported");
876 intr_handle->type = RTE_INTR_HANDLE_EXT;
877 for (i = 0; i != n; ++i) {
878 /* This rxq ibv must not be released in this function. */
879 struct mlx5_rxq_ibv *rxq_ibv = mlx5_priv_rxq_ibv_get(priv, i);
884 /* Skip queues that cannot request interrupts. */
885 if (!rxq_ibv || !rxq_ibv->channel) {
886 /* Use invalid intr_vec[] index to disable entry. */
887 intr_handle->intr_vec[i] =
888 RTE_INTR_VEC_RXTX_OFFSET +
889 RTE_MAX_RXTX_INTR_VEC_ID;
892 if (count >= RTE_MAX_RXTX_INTR_VEC_ID) {
893 ERROR("too many Rx queues for interrupt vector size"
894 " (%d), Rx interrupts cannot be enabled",
895 RTE_MAX_RXTX_INTR_VEC_ID);
896 priv_rx_intr_vec_disable(priv);
899 fd = rxq_ibv->channel->fd;
900 flags = fcntl(fd, F_GETFL);
901 rc = fcntl(fd, F_SETFL, flags | O_NONBLOCK);
903 ERROR("failed to make Rx interrupt file descriptor"
904 " %d non-blocking for queue index %d", fd, i);
905 priv_rx_intr_vec_disable(priv);
908 intr_handle->intr_vec[i] = RTE_INTR_VEC_RXTX_OFFSET + count;
909 intr_handle->efds[count] = fd;
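/*
 * Queue i maps to vector RTE_INTR_VEC_RXTX_OFFSET + count and its completion
 * channel fd is registered in efds[]; count only advances for queues that
 * actually support interrupts.
 */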
913 priv_rx_intr_vec_disable(priv);
915 intr_handle->nb_efd = count;
920 * Clean up the Rx interrupt handler.
923 * Pointer to private structure.
926 priv_rx_intr_vec_disable(struct priv *priv)
928 struct rte_intr_handle *intr_handle = priv->dev->intr_handle;
930 unsigned int rxqs_n = priv->rxqs_n;
931 unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
933 if (!priv->dev->data->dev_conf.intr_conf.rxq)
935 for (i = 0; i != n; ++i) {
936 struct mlx5_rxq_ctrl *rxq_ctrl;
937 struct mlx5_rxq_data *rxq_data;
939 if (intr_handle->intr_vec[i] == RTE_INTR_VEC_RXTX_OFFSET +
940 RTE_MAX_RXTX_INTR_VEC_ID)
943 * Need to access the queue directly to release the reference
944 * kept in priv_rx_intr_vec_enable().
946 rxq_data = (*priv->rxqs)[i];
947 rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
948 mlx5_priv_rxq_ibv_release(priv, rxq_ctrl->ibv);
950 rte_intr_free_epoll_fd(intr_handle);
951 free(intr_handle->intr_vec);
952 intr_handle->nb_efd = 0;
953 intr_handle->intr_vec = NULL;
957 * MLX5 CQ notification.
960 * Pointer to receive queue structure.
962 * Sequence number per receive queue.
965 mlx5_arm_cq(struct mlx5_rxq_data *rxq, int sq_n_rxq)
968 uint32_t doorbell_hi;
970 void *cq_db_reg = (char *)rxq->cq_uar + MLX5_CQ_DOORBELL;
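/*
 * Arming sequence: the high 32 bits of the doorbell carry the arm sequence
 * number and the current CQ consumer index, the low 32 bits the CQ number.
 * The CQ doorbell record is updated first, then the UAR register is rung so
 * the HCA raises an event on the next completion.
 */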
972 sq_n = sq_n_rxq & MLX5_CQ_SQN_MASK;
973 doorbell_hi = sq_n << MLX5_CQ_SQN_OFFSET | (rxq->cq_ci & MLX5_CI_MASK);
974 doorbell = (uint64_t)doorbell_hi << 32;
975 doorbell |= rxq->cqn;
976 rxq->cq_db[MLX5_CQ_ARM_DB] = rte_cpu_to_be_32(doorbell_hi);
978 rte_write64(rte_cpu_to_be_64(doorbell), cq_db_reg);
982 * DPDK callback for Rx queue interrupt enable.
985 * Pointer to Ethernet device structure.
990 * 0 on success, negative on failure.
993 mlx5_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
995 struct priv *priv = mlx5_get_priv(dev);
996 struct mlx5_rxq_data *rxq_data;
997 struct mlx5_rxq_ctrl *rxq_ctrl;
1001 rxq_data = (*priv->rxqs)[rx_queue_id];
1006 rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
1007 if (rxq_ctrl->irq) {
1008 struct mlx5_rxq_ibv *rxq_ibv;
1010 rxq_ibv = mlx5_priv_rxq_ibv_get(priv, rx_queue_id);
1015 mlx5_arm_cq(rxq_data, rxq_data->cq_arm_sn);
1016 mlx5_priv_rxq_ibv_release(priv, rxq_ibv);
1021 WARN("unable to arm interrupt on rx queue %d", rx_queue_id);
1026 * DPDK callback for Rx queue interrupt disable.
1029 * Pointer to Ethernet device structure.
1030 * @param rx_queue_id
1034 * 0 on success, negative on failure.
1037 mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
1039 struct priv *priv = mlx5_get_priv(dev);
1040 struct mlx5_rxq_data *rxq_data;
1041 struct mlx5_rxq_ctrl *rxq_ctrl;
1042 struct mlx5_rxq_ibv *rxq_ibv = NULL;
1043 struct ibv_cq *ev_cq;
1048 rxq_data = (*priv->rxqs)[rx_queue_id];
1053 rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
1056 rxq_ibv = mlx5_priv_rxq_ibv_get(priv, rx_queue_id);
1061 ret = ibv_get_cq_event(rxq_ibv->channel, &ev_cq, &ev_ctx);
1062 if (ret || ev_cq != rxq_ibv->cq) {
1066 rxq_data->cq_arm_sn++;
1067 ibv_ack_cq_events(rxq_ibv->cq, 1);
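/*
 * Every event returned by ibv_get_cq_event() must be acknowledged; bumping
 * cq_arm_sn above makes the next mlx5_arm_cq() call use a fresh sequence
 * number.
 */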
1070 mlx5_priv_rxq_ibv_release(priv, rxq_ibv);
1073 WARN("unable to disable interrupt on rx queue %d",
1079 * Create the Rx queue Verbs object.
1082 * Pointer to private structure.
1084 * Queue index in the DPDK Rx queue array.
1087 * The Verbs object initialised if it can be created.
1089 struct mlx5_rxq_ibv*
1090 mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx)
1092 struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
1093 struct mlx5_rxq_ctrl *rxq_ctrl =
1094 container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
1095 struct ibv_wq_attr mod;
1097 struct ibv_cq_init_attr_ex cq;
1098 struct ibv_wq_init_attr wq;
1099 struct ibv_cq_ex cq_attr;
1101 unsigned int cqe_n = (1 << rxq_data->elts_n) - 1;
1102 struct mlx5_rxq_ibv *tmpl;
1103 struct mlx5dv_cq cq_info;
1104 struct mlx5dv_rwq rwq;
1107 struct mlx5dv_obj obj;
1110 assert(!rxq_ctrl->ibv);
1111 tmpl = rte_calloc_socket(__func__, 1, sizeof(*tmpl), 0,
1114 ERROR("%p: cannot allocate verbs resources",
1118 tmpl->rxq_ctrl = rxq_ctrl;
1119 /* Use the entire RX mempool as the memory region. */
1120 tmpl->mr = priv_mr_get(priv, rxq_data->mp);
1122 tmpl->mr = priv_mr_new(priv, rxq_data->mp);
1124 ERROR("%p: MR creation failure", (void *)rxq_ctrl);
1128 if (rxq_ctrl->irq) {
1129 tmpl->channel = ibv_create_comp_channel(priv->ctx);
1130 if (!tmpl->channel) {
1131 ERROR("%p: Comp Channel creation failure",
1136 attr.cq = (struct ibv_cq_init_attr_ex){
1139 if (priv->cqe_comp) {
1140 attr.cq.comp_mask |= IBV_CQ_INIT_ATTR_MASK_FLAGS;
1141 attr.cq.flags |= MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE;
1143 * For vectorized Rx, it must not be doubled in order to
1144 * keep cq_ci and rq_ci aligned.
1146 if (rxq_check_vec_support(rxq_data) < 0)
1149 tmpl->cq = ibv_create_cq(priv->ctx, cqe_n, NULL, tmpl->channel, 0);
1150 if (tmpl->cq == NULL) {
1151 ERROR("%p: CQ creation failure", (void *)rxq_ctrl);
1154 DEBUG("priv->device_attr.max_qp_wr is %d",
1155 priv->device_attr.orig_attr.max_qp_wr);
1156 DEBUG("priv->device_attr.max_sge is %d",
1157 priv->device_attr.orig_attr.max_sge);
1158 attr.wq = (struct ibv_wq_init_attr){
1159 .wq_context = NULL, /* Could be useful in the future. */
1160 .wq_type = IBV_WQT_RQ,
1161 /* Max number of outstanding WRs. */
1162 .max_wr = (1 << rxq_data->elts_n) >> rxq_data->sges_n,
1163 /* Max number of scatter/gather elements in a WR. */
1164 .max_sge = 1 << rxq_data->sges_n,
1168 IBV_WQ_FLAGS_CVLAN_STRIPPING |
1170 .create_flags = (rxq_data->vlan_strip ?
1171 IBV_WQ_FLAGS_CVLAN_STRIPPING :
1174 /* By default, FCS (CRC) is stripped by hardware. */
1175 if (rxq_data->crc_present) {
1176 attr.wq.create_flags |= IBV_WQ_FLAGS_SCATTER_FCS;
1177 attr.wq.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
1179 #ifdef HAVE_IBV_WQ_FLAG_RX_END_PADDING
1180 if (priv->hw_padding) {
1181 attr.wq.create_flags |= IBV_WQ_FLAG_RX_END_PADDING;
1182 attr.wq.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
1185 tmpl->wq = ibv_create_wq(priv->ctx, &attr.wq);
1186 if (tmpl->wq == NULL) {
1187 ERROR("%p: WQ creation failure", (void *)rxq_ctrl);
1191 * Make sure the number of WRs*SGEs matches expectations since a queue
1192 * cannot allocate more than "desc" buffers.
1194 if (((int)attr.wq.max_wr !=
1195 ((1 << rxq_data->elts_n) >> rxq_data->sges_n)) ||
1196 ((int)attr.wq.max_sge != (1 << rxq_data->sges_n))) {
1197 ERROR("%p: requested %u*%u but got %u*%u WRs*SGEs",
1199 ((1 << rxq_data->elts_n) >> rxq_data->sges_n),
1200 (1 << rxq_data->sges_n),
1201 attr.wq.max_wr, attr.wq.max_sge);
1204 /* Change queue state to ready. */
1205 mod = (struct ibv_wq_attr){
1206 .attr_mask = IBV_WQ_ATTR_STATE,
1207 .wq_state = IBV_WQS_RDY,
1209 ret = ibv_modify_wq(tmpl->wq, &mod);
1211 ERROR("%p: WQ state to IBV_WQS_RDY failed",
1215 obj.cq.in = tmpl->cq;
1216 obj.cq.out = &cq_info;
1217 obj.rwq.in = tmpl->wq;
1219 ret = mlx5dv_init_obj(&obj, MLX5DV_OBJ_CQ | MLX5DV_OBJ_RWQ);
1222 if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
1223 ERROR("Wrong MLX5_CQE_SIZE environment variable value: "
1224 "it should be set to %u", RTE_CACHE_LINE_SIZE);
1227 /* Fill the rings. */
1228 rxq_data->wqes = (volatile struct mlx5_wqe_data_seg (*)[])
1230 for (i = 0; (i != (unsigned int)(1 << rxq_data->elts_n)); ++i) {
1231 struct rte_mbuf *buf = (*rxq_data->elts)[i];
1232 volatile struct mlx5_wqe_data_seg *scat = &(*rxq_data->wqes)[i];
1234 /* scat->addr must be able to store a pointer. */
1235 assert(sizeof(scat->addr) >= sizeof(uintptr_t));
1236 *scat = (struct mlx5_wqe_data_seg){
1237 .addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf,
1239 .byte_count = rte_cpu_to_be_32(DATA_LEN(buf)),
1240 .lkey = tmpl->mr->lkey,
1243 rxq_data->rq_db = rwq.dbrec;
1244 rxq_data->cqe_n = log2above(cq_info.cqe_cnt);
1245 rxq_data->cq_ci = 0;
1246 rxq_data->rq_ci = 0;
1247 rxq_data->rq_pi = 0;
1248 rxq_data->zip = (struct rxq_zip){
1251 rxq_data->cq_db = cq_info.dbrec;
1252 rxq_data->cqes = (volatile struct mlx5_cqe (*)[])(uintptr_t)cq_info.buf;
1253 /* Update doorbell counter. */
1254 rxq_data->rq_ci = (1 << rxq_data->elts_n) >> rxq_data->sges_n;
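/*
 * All receive descriptors filled above now belong to hardware; publishing
 * rq_ci through the doorbell record below lets the HCA start placing
 * packets into them.
 */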
1256 *rxq_data->rq_db = rte_cpu_to_be_32(rxq_data->rq_ci);
1257 DEBUG("%p: rxq updated with %p", (void *)rxq_ctrl, (void *)&tmpl);
1258 rte_atomic32_inc(&tmpl->refcnt);
1259 DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void *)priv,
1260 (void *)tmpl, rte_atomic32_read(&tmpl->refcnt));
1261 LIST_INSERT_HEAD(&priv->rxqsibv, tmpl, next);
1265 claim_zero(ibv_destroy_wq(tmpl->wq));
1267 claim_zero(ibv_destroy_cq(tmpl->cq));
1269 claim_zero(ibv_destroy_comp_channel(tmpl->channel));
1271 priv_mr_release(priv, tmpl->mr);
1276 * Get an Rx queue Verbs object.
1279 * Pointer to private structure.
1281 * Queue index in the DPDK Rx queue array.
1284 * The Verbs object if it exists.
1286 struct mlx5_rxq_ibv*
1287 mlx5_priv_rxq_ibv_get(struct priv *priv, uint16_t idx)
1289 struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
1290 struct mlx5_rxq_ctrl *rxq_ctrl;
1292 if (idx >= priv->rxqs_n)
1296 rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
1297 if (rxq_ctrl->ibv) {
1298 priv_mr_get(priv, rxq_data->mp);
1299 rte_atomic32_inc(&rxq_ctrl->ibv->refcnt);
1300 DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void *)priv,
1301 (void *)rxq_ctrl->ibv,
1302 rte_atomic32_read(&rxq_ctrl->ibv->refcnt));
1304 return rxq_ctrl->ibv;
1308 * Release an Rx queue Verbs object.
1311 * Pointer to private structure.
1313 * Verbs Rx queue object.
1316 * 0 on success, errno value on failure.
1319 mlx5_priv_rxq_ibv_release(struct priv *priv, struct mlx5_rxq_ibv *rxq_ibv)
1324 assert(rxq_ibv->wq);
1325 assert(rxq_ibv->cq);
1326 assert(rxq_ibv->mr);
1327 ret = priv_mr_release(priv, rxq_ibv->mr);
1330 DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void *)priv,
1331 (void *)rxq_ibv, rte_atomic32_read(&rxq_ibv->refcnt));
1332 if (rte_atomic32_dec_and_test(&rxq_ibv->refcnt)) {
1333 rxq_free_elts(rxq_ibv->rxq_ctrl);
1334 claim_zero(ibv_destroy_wq(rxq_ibv->wq));
1335 claim_zero(ibv_destroy_cq(rxq_ibv->cq));
1336 if (rxq_ibv->channel)
1337 claim_zero(ibv_destroy_comp_channel(rxq_ibv->channel));
1338 LIST_REMOVE(rxq_ibv, next);
1346 * Verify the Verbs Rx queue list is empty.
1349 * Pointer to private structure.
1351 * @return The number of objects not released.
1354 mlx5_priv_rxq_ibv_verify(struct priv *priv)
1357 struct mlx5_rxq_ibv *rxq_ibv;
1359 LIST_FOREACH(rxq_ibv, &priv->rxqsibv, next) {
1360 DEBUG("%p: Verbs Rx queue %p still referenced", (void *)priv,
1368 * Return true if a single reference exists on the object.
1371 * Pointer to private structure.
1373 * Verbs Rx queue object.
1376 mlx5_priv_rxq_ibv_releasable(struct priv *priv, struct mlx5_rxq_ibv *rxq_ibv)
1380 return (rte_atomic32_read(&rxq_ibv->refcnt) == 1);
1384 * Create a DPDK Rx queue.
1387 * Pointer to private structure.
1391 * Number of descriptors to configure in queue.
1393 * NUMA socket on which memory must be allocated.
1396 * A DPDK queue object on success.
1398 struct mlx5_rxq_ctrl*
1399 mlx5_priv_rxq_new(struct priv *priv, uint16_t idx, uint16_t desc,
1400 unsigned int socket, struct rte_mempool *mp)
1402 struct rte_eth_dev *dev = priv->dev;
1403 struct mlx5_rxq_ctrl *tmpl;
1404 const uint16_t desc_n =
1405 desc + priv->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP;
1406 unsigned int mb_len = rte_pktmbuf_data_room_size(mp);
1408 tmpl = rte_calloc_socket("RXQ", 1,
1410 desc_n * sizeof(struct rte_mbuf *),
1414 if (priv->dev->data->dev_conf.intr_conf.rxq)
1416 /* Enable scattered packets support for this queue if necessary. */
1417 assert(mb_len >= RTE_PKTMBUF_HEADROOM);
1418 if (dev->data->dev_conf.rxmode.max_rx_pkt_len <=
1419 (mb_len - RTE_PKTMBUF_HEADROOM)) {
1420 tmpl->rxq.sges_n = 0;
1421 } else if (dev->data->dev_conf.rxmode.enable_scatter) {
1423 RTE_PKTMBUF_HEADROOM +
1424 dev->data->dev_conf.rxmode.max_rx_pkt_len;
1425 unsigned int sges_n;
1428 * Determine the number of SGEs needed for a full packet
1429 * and round it to the next power of two.
1431 sges_n = log2above((size / mb_len) + !!(size % mb_len));
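/*
 * Worked example (assuming the default 128-byte RTE_PKTMBUF_HEADROOM and
 * 2048-byte mbuf data rooms): with max_rx_pkt_len = 9000, size = 9128,
 * which needs 5 mbufs, rounded up to 8 SGEs, i.e. sges_n = 3.
 */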
1432 tmpl->rxq.sges_n = sges_n;
1433 /* Make sure rxq.sges_n did not overflow. */
1434 size = mb_len * (1 << tmpl->rxq.sges_n);
1435 size -= RTE_PKTMBUF_HEADROOM;
1436 if (size < dev->data->dev_conf.rxmode.max_rx_pkt_len) {
1437 ERROR("%p: too many SGEs (%u) needed to handle"
1438 " requested maximum packet size %u",
1441 dev->data->dev_conf.rxmode.max_rx_pkt_len);
1445 WARN("%p: the requested maximum Rx packet size (%u) is"
1446 " larger than a single mbuf (%u) and scattered"
1447 " mode has not been requested",
1449 dev->data->dev_conf.rxmode.max_rx_pkt_len,
1450 mb_len - RTE_PKTMBUF_HEADROOM);
1452 DEBUG("%p: maximum number of segments per packet: %u",
1453 (void *)dev, 1 << tmpl->rxq.sges_n);
1454 if (desc % (1 << tmpl->rxq.sges_n)) {
1455 ERROR("%p: number of RX queue descriptors (%u) is not a"
1456 " multiple of SGEs per packet (%u)",
1459 1 << tmpl->rxq.sges_n);
1462 /* Toggle RX checksum offload if hardware supports it. */
1464 tmpl->rxq.csum = !!dev->data->dev_conf.rxmode.hw_ip_checksum;
1465 if (priv->hw_csum_l2tun)
1466 tmpl->rxq.csum_l2tun =
1467 !!dev->data->dev_conf.rxmode.hw_ip_checksum;
1468 /* Configure VLAN stripping. */
1469 tmpl->rxq.vlan_strip = (priv->hw_vlan_strip &&
1470 !!dev->data->dev_conf.rxmode.hw_vlan_strip);
1471 /* By default, FCS (CRC) is stripped by hardware. */
1472 if (dev->data->dev_conf.rxmode.hw_strip_crc) {
1473 tmpl->rxq.crc_present = 0;
1474 } else if (priv->hw_fcs_strip) {
1475 tmpl->rxq.crc_present = 1;
1477 WARN("%p: CRC stripping has been disabled but will still"
1478 " be performed by hardware, make sure MLNX_OFED and"
1479 " firmware are up to date",
1481 tmpl->rxq.crc_present = 0;
1483 DEBUG("%p: CRC stripping is %s, %u bytes will be subtracted from"
1484 " incoming frames to hide it",
1486 tmpl->rxq.crc_present ? "disabled" : "enabled",
1487 tmpl->rxq.crc_present << 2);
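/* crc_present << 2 converts the flag into the 4-byte FCS length reported above. */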
1489 tmpl->rxq.rss_hash = priv->rxqs_n > 1;
1490 tmpl->rxq.port_id = dev->data->port_id;
1493 tmpl->rxq.stats.idx = idx;
1494 tmpl->rxq.elts_n = log2above(desc);
1496 (struct rte_mbuf *(*)[1 << tmpl->rxq.elts_n])(tmpl + 1);
1497 rte_atomic32_inc(&tmpl->refcnt);
1498 DEBUG("%p: Rx queue %p: refcnt %d", (void *)priv,
1499 (void *)tmpl, rte_atomic32_read(&tmpl->refcnt));
1500 LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next);
1511 * Pointer to private structure.
1516 * A pointer to the queue if it exists.
1518 struct mlx5_rxq_ctrl*
1519 mlx5_priv_rxq_get(struct priv *priv, uint16_t idx)
1521 struct mlx5_rxq_ctrl *rxq_ctrl = NULL;
1523 if ((*priv->rxqs)[idx]) {
1524 rxq_ctrl = container_of((*priv->rxqs)[idx],
1525 struct mlx5_rxq_ctrl,
1528 mlx5_priv_rxq_ibv_get(priv, idx);
1529 rte_atomic32_inc(&rxq_ctrl->refcnt);
1530 DEBUG("%p: Rx queue %p: refcnt %d", (void *)priv,
1531 (void *)rxq_ctrl, rte_atomic32_read(&rxq_ctrl->refcnt));
1537 * Release an Rx queue.
1540 * Pointer to private structure.
1545 * 0 on success, errno value on failure.
1548 mlx5_priv_rxq_release(struct priv *priv, uint16_t idx)
1550 struct mlx5_rxq_ctrl *rxq_ctrl;
1552 if (!(*priv->rxqs)[idx])
1554 rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq);
1555 assert(rxq_ctrl->priv);
1556 if (rxq_ctrl->ibv) {
1559 ret = mlx5_priv_rxq_ibv_release(rxq_ctrl->priv, rxq_ctrl->ibv);
1561 rxq_ctrl->ibv = NULL;
1563 DEBUG("%p: Rx queue %p: refcnt %d", (void *)priv,
1564 (void *)rxq_ctrl, rte_atomic32_read(&rxq_ctrl->refcnt));
1565 if (rte_atomic32_dec_and_test(&rxq_ctrl->refcnt)) {
1566 LIST_REMOVE(rxq_ctrl, next);
1568 (*priv->rxqs)[idx] = NULL;
1575 * Verify if the queue can be released.
1578 * Pointer to private structure.
1583 * 1 if the queue can be released.
1586 mlx5_priv_rxq_releasable(struct priv *priv, uint16_t idx)
1588 struct mlx5_rxq_ctrl *rxq_ctrl;
1590 if (!(*priv->rxqs)[idx])
1592 rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq);
1593 return (rte_atomic32_read(&rxq_ctrl->refcnt) == 1);
1597 * Verify the Rx queue list is empty.
1600 * Pointer to private structure.
1602 * @return The number of objects not released.
1605 mlx5_priv_rxq_verify(struct priv *priv)
1607 struct mlx5_rxq_ctrl *rxq_ctrl;
1610 LIST_FOREACH(rxq_ctrl, &priv->rxqsctrl, next) {
1611 DEBUG("%p: Rx Queue %p still referenced", (void *)priv,
1619 * Create an indirection table.
1622 * Pointer to private structure.
1624 * Queues entering the indirection table.
1626 * Number of queues in the array.
1629 * A new indirection table.
1631 struct mlx5_ind_table_ibv*
1632 mlx5_priv_ind_table_ibv_new(struct priv *priv, uint16_t queues[],
1635 struct mlx5_ind_table_ibv *ind_tbl;
1636 const unsigned int wq_n = rte_is_power_of_2(queues_n) ?
1637 log2above(queues_n) :
1638 log2above(priv->ind_table_max_size);
1639 struct ibv_wq *wq[1 << wq_n];
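/*
 * The Verbs indirection table size must be a power of two; when queues_n is
 * not, wq[] is sized up and the configured queues are repeated by the
 * finalisation loop below to fill it.
 */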
1643 ind_tbl = rte_calloc(__func__, 1, sizeof(*ind_tbl) +
1644 queues_n * sizeof(uint16_t), 0);
1647 for (i = 0; i != queues_n; ++i) {
1648 struct mlx5_rxq_ctrl *rxq =
1649 mlx5_priv_rxq_get(priv, queues[i]);
1653 wq[i] = rxq->ibv->wq;
1654 ind_tbl->queues[i] = queues[i];
1656 ind_tbl->queues_n = queues_n;
1657 /* Finalise indirection table. */
1658 for (j = 0; i != (unsigned int)(1 << wq_n); ++i, ++j)
1660 ind_tbl->ind_table = ibv_create_rwq_ind_table(
1662 &(struct ibv_rwq_ind_table_init_attr){
1663 .log_ind_tbl_size = wq_n,
1667 if (!ind_tbl->ind_table)
1669 rte_atomic32_inc(&ind_tbl->refcnt);
1670 LIST_INSERT_HEAD(&priv->ind_tbls, ind_tbl, next);
1671 DEBUG("%p: Indirection table %p: refcnt %d", (void *)priv,
1672 (void *)ind_tbl, rte_atomic32_read(&ind_tbl->refcnt));
1676 DEBUG("%p cannot create indirection table", (void *)priv);
1681 * Get an indirection table.
1684 * Pointer to private structure.
1686 * Queues entering the indirection table.
1688 * Number of queues in the array.
1691 * An indirection table if found.
1693 struct mlx5_ind_table_ibv*
1694 mlx5_priv_ind_table_ibv_get(struct priv *priv, uint16_t queues[],
1697 struct mlx5_ind_table_ibv *ind_tbl;
1699 LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) {
1700 if ((ind_tbl->queues_n == queues_n) &&
1701 (memcmp(ind_tbl->queues, queues,
1702 ind_tbl->queues_n * sizeof(ind_tbl->queues[0]))
1709 rte_atomic32_inc(&ind_tbl->refcnt);
1710 DEBUG("%p: Indirection table %p: refcnt %d", (void *)priv,
1711 (void *)ind_tbl, rte_atomic32_read(&ind_tbl->refcnt));
1712 for (i = 0; i != ind_tbl->queues_n; ++i)
1713 mlx5_priv_rxq_get(priv, ind_tbl->queues[i]);
1719 * Release an indirection table.
1722 * Pointer to private structure.
1724 * Indirection table to release.
1727 * 0 on success, errno value on failure.
1730 mlx5_priv_ind_table_ibv_release(struct priv *priv,
1731 struct mlx5_ind_table_ibv *ind_tbl)
1735 DEBUG("%p: Indirection table %p: refcnt %d", (void *)priv,
1736 (void *)ind_tbl, rte_atomic32_read(&ind_tbl->refcnt));
1737 if (rte_atomic32_dec_and_test(&ind_tbl->refcnt))
1738 claim_zero(ibv_destroy_rwq_ind_table(ind_tbl->ind_table));
1739 for (i = 0; i != ind_tbl->queues_n; ++i)
1740 claim_nonzero(mlx5_priv_rxq_release(priv, ind_tbl->queues[i]));
1741 if (!rte_atomic32_read(&ind_tbl->refcnt)) {
1742 LIST_REMOVE(ind_tbl, next);
1750 * Verify the Verbs indirection table list is empty.
1753 * Pointer to private structure.
1755 * @return The number of objects not released.
1758 mlx5_priv_ind_table_ibv_verify(struct priv *priv)
1760 struct mlx5_ind_table_ibv *ind_tbl;
1763 LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) {
1764 DEBUG("%p: Verbs indirection table %p still referenced",
1765 (void *)priv, (void *)ind_tbl);
1772 * Create an Rx Hash queue.
1775 * Pointer to private structure.
1777 * RSS key for the Rx hash queue.
1778 * @param rss_key_len
1780 * @param hash_fields
1781 * Verbs protocol hash field to make the RSS on.
1783 * Queues entering the hash queue.
1788 * A hash Rx queue on success.
1791 mlx5_priv_hrxq_new(struct priv *priv, uint8_t *rss_key, uint8_t rss_key_len,
1792 uint64_t hash_fields, uint16_t queues[], uint16_t queues_n)
1794 struct mlx5_hrxq *hrxq;
1795 struct mlx5_ind_table_ibv *ind_tbl;
1798 ind_tbl = mlx5_priv_ind_table_ibv_get(priv, queues, queues_n);
1800 ind_tbl = mlx5_priv_ind_table_ibv_new(priv, queues, queues_n);
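/*
 * The QP created below performs the actual RSS: a Toeplitz hash computed
 * over hash_fields selects one WQ of the indirection table for every
 * received packet.
 */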
1803 qp = ibv_create_qp_ex(
1805 &(struct ibv_qp_init_attr_ex){
1806 .qp_type = IBV_QPT_RAW_PACKET,
1808 IBV_QP_INIT_ATTR_PD |
1809 IBV_QP_INIT_ATTR_IND_TABLE |
1810 IBV_QP_INIT_ATTR_RX_HASH,
1811 .rx_hash_conf = (struct ibv_rx_hash_conf){
1812 .rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ,
1813 .rx_hash_key_len = rss_key_len,
1814 .rx_hash_key = rss_key,
1815 .rx_hash_fields_mask = hash_fields,
1817 .rwq_ind_tbl = ind_tbl->ind_table,
1822 hrxq = rte_calloc(__func__, 1, sizeof(*hrxq) + rss_key_len, 0);
1825 hrxq->ind_table = ind_tbl;
1827 hrxq->rss_key_len = rss_key_len;
1828 hrxq->hash_fields = hash_fields;
1829 memcpy(hrxq->rss_key, rss_key, rss_key_len);
1830 rte_atomic32_inc(&hrxq->refcnt);
1831 LIST_INSERT_HEAD(&priv->hrxqs, hrxq, next);
1832 DEBUG("%p: Hash Rx queue %p: refcnt %d", (void *)priv,
1833 (void *)hrxq, rte_atomic32_read(&hrxq->refcnt));
1836 mlx5_priv_ind_table_ibv_release(priv, ind_tbl);
1838 claim_zero(ibv_destroy_qp(qp));
1843 * Get an Rx Hash queue.
1846 * Pointer to private structure.
1848 * RSS configuration for the Rx hash queue.
1850 * Queues entering the hash queue.
1855 * A hash Rx queue on success.
1858 mlx5_priv_hrxq_get(struct priv *priv, uint8_t *rss_key, uint8_t rss_key_len,
1859 uint64_t hash_fields, uint16_t queues[], uint16_t queues_n)
1861 struct mlx5_hrxq *hrxq;
1863 LIST_FOREACH(hrxq, &priv->hrxqs, next) {
1864 struct mlx5_ind_table_ibv *ind_tbl;
1866 if (hrxq->rss_key_len != rss_key_len)
1868 if (memcmp(hrxq->rss_key, rss_key, rss_key_len))
1870 if (hrxq->hash_fields != hash_fields)
1872 ind_tbl = mlx5_priv_ind_table_ibv_get(priv, queues, queues_n);
1875 if (ind_tbl != hrxq->ind_table) {
1876 mlx5_priv_ind_table_ibv_release(priv, ind_tbl);
1879 rte_atomic32_inc(&hrxq->refcnt);
1880 DEBUG("%p: Hash Rx queue %p: refcnt %d", (void *)priv,
1881 (void *)hrxq, rte_atomic32_read(&hrxq->refcnt));
1888 * Release the hash Rx queue.
1891 * Pointer to private structure.
1893 * Pointer to Hash Rx queue to release.
1896 * 0 on success, errno value on failure.
1899 mlx5_priv_hrxq_release(struct priv *priv, struct mlx5_hrxq *hrxq)
1901 DEBUG("%p: Hash Rx queue %p: refcnt %d", (void *)priv,
1902 (void *)hrxq, rte_atomic32_read(&hrxq->refcnt));
1903 if (rte_atomic32_dec_and_test(&hrxq->refcnt)) {
1904 claim_zero(ibv_destroy_qp(hrxq->qp));
1905 mlx5_priv_ind_table_ibv_release(priv, hrxq->ind_table);
1906 LIST_REMOVE(hrxq, next);
1910 claim_nonzero(mlx5_priv_ind_table_ibv_release(priv, hrxq->ind_table));
1915 * Verify the Verbs hash Rx queue list is empty.
1918 * Pointer to private structure.
1920 * @return The number of objects not released.
1923 mlx5_priv_hrxq_ibv_verify(struct priv *priv)
1925 struct mlx5_hrxq *hrxq;
1928 LIST_FOREACH(hrxq, &priv->hrxqs, next) {
1929 DEBUG("%p: Verbs Hash Rx queue %p still referenced",
1930 (void *)priv, (void *)hrxq);