/*-
 *   BSD LICENSE
 *
 *   Copyright 2015 6WIND S.A.
 *   Copyright 2015 Mellanox.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of 6WIND S.A. nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <sys/queue.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#include <infiniband/mlx5dv.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev.h>
#include <rte_common.h>
#include <rte_interrupts.h>
#include <rte_debug.h>
#include <rte_io.h>

#include "mlx5.h"
#include "mlx5_rxtx.h"
#include "mlx5_utils.h"
#include "mlx5_autoconf.h"
#include "mlx5_defs.h"

/* Initialization data for hash RX queues. */
const struct hash_rxq_init hash_rxq_init[] = {
	[HASH_RXQ_TCPV4] = {
		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
				IBV_RX_HASH_DST_IPV4 |
				IBV_RX_HASH_SRC_PORT_TCP |
				IBV_RX_HASH_DST_PORT_TCP),
		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
		.flow_priority = 0,
		.flow_spec.tcp_udp = {
			.type = IBV_FLOW_SPEC_TCP,
			.size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp),
		},
		.underlayer = &hash_rxq_init[HASH_RXQ_IPV4],
	},
	[HASH_RXQ_UDPV4] = {
		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
				IBV_RX_HASH_DST_IPV4 |
				IBV_RX_HASH_SRC_PORT_UDP |
				IBV_RX_HASH_DST_PORT_UDP),
		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
		.flow_priority = 0,
		.flow_spec.tcp_udp = {
			.type = IBV_FLOW_SPEC_UDP,
			.size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp),
		},
		.underlayer = &hash_rxq_init[HASH_RXQ_IPV4],
	},
	[HASH_RXQ_IPV4] = {
		.hash_fields = (IBV_RX_HASH_SRC_IPV4 |
				IBV_RX_HASH_DST_IPV4),
		.dpdk_rss_hf = (ETH_RSS_IPV4 |
				ETH_RSS_FRAG_IPV4),
		.flow_priority = 1,
		.flow_spec.ipv4 = {
			.type = IBV_FLOW_SPEC_IPV4,
			.size = sizeof(hash_rxq_init[0].flow_spec.ipv4),
		},
		.underlayer = &hash_rxq_init[HASH_RXQ_ETH],
	},
	[HASH_RXQ_TCPV6] = {
		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
				IBV_RX_HASH_DST_IPV6 |
				IBV_RX_HASH_SRC_PORT_TCP |
				IBV_RX_HASH_DST_PORT_TCP),
		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
		.flow_priority = 0,
		.flow_spec.tcp_udp = {
			.type = IBV_FLOW_SPEC_TCP,
			.size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp),
		},
		.underlayer = &hash_rxq_init[HASH_RXQ_IPV6],
	},
	[HASH_RXQ_UDPV6] = {
		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
				IBV_RX_HASH_DST_IPV6 |
				IBV_RX_HASH_SRC_PORT_UDP |
				IBV_RX_HASH_DST_PORT_UDP),
		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
		.flow_priority = 0,
		.flow_spec.tcp_udp = {
			.type = IBV_FLOW_SPEC_UDP,
			.size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp),
		},
		.underlayer = &hash_rxq_init[HASH_RXQ_IPV6],
	},
	[HASH_RXQ_IPV6] = {
		.hash_fields = (IBV_RX_HASH_SRC_IPV6 |
				IBV_RX_HASH_DST_IPV6),
		.dpdk_rss_hf = (ETH_RSS_IPV6 |
				ETH_RSS_FRAG_IPV6),
		.flow_priority = 1,
		.flow_spec.ipv6 = {
			.type = IBV_FLOW_SPEC_IPV6,
			.size = sizeof(hash_rxq_init[0].flow_spec.ipv6),
		},
		.underlayer = &hash_rxq_init[HASH_RXQ_ETH],
	},
	[HASH_RXQ_ETH] = {
		.hash_fields = 0,
		.dpdk_rss_hf = 0,
		.flow_priority = 2,
		.flow_spec.eth = {
			.type = IBV_FLOW_SPEC_ETH,
			.size = sizeof(hash_rxq_init[0].flow_spec.eth),
		},
		.underlayer = NULL,
	},
};

/* Number of entries in hash_rxq_init[]. */
const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
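
/*
 * Note: each entry above chains to its underlayer, so a flow rule built
 * for HASH_RXQ_TCPV4 stacks TCP, IPv4 and Ethernet flow specifications,
 * in that order, on a single ibv_flow_attr; priv_flow_attr() below walks
 * this chain.
 */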

/* Initialization data for hash RX queue indirection tables. */
static const struct ind_table_init ind_table_init[] = {
	{
		.max_size = -1u, /* Superseded by HW limitations. */
		.hash_types =
			1 << HASH_RXQ_TCPV4 |
			1 << HASH_RXQ_UDPV4 |
			1 << HASH_RXQ_IPV4 |
			1 << HASH_RXQ_TCPV6 |
			1 << HASH_RXQ_UDPV6 |
			1 << HASH_RXQ_IPV6 |
			0,
		.hash_types_n = 6,
	},
	{
		.max_size = 1,
		.hash_types = 1 << HASH_RXQ_ETH,
		.hash_types_n = 1,
	},
};

#define IND_TABLE_INIT_N RTE_DIM(ind_table_init)

/* Default RSS hash key also used for ConnectX-3. */
uint8_t rss_hash_default_key[] = {
	0x2c, 0xc6, 0x81, 0xd1,
	0x5b, 0xdb, 0xf4, 0xf7,
	0xfc, 0xa2, 0x83, 0x19,
	0xdb, 0x1a, 0x3e, 0x94,
	0x6b, 0x9e, 0x38, 0xd9,
	0x2c, 0x9c, 0x03, 0xd1,
	0xad, 0x99, 0x44, 0xa7,
	0xd9, 0x56, 0x3d, 0x59,
	0x06, 0x3c, 0x25, 0xf3,
	0xfc, 0x1f, 0xdc, 0x2a,
};

/* Length of the default RSS hash key. */
const size_t rss_hash_default_key_len = sizeof(rss_hash_default_key);
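
/*
 * Illustrative sketch (not compiled into the driver): an application can
 * override this default key through the regular ethdev configuration.
 * The key bytes, queue counts and port_id below are assumptions of the
 * example, not part of this file.
 */
#if 0
	static uint8_t app_rss_key[40]; /* 40-byte Toeplitz key. */
	uint16_t port_id = 0;
	struct rte_eth_conf dev_conf = {
		.rxmode = { .mq_mode = ETH_MQ_RX_RSS },
		.rx_adv_conf.rss_conf = {
			.rss_key = app_rss_key,
			.rss_key_len = sizeof(app_rss_key),
			.rss_hf = ETH_RSS_IP | ETH_RSS_TCP | ETH_RSS_UDP,
		},
	};

	rte_eth_dev_configure(port_id, 4, 4, &dev_conf);
#endif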

/**
 * Populate flow steering rule for a given hash RX queue type using
 * information from hash_rxq_init[]. Nothing is written to flow_attr when
 * flow_attr_size is not large enough, but the required size is still
 * returned.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[out] flow_attr
 *   Pointer to flow attribute structure to fill. Note that the allocated
 *   area must be large enough to hold all flow specifications.
 * @param flow_attr_size
 *   Entire size of flow_attr and trailing room for flow specifications.
 * @param type
 *   Hash RX queue type to use for flow steering rule.
 *
 * @return
 *   Total size of the flow attribute buffer. No errors are defined.
 */
size_t
priv_flow_attr(struct priv *priv, struct ibv_flow_attr *flow_attr,
	       size_t flow_attr_size, enum hash_rxq_type type)
{
	size_t offset = sizeof(*flow_attr);
	const struct hash_rxq_init *init = &hash_rxq_init[type];

	assert(priv != NULL);
	assert((size_t)type < RTE_DIM(hash_rxq_init));
	do {
		offset += init->flow_spec.hdr.size;
		init = init->underlayer;
	} while (init != NULL);
	if (offset > flow_attr_size)
		return offset;
	flow_attr_size = offset;
	init = &hash_rxq_init[type];
	*flow_attr = (struct ibv_flow_attr){
		.type = IBV_FLOW_ATTR_NORMAL,
		/* Priorities < 3 are reserved for flow director. */
		.priority = init->flow_priority + 3,
		.num_of_specs = 0,
		.port = priv->port,
		.flags = 0,
	};
	do {
		offset -= init->flow_spec.hdr.size;
		memcpy((void *)((uintptr_t)flow_attr + offset),
		       &init->flow_spec,
		       init->flow_spec.hdr.size);
		++flow_attr->num_of_specs;
		init = init->underlayer;
	} while (init != NULL);
	return flow_attr_size;
}
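
/*
 * Illustrative sketch (not compiled into the driver): priv_flow_attr() is
 * designed to be called twice, first with a zero size to learn the
 * required buffer size, then again to fill the allocated buffer. A priv
 * pointer is assumed to be in scope.
 */
#if 0
	size_t size = priv_flow_attr(priv, NULL, 0, HASH_RXQ_TCPV4);
	struct ibv_flow_attr *attr = rte_malloc(__func__, size, 0);

	if (attr != NULL) {
		priv_flow_attr(priv, attr, size, HASH_RXQ_TCPV4);
		/* attr now stacks TCP, IPv4 and ETH specs in order. */
		rte_free(attr);
	}
#endif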

/**
 * Convert hash type position in indirection table initializer to
 * hash RX queue type.
 *
 * @param table
 *   Indirection table initializer.
 * @param pos
 *   Hash type position.
 *
 * @return
 *   Hash RX queue type.
 */
static enum hash_rxq_type
hash_rxq_type_from_pos(const struct ind_table_init *table, unsigned int pos)
{
	enum hash_rxq_type type = HASH_RXQ_TCPV4;

	assert(pos < table->hash_types_n);
	do {
		if ((table->hash_types & (1 << type)) && (pos-- == 0))
			break;
		++type;
	} while (1);
	return type;
}
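
/*
 * Worked example: for a table whose hash_types are
 * (1 << HASH_RXQ_TCPV4) | (1 << HASH_RXQ_UDPV6), position 0 yields
 * HASH_RXQ_TCPV4 and position 1 yields HASH_RXQ_UDPV6.
 */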

/**
 * Filter out disabled hash RX queue types from ind_table_init[].
 *
 * @param priv
 *   Pointer to private structure.
 * @param[out] table
 *   Output table.
 *
 * @return
 *   Number of table entries.
 */
static unsigned int
priv_make_ind_table_init(struct priv *priv,
			 struct ind_table_init (*table)[IND_TABLE_INIT_N])
{
	uint64_t rss_hf;
	unsigned int i;
	unsigned int j;
	unsigned int table_n = 0;
	/* Mandatory to receive frames not handled by normal hash RX queues. */
	unsigned int hash_types_sup = 1 << HASH_RXQ_ETH;

	rss_hf = priv->rss_hf;
	/* Process other protocols only if more than one queue. */
	if (priv->rxqs_n > 1)
		for (i = 0; (i != hash_rxq_init_n); ++i)
			if (rss_hf & hash_rxq_init[i].dpdk_rss_hf)
				hash_types_sup |= (1 << i);

	/* Filter out entries whose protocols are not in the set. */
	for (i = 0, j = 0; (i != IND_TABLE_INIT_N); ++i) {
		unsigned int nb;
		unsigned int h;

		/* j is increased only if the table has valid protocols. */
		(*table)[j] = ind_table_init[i];
		(*table)[j].hash_types &= hash_types_sup;
		for (h = 0, nb = 0; (h != hash_rxq_init_n); ++h)
			if (((*table)[j].hash_types >> h) & 0x1)
				++nb;
		(*table)[j].hash_types_n = nb;
		if (nb) {
			++table_n;
			++j;
		}
	}
	return table_n;
}
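
/*
 * Worked example: with a single Rx queue, hash_types_sup is only
 * 1 << HASH_RXQ_ETH, so the first ind_table_init[] entry loses all of
 * its hash types and is dropped; only the single-entry Ethernet table
 * remains and one table entry is returned.
 */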

/**
 * Initialize hash RX queues and indirection table.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   0 on success, errno value on failure.
 */
int
priv_create_hash_rxqs(struct priv *priv)
{
	struct ibv_wq *wqs[priv->reta_idx_n];
	struct ind_table_init ind_table_init[IND_TABLE_INIT_N];
	unsigned int ind_tables_n =
		priv_make_ind_table_init(priv, &ind_table_init);
	unsigned int hash_rxqs_n = 0;
	struct hash_rxq (*hash_rxqs)[] = NULL;
	struct ibv_rwq_ind_table *(*ind_tables)[] = NULL;
	unsigned int i;
	unsigned int j;
	unsigned int k;
	int err = 0;

	assert(priv->ind_tables == NULL);
	assert(priv->ind_tables_n == 0);
	assert(priv->hash_rxqs == NULL);
	assert(priv->hash_rxqs_n == 0);
	assert(priv->pd != NULL);
	assert(priv->ctx != NULL);
	if (priv->isolated)
		return 0;
	if (priv->rxqs_n == 0)
		return EINVAL;
	assert(priv->rxqs != NULL);
	if (ind_tables_n == 0) {
		ERROR("all hash RX queue types have been filtered out,"
		      " indirection table cannot be created");
		return EINVAL;
	}
	if (priv->rxqs_n & (priv->rxqs_n - 1)) {
		INFO("%u RX queues are configured, consider rounding this"
		     " number to the next power of two for better balancing",
		     priv->rxqs_n);
		DEBUG("indirection table extended to assume %u WQs",
		      priv->reta_idx_n);
	}
	for (i = 0; (i != priv->reta_idx_n); ++i) {
		struct mlx5_rxq_ctrl *rxq_ctrl;

		rxq_ctrl = container_of((*priv->rxqs)[(*priv->reta_idx)[i]],
					struct mlx5_rxq_ctrl, rxq);
		wqs[i] = rxq_ctrl->ibv->wq;
	}
	/* Get number of hash RX queues to configure. */
	for (i = 0, hash_rxqs_n = 0; (i != ind_tables_n); ++i)
		hash_rxqs_n += ind_table_init[i].hash_types_n;
	DEBUG("allocating %u hash RX queues for %u WQs, %u indirection tables",
	      hash_rxqs_n, priv->rxqs_n, ind_tables_n);
	/* Create indirection tables. */
	ind_tables = rte_calloc(__func__, ind_tables_n,
				sizeof((*ind_tables)[0]), 0);
	if (ind_tables == NULL) {
		err = ENOMEM;
		ERROR("cannot allocate indirection tables container: %s",
		      strerror(err));
		goto error;
	}
	for (i = 0; (i != ind_tables_n); ++i) {
		struct ibv_rwq_ind_table_init_attr ind_init_attr = {
			.log_ind_tbl_size = 0, /* Set below. */
			.ind_tbl = wqs,
			.comp_mask = 0,
		};
		unsigned int ind_tbl_size = ind_table_init[i].max_size;
		struct ibv_rwq_ind_table *ind_table;

		if (priv->reta_idx_n < ind_tbl_size)
			ind_tbl_size = priv->reta_idx_n;
		ind_init_attr.log_ind_tbl_size = log2above(ind_tbl_size);
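		/*
		 * Worked example: six WQs round up to an eight-entry
		 * table since log2above(6) == 3 and indirection tables
		 * must have a power-of-two size.
		 */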
		errno = 0;
		ind_table = ibv_create_rwq_ind_table(priv->ctx,
						     &ind_init_attr);
		if (ind_table != NULL) {
			(*ind_tables)[i] = ind_table;
			continue;
		}
		/* Not clear whether errno is set. */
		err = (errno ? errno : EINVAL);
		ERROR("RX indirection table creation failed with error %d: %s",
		      err, strerror(err));
		goto error;
	}
	/* Allocate array that holds hash RX queues and related data. */
	hash_rxqs = rte_calloc(__func__, hash_rxqs_n,
			       sizeof((*hash_rxqs)[0]), 0);
	if (hash_rxqs == NULL) {
		err = ENOMEM;
		ERROR("cannot allocate hash RX queues container: %s",
		      strerror(err));
		goto error;
	}
	for (i = 0, j = 0, k = 0;
	     ((i != hash_rxqs_n) && (j != ind_tables_n));
	     ++i) {
		struct hash_rxq *hash_rxq = &(*hash_rxqs)[i];
		enum hash_rxq_type type =
			hash_rxq_type_from_pos(&ind_table_init[j], k);
		struct rte_eth_rss_conf *priv_rss_conf =
			(*priv->rss_conf)[type];
		struct ibv_rx_hash_conf hash_conf = {
			.rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ,
			.rx_hash_key_len = (priv_rss_conf ?
					    priv_rss_conf->rss_key_len :
					    rss_hash_default_key_len),
			.rx_hash_key = (priv_rss_conf ?
					priv_rss_conf->rss_key :
					rss_hash_default_key),
			.rx_hash_fields_mask = hash_rxq_init[type].hash_fields,
		};
		struct ibv_qp_init_attr_ex qp_init_attr = {
			.qp_type = IBV_QPT_RAW_PACKET,
			.comp_mask = (IBV_QP_INIT_ATTR_PD |
				      IBV_QP_INIT_ATTR_IND_TABLE |
				      IBV_QP_INIT_ATTR_RX_HASH),
			.rx_hash_conf = hash_conf,
			.rwq_ind_tbl = (*ind_tables)[j],
			.pd = priv->pd,
		};

		DEBUG("using indirection table %u for hash RX queue %u type %d",
		      j, i, type);
		*hash_rxq = (struct hash_rxq){
			.priv = priv,
			.qp = ibv_create_qp_ex(priv->ctx, &qp_init_attr),
			.type = type,
		};
		if (hash_rxq->qp == NULL) {
			err = (errno ? errno : EINVAL);
			ERROR("Hash RX QP creation failure: %s",
			      strerror(err));
			goto error;
		}
		if (++k < ind_table_init[j].hash_types_n)
			continue;
		/* Switch to the next indirection table and reset hash RX
		 * queue type array index. */
		++j;
		k = 0;
	}
	priv->ind_tables = ind_tables;
	priv->ind_tables_n = ind_tables_n;
	priv->hash_rxqs = hash_rxqs;
	priv->hash_rxqs_n = hash_rxqs_n;
	assert(err == 0);
	return 0;
error:
	if (hash_rxqs != NULL) {
		for (i = 0; (i != hash_rxqs_n); ++i) {
			struct ibv_qp *qp = (*hash_rxqs)[i].qp;

			if (qp == NULL)
				continue;
			claim_zero(ibv_destroy_qp(qp));
		}
		rte_free(hash_rxqs);
	}
	if (ind_tables != NULL) {
		for (j = 0; (j != ind_tables_n); ++j) {
			struct ibv_rwq_ind_table *ind_table =
				(*ind_tables)[j];

			if (ind_table == NULL)
				continue;
			claim_zero(ibv_destroy_rwq_ind_table(ind_table));
		}
		rte_free(ind_tables);
	}
	return err;
}

/**
 * Clean up hash RX queues and indirection table.
 *
 * @param priv
 *   Pointer to private structure.
 */
void
priv_destroy_hash_rxqs(struct priv *priv)
{
	unsigned int i;

	DEBUG("destroying %u hash RX queues", priv->hash_rxqs_n);
	if (priv->hash_rxqs_n == 0) {
		assert(priv->hash_rxqs == NULL);
		assert(priv->ind_tables == NULL);
		return;
	}
	for (i = 0; (i != priv->hash_rxqs_n); ++i) {
		struct hash_rxq *hash_rxq = &(*priv->hash_rxqs)[i];
		unsigned int j, k;

		assert(hash_rxq->priv == priv);
		assert(hash_rxq->qp != NULL);
		/* Also check that there are no remaining flows. */
		for (j = 0; (j != RTE_DIM(hash_rxq->special_flow)); ++j)
			for (k = 0;
			     (k != RTE_DIM(hash_rxq->special_flow[j]));
			     ++k)
				assert(hash_rxq->special_flow[j][k] == NULL);
		for (j = 0; (j != RTE_DIM(hash_rxq->mac_flow)); ++j)
			for (k = 0; (k != RTE_DIM(hash_rxq->mac_flow[j])); ++k)
				assert(hash_rxq->mac_flow[j][k] == NULL);
		claim_zero(ibv_destroy_qp(hash_rxq->qp));
	}
	priv->hash_rxqs_n = 0;
	rte_free(priv->hash_rxqs);
	priv->hash_rxqs = NULL;
	for (i = 0; (i != priv->ind_tables_n); ++i) {
		struct ibv_rwq_ind_table *ind_table =
			(*priv->ind_tables)[i];

		assert(ind_table != NULL);
		claim_zero(ibv_destroy_rwq_ind_table(ind_table));
	}
	priv->ind_tables_n = 0;
	rte_free(priv->ind_tables);
	priv->ind_tables = NULL;
}

/**
 * Check whether a given flow type is allowed.
 *
 * @param priv
 *   Pointer to private structure.
 * @param type
 *   Flow type to check.
 *
 * @return
 *   Nonzero if the given flow type is allowed.
 */
int
priv_allow_flow_type(struct priv *priv, enum hash_rxq_flow_type type)
{
	/* Only FLOW_TYPE_PROMISC is allowed when promiscuous mode
	 * has been requested. */
	if (priv->promisc_req)
		return type == HASH_RXQ_FLOW_TYPE_PROMISC;
	switch (type) {
	case HASH_RXQ_FLOW_TYPE_PROMISC:
		return !!priv->promisc_req;
	case HASH_RXQ_FLOW_TYPE_ALLMULTI:
		return !!priv->allmulti_req;
	case HASH_RXQ_FLOW_TYPE_BROADCAST:
	case HASH_RXQ_FLOW_TYPE_IPV6MULTI:
		/* If allmulti is enabled, broadcast and ipv6multi
		 * are unnecessary. */
		return !priv->allmulti_req;
	case HASH_RXQ_FLOW_TYPE_MAC:
		return 1;
	default:
		/* Unsupported flow type is not allowed. */
		return 0;
	}
	return 0;
}

/**
 * Automatically enable/disable flows according to configuration.
 *
 * @param priv
 *   Private structure.
 *
 * @return
 *   0 on success, errno value on failure.
 */
int
priv_rehash_flows(struct priv *priv)
{
	enum hash_rxq_flow_type i;

	for (i = HASH_RXQ_FLOW_TYPE_PROMISC;
			i != RTE_DIM((*priv->hash_rxqs)[0].special_flow);
			++i)
		if (!priv_allow_flow_type(priv, i)) {
			priv_special_flow_disable(priv, i);
		} else {
			int ret = priv_special_flow_enable(priv, i);

			if (ret)
				return ret;
		}
	if (priv_allow_flow_type(priv, HASH_RXQ_FLOW_TYPE_MAC))
		return priv_mac_addrs_enable(priv);
	priv_mac_addrs_disable(priv);
	return 0;
}

/**
 * Allocate RX queue elements.
 *
 * @param rxq_ctrl
 *   Pointer to RX queue structure.
 *
 * @return
 *   0 on success, errno value on failure.
 */
int
rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
{
	const unsigned int sges_n = 1 << rxq_ctrl->rxq.sges_n;
	unsigned int elts_n = 1 << rxq_ctrl->rxq.elts_n;
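	/*
	 * Worked example: rxq.elts_n == 8 and rxq.sges_n == 2 yield 256
	 * segments backing at most 64 packets of 4 segments each.
	 */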
	unsigned int i;
	int ret = 0;

	/* Iterate on segments. */
	for (i = 0; (i != elts_n); ++i) {
		struct rte_mbuf *buf;

		buf = rte_pktmbuf_alloc(rxq_ctrl->rxq.mp);
		if (buf == NULL) {
			ERROR("%p: empty mbuf pool", (void *)rxq_ctrl);
			ret = ENOMEM;
			goto error;
		}
		/* Headroom is reserved by rte_pktmbuf_alloc(). */
		assert(DATA_OFF(buf) == RTE_PKTMBUF_HEADROOM);
		/* Buffer is supposed to be empty. */
		assert(rte_pktmbuf_data_len(buf) == 0);
		assert(rte_pktmbuf_pkt_len(buf) == 0);
		assert(!buf->next);
		/* Only the first segment keeps headroom. */
		if (i % sges_n)
			SET_DATA_OFF(buf, 0);
		PORT(buf) = rxq_ctrl->rxq.port_id;
		DATA_LEN(buf) = rte_pktmbuf_tailroom(buf);
		PKT_LEN(buf) = DATA_LEN(buf);
		NB_SEGS(buf) = 1;
		(*rxq_ctrl->rxq.elts)[i] = buf;
	}
	/* If Rx vector is activated. */
	if (rxq_check_vec_support(&rxq_ctrl->rxq) > 0) {
		struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
		struct rte_mbuf *mbuf_init = &rxq->fake_mbuf;
		int j;

		/* Initialize default rearm_data for vPMD. */
		mbuf_init->data_off = RTE_PKTMBUF_HEADROOM;
		rte_mbuf_refcnt_set(mbuf_init, 1);
		mbuf_init->nb_segs = 1;
		mbuf_init->port = rxq->port_id;
		/*
		 * prevent compiler reordering:
		 * rearm_data covers previous fields.
		 */
		rte_compiler_barrier();
		rxq->mbuf_initializer =
			*(uint64_t *)&mbuf_init->rearm_data;
		/* Padding with a fake mbuf for vectorized Rx. */
		for (j = 0; j < MLX5_VPMD_DESCS_PER_LOOP; ++j)
			(*rxq->elts)[elts_n + j] = &rxq->fake_mbuf;
	}
	DEBUG("%p: allocated and configured %u segments (max %u packets)",
	      (void *)rxq_ctrl, elts_n, elts_n / (1 << rxq_ctrl->rxq.sges_n));
	return 0;
error:
	elts_n = i;
	for (i = 0; (i != elts_n); ++i) {
		if ((*rxq_ctrl->rxq.elts)[i] != NULL)
			rte_pktmbuf_free_seg((*rxq_ctrl->rxq.elts)[i]);
		(*rxq_ctrl->rxq.elts)[i] = NULL;
	}
	DEBUG("%p: failed, freed everything", (void *)rxq_ctrl);
	assert(ret > 0);
	return ret;
}

/**
 * Free RX queue elements.
 *
 * @param rxq_ctrl
 *   Pointer to RX queue structure.
 */
static void
rxq_free_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
{
	struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
	const uint16_t q_n = (1 << rxq->elts_n);
	const uint16_t q_mask = q_n - 1;
	uint16_t used = q_n - (rxq->rq_ci - rxq->rq_pi);
	uint16_t i;

	DEBUG("%p: freeing WRs", (void *)rxq_ctrl);
	if (rxq->elts == NULL)
		return;
	/*
	 * Some mbufs in the ring still belong to the application;
	 * they cannot be freed.
	 */
	if (rxq_check_vec_support(rxq) > 0) {
		for (i = 0; i < used; ++i)
			(*rxq->elts)[(rxq->rq_ci + i) & q_mask] = NULL;
		rxq->rq_pi = rxq->rq_ci;
	}
	for (i = 0; (i != (1u << rxq->elts_n)); ++i) {
		if ((*rxq->elts)[i] != NULL)
			rte_pktmbuf_free_seg((*rxq->elts)[i]);
		(*rxq->elts)[i] = NULL;
	}
}

/**
 * Clean up a RX queue.
 *
 * Destroy objects, free allocated memory and reset the structure for reuse.
 *
 * @param rxq_ctrl
 *   Pointer to RX queue structure.
 */
void
mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *rxq_ctrl)
{
	DEBUG("cleaning up %p", (void *)rxq_ctrl);
	if (rxq_ctrl->ibv)
		mlx5_priv_rxq_ibv_release(rxq_ctrl->priv, rxq_ctrl->ibv);
	memset(rxq_ctrl, 0, sizeof(*rxq_ctrl));
}

/**
 * DPDK callback to configure a RX queue.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   RX queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 * @param[in] conf
 *   Thresholds parameters.
 * @param mp
 *   Memory pool for buffer allocations.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
int
mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
		    unsigned int socket, const struct rte_eth_rxconf *conf,
		    struct rte_mempool *mp)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_rxq_data *rxq = (*priv->rxqs)[idx];
	struct mlx5_rxq_ctrl *rxq_ctrl =
		container_of(rxq, struct mlx5_rxq_ctrl, rxq);
	int ret = 0;

	(void)conf;
	if (mlx5_is_secondary())
		return -E_RTE_SECONDARY;
	priv_lock(priv);
	if (!rte_is_power_of_2(desc)) {
		desc = 1 << log2above(desc);
		WARN("%p: increased number of descriptors in RX queue %u"
		     " to the next power of two (%d)",
		     (void *)dev, idx, desc);
	}
	DEBUG("%p: configuring queue %u for %u descriptors",
	      (void *)dev, idx, desc);
	if (idx >= priv->rxqs_n) {
		ERROR("%p: queue index out of range (%u >= %u)",
		      (void *)dev, idx, priv->rxqs_n);
		priv_unlock(priv);
		return -EOVERFLOW;
	}
	if (!mlx5_priv_rxq_releasable(priv, idx)) {
		ret = EBUSY;
		ERROR("%p: unable to release queue index %u",
		      (void *)dev, idx);
		goto out;
	}
	mlx5_priv_rxq_release(priv, idx);
	rxq_ctrl = mlx5_priv_rxq_new(priv, idx, desc, socket, mp);
	if (!rxq_ctrl) {
		ERROR("%p: unable to allocate queue index %u",
		      (void *)dev, idx);
		ret = ENOMEM;
		goto out;
	}
	DEBUG("%p: adding RX queue %p to list",
	      (void *)dev, (void *)rxq_ctrl);
	(*priv->rxqs)[idx] = &rxq_ctrl->rxq;
out:
	priv_unlock(priv);
	return -ret;
}

/**
 * DPDK callback to release a RX queue.
 *
 * @param dpdk_rxq
 *   Generic RX queue pointer.
 */
void
mlx5_rx_queue_release(void *dpdk_rxq)
{
	struct mlx5_rxq_data *rxq = (struct mlx5_rxq_data *)dpdk_rxq;
	struct mlx5_rxq_ctrl *rxq_ctrl;
	struct priv *priv;

	if (mlx5_is_secondary())
		return;
	if (rxq == NULL)
		return;
	rxq_ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq);
	priv = rxq_ctrl->priv;
	priv_lock(priv);
	if (!mlx5_priv_rxq_releasable(priv, rxq_ctrl->rxq.stats.idx))
		rte_panic("Rx queue %p is still used by a flow and cannot be"
			  " removed\n", (void *)rxq_ctrl);
	mlx5_priv_rxq_release(priv, rxq_ctrl->rxq.stats.idx);
	priv_unlock(priv);
}

/**
 * Allocate queue vector and fill epoll fd list for Rx interrupts.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   0 on success, negative on failure.
 */
int
priv_rx_intr_vec_enable(struct priv *priv)
{
	unsigned int i;
	unsigned int rxqs_n = priv->rxqs_n;
	unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
	unsigned int count = 0;
	struct rte_intr_handle *intr_handle = priv->dev->intr_handle;

	assert(!mlx5_is_secondary());
	if (!priv->dev->data->dev_conf.intr_conf.rxq)
		return 0;
	priv_rx_intr_vec_disable(priv);
	intr_handle->intr_vec = malloc(n * sizeof(intr_handle->intr_vec[0]));
	if (intr_handle->intr_vec == NULL) {
		ERROR("failed to allocate memory for interrupt vector,"
		      " Rx interrupts will not be supported");
		return -ENOMEM;
	}
	intr_handle->type = RTE_INTR_HANDLE_EXT;
	for (i = 0; i != n; ++i) {
		/* This rxq ibv must not be released in this function. */
		struct mlx5_rxq_ibv *rxq_ibv = mlx5_priv_rxq_ibv_get(priv, i);
		int fd;
		int flags;
		int rc;

		/* Skip queues that cannot request interrupts. */
		if (!rxq_ibv || !rxq_ibv->channel) {
			/* Use invalid intr_vec[] index to disable entry. */
			intr_handle->intr_vec[i] =
				RTE_INTR_VEC_RXTX_OFFSET +
				RTE_MAX_RXTX_INTR_VEC_ID;
			continue;
		}
		if (count >= RTE_MAX_RXTX_INTR_VEC_ID) {
			ERROR("too many Rx queues for interrupt vector size"
			      " (%d), Rx interrupts cannot be enabled",
			      RTE_MAX_RXTX_INTR_VEC_ID);
			priv_rx_intr_vec_disable(priv);
			return -1;
		}
		fd = rxq_ibv->channel->fd;
		flags = fcntl(fd, F_GETFL);
		rc = fcntl(fd, F_SETFL, flags | O_NONBLOCK);
		if (rc < 0) {
			ERROR("failed to make Rx interrupt file descriptor"
			      " %d non-blocking for queue index %d", fd, i);
			priv_rx_intr_vec_disable(priv);
			return -1;
		}
		intr_handle->intr_vec[i] = RTE_INTR_VEC_RXTX_OFFSET + count;
		intr_handle->efds[count] = fd;
		count++;
	}
	if (!count)
		priv_rx_intr_vec_disable(priv);
	else
		intr_handle->nb_efd = count;
	return 0;
}

/**
 * Clean up Rx interrupts handler.
 *
 * @param priv
 *   Pointer to private structure.
 */
void
priv_rx_intr_vec_disable(struct priv *priv)
{
	struct rte_intr_handle *intr_handle = priv->dev->intr_handle;
	unsigned int i;
	unsigned int rxqs_n = priv->rxqs_n;
	unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);

	if (!priv->dev->data->dev_conf.intr_conf.rxq)
		return;
	for (i = 0; i != n; ++i) {
		struct mlx5_rxq_ctrl *rxq_ctrl;
		struct mlx5_rxq_data *rxq_data;

		if (intr_handle->intr_vec[i] == RTE_INTR_VEC_RXTX_OFFSET +
		    RTE_MAX_RXTX_INTR_VEC_ID)
			continue;
		/*
		 * Need to access directly the queue to release the reference
		 * kept in priv_rx_intr_vec_enable().
		 */
		rxq_data = (*priv->rxqs)[i];
		rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
		mlx5_priv_rxq_ibv_release(priv, rxq_ctrl->ibv);
	}
	rte_intr_free_epoll_fd(intr_handle);
	free(intr_handle->intr_vec);
	intr_handle->nb_efd = 0;
	intr_handle->intr_vec = NULL;
}

/**
 * MLX5 CQ notification.
 *
 * @param rxq
 *   Pointer to receive queue structure.
 * @param sq_n_rxq
 *   Sequence number per receive queue.
 */
static inline void
mlx5_arm_cq(struct mlx5_rxq_data *rxq, int sq_n_rxq)
{
	int sq_n = 0;
	uint32_t doorbell_hi;
	uint64_t doorbell;
	void *cq_db_reg = (char *)rxq->cq_uar + MLX5_CQ_DOORBELL;

	sq_n = sq_n_rxq & MLX5_CQ_SQN_MASK;
	doorbell_hi = sq_n << MLX5_CQ_SQN_OFFSET | (rxq->cq_ci & MLX5_CI_MASK);
	doorbell = (uint64_t)doorbell_hi << 32;
	doorbell |= rxq->cqn;
	rxq->cq_db[MLX5_CQ_ARM_DB] = rte_cpu_to_be_32(doorbell_hi);
	rte_wmb();
	rte_write64(rte_cpu_to_be_64(doorbell), cq_db_reg);
}
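
/*
 * Worked example for mlx5_arm_cq(): with sq_n_rxq == 1, cq_ci == 0x20 and
 * cqn == 0x54, doorbell_hi becomes (1 << MLX5_CQ_SQN_OFFSET) | 0x20; the
 * CQ doorbell record receives doorbell_hi while the 64-bit value written
 * to the UAR register is ((uint64_t)doorbell_hi << 32) | 0x54.
 */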

/**
 * DPDK callback for Rx queue interrupt enable.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param rx_queue_id
 *   Rx queue number.
 *
 * @return
 *   0 on success, negative on failure.
 */
int
mlx5_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
	struct priv *priv = mlx5_get_priv(dev);
	struct mlx5_rxq_data *rxq_data;
	struct mlx5_rxq_ctrl *rxq_ctrl;
	int ret = 0;

	priv_lock(priv);
	rxq_data = (*priv->rxqs)[rx_queue_id];
	if (!rxq_data) {
		ret = EINVAL;
		goto exit;
	}
	rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
	if (rxq_ctrl->irq) {
		struct mlx5_rxq_ibv *rxq_ibv;

		rxq_ibv = mlx5_priv_rxq_ibv_get(priv, rx_queue_id);
		if (!rxq_ibv) {
			ret = EINVAL;
			goto exit;
		}
		mlx5_arm_cq(rxq_data, rxq_data->cq_arm_sn);
		mlx5_priv_rxq_ibv_release(priv, rxq_ibv);
	}
exit:
	priv_unlock(priv);
	if (ret)
		WARN("unable to arm interrupt on rx queue %d", rx_queue_id);
	return -ret;
}
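
/*
 * Illustrative application-side sketch (not part of the driver): the
 * interrupt armed above is normally consumed through the generic ethdev
 * epoll API; port_id and queue_id are assumptions of the example.
 */
#if 0
	struct rte_epoll_event event;
	uint16_t port_id = 0;
	uint16_t queue_id = 0;

	rte_eth_dev_rx_intr_ctl_q(port_id, queue_id, RTE_EPOLL_PER_THREAD,
				  RTE_INTR_EVENT_ADD, NULL);
	rte_eth_dev_rx_intr_enable(port_id, queue_id);
	/* Sleep until a packet arrives, then resume polling. */
	rte_epoll_wait(RTE_EPOLL_PER_THREAD, &event, 1, -1);
	rte_eth_dev_rx_intr_disable(port_id, queue_id);
#endif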

/**
 * DPDK callback for Rx queue interrupt disable.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param rx_queue_id
 *   Rx queue number.
 *
 * @return
 *   0 on success, negative on failure.
 */
int
mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
	struct priv *priv = mlx5_get_priv(dev);
	struct mlx5_rxq_data *rxq_data;
	struct mlx5_rxq_ctrl *rxq_ctrl;
	struct mlx5_rxq_ibv *rxq_ibv = NULL;
	struct ibv_cq *ev_cq;
	void *ev_ctx;
	int ret = 0;

	priv_lock(priv);
	rxq_data = (*priv->rxqs)[rx_queue_id];
	if (!rxq_data) {
		ret = EINVAL;
		goto exit;
	}
	rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
	if (!rxq_ctrl->irq)
		goto exit;
	rxq_ibv = mlx5_priv_rxq_ibv_get(priv, rx_queue_id);
	if (!rxq_ibv) {
		ret = EINVAL;
		goto exit;
	}
	ret = ibv_get_cq_event(rxq_ibv->channel, &ev_cq, &ev_ctx);
	if (ret || ev_cq != rxq_ibv->cq) {
		ret = EINVAL;
		goto exit;
	}
	rxq_data->cq_arm_sn++;
	ibv_ack_cq_events(rxq_ibv->cq, 1);
exit:
	if (rxq_ibv)
		mlx5_priv_rxq_ibv_release(priv, rxq_ibv);
	priv_unlock(priv);
	if (ret)
		WARN("unable to disable interrupt on rx queue %d",
		     rx_queue_id);
	return -ret;
}

/**
 * Create the Rx queue Verbs object.
 *
 * @param priv
 *   Pointer to private structure.
 * @param idx
 *   Queue index in DPDK Rx queue array.
 *
 * @return
 *   The Verbs object initialised if it can be created.
 */
struct mlx5_rxq_ibv*
mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx)
{
	struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
	struct mlx5_rxq_ctrl *rxq_ctrl =
		container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
	struct ibv_wq_attr mod;
	union {
		struct ibv_cq_init_attr_ex cq;
		struct ibv_wq_init_attr wq;
		struct ibv_cq_ex cq_attr;
	} attr;
	unsigned int cqe_n = (1 << rxq_data->elts_n) - 1;
	struct mlx5_rxq_ibv *tmpl;
	struct mlx5dv_cq cq_info;
	struct mlx5dv_rwq rwq;
	unsigned int i;
	int ret = 0;
	struct mlx5dv_obj obj;

	assert(rxq_data);
	assert(!rxq_ctrl->ibv);
	tmpl = rte_calloc_socket(__func__, 1, sizeof(*tmpl), 0,
				 rxq_ctrl->socket);
	if (!tmpl) {
		ERROR("%p: cannot allocate verbs resources",
		      (void *)rxq_ctrl);
		return NULL;
	}
	tmpl->rxq_ctrl = rxq_ctrl;
	/* Use the entire RX mempool as the memory region. */
	tmpl->mr = priv_mr_get(priv, rxq_data->mp);
	if (!tmpl->mr) {
		tmpl->mr = priv_mr_new(priv, rxq_data->mp);
		if (!tmpl->mr) {
			ERROR("%p: MR creation failure", (void *)rxq_ctrl);
			goto error;
		}
	}
	if (rxq_ctrl->irq) {
		tmpl->channel = ibv_create_comp_channel(priv->ctx);
		if (!tmpl->channel) {
			ERROR("%p: Comp Channel creation failure",
			      (void *)rxq_ctrl);
			goto error;
		}
	}
	attr.cq = (struct ibv_cq_init_attr_ex){
		.comp_mask = 0,
	};
	if (priv->cqe_comp) {
		attr.cq.comp_mask |= IBV_CQ_INIT_ATTR_MASK_FLAGS;
		attr.cq.flags |= MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE;
		/*
		 * For vectorized Rx, it must not be doubled in order to
		 * make cq_ci and rq_ci aligned.
		 */
		if (rxq_check_vec_support(rxq_data) < 0)
			cqe_n *= 2;
	}
	tmpl->cq = ibv_create_cq(priv->ctx, cqe_n, NULL, tmpl->channel, 0);
	if (tmpl->cq == NULL) {
		ERROR("%p: CQ creation failure", (void *)rxq_ctrl);
		goto error;
	}
	DEBUG("priv->device_attr.max_qp_wr is %d",
	      priv->device_attr.orig_attr.max_qp_wr);
	DEBUG("priv->device_attr.max_sge is %d",
	      priv->device_attr.orig_attr.max_sge);
	attr.wq = (struct ibv_wq_init_attr){
		.wq_context = NULL, /* Could be useful in the future. */
		.wq_type = IBV_WQT_RQ,
		/* Max number of outstanding WRs. */
		.max_wr = (1 << rxq_data->elts_n) >> rxq_data->sges_n,
		/* Max number of scatter/gather elements in a WR. */
		.max_sge = 1 << rxq_data->sges_n,
		.pd = priv->pd,
		.cq = tmpl->cq,
		.comp_mask =
			IBV_WQ_FLAGS_CVLAN_STRIPPING |
			0,
		.create_flags = (rxq_data->vlan_strip ?
				 IBV_WQ_FLAGS_CVLAN_STRIPPING :
				 0),
	};
	/* By default, FCS (CRC) is stripped by hardware. */
	if (rxq_data->crc_present) {
		attr.wq.create_flags |= IBV_WQ_FLAGS_SCATTER_FCS;
		attr.wq.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
	}
#ifdef HAVE_IBV_WQ_FLAG_RX_END_PADDING
	if (priv->hw_padding) {
		attr.wq.create_flags |= IBV_WQ_FLAG_RX_END_PADDING;
		attr.wq.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
	}
#endif
	tmpl->wq = ibv_create_wq(priv->ctx, &attr.wq);
	if (tmpl->wq == NULL) {
		ERROR("%p: WQ creation failure", (void *)rxq_ctrl);
		goto error;
	}
	/*
	 * Make sure number of WRs*SGEs match expectations since a queue
	 * cannot allocate more than "desc" buffers.
	 */
	if (((int)attr.wq.max_wr !=
	     ((1 << rxq_data->elts_n) >> rxq_data->sges_n)) ||
	    ((int)attr.wq.max_sge != (1 << rxq_data->sges_n))) {
		ERROR("%p: requested %u*%u but got %u*%u WRs*SGEs",
		      (void *)rxq_ctrl,
		      ((1 << rxq_data->elts_n) >> rxq_data->sges_n),
		      (1 << rxq_data->sges_n),
		      attr.wq.max_wr, attr.wq.max_sge);
		goto error;
	}
	/* Change queue state to ready. */
	mod = (struct ibv_wq_attr){
		.attr_mask = IBV_WQ_ATTR_STATE,
		.wq_state = IBV_WQS_RDY,
	};
	ret = ibv_modify_wq(tmpl->wq, &mod);
	if (ret) {
		ERROR("%p: WQ state to IBV_WQS_RDY failed",
		      (void *)rxq_ctrl);
		goto error;
	}
	obj.cq.in = tmpl->cq;
	obj.cq.out = &cq_info;
	obj.rwq.in = tmpl->wq;
	obj.rwq.out = &rwq;
	ret = mlx5dv_init_obj(&obj, MLX5DV_OBJ_CQ | MLX5DV_OBJ_RWQ);
	if (ret != 0)
		goto error;
	if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
		ERROR("Wrong MLX5_CQE_SIZE environment variable value: "
		      "it should be set to %u", RTE_CACHE_LINE_SIZE);
		goto error;
	}
	/* Fill the rings. */
	rxq_data->wqes = (volatile struct mlx5_wqe_data_seg (*)[])
		(uintptr_t)rwq.buf;
	for (i = 0; (i != (unsigned int)(1 << rxq_data->elts_n)); ++i) {
		struct rte_mbuf *buf = (*rxq_data->elts)[i];
		volatile struct mlx5_wqe_data_seg *scat = &(*rxq_data->wqes)[i];

		/* scat->addr must be able to store a pointer. */
		assert(sizeof(scat->addr) >= sizeof(uintptr_t));
		*scat = (struct mlx5_wqe_data_seg){
			.addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf,
								  uintptr_t)),
			.byte_count = rte_cpu_to_be_32(DATA_LEN(buf)),
			.lkey = tmpl->mr->lkey,
		};
	}
	rxq_data->rq_db = rwq.dbrec;
	rxq_data->cqe_n = log2above(cq_info.cqe_cnt);
	rxq_data->cq_ci = 0;
	rxq_data->rq_ci = 0;
	rxq_data->rq_pi = 0;
	rxq_data->zip = (struct rxq_zip){
		.ai = 0,
	};
	rxq_data->cq_db = cq_info.dbrec;
	rxq_data->cqes = (volatile struct mlx5_cqe (*)[])(uintptr_t)cq_info.buf;
	/* Update doorbell counter. */
	rxq_data->rq_ci = (1 << rxq_data->elts_n) >> rxq_data->sges_n;
	rte_wmb();
	*rxq_data->rq_db = rte_cpu_to_be_32(rxq_data->rq_ci);
	DEBUG("%p: rxq updated with %p", (void *)rxq_ctrl, (void *)&tmpl);
	rte_atomic32_inc(&tmpl->refcnt);
	DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void *)priv,
	      (void *)tmpl, rte_atomic32_read(&tmpl->refcnt));
	LIST_INSERT_HEAD(&priv->rxqsibv, tmpl, next);
	return tmpl;
error:
	if (tmpl->wq)
		claim_zero(ibv_destroy_wq(tmpl->wq));
	if (tmpl->cq)
		claim_zero(ibv_destroy_cq(tmpl->cq));
	if (tmpl->channel)
		claim_zero(ibv_destroy_comp_channel(tmpl->channel));
	if (tmpl->mr)
		priv_mr_release(priv, tmpl->mr);
	return NULL;
}

/**
 * Get an Rx queue Verbs object.
 *
 * @param priv
 *   Pointer to private structure.
 * @param idx
 *   Queue index in DPDK Rx queue array.
 *
 * @return
 *   The Verbs object if it exists.
 */
struct mlx5_rxq_ibv*
mlx5_priv_rxq_ibv_get(struct priv *priv, uint16_t idx)
{
	struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
	struct mlx5_rxq_ctrl *rxq_ctrl;

	if (idx >= priv->rxqs_n)
		return NULL;
	if (!rxq_data)
		return NULL;
	rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
	if (rxq_ctrl->ibv) {
		priv_mr_get(priv, rxq_data->mp);
		rte_atomic32_inc(&rxq_ctrl->ibv->refcnt);
		DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void *)priv,
		      (void *)rxq_ctrl->ibv,
		      rte_atomic32_read(&rxq_ctrl->ibv->refcnt));
	}
	return rxq_ctrl->ibv;
}

/**
 * Release an Rx verbs queue object.
 *
 * @param priv
 *   Pointer to private structure.
 * @param rxq_ibv
 *   Verbs Rx queue object.
 *
 * @return
 *   0 on success, errno value on failure.
 */
int
mlx5_priv_rxq_ibv_release(struct priv *priv, struct mlx5_rxq_ibv *rxq_ibv)
{
	int ret;

	assert(rxq_ibv);
	assert(rxq_ibv->wq);
	assert(rxq_ibv->cq);
	assert(rxq_ibv->mr);
	ret = priv_mr_release(priv, rxq_ibv->mr);
	if (!ret)
		rxq_ibv->mr = NULL;
	DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void *)priv,
	      (void *)rxq_ibv, rte_atomic32_read(&rxq_ibv->refcnt));
	if (rte_atomic32_dec_and_test(&rxq_ibv->refcnt)) {
		rxq_free_elts(rxq_ibv->rxq_ctrl);
		claim_zero(ibv_destroy_wq(rxq_ibv->wq));
		claim_zero(ibv_destroy_cq(rxq_ibv->cq));
		if (rxq_ibv->channel)
			claim_zero(ibv_destroy_comp_channel(rxq_ibv->channel));
		LIST_REMOVE(rxq_ibv, next);
		rte_free(rxq_ibv);
		return 0;
	}
	return EBUSY;
}

/**
 * Verify the Verbs Rx queue list is empty.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return the number of objects not released.
 */
int
mlx5_priv_rxq_ibv_verify(struct priv *priv)
{
	int ret = 0;
	struct mlx5_rxq_ibv *rxq_ibv;

	LIST_FOREACH(rxq_ibv, &priv->rxqsibv, next) {
		DEBUG("%p: Verbs Rx queue %p still referenced", (void *)priv,
		      (void *)rxq_ibv);
		++ret;
	}
	return ret;
}

/**
 * Return true if a single reference exists on the object.
 *
 * @param priv
 *   Pointer to private structure.
 * @param rxq_ibv
 *   Verbs Rx queue object.
 */
int
mlx5_priv_rxq_ibv_releasable(struct priv *priv, struct mlx5_rxq_ibv *rxq_ibv)
{
	(void)priv;
	assert(rxq_ibv);
	return (rte_atomic32_read(&rxq_ibv->refcnt) == 1);
}

/**
 * Create a DPDK Rx queue.
 *
 * @param priv
 *   Pointer to private structure.
 * @param idx
 *   RX queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 * @param mp
 *   Memory pool for buffer allocations.
 *
 * @return
 *   A DPDK queue object on success.
 */
struct mlx5_rxq_ctrl*
mlx5_priv_rxq_new(struct priv *priv, uint16_t idx, uint16_t desc,
		  unsigned int socket, struct rte_mempool *mp)
{
	struct rte_eth_dev *dev = priv->dev;
	struct mlx5_rxq_ctrl *tmpl;
	const uint16_t desc_n =
		desc + priv->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP;
	unsigned int mb_len = rte_pktmbuf_data_room_size(mp);

	tmpl = rte_calloc_socket("RXQ", 1,
				 sizeof(*tmpl) +
				 desc_n * sizeof(struct rte_mbuf *),
				 0, socket);
	if (!tmpl)
		return NULL;
	if (priv->dev->data->dev_conf.intr_conf.rxq)
		tmpl->irq = 1;
	/* Enable scattered packets support for this queue if necessary. */
	assert(mb_len >= RTE_PKTMBUF_HEADROOM);
	if (dev->data->dev_conf.rxmode.max_rx_pkt_len <=
	    (mb_len - RTE_PKTMBUF_HEADROOM)) {
		tmpl->rxq.sges_n = 0;
	} else if (dev->data->dev_conf.rxmode.enable_scatter) {
		unsigned int size =
			RTE_PKTMBUF_HEADROOM +
			dev->data->dev_conf.rxmode.max_rx_pkt_len;
		unsigned int sges_n;

		/*
		 * Determine the number of SGEs needed for a full packet
		 * and round it to the next power of two.
		 */
		sges_n = log2above((size / mb_len) + !!(size % mb_len));
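		/*
		 * Worked example: max_rx_pkt_len == 9000 with 2048-byte
		 * mbufs and the default 128-byte headroom gives
		 * size == 9128, hence sges_n == log2above(5) == 3,
		 * i.e. 8 SGEs per packet.
		 */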
		tmpl->rxq.sges_n = sges_n;
		/* Make sure rxq.sges_n did not overflow. */
		size = mb_len * (1 << tmpl->rxq.sges_n);
		size -= RTE_PKTMBUF_HEADROOM;
		if (size < dev->data->dev_conf.rxmode.max_rx_pkt_len) {
			ERROR("%p: too many SGEs (%u) needed to handle"
			      " requested maximum packet size %u",
			      (void *)dev,
			      1 << sges_n,
			      dev->data->dev_conf.rxmode.max_rx_pkt_len);
			goto error;
		}
	} else {
		WARN("%p: the requested maximum Rx packet size (%u) is"
		     " larger than a single mbuf (%u) and scattered"
		     " mode has not been requested",
		     (void *)dev,
		     dev->data->dev_conf.rxmode.max_rx_pkt_len,
		     mb_len - RTE_PKTMBUF_HEADROOM);
	}
	DEBUG("%p: maximum number of segments per packet: %u",
	      (void *)dev, 1 << tmpl->rxq.sges_n);
	if (desc % (1 << tmpl->rxq.sges_n)) {
		ERROR("%p: number of RX queue descriptors (%u) is not a"
		      " multiple of SGEs per packet (%u)",
		      (void *)dev,
		      desc,
		      1 << tmpl->rxq.sges_n);
		goto error;
	}
	/* Toggle RX checksum offload if hardware supports it. */
	if (priv->hw_csum)
		tmpl->rxq.csum = !!dev->data->dev_conf.rxmode.hw_ip_checksum;
	if (priv->hw_csum_l2tun)
		tmpl->rxq.csum_l2tun =
			!!dev->data->dev_conf.rxmode.hw_ip_checksum;
	/* Configure VLAN stripping. */
	tmpl->rxq.vlan_strip = (priv->hw_vlan_strip &&
				!!dev->data->dev_conf.rxmode.hw_vlan_strip);
	/* By default, FCS (CRC) is stripped by hardware. */
	if (dev->data->dev_conf.rxmode.hw_strip_crc) {
		tmpl->rxq.crc_present = 0;
	} else if (priv->hw_fcs_strip) {
		tmpl->rxq.crc_present = 1;
	} else {
		WARN("%p: CRC stripping has been disabled but will still"
		     " be performed by hardware, make sure MLNX_OFED and"
		     " firmware are up to date",
		     (void *)dev);
		tmpl->rxq.crc_present = 0;
	}
	DEBUG("%p: CRC stripping is %s, %u bytes will be subtracted from"
	      " incoming frames to hide it",
	      (void *)dev,
	      tmpl->rxq.crc_present ? "disabled" : "enabled",
	      tmpl->rxq.crc_present << 2);
	/* Save port ID. */
	tmpl->rxq.rss_hash = priv->rxqs_n > 1;
	tmpl->rxq.port_id = dev->data->port_id;
	tmpl->priv = priv;
	tmpl->rxq.mp = mp;
	tmpl->rxq.stats.idx = idx;
	tmpl->rxq.elts_n = log2above(desc);
	tmpl->rxq.elts =
		(struct rte_mbuf *(*)[1 << tmpl->rxq.elts_n])(tmpl + 1);
	rte_atomic32_inc(&tmpl->refcnt);
	DEBUG("%p: Rx queue %p: refcnt %d", (void *)priv,
	      (void *)tmpl, rte_atomic32_read(&tmpl->refcnt));
	LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next);
	return tmpl;
error:
	rte_free(tmpl);
	return NULL;
}

/**
 * Get a Rx queue.
 *
 * @param priv
 *   Pointer to private structure.
 * @param idx
 *   RX queue index.
 *
 * @return
 *   A pointer to the queue if it exists.
 */
struct mlx5_rxq_ctrl*
mlx5_priv_rxq_get(struct priv *priv, uint16_t idx)
{
	struct mlx5_rxq_ctrl *rxq_ctrl = NULL;

	if ((*priv->rxqs)[idx]) {
		rxq_ctrl = container_of((*priv->rxqs)[idx],
					struct mlx5_rxq_ctrl,
					rxq);

		mlx5_priv_rxq_ibv_get(priv, idx);
		rte_atomic32_inc(&rxq_ctrl->refcnt);
		DEBUG("%p: Rx queue %p: refcnt %d", (void *)priv,
		      (void *)rxq_ctrl, rte_atomic32_read(&rxq_ctrl->refcnt));
	}
	return rxq_ctrl;
}

/**
 * Release a Rx queue.
 *
 * @param priv
 *   Pointer to private structure.
 * @param idx
 *   RX queue index.
 *
 * @return
 *   0 on success, errno value on failure.
 */
int
mlx5_priv_rxq_release(struct priv *priv, uint16_t idx)
{
	struct mlx5_rxq_ctrl *rxq_ctrl;

	if (!(*priv->rxqs)[idx])
		return 0;
	rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq);
	assert(rxq_ctrl->priv);
	if (rxq_ctrl->ibv) {
		int ret;

		ret = mlx5_priv_rxq_ibv_release(rxq_ctrl->priv, rxq_ctrl->ibv);
		if (!ret)
			rxq_ctrl->ibv = NULL;
	}
	DEBUG("%p: Rx queue %p: refcnt %d", (void *)priv,
	      (void *)rxq_ctrl, rte_atomic32_read(&rxq_ctrl->refcnt));
	if (rte_atomic32_dec_and_test(&rxq_ctrl->refcnt)) {
		LIST_REMOVE(rxq_ctrl, next);
		rte_free(rxq_ctrl);
		(*priv->rxqs)[idx] = NULL;
		return 0;
	}
	return EBUSY;
}

/**
 * Verify if the queue can be released.
 *
 * @param priv
 *   Pointer to private structure.
 * @param idx
 *   RX queue index.
 *
 * @return
 *   1 if the queue can be released.
 */
int
mlx5_priv_rxq_releasable(struct priv *priv, uint16_t idx)
{
	struct mlx5_rxq_ctrl *rxq_ctrl;

	if (!(*priv->rxqs)[idx])
		return -1;
	rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq);
	return (rte_atomic32_read(&rxq_ctrl->refcnt) == 1);
}

/**
 * Verify the Rx queue list is empty.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return the number of objects not released.
 */
int
mlx5_priv_rxq_verify(struct priv *priv)
{
	struct mlx5_rxq_ctrl *rxq_ctrl;
	int ret = 0;

	LIST_FOREACH(rxq_ctrl, &priv->rxqsctrl, next) {
		DEBUG("%p: Rx queue %p still referenced", (void *)priv,
		      (void *)rxq_ctrl);
		++ret;
	}
	return ret;
}

/**
 * Create an indirection table.
 *
 * @param priv
 *   Pointer to private structure.
 * @param queues
 *   Queues entering in the indirection table.
 * @param queues_n
 *   Number of queues in the array.
 *
 * @return
 *   A new indirection table.
 */
struct mlx5_ind_table_ibv*
mlx5_priv_ind_table_ibv_new(struct priv *priv, uint16_t queues[],
			    uint16_t queues_n)
{
	struct mlx5_ind_table_ibv *ind_tbl;
	const unsigned int wq_n = rte_is_power_of_2(queues_n) ?
		log2above(queues_n) :
		log2above(priv->ind_table_max_size);
	struct ibv_wq *wq[1 << wq_n];
	unsigned int i;
	unsigned int j;

	ind_tbl = rte_calloc(__func__, 1, sizeof(*ind_tbl) +
			     queues_n * sizeof(uint16_t), 0);
	if (!ind_tbl)
		return NULL;
	for (i = 0; i != queues_n; ++i) {
		struct mlx5_rxq_ctrl *rxq =
			mlx5_priv_rxq_get(priv, queues[i]);

		if (!rxq)
			goto error;
		wq[i] = rxq->ibv->wq;
		ind_tbl->queues[i] = queues[i];
	}
	ind_tbl->queues_n = queues_n;
	/* Finalise indirection table. */
	for (j = 0; i != (unsigned int)(1 << wq_n); ++i, ++j)
		wq[i] = wq[j];
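	/*
	 * Worked example: spreading 6 WQs over a 512-entry table makes
	 * the loop above repeat WQs 0..5 cyclically over entries 6..511
	 * (round-robin wrap-around).
	 */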
	ind_tbl->ind_table = ibv_create_rwq_ind_table(
		priv->ctx,
		&(struct ibv_rwq_ind_table_init_attr){
			.log_ind_tbl_size = wq_n,
			.ind_tbl = wq,
			.comp_mask = 0,
		});
	if (!ind_tbl->ind_table)
		goto error;
	rte_atomic32_inc(&ind_tbl->refcnt);
	LIST_INSERT_HEAD(&priv->ind_tbls, ind_tbl, next);
	DEBUG("%p: Indirection table %p: refcnt %d", (void *)priv,
	      (void *)ind_tbl, rte_atomic32_read(&ind_tbl->refcnt));
	return ind_tbl;
error:
	rte_free(ind_tbl);
	DEBUG("%p cannot create indirection table", (void *)priv);
	return NULL;
}

/**
 * Get an indirection table.
 *
 * @param priv
 *   Pointer to private structure.
 * @param queues
 *   Queues entering in the indirection table.
 * @param queues_n
 *   Number of queues in the array.
 *
 * @return
 *   An indirection table if found.
 */
struct mlx5_ind_table_ibv*
mlx5_priv_ind_table_ibv_get(struct priv *priv, uint16_t queues[],
			    uint16_t queues_n)
{
	struct mlx5_ind_table_ibv *ind_tbl;

	LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) {
		if ((ind_tbl->queues_n == queues_n) &&
		    (memcmp(ind_tbl->queues, queues,
			    ind_tbl->queues_n * sizeof(ind_tbl->queues[0]))
		     == 0))
			break;
	}
	if (ind_tbl) {
		unsigned int i;

		rte_atomic32_inc(&ind_tbl->refcnt);
		DEBUG("%p: Indirection table %p: refcnt %d", (void *)priv,
		      (void *)ind_tbl, rte_atomic32_read(&ind_tbl->refcnt));
		for (i = 0; i != ind_tbl->queues_n; ++i)
			mlx5_priv_rxq_get(priv, ind_tbl->queues[i]);
	}
	return ind_tbl;
}

/**
 * Release an indirection table.
 *
 * @param priv
 *   Pointer to private structure.
 * @param ind_tbl
 *   Indirection table to release.
 *
 * @return
 *   0 on success, errno value on failure.
 */
int
mlx5_priv_ind_table_ibv_release(struct priv *priv,
				struct mlx5_ind_table_ibv *ind_tbl)
{
	unsigned int i;

	DEBUG("%p: Indirection table %p: refcnt %d", (void *)priv,
	      (void *)ind_tbl, rte_atomic32_read(&ind_tbl->refcnt));
	if (rte_atomic32_dec_and_test(&ind_tbl->refcnt))
		claim_zero(ibv_destroy_rwq_ind_table(ind_tbl->ind_table));
	for (i = 0; i != ind_tbl->queues_n; ++i)
		claim_nonzero(mlx5_priv_rxq_release(priv, ind_tbl->queues[i]));
	if (!rte_atomic32_read(&ind_tbl->refcnt)) {
		LIST_REMOVE(ind_tbl, next);
		rte_free(ind_tbl);
		return 0;
	}
	return EBUSY;
}

/**
 * Verify the indirection table list is empty.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return the number of objects not released.
 */
int
mlx5_priv_ind_table_ibv_verify(struct priv *priv)
{
	struct mlx5_ind_table_ibv *ind_tbl;
	int ret = 0;

	LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) {
		DEBUG("%p: Verbs indirection table %p still referenced",
		      (void *)priv, (void *)ind_tbl);
		++ret;
	}
	return ret;
}