 * Copyright 2015 6WIND S.A.
 * Copyright 2015 Mellanox.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *   * Neither the name of 6WIND S.A. nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#pragma GCC diagnostic ignored "-pedantic"
#include <infiniband/verbs.h>
#pragma GCC diagnostic error "-pedantic"

/* DPDK headers don't like -pedantic. */
#pragma GCC diagnostic ignored "-pedantic"
#include <rte_malloc.h>
#include <rte_ethdev.h>
#include <rte_common.h>
#pragma GCC diagnostic error "-pedantic"

#include "mlx5_rxtx.h"
#include "mlx5_utils.h"
#include "mlx5_defs.h"
/* Default RSS hash key also used for ConnectX-3. */
static uint8_t hash_rxq_default_key[] = {
	0x2c, 0xc6, 0x81, 0xd1,
	0x5b, 0xdb, 0xf4, 0xf7,
	0xfc, 0xa2, 0x83, 0x19,
	0xdb, 0x1a, 0x3e, 0x94,
	0x6b, 0x9e, 0x38, 0xd9,
	0x2c, 0x9c, 0x03, 0xd1,
	0xad, 0x99, 0x44, 0xa7,
	0xd9, 0x56, 0x3d, 0x59,
	0x06, 0x3c, 0x25, 0xf3,
	0xfc, 0x1f, 0xdc, 0x2a,
/**
 * Return the base-2 logarithm of the nearest power of two at or above the
 * input value.
 *
 * @return
 *   log2 of the smallest power of two greater than or equal to the input
 *   value.
 */
log2above(unsigned int v)
	for (l = 0, r = 0; (v >> 1); ++l, v >>= 1)
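	/*
	 * For example, log2above(1) == 0, log2above(6) == 3 (2^3 = 8 is the
	 * nearest power of two at or above 6) and log2above(8) == 3. The
	 * result is used below both as a shift count and as the indirection
	 * table size exponent.
	 */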
/**
 * Initialize hash RX queues and indirection table.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   0 on success, errno value on failure.
 */
priv_create_hash_rxqs(struct priv *priv)
	static const uint64_t rss_hash_table[] = {
		(IBV_EXP_RX_HASH_SRC_IPV4 | IBV_EXP_RX_HASH_DST_IPV4 |
		 IBV_EXP_RX_HASH_SRC_PORT_TCP | IBV_EXP_RX_HASH_DST_PORT_TCP),
		(IBV_EXP_RX_HASH_SRC_IPV4 | IBV_EXP_RX_HASH_DST_IPV4 |
		 IBV_EXP_RX_HASH_SRC_PORT_UDP | IBV_EXP_RX_HASH_DST_PORT_UDP),
		(IBV_EXP_RX_HASH_SRC_IPV4 | IBV_EXP_RX_HASH_DST_IPV4),
		/* None, used for everything else. */
	DEBUG("allocating hash RX queues for %u WQs", priv->rxqs_n);
	assert(priv->ind_table == NULL);
	assert(priv->hash_rxqs == NULL);
	assert(priv->hash_rxqs_n == 0);
	assert(priv->pd != NULL);
	assert(priv->ctx != NULL);
	if (priv->rxqs_n == 0)
	assert(priv->rxqs != NULL);
	/* FIXME: large data structures are allocated on the stack. */
	unsigned int wqs_n = (1 << log2above(priv->rxqs_n));
	struct ibv_exp_wq *wqs[wqs_n];
	struct ibv_exp_rwq_ind_table_init_attr ind_init_attr = {
		.log_ind_tbl_size = log2above(priv->rxqs_n),
	struct ibv_exp_rwq_ind_table *ind_table = NULL;
	/* If only one RX queue is configured, RSS is not needed and a single
	 * empty hash entry is used (last rss_hash_table[] entry). */
	unsigned int hash_rxqs_n =
		((priv->rxqs_n == 1) ? 1 : RTE_DIM(rss_hash_table));
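	/*
	 * In other words, a single-queue setup instantiates only the trailing
	 * "everything else" entry of rss_hash_table[] (hash_rxqs_n == 1),
	 * while a multi-queue setup creates one hash RX queue per table entry
	 * (TCPv4, UDPv4, other IPv4 and the catch-all entry).
	 */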
	struct hash_rxq (*hash_rxqs)[hash_rxqs_n] = NULL;

	if (wqs_n < priv->rxqs_n) {
		ERROR("cannot handle this many RX queues (%u)", priv->rxqs_n);
	if (wqs_n != priv->rxqs_n)
		WARN("%u RX queues are configured, consider rounding this"
		     " number to the next power of two (%u) for optimal"
		     " performance",
		     priv->rxqs_n, wqs_n);
	/* When the number of RX queues is not a power of two, the remaining
	 * table entries are padded with reused WQs and hashes are not spread
	 * uniformly. */
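	/*
	 * For example, with 6 RX queues wqs_n is 8 and the loop below fills
	 * the indirection table with WQs 0 1 2 3 4 5 0 1, so queues 0 and 1
	 * receive a slightly larger share of the hashed traffic.
	 */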
	for (i = 0, j = 0; (i != wqs_n); ++i) {
		wqs[i] = (*priv->rxqs)[j]->wq;
		if (++j == priv->rxqs_n)
	ind_table = ibv_exp_create_rwq_ind_table(priv->ctx, &ind_init_attr);
	if (ind_table == NULL) {
		/* Not clear whether errno is set. */
		err = (errno ? errno : EINVAL);
		ERROR("RX indirection table creation failed with error %d: %s",
	/* Allocate array that holds hash RX queues and related data. */
	hash_rxqs = rte_malloc(__func__, sizeof(*hash_rxqs), 0);
	if (hash_rxqs == NULL) {
		ERROR("cannot allocate hash RX queues container: %s",
	for (i = 0, j = (RTE_DIM(rss_hash_table) - hash_rxqs_n);
	     (j != RTE_DIM(rss_hash_table));
		struct hash_rxq *hash_rxq = &(*hash_rxqs)[i];
		struct ibv_exp_rx_hash_conf hash_conf = {
			.rx_hash_function = IBV_EXP_RX_HASH_FUNC_TOEPLITZ,
			.rx_hash_key_len = sizeof(hash_rxq_default_key),
			.rx_hash_key = hash_rxq_default_key,
			.rx_hash_fields_mask = rss_hash_table[j],
			.rwq_ind_tbl = ind_table,
		struct ibv_exp_qp_init_attr qp_init_attr = {
			.max_inl_recv = 0, /* Currently not supported. */
			.qp_type = IBV_QPT_RAW_PACKET,
			.comp_mask = (IBV_EXP_QP_INIT_ATTR_PD |
				      IBV_EXP_QP_INIT_ATTR_RX_HASH),
			.rx_hash_conf = &hash_conf,
			.port_num = priv->port,
		*hash_rxq = (struct hash_rxq){
			.qp = ibv_exp_create_qp(priv->ctx, &qp_init_attr),
		if (hash_rxq->qp == NULL) {
			err = (errno ? errno : EINVAL);
			ERROR("Hash RX QP creation failure: %s",
			hash_rxq = &(*hash_rxqs)[--i];
			claim_zero(ibv_destroy_qp(hash_rxq->qp));
	priv->ind_table = ind_table;
	priv->hash_rxqs = hash_rxqs;
	priv->hash_rxqs_n = hash_rxqs_n;
	if (ind_table != NULL)
		claim_zero(ibv_exp_destroy_rwq_ind_table(ind_table));
/**
 * Clean up hash RX queues and indirection table.
 *
 * @param priv
 *   Pointer to private structure.
 */
priv_destroy_hash_rxqs(struct priv *priv)
	DEBUG("destroying %u hash RX queues", priv->hash_rxqs_n);
	if (priv->hash_rxqs_n == 0) {
		assert(priv->hash_rxqs == NULL);
		assert(priv->ind_table == NULL);
	for (i = 0; (i != priv->hash_rxqs_n); ++i) {
		struct hash_rxq *hash_rxq = &(*priv->hash_rxqs)[i];

		assert(hash_rxq->priv == priv);
		assert(hash_rxq->qp != NULL);
		/* Also check that there are no remaining flows. */
		assert(hash_rxq->allmulti_flow == NULL);
		assert(hash_rxq->promisc_flow == NULL);
		for (j = 0; (j != RTE_DIM(hash_rxq->mac_flow)); ++j)
			for (k = 0; (k != RTE_DIM(hash_rxq->mac_flow[j])); ++k)
				assert(hash_rxq->mac_flow[j][k] == NULL);
		claim_zero(ibv_destroy_qp(hash_rxq->qp));
	priv->hash_rxqs_n = 0;
	rte_free(priv->hash_rxqs);
	priv->hash_rxqs = NULL;
	claim_zero(ibv_exp_destroy_rwq_ind_table(priv->ind_table));
	priv->ind_table = NULL;
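/*
 * Illustrative pairing of the two functions above, assuming the usual device
 * start/stop call sites (a sketch, not the PMD's actual control path):
 *
 *	int err = priv_create_hash_rxqs(priv);
 *
 *	if (err)
 *		return err;
 *	...
 *	priv_destroy_hash_rxqs(priv);
 */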
/**
 * Allocate RX queue elements with scattered packets support.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 * @param elts_n
 *   Number of elements to allocate.
 * @param[in] pool
 *   If not NULL, fetch buffers from this array instead of allocating them
 *   with rte_pktmbuf_alloc().
 *
 * @return
 *   0 on success, errno value on failure.
 */
rxq_alloc_elts_sp(struct rxq *rxq, unsigned int elts_n,
		  struct rte_mbuf **pool)
	struct rxq_elt_sp (*elts)[elts_n] =
		rte_calloc_socket("RXQ elements", 1, sizeof(*elts), 0,
		ERROR("%p: can't allocate packets array", (void *)rxq);
	/* For each WR (packet). */
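	/*
	 * Illustrative layout, assuming the default 128-byte
	 * RTE_PKTMBUF_HEADROOM and 2048-byte mempool buffers: SGE 0 of each
	 * element covers the 1920 bytes following the headroom while every
	 * other SGE covers its full 2048-byte buffer, so one WR can receive a
	 * packet spread over RTE_DIM(elt->sges) segments.
	 */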
	for (i = 0; (i != elts_n); ++i) {
		struct rxq_elt_sp *elt = &(*elts)[i];
		struct ibv_sge (*sges)[RTE_DIM(elt->sges)] = &elt->sges;

		/* These two arrays must have the same size. */
		assert(RTE_DIM(elt->sges) == RTE_DIM(elt->bufs));
		/* For each SGE (segment). */
		for (j = 0; (j != RTE_DIM(elt->bufs)); ++j) {
			struct ibv_sge *sge = &(*sges)[j];
			struct rte_mbuf *buf;

				rte_pktmbuf_reset(buf);
				buf = rte_pktmbuf_alloc(rxq->mp);
				assert(pool == NULL);
				ERROR("%p: empty mbuf pool", (void *)rxq);
			/* Headroom is reserved by rte_pktmbuf_alloc(). */
			assert(DATA_OFF(buf) == RTE_PKTMBUF_HEADROOM);
			/* Buffer is supposed to be empty. */
			assert(rte_pktmbuf_data_len(buf) == 0);
			assert(rte_pktmbuf_pkt_len(buf) == 0);
			/* sge->addr must be able to store a pointer. */
			assert(sizeof(sge->addr) >= sizeof(uintptr_t));
				/* The first SGE keeps its headroom. */
				sge->addr = rte_pktmbuf_mtod(buf, uintptr_t);
				sge->length = (buf->buf_len -
					       RTE_PKTMBUF_HEADROOM);
				/* Subsequent SGEs lose theirs. */
				assert(DATA_OFF(buf) == RTE_PKTMBUF_HEADROOM);
				SET_DATA_OFF(buf, 0);
				sge->addr = (uintptr_t)buf->buf_addr;
				sge->length = buf->buf_len;
			sge->lkey = rxq->mr->lkey;
			/* Redundant check for tailroom. */
			assert(sge->length == rte_pktmbuf_tailroom(buf));
	DEBUG("%p: allocated and configured %u WRs (%zu segments)",
	      (void *)rxq, elts_n, (elts_n * RTE_DIM((*elts)[0].sges)));
	rxq->elts_n = elts_n;
	assert(pool == NULL);
	for (i = 0; (i != RTE_DIM(*elts)); ++i) {
		struct rxq_elt_sp *elt = &(*elts)[i];

		for (j = 0; (j != RTE_DIM(elt->bufs)); ++j) {
			struct rte_mbuf *buf = elt->bufs[j];

				rte_pktmbuf_free_seg(buf);
	DEBUG("%p: failed, freed everything", (void *)rxq);
/**
 * Free RX queue elements with scattered packets support.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 */
rxq_free_elts_sp(struct rxq *rxq)
	unsigned int elts_n = rxq->elts_n;
	struct rxq_elt_sp (*elts)[elts_n] = rxq->elts.sp;

	DEBUG("%p: freeing WRs", (void *)rxq);
	for (i = 0; (i != RTE_DIM(*elts)); ++i) {
		struct rxq_elt_sp *elt = &(*elts)[i];

		for (j = 0; (j != RTE_DIM(elt->bufs)); ++j) {
			struct rte_mbuf *buf = elt->bufs[j];

				rte_pktmbuf_free_seg(buf);
/**
 * Allocate RX queue elements.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 * @param elts_n
 *   Number of elements to allocate.
 * @param[in] pool
 *   If not NULL, fetch buffers from this array instead of allocating them
 *   with rte_pktmbuf_alloc().
 *
 * @return
 *   0 on success, errno value on failure.
 */
rxq_alloc_elts(struct rxq *rxq, unsigned int elts_n, struct rte_mbuf **pool)
	struct rxq_elt (*elts)[elts_n] =
		rte_calloc_socket("RXQ elements", 1, sizeof(*elts), 0,
		ERROR("%p: can't allocate packets array", (void *)rxq);
	/* For each WR (packet). */
	for (i = 0; (i != elts_n); ++i) {
		struct rxq_elt *elt = &(*elts)[i];
		struct ibv_sge *sge = &(*elts)[i].sge;
		struct rte_mbuf *buf;

			rte_pktmbuf_reset(buf);
			buf = rte_pktmbuf_alloc(rxq->mp);
			assert(pool == NULL);
			ERROR("%p: empty mbuf pool", (void *)rxq);
		/* Headroom is reserved by rte_pktmbuf_alloc(). */
		assert(DATA_OFF(buf) == RTE_PKTMBUF_HEADROOM);
		/* Buffer is supposed to be empty. */
		assert(rte_pktmbuf_data_len(buf) == 0);
		assert(rte_pktmbuf_pkt_len(buf) == 0);
		/* sge->addr must be able to store a pointer. */
		assert(sizeof(sge->addr) >= sizeof(uintptr_t));
		/* SGE keeps its headroom. */
		sge->addr = (uintptr_t)
			((uint8_t *)buf->buf_addr + RTE_PKTMBUF_HEADROOM);
		sge->length = (buf->buf_len - RTE_PKTMBUF_HEADROOM);
		sge->lkey = rxq->mr->lkey;
		/* Redundant check for tailroom. */
		assert(sge->length == rte_pktmbuf_tailroom(buf));
	DEBUG("%p: allocated and configured %u single-segment WRs",
	      (void *)rxq, elts_n);
	rxq->elts_n = elts_n;
	rxq->elts.no_sp = elts;
	assert(pool == NULL);
	for (i = 0; (i != RTE_DIM(*elts)); ++i) {
		struct rxq_elt *elt = &(*elts)[i];
		struct rte_mbuf *buf = elt->buf;

			rte_pktmbuf_free_seg(buf);
	DEBUG("%p: failed, freed everything", (void *)rxq);
/**
 * Free RX queue elements.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 */
rxq_free_elts(struct rxq *rxq)
	unsigned int elts_n = rxq->elts_n;
	struct rxq_elt (*elts)[elts_n] = rxq->elts.no_sp;

	DEBUG("%p: freeing WRs", (void *)rxq);
	rxq->elts.no_sp = NULL;
	for (i = 0; (i != RTE_DIM(*elts)); ++i) {
		struct rxq_elt *elt = &(*elts)[i];
		struct rte_mbuf *buf = elt->buf;

			rte_pktmbuf_free_seg(buf);
/**
 * Clean up an RX queue.
 *
 * Destroy objects, free allocated memory and reset the structure for reuse.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 */
rxq_cleanup(struct rxq *rxq)
	struct ibv_exp_release_intf_params params;

	DEBUG("cleaning up %p", (void *)rxq);
	rxq_free_elts_sp(rxq);
	if (rxq->if_wq != NULL) {
		assert(rxq->priv != NULL);
		assert(rxq->priv->ctx != NULL);
		assert(rxq->wq != NULL);
		params = (struct ibv_exp_release_intf_params){
		claim_zero(ibv_exp_release_intf(rxq->priv->ctx,
	if (rxq->if_cq != NULL) {
		assert(rxq->priv != NULL);
		assert(rxq->priv->ctx != NULL);
		assert(rxq->cq != NULL);
		params = (struct ibv_exp_release_intf_params){
		claim_zero(ibv_exp_release_intf(rxq->priv->ctx,
		claim_zero(ibv_exp_destroy_wq(rxq->wq));
		claim_zero(ibv_destroy_cq(rxq->cq));
	if (rxq->rd != NULL) {
		struct ibv_exp_destroy_res_domain_attr attr = {

		assert(rxq->priv != NULL);
		assert(rxq->priv->ctx != NULL);
		claim_zero(ibv_exp_destroy_res_domain(rxq->priv->ctx,
		claim_zero(ibv_dereg_mr(rxq->mr));
	memset(rxq, 0, sizeof(*rxq));
/**
 * Reconfigure an RX queue with new parameters.
 *
 * rxq_rehash() does not allocate mbufs; mbuf allocation, if not performed
 * from the right thread (such as a control thread), may corrupt the pool.
 * In case of failure, the queue is left untouched.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, errno value on failure.
 */
rxq_rehash(struct rte_eth_dev *dev, struct rxq *rxq)
	struct priv *priv = rxq->priv;
	struct rxq tmpl = *rxq;
	struct rte_mbuf **pool;
	struct ibv_exp_wq_attr mod;

	DEBUG("%p: rehashing queue %p", (void *)dev, (void *)rxq);
	/* Number of descriptors and mbufs currently allocated. */
	desc_n = (tmpl.elts_n * (tmpl.sp ? MLX5_PMD_SGE_WR_N : 1));
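	/*
	 * For example, assuming MLX5_PMD_SGE_WR_N is 4: a scattered queue
	 * holding 64 WRs of 4 segments each yields desc_n == 256 mbufs, the
	 * same total as a non-scattered queue with 256 single-segment WRs.
	 */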
	/* Toggle RX checksum offload if hardware supports it. */
		tmpl.csum = !!dev->data->dev_conf.rxmode.hw_ip_checksum;
		rxq->csum = tmpl.csum;
	if (priv->hw_csum_l2tun) {
		tmpl.csum_l2tun = !!dev->data->dev_conf.rxmode.hw_ip_checksum;
		rxq->csum_l2tun = tmpl.csum_l2tun;
	/* Enable scattered packets support for this queue if necessary. */
	if ((dev->data->dev_conf.rxmode.jumbo_frame) &&
	    (dev->data->dev_conf.rxmode.max_rx_pkt_len >
	     (tmpl.mb_len - RTE_PKTMBUF_HEADROOM))) {
		desc_n /= MLX5_PMD_SGE_WR_N;
	DEBUG("%p: %s scattered packets support (%u WRs)",
	      (void *)dev, (tmpl.sp ? "enabling" : "disabling"), desc_n);
	/* If scatter mode is the same as before, nothing to do. */
	if (tmpl.sp == rxq->sp) {
		DEBUG("%p: nothing to do", (void *)dev);
	/* From now on, any failure will render the queue unusable.
	 * Reinitialize WQ. */
	mod = (struct ibv_exp_wq_attr){
		.attr_mask = IBV_EXP_WQ_ATTR_STATE,
		.wq_state = IBV_EXP_WQS_RESET,
	err = ibv_exp_modify_wq(tmpl.wq, &mod);
		ERROR("%p: cannot reset WQ: %s", (void *)dev, strerror(err));
	pool = rte_malloc(__func__, (mbuf_n * sizeof(*pool)), 0);
		ERROR("%p: cannot allocate memory", (void *)dev);
	/* Snatch mbufs from original queue. */
		struct rxq_elt_sp (*elts)[rxq->elts_n] = rxq->elts.sp;

		for (i = 0; (i != RTE_DIM(*elts)); ++i) {
			struct rxq_elt_sp *elt = &(*elts)[i];

			for (j = 0; (j != RTE_DIM(elt->bufs)); ++j) {
				assert(elt->bufs[j] != NULL);
				pool[k++] = elt->bufs[j];
		struct rxq_elt (*elts)[rxq->elts_n] = rxq->elts.no_sp;

		for (i = 0; (i != RTE_DIM(*elts)); ++i) {
			struct rxq_elt *elt = &(*elts)[i];
			struct rte_mbuf *buf = elt->buf;
	assert((void *)&tmpl.elts.sp == (void *)&tmpl.elts.no_sp);
	       rxq_alloc_elts_sp(&tmpl, desc_n, pool) :
	       rxq_alloc_elts(&tmpl, desc_n, pool));
		ERROR("%p: cannot reallocate WRs, aborting", (void *)dev);
	assert(tmpl.elts_n == desc_n);
	assert(tmpl.elts.sp != NULL);
	/* Clean up original data. */
	rte_free(rxq->elts.sp);
	/* Change queue state to ready. */
	mod = (struct ibv_exp_wq_attr){
		.attr_mask = IBV_EXP_WQ_ATTR_STATE,
		.wq_state = IBV_EXP_WQS_RDY,
	err = ibv_exp_modify_wq(tmpl.wq, &mod);
		ERROR("%p: WQ state to IBV_EXP_WQS_RDY failed: %s",
		      (void *)dev, strerror(err));
	assert(tmpl.if_wq != NULL);
		struct rxq_elt_sp (*elts)[tmpl.elts_n] = tmpl.elts.sp;

		for (i = 0; (i != RTE_DIM(*elts)); ++i) {
			err = tmpl.if_wq->recv_sg_list
				 RTE_DIM((*elts)[i].sges));
		struct rxq_elt (*elts)[tmpl.elts_n] = tmpl.elts.no_sp;

		for (i = 0; (i != RTE_DIM(*elts)); ++i) {
			err = tmpl.if_wq->recv_burst(
		ERROR("%p: failed to post SGEs with error %d",
		/* Set err because it does not contain a valid errno value. */
/**
 * Configure an RX queue.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param rxq
 *   Pointer to RX queue structure.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 * @param[in] conf
 *   Thresholds parameters.
 * @param mp
 *   Memory pool for buffer allocations.
 *
 * @return
 *   0 on success, errno value on failure.
 */
rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc,
	  unsigned int socket, const struct rte_eth_rxconf *conf,
	  struct rte_mempool *mp)
	struct priv *priv = dev->data->dev_private;
	struct ibv_exp_wq_attr mod;
		struct ibv_exp_query_intf_params params;
		struct ibv_exp_cq_init_attr cq;
		struct ibv_exp_res_domain_init_attr rd;
		struct ibv_exp_wq_init_attr wq;
	enum ibv_exp_query_intf_status status;
	struct rte_mbuf *buf;
	unsigned int cq_size = desc;

	(void)conf; /* Thresholds configuration (ignored). */
	if ((desc == 0) || (desc % MLX5_PMD_SGE_WR_N)) {
		ERROR("%p: invalid number of RX descriptors (must be a"
		      " multiple of %d)", (void *)dev, MLX5_PMD_SGE_WR_N);
	/* Get mbuf length. */
	buf = rte_pktmbuf_alloc(mp);
		ERROR("%p: unable to allocate mbuf", (void *)dev);
	tmpl.mb_len = buf->buf_len;
	assert((rte_pktmbuf_headroom(buf) +
		rte_pktmbuf_tailroom(buf)) == tmpl.mb_len);
	assert(rte_pktmbuf_headroom(buf) == RTE_PKTMBUF_HEADROOM);
	rte_pktmbuf_free(buf);
	/* Toggle RX checksum offload if hardware supports it. */
		tmpl.csum = !!dev->data->dev_conf.rxmode.hw_ip_checksum;
	if (priv->hw_csum_l2tun)
		tmpl.csum_l2tun = !!dev->data->dev_conf.rxmode.hw_ip_checksum;
	/* Enable scattered packets support for this queue if necessary. */
	if ((dev->data->dev_conf.rxmode.jumbo_frame) &&
	    (dev->data->dev_conf.rxmode.max_rx_pkt_len >
	     (tmpl.mb_len - RTE_PKTMBUF_HEADROOM))) {
		desc /= MLX5_PMD_SGE_WR_N;
	DEBUG("%p: %s scattered packets support (%u WRs)",
	      (void *)dev, (tmpl.sp ? "enabling" : "disabling"), desc);
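	/*
	 * Example, assuming 2048-byte mempool buffers and the default
	 * 128-byte RTE_PKTMBUF_HEADROOM: any max_rx_pkt_len above 1920 bytes
	 * with jumbo frames enabled switches this queue to scattered mode and
	 * divides the descriptor count by MLX5_PMD_SGE_WR_N.
	 */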
	/* Use the entire RX mempool as the memory region. */
	tmpl.mr = ibv_reg_mr(priv->pd,
			     (void *)mp->elt_va_start,
			     (mp->elt_va_end - mp->elt_va_start),
			     (IBV_ACCESS_LOCAL_WRITE |
			      IBV_ACCESS_REMOTE_WRITE));
	if (tmpl.mr == NULL) {
		ERROR("%p: MR creation failure: %s",
		      (void *)dev, strerror(ret));
	attr.rd = (struct ibv_exp_res_domain_init_attr){
		.comp_mask = (IBV_EXP_RES_DOMAIN_THREAD_MODEL |
			      IBV_EXP_RES_DOMAIN_MSG_MODEL),
		.thread_model = IBV_EXP_THREAD_SINGLE,
		.msg_model = IBV_EXP_MSG_HIGH_BW,
	tmpl.rd = ibv_exp_create_res_domain(priv->ctx, &attr.rd);
	if (tmpl.rd == NULL) {
		ERROR("%p: RD creation failure: %s",
		      (void *)dev, strerror(ret));
	attr.cq = (struct ibv_exp_cq_init_attr){
		.comp_mask = IBV_EXP_CQ_INIT_ATTR_RES_DOMAIN,
		.res_domain = tmpl.rd,
	tmpl.cq = ibv_exp_create_cq(priv->ctx, cq_size, NULL, NULL, 0,
	if (tmpl.cq == NULL) {
		ERROR("%p: CQ creation failure: %s",
		      (void *)dev, strerror(ret));
	DEBUG("priv->device_attr.max_qp_wr is %d",
	      priv->device_attr.max_qp_wr);
	DEBUG("priv->device_attr.max_sge is %d",
	      priv->device_attr.max_sge);
	attr.wq = (struct ibv_exp_wq_init_attr){
		.wq_context = NULL, /* Could be useful in the future. */
		.wq_type = IBV_EXP_WQT_RQ,
		/* Max number of outstanding WRs. */
		.max_recv_wr = ((priv->device_attr.max_qp_wr < (int)cq_size) ?
				priv->device_attr.max_qp_wr :
		/* Max number of scatter/gather elements in a WR. */
		.max_recv_sge = ((priv->device_attr.max_sge <
				 priv->device_attr.max_sge :
		.comp_mask = IBV_EXP_CREATE_WQ_RES_DOMAIN,
		.res_domain = tmpl.rd,
	tmpl.wq = ibv_exp_create_wq(priv->ctx, &attr.wq);
	if (tmpl.wq == NULL) {
		ret = (errno ? errno : EINVAL);
		ERROR("%p: WQ creation failure: %s",
		      (void *)dev, strerror(ret));
		ret = rxq_alloc_elts_sp(&tmpl, desc, NULL);
		ret = rxq_alloc_elts(&tmpl, desc, NULL);
		ERROR("%p: RXQ allocation failed: %s",
		      (void *)dev, strerror(ret));
	tmpl.port_id = dev->data->port_id;
	DEBUG("%p: RTE port ID: %u", (void *)rxq, tmpl.port_id);
	attr.params = (struct ibv_exp_query_intf_params){
		.intf_scope = IBV_EXP_INTF_GLOBAL,
		.intf = IBV_EXP_INTF_CQ,
	tmpl.if_cq = ibv_exp_query_intf(priv->ctx, &attr.params, &status);
	if (tmpl.if_cq == NULL) {
		ERROR("%p: CQ interface family query failed with status %d",
		      (void *)dev, status);
	attr.params = (struct ibv_exp_query_intf_params){
		.intf_scope = IBV_EXP_INTF_GLOBAL,
		.intf = IBV_EXP_INTF_WQ,
	tmpl.if_wq = ibv_exp_query_intf(priv->ctx, &attr.params, &status);
	if (tmpl.if_wq == NULL) {
		ERROR("%p: WQ interface family query failed with status %d",
		      (void *)dev, status);
	/* Change queue state to ready. */
	mod = (struct ibv_exp_wq_attr){
		.attr_mask = IBV_EXP_WQ_ATTR_STATE,
		.wq_state = IBV_EXP_WQS_RDY,
	ret = ibv_exp_modify_wq(tmpl.wq, &mod);
		ERROR("%p: WQ state to IBV_EXP_WQS_RDY failed: %s",
		      (void *)dev, strerror(ret));
		struct rxq_elt_sp (*elts)[tmpl.elts_n] = tmpl.elts.sp;

		for (i = 0; (i != RTE_DIM(*elts)); ++i) {
			ret = tmpl.if_wq->recv_sg_list
				 RTE_DIM((*elts)[i].sges));
		struct rxq_elt (*elts)[tmpl.elts_n] = tmpl.elts.no_sp;

		for (i = 0; (i != RTE_DIM(*elts)); ++i) {
			ret = tmpl.if_wq->recv_burst(
		ERROR("%p: failed to post SGEs with error %d",
		/* Set ret because it does not contain a valid errno value. */
	/* Clean up rxq in case we're reinitializing it. */
	DEBUG("%p: cleaning-up old rxq just in case", (void *)rxq);
	DEBUG("%p: rxq updated with %p", (void *)rxq, (void *)&tmpl);
/**
 * DPDK callback to configure an RX queue.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 * @param[in] conf
 *   Thresholds parameters.
 * @param mp
 *   Memory pool for buffer allocations.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
		    unsigned int socket, const struct rte_eth_rxconf *conf,
		    struct rte_mempool *mp)
	struct priv *priv = dev->data->dev_private;
	struct rxq *rxq = (*priv->rxqs)[idx];

	DEBUG("%p: configuring queue %u for %u descriptors",
	      (void *)dev, idx, desc);
	if (idx >= priv->rxqs_n) {
		ERROR("%p: queue index out of range (%u >= %u)",
		      (void *)dev, idx, priv->rxqs_n);
		DEBUG("%p: reusing already allocated queue index %u (%p)",
		      (void *)dev, idx, (void *)rxq);
		if (priv->started) {
		(*priv->rxqs)[idx] = NULL;
		rxq = rte_calloc_socket("RXQ", 1, sizeof(*rxq), 0, socket);
			ERROR("%p: unable to allocate queue index %u",
	ret = rxq_setup(dev, rxq, desc, socket, conf, mp);
	rxq->stats.idx = idx;
	DEBUG("%p: adding RX queue %p to list",
	      (void *)dev, (void *)rxq);
	(*priv->rxqs)[idx] = rxq;
	/* Update receive callback. */
		dev->rx_pkt_burst = mlx5_rx_burst_sp;
		dev->rx_pkt_burst = mlx5_rx_burst;
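	/*
	 * For reference, a minimal application-side sketch that reaches this
	 * callback through the generic ethdev API (queue index, descriptor
	 * count and pool are illustrative):
	 *
	 *	struct rte_mempool *mb_pool = rte_pktmbuf_pool_create(...);
	 *
	 *	rte_eth_rx_queue_setup(port_id, 0, 512, rte_socket_id(),
	 *			       NULL, mb_pool);
	 */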
/**
 * DPDK callback to release an RX queue.
 *
 * @param dpdk_rxq
 *   Generic RX queue pointer.
 */
mlx5_rx_queue_release(void *dpdk_rxq)
	struct rxq *rxq = (struct rxq *)dpdk_rxq;
	for (i = 0; (i != priv->rxqs_n); ++i)
		if ((*priv->rxqs)[i] == rxq) {
			DEBUG("%p: removing RX queue %p from list",
			      (void *)priv->dev, (void *)rxq);
			(*priv->rxqs)[i] = NULL;