/*-
 *   BSD LICENSE
 *
 *   Copyright 2015 6WIND S.A.
 *   Copyright 2015 Mellanox.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of 6WIND S.A. nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <stddef.h>
#include <assert.h>
#include <errno.h>
#include <string.h>
#include <stdint.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-pedantic"
#endif
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-pedantic"
#endif
/* DPDK headers don't like -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-pedantic"
#endif
#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev.h>
#include <rte_common.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-pedantic"
#endif
#include "mlx5.h"
#include "mlx5_autoconf.h"
#include "mlx5_rxtx.h"
#include "mlx5_utils.h"
#include "mlx5_defs.h"
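/*
 * RX queue elements come in two layouts: struct rxq_elt_sp carries an array
 * of SGEs (one per segment) plus the mbufs backing them, for scattered
 * packets; struct rxq_elt carries a single SGE and mbuf for single-segment
 * packets.  Either way, each element describes one receive work request (WR)
 * so that completions can be mapped back to mbufs.
 */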
/**
 * Allocate RX queue elements with scattered packets support.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 * @param elts_n
 *   Number of elements to allocate.
 * @param[in] pool
 *   If not NULL, fetch buffers from this array instead of allocating them
 *   with rte_pktmbuf_alloc().
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
rxq_alloc_elts_sp(struct rxq *rxq, unsigned int elts_n,
		  struct rte_mbuf **pool)
{
	unsigned int i;
	struct rxq_elt_sp (*elts)[elts_n] =
		rte_calloc_socket("RXQ elements", 1, sizeof(*elts), 0,
				  rxq->socket);
	int ret = 0;

	if (elts == NULL) {
		ERROR("%p: can't allocate packets array", (void *)rxq);
		ret = ENOMEM;
		goto error;
	}
	/* For each WR (packet). */
	for (i = 0; (i != elts_n); ++i) {
		unsigned int j;
		struct rxq_elt_sp *elt = &(*elts)[i];
		struct ibv_sge (*sges)[RTE_DIM(elt->sges)] = &elt->sges;

		/* These two arrays must have the same size. */
		assert(RTE_DIM(elt->sges) == RTE_DIM(elt->bufs));
		/* For each SGE (segment). */
		for (j = 0; (j != RTE_DIM(elt->bufs)); ++j) {
			struct ibv_sge *sge = &(*sges)[j];
			struct rte_mbuf *buf;

			if (pool != NULL) {
				buf = *(pool++);
				assert(buf != NULL);
				rte_pktmbuf_reset(buf);
			} else
				buf = rte_pktmbuf_alloc(rxq->mp);
			if (buf == NULL) {
				assert(pool == NULL);
				ERROR("%p: empty mbuf pool", (void *)rxq);
				ret = ENOMEM;
				goto error;
			}
			elt->bufs[j] = buf;
			/* Headroom is reserved by rte_pktmbuf_alloc(). */
			assert(DATA_OFF(buf) == RTE_PKTMBUF_HEADROOM);
			/* Buffer is supposed to be empty. */
			assert(rte_pktmbuf_data_len(buf) == 0);
			assert(rte_pktmbuf_pkt_len(buf) == 0);
			/* sge->addr must be able to store a pointer. */
			assert(sizeof(sge->addr) >= sizeof(uintptr_t));
			if (j == 0) {
				/* The first SGE keeps its headroom. */
				sge->addr = rte_pktmbuf_mtod(buf, uintptr_t);
				sge->length = (buf->buf_len -
					       RTE_PKTMBUF_HEADROOM);
			} else {
				/* Subsequent SGEs lose theirs. */
				assert(DATA_OFF(buf) == RTE_PKTMBUF_HEADROOM);
				SET_DATA_OFF(buf, 0);
				sge->addr = (uintptr_t)buf->buf_addr;
				sge->length = buf->buf_len;
			}
			sge->lkey = rxq->mr->lkey;
			/* Redundant check for tailroom. */
			assert(sge->length == rte_pktmbuf_tailroom(buf));
		}
	}
	DEBUG("%p: allocated and configured %u WRs (%zu segments)",
	      (void *)rxq, elts_n, (elts_n * RTE_DIM((*elts)[0].sges)));
	rxq->elts_n = elts_n;
	rxq->elts_head = 0;
	rxq->elts.sp = elts;
	assert(ret == 0);
	return 0;
error:
	if (elts != NULL) {
		assert(pool == NULL);
		for (i = 0; (i != RTE_DIM(*elts)); ++i) {
			unsigned int j;
			struct rxq_elt_sp *elt = &(*elts)[i];

			for (j = 0; (j != RTE_DIM(elt->bufs)); ++j) {
				struct rte_mbuf *buf = elt->bufs[j];

				if (buf != NULL)
					rte_pktmbuf_free_seg(buf);
			}
		}
		rte_free(elts);
	}
	DEBUG("%p: failed, freed everything", (void *)rxq);
	assert(ret > 0);
	return ret;
}
/**
 * Free RX queue elements with scattered packets support.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 */
static void
rxq_free_elts_sp(struct rxq *rxq)
{
	unsigned int i;
	unsigned int elts_n = rxq->elts_n;
	struct rxq_elt_sp (*elts)[elts_n] = rxq->elts.sp;

	DEBUG("%p: freeing WRs", (void *)rxq);
	rxq->elts_n = 0;
	rxq->elts.sp = NULL;
	if (elts == NULL)
		return;
	for (i = 0; (i != RTE_DIM(*elts)); ++i) {
		unsigned int j;
		struct rxq_elt_sp *elt = &(*elts)[i];

		for (j = 0; (j != RTE_DIM(elt->bufs)); ++j) {
			struct rte_mbuf *buf = elt->bufs[j];

			if (buf != NULL)
				rte_pktmbuf_free_seg(buf);
		}
	}
	rte_free(elts);
}
/**
 * Allocate RX queue elements.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 * @param elts_n
 *   Number of elements to allocate.
 * @param[in] pool
 *   If not NULL, fetch buffers from this array instead of allocating them
 *   with rte_pktmbuf_alloc().
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
rxq_alloc_elts(struct rxq *rxq, unsigned int elts_n, struct rte_mbuf **pool)
{
	unsigned int i;
	struct rxq_elt (*elts)[elts_n] =
		rte_calloc_socket("RXQ elements", 1, sizeof(*elts), 0,
				  rxq->socket);
	int ret = 0;

	if (elts == NULL) {
		ERROR("%p: can't allocate packets array", (void *)rxq);
		ret = ENOMEM;
		goto error;
	}
	/* For each WR (packet). */
	for (i = 0; (i != elts_n); ++i) {
		struct rxq_elt *elt = &(*elts)[i];
		struct ibv_sge *sge = &(*elts)[i].sge;
		struct rte_mbuf *buf;

		if (pool != NULL) {
			buf = *(pool++);
			assert(buf != NULL);
			rte_pktmbuf_reset(buf);
		} else
			buf = rte_pktmbuf_alloc(rxq->mp);
		if (buf == NULL) {
			assert(pool == NULL);
			ERROR("%p: empty mbuf pool", (void *)rxq);
			ret = ENOMEM;
			goto error;
		}
		elt->buf = buf;
		/* Headroom is reserved by rte_pktmbuf_alloc(). */
		assert(DATA_OFF(buf) == RTE_PKTMBUF_HEADROOM);
		/* Buffer is supposed to be empty. */
		assert(rte_pktmbuf_data_len(buf) == 0);
		assert(rte_pktmbuf_pkt_len(buf) == 0);
		/* sge->addr must be able to store a pointer. */
		assert(sizeof(sge->addr) >= sizeof(uintptr_t));
		/* SGE keeps its headroom. */
		sge->addr = (uintptr_t)
			((uint8_t *)buf->buf_addr + RTE_PKTMBUF_HEADROOM);
		sge->length = (buf->buf_len - RTE_PKTMBUF_HEADROOM);
		sge->lkey = rxq->mr->lkey;
		/* Redundant check for tailroom. */
		assert(sge->length == rte_pktmbuf_tailroom(buf));
	}
	DEBUG("%p: allocated and configured %u single-segment WRs",
	      (void *)rxq, elts_n);
	rxq->elts_n = elts_n;
	rxq->elts_head = 0;
	rxq->elts.no_sp = elts;
	assert(ret == 0);
	return 0;
error:
	if (elts != NULL) {
		assert(pool == NULL);
		for (i = 0; (i != RTE_DIM(*elts)); ++i) {
			struct rxq_elt *elt = &(*elts)[i];
			struct rte_mbuf *buf = elt->buf;

			if (buf != NULL)
				rte_pktmbuf_free_seg(buf);
		}
		rte_free(elts);
	}
	DEBUG("%p: failed, freed everything", (void *)rxq);
	assert(ret > 0);
	return ret;
}
/**
 * Free RX queue elements.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 */
static void
rxq_free_elts(struct rxq *rxq)
{
	unsigned int i;
	unsigned int elts_n = rxq->elts_n;
	struct rxq_elt (*elts)[elts_n] = rxq->elts.no_sp;

	DEBUG("%p: freeing WRs", (void *)rxq);
	rxq->elts_n = 0;
	rxq->elts.no_sp = NULL;
	if (elts == NULL)
		return;
	for (i = 0; (i != RTE_DIM(*elts)); ++i) {
		struct rxq_elt *elt = &(*elts)[i];
		struct rte_mbuf *buf = elt->buf;

		if (buf != NULL)
			rte_pktmbuf_free_seg(buf);
	}
	rte_free(elts);
}
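/*
 * Note: claim_zero() below wraps calls whose return value is expected to be
 * zero; presumably (per mlx5_utils.h) it asserts this in debug builds and
 * simply evaluates its argument otherwise, so teardown errors are loud in
 * development but ignored in production.
 */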
/**
 * Clean up an RX queue.
 *
 * Destroy objects, free allocated memory and reset the structure for reuse.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 */
void
rxq_cleanup(struct rxq *rxq)
{
	struct ibv_exp_release_intf_params params;

	DEBUG("cleaning up %p", (void *)rxq);
	if (rxq->sp)
		rxq_free_elts_sp(rxq);
	else
		rxq_free_elts(rxq);
	if (rxq->if_qp != NULL) {
		assert(rxq->priv != NULL);
		assert(rxq->priv->ctx != NULL);
		assert(rxq->qp != NULL);
		params = (struct ibv_exp_release_intf_params){
			.comp_mask = 0,
		};
		claim_zero(ibv_exp_release_intf(rxq->priv->ctx,
						rxq->if_qp,
						&params));
	}
	if (rxq->if_cq != NULL) {
		assert(rxq->priv != NULL);
		assert(rxq->priv->ctx != NULL);
		assert(rxq->cq != NULL);
		params = (struct ibv_exp_release_intf_params){
			.comp_mask = 0,
		};
		claim_zero(ibv_exp_release_intf(rxq->priv->ctx,
						rxq->if_cq,
						&params));
	}
	if (rxq->qp != NULL) {
		rxq_promiscuous_disable(rxq);
		rxq_allmulticast_disable(rxq);
		rxq_mac_addrs_del(rxq);
		claim_zero(ibv_destroy_qp(rxq->qp));
	}
	if (rxq->cq != NULL)
		claim_zero(ibv_destroy_cq(rxq->cq));
	if (rxq->rd != NULL) {
		struct ibv_exp_destroy_res_domain_attr attr = {
			.comp_mask = 0,
		};

		assert(rxq->priv != NULL);
		assert(rxq->priv->ctx != NULL);
		claim_zero(ibv_exp_destroy_res_domain(rxq->priv->ctx,
						      rxq->rd,
						      &attr));
	}
	if (rxq->mr != NULL)
		claim_zero(ibv_dereg_mr(rxq->mr));
	memset(rxq, 0, sizeof(*rxq));
}
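/*
 * Both QP constructors below clamp the requested WR count and the per-WR SGE
 * count to the device limits reported in priv->device_attr, so an oversized
 * descriptor request degrades gracefully instead of failing QP creation.
 */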
/**
 * Allocate a Queue Pair.
 * Optionally setup inline receive if supported.
 *
 * @param priv
 *   Pointer to private structure.
 * @param cq
 *   Completion queue to associate with QP.
 * @param desc
 *   Number of descriptors in QP (hint only).
 * @param rd
 *   Resource domain.
 *
 * @return
 *   QP pointer or NULL in case of error.
 */
static struct ibv_qp *
rxq_setup_qp(struct priv *priv, struct ibv_cq *cq, uint16_t desc,
	     struct ibv_exp_res_domain *rd)
{
	struct ibv_exp_qp_init_attr attr = {
		/* CQ to be associated with the send queue. */
		.send_cq = cq,
		/* CQ to be associated with the receive queue. */
		.recv_cq = cq,
		.cap = {
			/* Max number of outstanding WRs. */
			.max_recv_wr = ((priv->device_attr.max_qp_wr < desc) ?
					priv->device_attr.max_qp_wr :
					desc),
			/* Max number of scatter/gather elements in a WR. */
			.max_recv_sge = ((priv->device_attr.max_sge <
					  MLX5_PMD_SGE_WR_N) ?
					 priv->device_attr.max_sge :
					 MLX5_PMD_SGE_WR_N),
		},
		.qp_type = IBV_QPT_RAW_PACKET,
		.comp_mask = (IBV_EXP_QP_INIT_ATTR_PD |
			      IBV_EXP_QP_INIT_ATTR_RES_DOMAIN),
		.pd = priv->pd,
		.res_domain = rd,
	};

	return ibv_exp_create_qp(priv->ctx, &attr);
}
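/*
 * With RSS enabled, RX queues form a QP group: a parent QP owns the hash
 * configuration and spreads incoming flows across its child QPs, one child
 * per configured RX queue (see rss_child_count below).
 */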
#ifdef RSS_SUPPORT

/**
 * Allocate an RSS Queue Pair.
 * Optionally setup inline receive if supported.
 *
 * @param priv
 *   Pointer to private structure.
 * @param cq
 *   Completion queue to associate with QP.
 * @param desc
 *   Number of descriptors in QP (hint only).
 * @param parent
 *   If nonzero, create a parent QP, otherwise a child.
 * @param rd
 *   Resource domain.
 *
 * @return
 *   QP pointer or NULL in case of error.
 */
static struct ibv_qp *
rxq_setup_qp_rss(struct priv *priv, struct ibv_cq *cq, uint16_t desc,
		 int parent, struct ibv_exp_res_domain *rd)
{
	struct ibv_exp_qp_init_attr attr = {
		/* CQ to be associated with the send queue. */
		.send_cq = cq,
		/* CQ to be associated with the receive queue. */
		.recv_cq = cq,
		.cap = {
			/* Max number of outstanding WRs. */
			.max_recv_wr = ((priv->device_attr.max_qp_wr < desc) ?
					priv->device_attr.max_qp_wr :
					desc),
			/* Max number of scatter/gather elements in a WR. */
			.max_recv_sge = ((priv->device_attr.max_sge <
					  MLX5_PMD_SGE_WR_N) ?
					 priv->device_attr.max_sge :
					 MLX5_PMD_SGE_WR_N),
		},
		.qp_type = IBV_QPT_RAW_PACKET,
		.comp_mask = (IBV_EXP_QP_INIT_ATTR_PD |
			      IBV_EXP_QP_INIT_ATTR_RES_DOMAIN |
			      IBV_EXP_QP_INIT_ATTR_QPG),
		.pd = priv->pd,
		.res_domain = rd,
	};

	if (parent) {
		attr.qpg.qpg_type = IBV_EXP_QPG_PARENT;
		/* TSS isn't necessary. */
		attr.qpg.parent_attrib.tss_child_count = 0;
		attr.qpg.parent_attrib.rss_child_count = priv->rxqs_n;
		DEBUG("initializing parent RSS queue");
	} else {
		attr.qpg.qpg_type = IBV_EXP_QPG_CHILD_RX;
		attr.qpg.qpg_parent = priv->rxq_parent.qp;
		DEBUG("initializing child RSS queue");
	}
	return ibv_exp_create_qp(priv->ctx, &attr);
}

#endif /* RSS_SUPPORT */
/**
 * Reconfigure an RX queue with new parameters.
 *
 * rxq_rehash() does not allocate mbufs, which, if not done from the right
 * thread (such as a control thread), may corrupt the pool.
 * In case of failure, the queue is left untouched.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param rxq
 *   RX queue pointer.
 *
 * @return
 *   0 on success, errno value on failure.
 */
int
rxq_rehash(struct rte_eth_dev *dev, struct rxq *rxq)
{
	struct priv *priv = rxq->priv;
	struct rxq tmpl = *rxq;
	unsigned int mbuf_n;
	unsigned int desc_n;
	struct rte_mbuf **pool;
	unsigned int i, k;
	struct ibv_exp_qp_attr mod;
	int err;
	int parent = (rxq == &priv->rxq_parent);

	if (parent) {
		ERROR("%p: cannot rehash parent queue %p",
		      (void *)dev, (void *)rxq);
		return EINVAL;
	}
	DEBUG("%p: rehashing queue %p", (void *)dev, (void *)rxq);
	/* Number of descriptors and mbufs currently allocated. */
	desc_n = (tmpl.elts_n * (tmpl.sp ? MLX5_PMD_SGE_WR_N : 1));
	mbuf_n = desc_n;
	/* Toggle RX checksum offload if hardware supports it. */
	if (priv->hw_csum) {
		tmpl.csum = !!dev->data->dev_conf.rxmode.hw_ip_checksum;
		rxq->csum = tmpl.csum;
	}
	if (priv->hw_csum_l2tun) {
		tmpl.csum_l2tun = !!dev->data->dev_conf.rxmode.hw_ip_checksum;
		rxq->csum_l2tun = tmpl.csum_l2tun;
	}
	/* Enable scattered packets support for this queue if necessary. */
	if ((dev->data->dev_conf.rxmode.jumbo_frame) &&
	    (dev->data->dev_conf.rxmode.max_rx_pkt_len >
	     (tmpl.mb_len - RTE_PKTMBUF_HEADROOM))) {
		tmpl.sp = 1;
		desc_n /= MLX5_PMD_SGE_WR_N;
	} else
		tmpl.sp = 0;
	DEBUG("%p: %s scattered packets support (%u WRs)",
	      (void *)dev, (tmpl.sp ? "enabling" : "disabling"), desc_n);
	/* If scatter mode is the same as before, nothing to do. */
	if (tmpl.sp == rxq->sp) {
		DEBUG("%p: nothing to do", (void *)dev);
		return 0;
	}
	/* Remove attached flows if RSS is disabled (no parent queue). */
	if (!priv->rss) {
		rxq_allmulticast_disable(&tmpl);
		rxq_promiscuous_disable(&tmpl);
		rxq_mac_addrs_del(&tmpl);
		/* Update original queue in case of failure. */
		rxq->allmulti_flow = tmpl.allmulti_flow;
		rxq->promisc_flow = tmpl.promisc_flow;
		memcpy(rxq->mac_flow, tmpl.mac_flow, sizeof(rxq->mac_flow));
	}
	/* From now on, any failure will render the queue unusable.
	 * Reinitialize QP. */
	mod = (struct ibv_exp_qp_attr){ .qp_state = IBV_QPS_RESET };
	err = ibv_exp_modify_qp(tmpl.qp, &mod, IBV_EXP_QP_STATE);
	if (err) {
		ERROR("%p: cannot reset QP: %s", (void *)dev, strerror(err));
		assert(err > 0);
		return err;
	}
	err = ibv_resize_cq(tmpl.cq, desc_n);
	if (err) {
		ERROR("%p: cannot resize CQ: %s", (void *)dev, strerror(err));
		assert(err > 0);
		return err;
	}
	mod = (struct ibv_exp_qp_attr){
		/* Move the QP to this state. */
		.qp_state = IBV_QPS_INIT,
		/* Primary port number. */
		.port_num = priv->port
	};
	err = ibv_exp_modify_qp(tmpl.qp, &mod,
				(IBV_EXP_QP_STATE |
#ifdef RSS_SUPPORT
				 (parent ? IBV_EXP_QP_GROUP_RSS : 0) |
#endif /* RSS_SUPPORT */
				 IBV_EXP_QP_PORT));
	if (err) {
		ERROR("%p: QP state to IBV_QPS_INIT failed: %s",
		      (void *)dev, strerror(err));
		assert(err > 0);
		return err;
	}
	/* Reconfigure flows. Do not care for errors. */
	if (!priv->rss) {
		rxq_mac_addrs_add(&tmpl);
		if (priv->started && priv->promisc_req)
			rxq_promiscuous_enable(&tmpl);
		if (priv->started && priv->allmulti_req)
			rxq_allmulticast_enable(&tmpl);
		/* Update original queue in case of failure. */
		rxq->allmulti_flow = tmpl.allmulti_flow;
		rxq->promisc_flow = tmpl.promisc_flow;
		memcpy(rxq->mac_flow, tmpl.mac_flow, sizeof(rxq->mac_flow));
	}
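	/*
	 * A temporary array is allocated below and the mbufs already attached
	 * to the original queue are "snatched" into it, then handed back to
	 * rxq_alloc_elts*() through the pool argument.  This is what lets
	 * rxq_rehash() avoid allocating fresh mbufs from a control thread
	 * (see the warning in the function documentation above).
	 */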
	/* Allocate pool. */
	pool = rte_malloc(__func__, (mbuf_n * sizeof(*pool)), 0);
	if (pool == NULL) {
		ERROR("%p: cannot allocate memory", (void *)dev);
		return ENOBUFS;
	}
	/* Snatch mbufs from original queue. */
	k = 0;
	if (rxq->sp) {
		struct rxq_elt_sp (*elts)[rxq->elts_n] = rxq->elts.sp;

		for (i = 0; (i != RTE_DIM(*elts)); ++i) {
			struct rxq_elt_sp *elt = &(*elts)[i];
			unsigned int j;

			for (j = 0; (j != RTE_DIM(elt->bufs)); ++j) {
				assert(elt->bufs[j] != NULL);
				pool[k++] = elt->bufs[j];
			}
		}
	} else {
		struct rxq_elt (*elts)[rxq->elts_n] = rxq->elts.no_sp;

		for (i = 0; (i != RTE_DIM(*elts)); ++i) {
			struct rxq_elt *elt = &(*elts)[i];
			struct rte_mbuf *buf = elt->buf;

			pool[k++] = buf;
		}
	}
	assert(k == mbuf_n);
	tmpl.elts_n = 0;
	tmpl.elts.sp = NULL;
	assert((void *)&tmpl.elts.sp == (void *)&tmpl.elts.no_sp);
	err = ((tmpl.sp) ?
	       rxq_alloc_elts_sp(&tmpl, desc_n, pool) :
	       rxq_alloc_elts(&tmpl, desc_n, pool));
	if (err) {
		ERROR("%p: cannot reallocate WRs, aborting", (void *)dev);
		rte_free(pool);
		assert(err > 0);
		return err;
	}
	assert(tmpl.elts_n == desc_n);
	assert(tmpl.elts.sp != NULL);
	rte_free(pool);
	/* Clean up original data. */
	rxq->elts_n = 0;
	rte_free(rxq->elts.sp);
	rxq->elts.sp = NULL;
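	/*
	 * The new elements reference the same mbufs through fresh SGEs, so
	 * every receive WR must be posted again before the QP can be moved
	 * back to the RTR state.
	 */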
	/* Post SGEs. */
	assert(tmpl.if_qp != NULL);
	if (tmpl.sp) {
		struct rxq_elt_sp (*elts)[tmpl.elts_n] = tmpl.elts.sp;

		for (i = 0; (i != RTE_DIM(*elts)); ++i) {
#ifdef HAVE_EXP_QP_BURST_RECV_SG_LIST
			err = tmpl.if_qp->recv_sg_list
				(tmpl.qp,
				 (*elts)[i].sges,
				 RTE_DIM((*elts)[i].sges));
#else /* HAVE_EXP_QP_BURST_RECV_SG_LIST */
			errno = ENOSYS;
			err = -1;
#endif /* HAVE_EXP_QP_BURST_RECV_SG_LIST */
			if (err)
				break;
		}
	} else {
		struct rxq_elt (*elts)[tmpl.elts_n] = tmpl.elts.no_sp;

		for (i = 0; (i != RTE_DIM(*elts)); ++i) {
			err = tmpl.if_qp->recv_burst(
				tmpl.qp,
				&(*elts)[i].sge,
				1);
			if (err)
				break;
		}
	}
	if (err) {
		ERROR("%p: failed to post SGEs with error %d",
		      (void *)dev, err);
		/* Set err because it does not contain a valid errno value. */
		err = EIO;
		goto skip_rtr;
	}
	mod = (struct ibv_exp_qp_attr){
		.qp_state = IBV_QPS_RTR
	};
	err = ibv_exp_modify_qp(tmpl.qp, &mod, IBV_EXP_QP_STATE);
	if (err)
		ERROR("%p: QP state to IBV_QPS_RTR failed: %s",
		      (void *)dev, strerror(err));
skip_rtr:
	*rxq = tmpl;
	assert(err >= 0);
	return err;
}
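/*
 * Note on the verbs state machine used above and in rxq_setup() below: an RX
 * queue pair goes RESET -> INIT (port assignment) -> receive WRs posted ->
 * RTR (ready to receive).  No RTS transition is made, apparently because
 * these raw packet QPs are receive-only.
 */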
/**
 * Configure an RX queue.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param rxq
 *   Pointer to RX queue structure.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 * @param[in] conf
 *   Thresholds parameters.
 * @param mp
 *   Memory pool for buffer allocations.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc,
	  unsigned int socket, const struct rte_eth_rxconf *conf,
	  struct rte_mempool *mp)
{
	struct priv *priv = dev->data->dev_private;
	struct rxq tmpl = {
		.priv = priv,
		.mp = mp,
		.socket = socket
	};
	struct ibv_exp_qp_attr mod;
	union {
		struct ibv_exp_query_intf_params params;
		struct ibv_exp_cq_init_attr cq;
		struct ibv_exp_res_domain_init_attr rd;
	} attr;
	enum ibv_exp_query_intf_status status;
	struct rte_mbuf *buf;
	int ret = 0;
	int parent = (rxq == &priv->rxq_parent);
	unsigned int i;

	(void)conf; /* Thresholds configuration (ignored). */
	/*
	 * If this is a parent queue, hardware must support RSS and
	 * RSS must be enabled.
	 */
	assert((!parent) || ((priv->hw_rss) && (priv->rss)));
	if (parent) {
		/* Even if unused, ibv_create_cq() requires at least one
		 * descriptor. */
		desc = 1;
		goto skip_mr;
	}
	if ((desc == 0) || (desc % MLX5_PMD_SGE_WR_N)) {
		ERROR("%p: invalid number of RX descriptors (must be a"
		      " multiple of %d)", (void *)dev, MLX5_PMD_SGE_WR_N);
		return EINVAL;
	}
	/* Get mbuf length. */
	buf = rte_pktmbuf_alloc(mp);
	if (buf == NULL) {
		ERROR("%p: unable to allocate mbuf", (void *)dev);
		return ENOMEM;
	}
	tmpl.mb_len = buf->buf_len;
	assert((rte_pktmbuf_headroom(buf) +
		rte_pktmbuf_tailroom(buf)) == tmpl.mb_len);
	assert(rte_pktmbuf_headroom(buf) == RTE_PKTMBUF_HEADROOM);
	rte_pktmbuf_free(buf);
	/* Toggle RX checksum offload if hardware supports it. */
	if (priv->hw_csum)
		tmpl.csum = !!dev->data->dev_conf.rxmode.hw_ip_checksum;
	if (priv->hw_csum_l2tun)
		tmpl.csum_l2tun = !!dev->data->dev_conf.rxmode.hw_ip_checksum;
	/* Enable scattered packets support for this queue if necessary. */
	if ((dev->data->dev_conf.rxmode.jumbo_frame) &&
	    (dev->data->dev_conf.rxmode.max_rx_pkt_len >
	     (tmpl.mb_len - RTE_PKTMBUF_HEADROOM))) {
		tmpl.sp = 1;
		desc /= MLX5_PMD_SGE_WR_N;
	}
	DEBUG("%p: %s scattered packets support (%u WRs)",
	      (void *)dev, (tmpl.sp ? "enabling" : "disabling"), desc);
	/* Use the entire RX mempool as the memory region. */
	tmpl.mr = ibv_reg_mr(priv->pd,
			     (void *)mp->elt_va_start,
			     (mp->elt_va_end - mp->elt_va_start),
			     (IBV_ACCESS_LOCAL_WRITE |
			      IBV_ACCESS_REMOTE_WRITE));
	if (tmpl.mr == NULL) {
		ret = EINVAL;
		ERROR("%p: MR creation failure: %s",
		      (void *)dev, strerror(ret));
		goto error;
	}
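	/*
	 * Registering the mempool's entire virtual address range above means
	 * a single lkey covers every mbuf in the pool, so each SGE only needs
	 * rxq->mr->lkey.  The IBV_EXP_THREAD_SINGLE resource domain created
	 * below declares that this CQ/QP pair is used by one thread only,
	 * which should let verbs skip internal locking on the data path.
	 */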
skip_mr:
	attr.rd = (struct ibv_exp_res_domain_init_attr){
		.comp_mask = (IBV_EXP_RES_DOMAIN_THREAD_MODEL |
			      IBV_EXP_RES_DOMAIN_MSG_MODEL),
		.thread_model = IBV_EXP_THREAD_SINGLE,
		.msg_model = IBV_EXP_MSG_HIGH_BW,
	};
	tmpl.rd = ibv_exp_create_res_domain(priv->ctx, &attr.rd);
	if (tmpl.rd == NULL) {
		ret = ENOMEM;
		ERROR("%p: RD creation failure: %s",
		      (void *)dev, strerror(ret));
		goto error;
	}
	attr.cq = (struct ibv_exp_cq_init_attr){
		.comp_mask = IBV_EXP_CQ_INIT_ATTR_RES_DOMAIN,
		.res_domain = tmpl.rd,
	};
	tmpl.cq = ibv_exp_create_cq(priv->ctx, desc, NULL, NULL, 0, &attr.cq);
	if (tmpl.cq == NULL) {
		ret = ENOMEM;
		ERROR("%p: CQ creation failure: %s",
		      (void *)dev, strerror(ret));
		goto error;
	}
	DEBUG("priv->device_attr.max_qp_wr is %d",
	      priv->device_attr.max_qp_wr);
	DEBUG("priv->device_attr.max_sge is %d",
	      priv->device_attr.max_sge);
#ifdef RSS_SUPPORT
	if (priv->rss)
		tmpl.qp = rxq_setup_qp_rss(priv, tmpl.cq, desc, parent,
					   tmpl.rd);
	else
#endif /* RSS_SUPPORT */
		tmpl.qp = rxq_setup_qp(priv, tmpl.cq, desc, tmpl.rd);
	if (tmpl.qp == NULL) {
		ret = (errno ? errno : EINVAL);
		ERROR("%p: QP creation failure: %s",
		      (void *)dev, strerror(ret));
		goto error;
	}
	mod = (struct ibv_exp_qp_attr){
		/* Move the QP to this state. */
		.qp_state = IBV_QPS_INIT,
		/* Primary port number. */
		.port_num = priv->port
	};
	ret = ibv_exp_modify_qp(tmpl.qp, &mod,
				(IBV_EXP_QP_STATE |
#ifdef RSS_SUPPORT
				 (parent ? IBV_EXP_QP_GROUP_RSS : 0) |
#endif /* RSS_SUPPORT */
				 IBV_EXP_QP_PORT));
	if (ret) {
		ERROR("%p: QP state to IBV_QPS_INIT failed: %s",
		      (void *)dev, strerror(ret));
		goto error;
	}
	if ((parent) || (!priv->rss)) {
		/* Configure MAC and broadcast addresses. */
		ret = rxq_mac_addrs_add(&tmpl);
		if (ret) {
			ERROR("%p: QP flow attachment failed: %s",
			      (void *)dev, strerror(ret));
			goto error;
		}
	}
	/* Allocate descriptors for RX queues, except for the RSS parent. */
	if (parent)
		goto skip_alloc;
	if (tmpl.sp)
		ret = rxq_alloc_elts_sp(&tmpl, desc, NULL);
	else
		ret = rxq_alloc_elts(&tmpl, desc, NULL);
	if (ret) {
		ERROR("%p: RXQ allocation failed: %s",
		      (void *)dev, strerror(ret));
		goto error;
	}
skip_alloc:
	/* Save port ID. */
	tmpl.port_id = dev->data->port_id;
	DEBUG("%p: RTE port ID: %u", (void *)rxq, tmpl.port_id);
	attr.params = (struct ibv_exp_query_intf_params){
		.intf_scope = IBV_EXP_INTF_GLOBAL,
		.intf = IBV_EXP_INTF_CQ,
		.obj = tmpl.cq,
	};
	tmpl.if_cq = ibv_exp_query_intf(priv->ctx, &attr.params, &status);
	if (tmpl.if_cq == NULL) {
		ERROR("%p: CQ interface family query failed with status %d",
		      (void *)dev, status);
		goto error;
	}
	attr.params = (struct ibv_exp_query_intf_params){
		.intf_scope = IBV_EXP_INTF_GLOBAL,
		.intf = IBV_EXP_INTF_QP_BURST,
		.obj = tmpl.qp,
	};
	tmpl.if_qp = ibv_exp_query_intf(priv->ctx, &attr.params, &status);
	if (tmpl.if_qp == NULL) {
		ERROR("%p: QP interface family query failed with status %d",
		      (void *)dev, status);
		goto error;
	}
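	/*
	 * The "interface family" queries above return tables of direct
	 * function pointers (if_cq/if_qp); recv_burst() and recv_sg_list()
	 * below post receive buffers through them instead of the generic
	 * ibv_post_recv() path.
	 */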
	/* Post SGEs. */
	if (!parent && tmpl.sp) {
		struct rxq_elt_sp (*elts)[tmpl.elts_n] = tmpl.elts.sp;

		for (i = 0; (i != RTE_DIM(*elts)); ++i) {
#ifdef HAVE_EXP_QP_BURST_RECV_SG_LIST
			ret = tmpl.if_qp->recv_sg_list
				(tmpl.qp,
				 (*elts)[i].sges,
				 RTE_DIM((*elts)[i].sges));
#else /* HAVE_EXP_QP_BURST_RECV_SG_LIST */
			errno = ENOSYS;
			ret = -1;
#endif /* HAVE_EXP_QP_BURST_RECV_SG_LIST */
			if (ret)
				break;
		}
	} else if (!parent) {
		struct rxq_elt (*elts)[tmpl.elts_n] = tmpl.elts.no_sp;

		for (i = 0; (i != RTE_DIM(*elts)); ++i) {
			ret = tmpl.if_qp->recv_burst(
				tmpl.qp,
				&(*elts)[i].sge,
				1);
			if (ret)
				break;
		}
	}
	if (ret) {
		ERROR("%p: failed to post SGEs with error %d",
		      (void *)dev, ret);
		/* Set ret because it does not contain a valid errno value. */
		ret = EIO;
		goto error;
	}
	mod = (struct ibv_exp_qp_attr){
		.qp_state = IBV_QPS_RTR
	};
	ret = ibv_exp_modify_qp(tmpl.qp, &mod, IBV_EXP_QP_STATE);
	if (ret) {
		ERROR("%p: QP state to IBV_QPS_RTR failed: %s",
		      (void *)dev, strerror(ret));
		goto error;
	}
	/* Clean up rxq in case we're reinitializing it. */
	DEBUG("%p: cleaning-up old rxq just in case", (void *)rxq);
	rxq_cleanup(rxq);
	*rxq = tmpl;
	DEBUG("%p: rxq updated with %p", (void *)rxq, (void *)&tmpl);
	assert(ret == 0);
	return 0;
error:
	rxq_cleanup(&tmpl);
	assert(ret > 0);
	return ret;
}
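/*
 * Internal rxq_* functions return positive errno values; the DPDK callbacks
 * below convert them to the negative errno convention expected by
 * rte_ethdev (note the "return -ret" in mlx5_rx_queue_setup()).
 */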
/**
 * DPDK callback to configure an RX queue.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   RX queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 * @param[in] conf
 *   Thresholds parameters.
 * @param mp
 *   Memory pool for buffer allocations.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
int
mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
		    unsigned int socket, const struct rte_eth_rxconf *conf,
		    struct rte_mempool *mp)
{
	struct priv *priv = dev->data->dev_private;
	struct rxq *rxq = (*priv->rxqs)[idx];
	int ret;

	priv_lock(priv);
	DEBUG("%p: configuring queue %u for %u descriptors",
	      (void *)dev, idx, desc);
	if (idx >= priv->rxqs_n) {
		ERROR("%p: queue index out of range (%u >= %u)",
		      (void *)dev, idx, priv->rxqs_n);
		priv_unlock(priv);
		return -EOVERFLOW;
	}
	if (rxq != NULL) {
		DEBUG("%p: reusing already allocated queue index %u (%p)",
		      (void *)dev, idx, (void *)rxq);
		if (priv->started) {
			priv_unlock(priv);
			return -EEXIST;
		}
		(*priv->rxqs)[idx] = NULL;
		rxq_cleanup(rxq);
	} else {
		rxq = rte_calloc_socket("RXQ", 1, sizeof(*rxq), 0, socket);
		if (rxq == NULL) {
			ERROR("%p: unable to allocate queue index %u",
			      (void *)dev, idx);
			priv_unlock(priv);
			return -ENOMEM;
		}
	}
	ret = rxq_setup(dev, rxq, desc, socket, conf, mp);
	if (ret)
		rte_free(rxq);
	else {
		rxq->stats.idx = idx;
		DEBUG("%p: adding RX queue %p to list",
		      (void *)dev, (void *)rxq);
		(*priv->rxqs)[idx] = rxq;
		/* Update receive callback. */
		if (rxq->sp)
			dev->rx_pkt_burst = mlx5_rx_burst_sp;
		else
			dev->rx_pkt_burst = mlx5_rx_burst;
	}
	priv_unlock(priv);
	return -ret;
}
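/*
 * Note that rx_pkt_burst is a per-device callback: configuring a queue with
 * scattered RX switches the whole port to mlx5_rx_burst_sp(), which
 * presumably also handles non-scattered queues, at some performance cost.
 */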
/**
 * DPDK callback to release an RX queue.
 *
 * @param dpdk_rxq
 *   Generic RX queue pointer.
 */
void
mlx5_rx_queue_release(void *dpdk_rxq)
{
	struct rxq *rxq = (struct rxq *)dpdk_rxq;
	struct priv *priv;
	unsigned int i;

	if (rxq == NULL)
		return;
	priv = rxq->priv;
	priv_lock(priv);
	assert(rxq != &priv->rxq_parent);
	for (i = 0; (i != priv->rxqs_n); ++i)
		if ((*priv->rxqs)[i] == rxq) {
			DEBUG("%p: removing RX queue %p from list",
			      (void *)priv->dev, (void *)rxq);
			(*priv->rxqs)[i] = NULL;