/*-
 *   BSD LICENSE
 *
 *   Copyright 2015 6WIND S.A.
 *   Copyright 2015 Mellanox.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of 6WIND S.A. nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <stddef.h>
#include <assert.h>
#include <errno.h>
#include <string.h>
#include <stdint.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-pedantic"
#endif
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-pedantic"
#endif

/* DPDK headers don't like -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-pedantic"
#endif
#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev.h>
#include <rte_common.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-pedantic"
#endif

#include "mlx5.h"
#include "mlx5_rxtx.h"
#include "mlx5_utils.h"
#include "mlx5_defs.h"

/**
 * Allocate RX queue elements with scattered packets support.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 * @param elts_n
 *   Number of elements to allocate.
 * @param[in] pool
 *   If not NULL, fetch buffers from this array instead of allocating them
 *   with rte_pktmbuf_alloc().
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
rxq_alloc_elts_sp(struct rxq *rxq, unsigned int elts_n,
                  struct rte_mbuf **pool)
{
        unsigned int i;
        struct rxq_elt_sp (*elts)[elts_n] =
                rte_calloc_socket("RXQ elements", 1, sizeof(*elts), 0,
                                  rxq->socket);
        int ret = 0;

        if (elts == NULL) {
                ERROR("%p: can't allocate packets array", (void *)rxq);
                ret = ENOMEM;
                goto error;
        }
        /* For each WR (packet). */
        for (i = 0; (i != elts_n); ++i) {
                unsigned int j;
                struct rxq_elt_sp *elt = &(*elts)[i];
                struct ibv_recv_wr *wr = &elt->wr;
                struct ibv_sge (*sges)[RTE_DIM(elt->sges)] = &elt->sges;

                /* These two arrays must have the same size. */
                assert(RTE_DIM(elt->sges) == RTE_DIM(elt->bufs));
                /* Configure WR. Work request ID contains its own index. */
                wr->wr_id = i;
                wr->next = &(*elts)[(i + 1)].wr;
                wr->sg_list = &(*sges)[0];
                wr->num_sge = RTE_DIM(*sges);
                /* For each SGE (segment). */
                for (j = 0; (j != RTE_DIM(elt->bufs)); ++j) {
                        struct ibv_sge *sge = &(*sges)[j];
                        struct rte_mbuf *buf;

                        if (pool != NULL) {
                                buf = *(pool++);
                                assert(buf != NULL);
                                rte_pktmbuf_reset(buf);
                        } else
                                buf = rte_pktmbuf_alloc(rxq->mp);
                        if (buf == NULL) {
                                assert(pool == NULL);
                                ERROR("%p: empty mbuf pool", (void *)rxq);
                                ret = ENOMEM;
                                goto error;
                        }
                        elt->bufs[j] = buf;
                        /* Headroom is reserved by rte_pktmbuf_alloc(). */
                        assert(DATA_OFF(buf) == RTE_PKTMBUF_HEADROOM);
                        /* Buffer is supposed to be empty. */
                        assert(rte_pktmbuf_data_len(buf) == 0);
                        assert(rte_pktmbuf_pkt_len(buf) == 0);
                        /* sge->addr must be able to store a pointer. */
                        assert(sizeof(sge->addr) >= sizeof(uintptr_t));
                        if (j == 0) {
                                /* The first SGE keeps its headroom. */
                                sge->addr = rte_pktmbuf_mtod(buf, uintptr_t);
                                sge->length = (buf->buf_len -
                                               RTE_PKTMBUF_HEADROOM);
                        } else {
                                /* Subsequent SGEs lose theirs. */
                                assert(DATA_OFF(buf) == RTE_PKTMBUF_HEADROOM);
                                SET_DATA_OFF(buf, 0);
                                sge->addr = (uintptr_t)buf->buf_addr;
                                sge->length = buf->buf_len;
                        }
                        sge->lkey = rxq->mr->lkey;
                        /* Redundant check for tailroom. */
                        assert(sge->length == rte_pktmbuf_tailroom(buf));
                }
        }
        /* The last WR pointer must be NULL. */
        (*elts)[(i - 1)].wr.next = NULL;
        DEBUG("%p: allocated and configured %u WRs (%zu segments)",
              (void *)rxq, elts_n, (elts_n * RTE_DIM((*elts)[0].sges)));
        rxq->elts_n = elts_n;
        rxq->elts.sp = elts;
        assert(ret == 0);
        return 0;
error:
        if (elts != NULL) {
                assert(pool == NULL);
                for (i = 0; (i != RTE_DIM(*elts)); ++i) {
                        unsigned int j;
                        struct rxq_elt_sp *elt = &(*elts)[i];

                        for (j = 0; (j != RTE_DIM(elt->bufs)); ++j) {
                                struct rte_mbuf *buf = elt->bufs[j];

                                if (buf == NULL)
                                        continue;
                                rte_pktmbuf_free_seg(buf);
                        }
                }
                rte_free(elts);
        }
        DEBUG("%p: failed, freed everything", (void *)rxq);
        assert(ret > 0);
        return ret;
}

/**
 * Free RX queue elements with scattered packets support.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 */
static void
rxq_free_elts_sp(struct rxq *rxq)
{
        unsigned int i;
        unsigned int elts_n = rxq->elts_n;
        struct rxq_elt_sp (*elts)[elts_n] = rxq->elts.sp;

        DEBUG("%p: freeing WRs", (void *)rxq);
        rxq->elts_n = 0;
        rxq->elts.sp = NULL;
        if (elts == NULL)
                return;
        for (i = 0; (i != RTE_DIM(*elts)); ++i) {
                unsigned int j;
                struct rxq_elt_sp *elt = &(*elts)[i];

                for (j = 0; (j != RTE_DIM(elt->bufs)); ++j) {
                        struct rte_mbuf *buf = elt->bufs[j];

                        if (buf == NULL)
                                continue;
                        rte_pktmbuf_free_seg(buf);
                }
        }
        rte_free(elts);
}

/**
 * Allocate RX queue elements.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 * @param elts_n
 *   Number of elements to allocate.
 * @param[in] pool
 *   If not NULL, fetch buffers from this array instead of allocating them
 *   with rte_pktmbuf_alloc().
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
rxq_alloc_elts(struct rxq *rxq, unsigned int elts_n, struct rte_mbuf **pool)
{
        unsigned int i;
        struct rxq_elt (*elts)[elts_n] =
                rte_calloc_socket("RXQ elements", 1, sizeof(*elts), 0,
                                  rxq->socket);
        int ret = 0;

        if (elts == NULL) {
                ERROR("%p: can't allocate packets array", (void *)rxq);
                ret = ENOMEM;
                goto error;
        }
        /* For each WR (packet). */
        for (i = 0; (i != elts_n); ++i) {
                struct rxq_elt *elt = &(*elts)[i];
                struct ibv_recv_wr *wr = &elt->wr;
                struct ibv_sge *sge = &(*elts)[i].sge;
                struct rte_mbuf *buf;

                if (pool != NULL) {
                        buf = *(pool++);
                        assert(buf != NULL);
                        rte_pktmbuf_reset(buf);
                } else
                        buf = rte_pktmbuf_alloc(rxq->mp);
                if (buf == NULL) {
                        assert(pool == NULL);
                        ERROR("%p: empty mbuf pool", (void *)rxq);
                        ret = ENOMEM;
                        goto error;
                }
                /* Configure WR. Work request ID contains its own index in
                 * the elts array and the offset between SGE buffer header and
                 * its data. */
                WR_ID(wr->wr_id).id = i;
                WR_ID(wr->wr_id).offset =
                        (((uintptr_t)buf->buf_addr + RTE_PKTMBUF_HEADROOM) -
                         (uintptr_t)buf);
                wr->next = &(*elts)[(i + 1)].wr;
                wr->sg_list = sge;
                wr->num_sge = 1;
                /* Headroom is reserved by rte_pktmbuf_alloc(). */
                assert(DATA_OFF(buf) == RTE_PKTMBUF_HEADROOM);
                /* Buffer is supposed to be empty. */
                assert(rte_pktmbuf_data_len(buf) == 0);
                assert(rte_pktmbuf_pkt_len(buf) == 0);
                /* sge->addr must be able to store a pointer. */
                assert(sizeof(sge->addr) >= sizeof(uintptr_t));
                /* SGE keeps its headroom. */
                sge->addr = (uintptr_t)
                        ((uint8_t *)buf->buf_addr + RTE_PKTMBUF_HEADROOM);
                sge->length = (buf->buf_len - RTE_PKTMBUF_HEADROOM);
                sge->lkey = rxq->mr->lkey;
                /* Redundant check for tailroom. */
                assert(sge->length == rte_pktmbuf_tailroom(buf));
                /* Make sure elts index and SGE mbuf pointer can be deduced
                 * from WR ID. */
                if ((WR_ID(wr->wr_id).id != i) ||
                    ((void *)((uintptr_t)sge->addr -
                              WR_ID(wr->wr_id).offset) != buf)) {
                        ERROR("%p: cannot store index and offset in WR ID",
                              (void *)rxq);
                        sge->addr = 0;
                        rte_pktmbuf_free(buf);
                        ret = EOVERFLOW;
                        goto error;
                }
        }
        /* The last WR pointer must be NULL. */
        (*elts)[(i - 1)].wr.next = NULL;
        DEBUG("%p: allocated and configured %u single-segment WRs",
              (void *)rxq, elts_n);
        rxq->elts_n = elts_n;
        rxq->elts.no_sp = elts;
        assert(ret == 0);
        return 0;
error:
        if (elts != NULL) {
                assert(pool == NULL);
                for (i = 0; (i != RTE_DIM(*elts)); ++i) {
                        struct rxq_elt *elt = &(*elts)[i];
                        struct rte_mbuf *buf;

                        if (elt->sge.addr == 0)
                                continue;
                        assert(WR_ID(elt->wr.wr_id).id == i);
                        buf = (void *)((uintptr_t)elt->sge.addr -
                                       WR_ID(elt->wr.wr_id).offset);
                        rte_pktmbuf_free_seg(buf);
                }
                rte_free(elts);
        }
        DEBUG("%p: failed, freed everything", (void *)rxq);
        assert(ret > 0);
        return ret;
}

/**
 * Free RX queue elements.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 */
static void
rxq_free_elts(struct rxq *rxq)
{
        unsigned int i;
        unsigned int elts_n = rxq->elts_n;
        struct rxq_elt (*elts)[elts_n] = rxq->elts.no_sp;

        DEBUG("%p: freeing WRs", (void *)rxq);
        rxq->elts_n = 0;
        rxq->elts.no_sp = NULL;
        if (elts == NULL)
                return;
        for (i = 0; (i != RTE_DIM(*elts)); ++i) {
                struct rxq_elt *elt = &(*elts)[i];
                struct rte_mbuf *buf;

                if (elt->sge.addr == 0)
                        continue;
                assert(WR_ID(elt->wr.wr_id).id == i);
                buf = (void *)((uintptr_t)elt->sge.addr -
                               WR_ID(elt->wr.wr_id).offset);
                rte_pktmbuf_free_seg(buf);
        }
        rte_free(elts);
}

/**
 * Clean up an RX queue.
 *
 * Destroy objects, free allocated memory and reset the structure for reuse.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 */
void
rxq_cleanup(struct rxq *rxq)
{
        struct ibv_exp_release_intf_params params;

        DEBUG("cleaning up %p", (void *)rxq);
        if (rxq->sp)
                rxq_free_elts_sp(rxq);
        else
                rxq_free_elts(rxq);
        if (rxq->if_qp != NULL) {
                assert(rxq->priv != NULL);
                assert(rxq->priv->ctx != NULL);
                assert(rxq->qp != NULL);
                params = (struct ibv_exp_release_intf_params){
                        .comp_mask = 0,
                };
                claim_zero(ibv_exp_release_intf(rxq->priv->ctx,
                                                rxq->if_qp,
                                                &params));
        }
        if (rxq->if_cq != NULL) {
                assert(rxq->priv != NULL);
                assert(rxq->priv->ctx != NULL);
                assert(rxq->cq != NULL);
                params = (struct ibv_exp_release_intf_params){
                        .comp_mask = 0,
                };
                claim_zero(ibv_exp_release_intf(rxq->priv->ctx,
                                                rxq->if_cq,
                                                &params));
        }
        if (rxq->qp != NULL) {
                rxq_promiscuous_disable(rxq);
                rxq_allmulticast_disable(rxq);
                rxq_mac_addrs_del(rxq);
                claim_zero(ibv_destroy_qp(rxq->qp));
        }
        if (rxq->cq != NULL)
                claim_zero(ibv_destroy_cq(rxq->cq));
        if (rxq->rd != NULL) {
                struct ibv_exp_destroy_res_domain_attr attr = {
                        .comp_mask = 0,
                };

                assert(rxq->priv != NULL);
                assert(rxq->priv->ctx != NULL);
                claim_zero(ibv_exp_destroy_res_domain(rxq->priv->ctx,
                                                      rxq->rd,
                                                      &attr));
        }
        if (rxq->mr != NULL)
                claim_zero(ibv_dereg_mr(rxq->mr));
        memset(rxq, 0, sizeof(*rxq));
}

/**
 * Allocate a Queue Pair.
 * Optionally set up inline receive if supported.
 *
 * @param priv
 *   Pointer to private structure.
 * @param cq
 *   Completion queue to associate with QP.
 * @param desc
 *   Number of descriptors in QP (hint only).
 * @param rd
 *   Resource domain to associate with QP.
 *
 * @return
 *   QP pointer or NULL in case of error.
 */
static struct ibv_qp *
rxq_setup_qp(struct priv *priv, struct ibv_cq *cq, uint16_t desc,
             struct ibv_exp_res_domain *rd)
{
        struct ibv_exp_qp_init_attr attr = {
                /* CQ to be associated with the send queue. */
                .send_cq = cq,
                /* CQ to be associated with the receive queue. */
                .recv_cq = cq,
                .cap = {
                        /* Max number of outstanding WRs. */
                        .max_recv_wr = ((priv->device_attr.max_qp_wr < desc) ?
                                        priv->device_attr.max_qp_wr :
                                        desc),
                        /* Max number of scatter/gather elements in a WR. */
                        .max_recv_sge = ((priv->device_attr.max_sge <
                                          MLX5_PMD_SGE_WR_N) ?
                                         priv->device_attr.max_sge :
                                         MLX5_PMD_SGE_WR_N),
                },
                .qp_type = IBV_QPT_RAW_PACKET,
                .comp_mask = (IBV_EXP_QP_INIT_ATTR_PD |
                              IBV_EXP_QP_INIT_ATTR_RES_DOMAIN),
                .pd = priv->pd,
                .res_domain = rd,
        };

        return ibv_exp_create_qp(priv->ctx, &attr);
}

#ifdef RSS_SUPPORT

/**
 * Allocate an RSS Queue Pair.
 * Optionally set up inline receive if supported.
 *
 * @param priv
 *   Pointer to private structure.
 * @param cq
 *   Completion queue to associate with QP.
 * @param desc
 *   Number of descriptors in QP (hint only).
 * @param parent
 *   If nonzero, create a parent QP, otherwise a child.
 * @param rd
 *   Resource domain to associate with QP.
 *
 * @return
 *   QP pointer or NULL in case of error.
 */
static struct ibv_qp *
rxq_setup_qp_rss(struct priv *priv, struct ibv_cq *cq, uint16_t desc,
                 int parent, struct ibv_exp_res_domain *rd)
{
        struct ibv_exp_qp_init_attr attr = {
                /* CQ to be associated with the send queue. */
                .send_cq = cq,
                /* CQ to be associated with the receive queue. */
                .recv_cq = cq,
                .cap = {
                        /* Max number of outstanding WRs. */
                        .max_recv_wr = ((priv->device_attr.max_qp_wr < desc) ?
                                        priv->device_attr.max_qp_wr :
                                        desc),
                        /* Max number of scatter/gather elements in a WR. */
                        .max_recv_sge = ((priv->device_attr.max_sge <
                                          MLX5_PMD_SGE_WR_N) ?
                                         priv->device_attr.max_sge :
                                         MLX5_PMD_SGE_WR_N),
                },
                .qp_type = IBV_QPT_RAW_PACKET,
                .comp_mask = (IBV_EXP_QP_INIT_ATTR_PD |
                              IBV_EXP_QP_INIT_ATTR_RES_DOMAIN |
                              IBV_EXP_QP_INIT_ATTR_QPG),
                .pd = priv->pd,
                .res_domain = rd,
        };

        if (parent) {
                attr.qpg.qpg_type = IBV_EXP_QPG_PARENT;
                /* TSS isn't necessary. */
                attr.qpg.parent_attrib.tss_child_count = 0;
                attr.qpg.parent_attrib.rss_child_count = priv->rxqs_n;
                DEBUG("initializing parent RSS queue");
        } else {
                attr.qpg.qpg_type = IBV_EXP_QPG_CHILD_RX;
                attr.qpg.qpg_parent = priv->rxq_parent.qp;
                DEBUG("initializing child RSS queue");
        }
        return ibv_exp_create_qp(priv->ctx, &attr);
}

#endif /* RSS_SUPPORT */

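/*
 * The child branch above reads priv->rxq_parent.qp, so the parent QP must
 * exist before any child is created. A hedged sketch of the expected call
 * order (illustration only; error handling and variable setup omitted):
 *
 *	parent_qp = rxq_setup_qp_rss(priv, parent_cq, 1, 1, parent_rd);
 *	for (i = 0; (i != priv->rxqs_n); ++i)
 *		child_qp = rxq_setup_qp_rss(priv, cq[i], desc, 0, rd[i]);
 *
 * rxq_setup() below handles both cases; the parent queue is expected to be
 * set up first, at device configuration time.
 */
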
/**
 * Reconfigure an RX queue with new parameters.
 *
 * rxq_rehash() does not allocate mbufs; it recycles those of the original
 * queue, since allocating mbufs from the wrong thread (i.e. anything but a
 * control thread) may corrupt the pool.
 * In case of failure, the queue is left untouched.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param rxq
 *   RX queue pointer.
 *
 * @return
 *   0 on success, errno value on failure.
 */
int
rxq_rehash(struct rte_eth_dev *dev, struct rxq *rxq)
{
        struct priv *priv = rxq->priv;
        struct rxq tmpl = *rxq;
        unsigned int mbuf_n;
        unsigned int desc_n;
        struct rte_mbuf **pool;
        unsigned int i, k;
        struct ibv_exp_qp_attr mod;
        struct ibv_recv_wr *bad_wr;
        int err;
        int parent = (rxq == &priv->rxq_parent);

        if (parent) {
                ERROR("%p: cannot rehash parent queue %p",
                      (void *)dev, (void *)rxq);
                return EINVAL;
        }
        DEBUG("%p: rehashing queue %p", (void *)dev, (void *)rxq);
        /* Number of descriptors and mbufs currently allocated. */
        desc_n = (tmpl.elts_n * (tmpl.sp ? MLX5_PMD_SGE_WR_N : 1));
        mbuf_n = desc_n;
        /* Toggle RX checksum offload if hardware supports it. */
        if (priv->hw_csum) {
                tmpl.csum = !!dev->data->dev_conf.rxmode.hw_ip_checksum;
                rxq->csum = tmpl.csum;
        }
        if (priv->hw_csum_l2tun) {
                tmpl.csum_l2tun = !!dev->data->dev_conf.rxmode.hw_ip_checksum;
                rxq->csum_l2tun = tmpl.csum_l2tun;
        }
        /* Enable scattered packets support for this queue if necessary. */
        if ((dev->data->dev_conf.rxmode.jumbo_frame) &&
            (dev->data->dev_conf.rxmode.max_rx_pkt_len >
             (tmpl.mb_len - RTE_PKTMBUF_HEADROOM))) {
                tmpl.sp = 1;
                desc_n /= MLX5_PMD_SGE_WR_N;
        } else
                tmpl.sp = 0;
        DEBUG("%p: %s scattered packets support (%u WRs)",
              (void *)dev, (tmpl.sp ? "enabling" : "disabling"), desc_n);
        /* If scatter mode is the same as before, nothing to do. */
        if (tmpl.sp == rxq->sp) {
                DEBUG("%p: nothing to do", (void *)dev);
                return 0;
        }
        /* Remove attached flows if RSS is disabled (no parent queue). */
        if (!priv->rss) {
                rxq_allmulticast_disable(&tmpl);
                rxq_promiscuous_disable(&tmpl);
                rxq_mac_addrs_del(&tmpl);
                /* Update original queue in case of failure. */
                rxq->allmulti_flow = tmpl.allmulti_flow;
                rxq->promisc_flow = tmpl.promisc_flow;
                memcpy(rxq->mac_flow, tmpl.mac_flow, sizeof(rxq->mac_flow));
        }
        /* From now on, any failure will render the queue unusable.
         * Reinitialize QP. */
        mod = (struct ibv_exp_qp_attr){ .qp_state = IBV_QPS_RESET };
        err = ibv_exp_modify_qp(tmpl.qp, &mod, IBV_EXP_QP_STATE);
        if (err) {
                ERROR("%p: cannot reset QP: %s", (void *)dev, strerror(err));
                assert(err > 0);
                return err;
        }
        err = ibv_resize_cq(tmpl.cq, desc_n);
        if (err) {
                ERROR("%p: cannot resize CQ: %s", (void *)dev, strerror(err));
                assert(err > 0);
                return err;
        }
        mod = (struct ibv_exp_qp_attr){
                /* Move the QP to this state. */
                .qp_state = IBV_QPS_INIT,
                /* Primary port number. */
                .port_num = priv->port
        };
        err = ibv_exp_modify_qp(tmpl.qp, &mod,
                                (IBV_EXP_QP_STATE |
#ifdef RSS_SUPPORT
                                 (parent ? IBV_EXP_QP_GROUP_RSS : 0) |
#endif /* RSS_SUPPORT */
                                 IBV_EXP_QP_PORT));
        if (err) {
                ERROR("%p: QP state to IBV_QPS_INIT failed: %s",
                      (void *)dev, strerror(err));
                assert(err > 0);
                return err;
        }
        /* Reconfigure flows. Do not care for errors. */
        if (!priv->rss) {
                rxq_mac_addrs_add(&tmpl);
                if (priv->started && priv->promisc_req)
                        rxq_promiscuous_enable(&tmpl);
                if (priv->started && priv->allmulti_req)
                        rxq_allmulticast_enable(&tmpl);
                /* Update original queue in case of failure. */
                rxq->allmulti_flow = tmpl.allmulti_flow;
                rxq->promisc_flow = tmpl.promisc_flow;
                memcpy(rxq->mac_flow, tmpl.mac_flow, sizeof(rxq->mac_flow));
        }
        /* Allocate pool. */
        pool = rte_malloc(__func__, (mbuf_n * sizeof(*pool)), 0);
        if (pool == NULL) {
                ERROR("%p: cannot allocate memory", (void *)dev);
                return ENOBUFS;
        }
        /* Snatch mbufs from original queue. */
        k = 0;
        if (rxq->sp) {
                struct rxq_elt_sp (*elts)[rxq->elts_n] = rxq->elts.sp;

                for (i = 0; (i != RTE_DIM(*elts)); ++i) {
                        struct rxq_elt_sp *elt = &(*elts)[i];
                        unsigned int j;

                        for (j = 0; (j != RTE_DIM(elt->bufs)); ++j) {
                                assert(elt->bufs[j] != NULL);
                                pool[k++] = elt->bufs[j];
                        }
                }
        } else {
                struct rxq_elt (*elts)[rxq->elts_n] = rxq->elts.no_sp;

                for (i = 0; (i != RTE_DIM(*elts)); ++i) {
                        struct rxq_elt *elt = &(*elts)[i];
                        struct rte_mbuf *buf = (void *)
                                ((uintptr_t)elt->sge.addr -
                                 WR_ID(elt->wr.wr_id).offset);

                        assert(WR_ID(elt->wr.wr_id).id == i);
                        pool[k++] = buf;
                }
        }
        assert(k == mbuf_n);
        tmpl.elts_n = 0;
        tmpl.elts.sp = NULL;
        assert((void *)&tmpl.elts.sp == (void *)&tmpl.elts.no_sp);
        err = ((tmpl.sp) ?
               rxq_alloc_elts_sp(&tmpl, desc_n, pool) :
               rxq_alloc_elts(&tmpl, desc_n, pool));
        if (err) {
                ERROR("%p: cannot reallocate WRs, aborting", (void *)dev);
                rte_free(pool);
                assert(err > 0);
                return err;
        }
        assert(tmpl.elts_n == desc_n);
        assert(tmpl.elts.sp != NULL);
        rte_free(pool);
        /* Clean up original data. */
        rxq->elts_n = 0;
        rte_free(rxq->elts.sp);
        rxq->elts.sp = NULL;
        /* Post WRs. */
        err = ibv_post_recv(tmpl.qp,
                            (tmpl.sp ?
                             &(*tmpl.elts.sp)[0].wr :
                             &(*tmpl.elts.no_sp)[0].wr),
                            &bad_wr);
        if (err) {
                ERROR("%p: ibv_post_recv() failed for WR %p: %s",
                      (void *)dev,
                      (void *)bad_wr,
                      strerror(err));
                goto skip_rtr;
        }
        mod = (struct ibv_exp_qp_attr){
                .qp_state = IBV_QPS_RTR
        };
        err = ibv_exp_modify_qp(tmpl.qp, &mod, IBV_EXP_QP_STATE);
        if (err)
                ERROR("%p: QP state to IBV_QPS_RTR failed: %s",
                      (void *)dev, strerror(err));
skip_rtr:
        *rxq = tmpl;
        assert(err >= 0);
        return err;
}

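/*
 * rxq_rehash() is meant for reconfiguration paths that may run with the
 * port already started, typically an MTU update that toggles scattered RX
 * support. A hedged usage sketch (illustration only; "overhead" stands for
 * the frame overhead the caller accounts for):
 *
 *	dev->data->dev_conf.rxmode.jumbo_frame = (mtu > ETHER_MTU);
 *	dev->data->dev_conf.rxmode.max_rx_pkt_len = mtu + overhead;
 *	for (i = 0; (i != priv->rxqs_n); ++i)
 *		if (((*priv->rxqs)[i] != NULL) &&
 *		    rxq_rehash(dev, (*priv->rxqs)[i]))
 *			break;
 *
 * Because mbufs are recycled instead of reallocated, the mempool is never
 * touched from a non-control thread.
 */
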
/**
 * Configure an RX queue.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param rxq
 *   Pointer to RX queue structure.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 * @param[in] conf
 *   Thresholds parameters.
 * @param mp
 *   Memory pool for buffer allocations.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc,
          unsigned int socket, const struct rte_eth_rxconf *conf,
          struct rte_mempool *mp)
{
        struct priv *priv = dev->data->dev_private;
        struct rxq tmpl = {
                .priv = priv,
                .mp = mp,
                .socket = socket
        };
        struct ibv_exp_qp_attr mod;
        union {
                struct ibv_exp_query_intf_params params;
                struct ibv_exp_cq_init_attr cq;
                struct ibv_exp_res_domain_init_attr rd;
        } attr;
        enum ibv_exp_query_intf_status status;
        struct ibv_recv_wr *bad_wr;
        struct rte_mbuf *buf;
        int ret = 0;
        int parent = (rxq == &priv->rxq_parent);

        (void)conf; /* Thresholds configuration (ignored). */
        /*
         * If this is a parent queue, hardware must support RSS and
         * RSS must be enabled.
         */
        assert((!parent) || ((priv->hw_rss) && (priv->rss)));
        if (parent) {
                /* Even if unused, ibv_create_cq() requires at least one
                 * descriptor. */
                desc = 1;
                goto skip_mr;
        }
        if ((desc == 0) || (desc % MLX5_PMD_SGE_WR_N)) {
                ERROR("%p: invalid number of RX descriptors (must be a"
                      " multiple of %d)", (void *)dev, MLX5_PMD_SGE_WR_N);
                return EINVAL;
        }
        /* Get mbuf length. */
        buf = rte_pktmbuf_alloc(mp);
        if (buf == NULL) {
                ERROR("%p: unable to allocate mbuf", (void *)dev);
                return ENOMEM;
        }
        tmpl.mb_len = buf->buf_len;
        assert((rte_pktmbuf_headroom(buf) +
                rte_pktmbuf_tailroom(buf)) == tmpl.mb_len);
        assert(rte_pktmbuf_headroom(buf) == RTE_PKTMBUF_HEADROOM);
        rte_pktmbuf_free(buf);
        /* Toggle RX checksum offload if hardware supports it. */
        if (priv->hw_csum)
                tmpl.csum = !!dev->data->dev_conf.rxmode.hw_ip_checksum;
        if (priv->hw_csum_l2tun)
                tmpl.csum_l2tun = !!dev->data->dev_conf.rxmode.hw_ip_checksum;
        /* Enable scattered packets support for this queue if necessary. */
        if ((dev->data->dev_conf.rxmode.jumbo_frame) &&
            (dev->data->dev_conf.rxmode.max_rx_pkt_len >
             (tmpl.mb_len - RTE_PKTMBUF_HEADROOM))) {
                tmpl.sp = 1;
                desc /= MLX5_PMD_SGE_WR_N;
        }
        DEBUG("%p: %s scattered packets support (%u WRs)",
              (void *)dev, (tmpl.sp ? "enabling" : "disabling"), desc);
        /* Use the entire RX mempool as the memory region. */
        tmpl.mr = ibv_reg_mr(priv->pd,
                             (void *)mp->elt_va_start,
                             (mp->elt_va_end - mp->elt_va_start),
                             (IBV_ACCESS_LOCAL_WRITE |
                              IBV_ACCESS_REMOTE_WRITE));
        if (tmpl.mr == NULL) {
                ret = EINVAL;
                ERROR("%p: MR creation failure: %s",
                      (void *)dev, strerror(ret));
                goto error;
        }
skip_mr:
        attr.rd = (struct ibv_exp_res_domain_init_attr){
                .comp_mask = (IBV_EXP_RES_DOMAIN_THREAD_MODEL |
                              IBV_EXP_RES_DOMAIN_MSG_MODEL),
                .thread_model = IBV_EXP_THREAD_SINGLE,
                .msg_model = IBV_EXP_MSG_HIGH_BW,
        };
        tmpl.rd = ibv_exp_create_res_domain(priv->ctx, &attr.rd);
        if (tmpl.rd == NULL) {
                ret = ENOMEM;
                ERROR("%p: RD creation failure: %s",
                      (void *)dev, strerror(ret));
                goto error;
        }
        attr.cq = (struct ibv_exp_cq_init_attr){
                .comp_mask = IBV_EXP_CQ_INIT_ATTR_RES_DOMAIN,
                .res_domain = tmpl.rd,
        };
        tmpl.cq = ibv_exp_create_cq(priv->ctx, desc, NULL, NULL, 0, &attr.cq);
        if (tmpl.cq == NULL) {
                ret = ENOMEM;
                ERROR("%p: CQ creation failure: %s",
                      (void *)dev, strerror(ret));
                goto error;
        }
        DEBUG("priv->device_attr.max_qp_wr is %d",
              priv->device_attr.max_qp_wr);
        DEBUG("priv->device_attr.max_sge is %d",
              priv->device_attr.max_sge);
#ifdef RSS_SUPPORT
        if (priv->rss)
                tmpl.qp = rxq_setup_qp_rss(priv, tmpl.cq, desc, parent,
                                           tmpl.rd);
        else
#endif /* RSS_SUPPORT */
                tmpl.qp = rxq_setup_qp(priv, tmpl.cq, desc, tmpl.rd);
        if (tmpl.qp == NULL) {
                ret = (errno ? errno : EINVAL);
                ERROR("%p: QP creation failure: %s",
                      (void *)dev, strerror(ret));
                goto error;
        }
        mod = (struct ibv_exp_qp_attr){
                /* Move the QP to this state. */
                .qp_state = IBV_QPS_INIT,
                /* Primary port number. */
                .port_num = priv->port
        };
        ret = ibv_exp_modify_qp(tmpl.qp, &mod,
                                (IBV_EXP_QP_STATE |
#ifdef RSS_SUPPORT
                                 (parent ? IBV_EXP_QP_GROUP_RSS : 0) |
#endif /* RSS_SUPPORT */
                                 IBV_EXP_QP_PORT));
        if (ret) {
                ERROR("%p: QP state to IBV_QPS_INIT failed: %s",
                      (void *)dev, strerror(ret));
                goto error;
        }
        if ((parent) || (!priv->rss)) {
                /* Configure MAC and broadcast addresses. */
                ret = rxq_mac_addrs_add(&tmpl);
                if (ret) {
                        ERROR("%p: QP flow attachment failed: %s",
                              (void *)dev, strerror(ret));
                        goto error;
                }
        }
        /* Allocate descriptors for RX queues, except for the RSS parent. */
        if (parent)
                goto skip_alloc;
        if (tmpl.sp)
                ret = rxq_alloc_elts_sp(&tmpl, desc, NULL);
        else
                ret = rxq_alloc_elts(&tmpl, desc, NULL);
        if (ret) {
                ERROR("%p: RXQ allocation failed: %s",
                      (void *)dev, strerror(ret));
                goto error;
        }
        ret = ibv_post_recv(tmpl.qp,
                            (tmpl.sp ?
                             &(*tmpl.elts.sp)[0].wr :
                             &(*tmpl.elts.no_sp)[0].wr),
                            &bad_wr);
        if (ret) {
                ERROR("%p: ibv_post_recv() failed for WR %p: %s",
                      (void *)dev,
                      (void *)bad_wr,
                      strerror(ret));
                goto error;
        }
skip_alloc:
        mod = (struct ibv_exp_qp_attr){
                .qp_state = IBV_QPS_RTR
        };
        ret = ibv_exp_modify_qp(tmpl.qp, &mod, IBV_EXP_QP_STATE);
        if (ret) {
                ERROR("%p: QP state to IBV_QPS_RTR failed: %s",
                      (void *)dev, strerror(ret));
                goto error;
        }
        /* Save port ID. */
        tmpl.port_id = dev->data->port_id;
        DEBUG("%p: RTE port ID: %u", (void *)rxq, tmpl.port_id);
        attr.params = (struct ibv_exp_query_intf_params){
                .intf_scope = IBV_EXP_INTF_GLOBAL,
                .intf = IBV_EXP_INTF_CQ,
                .obj = tmpl.cq,
        };
        tmpl.if_cq = ibv_exp_query_intf(priv->ctx, &attr.params, &status);
        if (tmpl.if_cq == NULL) {
                ERROR("%p: CQ interface family query failed with status %d",
                      (void *)dev, status);
                goto error;
        }
        attr.params = (struct ibv_exp_query_intf_params){
                .intf_scope = IBV_EXP_INTF_GLOBAL,
                .intf = IBV_EXP_INTF_QP_BURST,
                .obj = tmpl.qp,
        };
        tmpl.if_qp = ibv_exp_query_intf(priv->ctx, &attr.params, &status);
        if (tmpl.if_qp == NULL) {
                ERROR("%p: QP interface family query failed with status %d",
                      (void *)dev, status);
                goto error;
        }
        /* Clean up rxq in case we're reinitializing it. */
        DEBUG("%p: cleaning-up old rxq just in case", (void *)rxq);
        rxq_cleanup(rxq);
        *rxq = tmpl;
        DEBUG("%p: rxq updated with %p", (void *)rxq, (void *)&tmpl);
        assert(ret == 0);
        return 0;
error:
        rxq_cleanup(&tmpl);
        assert(ret > 0);
        return ret;
}

/**
 * DPDK callback to configure an RX queue.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   RX queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 * @param[in] conf
 *   Thresholds parameters.
 * @param mp
 *   Memory pool for buffer allocations.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
int
mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
                    unsigned int socket, const struct rte_eth_rxconf *conf,
                    struct rte_mempool *mp)
{
        struct priv *priv = dev->data->dev_private;
        struct rxq *rxq = (*priv->rxqs)[idx];
        int ret;

        priv_lock(priv);
        DEBUG("%p: configuring queue %u for %u descriptors",
              (void *)dev, idx, desc);
        if (idx >= priv->rxqs_n) {
                ERROR("%p: queue index out of range (%u >= %u)",
                      (void *)dev, idx, priv->rxqs_n);
                priv_unlock(priv);
                return -EOVERFLOW;
        }
        if (rxq != NULL) {
                DEBUG("%p: reusing already allocated queue index %u (%p)",
                      (void *)dev, idx, (void *)rxq);
                if (priv->started) {
                        priv_unlock(priv);
                        return -EEXIST;
                }
                (*priv->rxqs)[idx] = NULL;
                rxq_cleanup(rxq);
        } else {
                rxq = rte_calloc_socket("RXQ", 1, sizeof(*rxq), 0, socket);
                if (rxq == NULL) {
                        ERROR("%p: unable to allocate queue index %u",
                              (void *)dev, idx);
                        priv_unlock(priv);
                        return -ENOMEM;
                }
        }
        ret = rxq_setup(dev, rxq, desc, socket, conf, mp);
        if (ret)
                rte_free(rxq);
        else {
                rxq->stats.idx = idx;
                DEBUG("%p: adding RX queue %p to list",
                      (void *)dev, (void *)rxq);
                (*priv->rxqs)[idx] = rxq;
                /* Update receive callback. */
                if (rxq->sp)
                        dev->rx_pkt_burst = mlx5_rx_burst_sp;
                else
                        dev->rx_pkt_burst = mlx5_rx_burst;
        }
        priv_unlock(priv);
        return -ret;
}

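/*
 * A minimal application-side sketch of how this callback is reached through
 * the ethdev API (illustration only; pool and queue sizes are arbitrary):
 *
 *	struct rte_mempool *mp =
 *		rte_pktmbuf_pool_create("rx_pool", 8192, 256, 0,
 *					RTE_MBUF_DEFAULT_BUF_SIZE,
 *					rte_socket_id());
 *	int ret = rte_eth_rx_queue_setup(port_id, 0, 512, rte_socket_id(),
 *					 NULL, mp);
 *
 * rte_eth_rx_queue_setup() dispatches here through dev->dev_ops, and a
 * negative return value is the negated errno from rxq_setup().
 */
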
/**
 * DPDK callback to release an RX queue.
 *
 * @param dpdk_rxq
 *   Generic RX queue pointer.
 */
void
mlx5_rx_queue_release(void *dpdk_rxq)
{
        struct rxq *rxq = (struct rxq *)dpdk_rxq;
        struct priv *priv;
        unsigned int i;

        if (rxq == NULL)
                return;
        priv = rxq->priv;
        priv_lock(priv);
        assert(rxq != &priv->rxq_parent);
        for (i = 0; (i != priv->rxqs_n); ++i)
                if ((*priv->rxqs)[i] == rxq) {
                        DEBUG("%p: removing RX queue %p from list",
                              (void *)priv->dev, (void *)rxq);
                        (*priv->rxqs)[i] = NULL;
                        break;
                }
        rxq_cleanup(rxq);
        rte_free(rxq);
        priv_unlock(priv);
}