 * Copyright 2015 6WIND S.A.
 * Copyright 2015 Mellanox.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *     distribution.
 *   * Neither the name of 6WIND S.A. nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <stddef.h>
#include <assert.h>
#include <errno.h>
#include <string.h>
#include <stdint.h>
/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-pedantic"
#endif
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-pedantic"
#endif
/* DPDK headers don't like -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-pedantic"
#endif
#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev.h>
#include <rte_common.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-pedantic"
#endif
#include "mlx5.h"
#include "mlx5_rxtx.h"
#include "mlx5_utils.h"
#include "mlx5_defs.h"
/* Initialization data for hash RX queues. */
static const struct hash_rxq_init hash_rxq_init[] = {
	[HASH_RXQ_TCPV4] = {
		.hash_fields = (IBV_EXP_RX_HASH_SRC_IPV4 |
				IBV_EXP_RX_HASH_DST_IPV4 |
				IBV_EXP_RX_HASH_SRC_PORT_TCP |
				IBV_EXP_RX_HASH_DST_PORT_TCP),
	},
	[HASH_RXQ_UDPV4] = {
		.hash_fields = (IBV_EXP_RX_HASH_SRC_IPV4 |
				IBV_EXP_RX_HASH_DST_IPV4 |
				IBV_EXP_RX_HASH_SRC_PORT_UDP |
				IBV_EXP_RX_HASH_DST_PORT_UDP),
	},
	[HASH_RXQ_IPV4] = {
		.hash_fields = (IBV_EXP_RX_HASH_SRC_IPV4 |
				IBV_EXP_RX_HASH_DST_IPV4),
	},
	[HASH_RXQ_ETH] = {
		.hash_fields = 0,
	},
};
/* Number of entries in hash_rxq_init[]. */
static const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
/* Initialization data for hash RX queue indirection tables. */
static const struct ind_table_init ind_table_init[] = {
	{
		.max_size = -1u, /* Superseded by HW limitations. */
		.hash_types =
			(1 << HASH_RXQ_TCPV4) |
			(1 << HASH_RXQ_UDPV4) |
			(1 << HASH_RXQ_IPV4) |
			0,
		.hash_types_n = 3,
	},
	{
		.max_size = 1,
		.hash_types = 1 << HASH_RXQ_ETH,
		.hash_types_n = 1,
	},
};

#define IND_TABLE_INIT_N RTE_DIM(ind_table_init)
/* Default RSS hash key also used for ConnectX-3. */
static uint8_t hash_rxq_default_key[] = {
	0x2c, 0xc6, 0x81, 0xd1,
	0x5b, 0xdb, 0xf4, 0xf7,
	0xfc, 0xa2, 0x83, 0x19,
	0xdb, 0x1a, 0x3e, 0x94,
	0x6b, 0x9e, 0x38, 0xd9,
	0x2c, 0x9c, 0x03, 0xd1,
	0xad, 0x99, 0x44, 0xa7,
	0xd9, 0x56, 0x3d, 0x59,
	0x06, 0x3c, 0x25, 0xf3,
	0xfc, 0x1f, 0xdc, 0x2a,
};
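
/*
 * Informative note: the key above is 40 bytes long, which is the length
 * passed as rx_hash_key_len when the Toeplitz hash function is configured
 * in priv_create_hash_rxqs() below (rx_hash_key_len is set to
 * sizeof(hash_rxq_default_key)).
 */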
/**
 * Return the base-two logarithm, rounded up, of the input value, so that
 * (1 << log2above(v)) is the nearest power of two above it.
 *
 * @param v
 *   Input value.
 *
 * @return
 *   Base-two logarithm of the nearest power of two above the input value.
 */
static unsigned int
log2above(unsigned int v)
{
	unsigned int l;
	unsigned int r;

	for (l = 0, r = 0; (v >> 1); ++l, v >>= 1)
		r |= (v & 1);
	return (l + r);
}
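
/*
 * Worked example (informative only):
 *
 *   log2above(1) == 0   (1 << 0) == 1
 *   log2above(4) == 2   (1 << 2) == 4
 *   log2above(5) == 3   (1 << 3) == 8
 *   log2above(6) == 3   (1 << 3) == 8
 *
 * Hence (1 << log2above(n)) is the smallest power of two >= n, which is
 * how priv_create_hash_rxqs() derives wqs_n from priv->rxqs_n.
 */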
/**
 * Return the type corresponding to the n'th bit set.
 *
 * @param table
 *   The indirection table.
 * @param n
 *   The n'th bit set.
 *
 * @return
 *   The corresponding hash_rxq_type.
 */
static enum hash_rxq_type
hash_rxq_type_from_n(const struct ind_table_init *table, unsigned int n)
{
	assert(n < table->hash_types_n);
	while (((table->hash_types >> n) & 0x1) == 0)
		++n;
	return n;
}
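
/*
 * Example (informative only): with hash_types 0x9 (bits 0 and 3 set),
 * hash_rxq_type_from_n(table, 0) returns 0 and
 * hash_rxq_type_from_n(table, 1) returns 3, i.e. n selects the n'th
 * enabled type starting from the least significant bit.
 */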
/**
 * Filter out disabled hash RX queue types from ind_table_init[].
 *
 * @param priv
 *   Pointer to private structure.
 * @param[out] table
 *   Output table.
 *
 * @return
 *   Number of table entries.
 */
static unsigned int
priv_make_ind_table_init(struct priv *priv,
			 struct ind_table_init (*table)[IND_TABLE_INIT_N])
{
	unsigned int i;
	unsigned int j;
	unsigned int table_n = 0;
	/* Mandatory to receive frames not handled by normal hash RX queues. */
	unsigned int hash_types_sup = 1 << HASH_RXQ_ETH;

	/* Process other protocols only if more than one queue. */
	if (priv->rxqs_n > 1)
		for (i = 0; (i != hash_rxq_init_n); ++i)
			if (hash_rxq_init[i].hash_fields)
				hash_types_sup |= (1 << i);

	/* Filter out entries whose protocols are not in the set. */
	for (i = 0, j = 0; (i != IND_TABLE_INIT_N); ++i) {
		unsigned int nb;
		unsigned int h;

		/* j is increased only if the table has valid protocols. */
		assert(j <= i);
		(*table)[j] = ind_table_init[i];
		(*table)[j].hash_types &= hash_types_sup;
		for (h = 0, nb = 0; (h != hash_rxq_init_n); ++h)
			if (((*table)[j].hash_types >> h) & 0x1)
				++nb;
		(*table)[j].hash_types_n = nb;
		if (nb) {
			++table_n;
			++j;
		}
	}
	return table_n;
}
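
/*
 * Example (informative only): with a single RX queue, hash_types_sup is
 * just (1 << HASH_RXQ_ETH), so the TCPv4/UDPv4/IPv4 entry of
 * ind_table_init[] loses all its types (nb == 0) and is dropped; the
 * function returns 1 and only the HASH_RXQ_ETH indirection table remains.
 */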
/**
 * Initialize hash RX queues and indirection table.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   0 on success, errno value on failure.
 */
int
priv_create_hash_rxqs(struct priv *priv)
{
	unsigned int wqs_n = (1 << log2above(priv->rxqs_n));
	struct ibv_exp_wq *wqs[wqs_n];
	struct ind_table_init ind_table_init[IND_TABLE_INIT_N];
	unsigned int ind_tables_n =
		priv_make_ind_table_init(priv, &ind_table_init);
	unsigned int hash_rxqs_n = 0;
	struct hash_rxq (*hash_rxqs)[] = NULL;
	struct ibv_exp_rwq_ind_table *(*ind_tables)[] = NULL;
	unsigned int i;
	unsigned int j;
	unsigned int k;
	int err = 0;

	assert(priv->ind_tables == NULL);
	assert(priv->ind_tables_n == 0);
	assert(priv->hash_rxqs == NULL);
	assert(priv->hash_rxqs_n == 0);
	assert(priv->pd != NULL);
	assert(priv->ctx != NULL);
	if (priv->rxqs_n == 0)
		return EINVAL;
	assert(priv->rxqs != NULL);
	if (ind_tables_n == 0) {
		ERROR("all hash RX queue types have been filtered out,"
		      " indirection table cannot be created");
		return EINVAL;
	}
	if (wqs_n < priv->rxqs_n) {
		ERROR("cannot handle this many RX queues (%u)", priv->rxqs_n);
		err = ERANGE;
		goto error;
	}
	if (wqs_n != priv->rxqs_n)
		WARN("%u RX queues are configured, consider rounding this"
		     " number to the next power of two (%u) for optimal"
		     " performance",
		     priv->rxqs_n, wqs_n);
	/* When the number of RX queues is not a power of two, the remaining
	 * table entries are padded with reused WQs and hashes are not spread
	 * uniformly. */
	for (i = 0, j = 0; (i != wqs_n); ++i) {
		wqs[i] = (*priv->rxqs)[j]->wq;
		if (++j == priv->rxqs_n)
			j = 0;
	}
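	/*
	 * Example (informative only): with 6 RX queues, wqs_n is 8 and the
	 * table above becomes { 0, 1, 2, 3, 4, 5, 0, 1 }: queues 0 and 1
	 * appear twice and therefore receive a larger share of the traffic.
	 */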
	/* Get number of hash RX queues to configure. */
	for (i = 0, hash_rxqs_n = 0; (i != ind_tables_n); ++i)
		hash_rxqs_n += ind_table_init[i].hash_types_n;
	DEBUG("allocating %u hash RX queues for %u WQs, %u indirection tables",
	      hash_rxqs_n, priv->rxqs_n, ind_tables_n);
	/* Create indirection tables. */
	ind_tables = rte_calloc(__func__, ind_tables_n,
				sizeof((*ind_tables)[0]), 0);
	if (ind_tables == NULL) {
		err = ENOMEM;
		ERROR("cannot allocate indirection tables container: %s",
		      strerror(err));
		goto error;
	}
	for (i = 0; (i != ind_tables_n); ++i) {
		struct ibv_exp_rwq_ind_table_init_attr ind_init_attr = {
			.pd = priv->pd,
			.log_ind_tbl_size = 0, /* Set below. */
			.ind_tbl = wqs,
			.comp_mask = 0,
		};
		unsigned int ind_tbl_size = ind_table_init[i].max_size;
		struct ibv_exp_rwq_ind_table *ind_table;

		if (wqs_n < ind_tbl_size)
			ind_tbl_size = wqs_n;
		ind_init_attr.log_ind_tbl_size = log2above(ind_tbl_size);
		errno = 0;
		ind_table = ibv_exp_create_rwq_ind_table(priv->ctx,
							 &ind_init_attr);
		if (ind_table != NULL) {
			(*ind_tables)[i] = ind_table;
			continue;
		}
		/* Not clear whether errno is set. */
		err = (errno ? errno : EINVAL);
		ERROR("RX indirection table creation failed with error %d: %s",
		      err, strerror(err));
		goto error;
	}
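	/*
	 * Sizing example (informative only): ind_tbl_size is capped at
	 * wqs_n, so with 4 WQs and max_size -1u the first table gets
	 * log_ind_tbl_size == 2 (4 entries), while the HASH_RXQ_ETH table
	 * (max_size 1 above) always gets log_ind_tbl_size == 0, a single
	 * entry.
	 */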
	/* Allocate array that holds hash RX queues and related data. */
	hash_rxqs = rte_calloc(__func__, hash_rxqs_n,
			       sizeof((*hash_rxqs)[0]), 0);
	if (hash_rxqs == NULL) {
		err = ENOMEM;
		ERROR("cannot allocate hash RX queues container: %s",
		      strerror(err));
		goto error;
	}
	for (i = 0, j = 0, k = 0;
	     ((i != hash_rxqs_n) && (j != ind_tables_n));
	     ++i) {
		struct hash_rxq *hash_rxq = &(*hash_rxqs)[i];
		enum hash_rxq_type type =
			hash_rxq_type_from_n(&ind_table_init[j], k);
		struct ibv_exp_rx_hash_conf hash_conf = {
			.rx_hash_function = IBV_EXP_RX_HASH_FUNC_TOEPLITZ,
			.rx_hash_key_len = sizeof(hash_rxq_default_key),
			.rx_hash_key = hash_rxq_default_key,
			.rx_hash_fields_mask = hash_rxq_init[type].hash_fields,
			.rwq_ind_tbl = (*ind_tables)[j],
		};
		struct ibv_exp_qp_init_attr qp_init_attr = {
			.max_inl_recv = 0, /* Currently not supported. */
			.qp_type = IBV_QPT_RAW_PACKET,
			.comp_mask = (IBV_EXP_QP_INIT_ATTR_PD |
				      IBV_EXP_QP_INIT_ATTR_RX_HASH),
			.pd = priv->pd,
			.rx_hash_conf = &hash_conf,
			.port_num = priv->port,
		};

		DEBUG("using indirection table %u for hash RX queue %u",
		      j, i);
		*hash_rxq = (struct hash_rxq){
			.priv = priv,
			.qp = ibv_exp_create_qp(priv->ctx, &qp_init_attr),
			.type = type,
		};
		if (hash_rxq->qp == NULL) {
			err = (errno ? errno : EINVAL);
			ERROR("Hash RX QP creation failure: %s",
			      strerror(err));
			goto error;
		}
		if (++k < ind_table_init[j].hash_types_n)
			continue;
		/* Switch to the next indirection table and reset hash RX
		 * queue type array index. */
		++j;
		k = 0;
	}
	priv->ind_tables = ind_tables;
	priv->ind_tables_n = ind_tables_n;
	priv->hash_rxqs = hash_rxqs;
	priv->hash_rxqs_n = hash_rxqs_n;
	return 0;
error:
	if (hash_rxqs != NULL) {
		for (i = 0; (i != hash_rxqs_n); ++i) {
			struct ibv_qp *qp = (*hash_rxqs)[i].qp;

			if (qp == NULL)
				continue;
			claim_zero(ibv_destroy_qp(qp));
		}
		rte_free(hash_rxqs);
	}
	if (ind_tables != NULL) {
		for (j = 0; (j != ind_tables_n); ++j) {
			struct ibv_exp_rwq_ind_table *ind_table =
				(*ind_tables)[j];

			if (ind_table == NULL)
				continue;
			claim_zero(ibv_exp_destroy_rwq_ind_table(ind_table));
		}
		rte_free(ind_tables);
	}
	return err;
}
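
/*
 * Layout example (informative only): with several RX queues, the loop
 * above creates one hash RX queue (QP) per enabled type, e.g. TCPv4,
 * UDPv4 and IPv4 on the first indirection table plus ETH on the second,
 * i.e. hash_rxqs_n == 4 QPs spread over ind_tables_n == 2 tables.
 */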
/**
 * Clean up hash RX queues and indirection table.
 *
 * @param priv
 *   Pointer to private structure.
 */
void
priv_destroy_hash_rxqs(struct priv *priv)
{
	unsigned int i;

	DEBUG("destroying %u hash RX queues", priv->hash_rxqs_n);
	if (priv->hash_rxqs_n == 0) {
		assert(priv->hash_rxqs == NULL);
		assert(priv->ind_tables == NULL);
		return;
	}
	for (i = 0; (i != priv->hash_rxqs_n); ++i) {
		struct hash_rxq *hash_rxq = &(*priv->hash_rxqs)[i];
		unsigned int j, k;

		assert(hash_rxq->priv == priv);
		assert(hash_rxq->qp != NULL);
		/* Also check that there are no remaining flows. */
		assert(hash_rxq->allmulti_flow == NULL);
		assert(hash_rxq->promisc_flow == NULL);
		for (j = 0; (j != RTE_DIM(hash_rxq->mac_flow)); ++j)
			for (k = 0; (k != RTE_DIM(hash_rxq->mac_flow[j])); ++k)
				assert(hash_rxq->mac_flow[j][k] == NULL);
		claim_zero(ibv_destroy_qp(hash_rxq->qp));
	}
	priv->hash_rxqs_n = 0;
	rte_free(priv->hash_rxqs);
	priv->hash_rxqs = NULL;
	for (i = 0; (i != priv->ind_tables_n); ++i) {
		struct ibv_exp_rwq_ind_table *ind_table =
			(*priv->ind_tables)[i];

		assert(ind_table != NULL);
		claim_zero(ibv_exp_destroy_rwq_ind_table(ind_table));
	}
	priv->ind_tables_n = 0;
	rte_free(priv->ind_tables);
	priv->ind_tables = NULL;
}
/**
 * Allocate RX queue elements with scattered packets support.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 * @param elts_n
 *   Number of elements to allocate.
 * @param[in] pool
 *   If not NULL, fetch buffers from this array instead of allocating them
 *   with rte_pktmbuf_alloc().
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
rxq_alloc_elts_sp(struct rxq *rxq, unsigned int elts_n,
		  struct rte_mbuf **pool)
{
	unsigned int i;
	struct rxq_elt_sp (*elts)[elts_n] =
		rte_calloc_socket("RXQ elements", 1, sizeof(*elts), 0,
				  rxq->socket);
	int ret = 0;

	if (elts == NULL) {
		ERROR("%p: can't allocate packets array", (void *)rxq);
		ret = ENOMEM;
		goto error;
	}
	/* For each WR (packet). */
	for (i = 0; (i != elts_n); ++i) {
		unsigned int j;
		struct rxq_elt_sp *elt = &(*elts)[i];
		struct ibv_sge (*sges)[RTE_DIM(elt->sges)] = &elt->sges;

		/* These two arrays must have the same size. */
		assert(RTE_DIM(elt->sges) == RTE_DIM(elt->bufs));
		/* For each SGE (segment). */
		for (j = 0; (j != RTE_DIM(elt->bufs)); ++j) {
			struct ibv_sge *sge = &(*sges)[j];
			struct rte_mbuf *buf;

			if (pool != NULL) {
				buf = *(pool++);
				assert(buf != NULL);
				rte_pktmbuf_reset(buf);
			} else
				buf = rte_pktmbuf_alloc(rxq->mp);
			if (buf == NULL) {
				assert(pool == NULL);
				ERROR("%p: empty mbuf pool", (void *)rxq);
				ret = ENOMEM;
				goto error;
			}
			elt->bufs[j] = buf;
			/* Headroom is reserved by rte_pktmbuf_alloc(). */
			assert(DATA_OFF(buf) == RTE_PKTMBUF_HEADROOM);
			/* Buffer is supposed to be empty. */
			assert(rte_pktmbuf_data_len(buf) == 0);
			assert(rte_pktmbuf_pkt_len(buf) == 0);
			/* sge->addr must be able to store a pointer. */
			assert(sizeof(sge->addr) >= sizeof(uintptr_t));
			if (j == 0) {
				/* The first SGE keeps its headroom. */
				sge->addr = rte_pktmbuf_mtod(buf, uintptr_t);
				sge->length = (buf->buf_len -
					       RTE_PKTMBUF_HEADROOM);
			} else {
				/* Subsequent SGEs lose theirs. */
				assert(DATA_OFF(buf) == RTE_PKTMBUF_HEADROOM);
				SET_DATA_OFF(buf, 0);
				sge->addr = (uintptr_t)buf->buf_addr;
				sge->length = buf->buf_len;
			}
			sge->lkey = rxq->mr->lkey;
			/* Redundant check for tailroom. */
			assert(sge->length == rte_pktmbuf_tailroom(buf));
		}
	}
	DEBUG("%p: allocated and configured %u WRs (%zu segments)",
	      (void *)rxq, elts_n, (elts_n * RTE_DIM((*elts)[0].sges)));
	rxq->elts_n = elts_n;
	rxq->elts_head = 0;
	rxq->elts.sp = elts;
	assert(ret == 0);
	return 0;
error:
	if (elts != NULL) {
		assert(pool == NULL);
		for (i = 0; (i != RTE_DIM(*elts)); ++i) {
			unsigned int j;
			struct rxq_elt_sp *elt = &(*elts)[i];

			for (j = 0; (j != RTE_DIM(elt->bufs)); ++j) {
				struct rte_mbuf *buf = elt->bufs[j];

				if (buf != NULL)
					rte_pktmbuf_free_seg(buf);
			}
		}
		rte_free(elts);
	}
	DEBUG("%p: failed, freed everything", (void *)rxq);
	assert(ret > 0);
	return ret;
}
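
/*
 * Resulting SGE layout for one element (informative only), with
 * MLX5_PMD_SGE_WR_N segments per work request:
 *
 *   sges[0].addr   = buf_addr + RTE_PKTMBUF_HEADROOM (headroom kept)
 *   sges[0].length = buf_len - RTE_PKTMBUF_HEADROOM
 *   sges[j].addr   = buf_addr (j > 0, headroom stripped)
 *   sges[j].length = buf_len
 *
 * All SGEs share the lkey of the memory region covering the mempool.
 */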
/**
 * Free RX queue elements with scattered packets support.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 */
static void
rxq_free_elts_sp(struct rxq *rxq)
{
	unsigned int i;
	unsigned int elts_n = rxq->elts_n;
	struct rxq_elt_sp (*elts)[elts_n] = rxq->elts.sp;

	DEBUG("%p: freeing WRs", (void *)rxq);
	rxq->elts_n = 0;
	rxq->elts.sp = NULL;
	if (elts == NULL)
		return;
	for (i = 0; (i != RTE_DIM(*elts)); ++i) {
		unsigned int j;
		struct rxq_elt_sp *elt = &(*elts)[i];

		for (j = 0; (j != RTE_DIM(elt->bufs)); ++j) {
			struct rte_mbuf *buf = elt->bufs[j];

			if (buf != NULL)
				rte_pktmbuf_free_seg(buf);
		}
	}
	rte_free(elts);
}
/**
 * Allocate RX queue elements.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 * @param elts_n
 *   Number of elements to allocate.
 * @param[in] pool
 *   If not NULL, fetch buffers from this array instead of allocating them
 *   with rte_pktmbuf_alloc().
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
rxq_alloc_elts(struct rxq *rxq, unsigned int elts_n, struct rte_mbuf **pool)
{
	unsigned int i;
	struct rxq_elt (*elts)[elts_n] =
		rte_calloc_socket("RXQ elements", 1, sizeof(*elts), 0,
				  rxq->socket);
	int ret = 0;

	if (elts == NULL) {
		ERROR("%p: can't allocate packets array", (void *)rxq);
		ret = ENOMEM;
		goto error;
	}
	/* For each WR (packet). */
	for (i = 0; (i != elts_n); ++i) {
		struct rxq_elt *elt = &(*elts)[i];
		struct ibv_sge *sge = &(*elts)[i].sge;
		struct rte_mbuf *buf;

		if (pool != NULL) {
			buf = *(pool++);
			assert(buf != NULL);
			rte_pktmbuf_reset(buf);
		} else
			buf = rte_pktmbuf_alloc(rxq->mp);
		if (buf == NULL) {
			assert(pool == NULL);
			ERROR("%p: empty mbuf pool", (void *)rxq);
			ret = ENOMEM;
			goto error;
		}
		elt->buf = buf;
		/* Headroom is reserved by rte_pktmbuf_alloc(). */
		assert(DATA_OFF(buf) == RTE_PKTMBUF_HEADROOM);
		/* Buffer is supposed to be empty. */
		assert(rte_pktmbuf_data_len(buf) == 0);
		assert(rte_pktmbuf_pkt_len(buf) == 0);
		/* sge->addr must be able to store a pointer. */
		assert(sizeof(sge->addr) >= sizeof(uintptr_t));
		/* SGE keeps its headroom. */
		sge->addr = (uintptr_t)
			((uint8_t *)buf->buf_addr + RTE_PKTMBUF_HEADROOM);
		sge->length = (buf->buf_len - RTE_PKTMBUF_HEADROOM);
		sge->lkey = rxq->mr->lkey;
		/* Redundant check for tailroom. */
		assert(sge->length == rte_pktmbuf_tailroom(buf));
	}
	DEBUG("%p: allocated and configured %u single-segment WRs",
	      (void *)rxq, elts_n);
	rxq->elts_n = elts_n;
	rxq->elts_head = 0;
	rxq->elts.no_sp = elts;
	assert(ret == 0);
	return 0;
error:
	if (elts != NULL) {
		assert(pool == NULL);
		for (i = 0; (i != RTE_DIM(*elts)); ++i) {
			struct rxq_elt *elt = &(*elts)[i];
			struct rte_mbuf *buf = elt->buf;

			if (buf != NULL)
				rte_pktmbuf_free_seg(buf);
		}
		rte_free(elts);
	}
	DEBUG("%p: failed, freed everything", (void *)rxq);
	assert(ret > 0);
	return ret;
}
/**
 * Free RX queue elements.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 */
static void
rxq_free_elts(struct rxq *rxq)
{
	unsigned int i;
	unsigned int elts_n = rxq->elts_n;
	struct rxq_elt (*elts)[elts_n] = rxq->elts.no_sp;

	DEBUG("%p: freeing WRs", (void *)rxq);
	rxq->elts_n = 0;
	rxq->elts.no_sp = NULL;
	if (elts == NULL)
		return;
	for (i = 0; (i != RTE_DIM(*elts)); ++i) {
		struct rxq_elt *elt = &(*elts)[i];
		struct rte_mbuf *buf = elt->buf;

		if (buf != NULL)
			rte_pktmbuf_free_seg(buf);
	}
	rte_free(elts);
}
/**
 * Clean up an RX queue.
 *
 * Destroy objects, free allocated memory and reset the structure for reuse.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 */
void
rxq_cleanup(struct rxq *rxq)
{
	struct ibv_exp_release_intf_params params;

	DEBUG("cleaning up %p", (void *)rxq);
	if (rxq->sp)
		rxq_free_elts_sp(rxq);
	else
		rxq_free_elts(rxq);
	if (rxq->if_wq != NULL) {
		assert(rxq->priv != NULL);
		assert(rxq->priv->ctx != NULL);
		assert(rxq->wq != NULL);
		params = (struct ibv_exp_release_intf_params){
			.comp_mask = 0,
		};
		claim_zero(ibv_exp_release_intf(rxq->priv->ctx,
						rxq->if_wq,
						&params));
	}
	if (rxq->if_cq != NULL) {
		assert(rxq->priv != NULL);
		assert(rxq->priv->ctx != NULL);
		assert(rxq->cq != NULL);
		params = (struct ibv_exp_release_intf_params){
			.comp_mask = 0,
		};
		claim_zero(ibv_exp_release_intf(rxq->priv->ctx,
						rxq->if_cq,
						&params));
	}
	if (rxq->wq != NULL)
		claim_zero(ibv_exp_destroy_wq(rxq->wq));
	if (rxq->cq != NULL)
		claim_zero(ibv_destroy_cq(rxq->cq));
	if (rxq->rd != NULL) {
		struct ibv_exp_destroy_res_domain_attr attr = {
			.comp_mask = 0,
		};

		assert(rxq->priv != NULL);
		assert(rxq->priv->ctx != NULL);
		claim_zero(ibv_exp_destroy_res_domain(rxq->priv->ctx,
						      rxq->rd,
						      &attr));
	}
	if (rxq->mr != NULL)
		claim_zero(ibv_dereg_mr(rxq->mr));
	memset(rxq, 0, sizeof(*rxq));
}
/**
 * Reconfigure an RX queue with new parameters.
 *
 * rxq_rehash() does not allocate mbufs, which, if not done from the right
 * thread (such as a control thread), may corrupt the pool.
 * In case of failure, the queue is left untouched.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param rxq
 *   RX queue pointer.
 *
 * @return
 *   0 on success, errno value on failure.
 */
int
rxq_rehash(struct rte_eth_dev *dev, struct rxq *rxq)
{
	struct priv *priv = rxq->priv;
	struct rxq tmpl = *rxq;
	unsigned int mbuf_n;
	unsigned int desc_n;
	struct rte_mbuf **pool;
	unsigned int i, k;
	struct ibv_exp_wq_attr mod;
	int err;

	DEBUG("%p: rehashing queue %p", (void *)dev, (void *)rxq);
	/* Number of descriptors and mbufs currently allocated. */
	desc_n = (tmpl.elts_n * (tmpl.sp ? MLX5_PMD_SGE_WR_N : 1));
	mbuf_n = desc_n;
	/* Toggle RX checksum offload if hardware supports it. */
	if (priv->hw_csum) {
		tmpl.csum = !!dev->data->dev_conf.rxmode.hw_ip_checksum;
		rxq->csum = tmpl.csum;
	}
	if (priv->hw_csum_l2tun) {
		tmpl.csum_l2tun = !!dev->data->dev_conf.rxmode.hw_ip_checksum;
		rxq->csum_l2tun = tmpl.csum_l2tun;
	}
	/* Enable scattered packets support for this queue if necessary. */
	if ((dev->data->dev_conf.rxmode.jumbo_frame) &&
	    (dev->data->dev_conf.rxmode.max_rx_pkt_len >
	     (tmpl.mb_len - RTE_PKTMBUF_HEADROOM))) {
		tmpl.sp = 1;
		desc_n /= MLX5_PMD_SGE_WR_N;
	} else
		tmpl.sp = 0;
	DEBUG("%p: %s scattered packets support (%u WRs)",
	      (void *)dev, (tmpl.sp ? "enabling" : "disabling"), desc_n);
	/* If scatter mode is the same as before, nothing to do. */
	if (tmpl.sp == rxq->sp) {
		DEBUG("%p: nothing to do", (void *)dev);
		return 0;
	}
	/* From now on, any failure will render the queue unusable.
	 * Reinitialize WQ. */
	mod = (struct ibv_exp_wq_attr){
		.attr_mask = IBV_EXP_WQ_ATTR_STATE,
		.wq_state = IBV_EXP_WQS_RESET,
	};
	err = ibv_exp_modify_wq(tmpl.wq, &mod);
	if (err) {
		ERROR("%p: cannot reset WQ: %s", (void *)dev, strerror(err));
		assert(err > 0);
		return err;
	}
	/* Allocate pool. */
	pool = rte_malloc(__func__, (mbuf_n * sizeof(*pool)), 0);
	if (pool == NULL) {
		ERROR("%p: cannot allocate memory", (void *)dev);
		return ENOBUFS;
	}
	/* Snatch mbufs from original queue. */
	k = 0;
	if (rxq->sp) {
		struct rxq_elt_sp (*elts)[rxq->elts_n] = rxq->elts.sp;

		for (i = 0; (i != RTE_DIM(*elts)); ++i) {
			struct rxq_elt_sp *elt = &(*elts)[i];
			unsigned int j;

			for (j = 0; (j != RTE_DIM(elt->bufs)); ++j) {
				assert(elt->bufs[j] != NULL);
				pool[k++] = elt->bufs[j];
			}
		}
	} else {
		struct rxq_elt (*elts)[rxq->elts_n] = rxq->elts.no_sp;

		for (i = 0; (i != RTE_DIM(*elts)); ++i) {
			struct rxq_elt *elt = &(*elts)[i];
			struct rte_mbuf *buf = elt->buf;

			pool[k++] = buf;
		}
	}
	assert(k == mbuf_n);
	tmpl.elts_n = 0;
	tmpl.elts.sp = NULL;
	assert((void *)&tmpl.elts.sp == (void *)&tmpl.elts.no_sp);
	err = ((tmpl.sp) ?
	       rxq_alloc_elts_sp(&tmpl, desc_n, pool) :
	       rxq_alloc_elts(&tmpl, desc_n, pool));
	if (err) {
		ERROR("%p: cannot reallocate WRs, aborting", (void *)dev);
		rte_free(pool);
		assert(err > 0);
		return err;
	}
	assert(tmpl.elts_n == desc_n);
	assert(tmpl.elts.sp != NULL);
	rte_free(pool);
	/* Clean up original data. */
	rxq->elts_n = 0;
	rte_free(rxq->elts.sp);
	rxq->elts.sp = NULL;
	/* Change queue state to ready. */
	mod = (struct ibv_exp_wq_attr){
		.attr_mask = IBV_EXP_WQ_ATTR_STATE,
		.wq_state = IBV_EXP_WQS_RDY,
	};
	err = ibv_exp_modify_wq(tmpl.wq, &mod);
	if (err) {
		ERROR("%p: WQ state to IBV_EXP_WQS_RDY failed: %s",
		      (void *)dev, strerror(err));
		goto error;
	}
	/* Post SGEs. */
	assert(tmpl.if_wq != NULL);
	if (tmpl.sp) {
		struct rxq_elt_sp (*elts)[tmpl.elts_n] = tmpl.elts.sp;

		for (i = 0; (i != RTE_DIM(*elts)); ++i) {
			err = tmpl.if_wq->recv_sg_list
				(tmpl.wq,
				 (*elts)[i].sges,
				 RTE_DIM((*elts)[i].sges));
			if (err)
				break;
		}
	} else {
		struct rxq_elt (*elts)[tmpl.elts_n] = tmpl.elts.no_sp;

		for (i = 0; (i != RTE_DIM(*elts)); ++i) {
			err = tmpl.if_wq->recv_burst(
				tmpl.wq,
				&(*elts)[i].sge,
				1);
			if (err)
				break;
		}
	}
	if (err) {
		ERROR("%p: failed to post SGEs with error %d",
		      (void *)dev, err);
		/* Set err because it does not contain a valid errno value. */
		err = EIO;
		goto error;
	}
error:
	*rxq = tmpl;
	assert(err >= 0);
	return err;
}
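
/*
 * Usage note (informative only): rxq_rehash() is typically triggered by an
 * MTU update toggling dev_conf.rxmode.jumbo_frame. Assuming
 * MLX5_PMD_SGE_WR_N is 4, a queue of 512 single-segment WRs becomes 128
 * scattered WRs of 4 SGEs each; the same 512 mbufs are recycled through
 * the temporary pool[] array, so no mbuf allocation happens here.
 */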
/**
 * Configure an RX queue.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param rxq
 *   Pointer to RX queue structure.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 * @param[in] conf
 *   Thresholds parameters.
 * @param mp
 *   Memory pool for buffer allocations.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc,
	  unsigned int socket, const struct rte_eth_rxconf *conf,
	  struct rte_mempool *mp)
{
	struct priv *priv = dev->data->dev_private;
	struct rxq tmpl = {
		.priv = priv,
		.mp = mp,
		.socket = socket
	};
	struct ibv_exp_wq_attr mod;
	union {
		struct ibv_exp_query_intf_params params;
		struct ibv_exp_cq_init_attr cq;
		struct ibv_exp_res_domain_init_attr rd;
		struct ibv_exp_wq_init_attr wq;
	} attr;
	enum ibv_exp_query_intf_status status;
	struct rte_mbuf *buf;
	int ret = 0;
	unsigned int i;
	unsigned int cq_size = desc;

	(void)conf; /* Thresholds configuration (ignored). */
	if ((desc == 0) || (desc % MLX5_PMD_SGE_WR_N)) {
		ERROR("%p: invalid number of RX descriptors (must be a"
		      " multiple of %d)", (void *)dev, MLX5_PMD_SGE_WR_N);
		return EINVAL;
	}
	/* Get mbuf length. */
	buf = rte_pktmbuf_alloc(mp);
	if (buf == NULL) {
		ERROR("%p: unable to allocate mbuf", (void *)dev);
		return ENOMEM;
	}
	tmpl.mb_len = buf->buf_len;
	assert((rte_pktmbuf_headroom(buf) +
		rte_pktmbuf_tailroom(buf)) == tmpl.mb_len);
	assert(rte_pktmbuf_headroom(buf) == RTE_PKTMBUF_HEADROOM);
	rte_pktmbuf_free(buf);
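	/*
	 * Example (informative only): assuming MLX5_PMD_SGE_WR_N is 4,
	 * desc == 512 passes the check above; with jumbo frames enabled it
	 * is later divided into 128 WRs of 4 SGEs each, otherwise it yields
	 * 512 single-segment WRs.
	 */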
	/* Toggle RX checksum offload if hardware supports it. */
	if (priv->hw_csum)
		tmpl.csum = !!dev->data->dev_conf.rxmode.hw_ip_checksum;
	if (priv->hw_csum_l2tun)
		tmpl.csum_l2tun = !!dev->data->dev_conf.rxmode.hw_ip_checksum;
	/* Enable scattered packets support for this queue if necessary. */
	if ((dev->data->dev_conf.rxmode.jumbo_frame) &&
	    (dev->data->dev_conf.rxmode.max_rx_pkt_len >
	     (tmpl.mb_len - RTE_PKTMBUF_HEADROOM))) {
		tmpl.sp = 1;
		desc /= MLX5_PMD_SGE_WR_N;
	}
	DEBUG("%p: %s scattered packets support (%u WRs)",
	      (void *)dev, (tmpl.sp ? "enabling" : "disabling"), desc);
	/* Use the entire RX mempool as the memory region. */
	tmpl.mr = ibv_reg_mr(priv->pd,
			     (void *)mp->elt_va_start,
			     (mp->elt_va_end - mp->elt_va_start),
			     (IBV_ACCESS_LOCAL_WRITE |
			      IBV_ACCESS_REMOTE_WRITE));
	if (tmpl.mr == NULL) {
		ret = EINVAL;
		ERROR("%p: MR creation failure: %s",
		      (void *)dev, strerror(ret));
		goto error;
	}
	attr.rd = (struct ibv_exp_res_domain_init_attr){
		.comp_mask = (IBV_EXP_RES_DOMAIN_THREAD_MODEL |
			      IBV_EXP_RES_DOMAIN_MSG_MODEL),
		.thread_model = IBV_EXP_THREAD_SINGLE,
		.msg_model = IBV_EXP_MSG_HIGH_BW,
	};
	tmpl.rd = ibv_exp_create_res_domain(priv->ctx, &attr.rd);
	if (tmpl.rd == NULL) {
		ret = ENOMEM;
		ERROR("%p: RD creation failure: %s",
		      (void *)dev, strerror(ret));
		goto error;
	}
	attr.cq = (struct ibv_exp_cq_init_attr){
		.comp_mask = IBV_EXP_CQ_INIT_ATTR_RES_DOMAIN,
		.res_domain = tmpl.rd,
	};
	tmpl.cq = ibv_exp_create_cq(priv->ctx, cq_size, NULL, NULL, 0,
				    &attr.cq);
	if (tmpl.cq == NULL) {
		ret = ENOMEM;
		ERROR("%p: CQ creation failure: %s",
		      (void *)dev, strerror(ret));
		goto error;
	}
	DEBUG("priv->device_attr.max_qp_wr is %d",
	      priv->device_attr.max_qp_wr);
	DEBUG("priv->device_attr.max_sge is %d",
	      priv->device_attr.max_sge);
	attr.wq = (struct ibv_exp_wq_init_attr){
		.wq_context = NULL, /* Could be useful in the future. */
		.wq_type = IBV_EXP_WQT_RQ,
		/* Max number of outstanding WRs. */
		.max_recv_wr = ((priv->device_attr.max_qp_wr < (int)cq_size) ?
				priv->device_attr.max_qp_wr :
				(int)cq_size),
		/* Max number of scatter/gather elements in a WR. */
		.max_recv_sge = ((priv->device_attr.max_sge <
				  MLX5_PMD_SGE_WR_N) ?
				 priv->device_attr.max_sge :
				 MLX5_PMD_SGE_WR_N),
		.pd = priv->pd,
		.cq = tmpl.cq,
		.comp_mask = IBV_EXP_CREATE_WQ_RES_DOMAIN,
		.res_domain = tmpl.rd,
	};
	tmpl.wq = ibv_exp_create_wq(priv->ctx, &attr.wq);
	if (tmpl.wq == NULL) {
		ret = (errno ? errno : EINVAL);
		ERROR("%p: WQ creation failure: %s",
		      (void *)dev, strerror(ret));
		goto error;
	}
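	/*
	 * Sizing example (informative only, hypothetical device limits):
	 * with desc == 512 and a device reporting max_qp_wr == 16384 and
	 * max_sge == 32, the WQ is created with max_recv_wr == 512 and
	 * max_recv_sge == MLX5_PMD_SGE_WR_N; the clamps above only kick in
	 * on devices with smaller limits.
	 */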
	if (tmpl.sp)
		ret = rxq_alloc_elts_sp(&tmpl, desc, NULL);
	else
		ret = rxq_alloc_elts(&tmpl, desc, NULL);
	if (ret) {
		ERROR("%p: RXQ allocation failed: %s",
		      (void *)dev, strerror(ret));
		goto error;
	}
	/* Save port ID. */
	tmpl.port_id = dev->data->port_id;
	DEBUG("%p: RTE port ID: %u", (void *)rxq, tmpl.port_id);
	attr.params = (struct ibv_exp_query_intf_params){
		.intf_scope = IBV_EXP_INTF_GLOBAL,
		.intf = IBV_EXP_INTF_CQ,
		.obj = tmpl.cq,
	};
	tmpl.if_cq = ibv_exp_query_intf(priv->ctx, &attr.params, &status);
	if (tmpl.if_cq == NULL) {
		ERROR("%p: CQ interface family query failed with status %d",
		      (void *)dev, status);
		goto error;
	}
	attr.params = (struct ibv_exp_query_intf_params){
		.intf_scope = IBV_EXP_INTF_GLOBAL,
		.intf = IBV_EXP_INTF_WQ,
		.obj = tmpl.wq,
	};
	tmpl.if_wq = ibv_exp_query_intf(priv->ctx, &attr.params, &status);
	if (tmpl.if_wq == NULL) {
		ERROR("%p: WQ interface family query failed with status %d",
		      (void *)dev, status);
		goto error;
	}
	/* Change queue state to ready. */
	mod = (struct ibv_exp_wq_attr){
		.attr_mask = IBV_EXP_WQ_ATTR_STATE,
		.wq_state = IBV_EXP_WQS_RDY,
	};
	ret = ibv_exp_modify_wq(tmpl.wq, &mod);
	if (ret) {
		ERROR("%p: WQ state to IBV_EXP_WQS_RDY failed: %s",
		      (void *)dev, strerror(ret));
		goto error;
	}
	/* Post SGEs. */
	if (tmpl.sp) {
		struct rxq_elt_sp (*elts)[tmpl.elts_n] = tmpl.elts.sp;

		for (i = 0; (i != RTE_DIM(*elts)); ++i) {
			ret = tmpl.if_wq->recv_sg_list
				(tmpl.wq,
				 (*elts)[i].sges,
				 RTE_DIM((*elts)[i].sges));
			if (ret)
				break;
		}
	} else {
		struct rxq_elt (*elts)[tmpl.elts_n] = tmpl.elts.no_sp;

		for (i = 0; (i != RTE_DIM(*elts)); ++i) {
			ret = tmpl.if_wq->recv_burst(
				tmpl.wq,
				&(*elts)[i].sge,
				1);
			if (ret)
				break;
		}
	}
	if (ret) {
		ERROR("%p: failed to post SGEs with error %d",
		      (void *)dev, ret);
		/* Set ret because it does not contain a valid errno value. */
		ret = EIO;
		goto error;
	}
	/* Clean up rxq in case we're reinitializing it. */
	DEBUG("%p: cleaning-up old rxq just in case", (void *)rxq);
	rxq_cleanup(rxq);
	*rxq = tmpl;
	DEBUG("%p: rxq updated with %p", (void *)rxq, (void *)&tmpl);
	assert(ret == 0);
	return 0;
error:
	rxq_cleanup(&tmpl);
	assert(ret > 0);
	return ret;
}
/**
 * DPDK callback to configure an RX queue.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   RX queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 * @param[in] conf
 *   Thresholds parameters.
 * @param mp
 *   Memory pool for buffer allocations.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
int
mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
		    unsigned int socket, const struct rte_eth_rxconf *conf,
		    struct rte_mempool *mp)
{
	struct priv *priv = dev->data->dev_private;
	struct rxq *rxq;
	int ret;

	priv_lock(priv);
	DEBUG("%p: configuring queue %u for %u descriptors",
	      (void *)dev, idx, desc);
	if (idx >= priv->rxqs_n) {
		ERROR("%p: queue index out of range (%u >= %u)",
		      (void *)dev, idx, priv->rxqs_n);
		priv_unlock(priv);
		return -EOVERFLOW;
	}
	/* Fetch the queue only after validating the index. */
	rxq = (*priv->rxqs)[idx];
	if (rxq != NULL) {
		DEBUG("%p: reusing already allocated queue index %u (%p)",
		      (void *)dev, idx, (void *)rxq);
		if (priv->started) {
			priv_unlock(priv);
			return -EEXIST;
		}
		(*priv->rxqs)[idx] = NULL;
		rxq_cleanup(rxq);
	} else {
		rxq = rte_calloc_socket("RXQ", 1, sizeof(*rxq), 0, socket);
		if (rxq == NULL) {
			ERROR("%p: unable to allocate queue index %u",
			      (void *)dev, idx);
			priv_unlock(priv);
			return -ENOMEM;
		}
	}
	ret = rxq_setup(dev, rxq, desc, socket, conf, mp);
	if (ret)
		rte_free(rxq);
	else {
		rxq->stats.idx = idx;
		DEBUG("%p: adding RX queue %p to list",
		      (void *)dev, (void *)rxq);
		(*priv->rxqs)[idx] = rxq;
		/* Update receive callback. */
		if (rxq->sp)
			dev->rx_pkt_burst = mlx5_rx_burst_sp;
		else
			dev->rx_pkt_burst = mlx5_rx_burst;
	}
	priv_unlock(priv);
	return -ret;
}
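
/*
 * Usage sketch (informative only): applications do not call this function
 * directly, it is invoked through the rte_eth_dev API, e.g.:
 *
 *   struct rte_mempool *mp = ...; // mbuf pool on the right NUMA socket
 *   ret = rte_eth_rx_queue_setup(port_id, 0, 512, socket_id, NULL, mp);
 *   // ret == 0 on success, negative errno value otherwise.
 */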
/**
 * DPDK callback to release an RX queue.
 *
 * @param dpdk_rxq
 *   Generic RX queue pointer.
 */
void
mlx5_rx_queue_release(void *dpdk_rxq)
{
	struct rxq *rxq = (struct rxq *)dpdk_rxq;
	struct priv *priv;
	unsigned int i;

	if (rxq == NULL)
		return;
	priv = rxq->priv;
	priv_lock(priv);
	for (i = 0; (i != priv->rxqs_n); ++i)
		if ((*priv->rxqs)[i] == rxq) {
			DEBUG("%p: removing RX queue %p from list",
			      (void *)priv->dev, (void *)rxq);
			(*priv->rxqs)[i] = NULL;
			break;
		}