/*-
 *   BSD LICENSE
 *
 *   Copyright 2015 6WIND S.A.
 *   Copyright 2015 Mellanox.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of 6WIND S.A. nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <stddef.h>
#include <assert.h>
#include <stdint.h>
#include <errno.h>
#include <string.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-pedantic"
#endif
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-pedantic"
#endif

/* DPDK headers don't like -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-pedantic"
#endif
#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev.h>
#include <rte_common.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-pedantic"
#endif

#include "mlx5.h"
#include "mlx5_rxtx.h"
#include "mlx5_utils.h"
#include "mlx5_defs.h"
/* Initialization data for hash RX queues. */
static const struct hash_rxq_init hash_rxq_init[] = {
	[HASH_RXQ_TCPV4] = {
		.hash_fields = (IBV_EXP_RX_HASH_SRC_IPV4 |
				IBV_EXP_RX_HASH_DST_IPV4 |
				IBV_EXP_RX_HASH_SRC_PORT_TCP |
				IBV_EXP_RX_HASH_DST_PORT_TCP),
	},
	[HASH_RXQ_UDPV4] = {
		.hash_fields = (IBV_EXP_RX_HASH_SRC_IPV4 |
				IBV_EXP_RX_HASH_DST_IPV4 |
				IBV_EXP_RX_HASH_SRC_PORT_UDP |
				IBV_EXP_RX_HASH_DST_PORT_UDP),
	},
	[HASH_RXQ_IPV4] = {
		.hash_fields = (IBV_EXP_RX_HASH_SRC_IPV4 |
				IBV_EXP_RX_HASH_DST_IPV4),
	},
	[HASH_RXQ_ETH] = {
		.hash_fields = 0,
	},
};
/* Number of entries in hash_rxq_init[]. */
static const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
/* Initialization data for hash RX queue indirection tables. */
static const struct ind_table_init ind_table_init[] = {
	{
		.max_size = -1u, /* Superseded by HW limitations. */
		.hash_types = (1 << HASH_RXQ_TCPV4 |
			       1 << HASH_RXQ_UDPV4 |
			       1 << HASH_RXQ_IPV4),
		.hash_types_n = 3,
	},
	{
		.max_size = 1,
		.hash_types = 1 << HASH_RXQ_ETH,
		.hash_types_n = 1,
	},
};

#define IND_TABLE_INIT_N RTE_DIM(ind_table_init)
/* Default RSS hash key also used for ConnectX-3. */
static uint8_t hash_rxq_default_key[] = {
	0x2c, 0xc6, 0x81, 0xd1,
	0x5b, 0xdb, 0xf4, 0xf7,
	0xfc, 0xa2, 0x83, 0x19,
	0xdb, 0x1a, 0x3e, 0x94,
	0x6b, 0x9e, 0x38, 0xd9,
	0x2c, 0x9c, 0x03, 0xd1,
	0xad, 0x99, 0x44, 0xa7,
	0xd9, 0x56, 0x3d, 0x59,
	0x06, 0x3c, 0x25, 0xf3,
	0xfc, 0x1f, 0xdc, 0x2a,
};
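/*
 * Note: the Toeplitz hash function configured below expects a 40-byte key,
 * which is why the array above holds exactly 40 bytes.
 */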
/**
 * Return the base-2 logarithm of a value, rounded up.
 *
 * @param v
 *   Input value.
 *
 * @return
 *   Smallest l such that (1 << l) >= v.
 */
static unsigned int
log2above(unsigned int v)
{
	unsigned int l;
	unsigned int r;

	for (l = 0, r = 0; (v >> 1); ++l, v >>= 1)
		r |= (v & 1);
	return (l + r);
}
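/*
 * Examples: log2above(4) == 2, log2above(5) == 3 and log2above(8) == 3, so
 * (1 << log2above(v)) is the smallest power of two greater than or equal
 * to v.
 */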
/**
 * Return the type corresponding to the n'th bit set.
 *
 * @param table
 *   The indirection table.
 * @param n
 *   The n'th bit set.
 *
 * @return
 *   The corresponding hash_rxq_type.
 */
static enum hash_rxq_type
hash_rxq_type_from_n(const struct ind_table_init *table, unsigned int n)
{
	assert(n < table->hash_types_n);
	while (((table->hash_types >> n) & 0x1) == 0)
		++n;
	return n;
}
/**
 * Filter out disabled hash RX queue types from ind_table_init[].
 *
 * @param priv
 *   Pointer to private structure.
 * @param[out] table
 *   Output table.
 *
 * @return
 *   Number of table entries.
 */
static unsigned int
priv_make_ind_table_init(struct priv *priv,
			 struct ind_table_init (*table)[IND_TABLE_INIT_N])
{
	unsigned int i;
	unsigned int j;
	unsigned int table_n = 0;
	/* Mandatory to receive frames not handled by normal hash RX queues. */
	unsigned int hash_types_sup = 1 << HASH_RXQ_ETH;

	/* Process other protocols only if more than one queue. */
	if (priv->rxqs_n > 1)
		for (i = 0; (i != hash_rxq_init_n); ++i)
			if (hash_rxq_init[i].hash_fields)
				hash_types_sup |= (1 << i);

	/* Filter out entries whose protocols are not in the set. */
	for (i = 0, j = 0; (i != IND_TABLE_INIT_N); ++i) {
		unsigned int nb;
		unsigned int h;

		/* j is increased only if the table has valid protocols. */
		assert(j <= i);
		(*table)[j] = ind_table_init[i];
		(*table)[j].hash_types &= hash_types_sup;
		for (h = 0, nb = 0; (h != hash_rxq_init_n); ++h)
			if (((*table)[j].hash_types >> h) & 0x1)
				++nb;
		(*table)[j].hash_types_n = nb;
		if (nb) {
			++table_n;
			++j;
		}
	}
	return table_n;
}
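/*
 * Example: with a single RX queue only HASH_RXQ_ETH remains in
 * hash_types_sup, so the multi-protocol entry of ind_table_init[] is
 * filtered out entirely and the function returns 1.
 */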
/**
 * Initialize hash RX queues and indirection table.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   0 on success, errno value on failure.
 */
int
priv_create_hash_rxqs(struct priv *priv)
{
	/* If the requested number of WQs is not a power of two, use the
	 * maximum indirection table size for better balancing.
	 * The result is always rounded to the next power of two. */
	unsigned int wqs_n =
		(1 << log2above((priv->rxqs_n & (priv->rxqs_n - 1)) ?
				priv->ind_table_max_size :
				priv->rxqs_n));
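	/*
	 * Example: with 4 RX queues wqs_n is simply 4; with 6 RX queues (not
	 * a power of two) wqs_n becomes the rounded-up indirection table
	 * maximum, e.g. 512 on devices reporting that limit.
	 */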
	struct ibv_exp_wq *wqs[wqs_n];
	struct ind_table_init ind_table_init[IND_TABLE_INIT_N];
	unsigned int ind_tables_n =
		priv_make_ind_table_init(priv, &ind_table_init);
	unsigned int hash_rxqs_n = 0;
	struct hash_rxq (*hash_rxqs)[] = NULL;
	struct ibv_exp_rwq_ind_table *(*ind_tables)[] = NULL;
	unsigned int i;
	unsigned int j;
	unsigned int k;
	int err = 0;

	assert(priv->ind_tables == NULL);
	assert(priv->ind_tables_n == 0);
	assert(priv->hash_rxqs == NULL);
	assert(priv->hash_rxqs_n == 0);
	assert(priv->pd != NULL);
	assert(priv->ctx != NULL);
	if (priv->rxqs_n == 0)
		return EINVAL;
	assert(priv->rxqs != NULL);
	if (ind_tables_n == 0) {
		ERROR("all hash RX queue types have been filtered out,"
		      " indirection table cannot be created");
		return EINVAL;
	}
	if ((wqs_n < priv->rxqs_n) || (wqs_n > priv->ind_table_max_size)) {
		ERROR("cannot handle this many RX queues (%u)", priv->rxqs_n);
		return EINVAL;
	}
	if (wqs_n != priv->rxqs_n) {
		INFO("%u RX queues are configured, consider rounding this"
		     " number to the next power of two for better balancing",
		     priv->rxqs_n);
		DEBUG("indirection table extended to assume %u WQs", wqs_n);
	}
	/* When the number of RX queues is not a power of two, the remaining
	 * table entries are padded with reused WQs and hashes are not spread
	 * uniformly. */
	for (i = 0, j = 0; (i != wqs_n); ++i) {
		wqs[i] = (*priv->rxqs)[j]->wq;
		if (++j == priv->rxqs_n)
			j = 0;
	}
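	/*
	 * Example: with 3 RX queues and an indirection table maximum of 512
	 * entries (wqs_n == 512), wqs[] is filled as 0, 1, 2, 0, 1, 2, ...,
	 * so queues 0 and 1 end up with 171 entries each and queue 2 with
	 * 170, i.e. traffic is spread almost but not exactly evenly.
	 */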
	/* Get number of hash RX queues to configure. */
	for (i = 0, hash_rxqs_n = 0; (i != ind_tables_n); ++i)
		hash_rxqs_n += ind_table_init[i].hash_types_n;
	DEBUG("allocating %u hash RX queues for %u WQs, %u indirection tables",
	      hash_rxqs_n, priv->rxqs_n, ind_tables_n);
	/* Create indirection tables. */
	ind_tables = rte_calloc(__func__, ind_tables_n,
				sizeof((*ind_tables)[0]), 0);
	if (ind_tables == NULL) {
		err = ENOMEM;
		ERROR("cannot allocate indirection tables container: %s",
		      strerror(err));
		goto error;
	}
	for (i = 0; (i != ind_tables_n); ++i) {
		struct ibv_exp_rwq_ind_table_init_attr ind_init_attr = {
			.pd = priv->pd,
			.log_ind_tbl_size = 0, /* Set below. */
			.ind_tbl = wqs,
			.comp_mask = 0,
		};
		unsigned int ind_tbl_size = ind_table_init[i].max_size;
		struct ibv_exp_rwq_ind_table *ind_table;

		if (wqs_n < ind_tbl_size)
			ind_tbl_size = wqs_n;
		ind_init_attr.log_ind_tbl_size = log2above(ind_tbl_size);
		ind_table = ibv_exp_create_rwq_ind_table(priv->ctx,
							 &ind_init_attr);
		if (ind_table != NULL) {
			(*ind_tables)[i] = ind_table;
			continue;
		}
		/* Not clear whether errno is set. */
		err = (errno ? errno : EINVAL);
		ERROR("RX indirection table creation failed with error %d: %s",
		      err, strerror(err));
		goto error;
	}
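	/*
	 * Example: an ind_table_init[] entry with max_size == 1 gets
	 * log_ind_tbl_size == 0 and therefore maps everything to a single
	 * WQ, while an entry with max_size == -1u spans all wqs_n entries.
	 */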
	/* Allocate array that holds hash RX queues and related data. */
	hash_rxqs = rte_calloc(__func__, hash_rxqs_n,
			       sizeof((*hash_rxqs)[0]), 0);
	if (hash_rxqs == NULL) {
		err = ENOMEM;
		ERROR("cannot allocate hash RX queues container: %s",
		      strerror(err));
		goto error;
	}
	for (i = 0, j = 0, k = 0;
	     ((i != hash_rxqs_n) && (j != ind_tables_n));
	     ++i) {
		struct hash_rxq *hash_rxq = &(*hash_rxqs)[i];
		enum hash_rxq_type type =
			hash_rxq_type_from_n(&ind_table_init[j], k);
		struct ibv_exp_rx_hash_conf hash_conf = {
			.rx_hash_function = IBV_EXP_RX_HASH_FUNC_TOEPLITZ,
			.rx_hash_key_len = sizeof(hash_rxq_default_key),
			.rx_hash_key = hash_rxq_default_key,
			.rx_hash_fields_mask = hash_rxq_init[type].hash_fields,
			.rwq_ind_tbl = (*ind_tables)[j],
		};
		struct ibv_exp_qp_init_attr qp_init_attr = {
			.max_inl_recv = 0, /* Currently not supported. */
			.qp_type = IBV_QPT_RAW_PACKET,
			.comp_mask = (IBV_EXP_QP_INIT_ATTR_PD |
				      IBV_EXP_QP_INIT_ATTR_RX_HASH),
			.pd = priv->pd,
			.rx_hash_conf = &hash_conf,
			.port_num = priv->port,
		};

		DEBUG("using indirection table %u for hash RX queue %u",
		      j, i);
		*hash_rxq = (struct hash_rxq){
			.priv = priv,
			.qp = ibv_exp_create_qp(priv->ctx, &qp_init_attr),
			.type = type,
		};
		if (hash_rxq->qp == NULL) {
			err = (errno ? errno : EINVAL);
			ERROR("Hash RX QP creation failure: %s",
			      strerror(err));
			goto error;
		}
		if (++k < ind_table_init[j].hash_types_n)
			continue;
		/* Switch to the next indirection table and reset hash RX
		 * queue type array index. */
		++j;
		k = 0;
	}
	priv->ind_tables = ind_tables;
	priv->ind_tables_n = ind_tables_n;
	priv->hash_rxqs = hash_rxqs;
	priv->hash_rxqs_n = hash_rxqs_n;
	assert(err == 0);
	return 0;
error:
	if (hash_rxqs != NULL) {
		for (i = 0; (i != hash_rxqs_n); ++i) {
			struct ibv_qp *qp = (*hash_rxqs)[i].qp;

			if (qp == NULL)
				continue;
			claim_zero(ibv_destroy_qp(qp));
		}
		rte_free(hash_rxqs);
	}
	if (ind_tables != NULL) {
		for (j = 0; (j != ind_tables_n); ++j) {
			struct ibv_exp_rwq_ind_table *ind_table =
				(*ind_tables)[j];

			if (ind_table == NULL)
				continue;
			claim_zero(ibv_exp_destroy_rwq_ind_table(ind_table));
		}
		rte_free(ind_tables);
	}
	return err;
}
/**
 * Clean up hash RX queues and indirection table.
 *
 * @param priv
 *   Pointer to private structure.
 */
void
priv_destroy_hash_rxqs(struct priv *priv)
{
	unsigned int i;

	DEBUG("destroying %u hash RX queues", priv->hash_rxqs_n);
	if (priv->hash_rxqs_n == 0) {
		assert(priv->hash_rxqs == NULL);
		assert(priv->ind_tables == NULL);
		return;
	}
	for (i = 0; (i != priv->hash_rxqs_n); ++i) {
		struct hash_rxq *hash_rxq = &(*priv->hash_rxqs)[i];
		unsigned int j, k;

		assert(hash_rxq->priv == priv);
		assert(hash_rxq->qp != NULL);
		/* Also check that there are no remaining flows. */
		assert(hash_rxq->allmulti_flow == NULL);
		assert(hash_rxq->promisc_flow == NULL);
		for (j = 0; (j != RTE_DIM(hash_rxq->mac_flow)); ++j)
			for (k = 0; (k != RTE_DIM(hash_rxq->mac_flow[j])); ++k)
				assert(hash_rxq->mac_flow[j][k] == NULL);
		claim_zero(ibv_destroy_qp(hash_rxq->qp));
	}
	priv->hash_rxqs_n = 0;
	rte_free(priv->hash_rxqs);
	priv->hash_rxqs = NULL;
	for (i = 0; (i != priv->ind_tables_n); ++i) {
		struct ibv_exp_rwq_ind_table *ind_table =
			(*priv->ind_tables)[i];

		assert(ind_table != NULL);
		claim_zero(ibv_exp_destroy_rwq_ind_table(ind_table));
	}
	priv->ind_tables_n = 0;
	rte_free(priv->ind_tables);
	priv->ind_tables = NULL;
}
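/*
 * Usage sketch (hypothetical caller, not part of this file): the device
 * start and stop handlers are expected to pair these calls, e.g.:
 *
 *	if (priv_create_hash_rxqs(priv))
 *		goto error;
 *	...
 *	priv_destroy_hash_rxqs(priv);
 */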
/**
 * Allocate RX queue elements with scattered packets support.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 * @param elts_n
 *   Number of elements to allocate.
 * @param[in] pool
 *   If not NULL, fetch buffers from this array instead of allocating them
 *   with rte_pktmbuf_alloc().
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
rxq_alloc_elts_sp(struct rxq *rxq, unsigned int elts_n,
		  struct rte_mbuf **pool)
{
	unsigned int i;
	struct rxq_elt_sp (*elts)[elts_n] =
		rte_calloc_socket("RXQ elements", 1, sizeof(*elts), 0,
				  rxq->socket);
	int ret = 0;

	if (elts == NULL) {
		ERROR("%p: can't allocate packets array", (void *)rxq);
		ret = ENOMEM;
		goto error;
	}
	/* For each WR (packet). */
	for (i = 0; (i != elts_n); ++i) {
		unsigned int j;
		struct rxq_elt_sp *elt = &(*elts)[i];
		struct ibv_sge (*sges)[RTE_DIM(elt->sges)] = &elt->sges;

		/* These two arrays must have the same size. */
		assert(RTE_DIM(elt->sges) == RTE_DIM(elt->bufs));
		/* For each SGE (segment). */
		for (j = 0; (j != RTE_DIM(elt->bufs)); ++j) {
			struct ibv_sge *sge = &(*sges)[j];
			struct rte_mbuf *buf;

			if (pool != NULL) {
				buf = *(pool++);
				assert(buf != NULL);
				rte_pktmbuf_reset(buf);
			} else
				buf = rte_pktmbuf_alloc(rxq->mp);
			if (buf == NULL) {
				assert(pool == NULL);
				ERROR("%p: empty mbuf pool", (void *)rxq);
				ret = ENOMEM;
				goto error;
			}
			elt->bufs[j] = buf;
			/* Headroom is reserved by rte_pktmbuf_alloc(). */
			assert(DATA_OFF(buf) == RTE_PKTMBUF_HEADROOM);
			/* Buffer is supposed to be empty. */
			assert(rte_pktmbuf_data_len(buf) == 0);
			assert(rte_pktmbuf_pkt_len(buf) == 0);
			/* sge->addr must be able to store a pointer. */
			assert(sizeof(sge->addr) >= sizeof(uintptr_t));
			if (j == 0) {
				/* The first SGE keeps its headroom. */
				sge->addr = rte_pktmbuf_mtod(buf, uintptr_t);
				sge->length = (buf->buf_len -
					       RTE_PKTMBUF_HEADROOM);
			} else {
				/* Subsequent SGEs lose theirs. */
				assert(DATA_OFF(buf) == RTE_PKTMBUF_HEADROOM);
				SET_DATA_OFF(buf, 0);
				sge->addr = (uintptr_t)buf->buf_addr;
				sge->length = buf->buf_len;
			}
			sge->lkey = rxq->mr->lkey;
			/* Redundant check for tailroom. */
			assert(sge->length == rte_pktmbuf_tailroom(buf));
		}
	}
	DEBUG("%p: allocated and configured %u WRs (%zu segments)",
	      (void *)rxq, elts_n, (elts_n * RTE_DIM((*elts)[0].sges)));
	rxq->elts_n = elts_n;
	rxq->elts.sp = elts;
	assert(ret == 0);
	return 0;
error:
	if (elts != NULL) {
		assert(pool == NULL);
		for (i = 0; (i != RTE_DIM(*elts)); ++i) {
			unsigned int j;
			struct rxq_elt_sp *elt = &(*elts)[i];

			for (j = 0; (j != RTE_DIM(elt->bufs)); ++j) {
				struct rte_mbuf *buf = elt->bufs[j];

				if (buf != NULL)
					rte_pktmbuf_free_seg(buf);
			}
		}
		rte_free(elts);
	}
	DEBUG("%p: failed, freed everything", (void *)rxq);
	assert(ret > 0);
	return ret;
}
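/*
 * Layout note: each struct rxq_elt_sp pairs an sges[] array with a bufs[]
 * array of the same size, so one work request spans several mbufs.
 * Assuming the default MLX5_PMD_SGE_WR_N of 4 (mlx5_defs.h), a queue of
 * 256 such elements therefore pins 1024 mbufs from the pool.
 */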
/**
 * Free RX queue elements with scattered packets support.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 */
static void
rxq_free_elts_sp(struct rxq *rxq)
{
	unsigned int i;
	unsigned int elts_n = rxq->elts_n;
	struct rxq_elt_sp (*elts)[elts_n] = rxq->elts.sp;

	DEBUG("%p: freeing WRs", (void *)rxq);
	rxq->elts_n = 0;
	rxq->elts.sp = NULL;
	if (elts == NULL)
		return;
	for (i = 0; (i != RTE_DIM(*elts)); ++i) {
		unsigned int j;
		struct rxq_elt_sp *elt = &(*elts)[i];

		for (j = 0; (j != RTE_DIM(elt->bufs)); ++j) {
			struct rte_mbuf *buf = elt->bufs[j];

			if (buf != NULL)
				rte_pktmbuf_free_seg(buf);
		}
	}
	rte_free(elts);
}
/**
 * Allocate RX queue elements.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 * @param elts_n
 *   Number of elements to allocate.
 * @param[in] pool
 *   If not NULL, fetch buffers from this array instead of allocating them
 *   with rte_pktmbuf_alloc().
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
rxq_alloc_elts(struct rxq *rxq, unsigned int elts_n, struct rte_mbuf **pool)
{
	unsigned int i;
	struct rxq_elt (*elts)[elts_n] =
		rte_calloc_socket("RXQ elements", 1, sizeof(*elts), 0,
				  rxq->socket);
	int ret = 0;

	if (elts == NULL) {
		ERROR("%p: can't allocate packets array", (void *)rxq);
		ret = ENOMEM;
		goto error;
	}
	/* For each WR (packet). */
	for (i = 0; (i != elts_n); ++i) {
		struct rxq_elt *elt = &(*elts)[i];
		struct ibv_sge *sge = &(*elts)[i].sge;
		struct rte_mbuf *buf;

		if (pool != NULL) {
			buf = *(pool++);
			assert(buf != NULL);
			rte_pktmbuf_reset(buf);
		} else
			buf = rte_pktmbuf_alloc(rxq->mp);
		if (buf == NULL) {
			assert(pool == NULL);
			ERROR("%p: empty mbuf pool", (void *)rxq);
			ret = ENOMEM;
			goto error;
		}
		elt->buf = buf;
		/* Headroom is reserved by rte_pktmbuf_alloc(). */
		assert(DATA_OFF(buf) == RTE_PKTMBUF_HEADROOM);
		/* Buffer is supposed to be empty. */
		assert(rte_pktmbuf_data_len(buf) == 0);
		assert(rte_pktmbuf_pkt_len(buf) == 0);
		/* sge->addr must be able to store a pointer. */
		assert(sizeof(sge->addr) >= sizeof(uintptr_t));
		/* SGE keeps its headroom. */
		sge->addr = (uintptr_t)
			((uint8_t *)buf->buf_addr + RTE_PKTMBUF_HEADROOM);
		sge->length = (buf->buf_len - RTE_PKTMBUF_HEADROOM);
		sge->lkey = rxq->mr->lkey;
		/* Redundant check for tailroom. */
		assert(sge->length == rte_pktmbuf_tailroom(buf));
	}
	DEBUG("%p: allocated and configured %u single-segment WRs",
	      (void *)rxq, elts_n);
	rxq->elts_n = elts_n;
	rxq->elts.no_sp = elts;
	assert(ret == 0);
	return 0;
error:
	if (elts != NULL) {
		assert(pool == NULL);
		for (i = 0; (i != RTE_DIM(*elts)); ++i) {
			struct rxq_elt *elt = &(*elts)[i];
			struct rte_mbuf *buf = elt->buf;

			if (buf != NULL)
				rte_pktmbuf_free_seg(buf);
		}
		rte_free(elts);
	}
	DEBUG("%p: failed, freed everything", (void *)rxq);
	assert(ret > 0);
	return ret;
}
/**
 * Free RX queue elements.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 */
static void
rxq_free_elts(struct rxq *rxq)
{
	unsigned int i;
	unsigned int elts_n = rxq->elts_n;
	struct rxq_elt (*elts)[elts_n] = rxq->elts.no_sp;

	DEBUG("%p: freeing WRs", (void *)rxq);
	rxq->elts_n = 0;
	rxq->elts.no_sp = NULL;
	if (elts == NULL)
		return;
	for (i = 0; (i != RTE_DIM(*elts)); ++i) {
		struct rxq_elt *elt = &(*elts)[i];
		struct rte_mbuf *buf = elt->buf;

		if (buf != NULL)
			rte_pktmbuf_free_seg(buf);
	}
	rte_free(elts);
}
/**
 * Clean up a RX queue.
 *
 * Destroy objects, free allocated memory and reset the structure for reuse.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 */
void
rxq_cleanup(struct rxq *rxq)
{
	struct ibv_exp_release_intf_params params;

	DEBUG("cleaning up %p", (void *)rxq);
	if (rxq->sp)
		rxq_free_elts_sp(rxq);
	else
		rxq_free_elts(rxq);
	if (rxq->if_wq != NULL) {
		assert(rxq->priv != NULL);
		assert(rxq->priv->ctx != NULL);
		assert(rxq->wq != NULL);
		params = (struct ibv_exp_release_intf_params){
			.comp_mask = 0,
		};
		claim_zero(ibv_exp_release_intf(rxq->priv->ctx,
						rxq->if_wq,
						&params));
	}
	if (rxq->if_cq != NULL) {
		assert(rxq->priv != NULL);
		assert(rxq->priv->ctx != NULL);
		assert(rxq->cq != NULL);
		params = (struct ibv_exp_release_intf_params){
			.comp_mask = 0,
		};
		claim_zero(ibv_exp_release_intf(rxq->priv->ctx,
						rxq->if_cq,
						&params));
	}
	if (rxq->wq != NULL)
		claim_zero(ibv_exp_destroy_wq(rxq->wq));
	if (rxq->cq != NULL)
		claim_zero(ibv_destroy_cq(rxq->cq));
	if (rxq->rd != NULL) {
		struct ibv_exp_destroy_res_domain_attr attr = {
			.comp_mask = 0,
		};

		assert(rxq->priv != NULL);
		assert(rxq->priv->ctx != NULL);
		claim_zero(ibv_exp_destroy_res_domain(rxq->priv->ctx,
						      rxq->rd,
						      &attr));
	}
	if (rxq->mr != NULL)
		claim_zero(ibv_dereg_mr(rxq->mr));
	memset(rxq, 0, sizeof(*rxq));
}
/**
 * Reconfigure a RX queue with new parameters.
 *
 * rxq_rehash() does not allocate mbufs, which, if not done from the right
 * thread (such as a control thread), may corrupt the pool.
 * In case of failure, the queue is left untouched.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param rxq
 *   RX queue pointer.
 *
 * @return
 *   0 on success, errno value on failure.
 */
int
rxq_rehash(struct rte_eth_dev *dev, struct rxq *rxq)
{
	struct priv *priv = rxq->priv;
	struct rxq tmpl = *rxq;
	unsigned int mbuf_n;
	unsigned int desc_n;
	struct rte_mbuf **pool;
	unsigned int i, k;
	struct ibv_exp_wq_attr mod;
	int err;

	DEBUG("%p: rehashing queue %p", (void *)dev, (void *)rxq);
	/* Number of descriptors and mbufs currently allocated. */
	desc_n = (tmpl.elts_n * (tmpl.sp ? MLX5_PMD_SGE_WR_N : 1));
	mbuf_n = desc_n;
	/* Toggle RX checksum offload if hardware supports it. */
	if (priv->hw_csum) {
		tmpl.csum = !!dev->data->dev_conf.rxmode.hw_ip_checksum;
		rxq->csum = tmpl.csum;
	}
	if (priv->hw_csum_l2tun) {
		tmpl.csum_l2tun = !!dev->data->dev_conf.rxmode.hw_ip_checksum;
		rxq->csum_l2tun = tmpl.csum_l2tun;
	}
	/* Enable scattered packets support for this queue if necessary. */
	if ((dev->data->dev_conf.rxmode.jumbo_frame) &&
	    (dev->data->dev_conf.rxmode.max_rx_pkt_len >
	     (tmpl.mb_len - RTE_PKTMBUF_HEADROOM))) {
		tmpl.sp = 1;
		desc_n /= MLX5_PMD_SGE_WR_N;
	} else
		tmpl.sp = 0;
	DEBUG("%p: %s scattered packets support (%u WRs)",
	      (void *)dev, (tmpl.sp ? "enabling" : "disabling"), desc_n);
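	/*
	 * Example (assuming MLX5_PMD_SGE_WR_N == 4): a queue currently
	 * holding 512 single-segment elements owns 512 mbufs; switching it
	 * to scattered mode regroups those mbufs into 128 multi-segment WRs
	 * of 4 SGEs each.
	 */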
	/* If scatter mode is the same as before, nothing to do. */
	if (tmpl.sp == rxq->sp) {
		DEBUG("%p: nothing to do", (void *)dev);
		return 0;
	}
	/* From now on, any failure will render the queue unusable.
	 * Reinitialize WQ. */
	mod = (struct ibv_exp_wq_attr){
		.attr_mask = IBV_EXP_WQ_ATTR_STATE,
		.wq_state = IBV_EXP_WQS_RESET,
	};
	err = ibv_exp_modify_wq(tmpl.wq, &mod);
	if (err) {
		ERROR("%p: cannot reset WQ: %s", (void *)dev, strerror(err));
		return err;
	}
	/* Allocate pool. */
	pool = rte_malloc(__func__, (mbuf_n * sizeof(*pool)), 0);
	if (pool == NULL) {
		ERROR("%p: cannot allocate memory", (void *)dev);
		return ENOMEM;
	}
	/* Snatch mbufs from original queue. */
	k = 0;
	if (rxq->sp) {
		struct rxq_elt_sp (*elts)[rxq->elts_n] = rxq->elts.sp;

		for (i = 0; (i != RTE_DIM(*elts)); ++i) {
			struct rxq_elt_sp *elt = &(*elts)[i];
			unsigned int j;

			for (j = 0; (j != RTE_DIM(elt->bufs)); ++j) {
				assert(elt->bufs[j] != NULL);
				pool[k++] = elt->bufs[j];
			}
		}
	} else {
		struct rxq_elt (*elts)[rxq->elts_n] = rxq->elts.no_sp;

		for (i = 0; (i != RTE_DIM(*elts)); ++i) {
			struct rxq_elt *elt = &(*elts)[i];
			struct rte_mbuf *buf = elt->buf;

			pool[k++] = buf;
		}
	}
	assert(k == mbuf_n);
	tmpl.elts_n = 0;
	tmpl.elts.sp = NULL;
	assert((void *)&tmpl.elts.sp == (void *)&tmpl.elts.no_sp);
	err = ((tmpl.sp) ?
	       rxq_alloc_elts_sp(&tmpl, desc_n, pool) :
	       rxq_alloc_elts(&tmpl, desc_n, pool));
	if (err) {
		ERROR("%p: cannot reallocate WRs, aborting", (void *)dev);
		rte_free(pool);
		assert(err > 0);
		return err;
	}
	assert(tmpl.elts_n == desc_n);
	assert(tmpl.elts.sp != NULL);
	rte_free(pool);
	/* Clean up original data. */
	rxq->elts_n = 0;
	rte_free(rxq->elts.sp);
	rxq->elts.sp = NULL;
	/* Change queue state to ready. */
	mod = (struct ibv_exp_wq_attr){
		.attr_mask = IBV_EXP_WQ_ATTR_STATE,
		.wq_state = IBV_EXP_WQS_RDY,
	};
	err = ibv_exp_modify_wq(tmpl.wq, &mod);
	if (err) {
		ERROR("%p: WQ state to IBV_EXP_WQS_RDY failed: %s",
		      (void *)dev, strerror(err));
		goto error;
	}
	/* Post SGEs. */
	assert(tmpl.if_wq != NULL);
	if (tmpl.sp) {
		struct rxq_elt_sp (*elts)[tmpl.elts_n] = tmpl.elts.sp;

		for (i = 0; (i != RTE_DIM(*elts)); ++i) {
			err = tmpl.if_wq->recv_sg_list
				(tmpl.wq,
				 (*elts)[i].sges,
				 RTE_DIM((*elts)[i].sges));
			if (err)
				break;
		}
	} else {
		struct rxq_elt (*elts)[tmpl.elts_n] = tmpl.elts.no_sp;

		for (i = 0; (i != RTE_DIM(*elts)); ++i) {
			err = tmpl.if_wq->recv_burst(
				tmpl.wq,
				&(*elts)[i].sge,
				1);
			if (err)
				break;
		}
	}
	if (err) {
		ERROR("%p: failed to post SGEs with error %d",
		      (void *)dev, err);
		/* Set err because it does not contain a valid errno value. */
		err = EIO;
	}
error:
	*rxq = tmpl;
	assert(err >= 0);
	return err;
}
/**
 * Configure a RX queue.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param rxq
 *   Pointer to RX queue structure.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 * @param[in] conf
 *   Thresholds parameters.
 * @param mp
 *   Memory pool for buffer allocations.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc,
	  unsigned int socket, const struct rte_eth_rxconf *conf,
	  struct rte_mempool *mp)
{
	struct priv *priv = dev->data->dev_private;
	struct rxq tmpl = {
		.priv = priv,
		.mp = mp,
		.socket = socket,
	};
	struct ibv_exp_wq_attr mod;
	union {
		struct ibv_exp_query_intf_params params;
		struct ibv_exp_cq_init_attr cq;
		struct ibv_exp_res_domain_init_attr rd;
		struct ibv_exp_wq_init_attr wq;
	} attr;
	enum ibv_exp_query_intf_status status;
	struct rte_mbuf *buf;
	int ret = 0;
	unsigned int i;
	unsigned int cq_size = desc;
	(void)conf; /* Thresholds configuration (ignored). */
	if ((desc == 0) || (desc % MLX5_PMD_SGE_WR_N)) {
		ERROR("%p: invalid number of RX descriptors (must be a"
		      " multiple of %d)", (void *)dev, MLX5_PMD_SGE_WR_N);
		return EINVAL;
	}
	/* Get mbuf length. */
	buf = rte_pktmbuf_alloc(mp);
	if (buf == NULL) {
		ERROR("%p: unable to allocate mbuf", (void *)dev);
		return ENOMEM;
	}
	tmpl.mb_len = buf->buf_len;
	assert((rte_pktmbuf_headroom(buf) +
		rte_pktmbuf_tailroom(buf)) == tmpl.mb_len);
	assert(rte_pktmbuf_headroom(buf) == RTE_PKTMBUF_HEADROOM);
	rte_pktmbuf_free(buf);
	/* Toggle RX checksum offload if hardware supports it. */
	if (priv->hw_csum)
		tmpl.csum = !!dev->data->dev_conf.rxmode.hw_ip_checksum;
	if (priv->hw_csum_l2tun)
		tmpl.csum_l2tun = !!dev->data->dev_conf.rxmode.hw_ip_checksum;
	/* Enable scattered packets support for this queue if necessary. */
	if ((dev->data->dev_conf.rxmode.jumbo_frame) &&
	    (dev->data->dev_conf.rxmode.max_rx_pkt_len >
	     (tmpl.mb_len - RTE_PKTMBUF_HEADROOM))) {
		tmpl.sp = 1;
		desc /= MLX5_PMD_SGE_WR_N;
	}
	DEBUG("%p: %s scattered packets support (%u WRs)",
	      (void *)dev, (tmpl.sp ? "enabling" : "disabling"), desc);
	/* Use the entire RX mempool as the memory region. */
	tmpl.mr = ibv_reg_mr(priv->pd,
			     (void *)mp->elt_va_start,
			     (mp->elt_va_end - mp->elt_va_start),
			     (IBV_ACCESS_LOCAL_WRITE |
			      IBV_ACCESS_REMOTE_WRITE));
	if (tmpl.mr == NULL) {
		ret = EINVAL;
		ERROR("%p: MR creation failure: %s",
		      (void *)dev, strerror(ret));
		goto error;
	}
	attr.rd = (struct ibv_exp_res_domain_init_attr){
		.comp_mask = (IBV_EXP_RES_DOMAIN_THREAD_MODEL |
			      IBV_EXP_RES_DOMAIN_MSG_MODEL),
		.thread_model = IBV_EXP_THREAD_SINGLE,
		.msg_model = IBV_EXP_MSG_HIGH_BW,
	};
	tmpl.rd = ibv_exp_create_res_domain(priv->ctx, &attr.rd);
	if (tmpl.rd == NULL) {
		ret = ENOMEM;
		ERROR("%p: RD creation failure: %s",
		      (void *)dev, strerror(ret));
		goto error;
	}
	attr.cq = (struct ibv_exp_cq_init_attr){
		.comp_mask = IBV_EXP_CQ_INIT_ATTR_RES_DOMAIN,
		.res_domain = tmpl.rd,
	};
	tmpl.cq = ibv_exp_create_cq(priv->ctx, cq_size, NULL, NULL, 0,
				    &attr.cq);
	if (tmpl.cq == NULL) {
		ret = ENOMEM;
		ERROR("%p: CQ creation failure: %s",
		      (void *)dev, strerror(ret));
		goto error;
	}
	DEBUG("priv->device_attr.max_qp_wr is %d",
	      priv->device_attr.max_qp_wr);
	DEBUG("priv->device_attr.max_sge is %d",
	      priv->device_attr.max_sge);
	attr.wq = (struct ibv_exp_wq_init_attr){
		.wq_context = NULL, /* Could be useful in the future. */
		.wq_type = IBV_EXP_WQT_RQ,
		/* Max number of outstanding WRs. */
		.max_recv_wr = ((priv->device_attr.max_qp_wr < (int)cq_size) ?
				priv->device_attr.max_qp_wr :
				(int)cq_size),
		/* Max number of scatter/gather elements in a WR. */
		.max_recv_sge = ((priv->device_attr.max_sge <
				  MLX5_PMD_SGE_WR_N) ?
				 priv->device_attr.max_sge :
				 MLX5_PMD_SGE_WR_N),
		.pd = priv->pd,
		.cq = tmpl.cq,
		.comp_mask = IBV_EXP_CREATE_WQ_RES_DOMAIN,
		.res_domain = tmpl.rd,
	};
	tmpl.wq = ibv_exp_create_wq(priv->ctx, &attr.wq);
	if (tmpl.wq == NULL) {
		ret = (errno ? errno : EINVAL);
		ERROR("%p: WQ creation failure: %s",
		      (void *)dev, strerror(ret));
		goto error;
	}
	if (tmpl.sp)
		ret = rxq_alloc_elts_sp(&tmpl, desc, NULL);
	else
		ret = rxq_alloc_elts(&tmpl, desc, NULL);
	if (ret) {
		ERROR("%p: RXQ allocation failed: %s",
		      (void *)dev, strerror(ret));
		goto error;
	}
	/* Save port ID. */
	tmpl.port_id = dev->data->port_id;
	DEBUG("%p: RTE port ID: %u", (void *)rxq, tmpl.port_id);
	attr.params = (struct ibv_exp_query_intf_params){
		.intf_scope = IBV_EXP_INTF_GLOBAL,
		.intf = IBV_EXP_INTF_CQ,
		.obj = tmpl.cq,
	};
	tmpl.if_cq = ibv_exp_query_intf(priv->ctx, &attr.params, &status);
	if (tmpl.if_cq == NULL) {
		ERROR("%p: CQ interface family query failed with status %d",
		      (void *)dev, status);
		ret = EINVAL;
		goto error;
	}
	attr.params = (struct ibv_exp_query_intf_params){
		.intf_scope = IBV_EXP_INTF_GLOBAL,
		.intf = IBV_EXP_INTF_WQ,
		.obj = tmpl.wq,
	};
	tmpl.if_wq = ibv_exp_query_intf(priv->ctx, &attr.params, &status);
	if (tmpl.if_wq == NULL) {
		ERROR("%p: WQ interface family query failed with status %d",
		      (void *)dev, status);
		ret = EINVAL;
		goto error;
	}
	/* Change queue state to ready. */
	mod = (struct ibv_exp_wq_attr){
		.attr_mask = IBV_EXP_WQ_ATTR_STATE,
		.wq_state = IBV_EXP_WQS_RDY,
	};
	ret = ibv_exp_modify_wq(tmpl.wq, &mod);
	if (ret) {
		ERROR("%p: WQ state to IBV_EXP_WQS_RDY failed: %s",
		      (void *)dev, strerror(ret));
		goto error;
	}
	/* Post SGEs. */
	if (tmpl.sp) {
		struct rxq_elt_sp (*elts)[tmpl.elts_n] = tmpl.elts.sp;

		for (i = 0; (i != RTE_DIM(*elts)); ++i) {
			ret = tmpl.if_wq->recv_sg_list
				(tmpl.wq,
				 (*elts)[i].sges,
				 RTE_DIM((*elts)[i].sges));
			if (ret)
				break;
		}
	} else {
		struct rxq_elt (*elts)[tmpl.elts_n] = tmpl.elts.no_sp;

		for (i = 0; (i != RTE_DIM(*elts)); ++i) {
			ret = tmpl.if_wq->recv_burst(
				tmpl.wq,
				&(*elts)[i].sge,
				1);
			if (ret)
				break;
		}
	}
	if (ret) {
		ERROR("%p: failed to post SGEs with error %d",
		      (void *)dev, ret);
		/* Set ret because it does not contain a valid errno value. */
		ret = EIO;
		goto error;
	}
	/* Clean up rxq in case we're reinitializing it. */
	DEBUG("%p: cleaning-up old rxq just in case", (void *)rxq);
	rxq_cleanup(rxq);
	*rxq = tmpl;
	DEBUG("%p: rxq updated with %p", (void *)rxq, (void *)&tmpl);
	assert(ret == 0);
	return 0;
error:
	rxq_cleanup(&tmpl);
	assert(ret > 0);
	return ret;
}
/**
 * DPDK callback to configure a RX queue.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   RX queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 * @param[in] conf
 *   Thresholds parameters.
 * @param mp
 *   Memory pool for buffer allocations.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
int
mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
		    unsigned int socket, const struct rte_eth_rxconf *conf,
		    struct rte_mempool *mp)
{
	struct priv *priv = dev->data->dev_private;
	struct rxq *rxq = (*priv->rxqs)[idx];
	int ret;

	priv_lock(priv);
	DEBUG("%p: configuring queue %u for %u descriptors",
	      (void *)dev, idx, desc);
	if (idx >= priv->rxqs_n) {
		ERROR("%p: queue index out of range (%u >= %u)",
		      (void *)dev, idx, priv->rxqs_n);
		priv_unlock(priv);
		return -EOVERFLOW;
	}
	if (rxq != NULL) {
		DEBUG("%p: reusing already allocated queue index %u (%p)",
		      (void *)dev, idx, (void *)rxq);
		if (priv->started) {
			priv_unlock(priv);
			return -EEXIST;
		}
		(*priv->rxqs)[idx] = NULL;
		rxq_cleanup(rxq);
	} else {
		rxq = rte_calloc_socket("RXQ", 1, sizeof(*rxq), 0, socket);
		if (rxq == NULL) {
			ERROR("%p: unable to allocate queue index %u",
			      (void *)dev, idx);
			priv_unlock(priv);
			return -ENOMEM;
		}
	}
	ret = rxq_setup(dev, rxq, desc, socket, conf, mp);
	if (ret)
		rte_free(rxq);
	else {
		rxq->stats.idx = idx;
		DEBUG("%p: adding RX queue %p to list",
		      (void *)dev, (void *)rxq);
		(*priv->rxqs)[idx] = rxq;
		/* Update receive callback. */
		if (rxq->sp)
			dev->rx_pkt_burst = mlx5_rx_burst_sp;
		else
			dev->rx_pkt_burst = mlx5_rx_burst;
	}
	priv_unlock(priv);
	return -ret;
}
/**
 * DPDK callback to release a RX queue.
 *
 * @param dpdk_rxq
 *   Generic RX queue pointer.
 */
void
mlx5_rx_queue_release(void *dpdk_rxq)
{
	struct rxq *rxq = (struct rxq *)dpdk_rxq;
	struct priv *priv;
	unsigned int i;

	if (rxq == NULL)
		return;
	priv = rxq->priv;
	priv_lock(priv);
	for (i = 0; (i != priv->rxqs_n); ++i)
		if ((*priv->rxqs)[i] == rxq) {
			DEBUG("%p: removing RX queue %p from list",
			      (void *)priv->dev, (void *)rxq);
			(*priv->rxqs)[i] = NULL;
			break;
		}
	rxq_cleanup(rxq);
	rte_free(rxq);
	priv_unlock(priv);
}
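/*
 * Application-side usage sketch (hypothetical, not part of this driver):
 * mlx5_rx_queue_setup() and mlx5_rx_queue_release() are reached through the
 * generic ethdev API, e.g.:
 *
 *	struct rte_mempool *mp =
 *		rte_pktmbuf_pool_create("rx_pool", 8192, 256, 0,
 *					RTE_MBUF_DEFAULT_BUF_SIZE,
 *					rte_socket_id());
 *	int ret = rte_eth_rx_queue_setup(port_id, 0, 512, rte_socket_id(),
 *					 NULL, mp);
 *	if (ret < 0)
 *		rte_exit(EXIT_FAILURE, "RX queue setup failed\n");
 */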