/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2020 Mellanox Technologies, Ltd
 */
#include <errno.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <sys/queue.h>

#include "mlx5_autoconf.h"

#include <rte_malloc.h>
#include <rte_ethdev_driver.h>
#include <rte_common.h>

#include <mlx5_glue.h>
#include <mlx5_common.h>
#include <mlx5_common_mr.h>
#include <mlx5_rxtx.h>
#include <mlx5_verbs.h>
#include <mlx5_utils.h>
#include <mlx5_malloc.h>

/**
 * Register a memory region.
 *
 * Given a protection domain pointer, a start address and a length, register
 * the memory region by forwarding to mlx5_common_verbs_reg_mr().
 *
 * @param[in] pd
 *   Pointer to protection domain context.
 * @param[in] addr
 *   Pointer to memory start address.
 * @param[in] length
 *   Length of the memory to register.
 * @param[out] pmd_mr
 *   pmd_mr struct set with lkey, address, length and pointer to mr object.
 *
 * @return
 *   0 on successful registration, -1 otherwise.
 */
static int
mlx5_reg_mr(void *pd, void *addr, size_t length,
	    struct mlx5_pmd_mr *pmd_mr)
{
	return mlx5_common_verbs_reg_mr(pd, addr, length, pmd_mr);
}

/**
 * Deregister a memory region.
 *
 * Forwards the given mlx5 PMD MR to mlx5_common_verbs_dereg_mr().
 *
 * @param[in] pmd_mr
 *   pmd_mr struct set with lkey, address, length and pointer to mr object.
 */
static void
mlx5_dereg_mr(struct mlx5_pmd_mr *pmd_mr)
{
	mlx5_common_verbs_dereg_mr(pmd_mr);
}

65 /* verbs operations. */
66 const struct mlx5_verbs_ops mlx5_verbs_ops = {
67 .reg_mr = mlx5_reg_mr,
68 .dereg_mr = mlx5_dereg_mr,
72 * Modify Rx WQ vlan stripping offload
77 * @return 0 on success, non-0 otherwise
80 mlx5_rxq_obj_modify_wq_vlan_strip(struct mlx5_rxq_obj *rxq_obj, int on)
82 uint16_t vlan_offloads =
83 (on ? IBV_WQ_FLAGS_CVLAN_STRIPPING : 0) |
85 struct ibv_wq_attr mod;
86 mod = (struct ibv_wq_attr){
87 .attr_mask = IBV_WQ_ATTR_FLAGS,
88 .flags_mask = IBV_WQ_FLAGS_CVLAN_STRIPPING,
89 .flags = vlan_offloads,
92 return mlx5_glue->modify_wq(rxq_obj->wq, &mod);
96 * Modifies the attributes for the specified WQ.
99 * Verbs Rx queue object.
102 * 0 on success, a negative errno value otherwise and rte_errno is set.
105 mlx5_ibv_modify_wq(struct mlx5_rxq_obj *rxq_obj, bool is_start)
107 struct ibv_wq_attr mod = {
108 .attr_mask = IBV_WQ_ATTR_STATE,
109 .wq_state = is_start ? IBV_WQS_RDY : IBV_WQS_RESET,
112 return mlx5_glue->modify_wq(rxq_obj->wq, &mod);
116 * Create a CQ Verbs object.
119 * Pointer to Ethernet device.
121 * Queue index in DPDK Rx queue array.
124 * The Verbs CQ object initialized, NULL otherwise and rte_errno is set.
126 static struct ibv_cq *
127 mlx5_rxq_ibv_cq_create(struct rte_eth_dev *dev, uint16_t idx)
129 struct mlx5_priv *priv = dev->data->dev_private;
130 struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
131 struct mlx5_rxq_ctrl *rxq_ctrl =
132 container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
133 struct mlx5_rxq_obj *rxq_obj = rxq_ctrl->obj;
134 unsigned int cqe_n = mlx5_rxq_cqe_num(rxq_data);
136 struct ibv_cq_init_attr_ex ibv;
137 struct mlx5dv_cq_init_attr mlx5;
140 cq_attr.ibv = (struct ibv_cq_init_attr_ex){
142 .channel = rxq_obj->ibv_channel,
145 cq_attr.mlx5 = (struct mlx5dv_cq_init_attr){
148 if (priv->config.cqe_comp && !rxq_data->hw_timestamp) {
149 cq_attr.mlx5.comp_mask |=
150 MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE;
151 #ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT
152 cq_attr.mlx5.cqe_comp_res_format =
153 mlx5_rxq_mprq_enabled(rxq_data) ?
154 MLX5DV_CQE_RES_FORMAT_CSUM_STRIDX :
155 MLX5DV_CQE_RES_FORMAT_HASH;
157 cq_attr.mlx5.cqe_comp_res_format = MLX5DV_CQE_RES_FORMAT_HASH;
160 * For vectorized Rx, it must not be doubled in order to
161 * make cq_ci and rq_ci aligned.
163 if (mlx5_rxq_check_vec_support(rxq_data) < 0)
164 cq_attr.ibv.cqe *= 2;
165 } else if (priv->config.cqe_comp && rxq_data->hw_timestamp) {
167 "Port %u Rx CQE compression is disabled for HW"
171 #ifdef HAVE_IBV_MLX5_MOD_CQE_128B_PAD
172 if (priv->config.cqe_pad) {
173 cq_attr.mlx5.comp_mask |= MLX5DV_CQ_INIT_ATTR_MASK_FLAGS;
174 cq_attr.mlx5.flags |= MLX5DV_CQ_INIT_ATTR_FLAGS_CQE_PAD;
177 return mlx5_glue->cq_ex_to_cq(mlx5_glue->dv_create_cq(priv->sh->ctx,
183 * Create a WQ Verbs object.
186 * Pointer to Ethernet device.
188 * Queue index in DPDK Rx queue array.
191 * The Verbs WQ object initialized, NULL otherwise and rte_errno is set.
193 static struct ibv_wq *
194 mlx5_rxq_ibv_wq_create(struct rte_eth_dev *dev, uint16_t idx)
196 struct mlx5_priv *priv = dev->data->dev_private;
197 struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
198 struct mlx5_rxq_ctrl *rxq_ctrl =
199 container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
200 struct mlx5_rxq_obj *rxq_obj = rxq_ctrl->obj;
201 unsigned int wqe_n = 1 << rxq_data->elts_n;
203 struct ibv_wq_init_attr ibv;
204 #ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT
205 struct mlx5dv_wq_init_attr mlx5;
209 wq_attr.ibv = (struct ibv_wq_init_attr){
210 .wq_context = NULL, /* Could be useful in the future. */
211 .wq_type = IBV_WQT_RQ,
212 /* Max number of outstanding WRs. */
213 .max_wr = wqe_n >> rxq_data->sges_n,
214 /* Max number of scatter/gather elements in a WR. */
215 .max_sge = 1 << rxq_data->sges_n,
217 .cq = rxq_obj->ibv_cq,
218 .comp_mask = IBV_WQ_FLAGS_CVLAN_STRIPPING | 0,
219 .create_flags = (rxq_data->vlan_strip ?
220 IBV_WQ_FLAGS_CVLAN_STRIPPING : 0),
222 /* By default, FCS (CRC) is stripped by hardware. */
223 if (rxq_data->crc_present) {
224 wq_attr.ibv.create_flags |= IBV_WQ_FLAGS_SCATTER_FCS;
225 wq_attr.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
227 if (priv->config.hw_padding) {
228 #if defined(HAVE_IBV_WQ_FLAG_RX_END_PADDING)
229 wq_attr.ibv.create_flags |= IBV_WQ_FLAG_RX_END_PADDING;
230 wq_attr.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
231 #elif defined(HAVE_IBV_WQ_FLAGS_PCI_WRITE_END_PADDING)
232 wq_attr.ibv.create_flags |= IBV_WQ_FLAGS_PCI_WRITE_END_PADDING;
233 wq_attr.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
236 #ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT
237 wq_attr.mlx5 = (struct mlx5dv_wq_init_attr){
240 if (mlx5_rxq_mprq_enabled(rxq_data)) {
241 struct mlx5dv_striding_rq_init_attr *mprq_attr =
242 &wq_attr.mlx5.striding_rq_attrs;
244 wq_attr.mlx5.comp_mask |= MLX5DV_WQ_INIT_ATTR_MASK_STRIDING_RQ;
245 *mprq_attr = (struct mlx5dv_striding_rq_init_attr){
246 .single_stride_log_num_of_bytes = rxq_data->strd_sz_n,
247 .single_wqe_log_num_of_strides = rxq_data->strd_num_n,
248 .two_byte_shift_en = MLX5_MPRQ_TWO_BYTE_SHIFT,
251 rxq_obj->wq = mlx5_glue->dv_create_wq(priv->sh->ctx, &wq_attr.ibv,
254 rxq_obj->wq = mlx5_glue->create_wq(priv->sh->ctx, &wq_attr.ibv);
258 * Make sure number of WRs*SGEs match expectations since a queue
259 * cannot allocate more than "desc" buffers.
261 if (wq_attr.ibv.max_wr != (wqe_n >> rxq_data->sges_n) ||
262 wq_attr.ibv.max_sge != (1u << rxq_data->sges_n)) {
264 "Port %u Rx queue %u requested %u*%u but got"
266 dev->data->port_id, idx,
267 wqe_n >> rxq_data->sges_n,
268 (1 << rxq_data->sges_n),
269 wq_attr.ibv.max_wr, wq_attr.ibv.max_sge);
270 claim_zero(mlx5_glue->destroy_wq(rxq_obj->wq));
279 * Create the Rx queue Verbs object.
282 * Pointer to Ethernet device.
284 * Queue index in DPDK Rx queue array.
287 * 0 on success, a negative errno value otherwise and rte_errno is set.
290 mlx5_rxq_ibv_obj_new(struct rte_eth_dev *dev, uint16_t idx)
292 struct mlx5_priv *priv = dev->data->dev_private;
293 struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
294 struct mlx5_rxq_ctrl *rxq_ctrl =
295 container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
296 struct mlx5_rxq_obj *tmpl = rxq_ctrl->obj;
297 struct mlx5dv_cq cq_info;
298 struct mlx5dv_rwq rwq;
300 struct mlx5dv_obj obj;
302 MLX5_ASSERT(rxq_data);
304 priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_RX_QUEUE;
305 priv->verbs_alloc_ctx.obj = rxq_ctrl;
306 tmpl->type = MLX5_RXQ_OBJ_TYPE_IBV;
307 tmpl->rxq_ctrl = rxq_ctrl;
310 mlx5_glue->create_comp_channel(priv->sh->ctx);
311 if (!tmpl->ibv_channel) {
312 DRV_LOG(ERR, "Port %u: comp channel creation failure.",
317 tmpl->fd = ((struct ibv_comp_channel *)(tmpl->ibv_channel))->fd;
319 /* Create CQ using Verbs API. */
320 tmpl->ibv_cq = mlx5_rxq_ibv_cq_create(dev, idx);
322 DRV_LOG(ERR, "Port %u Rx queue %u CQ creation failure.",
323 dev->data->port_id, idx);
327 obj.cq.in = tmpl->ibv_cq;
328 obj.cq.out = &cq_info;
329 ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_CQ);
334 if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
336 "Port %u wrong MLX5_CQE_SIZE environment "
337 "variable value: it should be set to %u.",
338 dev->data->port_id, RTE_CACHE_LINE_SIZE);
342 /* Fill the rings. */
343 rxq_data->cqe_n = log2above(cq_info.cqe_cnt);
344 rxq_data->cq_db = cq_info.dbrec;
345 rxq_data->cqes = (volatile struct mlx5_cqe (*)[])(uintptr_t)cq_info.buf;
346 rxq_data->cq_uar = cq_info.cq_uar;
347 rxq_data->cqn = cq_info.cqn;
348 /* Create WQ (RQ) using Verbs API. */
349 tmpl->wq = mlx5_rxq_ibv_wq_create(dev, idx);
351 DRV_LOG(ERR, "Port %u Rx queue %u WQ creation failure.",
352 dev->data->port_id, idx);
356 /* Change queue state to ready. */
357 ret = mlx5_ibv_modify_wq(tmpl, true);
360 "Port %u Rx queue %u WQ state to IBV_WQS_RDY failed.",
361 dev->data->port_id, idx);
365 obj.rwq.in = tmpl->wq;
367 ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_RWQ);
372 rxq_data->wqes = rwq.buf;
373 rxq_data->rq_db = rwq.dbrec;
374 rxq_data->cq_arm_sn = 0;
375 mlx5_rxq_initialize(rxq_data);
377 priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE;
378 dev->data->rx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STARTED;
379 rxq_ctrl->wqn = ((struct ibv_wq *)(tmpl->wq))->wq_num;
382 ret = rte_errno; /* Save rte_errno before cleanup. */
384 claim_zero(mlx5_glue->destroy_wq(tmpl->wq));
386 claim_zero(mlx5_glue->destroy_cq(tmpl->ibv_cq));
387 if (tmpl->ibv_channel)
388 claim_zero(mlx5_glue->destroy_comp_channel(tmpl->ibv_channel));
389 rte_errno = ret; /* Restore rte_errno. */
390 priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE;
395 * Release an Rx verbs queue object.
398 * Verbs Rx queue object.
401 mlx5_rxq_ibv_obj_release(struct mlx5_rxq_obj *rxq_obj)
403 MLX5_ASSERT(rxq_obj);
404 MLX5_ASSERT(rxq_obj->wq);
405 MLX5_ASSERT(rxq_obj->ibv_cq);
406 claim_zero(mlx5_glue->destroy_wq(rxq_obj->wq));
407 claim_zero(mlx5_glue->destroy_cq(rxq_obj->ibv_cq));
408 if (rxq_obj->ibv_channel)
409 claim_zero(mlx5_glue->destroy_comp_channel
410 (rxq_obj->ibv_channel));
414 * Get event for an Rx verbs queue object.
417 * Verbs Rx queue object.
420 * 0 on success, a negative errno value otherwise and rte_errno is set.
423 mlx5_rx_ibv_get_event(struct mlx5_rxq_obj *rxq_obj)
425 struct ibv_cq *ev_cq;
427 int ret = mlx5_glue->get_cq_event(rxq_obj->ibv_channel,
430 if (ret < 0 || ev_cq != rxq_obj->ibv_cq)
432 mlx5_glue->ack_cq_events(rxq_obj->ibv_cq, 1);
443 * Create an indirection table.
446 * Pointer to Ethernet device.
448 * Queues entering in the indirection table.
450 * Number of queues in the array.
453 * The Verbs object initialized, NULL otherwise and rte_errno is set.
455 static struct mlx5_ind_table_obj *
456 mlx5_ibv_ind_table_obj_new(struct rte_eth_dev *dev, const uint16_t *queues,
459 struct mlx5_priv *priv = dev->data->dev_private;
460 struct mlx5_ind_table_obj *ind_tbl;
461 const unsigned int wq_n = rte_is_power_of_2(queues_n) ?
462 log2above(queues_n) :
463 log2above(priv->config.ind_table_max_size);
464 struct ibv_wq *wq[1 << wq_n];
465 unsigned int i = 0, j = 0, k = 0;
467 ind_tbl = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*ind_tbl) +
468 queues_n * sizeof(uint16_t), 0, SOCKET_ID_ANY);
473 ind_tbl->type = MLX5_IND_TBL_TYPE_IBV;
474 for (i = 0; i != queues_n; ++i) {
475 struct mlx5_rxq_ctrl *rxq = mlx5_rxq_get(dev, queues[i]);
478 wq[i] = rxq->obj->wq;
479 ind_tbl->queues[i] = queues[i];
481 ind_tbl->queues_n = queues_n;
482 /* Finalise indirection table. */
483 k = i; /* Retain value of i for use in error case. */
484 for (j = 0; k != (unsigned int)(1 << wq_n); ++k, ++j)
486 ind_tbl->ind_table = mlx5_glue->create_rwq_ind_table(priv->sh->ctx,
487 &(struct ibv_rwq_ind_table_init_attr){
488 .log_ind_tbl_size = wq_n,
492 if (!ind_tbl->ind_table) {
496 rte_atomic32_inc(&ind_tbl->refcnt);
497 LIST_INSERT_HEAD(&priv->ind_tbls, ind_tbl, next);
500 for (j = 0; j < i; j++)
501 mlx5_rxq_release(dev, ind_tbl->queues[j]);
503 DEBUG("Port %u cannot create indirection table.", dev->data->port_id);
508 * Destroys the specified Indirection Table.
511 * Indirection table to release.
514 mlx5_ibv_ind_table_obj_destroy(struct mlx5_ind_table_obj *ind_tbl)
516 claim_zero(mlx5_glue->destroy_rwq_ind_table(ind_tbl->ind_table));
520 * Create an Rx Hash queue.
523 * Pointer to Ethernet device.
525 * RSS key for the Rx hash queue.
529 * Verbs protocol hash field to make the RSS on.
531 * Queues entering in hash queue. In case of empty hash_fields only the
532 * first queue index will be taken for the indirection table.
539 * The Verbs object initialized index, 0 otherwise and rte_errno is set.
542 mlx5_ibv_hrxq_new(struct rte_eth_dev *dev,
543 const uint8_t *rss_key, uint32_t rss_key_len,
544 uint64_t hash_fields,
545 const uint16_t *queues, uint32_t queues_n,
546 int tunnel __rte_unused)
548 struct mlx5_priv *priv = dev->data->dev_private;
549 struct mlx5_hrxq *hrxq = NULL;
550 uint32_t hrxq_idx = 0;
551 struct ibv_qp *qp = NULL;
552 struct mlx5_ind_table_obj *ind_tbl;
555 queues_n = hash_fields ? queues_n : 1;
556 ind_tbl = mlx5_ind_table_obj_get(dev, queues, queues_n);
558 ind_tbl = priv->obj_ops->ind_table_obj_new(dev, queues,
564 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
565 struct mlx5dv_qp_init_attr qp_init_attr;
567 memset(&qp_init_attr, 0, sizeof(qp_init_attr));
569 qp_init_attr.comp_mask =
570 MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS;
571 qp_init_attr.create_flags = MLX5DV_QP_CREATE_TUNNEL_OFFLOADS;
573 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
574 if (dev->data->dev_conf.lpbk_mode) {
575 /* Allow packet sent from NIC loop back w/o source MAC check. */
576 qp_init_attr.comp_mask |=
577 MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS;
578 qp_init_attr.create_flags |=
579 MLX5DV_QP_CREATE_TIR_ALLOW_SELF_LOOPBACK_UC;
582 qp = mlx5_glue->dv_create_qp
584 &(struct ibv_qp_init_attr_ex){
585 .qp_type = IBV_QPT_RAW_PACKET,
587 IBV_QP_INIT_ATTR_PD |
588 IBV_QP_INIT_ATTR_IND_TABLE |
589 IBV_QP_INIT_ATTR_RX_HASH,
590 .rx_hash_conf = (struct ibv_rx_hash_conf){
592 IBV_RX_HASH_FUNC_TOEPLITZ,
593 .rx_hash_key_len = rss_key_len,
595 (void *)(uintptr_t)rss_key,
596 .rx_hash_fields_mask = hash_fields,
598 .rwq_ind_tbl = ind_tbl->ind_table,
603 qp = mlx5_glue->create_qp_ex
605 &(struct ibv_qp_init_attr_ex){
606 .qp_type = IBV_QPT_RAW_PACKET,
608 IBV_QP_INIT_ATTR_PD |
609 IBV_QP_INIT_ATTR_IND_TABLE |
610 IBV_QP_INIT_ATTR_RX_HASH,
611 .rx_hash_conf = (struct ibv_rx_hash_conf){
613 IBV_RX_HASH_FUNC_TOEPLITZ,
614 .rx_hash_key_len = rss_key_len,
616 (void *)(uintptr_t)rss_key,
617 .rx_hash_fields_mask = hash_fields,
619 .rwq_ind_tbl = ind_tbl->ind_table,
627 hrxq = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_HRXQ], &hrxq_idx);
630 hrxq->ind_table = ind_tbl;
632 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
633 hrxq->action = mlx5_glue->dv_create_flow_action_dest_ibv_qp(hrxq->qp);
639 hrxq->rss_key_len = rss_key_len;
640 hrxq->hash_fields = hash_fields;
641 memcpy(hrxq->rss_key, rss_key, rss_key_len);
642 rte_atomic32_inc(&hrxq->refcnt);
643 ILIST_INSERT(priv->sh->ipool[MLX5_IPOOL_HRXQ], &priv->hrxqs, hrxq_idx,
647 err = rte_errno; /* Save rte_errno before cleanup. */
648 mlx5_ind_table_obj_release(dev, ind_tbl);
650 claim_zero(mlx5_glue->destroy_qp(qp));
652 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_HRXQ], hrxq_idx);
653 rte_errno = err; /* Restore rte_errno. */
658 * Destroy a Verbs queue pair.
661 * Hash Rx queue to release its qp.
664 mlx5_ibv_qp_destroy(struct mlx5_hrxq *hrxq)
666 claim_zero(mlx5_glue->destroy_qp(hrxq->qp));
669 struct mlx5_obj_ops ibv_obj_ops = {
670 .rxq_obj_modify_vlan_strip = mlx5_rxq_obj_modify_wq_vlan_strip,
671 .rxq_obj_new = mlx5_rxq_ibv_obj_new,
672 .rxq_event_get = mlx5_rx_ibv_get_event,
673 .rxq_obj_modify = mlx5_ibv_modify_wq,
674 .rxq_obj_release = mlx5_rxq_ibv_obj_release,
675 .ind_table_obj_new = mlx5_ibv_ind_table_obj_new,
676 .ind_table_obj_destroy = mlx5_ibv_ind_table_obj_destroy,
677 .hrxq_new = mlx5_ibv_hrxq_new,
678 .hrxq_destroy = mlx5_ibv_qp_destroy,