/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2020 Mellanox Technologies, Ltd
 */

#include <stddef.h>
#include <errno.h>
#include <string.h>
#include <stdint.h>
#include <unistd.h>
#include <inttypes.h>
#include <sys/queue.h>

#include "mlx5_autoconf.h"

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <ethdev_driver.h>
#include <rte_common.h>

#include <mlx5_glue.h>
#include <mlx5_common.h>
#include <mlx5_common_mr.h>
#include <mlx5_verbs.h>
#include <mlx5_rx.h>
#include <mlx5_tx.h>
#include <mlx5_utils.h>
#include <mlx5_malloc.h>

/**
 * Register a memory region. Given a protection domain pointer, a start
 * address and a length, register the memory region.
 *
 * @param[in] pd
 *   Pointer to protection domain context.
 * @param[in] addr
 *   Pointer to memory start address.
 * @param[in] length
 *   Length of the memory to register.
 * @param[out] pmd_mr
 *   pmd_mr struct set with lkey, address, length and pointer to MR object.
 *
 * @return
 *   0 on successful registration, -1 otherwise.
 */
static int
mlx5_reg_mr(void *pd, void *addr, size_t length, struct mlx5_pmd_mr *pmd_mr)
{
	return mlx5_common_verbs_reg_mr(pd, addr, length, pmd_mr);
}

/**
 * Deregister a memory region previously registered by mlx5_reg_mr().
 *
 * @param[in] pmd_mr
 *   pmd_mr struct set with lkey, address, length and pointer to MR object.
 */
static void
mlx5_dereg_mr(struct mlx5_pmd_mr *pmd_mr)
{
	mlx5_common_verbs_dereg_mr(pmd_mr);
}

/* Verbs operations. */
const struct mlx5_mr_ops mlx5_mr_verbs_ops = {
	.reg_mr = mlx5_reg_mr,
	.dereg_mr = mlx5_dereg_mr,
};

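/*
 * Usage sketch (illustrative only, not part of the driver): a caller that
 * selected the Verbs MR callbacks could register and release a buffer
 * through the table; "pd", "buf" and "len" are assumptions for the example.
 *
 *	struct mlx5_pmd_mr pmd_mr;
 *
 *	if (mlx5_mr_verbs_ops.reg_mr(pd, buf, len, &pmd_mr) == 0)
 *		mlx5_mr_verbs_ops.dereg_mr(&pmd_mr);
 */
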
/**
 * Modify Rx WQ VLAN stripping offload.
 *
 * @param rxq_obj
 *   Rx queue object.
 * @param on
 *   Enable/disable VLAN stripping.
 *
 * @return
 *   0 on success, non-0 otherwise.
 */
static int
mlx5_rxq_obj_modify_wq_vlan_strip(struct mlx5_rxq_obj *rxq_obj, int on)
{
	uint16_t vlan_offloads =
		(on ? IBV_WQ_FLAGS_CVLAN_STRIPPING : 0);
	struct ibv_wq_attr mod;

	mod = (struct ibv_wq_attr){
		.attr_mask = IBV_WQ_ATTR_FLAGS,
		.flags_mask = IBV_WQ_FLAGS_CVLAN_STRIPPING,
		.flags = vlan_offloads,
	};
	return mlx5_glue->modify_wq(rxq_obj->wq, &mod);
}

/**
 * Modifies the attributes for the specified WQ.
 *
 * @param rxq_obj
 *   Verbs Rx queue object.
 * @param type
 *   Type of change queue state.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_ibv_modify_wq(struct mlx5_rxq_obj *rxq_obj, uint8_t type)
{
	struct ibv_wq_attr mod = {
		.attr_mask = IBV_WQ_ATTR_STATE,
		.wq_state = (enum ibv_wq_state)type,
	};

	return mlx5_glue->modify_wq(rxq_obj->wq, &mod);
}

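/*
 * Usage sketch (assumption, not driver code): bringing a WQ from reset to
 * ready with the helper above maps to a single attribute change:
 *
 *	if (mlx5_ibv_modify_wq(rxq_obj, IBV_WQS_RDY))
 *		DRV_LOG(ERR, "WQ ready transition failed.");
 */
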
/**
 * Modify QP using Verbs API.
 *
 * @param obj
 *   Verbs Tx queue object.
 * @param type
 *   Type of change queue state.
 * @param dev_port
 *   IB device port number.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_ibv_modify_qp(struct mlx5_txq_obj *obj, enum mlx5_txq_modify_type type,
		   uint8_t dev_port)
{
	struct ibv_qp_attr mod = {
		.qp_state = IBV_QPS_RESET,
		.port_num = dev_port,
	};
	int attr_mask = (IBV_QP_STATE | IBV_QP_PORT);
	int ret = 0;

	if (type != MLX5_TXQ_MOD_RST2RDY) {
		ret = mlx5_glue->modify_qp(obj->qp, &mod, IBV_QP_STATE);
		if (ret) {
			DRV_LOG(ERR, "Cannot change Tx QP state to RESET %s",
				strerror(errno));
			rte_errno = errno;
			return ret;
		}
		if (type == MLX5_TXQ_MOD_RDY2RST)
			return 0;
	}
	if (type == MLX5_TXQ_MOD_ERR2RDY)
		attr_mask = IBV_QP_STATE;
	mod.qp_state = IBV_QPS_INIT;
	ret = mlx5_glue->modify_qp(obj->qp, &mod, attr_mask);
	if (ret) {
		DRV_LOG(ERR, "Cannot change Tx QP state to INIT %s",
			strerror(errno));
		rte_errno = errno;
		return ret;
	}
	mod.qp_state = IBV_QPS_RTR;
	ret = mlx5_glue->modify_qp(obj->qp, &mod, IBV_QP_STATE);
	if (ret) {
		DRV_LOG(ERR, "Cannot change Tx QP state to RTR %s",
			strerror(errno));
		rte_errno = errno;
		return ret;
	}
	mod.qp_state = IBV_QPS_RTS;
	ret = mlx5_glue->modify_qp(obj->qp, &mod, IBV_QP_STATE);
	if (ret) {
		DRV_LOG(ERR, "Cannot change Tx QP state to RTS %s",
			strerror(errno));
		rte_errno = errno;
		return ret;
	}
	return 0;
}

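/*
 * Usage sketch (assumption, not driver code): a full Tx queue start is a
 * RESET->INIT->RTR->RTS walk, which the helper above performs in one call;
 * "obj" and the port number are assumptions for the example.
 *
 *	if (mlx5_ibv_modify_qp(obj, MLX5_TXQ_MOD_RST2RDY, dev_port))
 *		DRV_LOG(ERR, "Tx QP ready transition failed.");
 */
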
/**
 * Create a CQ Verbs object.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   Queue index in DPDK Rx queue array.
 *
 * @return
 *   The Verbs CQ object initialized, NULL otherwise and rte_errno is set.
 */
static struct ibv_cq *
mlx5_rxq_ibv_cq_create(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
	struct mlx5_rxq_ctrl *rxq_ctrl =
		container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
	struct mlx5_rxq_obj *rxq_obj = rxq_ctrl->obj;
	unsigned int cqe_n = mlx5_rxq_cqe_num(rxq_data);
	struct {
		struct ibv_cq_init_attr_ex ibv;
		struct mlx5dv_cq_init_attr mlx5;
	} cq_attr;

	cq_attr.ibv = (struct ibv_cq_init_attr_ex){
		.cqe = cqe_n,
		.channel = rxq_obj->ibv_channel,
		.comp_mask = 0,
	};
	cq_attr.mlx5 = (struct mlx5dv_cq_init_attr){
		.comp_mask = 0,
	};
	if (priv->config.cqe_comp && !rxq_data->hw_timestamp) {
		cq_attr.mlx5.comp_mask |=
				MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE;
		rxq_data->byte_mask = UINT32_MAX;
#ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT
		if (mlx5_rxq_mprq_enabled(rxq_data)) {
			cq_attr.mlx5.cqe_comp_res_format =
					MLX5DV_CQE_RES_FORMAT_CSUM_STRIDX;
			rxq_data->mcqe_format =
					MLX5_CQE_RESP_FORMAT_CSUM_STRIDX;
		} else {
			cq_attr.mlx5.cqe_comp_res_format =
					MLX5DV_CQE_RES_FORMAT_HASH;
			rxq_data->mcqe_format =
					MLX5_CQE_RESP_FORMAT_HASH;
		}
#else
		cq_attr.mlx5.cqe_comp_res_format = MLX5DV_CQE_RES_FORMAT_HASH;
		rxq_data->mcqe_format = MLX5_CQE_RESP_FORMAT_HASH;
#endif
		/*
		 * For vectorized Rx, it must not be doubled in order to
		 * make cq_ci and rq_ci aligned.
		 */
		if (mlx5_rxq_check_vec_support(rxq_data) < 0)
			cq_attr.ibv.cqe *= 2;
	} else if (priv->config.cqe_comp && rxq_data->hw_timestamp) {
		DRV_LOG(DEBUG,
			"Port %u Rx CQE compression is disabled for HW"
			" timestamp.",
			dev->data->port_id);
	}
#ifdef HAVE_IBV_MLX5_MOD_CQE_128B_PAD
	if (RTE_CACHE_LINE_SIZE == 128) {
		cq_attr.mlx5.comp_mask |= MLX5DV_CQ_INIT_ATTR_MASK_FLAGS;
		cq_attr.mlx5.flags |= MLX5DV_CQ_INIT_ATTR_FLAGS_CQE_PAD;
	}
#endif
	return mlx5_glue->cq_ex_to_cq(mlx5_glue->dv_create_cq
						(priv->sh->cdev->ctx,
						 &cq_attr.ibv,
						 &cq_attr.mlx5));
}

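/*
 * Worked example (illustrative): with compression enabled and
 * non-vectorized Rx, the requested CQE count is doubled, so a queue with
 * cqe_n == 256 asks the device for 512 CQEs; vectorized Rx keeps 256 so
 * that cq_ci and rq_ci stay aligned.
 */
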
/**
 * Create a WQ Verbs object.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   Queue index in DPDK Rx queue array.
 *
 * @return
 *   The Verbs WQ object initialized, NULL otherwise and rte_errno is set.
 */
static struct ibv_wq *
mlx5_rxq_ibv_wq_create(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
	struct mlx5_rxq_ctrl *rxq_ctrl =
		container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
	struct mlx5_rxq_obj *rxq_obj = rxq_ctrl->obj;
	unsigned int wqe_n = 1 << rxq_data->elts_n;
	struct {
		struct ibv_wq_init_attr ibv;
#ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT
		struct mlx5dv_wq_init_attr mlx5;
#endif
	} wq_attr;

	wq_attr.ibv = (struct ibv_wq_init_attr){
		.wq_context = NULL, /* Could be useful in the future. */
		.wq_type = IBV_WQT_RQ,
		/* Max number of outstanding WRs. */
		.max_wr = wqe_n >> rxq_data->sges_n,
		/* Max number of scatter/gather elements in a WR. */
		.max_sge = 1 << rxq_data->sges_n,
		.pd = priv->sh->pd,
		.cq = rxq_obj->ibv_cq,
		.comp_mask = IBV_WQ_FLAGS_CVLAN_STRIPPING | 0,
		.create_flags = (rxq_data->vlan_strip ?
				 IBV_WQ_FLAGS_CVLAN_STRIPPING : 0),
	};
	/* By default, FCS (CRC) is stripped by hardware. */
	if (rxq_data->crc_present) {
		wq_attr.ibv.create_flags |= IBV_WQ_FLAGS_SCATTER_FCS;
		wq_attr.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
	}
	if (priv->config.hw_padding) {
#if defined(HAVE_IBV_WQ_FLAG_RX_END_PADDING)
		wq_attr.ibv.create_flags |= IBV_WQ_FLAG_RX_END_PADDING;
		wq_attr.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
#elif defined(HAVE_IBV_WQ_FLAGS_PCI_WRITE_END_PADDING)
		wq_attr.ibv.create_flags |= IBV_WQ_FLAGS_PCI_WRITE_END_PADDING;
		wq_attr.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
#endif
	}
#ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT
	wq_attr.mlx5 = (struct mlx5dv_wq_init_attr){
		.comp_mask = 0,
	};
	if (mlx5_rxq_mprq_enabled(rxq_data)) {
		struct mlx5dv_striding_rq_init_attr *mprq_attr =
			&wq_attr.mlx5.striding_rq_attrs;

		wq_attr.mlx5.comp_mask |= MLX5DV_WQ_INIT_ATTR_MASK_STRIDING_RQ;
		*mprq_attr = (struct mlx5dv_striding_rq_init_attr){
			.single_stride_log_num_of_bytes = rxq_data->strd_sz_n,
			.single_wqe_log_num_of_strides = rxq_data->strd_num_n,
			.two_byte_shift_en = MLX5_MPRQ_TWO_BYTE_SHIFT,
		};
	}
	rxq_obj->wq = mlx5_glue->dv_create_wq(priv->sh->cdev->ctx, &wq_attr.ibv,
					      &wq_attr.mlx5);
#else
	rxq_obj->wq = mlx5_glue->create_wq(priv->sh->cdev->ctx, &wq_attr.ibv);
#endif
	if (rxq_obj->wq) {
		/*
		 * Make sure number of WRs*SGEs match expectations since a
		 * queue cannot allocate more than "desc" buffers.
		 */
		if (wq_attr.ibv.max_wr != (wqe_n >> rxq_data->sges_n) ||
		    wq_attr.ibv.max_sge != (1u << rxq_data->sges_n)) {
			DRV_LOG(ERR,
				"Port %u Rx queue %u requested %u*%u but got"
				" %u*%u WRs*SGEs.",
				dev->data->port_id, idx,
				wqe_n >> rxq_data->sges_n,
				(1 << rxq_data->sges_n),
				wq_attr.ibv.max_wr, wq_attr.ibv.max_sge);
			claim_zero(mlx5_glue->destroy_wq(rxq_obj->wq));
			rxq_obj->wq = NULL;
			rte_errno = EINVAL;
		}
	}
	return rxq_obj->wq;
}

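/*
 * Worked example (illustrative): with elts_n == 9 (512 elements) and
 * sges_n == 2, the WQ is requested with max_wr = 512 >> 2 = 128 and
 * max_sge = 1 << 2 = 4, i.e. 128 WRs of 4 scatter entries each, matching
 * the 512 "desc" buffers exactly.
 */
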
/**
 * Create the Rx queue Verbs object.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   Queue index in DPDK Rx queue array.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_rxq_ibv_obj_new(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
	struct mlx5_rxq_ctrl *rxq_ctrl =
		container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
	struct mlx5_rxq_obj *tmpl = rxq_ctrl->obj;
	struct mlx5dv_cq cq_info;
	struct mlx5dv_rwq rwq;
	int ret = 0;
	struct mlx5dv_obj obj;

	MLX5_ASSERT(rxq_data);
	MLX5_ASSERT(tmpl);
	tmpl->rxq_ctrl = rxq_ctrl;
	if (rxq_ctrl->irq) {
		tmpl->ibv_channel =
			mlx5_glue->create_comp_channel(priv->sh->cdev->ctx);
		if (!tmpl->ibv_channel) {
			DRV_LOG(ERR, "Port %u: comp channel creation failure.",
				dev->data->port_id);
			rte_errno = ENOMEM;
			goto error;
		}
		tmpl->fd = ((struct ibv_comp_channel *)(tmpl->ibv_channel))->fd;
	}
	/* Create CQ using Verbs API. */
	tmpl->ibv_cq = mlx5_rxq_ibv_cq_create(dev, idx);
	if (!tmpl->ibv_cq) {
		DRV_LOG(ERR, "Port %u Rx queue %u CQ creation failure.",
			dev->data->port_id, idx);
		rte_errno = ENOMEM;
		goto error;
	}
	obj.cq.in = tmpl->ibv_cq;
	obj.cq.out = &cq_info;
	ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_CQ);
	if (ret) {
		rte_errno = ret;
		goto error;
	}
	if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
		DRV_LOG(ERR,
			"Port %u wrong MLX5_CQE_SIZE environment "
			"variable value: it should be set to %u.",
			dev->data->port_id, RTE_CACHE_LINE_SIZE);
		rte_errno = EINVAL;
		goto error;
	}
	/* Fill the rings. */
	rxq_data->cqe_n = log2above(cq_info.cqe_cnt);
	rxq_data->cq_db = cq_info.dbrec;
	rxq_data->cqes = (volatile struct mlx5_cqe (*)[])(uintptr_t)cq_info.buf;
	rxq_data->cq_uar = cq_info.cq_uar;
	rxq_data->cqn = cq_info.cqn;
	/* Create WQ (RQ) using Verbs API. */
	tmpl->wq = mlx5_rxq_ibv_wq_create(dev, idx);
	if (!tmpl->wq) {
		DRV_LOG(ERR, "Port %u Rx queue %u WQ creation failure.",
			dev->data->port_id, idx);
		rte_errno = ENOMEM;
		goto error;
	}
	/* Change queue state to ready. */
	ret = mlx5_ibv_modify_wq(tmpl, IBV_WQS_RDY);
	if (ret) {
		DRV_LOG(ERR,
			"Port %u Rx queue %u WQ state to IBV_WQS_RDY failed.",
			dev->data->port_id, idx);
		rte_errno = ret;
		goto error;
	}
	obj.rwq.in = tmpl->wq;
	obj.rwq.out = &rwq;
	ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_RWQ);
	if (ret) {
		rte_errno = ret;
		goto error;
	}
	rxq_data->wqes = rwq.buf;
	rxq_data->rq_db = rwq.dbrec;
	rxq_data->cq_arm_sn = 0;
	mlx5_rxq_initialize(rxq_data);
	rxq_data->cq_ci = 0;
	dev->data->rx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STARTED;
	rxq_ctrl->wqn = ((struct ibv_wq *)(tmpl->wq))->wq_num;
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	if (tmpl->wq)
		claim_zero(mlx5_glue->destroy_wq(tmpl->wq));
	if (tmpl->ibv_cq)
		claim_zero(mlx5_glue->destroy_cq(tmpl->ibv_cq));
	if (tmpl->ibv_channel)
		claim_zero(mlx5_glue->destroy_comp_channel(tmpl->ibv_channel));
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}

/**
 * Release an Rx verbs queue object.
 *
 * @param rxq_obj
 *   Verbs Rx queue object.
 */
static void
mlx5_rxq_ibv_obj_release(struct mlx5_rxq_obj *rxq_obj)
{
	MLX5_ASSERT(rxq_obj);
	MLX5_ASSERT(rxq_obj->wq);
	MLX5_ASSERT(rxq_obj->ibv_cq);
	claim_zero(mlx5_glue->destroy_wq(rxq_obj->wq));
	claim_zero(mlx5_glue->destroy_cq(rxq_obj->ibv_cq));
	if (rxq_obj->ibv_channel)
		claim_zero(mlx5_glue->destroy_comp_channel
						(rxq_obj->ibv_channel));
}

/**
 * Get event for an Rx verbs queue object.
 *
 * @param rxq_obj
 *   Verbs Rx queue object.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_rx_ibv_get_event(struct mlx5_rxq_obj *rxq_obj)
{
	struct ibv_cq *ev_cq;
	void *ev_ctx;
	int ret = mlx5_glue->get_cq_event(rxq_obj->ibv_channel,
					  &ev_cq, &ev_ctx);

	if (ret < 0 || ev_cq != rxq_obj->ibv_cq) {
		rte_errno = (ret < 0) ? errno : EINVAL;
		return -rte_errno;
	}
	mlx5_glue->ack_cq_events(rxq_obj->ibv_cq, 1);
	return 0;
}

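/*
 * Usage sketch (assumption, not driver code): the Rx interrupt path is
 * expected to consume the event and then re-arm the CQ; the mlx5_arm_cq()
 * call here is an assumption for the example.
 *
 *	if (mlx5_rx_ibv_get_event(rxq_obj) == 0)
 *		mlx5_arm_cq(&rxq_ctrl->rxq, rxq_ctrl->rxq.cq_arm_sn);
 */
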
/**
 * Creates a receive work queue as a field of indirection table.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param log_n
 *   Log of number of queues in the array.
 * @param ind_tbl
 *   Verbs indirection table object.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_ibv_ind_table_new(struct rte_eth_dev *dev, const unsigned int log_n,
		       struct mlx5_ind_table_obj *ind_tbl)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct ibv_wq *wq[1 << log_n];
	unsigned int i = 0, j;

	MLX5_ASSERT(ind_tbl);
	for (i = 0; i != ind_tbl->queues_n; ++i) {
		struct mlx5_rxq_data *rxq = (*priv->rxqs)[ind_tbl->queues[i]];
		struct mlx5_rxq_ctrl *rxq_ctrl =
				container_of(rxq, struct mlx5_rxq_ctrl, rxq);

		wq[i] = rxq_ctrl->obj->wq;
	}
	MLX5_ASSERT(i > 0);
	/* Finalise indirection table. */
	for (j = 0; i != (unsigned int)(1 << log_n); ++j, ++i)
		wq[i] = wq[j];
	ind_tbl->ind_table = mlx5_glue->create_rwq_ind_table
					(priv->sh->cdev->ctx,
					 &(struct ibv_rwq_ind_table_init_attr){
						.log_ind_tbl_size = log_n,
						.ind_tbl = wq,
						.comp_mask = 0,
					 });
	if (!ind_tbl->ind_table) {
		rte_errno = errno;
		return -rte_errno;
	}
	return 0;
}

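/*
 * Worked example (illustrative): for log_n == 2 the table holds
 * 1 << 2 == 4 entries; with queues_n == 3 the tail is padded by wrapping,
 * so wq[3] = wq[0] and the device always sees a full power-of-two array.
 */
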
/**
 * Destroys the specified Indirection Table.
 *
 * @param ind_tbl
 *   Indirection table to release.
 */
static void
mlx5_ibv_ind_table_destroy(struct mlx5_ind_table_obj *ind_tbl)
{
	claim_zero(mlx5_glue->destroy_rwq_ind_table(ind_tbl->ind_table));
}

/**
 * Create an Rx Hash queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param hrxq
 *   Pointer to Rx Hash queue.
 * @param tunnel
 *   Tunnel type.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_ibv_hrxq_new(struct rte_eth_dev *dev, struct mlx5_hrxq *hrxq,
		  int tunnel __rte_unused)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct ibv_qp *qp = NULL;
	struct mlx5_ind_table_obj *ind_tbl = hrxq->ind_table;
	const uint8_t *rss_key = hrxq->rss_key;
	uint64_t hash_fields = hrxq->hash_fields;
	int err;
#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
	struct mlx5dv_qp_init_attr qp_init_attr;

	memset(&qp_init_attr, 0, sizeof(qp_init_attr));
	if (tunnel) {
		qp_init_attr.comp_mask =
				MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS;
		qp_init_attr.create_flags = MLX5DV_QP_CREATE_TUNNEL_OFFLOADS;
	}
#ifdef HAVE_IBV_FLOW_DV_SUPPORT
	if (dev->data->dev_conf.lpbk_mode) {
		/* Allow packet sent from NIC loop back w/o source MAC check. */
		qp_init_attr.comp_mask |=
				MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS;
		qp_init_attr.create_flags |=
				MLX5DV_QP_CREATE_TIR_ALLOW_SELF_LOOPBACK_UC;
	}
#endif
	qp = mlx5_glue->dv_create_qp
			(priv->sh->cdev->ctx,
			 &(struct ibv_qp_init_attr_ex){
				.qp_type = IBV_QPT_RAW_PACKET,
				.comp_mask =
					IBV_QP_INIT_ATTR_PD |
					IBV_QP_INIT_ATTR_IND_TABLE |
					IBV_QP_INIT_ATTR_RX_HASH,
				.rx_hash_conf = (struct ibv_rx_hash_conf){
					.rx_hash_function =
						IBV_RX_HASH_FUNC_TOEPLITZ,
					.rx_hash_key_len = hrxq->rss_key_len,
					.rx_hash_key =
						(void *)(uintptr_t)rss_key,
					.rx_hash_fields_mask = hash_fields,
				},
				.rwq_ind_tbl = ind_tbl->ind_table,
				.pd = priv->sh->pd,
			 },
			 &qp_init_attr);
#else
	qp = mlx5_glue->create_qp_ex
			(priv->sh->cdev->ctx,
			 &(struct ibv_qp_init_attr_ex){
				.qp_type = IBV_QPT_RAW_PACKET,
				.comp_mask =
					IBV_QP_INIT_ATTR_PD |
					IBV_QP_INIT_ATTR_IND_TABLE |
					IBV_QP_INIT_ATTR_RX_HASH,
				.rx_hash_conf = (struct ibv_rx_hash_conf){
					.rx_hash_function =
						IBV_RX_HASH_FUNC_TOEPLITZ,
					.rx_hash_key_len = hrxq->rss_key_len,
					.rx_hash_key =
						(void *)(uintptr_t)rss_key,
					.rx_hash_fields_mask = hash_fields,
				},
				.rwq_ind_tbl = ind_tbl->ind_table,
				.pd = priv->sh->pd,
			 });
#endif
	if (!qp) {
		rte_errno = errno;
		goto error;
	}
	hrxq->qp = qp;
#ifdef HAVE_IBV_FLOW_DV_SUPPORT
	hrxq->action = mlx5_glue->dv_create_flow_action_dest_ibv_qp(hrxq->qp);
	if (!hrxq->action) {
		rte_errno = errno;
		goto error;
	}
#endif
	return 0;
error:
	err = rte_errno; /* Save rte_errno before cleanup. */
	if (qp)
		claim_zero(mlx5_glue->destroy_qp(qp));
	rte_errno = err; /* Restore rte_errno. */
	return -rte_errno;
}

/**
 * Destroy a Verbs queue pair.
 *
 * @param hrxq
 *   Hash Rx queue to release its qp.
 */
static void
mlx5_ibv_qp_destroy(struct mlx5_hrxq *hrxq)
{
	claim_zero(mlx5_glue->destroy_qp(hrxq->qp));
}

/**
 * Release a drop Rx queue Verbs object.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
static void
mlx5_rxq_ibv_obj_drop_release(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_rxq_obj *rxq = priv->drop_queue.rxq;

	if (rxq->wq)
		claim_zero(mlx5_glue->destroy_wq(rxq->wq));
	if (rxq->ibv_cq)
		claim_zero(mlx5_glue->destroy_cq(rxq->ibv_cq));
	mlx5_free(rxq);
	priv->drop_queue.rxq = NULL;
}

/**
 * Create a drop Rx queue Verbs object.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_rxq_ibv_obj_drop_create(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct ibv_context *ctx = priv->sh->cdev->ctx;
	struct mlx5_rxq_obj *rxq = priv->drop_queue.rxq;

	if (rxq)
		return 0;
	rxq = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*rxq), 0, SOCKET_ID_ANY);
	if (!rxq) {
		DRV_LOG(DEBUG, "Port %u cannot allocate drop Rx queue memory.",
			dev->data->port_id);
		rte_errno = ENOMEM;
		return -rte_errno;
	}
	priv->drop_queue.rxq = rxq;
	rxq->ibv_cq = mlx5_glue->create_cq(ctx, 1, NULL, NULL, 0);
	if (!rxq->ibv_cq) {
		DRV_LOG(DEBUG, "Port %u cannot allocate CQ for drop queue.",
			dev->data->port_id);
		rte_errno = errno;
		goto error;
	}
	rxq->wq = mlx5_glue->create_wq(ctx, &(struct ibv_wq_init_attr){
						.wq_type = IBV_WQT_RQ,
						.max_wr = 1,
						.max_sge = 1,
						.pd = priv->sh->pd,
						.cq = rxq->ibv_cq,
					});
	if (!rxq->wq) {
		DRV_LOG(DEBUG, "Port %u cannot allocate WQ for drop queue.",
			dev->data->port_id);
		rte_errno = errno;
		goto error;
	}
	priv->drop_queue.rxq = rxq;
	return 0;
error:
	mlx5_rxq_ibv_obj_drop_release(dev);
	return -rte_errno;
}

/**
 * Create a Verbs drop action for Rx Hash queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_ibv_drop_action_create(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_hrxq *hrxq = priv->drop_queue.hrxq;
	struct ibv_rwq_ind_table *ind_tbl = NULL;
	struct mlx5_rxq_obj *rxq;
	int ret;

	MLX5_ASSERT(hrxq && hrxq->ind_table);
	ret = mlx5_rxq_ibv_obj_drop_create(dev);
	if (ret < 0)
		return ret;
	rxq = priv->drop_queue.rxq;
	ind_tbl = mlx5_glue->create_rwq_ind_table
				(priv->sh->cdev->ctx,
				 &(struct ibv_rwq_ind_table_init_attr){
					.log_ind_tbl_size = 0,
					.ind_tbl = (struct ibv_wq **)&rxq->wq,
					.comp_mask = 0,
				 });
	if (!ind_tbl) {
		DRV_LOG(DEBUG, "Port %u"
			" cannot allocate indirection table for drop queue.",
			dev->data->port_id);
		rte_errno = errno;
		goto error;
	}
	hrxq->qp = mlx5_glue->create_qp_ex(priv->sh->cdev->ctx,
		 &(struct ibv_qp_init_attr_ex){
			.qp_type = IBV_QPT_RAW_PACKET,
			.comp_mask = IBV_QP_INIT_ATTR_PD |
				     IBV_QP_INIT_ATTR_IND_TABLE |
				     IBV_QP_INIT_ATTR_RX_HASH,
			.rx_hash_conf = (struct ibv_rx_hash_conf){
				.rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ,
				.rx_hash_key_len = MLX5_RSS_HASH_KEY_LEN,
				.rx_hash_key = rss_hash_default_key,
				.rx_hash_fields_mask = 0,
			},
			.rwq_ind_tbl = ind_tbl,
			.pd = priv->sh->pd,
		 });
	if (!hrxq->qp) {
		DRV_LOG(DEBUG, "Port %u cannot allocate QP for drop queue.",
			dev->data->port_id);
		rte_errno = errno;
		goto error;
	}
#ifdef HAVE_IBV_FLOW_DV_SUPPORT
	hrxq->action = mlx5_glue->dv_create_flow_action_dest_ibv_qp(hrxq->qp);
	if (!hrxq->action) {
		rte_errno = errno;
		goto error;
	}
#endif
	hrxq->ind_table->ind_table = ind_tbl;
	return 0;
error:
	if (hrxq->qp)
		claim_zero(mlx5_glue->destroy_qp(hrxq->qp));
	if (ind_tbl)
		claim_zero(mlx5_glue->destroy_rwq_ind_table(ind_tbl));
	if (priv->drop_queue.rxq)
		mlx5_rxq_ibv_obj_drop_release(dev);
	return -rte_errno;
}

/**
 * Release a drop hash Rx queue.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
static void
mlx5_ibv_drop_action_destroy(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_hrxq *hrxq = priv->drop_queue.hrxq;
	struct ibv_rwq_ind_table *ind_tbl = hrxq->ind_table->ind_table;

#ifdef HAVE_IBV_FLOW_DV_SUPPORT
	claim_zero(mlx5_glue->destroy_flow_action(hrxq->action));
#endif
	claim_zero(mlx5_glue->destroy_qp(hrxq->qp));
	claim_zero(mlx5_glue->destroy_rwq_ind_table(ind_tbl));
	mlx5_rxq_ibv_obj_drop_release(dev);
}

/**
 * Create a QP Verbs object.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   Queue index in DPDK Tx queue array.
 *
 * @return
 *   The QP Verbs object, NULL otherwise and rte_errno is set.
 */
static struct ibv_qp *
mlx5_txq_ibv_qp_create(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_data *txq_data = (*priv->txqs)[idx];
	struct mlx5_txq_ctrl *txq_ctrl =
			container_of(txq_data, struct mlx5_txq_ctrl, txq);
	struct ibv_qp *qp_obj = NULL;
	struct ibv_qp_init_attr_ex qp_attr = { 0 };
	const int desc = 1 << txq_data->elts_n;

	MLX5_ASSERT(txq_ctrl->obj->cq);
	/* CQ to be associated with the send queue. */
	qp_attr.send_cq = txq_ctrl->obj->cq;
	/* CQ to be associated with the receive queue. */
	qp_attr.recv_cq = txq_ctrl->obj->cq;
	/* Max number of outstanding WRs. */
	qp_attr.cap.max_send_wr = ((priv->sh->device_attr.max_qp_wr < desc) ?
				   priv->sh->device_attr.max_qp_wr : desc);
	/*
	 * Max number of scatter/gather elements in a WR, must be 1 to
	 * prevent libmlx5 from trying to affect too much memory. TX gather
	 * is not impacted by the device_attr.max_sge limit and will still
	 * work properly.
	 */
	qp_attr.cap.max_send_sge = 1;
	qp_attr.qp_type = IBV_QPT_RAW_PACKET;
	/* Do *NOT* enable this, completions events are managed per Tx burst. */
	qp_attr.sq_sig_all = 0;
	qp_attr.pd = priv->sh->pd;
	qp_attr.comp_mask = IBV_QP_INIT_ATTR_PD;
	if (txq_data->inlen_send)
		qp_attr.cap.max_inline_data = txq_ctrl->max_inline_data;
	if (txq_data->tso_en) {
		qp_attr.max_tso_header = txq_ctrl->max_tso_header;
		qp_attr.comp_mask |= IBV_QP_INIT_ATTR_MAX_TSO_HEADER;
	}
	qp_obj = mlx5_glue->create_qp_ex(priv->sh->cdev->ctx, &qp_attr);
	if (qp_obj == NULL) {
		DRV_LOG(ERR, "Port %u Tx queue %u QP creation failure.",
			dev->data->port_id, idx);
		rte_errno = errno;
	}
	return qp_obj;
}

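/*
 * Worked example (illustrative): with elts_n == 10, desc is 1024; if the
 * device reports device_attr.max_qp_wr == 512, max_send_wr is clamped to
 * 512, while max_send_sge stays 1 regardless of the device limit.
 */
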
/**
 * Create the Tx queue Verbs object.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param idx
 *   Queue index in DPDK Tx queue array.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_txq_ibv_obj_new(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_data *txq_data = (*priv->txqs)[idx];
	struct mlx5_txq_ctrl *txq_ctrl =
		container_of(txq_data, struct mlx5_txq_ctrl, txq);
	struct mlx5_txq_obj *txq_obj = txq_ctrl->obj;
	unsigned int cqe_n;
	struct mlx5dv_qp qp;
	struct mlx5dv_cq cq_info;
	struct mlx5dv_obj obj;
	const int desc = 1 << txq_data->elts_n;
	int ret = 0;

	MLX5_ASSERT(txq_data);
	MLX5_ASSERT(txq_obj);
	txq_obj->txq_ctrl = txq_ctrl;
	if (mlx5_getenv_int("MLX5_ENABLE_CQE_COMPRESSION")) {
		DRV_LOG(ERR, "Port %u MLX5_ENABLE_CQE_COMPRESSION "
			"must never be set.", dev->data->port_id);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	cqe_n = desc / MLX5_TX_COMP_THRESH +
		1 + MLX5_TX_COMP_THRESH_INLINE_DIV;
	txq_obj->cq = mlx5_glue->create_cq(priv->sh->cdev->ctx, cqe_n,
					   NULL, NULL, 0);
	if (txq_obj->cq == NULL) {
		DRV_LOG(ERR, "Port %u Tx queue %u CQ creation failure.",
			dev->data->port_id, idx);
		rte_errno = errno;
		goto error;
	}
	txq_obj->qp = mlx5_txq_ibv_qp_create(dev, idx);
	if (txq_obj->qp == NULL) {
		rte_errno = errno;
		goto error;
	}
	ret = mlx5_ibv_modify_qp(txq_obj, MLX5_TXQ_MOD_RST2RDY,
				 (uint8_t)priv->dev_port);
	if (ret) {
		DRV_LOG(ERR, "Port %u Tx queue %u QP state modifying failed.",
			dev->data->port_id, idx);
		rte_errno = errno;
		goto error;
	}
	qp.comp_mask = MLX5DV_QP_MASK_UAR_MMAP_OFFSET;
#ifdef HAVE_IBV_FLOW_DV_SUPPORT
	/* If using DevX, need additional mask to read tisn value. */
	if (priv->sh->devx && !priv->sh->tdn)
		qp.comp_mask |= MLX5DV_QP_MASK_RAW_QP_HANDLES;
#endif
	obj.cq.in = txq_obj->cq;
	obj.cq.out = &cq_info;
	obj.qp.in = txq_obj->qp;
	obj.qp.out = &qp;
	ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_CQ | MLX5DV_OBJ_QP);
	if (ret != 0) {
		rte_errno = errno;
		goto error;
	}
	if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
		DRV_LOG(ERR,
			"Port %u wrong MLX5_CQE_SIZE environment variable"
			" value: it should be set to %u.",
			dev->data->port_id, RTE_CACHE_LINE_SIZE);
		rte_errno = EINVAL;
		goto error;
	}
	txq_data->cqe_n = log2above(cq_info.cqe_cnt);
	txq_data->cqe_s = 1 << txq_data->cqe_n;
	txq_data->cqe_m = txq_data->cqe_s - 1;
	txq_data->qp_num_8s = ((struct ibv_qp *)txq_obj->qp)->qp_num << 8;
	txq_data->wqes = qp.sq.buf;
	txq_data->wqe_n = log2above(qp.sq.wqe_cnt);
	txq_data->wqe_s = 1 << txq_data->wqe_n;
	txq_data->wqe_m = txq_data->wqe_s - 1;
	txq_data->wqes_end = txq_data->wqes + txq_data->wqe_s;
	txq_data->qp_db = &qp.dbrec[MLX5_SND_DBR];
	txq_data->cq_db = cq_info.dbrec;
	txq_data->cqes = (volatile struct mlx5_cqe *)cq_info.buf;
	txq_data->cq_ci = 0;
	txq_data->cq_pi = 0;
	txq_data->wqe_ci = 0;
	txq_data->wqe_pi = 0;
	txq_data->wqe_comp = 0;
	txq_data->wqe_thres = txq_data->wqe_s / MLX5_TX_COMP_THRESH_INLINE_DIV;
#ifdef HAVE_IBV_FLOW_DV_SUPPORT
	/*
	 * If using DevX need to query and store TIS transport domain value.
	 * This is done once per port.
	 * Will use this value on Rx, when creating matching TIR.
	 */
	if (priv->sh->devx && !priv->sh->tdn) {
		ret = mlx5_devx_cmd_qp_query_tis_td(txq_obj->qp, qp.tisn,
						    &priv->sh->tdn);
		if (ret) {
			DRV_LOG(ERR, "Fail to query port %u Tx queue %u QP TIS "
				"transport domain.", dev->data->port_id, idx);
			rte_errno = EINVAL;
			goto error;
		}
		DRV_LOG(DEBUG, "Port %u Tx queue %u TIS number %d "
			"transport domain %d.", dev->data->port_id,
			idx, qp.tisn, priv->sh->tdn);
	}
#endif
	txq_ctrl->bf_reg = qp.bf.reg;
	if (qp.comp_mask & MLX5DV_QP_MASK_UAR_MMAP_OFFSET) {
		txq_ctrl->uar_mmap_offset = qp.uar_mmap_offset;
		DRV_LOG(DEBUG, "Port %u: uar_mmap_offset 0x%" PRIx64 ".",
			dev->data->port_id, txq_ctrl->uar_mmap_offset);
	} else {
		DRV_LOG(ERR,
			"Port %u failed to retrieve UAR info, invalid"
			" libmlx5.so",
			dev->data->port_id);
		rte_errno = EINVAL;
		goto error;
	}
	txq_uar_init(txq_ctrl);
	dev->data->tx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STARTED;
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	if (txq_obj->cq)
		claim_zero(mlx5_glue->destroy_cq(txq_obj->cq));
	if (txq_obj->qp)
		claim_zero(mlx5_glue->destroy_qp(txq_obj->qp));
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}

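/*
 * Usage sketch (assumption, not driver code): the Tx start path is
 * expected to create the Verbs object and tear it down via the matching
 * release callback on shutdown:
 *
 *	if (mlx5_txq_ibv_obj_new(dev, idx) < 0)
 *		return -rte_errno;
 *	(use the queue)
 *	mlx5_txq_ibv_obj_release(txq_obj);
 */
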
/**
 * Create the dummy QP with minimal resources for loopback.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_rxq_ibv_obj_dummy_lb_create(struct rte_eth_dev *dev)
{
#if defined(HAVE_IBV_DEVICE_TUNNEL_SUPPORT) && defined(HAVE_IBV_FLOW_DV_SUPPORT)
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_ctx_shared *sh = priv->sh;
	struct ibv_context *ctx = sh->cdev->ctx;
	struct mlx5dv_qp_init_attr qp_init_attr = {0};
	struct {
		struct ibv_cq_init_attr_ex ibv;
		struct mlx5dv_cq_init_attr mlx5;
	} cq_attr = {{0}};

	if (dev->data->dev_conf.lpbk_mode) {
		/* Allow packet sent from NIC loop back w/o source MAC check. */
		qp_init_attr.comp_mask |=
				MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS;
		qp_init_attr.create_flags |=
				MLX5DV_QP_CREATE_TIR_ALLOW_SELF_LOOPBACK_UC;
	} else {
		return 0;
	}
	/* Only need to check refcnt, 0 after "sh" is allocated. */
	if (!!(__atomic_fetch_add(&sh->self_lb.refcnt, 1, __ATOMIC_RELAXED))) {
		MLX5_ASSERT(sh->self_lb.ibv_cq && sh->self_lb.qp);
		priv->lb_used = 1;
		return 0;
	}
	cq_attr.ibv = (struct ibv_cq_init_attr_ex){
		.cqe = 1,
		.channel = NULL,
		.comp_mask = 0,
	};
	cq_attr.mlx5 = (struct mlx5dv_cq_init_attr){
		.comp_mask = 0,
	};
	/* Only CQ is needed, no WQ(RQ) is required in this case. */
	sh->self_lb.ibv_cq = mlx5_glue->cq_ex_to_cq(mlx5_glue->dv_create_cq(ctx,
							&cq_attr.ibv,
							&cq_attr.mlx5));
	if (!sh->self_lb.ibv_cq) {
		DRV_LOG(ERR, "Port %u cannot allocate CQ for loopback.",
			dev->data->port_id);
		rte_errno = errno;
		goto error;
	}
	sh->self_lb.qp = mlx5_glue->dv_create_qp(ctx,
				&(struct ibv_qp_init_attr_ex){
					.qp_type = IBV_QPT_RAW_PACKET,
					.comp_mask = IBV_QP_INIT_ATTR_PD,
					.pd = sh->pd,
					.send_cq = sh->self_lb.ibv_cq,
					.recv_cq = sh->self_lb.ibv_cq,
					.cap.max_recv_wr = 1,
				},
				&qp_init_attr);
	if (!sh->self_lb.qp) {
		DRV_LOG(DEBUG, "Port %u cannot allocate QP for loopback.",
			dev->data->port_id);
		rte_errno = errno;
		goto error;
	}
	priv->lb_used = 1;
	return 0;
error:
	if (sh->self_lb.ibv_cq) {
		claim_zero(mlx5_glue->destroy_cq(sh->self_lb.ibv_cq));
		sh->self_lb.ibv_cq = NULL;
	}
	(void)__atomic_sub_fetch(&sh->self_lb.refcnt, 1, __ATOMIC_RELAXED);
	return -rte_errno;
#else
	RTE_SET_USED(dev);
	return 0;
#endif
}

/**
 * Release the dummy queue resources for loopback.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
void
mlx5_rxq_ibv_obj_dummy_lb_release(struct rte_eth_dev *dev)
{
#if defined(HAVE_IBV_DEVICE_TUNNEL_SUPPORT) && defined(HAVE_IBV_FLOW_DV_SUPPORT)
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_ctx_shared *sh = priv->sh;

	if (!priv->lb_used)
		return;
	MLX5_ASSERT(__atomic_load_n(&sh->self_lb.refcnt, __ATOMIC_RELAXED));
	if (!(__atomic_sub_fetch(&sh->self_lb.refcnt, 1, __ATOMIC_RELAXED))) {
		if (sh->self_lb.qp) {
			claim_zero(mlx5_glue->destroy_qp(sh->self_lb.qp));
			sh->self_lb.qp = NULL;
		}
		if (sh->self_lb.ibv_cq) {
			claim_zero(mlx5_glue->destroy_cq(sh->self_lb.ibv_cq));
			sh->self_lb.ibv_cq = NULL;
		}
	}
	priv->lb_used = 0;
#else
	RTE_SET_USED(dev);
#endif
}

/**
 * Release a Tx verbs queue object.
 *
 * @param txq_obj
 *   Verbs Tx queue object.
 */
void
mlx5_txq_ibv_obj_release(struct mlx5_txq_obj *txq_obj)
{
	MLX5_ASSERT(txq_obj);
	claim_zero(mlx5_glue->destroy_qp(txq_obj->qp));
	claim_zero(mlx5_glue->destroy_cq(txq_obj->cq));
}

struct mlx5_obj_ops ibv_obj_ops = {
	.rxq_obj_modify_vlan_strip = mlx5_rxq_obj_modify_wq_vlan_strip,
	.rxq_obj_new = mlx5_rxq_ibv_obj_new,
	.rxq_event_get = mlx5_rx_ibv_get_event,
	.rxq_obj_modify = mlx5_ibv_modify_wq,
	.rxq_obj_release = mlx5_rxq_ibv_obj_release,
	.ind_table_new = mlx5_ibv_ind_table_new,
	.ind_table_destroy = mlx5_ibv_ind_table_destroy,
	.hrxq_new = mlx5_ibv_hrxq_new,
	.hrxq_destroy = mlx5_ibv_qp_destroy,
	.drop_action_create = mlx5_ibv_drop_action_create,
	.drop_action_destroy = mlx5_ibv_drop_action_destroy,
	.txq_obj_new = mlx5_txq_ibv_obj_new,
	.txq_obj_modify = mlx5_ibv_modify_qp,
	.txq_obj_release = mlx5_txq_ibv_obj_release,
	.lb_dummy_queue_create = NULL,
	.lb_dummy_queue_release = NULL,
};
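
/*
 * Usage sketch (assumption, not driver code): the Linux probe path is
 * expected to fall back to this table when DevX queue objects are not
 * available, e.g.:
 *
 *	priv->obj_ops = ibv_obj_ops;
 *	ret = priv->obj_ops.rxq_obj_new(dev, idx);
 */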