1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2020 Mellanox Technologies, Ltd
11 #include <sys/queue.h>
13 #include "mlx5_autoconf.h"
16 #include <rte_malloc.h>
17 #include <ethdev_driver.h>
18 #include <rte_common.h>
20 #include <mlx5_glue.h>
21 #include <mlx5_common.h>
22 #include <mlx5_common_mr.h>
23 #include <mlx5_verbs.h>
26 #include <mlx5_utils.h>
27 #include <mlx5_malloc.h>
30 * Modify Rx WQ vlan stripping offload
35 * @return 0 on success, non-0 otherwise
38 mlx5_rxq_obj_modify_wq_vlan_strip(struct mlx5_rxq_priv *rxq, int on)
40 uint16_t vlan_offloads =
41 (on ? IBV_WQ_FLAGS_CVLAN_STRIPPING : 0) |
43 struct ibv_wq_attr mod;
44 mod = (struct ibv_wq_attr){
45 .attr_mask = IBV_WQ_ATTR_FLAGS,
46 .flags_mask = IBV_WQ_FLAGS_CVLAN_STRIPPING,
47 .flags = vlan_offloads,
50 return mlx5_glue->modify_wq(rxq->ctrl->obj->wq, &mod);
54 * Modifies the attributes for the specified WQ.
59 * Type of change queue state.
62 * 0 on success, a negative errno value otherwise and rte_errno is set.
65 mlx5_ibv_modify_wq(struct mlx5_rxq_priv *rxq, uint8_t type)
67 struct ibv_wq_attr mod = {
68 .attr_mask = IBV_WQ_ATTR_STATE,
69 .wq_state = (enum ibv_wq_state)type,
72 return mlx5_glue->modify_wq(rxq->ctrl->obj->wq, &mod);
76 * Modify QP using Verbs API.
79 * Verbs Tx queue object.
81 * Type of change queue state.
83 * IB device port number.
86 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Move a Verbs Tx QP through its states with modify_qp(): an optional
 * transition to RESET, followed by INIT, RTR and RTS. Each transition has
 * its own DRV_LOG(ERR, ...) message.
 * NOTE(review): short lines (remaining parameter, braces, error returns) are
 * missing from this listing - confirm the exact control flow in the full file.
 */
89 mlx5_ibv_modify_qp(struct mlx5_txq_obj *obj, enum mlx5_txq_modify_type type,
92 struct ibv_qp_attr mod = {
93 .qp_state = IBV_QPS_RESET,
/* By default both the state and the port attributes are part of the mask. */
96 int attr_mask = (IBV_QP_STATE | IBV_QP_PORT);
/* Every request except RST2RDY starts by moving the QP to RESET. */
99 if (type != MLX5_TXQ_MOD_RST2RDY) {
100 ret = mlx5_glue->modify_qp(obj->qp, &mod, IBV_QP_STATE);
102 DRV_LOG(ERR, "Cannot change Tx QP state to RESET %s",
/* NOTE(review): the RDY2RST branch body is not visible; presumably it stops once RESET is reached. */
107 if (type == MLX5_TXQ_MOD_RDY2RST)
/* ERR2RDY modifies the state only, dropping the port attribute from the mask. */
110 if (type == MLX5_TXQ_MOD_ERR2RDY)
111 attr_mask = IBV_QP_STATE;
/* RESET -> INIT uses the mask selected above. */
112 mod.qp_state = IBV_QPS_INIT;
113 ret = mlx5_glue->modify_qp(obj->qp, &mod, attr_mask);
115 DRV_LOG(ERR, "Cannot change Tx QP state to INIT %s",
/* INIT -> RTR, state attribute only. */
120 mod.qp_state = IBV_QPS_RTR;
121 ret = mlx5_glue->modify_qp(obj->qp, &mod, IBV_QP_STATE);
123 DRV_LOG(ERR, "Cannot change Tx QP state to RTR %s",
/* RTR -> RTS, state attribute only. */
128 mod.qp_state = IBV_QPS_RTS;
129 ret = mlx5_glue->modify_qp(obj->qp, &mod, IBV_QP_STATE);
131 DRV_LOG(ERR, "Cannot change Tx QP state to RTS %s",
140 * Create a CQ Verbs object.
143 * Pointer to Rx queue.
146 * The Verbs CQ object initialized, NULL otherwise and rte_errno is set.
/*
 * Build the CQ creation attributes for an Rx queue and create the CQ through
 * the glue dv_create_cq(), returning it converted with cq_ex_to_cq().
 * When CQE compression is configured and HW timestamping is off, the
 * compressed-CQE mask is set and a mini-CQE format is selected and mirrored
 * into rxq_data->mcqe_format.
 * NOTE(review): short lines (braces, #else/#endif, some initializer fields)
 * are missing from this listing.
 */
148 static struct ibv_cq *
149 mlx5_rxq_ibv_cq_create(struct mlx5_rxq_priv *rxq)
151 struct mlx5_priv *priv = rxq->priv;
152 struct mlx5_rxq_ctrl *rxq_ctrl = rxq->ctrl;
153 struct mlx5_rxq_data *rxq_data = &rxq_ctrl->rxq;
154 struct mlx5_rxq_obj *rxq_obj = rxq_ctrl->obj;
155 unsigned int cqe_n = mlx5_rxq_cqe_num(rxq_data);
/* Generic Verbs attributes and mlx5dv-specific attributes are kept side by side. */
157 struct ibv_cq_init_attr_ex ibv;
158 struct mlx5dv_cq_init_attr mlx5;
161 cq_attr.ibv = (struct ibv_cq_init_attr_ex){
163 .channel = rxq_obj->ibv_channel,
166 cq_attr.mlx5 = (struct mlx5dv_cq_init_attr){
/* Compression is requested only when HW timestamping is not enabled on the queue. */
169 if (priv->config.cqe_comp && !rxq_data->hw_timestamp) {
170 cq_attr.mlx5.comp_mask |=
171 MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE;
172 rxq_data->byte_mask = UINT32_MAX;
/* With striding RQ support compiled in, MPRQ queues use the checksum/stride-index format. */
173 #ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT
174 if (mlx5_rxq_mprq_enabled(rxq_data)) {
175 cq_attr.mlx5.cqe_comp_res_format =
176 MLX5DV_CQE_RES_FORMAT_CSUM_STRIDX;
177 rxq_data->mcqe_format =
178 MLX5_CQE_RESP_FORMAT_CSUM_STRIDX;
180 cq_attr.mlx5.cqe_comp_res_format =
181 MLX5DV_CQE_RES_FORMAT_HASH;
182 rxq_data->mcqe_format =
183 MLX5_CQE_RESP_FORMAT_HASH;
186 cq_attr.mlx5.cqe_comp_res_format = MLX5DV_CQE_RES_FORMAT_HASH;
187 rxq_data->mcqe_format = MLX5_CQE_RESP_FORMAT_HASH;
190 * For vectorized Rx, it must not be doubled in order to
191 * make cq_ci and rq_ci aligned.
193 if (mlx5_rxq_check_vec_support(rxq_data) < 0)
194 cq_attr.ibv.cqe *= 2;
/* Compression was configured but the queue uses HW timestamps: only a message is logged here. */
195 } else if (priv->config.cqe_comp && rxq_data->hw_timestamp) {
197 "Port %u Rx CQE compression is disabled for HW"
199 priv->dev_data->port_id);
/* On 128-byte cache-line builds, request padded CQEs when the library supports it. */
201 #ifdef HAVE_IBV_MLX5_MOD_CQE_128B_PAD
202 if (RTE_CACHE_LINE_SIZE == 128) {
203 cq_attr.mlx5.comp_mask |= MLX5DV_CQ_INIT_ATTR_MASK_FLAGS;
204 cq_attr.mlx5.flags |= MLX5DV_CQ_INIT_ATTR_FLAGS_CQE_PAD;
207 return mlx5_glue->cq_ex_to_cq(mlx5_glue->dv_create_cq
208 (priv->sh->cdev->ctx,
214 * Create a WQ Verbs object.
217 * Pointer to Rx queue.
220 * The Verbs WQ object initialized, NULL otherwise and rte_errno is set.
222 static struct ibv_wq *
223 mlx5_rxq_ibv_wq_create(struct mlx5_rxq_priv *rxq)
225 struct mlx5_priv *priv = rxq->priv;
226 struct mlx5_rxq_ctrl *rxq_ctrl = rxq->ctrl;
227 struct mlx5_rxq_data *rxq_data = &rxq_ctrl->rxq;
228 struct mlx5_rxq_obj *rxq_obj = rxq_ctrl->obj;
229 unsigned int wqe_n = 1 << rxq_data->elts_n;
231 struct ibv_wq_init_attr ibv;
232 #ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT
233 struct mlx5dv_wq_init_attr mlx5;
237 wq_attr.ibv = (struct ibv_wq_init_attr){
238 .wq_context = NULL, /* Could be useful in the future. */
239 .wq_type = IBV_WQT_RQ,
240 /* Max number of outstanding WRs. */
241 .max_wr = wqe_n >> rxq_data->sges_n,
242 /* Max number of scatter/gather elements in a WR. */
243 .max_sge = 1 << rxq_data->sges_n,
244 .pd = priv->sh->cdev->pd,
245 .cq = rxq_obj->ibv_cq,
246 .comp_mask = IBV_WQ_FLAGS_CVLAN_STRIPPING | 0,
247 .create_flags = (rxq_data->vlan_strip ?
248 IBV_WQ_FLAGS_CVLAN_STRIPPING : 0),
250 /* By default, FCS (CRC) is stripped by hardware. */
251 if (rxq_data->crc_present) {
252 wq_attr.ibv.create_flags |= IBV_WQ_FLAGS_SCATTER_FCS;
253 wq_attr.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
255 if (priv->config.hw_padding) {
256 #if defined(HAVE_IBV_WQ_FLAG_RX_END_PADDING)
257 wq_attr.ibv.create_flags |= IBV_WQ_FLAG_RX_END_PADDING;
258 wq_attr.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
259 #elif defined(HAVE_IBV_WQ_FLAGS_PCI_WRITE_END_PADDING)
260 wq_attr.ibv.create_flags |= IBV_WQ_FLAGS_PCI_WRITE_END_PADDING;
261 wq_attr.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
264 #ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT
265 wq_attr.mlx5 = (struct mlx5dv_wq_init_attr){
268 if (mlx5_rxq_mprq_enabled(rxq_data)) {
269 struct mlx5dv_striding_rq_init_attr *mprq_attr =
270 &wq_attr.mlx5.striding_rq_attrs;
272 wq_attr.mlx5.comp_mask |= MLX5DV_WQ_INIT_ATTR_MASK_STRIDING_RQ;
273 *mprq_attr = (struct mlx5dv_striding_rq_init_attr){
274 .single_stride_log_num_of_bytes = rxq_data->strd_sz_n,
275 .single_wqe_log_num_of_strides = rxq_data->strd_num_n,
276 .two_byte_shift_en = MLX5_MPRQ_TWO_BYTE_SHIFT,
279 rxq_obj->wq = mlx5_glue->dv_create_wq(priv->sh->cdev->ctx, &wq_attr.ibv,
282 rxq_obj->wq = mlx5_glue->create_wq(priv->sh->cdev->ctx, &wq_attr.ibv);
286 * Make sure number of WRs*SGEs match expectations since a queue
287 * cannot allocate more than "desc" buffers.
289 if (wq_attr.ibv.max_wr != (wqe_n >> rxq_data->sges_n) ||
290 wq_attr.ibv.max_sge != (1u << rxq_data->sges_n)) {
292 "Port %u Rx queue %u requested %u*%u but got"
294 priv->dev_data->port_id, rxq->idx,
295 wqe_n >> rxq_data->sges_n,
296 (1 << rxq_data->sges_n),
297 wq_attr.ibv.max_wr, wq_attr.ibv.max_sge);
298 claim_zero(mlx5_glue->destroy_wq(rxq_obj->wq));
307 * Create the Rx queue Verbs object.
310 * Pointer to Rx queue.
313 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Create the Verbs objects backing one Rx queue: a completion channel, the
 * CQ and the WQ, move the WQ to IBV_WQS_RDY, then copy the CQ/WQ ring
 * information obtained through dv_init_obj() into rxq_data.
 * The visible error path destroys the WQ, CQ and completion channel while
 * preserving rte_errno across the cleanup.
 * NOTE(review): short lines (conditions, gotos, rte_errno assignments,
 * braces) are missing from this listing - e.g. the condition under which the
 * completion channel is created is not visible.
 */
316 mlx5_rxq_ibv_obj_new(struct mlx5_rxq_priv *rxq)
318 uint16_t idx = rxq->idx;
319 struct mlx5_priv *priv = rxq->priv;
320 uint16_t port_id = priv->dev_data->port_id;
321 struct mlx5_rxq_ctrl *rxq_ctrl = rxq->ctrl;
322 struct mlx5_rxq_data *rxq_data = &rxq_ctrl->rxq;
323 struct mlx5_rxq_obj *tmpl = rxq_ctrl->obj;
324 struct mlx5dv_cq cq_info;
325 struct mlx5dv_rwq rwq;
327 struct mlx5dv_obj obj;
329 MLX5_ASSERT(rxq_data);
331 tmpl->rxq_ctrl = rxq_ctrl;
334 mlx5_glue->create_comp_channel(priv->sh->cdev->ctx);
335 if (!tmpl->ibv_channel) {
336 DRV_LOG(ERR, "Port %u: comp channel creation failure.",
/* Keep the channel's file descriptor in the queue object. */
341 tmpl->fd = ((struct ibv_comp_channel *)(tmpl->ibv_channel))->fd;
343 /* Create CQ using Verbs API. */
344 tmpl->ibv_cq = mlx5_rxq_ibv_cq_create(rxq);
346 DRV_LOG(ERR, "Port %u Rx queue %u CQ creation failure.",
/* Query the mlx5dv view of the CQ into cq_info. */
351 obj.cq.in = tmpl->ibv_cq;
352 obj.cq.out = &cq_info;
353 ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_CQ);
/* The CQE size reported for the CQ must equal the cache line size. */
358 if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
360 "Port %u wrong MLX5_CQE_SIZE environment "
361 "variable value: it should be set to %u.",
362 port_id, RTE_CACHE_LINE_SIZE);
366 /* Fill the rings. */
367 rxq_data->cqe_n = log2above(cq_info.cqe_cnt);
368 rxq_data->cq_db = cq_info.dbrec;
369 rxq_data->cqes = (volatile struct mlx5_cqe (*)[])(uintptr_t)cq_info.buf;
370 rxq_data->cq_uar = cq_info.cq_uar;
371 rxq_data->cqn = cq_info.cqn;
372 /* Create WQ (RQ) using Verbs API. */
373 tmpl->wq = mlx5_rxq_ibv_wq_create(rxq);
375 DRV_LOG(ERR, "Port %u Rx queue %u WQ creation failure.",
380 /* Change queue state to ready. */
381 ret = mlx5_ibv_modify_wq(rxq, IBV_WQS_RDY);
384 "Port %u Rx queue %u WQ state to IBV_WQS_RDY failed.",
/* Query the mlx5dv view of the WQ to obtain its buffer and doorbell record. */
389 obj.rwq.in = tmpl->wq;
391 ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_RWQ);
396 rxq_data->wqes = rwq.buf;
397 rxq_data->rq_db = rwq.dbrec;
398 rxq_data->cq_arm_sn = 0;
399 mlx5_rxq_initialize(rxq_data);
/* Mark the queue as started and remember the WQ number. */
401 priv->dev_data->rx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STARTED;
402 rxq_ctrl->wqn = ((struct ibv_wq *)(tmpl->wq))->wq_num;
/* Error path: release what was created, keeping the original rte_errno. */
405 ret = rte_errno; /* Save rte_errno before cleanup. */
407 claim_zero(mlx5_glue->destroy_wq(tmpl->wq));
409 claim_zero(mlx5_glue->destroy_cq(tmpl->ibv_cq));
410 if (tmpl->ibv_channel)
411 claim_zero(mlx5_glue->destroy_comp_channel(tmpl->ibv_channel));
412 rte_errno = ret; /* Restore rte_errno. */
417 * Release an Rx verbs queue object.
420 * Pointer to Rx queue.
423 mlx5_rxq_ibv_obj_release(struct mlx5_rxq_priv *rxq)
425 struct mlx5_rxq_obj *rxq_obj = rxq->ctrl->obj;
427 MLX5_ASSERT(rxq_obj);
428 MLX5_ASSERT(rxq_obj->wq);
429 MLX5_ASSERT(rxq_obj->ibv_cq);
430 claim_zero(mlx5_glue->destroy_wq(rxq_obj->wq));
431 claim_zero(mlx5_glue->destroy_cq(rxq_obj->ibv_cq));
432 if (rxq_obj->ibv_channel)
433 claim_zero(mlx5_glue->destroy_comp_channel
434 (rxq_obj->ibv_channel));
438 * Get event for an Rx verbs queue object.
441 * Verbs Rx queue object.
444 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Read one completion event from the queue's completion channel with
 * get_cq_event() and acknowledge it with ack_cq_events().
 * NOTE(review): the lines following the check below (and the function's
 * exit path) are missing from this listing; presumably a failed read or an
 * event for another CQ skips the acknowledgement - confirm in the full file.
 */
447 mlx5_rx_ibv_get_event(struct mlx5_rxq_obj *rxq_obj)
449 struct ibv_cq *ev_cq;
451 int ret = mlx5_glue->get_cq_event(rxq_obj->ibv_channel,
/* The event must have been read successfully and belong to this queue's CQ. */
454 if (ret < 0 || ev_cq != rxq_obj->ibv_cq)
/* Acknowledge exactly one event on the queue's CQ. */
456 mlx5_glue->ack_cq_events(rxq_obj->ibv_cq, 1);
467 * Creates a receive work queue as a field of indirection table.
470 * Pointer to Ethernet device.
472 * Log of number of queues in the array.
474 * Verbs indirection table object.
477 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Build a Verbs RWQ indirection table with log size log_n from the WQs of
 * the Rx queues listed in ind_tbl->queues, and store the created table in
 * ind_tbl->ind_table.
 * NOTE(review): short lines (local declarations, the body of the second
 * loop, remaining initializer fields, error return) are missing from this
 * listing.
 */
480 mlx5_ibv_ind_table_new(struct rte_eth_dev *dev, const unsigned int log_n,
481 struct mlx5_ind_table_obj *ind_tbl)
483 struct mlx5_priv *priv = dev->data->dev_private;
/* Variable-length array holding one WQ pointer per table entry. */
484 struct ibv_wq *wq[1 << log_n];
487 MLX5_ASSERT(ind_tbl);
/* Collect the WQ of every queue referenced by the indirection table object. */
488 for (i = 0; i != ind_tbl->queues_n; ++i) {
489 struct mlx5_rxq_data *rxq = (*priv->rxqs)[ind_tbl->queues[i]];
490 struct mlx5_rxq_ctrl *rxq_ctrl =
491 container_of(rxq, struct mlx5_rxq_ctrl, rxq);
493 wq[i] = rxq_ctrl->obj->wq;
496 /* Finalise indirection table. */
/*
 * i continues from queues_n up to the table size while j restarts at 0.
 * NOTE(review): loop body not visible; presumably it pads the table by
 * repeating the first entries - confirm.
 */
497 for (j = 0; i != (unsigned int)(1 << log_n); ++j, ++i)
499 ind_tbl->ind_table = mlx5_glue->create_rwq_ind_table
500 (priv->sh->cdev->ctx,
501 &(struct ibv_rwq_ind_table_init_attr){
502 .log_ind_tbl_size = log_n,
506 if (!ind_tbl->ind_table) {
514 * Destroys the specified Indirection Table.
517 * Indirection table to release.
520 mlx5_ibv_ind_table_destroy(struct mlx5_ind_table_obj *ind_tbl)
522 claim_zero(mlx5_glue->destroy_rwq_ind_table(ind_tbl->ind_table));
526 * Create an Rx Hash queue.
529 * Pointer to Ethernet device.
531 * Pointer to Rx Hash queue.
536 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Create the Verbs QP of an Rx hash queue: a RAW_PACKET QP using Toeplitz
 * hashing with the queue's RSS key, hash fields and indirection table.
 * Under HAVE_IBV_DEVICE_TUNNEL_SUPPORT the QP is created with dv_create_qp()
 * and extra mlx5dv attributes; a create_qp_ex() variant with the same
 * generic attributes is also present.
 * NOTE(review): short lines (#else/#endif, conditions, braces, error
 * handling) are missing from this listing - e.g. the condition guarding the
 * tunnel offload flags is not visible.
 */
539 mlx5_ibv_hrxq_new(struct rte_eth_dev *dev, struct mlx5_hrxq *hrxq,
540 int tunnel __rte_unused)
542 struct mlx5_priv *priv = dev->data->dev_private;
543 struct ibv_qp *qp = NULL;
544 struct mlx5_ind_table_obj *ind_tbl = hrxq->ind_table;
545 const uint8_t *rss_key = hrxq->rss_key;
546 uint64_t hash_fields = hrxq->hash_fields;
548 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
549 struct mlx5dv_qp_init_attr qp_init_attr;
/* Start from all-zero mlx5dv attributes. */
551 memset(&qp_init_attr, 0, sizeof(qp_init_attr));
553 qp_init_attr.comp_mask =
554 MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS;
555 qp_init_attr.create_flags = MLX5DV_QP_CREATE_TUNNEL_OFFLOADS;
557 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
558 if (dev->data->dev_conf.lpbk_mode) {
559 /* Allow packet sent from NIC loop back w/o source MAC check. */
560 qp_init_attr.comp_mask |=
561 MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS;
562 qp_init_attr.create_flags |=
563 MLX5DV_QP_CREATE_TIR_ALLOW_SELF_LOOPBACK_UC;
566 qp = mlx5_glue->dv_create_qp
567 (priv->sh->cdev->ctx,
568 &(struct ibv_qp_init_attr_ex){
569 .qp_type = IBV_QPT_RAW_PACKET,
571 IBV_QP_INIT_ATTR_PD |
572 IBV_QP_INIT_ATTR_IND_TABLE |
573 IBV_QP_INIT_ATTR_RX_HASH,
574 .rx_hash_conf = (struct ibv_rx_hash_conf){
576 IBV_RX_HASH_FUNC_TOEPLITZ,
577 .rx_hash_key_len = hrxq->rss_key_len,
579 (void *)(uintptr_t)rss_key,
580 .rx_hash_fields_mask = hash_fields,
582 .rwq_ind_tbl = ind_tbl->ind_table,
583 .pd = priv->sh->cdev->pd,
/* Same generic QP attributes, created through the plain Verbs call. */
587 qp = mlx5_glue->create_qp_ex
588 (priv->sh->cdev->ctx,
589 &(struct ibv_qp_init_attr_ex){
590 .qp_type = IBV_QPT_RAW_PACKET,
592 IBV_QP_INIT_ATTR_PD |
593 IBV_QP_INIT_ATTR_IND_TABLE |
594 IBV_QP_INIT_ATTR_RX_HASH,
595 .rx_hash_conf = (struct ibv_rx_hash_conf){
597 IBV_RX_HASH_FUNC_TOEPLITZ,
598 .rx_hash_key_len = hrxq->rss_key_len,
600 (void *)(uintptr_t)rss_key,
601 .rx_hash_fields_mask = hash_fields,
603 .rwq_ind_tbl = ind_tbl->ind_table,
604 .pd = priv->sh->cdev->pd,
/* With flow DV support, a flow action targeting the QP is created as well. */
612 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
613 hrxq->action = mlx5_glue->dv_create_flow_action_dest_ibv_qp(hrxq->qp);
/* Error path: destroy the QP while keeping the original rte_errno. */
621 err = rte_errno; /* Save rte_errno before cleanup. */
623 claim_zero(mlx5_glue->destroy_qp(qp));
624 rte_errno = err; /* Restore rte_errno. */
629 * Destroy a Verbs queue pair.
632 * Hash Rx queue to release its qp.
635 mlx5_ibv_qp_destroy(struct mlx5_hrxq *hrxq)
637 claim_zero(mlx5_glue->destroy_qp(hrxq->qp));
641 * Release a drop Rx queue Verbs object.
644 * Pointer to Ethernet device.
/*
 * Release the drop Rx queue: destroy the WQ and CQ of its Verbs object,
 * free rxq->ctrl and clear priv->drop_queue.rxq.
 * NOTE(review): short lines (NULL checks, labels, gotos and further frees)
 * are missing from this listing - the exact early-exit order must be
 * confirmed in the full file.
 */
647 mlx5_rxq_ibv_obj_drop_release(struct rte_eth_dev *dev)
649 struct mlx5_priv *priv = dev->data->dev_private;
650 struct mlx5_rxq_priv *rxq = priv->drop_queue.rxq;
651 struct mlx5_rxq_obj *rxq_obj;
/* A drop queue may exist without its control structure. */
655 if (rxq->ctrl == NULL)
657 rxq_obj = rxq->ctrl->obj;
661 claim_zero(mlx5_glue->destroy_wq(rxq_obj->wq));
663 claim_zero(mlx5_glue->destroy_cq(rxq_obj->ibv_cq));
666 mlx5_free(rxq->ctrl);
/* Forget the drop queue so it is not released twice. */
669 priv->drop_queue.rxq = NULL;
673 * Create a drop Rx queue Verbs object.
676 * Pointer to Ethernet device.
679 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Allocate the drop Rx queue (private data, control structure and object,
 * all zero-initialised through MLX5_MEM_ZERO) and create a one-entry CQ and
 * an RQ-type WQ for it. mlx5_rxq_ibv_obj_drop_release() is called at the end
 * of the function, in what appears to be the failure path.
 * NOTE(review): short lines (early return, gotos, rte_errno assignments,
 * labels, braces) are missing from this listing.
 */
682 mlx5_rxq_ibv_obj_drop_create(struct rte_eth_dev *dev)
684 struct mlx5_priv *priv = dev->data->dev_private;
685 struct ibv_context *ctx = priv->sh->cdev->ctx;
686 struct mlx5_rxq_priv *rxq = priv->drop_queue.rxq;
687 struct mlx5_rxq_ctrl *rxq_ctrl = NULL;
688 struct mlx5_rxq_obj *rxq_obj = NULL;
692 rxq = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*rxq), 0, SOCKET_ID_ANY);
694 DRV_LOG(DEBUG, "Port %u cannot allocate drop Rx queue memory.",
/* Publish the queue right away so the release helper can find it. */
699 priv->drop_queue.rxq = rxq;
700 rxq_ctrl = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*rxq_ctrl), 0,
702 if (rxq_ctrl == NULL) {
703 DRV_LOG(DEBUG, "Port %u cannot allocate drop Rx queue control memory.",
708 rxq->ctrl = rxq_ctrl;
709 rxq_obj = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*rxq_obj), 0,
711 if (rxq_obj == NULL) {
712 DRV_LOG(DEBUG, "Port %u cannot allocate drop Rx queue memory.",
717 rxq_ctrl->obj = rxq_obj;
/* A CQ with a single entry, no context and no completion channel. */
718 rxq_obj->ibv_cq = mlx5_glue->create_cq(ctx, 1, NULL, NULL, 0);
719 if (!rxq_obj->ibv_cq) {
720 DRV_LOG(DEBUG, "Port %u cannot allocate CQ for drop queue.",
725 rxq_obj->wq = mlx5_glue->create_wq(ctx, &(struct ibv_wq_init_attr){
726 .wq_type = IBV_WQT_RQ,
729 .pd = priv->sh->cdev->pd,
730 .cq = rxq_obj->ibv_cq,
733 DRV_LOG(DEBUG, "Port %u cannot allocate WQ for drop queue.",
740 mlx5_rxq_ibv_obj_drop_release(dev);
745 * Create a Verbs drop action for Rx Hash queue.
748 * Pointer to Ethernet device.
751 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Create the Verbs drop action: the drop Rx queue, an indirection table of
 * log size 0 pointing at its WQ, and a RAW_PACKET hash QP with an empty hash
 * field mask. With flow DV support a flow action targeting the QP is created
 * too. The indirection table is stored in hrxq->ind_table->ind_table.
 * The visible cleanup destroys the QP, the indirection table and the drop
 * Rx queue.
 * NOTE(review): short lines (conditions, gotos, rte_errno assignments,
 * labels, braces) are missing from this listing.
 */
754 mlx5_ibv_drop_action_create(struct rte_eth_dev *dev)
756 struct mlx5_priv *priv = dev->data->dev_private;
757 struct mlx5_hrxq *hrxq = priv->drop_queue.hrxq;
758 struct ibv_rwq_ind_table *ind_tbl = NULL;
759 struct mlx5_rxq_obj *rxq;
762 MLX5_ASSERT(hrxq && hrxq->ind_table);
763 ret = mlx5_rxq_ibv_obj_drop_create(dev);
/* Despite its name, rxq is the Verbs object of the drop queue. */
766 rxq = priv->drop_queue.rxq->ctrl->obj;
767 ind_tbl = mlx5_glue->create_rwq_ind_table
768 (priv->sh->cdev->ctx,
769 &(struct ibv_rwq_ind_table_init_attr){
770 .log_ind_tbl_size = 0,
771 .ind_tbl = (struct ibv_wq **)&rxq->wq,
775 DRV_LOG(DEBUG, "Port %u"
776 " cannot allocate indirection table for drop queue.",
781 hrxq->qp = mlx5_glue->create_qp_ex(priv->sh->cdev->ctx,
782 &(struct ibv_qp_init_attr_ex){
783 .qp_type = IBV_QPT_RAW_PACKET,
784 .comp_mask = IBV_QP_INIT_ATTR_PD |
785 IBV_QP_INIT_ATTR_IND_TABLE |
786 IBV_QP_INIT_ATTR_RX_HASH,
787 .rx_hash_conf = (struct ibv_rx_hash_conf){
788 .rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ,
789 .rx_hash_key_len = MLX5_RSS_HASH_KEY_LEN,
790 .rx_hash_key = rss_hash_default_key,
791 .rx_hash_fields_mask = 0,
793 .rwq_ind_tbl = ind_tbl,
794 .pd = priv->sh->cdev->pd
797 DRV_LOG(DEBUG, "Port %u cannot allocate QP for drop queue.",
802 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
803 hrxq->action = mlx5_glue->dv_create_flow_action_dest_ibv_qp(hrxq->qp);
/* Hand the indirection table over to the hash Rx queue. */
809 hrxq->ind_table->ind_table = ind_tbl;
/* Cleanup: QP first, then the indirection table, then the drop Rx queue. */
813 claim_zero(mlx5_glue->destroy_qp(hrxq->qp));
815 claim_zero(mlx5_glue->destroy_rwq_ind_table(ind_tbl));
816 if (priv->drop_queue.rxq)
817 mlx5_rxq_ibv_obj_drop_release(dev);
822 * Release a drop hash Rx queue.
825 * Pointer to Ethernet device.
828 mlx5_ibv_drop_action_destroy(struct rte_eth_dev *dev)
830 struct mlx5_priv *priv = dev->data->dev_private;
831 struct mlx5_hrxq *hrxq = priv->drop_queue.hrxq;
832 struct ibv_rwq_ind_table *ind_tbl = hrxq->ind_table->ind_table;
834 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
835 claim_zero(mlx5_glue->destroy_flow_action(hrxq->action));
837 claim_zero(mlx5_glue->destroy_qp(hrxq->qp));
838 claim_zero(mlx5_glue->destroy_rwq_ind_table(ind_tbl));
839 mlx5_rxq_ibv_obj_drop_release(dev);
843 * Create a QP Verbs object.
846 * Pointer to Ethernet device.
848 * Queue index in DPDK Tx queue array.
851 * The QP Verbs object, NULL otherwise and rte_errno is set.
853 static struct ibv_qp *
854 mlx5_txq_ibv_qp_create(struct rte_eth_dev *dev, uint16_t idx)
856 struct mlx5_priv *priv = dev->data->dev_private;
857 struct mlx5_txq_data *txq_data = (*priv->txqs)[idx];
858 struct mlx5_txq_ctrl *txq_ctrl =
859 container_of(txq_data, struct mlx5_txq_ctrl, txq);
860 struct ibv_qp *qp_obj = NULL;
861 struct ibv_qp_init_attr_ex qp_attr = { 0 };
862 const int desc = 1 << txq_data->elts_n;
864 MLX5_ASSERT(txq_ctrl->obj->cq);
865 /* CQ to be associated with the send queue. */
866 qp_attr.send_cq = txq_ctrl->obj->cq;
867 /* CQ to be associated with the receive queue. */
868 qp_attr.recv_cq = txq_ctrl->obj->cq;
869 /* Max number of outstanding WRs. */
870 qp_attr.cap.max_send_wr = ((priv->sh->device_attr.max_qp_wr < desc) ?
871 priv->sh->device_attr.max_qp_wr : desc);
873 * Max number of scatter/gather elements in a WR, must be 1 to prevent
874 * libmlx5 from trying to affect must be 1 to prevent libmlx5 from
875 * trying to affect too much memory. TX gather is not impacted by the
876 * device_attr.max_sge limit and will still work properly.
878 qp_attr.cap.max_send_sge = 1;
879 qp_attr.qp_type = IBV_QPT_RAW_PACKET,
880 /* Do *NOT* enable this, completions events are managed per Tx burst. */
881 qp_attr.sq_sig_all = 0;
882 qp_attr.pd = priv->sh->cdev->pd;
883 qp_attr.comp_mask = IBV_QP_INIT_ATTR_PD;
884 if (txq_data->inlen_send)
885 qp_attr.cap.max_inline_data = txq_ctrl->max_inline_data;
886 if (txq_data->tso_en) {
887 qp_attr.max_tso_header = txq_ctrl->max_tso_header;
888 qp_attr.comp_mask |= IBV_QP_INIT_ATTR_MAX_TSO_HEADER;
890 qp_obj = mlx5_glue->create_qp_ex(priv->sh->cdev->ctx, &qp_attr);
891 if (qp_obj == NULL) {
892 DRV_LOG(ERR, "Port %u Tx queue %u QP creation failure.",
893 dev->data->port_id, idx);
900 * Create the Tx queue Verbs object.
903 * Pointer to Ethernet device.
905 * Queue index in DPDK Tx queue array.
908 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Create the Verbs objects of Tx queue idx: a CQ sized from the descriptor
 * count, a QP (mlx5_txq_ibv_qp_create()) moved to ready with
 * mlx5_ibv_modify_qp(), then fill txq_data from the mlx5dv view of the CQ
 * and QP, record the UAR information and mark the queue as started.
 * The visible error path destroys the CQ and QP while preserving rte_errno.
 * NOTE(review): short lines (declarations, conditions, gotos, rte_errno
 * assignments, #endif, braces) are missing from this listing.
 */
911 mlx5_txq_ibv_obj_new(struct rte_eth_dev *dev, uint16_t idx)
913 struct mlx5_priv *priv = dev->data->dev_private;
914 struct mlx5_txq_data *txq_data = (*priv->txqs)[idx];
915 struct mlx5_txq_ctrl *txq_ctrl =
916 container_of(txq_data, struct mlx5_txq_ctrl, txq);
917 struct mlx5_txq_obj *txq_obj = txq_ctrl->obj;
920 struct mlx5dv_cq cq_info;
921 struct mlx5dv_obj obj;
922 const int desc = 1 << txq_data->elts_n;
925 MLX5_ASSERT(txq_data);
926 MLX5_ASSERT(txq_obj);
927 txq_obj->txq_ctrl = txq_ctrl;
/* The MLX5_ENABLE_CQE_COMPRESSION environment variable is rejected here. */
928 if (mlx5_getenv_int("MLX5_ENABLE_CQE_COMPRESSION")) {
929 DRV_LOG(ERR, "Port %u MLX5_ENABLE_CQE_COMPRESSION "
930 "must never be set.", dev->data->port_id);
/* CQ size is derived from the descriptor count and the completion thresholds. */
934 cqe_n = desc / MLX5_TX_COMP_THRESH +
935 1 + MLX5_TX_COMP_THRESH_INLINE_DIV;
936 txq_obj->cq = mlx5_glue->create_cq(priv->sh->cdev->ctx, cqe_n,
938 if (txq_obj->cq == NULL) {
939 DRV_LOG(ERR, "Port %u Tx queue %u CQ creation failure.",
940 dev->data->port_id, idx);
944 txq_obj->qp = mlx5_txq_ibv_qp_create(dev, idx);
945 if (txq_obj->qp == NULL) {
949 ret = mlx5_ibv_modify_qp(txq_obj, MLX5_TXQ_MOD_RST2RDY,
950 (uint8_t)priv->dev_port);
952 DRV_LOG(ERR, "Port %u Tx queue %u QP state modifying failed.",
953 dev->data->port_id, idx);
/* Ask dv_init_obj() to report the UAR mmap offset of the QP. */
957 qp.comp_mask = MLX5DV_QP_MASK_UAR_MMAP_OFFSET;
958 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
959 /* If using DevX, need additional mask to read tisn value. */
960 if (priv->sh->devx && !priv->sh->tdn)
961 qp.comp_mask |= MLX5DV_QP_MASK_RAW_QP_HANDLES;
/* Query the mlx5dv view of both the CQ and the QP in a single call. */
963 obj.cq.in = txq_obj->cq;
964 obj.cq.out = &cq_info;
965 obj.qp.in = txq_obj->qp;
967 ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_CQ | MLX5DV_OBJ_QP);
/* The CQE size reported for the CQ must equal the cache line size. */
972 if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
974 "Port %u wrong MLX5_CQE_SIZE environment variable"
975 " value: it should be set to %u.",
976 dev->data->port_id, RTE_CACHE_LINE_SIZE);
/* Ring sizes are stored as log2 (_n), entry count (_s) and index mask (_m). */
980 txq_data->cqe_n = log2above(cq_info.cqe_cnt);
981 txq_data->cqe_s = 1 << txq_data->cqe_n;
982 txq_data->cqe_m = txq_data->cqe_s - 1;
/* QP number stored shifted left by 8 bits. */
983 txq_data->qp_num_8s = ((struct ibv_qp *)txq_obj->qp)->qp_num << 8;
984 txq_data->wqes = qp.sq.buf;
985 txq_data->wqe_n = log2above(qp.sq.wqe_cnt);
986 txq_data->wqe_s = 1 << txq_data->wqe_n;
987 txq_data->wqe_m = txq_data->wqe_s - 1;
988 txq_data->wqes_end = txq_data->wqes + txq_data->wqe_s;
989 txq_data->qp_db = &qp.dbrec[MLX5_SND_DBR];
990 txq_data->cq_db = cq_info.dbrec;
991 txq_data->cqes = (volatile struct mlx5_cqe *)cq_info.buf;
/* Reset the WQE producer/consumer indices and completion bookkeeping. */
994 txq_data->wqe_ci = 0;
995 txq_data->wqe_pi = 0;
996 txq_data->wqe_comp = 0;
997 txq_data->wqe_thres = txq_data->wqe_s / MLX5_TX_COMP_THRESH_INLINE_DIV;
998 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
1000 * If using DevX need to query and store TIS transport domain value.
1001 * This is done once per port.
1002 * Will use this value on Rx, when creating matching TIR.
1004 if (priv->sh->devx && !priv->sh->tdn) {
1005 ret = mlx5_devx_cmd_qp_query_tis_td(txq_obj->qp, qp.tisn,
1008 DRV_LOG(ERR, "Fail to query port %u Tx queue %u QP TIS "
1009 "transport domain.", dev->data->port_id, idx);
1013 DRV_LOG(DEBUG, "Port %u Tx queue %u TIS number %d "
1014 "transport domain %d.", dev->data->port_id,
1015 idx, qp.tisn, priv->sh->tdn);
1019 txq_ctrl->bf_reg = qp.bf.reg;
/* The UAR mmap offset is valid only if the mask bit is still set after the query. */
1020 if (qp.comp_mask & MLX5DV_QP_MASK_UAR_MMAP_OFFSET) {
1021 txq_ctrl->uar_mmap_offset = qp.uar_mmap_offset;
1022 DRV_LOG(DEBUG, "Port %u: uar_mmap_offset 0x%" PRIx64 ".",
1023 dev->data->port_id, txq_ctrl->uar_mmap_offset);
1026 "Port %u failed to retrieve UAR info, invalid"
1028 dev->data->port_id);
1032 txq_uar_init(txq_ctrl);
1033 dev->data->tx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STARTED;
/* Error path: destroy the CQ and QP, keeping the original rte_errno. */
1036 ret = rte_errno; /* Save rte_errno before cleanup. */
1038 claim_zero(mlx5_glue->destroy_cq(txq_obj->cq));
1040 claim_zero(mlx5_glue->destroy_qp(txq_obj->qp));
1041 rte_errno = ret; /* Restore rte_errno. */
1046 * Create the dummy QP with minimal resources for loopback.
1049 * Pointer to Ethernet device.
1052 * 0 on success, a negative errno value otherwise and rte_errno is set.
/*
 * Create the dummy CQ and RAW_PACKET QP used for loopback, stored in the
 * shared device context (sh->self_lb) and guarded by sh->self_lb.refcnt.
 * The body shown here is compiled only when both tunnel and flow DV support
 * are available.
 * NOTE(review): short lines (conditions, gotos, returns, #else branch,
 * braces) are missing from this listing.
 */
1055 mlx5_rxq_ibv_obj_dummy_lb_create(struct rte_eth_dev *dev)
1057 #if defined(HAVE_IBV_DEVICE_TUNNEL_SUPPORT) && defined(HAVE_IBV_FLOW_DV_SUPPORT)
1058 struct mlx5_priv *priv = dev->data->dev_private;
1059 struct mlx5_dev_ctx_shared *sh = priv->sh;
1060 struct ibv_context *ctx = sh->cdev->ctx;
1061 struct mlx5dv_qp_init_attr qp_init_attr = {0};
1063 struct ibv_cq_init_attr_ex ibv;
1064 struct mlx5dv_cq_init_attr mlx5;
1067 if (dev->data->dev_conf.lpbk_mode) {
1068 /* Allow packet sent from NIC loop back w/o source MAC check. */
1069 qp_init_attr.comp_mask |=
1070 MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS;
1071 qp_init_attr.create_flags |=
1072 MLX5DV_QP_CREATE_TIR_ALLOW_SELF_LOOPBACK_UC;
1076 /* Only need to check refcnt, 0 after "sh" is allocated. */
/* fetch_add yields the previous count: non-zero means the objects already exist. */
1077 if (!!(__atomic_fetch_add(&sh->self_lb.refcnt, 1, __ATOMIC_RELAXED))) {
1078 MLX5_ASSERT(sh->self_lb.ibv_cq && sh->self_lb.qp);
1082 cq_attr.ibv = (struct ibv_cq_init_attr_ex){
1087 cq_attr.mlx5 = (struct mlx5dv_cq_init_attr){
1090 /* Only CQ is needed, no WQ(RQ) is required in this case. */
1091 sh->self_lb.ibv_cq = mlx5_glue->cq_ex_to_cq(mlx5_glue->dv_create_cq(ctx,
1094 if (!sh->self_lb.ibv_cq) {
1095 DRV_LOG(ERR, "Port %u cannot allocate CQ for loopback.",
1096 dev->data->port_id);
/* The same CQ serves as both send and receive CQ of the dummy QP. */
1100 sh->self_lb.qp = mlx5_glue->dv_create_qp(ctx,
1101 &(struct ibv_qp_init_attr_ex){
1102 .qp_type = IBV_QPT_RAW_PACKET,
1103 .comp_mask = IBV_QP_INIT_ATTR_PD,
1105 .send_cq = sh->self_lb.ibv_cq,
1106 .recv_cq = sh->self_lb.ibv_cq,
1107 .cap.max_recv_wr = 1,
1110 if (!sh->self_lb.qp) {
1111 DRV_LOG(DEBUG, "Port %u cannot allocate QP for loopback.",
1112 dev->data->port_id);
/* Cleanup: destroy the CQ if it was created and give back the reference taken above. */
1119 if (sh->self_lb.ibv_cq) {
1120 claim_zero(mlx5_glue->destroy_cq(sh->self_lb.ibv_cq));
1121 sh->self_lb.ibv_cq = NULL;
1123 (void)__atomic_sub_fetch(&sh->self_lb.refcnt, 1, __ATOMIC_RELAXED);
1132 * Release the dummy queue resources for loopback.
1135 * Pointer to Ethernet device.
/*
 * Drop one reference on the shared loopback objects (sh->self_lb) and, when
 * the count reaches zero, destroy the dummy QP and CQ and clear their
 * pointers. The body shown here is compiled only when both tunnel and flow
 * DV support are available.
 * NOTE(review): short lines (early return, #else branch, braces) are missing
 * from this listing.
 */
1138 mlx5_rxq_ibv_obj_dummy_lb_release(struct rte_eth_dev *dev)
1140 #if defined(HAVE_IBV_DEVICE_TUNNEL_SUPPORT) && defined(HAVE_IBV_FLOW_DV_SUPPORT)
1141 struct mlx5_priv *priv = dev->data->dev_private;
1142 struct mlx5_dev_ctx_shared *sh = priv->sh;
/* A release is only expected while at least one reference is held. */
1146 MLX5_ASSERT(__atomic_load_n(&sh->self_lb.refcnt, __ATOMIC_RELAXED));
/* sub_fetch yields the new count: zero means this was the last reference. */
1147 if (!(__atomic_sub_fetch(&sh->self_lb.refcnt, 1, __ATOMIC_RELAXED))) {
1148 if (sh->self_lb.qp) {
1149 claim_zero(mlx5_glue->destroy_qp(sh->self_lb.qp));
1150 sh->self_lb.qp = NULL;
1152 if (sh->self_lb.ibv_cq) {
1153 claim_zero(mlx5_glue->destroy_cq(sh->self_lb.ibv_cq));
1154 sh->self_lb.ibv_cq = NULL;
1165 * Release an Tx verbs queue object.
1168 * Verbs Tx queue object..
1171 mlx5_txq_ibv_obj_release(struct mlx5_txq_obj *txq_obj)
1173 MLX5_ASSERT(txq_obj);
1174 claim_zero(mlx5_glue->destroy_qp(txq_obj->qp));
1175 claim_zero(mlx5_glue->destroy_cq(txq_obj->cq));
1178 struct mlx5_obj_ops ibv_obj_ops = {
1179 .rxq_obj_modify_vlan_strip = mlx5_rxq_obj_modify_wq_vlan_strip,
1180 .rxq_obj_new = mlx5_rxq_ibv_obj_new,
1181 .rxq_event_get = mlx5_rx_ibv_get_event,
1182 .rxq_obj_modify = mlx5_ibv_modify_wq,
1183 .rxq_obj_release = mlx5_rxq_ibv_obj_release,
1184 .ind_table_new = mlx5_ibv_ind_table_new,
1185 .ind_table_destroy = mlx5_ibv_ind_table_destroy,
1186 .hrxq_new = mlx5_ibv_hrxq_new,
1187 .hrxq_destroy = mlx5_ibv_qp_destroy,
1188 .drop_action_create = mlx5_ibv_drop_action_create,
1189 .drop_action_destroy = mlx5_ibv_drop_action_destroy,
1190 .txq_obj_new = mlx5_txq_ibv_obj_new,
1191 .txq_obj_modify = mlx5_ibv_modify_qp,
1192 .txq_obj_release = mlx5_txq_ibv_obj_release,
1193 .lb_dummy_queue_create = NULL,
1194 .lb_dummy_queue_release = NULL,