net/mlx5: support descriptor LWM for Rx queue
[dpdk.git] / drivers / net / mlx5 / mlx5_flow_aso.c
index 64631ff..4129e3a 100644 (file)
 #include "mlx5.h"
 #include "mlx5_flow.h"
 
-/**
- * Destroy Completion Queue used for ASO access.
- *
- * @param[in] cq
- *   ASO CQ to destroy.
- */
-static void
-mlx5_aso_cq_destroy(struct mlx5_aso_cq *cq)
-{
-       if (cq->cq_obj.cq)
-               mlx5_devx_cq_destroy(&cq->cq_obj);
-       memset(cq, 0, sizeof(*cq));
-}
-
-/**
- * Create Completion Queue used for ASO access.
- *
- * @param[in] ctx
- *   Context returned from mlx5 open_device() glue function.
- * @param[in/out] cq
- *   Pointer to CQ to create.
- * @param[in] log_desc_n
- *   Log of number of descriptors in queue.
- * @param[in] socket
- *   Socket to use for allocation.
- * @param[in] uar_page_id
- *   UAR page ID to use.
- *
- * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
- */
-static int
-mlx5_aso_cq_create(void *ctx, struct mlx5_aso_cq *cq, uint16_t log_desc_n,
-                  int socket, int uar_page_id)
-{
-       struct mlx5_devx_cq_attr attr = {
-               .uar_page_id = uar_page_id,
-       };
-
-       cq->log_desc_n = log_desc_n;
-       cq->cq_ci = 0;
-       return mlx5_devx_cq_create(ctx, &cq->cq_obj, log_desc_n, &attr, socket);
-}
-
 /**
  * Free MR resources.
  *
- * @param[in] sh
- *   Pointer to shared device context.
+ * @param[in] cdev
+ *   Pointer to the mlx5 common device.
  * @param[in] mr
  *   MR to free.
  */
 static void
-mlx5_aso_dereg_mr(struct mlx5_dev_ctx_shared *sh, struct mlx5_pmd_mr *mr)
+mlx5_aso_dereg_mr(struct mlx5_common_device *cdev, struct mlx5_pmd_mr *mr)
 {
        void *addr = mr->addr;
 
-       sh->share_cache.dereg_mr_cb(mr);
+       cdev->mr_scache.dereg_mr_cb(mr);
        mlx5_free(addr);
        memset(mr, 0, sizeof(*mr));
 }
@@ -78,32 +34,29 @@ mlx5_aso_dereg_mr(struct mlx5_dev_ctx_shared *sh, struct mlx5_pmd_mr *mr)
 /**
  * Register Memory Region.
  *
- * @param[in] sh
- *   Pointer to shared device context.
+ * @param[in] cdev
+ *   Pointer to the mlx5 common device.
  * @param[in] length
  *   Size of MR buffer.
  * @param[in/out] mr
  *   Pointer to MR to create.
- * @param[in] socket
- *   Socket to use for allocation.
  *
  * @return
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-mlx5_aso_reg_mr(struct mlx5_dev_ctx_shared *sh, size_t length,
-               struct mlx5_pmd_mr *mr, int socket)
+mlx5_aso_reg_mr(struct mlx5_common_device *cdev, size_t length,
+               struct mlx5_pmd_mr *mr)
 {
-
        int ret;
 
        mr->addr = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, length, 4096,
-                              socket);
+                              SOCKET_ID_ANY);
        if (!mr->addr) {
                DRV_LOG(ERR, "Failed to create ASO bits mem for MR.");
                return -1;
        }
-       ret = sh->share_cache.reg_mr_cb(sh->pd, mr->addr, length, mr);
+       ret = cdev->mr_scache.reg_mr_cb(cdev->pd, mr->addr, length, mr);
        if (ret) {
                DRV_LOG(ERR, "Failed to create direct Mkey.");
                mlx5_free(mr->addr);
@@ -122,7 +75,7 @@ static void
 mlx5_aso_destroy_sq(struct mlx5_aso_sq *sq)
 {
        mlx5_devx_sq_destroy(&sq->sq_obj);
-       mlx5_aso_cq_destroy(&sq->cq);
+       mlx5_devx_cq_destroy(&sq->cq.cq_obj);
        memset(sq, 0, sizeof(*sq));
 }
 
@@ -226,35 +179,31 @@ mlx5_aso_ct_init_sq(struct mlx5_aso_sq *sq)
 /**
  * Create Send Queue used for ASO access.
  *
- * @param[in] ctx
- *   Context returned from mlx5 open_device() glue function.
+ * @param[in] cdev
+ *   Pointer to the mlx5 common device.
  * @param[in/out] sq
  *   Pointer to SQ to create.
- * @param[in] socket
- *   Socket to use for allocation.
  * @param[in] uar
  *   User Access Region object.
- * @param[in] pdn
- *   Protection Domain number to use.
- * @param[in] log_desc_n
- *   Log of number of descriptors in queue.
- * @param[in] ts_format
- *   timestamp format supported by the queue.
  *
  * @return
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 static int
-mlx5_aso_sq_create(void *ctx, struct mlx5_aso_sq *sq, int socket, void *uar,
-                  uint32_t pdn, uint16_t log_desc_n, uint32_t ts_format)
+mlx5_aso_sq_create(struct mlx5_common_device *cdev, struct mlx5_aso_sq *sq,
+                  void *uar)
 {
-       struct mlx5_devx_create_sq_attr attr = {
+       struct mlx5_devx_cq_attr cq_attr = {
+               .uar_page_id = mlx5_os_get_devx_uar_page_id(uar),
+       };
+       struct mlx5_devx_create_sq_attr sq_attr = {
                .user_index = 0xFFFF,
                .wq_attr = (struct mlx5_devx_wq_attr){
-                       .pd = pdn,
+                       .pd = cdev->pdn,
                        .uar_page = mlx5_os_get_devx_uar_page_id(uar),
                },
-               .ts_format = mlx5_ts_format_conv(ts_format),
+               .ts_format =
+                       mlx5_ts_format_conv(cdev->config.hca_attr.sq_ts_format),
        };
        struct mlx5_devx_modify_sq_attr modify_attr = {
                .state = MLX5_SQC_STATE_RDY,
@@ -262,14 +211,18 @@ mlx5_aso_sq_create(void *ctx, struct mlx5_aso_sq *sq, int socket, void *uar,
        uint16_t log_wqbb_n;
        int ret;
 
-       if (mlx5_aso_cq_create(ctx, &sq->cq, log_desc_n, socket,
-                              mlx5_os_get_devx_uar_page_id(uar)))
+       if (mlx5_devx_cq_create(cdev->ctx, &sq->cq.cq_obj,
+                               MLX5_ASO_QUEUE_LOG_DESC, &cq_attr,
+                               SOCKET_ID_ANY))
                goto error;
-       sq->log_desc_n = log_desc_n;
-       attr.cqn = sq->cq.cq_obj.cq->id;
+       sq->cq.cq_ci = 0;
+       sq->cq.log_desc_n = MLX5_ASO_QUEUE_LOG_DESC;
+       sq->log_desc_n = MLX5_ASO_QUEUE_LOG_DESC;
+       sq_attr.cqn = sq->cq.cq_obj.cq->id;
        /* for mlx5_aso_wqe that is twice the size of mlx5_wqe */
-       log_wqbb_n = log_desc_n + 1;
-       ret = mlx5_devx_sq_create(ctx, &sq->sq_obj, log_wqbb_n, &attr, socket);
+       log_wqbb_n = sq->log_desc_n + 1;
+       ret = mlx5_devx_sq_create(cdev->ctx, &sq->sq_obj, log_wqbb_n, &sq_attr,
+                                 SOCKET_ID_ANY);
        if (ret) {
                DRV_LOG(ERR, "Can't create SQ object.");
                rte_errno = ENOMEM;
@@ -285,7 +238,6 @@ mlx5_aso_sq_create(void *ctx, struct mlx5_aso_sq *sq, int socket, void *uar,
        sq->head = 0;
        sq->tail = 0;
        sq->sqn = sq->sq_obj.sq->id;
-       sq->uar_addr = mlx5_os_get_devx_uar_reg_addr(uar);
        rte_spinlock_init(&sq->sqsl);
        return 0;
 error:
@@ -309,36 +261,34 @@ mlx5_aso_queue_init(struct mlx5_dev_ctx_shared *sh,
                    enum mlx5_access_aso_opc_mod aso_opc_mod)
 {
        uint32_t sq_desc_n = 1 << MLX5_ASO_QUEUE_LOG_DESC;
+       struct mlx5_common_device *cdev = sh->cdev;
 
        switch (aso_opc_mod) {
        case ASO_OPC_MOD_FLOW_HIT:
-               if (mlx5_aso_reg_mr(sh, (MLX5_ASO_AGE_ACTIONS_PER_POOL / 8) *
-                                   sq_desc_n, &sh->aso_age_mng->aso_sq.mr, 0))
+               if (mlx5_aso_reg_mr(cdev, (MLX5_ASO_AGE_ACTIONS_PER_POOL / 8) *
+                                   sq_desc_n, &sh->aso_age_mng->aso_sq.mr))
                        return -1;
-               if (mlx5_aso_sq_create(sh->ctx, &sh->aso_age_mng->aso_sq, 0,
-                                 sh->tx_uar, sh->pdn, MLX5_ASO_QUEUE_LOG_DESC,
-                                 sh->sq_ts_format)) {
-                       mlx5_aso_dereg_mr(sh, &sh->aso_age_mng->aso_sq.mr);
+               if (mlx5_aso_sq_create(cdev, &sh->aso_age_mng->aso_sq,
+                                      sh->tx_uar.obj)) {
+                       mlx5_aso_dereg_mr(cdev, &sh->aso_age_mng->aso_sq.mr);
                        return -1;
                }
                mlx5_aso_age_init_sq(&sh->aso_age_mng->aso_sq);
                break;
        case ASO_OPC_MOD_POLICER:
-               if (mlx5_aso_sq_create(sh->ctx, &sh->mtrmng->pools_mng.sq, 0,
-                                 sh->tx_uar, sh->pdn, MLX5_ASO_QUEUE_LOG_DESC,
-                                 sh->sq_ts_format))
+               if (mlx5_aso_sq_create(cdev, &sh->mtrmng->pools_mng.sq,
+                                      sh->tx_uar.obj))
                        return -1;
                mlx5_aso_mtr_init_sq(&sh->mtrmng->pools_mng.sq);
                break;
        case ASO_OPC_MOD_CONNECTION_TRACKING:
                /* 64B per object for query. */
-               if (mlx5_aso_reg_mr(sh, 64 * sq_desc_n,
-                                   &sh->ct_mng->aso_sq.mr, 0))
+               if (mlx5_aso_reg_mr(cdev, 64 * sq_desc_n,
+                                   &sh->ct_mng->aso_sq.mr))
                        return -1;
-               if (mlx5_aso_sq_create(sh->ctx, &sh->ct_mng->aso_sq, 0,
-                               sh->tx_uar, sh->pdn, MLX5_ASO_QUEUE_LOG_DESC,
-                               sh->sq_ts_format)) {
-                       mlx5_aso_dereg_mr(sh, &sh->ct_mng->aso_sq.mr);
+               if (mlx5_aso_sq_create(cdev, &sh->ct_mng->aso_sq,
+                                      sh->tx_uar.obj)) {
+                       mlx5_aso_dereg_mr(cdev, &sh->ct_mng->aso_sq.mr);
                        return -1;
                }
                mlx5_aso_ct_init_sq(&sh->ct_mng->aso_sq);
@@ -366,14 +316,14 @@ mlx5_aso_queue_uninit(struct mlx5_dev_ctx_shared *sh,
 
        switch (aso_opc_mod) {
        case ASO_OPC_MOD_FLOW_HIT:
-               mlx5_aso_dereg_mr(sh, &sh->aso_age_mng->aso_sq.mr);
+               mlx5_aso_dereg_mr(sh->cdev, &sh->aso_age_mng->aso_sq.mr);
                sq = &sh->aso_age_mng->aso_sq;
                break;
        case ASO_OPC_MOD_POLICER:
                sq = &sh->mtrmng->pools_mng.sq;
                break;
        case ASO_OPC_MOD_CONNECTION_TRACKING:
-               mlx5_aso_dereg_mr(sh, &sh->ct_mng->aso_sq.mr);
+               mlx5_aso_dereg_mr(sh->cdev, &sh->ct_mng->aso_sq.mr);
                sq = &sh->ct_mng->aso_sq;
                break;
        default:
@@ -386,8 +336,8 @@ mlx5_aso_queue_uninit(struct mlx5_dev_ctx_shared *sh,
 /**
  * Write a burst of WQEs to ASO SQ.
  *
- * @param[in] mng
- *   ASO management data, contains the SQ.
+ * @param[in] sh
+ *   Pointer to shared device context.
  * @param[in] n
  *   Index of the last valid pool.
  *
@@ -395,8 +345,9 @@ mlx5_aso_queue_uninit(struct mlx5_dev_ctx_shared *sh,
  *   Number of WQEs in burst.
  */
 static uint16_t
-mlx5_aso_sq_enqueue_burst(struct mlx5_aso_age_mng *mng, uint16_t n)
+mlx5_aso_sq_enqueue_burst(struct mlx5_dev_ctx_shared *sh, uint16_t n)
 {
+       struct mlx5_aso_age_mng *mng = sh->aso_age_mng;
        volatile struct mlx5_aso_wqe *wqe;
        struct mlx5_aso_sq *sq = &mng->aso_sq;
        struct mlx5_aso_age_pool *pool;
@@ -413,9 +364,9 @@ mlx5_aso_sq_enqueue_burst(struct mlx5_aso_age_mng *mng, uint16_t n)
                wqe = &sq->sq_obj.aso_wqes[sq->head & mask];
                rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]);
                /* Fill next WQE. */
-               rte_spinlock_lock(&mng->resize_sl);
+               rte_rwlock_read_lock(&mng->resize_rwl);
                pool = mng->pools[sq->next];
-               rte_spinlock_unlock(&mng->resize_sl);
+               rte_rwlock_read_unlock(&mng->resize_rwl);
                sq->elts[sq->head & mask].pool = pool;
                wqe->general_cseg.misc =
                                rte_cpu_to_be_32(((struct mlx5_devx_obj *)
@@ -435,11 +386,9 @@ mlx5_aso_sq_enqueue_burst(struct mlx5_aso_age_mng *mng, uint16_t n)
        } while (max);
        wqe->general_cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS <<
                                                         MLX5_COMP_MODE_OFFSET);
-       rte_io_wmb();
-       sq->sq_obj.db_rec[MLX5_SND_DBR] = rte_cpu_to_be_32(sq->pi);
-       rte_wmb();
-       *sq->uar_addr = *(volatile uint64_t *)wqe; /* Assume 64 bit ARCH.*/
-       rte_wmb();
+       mlx5_doorbell_ring(&sh->tx_uar.bf_db, *(volatile uint64_t *)wqe,
+                          sq->pi, &sq->sq_obj.db_rec[MLX5_SND_DBR],
+                          !sh->tx_uar.dbnc);
        return sq->elts[start_head & mask].burst_size;
 }
 
@@ -631,16 +580,16 @@ mlx5_flow_aso_alarm(void *arg)
        uint32_t us = 100u;
        uint16_t n;
 
-       rte_spinlock_lock(&sh->aso_age_mng->resize_sl);
+       rte_rwlock_read_lock(&sh->aso_age_mng->resize_rwl);
        n = sh->aso_age_mng->next;
-       rte_spinlock_unlock(&sh->aso_age_mng->resize_sl);
+       rte_rwlock_read_unlock(&sh->aso_age_mng->resize_rwl);
        mlx5_aso_completion_handle(sh);
        if (sq->next == n) {
                /* End of loop: wait 1 second. */
                us = US_PER_S;
                sq->next = 0;
        }
-       mlx5_aso_sq_enqueue_burst(sh->aso_age_mng, n);
+       mlx5_aso_sq_enqueue_burst(sh, n);
        if (rte_eal_alarm_set(us, mlx5_flow_aso_alarm, sh))
                DRV_LOG(ERR, "Cannot reinitialize aso alarm.");
 }
@@ -691,8 +640,9 @@ mlx5_aso_flow_hit_queue_poll_stop(struct mlx5_dev_ctx_shared *sh)
 }
 
 static uint16_t
-mlx5_aso_mtr_sq_enqueue_single(struct mlx5_aso_sq *sq,
-               struct mlx5_aso_mtr *aso_mtr)
+mlx5_aso_mtr_sq_enqueue_single(struct mlx5_dev_ctx_shared *sh,
+                              struct mlx5_aso_sq *sq,
+                              struct mlx5_aso_mtr *aso_mtr)
 {
        volatile struct mlx5_aso_wqe *wqe = NULL;
        struct mlx5_flow_meter_info *fm = NULL;
@@ -702,6 +652,7 @@ mlx5_aso_mtr_sq_enqueue_single(struct mlx5_aso_sq *sq,
        uint16_t res;
        uint32_t dseg_idx = 0;
        struct mlx5_aso_mtr_pool *pool = NULL;
+       uint32_t param_le;
 
        rte_spinlock_lock(&sq->sqsl);
        res = size - (uint16_t)(sq->head - sq->tail);
@@ -738,22 +689,43 @@ mlx5_aso_mtr_sq_enqueue_single(struct mlx5_aso_sq *sq,
                wqe->aso_dseg.mtrs[dseg_idx].ebs_eir = 0;
        }
        fmp = fm->profile;
-       if (fmp->profile.packet_mode)
-               wqe->aso_dseg.mtrs[dseg_idx].v_bo_sc_bbog_mm =
-                               RTE_BE32((1 << ASO_DSEG_VALID_OFFSET) |
-                               (MLX5_FLOW_COLOR_GREEN << ASO_DSEG_SC_OFFSET) |
-                               (MLX5_METER_MODE_PKT << ASO_DSEG_MTR_MODE));
+       param_le = (1 << ASO_DSEG_VALID_OFFSET);
+       if (fm->color_aware)
+               param_le |= (MLX5_FLOW_COLOR_UNDEFINED << ASO_DSEG_SC_OFFSET);
        else
-               wqe->aso_dseg.mtrs[dseg_idx].v_bo_sc_bbog_mm =
-                               RTE_BE32((1 << ASO_DSEG_VALID_OFFSET) |
-                               (MLX5_FLOW_COLOR_GREEN << ASO_DSEG_SC_OFFSET));
+               param_le |= (MLX5_FLOW_COLOR_GREEN << ASO_DSEG_SC_OFFSET);
+       if (fmp->profile.packet_mode)
+               param_le |= (MLX5_METER_MODE_PKT << ASO_DSEG_MTR_MODE);
+       wqe->aso_dseg.mtrs[dseg_idx].v_bo_sc_bbog_mm = RTE_BE32(param_le);
+       switch (fmp->profile.alg) {
+       case RTE_MTR_SRTCM_RFC2697:
+               /* Only needed for RFC2697. */
+               if (fm->profile->srtcm_prm.ebs_eir)
+                       wqe->aso_dseg.mtrs[dseg_idx].v_bo_sc_bbog_mm |=
+                                       RTE_BE32(1 << ASO_DSEG_BO_OFFSET);
+               break;
+       case RTE_MTR_TRTCM_RFC2698:
+               wqe->aso_dseg.mtrs[dseg_idx].v_bo_sc_bbog_mm |=
+                               RTE_BE32(1 << ASO_DSEG_BBOG_OFFSET);
+               break;
+       case RTE_MTR_TRTCM_RFC4115:
+               wqe->aso_dseg.mtrs[dseg_idx].v_bo_sc_bbog_mm |=
+                               RTE_BE32(1 << ASO_DSEG_BO_OFFSET);
+               break;
+       default:
+               break;
+       }
+       /*
+        * Note:
+        * For software performance reasons, the token fields are not set
+        * when posting the WQE to the ASO SQ. They are filled in by the HW
+        * automatically.
+        */
        sq->head++;
        sq->pi += 2;/* Each WQE contains 2 WQEBB's. */
-       rte_io_wmb();
-       sq->sq_obj.db_rec[MLX5_SND_DBR] = rte_cpu_to_be_32(sq->pi);
-       rte_wmb();
-       *sq->uar_addr = *(volatile uint64_t *)wqe; /* Assume 64 bit ARCH. */
-       rte_wmb();
+       mlx5_doorbell_ring(&sh->tx_uar.bf_db, *(volatile uint64_t *)wqe,
+                          sq->pi, &sq->sq_obj.db_rec[MLX5_SND_DBR],
+                          !sh->tx_uar.dbnc);
        rte_spinlock_unlock(&sq->sqsl);
        return 1;
 }
@@ -846,7 +818,7 @@ mlx5_aso_meter_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
 
        do {
                mlx5_aso_mtr_completion_handle(sq);
-               if (mlx5_aso_mtr_sq_enqueue_single(sq, mtr))
+               if (mlx5_aso_mtr_sq_enqueue_single(sh, sq, mtr))
                        return 0;
                /* Waiting for wqe resource. */
                rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY);
@@ -895,8 +867,8 @@ mlx5_aso_mtr_wait(struct mlx5_dev_ctx_shared *sh,
 /*
  * Post a WQE to the ASO CT SQ to modify the context.
  *
- * @param[in] mng
- *   Pointer to the CT pools management structure.
+ * @param[in] sh
+ *   Pointer to shared device context.
  * @param[in] ct
  *   Pointer to the generic CT structure related to the context.
  * @param[in] profile
@@ -906,12 +878,12 @@ mlx5_aso_mtr_wait(struct mlx5_dev_ctx_shared *sh,
  *   1 on success (WQE number), 0 on failure.
  */
 static uint16_t
-mlx5_aso_ct_sq_enqueue_single(struct mlx5_aso_ct_pools_mng *mng,
+mlx5_aso_ct_sq_enqueue_single(struct mlx5_dev_ctx_shared *sh,
                              struct mlx5_aso_ct_action *ct,
                              const struct rte_flow_action_conntrack *profile)
 {
        volatile struct mlx5_aso_wqe *wqe = NULL;
-       struct mlx5_aso_sq *sq = &mng->aso_sq;
+       struct mlx5_aso_sq *sq = &sh->ct_mng->aso_sq;
        uint16_t size = 1 << sq->log_desc_n;
        uint16_t mask = size - 1;
        uint16_t res;
@@ -1014,11 +986,9 @@ mlx5_aso_ct_sq_enqueue_single(struct mlx5_aso_ct_pools_mng *mng,
                 profile->reply_dir.max_ack);
        sq->head++;
        sq->pi += 2; /* Each WQE contains 2 WQEBB's. */
-       rte_io_wmb();
-       sq->sq_obj.db_rec[MLX5_SND_DBR] = rte_cpu_to_be_32(sq->pi);
-       rte_wmb();
-       *sq->uar_addr = *(volatile uint64_t *)wqe; /* Assume 64 bit ARCH. */
-       rte_wmb();
+       mlx5_doorbell_ring(&sh->tx_uar.bf_db, *(volatile uint64_t *)wqe,
+                          sq->pi, &sq->sq_obj.db_rec[MLX5_SND_DBR],
+                          !sh->tx_uar.dbnc);
        rte_spinlock_unlock(&sq->sqsl);
        return 1;
 }
@@ -1059,8 +1029,8 @@ mlx5_aso_ct_status_update(struct mlx5_aso_sq *sq, uint16_t num)
 /*
  * Post a WQE to the ASO CT SQ to query the current context.
  *
- * @param[in] mng
- *   Pointer to the CT pools management structure.
+ * @param[in] sh
+ *   Pointer to shared device context.
  * @param[in] ct
  *   Pointer to the generic CT structure related to the context.
  * @param[in] data
@@ -1070,11 +1040,11 @@ mlx5_aso_ct_status_update(struct mlx5_aso_sq *sq, uint16_t num)
  *   1 on success (WQE number), 0 on failure.
  */
 static int
-mlx5_aso_ct_sq_query_single(struct mlx5_aso_ct_pools_mng *mng,
+mlx5_aso_ct_sq_query_single(struct mlx5_dev_ctx_shared *sh,
                            struct mlx5_aso_ct_action *ct, char *data)
 {
        volatile struct mlx5_aso_wqe *wqe = NULL;
-       struct mlx5_aso_sq *sq = &mng->aso_sq;
+       struct mlx5_aso_sq *sq = &sh->ct_mng->aso_sq;
        uint16_t size = 1 << sq->log_desc_n;
        uint16_t mask = size - 1;
        uint16_t res;
@@ -1129,11 +1099,9 @@ mlx5_aso_ct_sq_query_single(struct mlx5_aso_ct_pools_mng *mng,
         * data segment is not used in this case.
         */
        sq->pi += 2;
-       rte_io_wmb();
-       sq->sq_obj.db_rec[MLX5_SND_DBR] = rte_cpu_to_be_32(sq->pi);
-       rte_wmb();
-       *sq->uar_addr = *(volatile uint64_t *)wqe; /* Assume 64 bit ARCH. */
-       rte_wmb();
+       mlx5_doorbell_ring(&sh->tx_uar.bf_db, *(volatile uint64_t *)wqe,
+                          sq->pi, &sq->sq_obj.db_rec[MLX5_SND_DBR],
+                          !sh->tx_uar.dbnc);
        rte_spinlock_unlock(&sq->sqsl);
        return 1;
 }
@@ -1213,14 +1181,13 @@ mlx5_aso_ct_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
                          struct mlx5_aso_ct_action *ct,
                          const struct rte_flow_action_conntrack *profile)
 {
-       struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
        uint32_t poll_wqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
        struct mlx5_aso_ct_pool *pool;
 
        MLX5_ASSERT(ct);
        do {
-               mlx5_aso_ct_completion_handle(mng);
-               if (mlx5_aso_ct_sq_enqueue_single(mng, ct, profile))
+               mlx5_aso_ct_completion_handle(sh->ct_mng);
+               if (mlx5_aso_ct_sq_enqueue_single(sh, ct, profile))
                        return 0;
                /* Waiting for wqe resource. */
                rte_delay_us_sleep(10u);
@@ -1360,7 +1327,6 @@ mlx5_aso_ct_query_by_wqe(struct mlx5_dev_ctx_shared *sh,
                         struct mlx5_aso_ct_action *ct,
                         struct rte_flow_action_conntrack *profile)
 {
-       struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
        uint32_t poll_wqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
        struct mlx5_aso_ct_pool *pool;
        char out_data[64 * 2];
@@ -1368,8 +1334,8 @@ mlx5_aso_ct_query_by_wqe(struct mlx5_dev_ctx_shared *sh,
 
        MLX5_ASSERT(ct);
        do {
-               mlx5_aso_ct_completion_handle(mng);
-               ret = mlx5_aso_ct_sq_query_single(mng, ct, out_data);
+               mlx5_aso_ct_completion_handle(sh->ct_mng);
+               ret = mlx5_aso_ct_sq_query_single(sh, ct, out_data);
                if (ret < 0)
                        return ret;
                else if (ret > 0)