/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2020 Mellanox Technologies, Ltd
 */
#include <rte_malloc.h>
#include <rte_cycles.h>
#include <rte_eal_paging.h>

#include <mlx5_malloc.h>
#include <mlx5_common_os.h>

#include "mlx5.h"
#include "mlx5_flow.h"
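
/*
 * Flow-hit ASO (Advanced Steering Operation) support for flow aging.
 * A dedicated DevX SQ/CQ pair is created per shared device context; an EAL
 * alarm periodically posts ACCESS_ASO WQEs that make the device dump the
 * per-flow "hit" bits of every age pool into a registered host buffer, and
 * the completion handler turns those bits into per-flow aging state.
 */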

/**
 * Destroy Completion Queue used for ASO access.
 *
 * @param[in] cq
 *   ASO CQ to destroy.
 */
static void
mlx5_aso_cq_destroy(struct mlx5_aso_cq *cq)
{
	if (cq->cq)
		claim_zero(mlx5_devx_cmd_destroy(cq->cq));
	if (cq->umem_obj)
		claim_zero(mlx5_glue->devx_umem_dereg(cq->umem_obj));
	if (cq->umem_buf)
		mlx5_free((void *)(uintptr_t)cq->umem_buf);
	memset(cq, 0, sizeof(*cq));
}

/**
 * Create Completion Queue used for ASO access.
 *
 * @param[in] ctx
 *   Context returned from mlx5 open_device() glue function.
 * @param[in/out] cq
 *   Pointer to CQ to create.
 * @param[in] log_desc_n
 *   Log of number of descriptors in queue.
 * @param[in] socket
 *   Socket to use for allocation.
 * @param[in] uar_page_id
 *   UAR page ID to use.
 * @param[in] eqn
 *   EQ number.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_aso_cq_create(void *ctx, struct mlx5_aso_cq *cq, uint16_t log_desc_n,
		   int socket, int uar_page_id, uint32_t eqn)
{
	struct mlx5_devx_cq_attr attr = { 0 };
	size_t pgsize = rte_mem_page_size();
	uint32_t umem_size;
	uint16_t cq_size = 1 << log_desc_n;

	cq->log_desc_n = log_desc_n;
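	/*
	 * One umem holds both the CQE ring (cq_size entries) and the two
	 * 32-bit doorbell records placed right after it; db_umem_offset
	 * below points just past the last CQE.
	 */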
	umem_size = sizeof(struct mlx5_cqe) * cq_size + sizeof(*cq->db_rec) * 2;
	cq->umem_buf = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, umem_size,
				   4096, socket);
	if (!cq->umem_buf) {
		DRV_LOG(ERR, "Failed to allocate memory for CQ.");
		rte_errno = ENOMEM;
		return -ENOMEM;
	}
	cq->umem_obj = mlx5_os_umem_reg(ctx,
					(void *)(uintptr_t)cq->umem_buf,
					umem_size,
					IBV_ACCESS_LOCAL_WRITE);
	if (!cq->umem_obj) {
		DRV_LOG(ERR, "Failed to register umem for aso CQ.");
		goto error;
	}
	attr.q_umem_valid = 1;
	attr.db_umem_valid = 1;
	attr.use_first_only = 0;
	attr.overrun_ignore = 0;
	attr.uar_page_id = uar_page_id;
	attr.q_umem_id = mlx5_os_get_umem_id(cq->umem_obj);
	attr.q_umem_offset = 0;
	attr.db_umem_id = attr.q_umem_id;
	attr.db_umem_offset = sizeof(struct mlx5_cqe) * cq_size;
	attr.eqn = eqn;
	attr.log_cq_size = log_desc_n;
	attr.log_page_size = rte_log2_u32(pgsize);
	cq->cq = mlx5_devx_cmd_create_cq(ctx, &attr);
	if (!cq->cq)
		goto error;
	cq->db_rec = RTE_PTR_ADD(cq->umem_buf, (uintptr_t)attr.db_umem_offset);
	cq->cq_ci = 0;
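	/*
	 * Fill the CQE area with 0xFF so every entry starts as invalid
	 * (hardware owned); the first poll then cannot mistake stale memory
	 * for a valid completion.
	 */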
	memset((void *)(uintptr_t)cq->umem_buf, 0xFF, attr.db_umem_offset);
	return 0;
error:
	mlx5_aso_cq_destroy(cq);
	return -1;
}

/**
 * Free MR resources.
 *
 * @param[in] mr
 *   MR to free.
 */
static void
mlx5_aso_devx_dereg_mr(struct mlx5_aso_devx_mr *mr)
{
	claim_zero(mlx5_devx_cmd_destroy(mr->mkey));
	if (!mr->is_indirect && mr->umem)
		claim_zero(mlx5_glue->devx_umem_dereg(mr->umem));
	mlx5_free(mr->buf);
	memset(mr, 0, sizeof(*mr));
}

/**
 * Register Memory Region.
 *
 * @param[in] ctx
 *   Context returned from mlx5 open_device() glue function.
 * @param[in] length
 *   Size of MR buffer.
 * @param[in/out] mr
 *   Pointer to MR to create.
 * @param[in] socket
 *   Socket to use for allocation.
 * @param[in] pdn
 *   Protection Domain number to use.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_aso_devx_reg_mr(void *ctx, size_t length, struct mlx5_aso_devx_mr *mr,
		     int socket, int pdn)
{
	struct mlx5_devx_mkey_attr mkey_attr;

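	/*
	 * The buffer allocated here is the target the device writes the ASO
	 * flow-hit data into; a direct mkey over the whole umem is created
	 * below so that WQEs can reference it by lkey.
	 */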
	mr->buf = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, length, 4096,
			      socket);
	if (!mr->buf) {
		DRV_LOG(ERR, "Failed to create ASO bits mem for MR by Devx.");
		return -1;
	}
	mr->umem = mlx5_os_umem_reg(ctx, mr->buf, length,
				    IBV_ACCESS_LOCAL_WRITE);
	if (!mr->umem) {
		DRV_LOG(ERR, "Failed to register Umem for MR by Devx.");
		goto error;
	}
	mkey_attr.addr = (uintptr_t)mr->buf;
	mkey_attr.size = length;
	mkey_attr.umem_id = mlx5_os_get_umem_id(mr->umem);
	mkey_attr.pd = pdn;
	mkey_attr.pg_access = 1;
	mkey_attr.klm_array = NULL;
	mkey_attr.klm_num = 0;
	mkey_attr.relaxed_ordering_read = 0;
	mkey_attr.relaxed_ordering_write = 0;
	mr->mkey = mlx5_devx_cmd_mkey_create(ctx, &mkey_attr);
	if (!mr->mkey) {
		DRV_LOG(ERR, "Failed to create direct Mkey.");
		goto error;
	}
	mr->length = length;
	mr->is_indirect = false;
	return 0;
error:
	if (mr->umem)
		claim_zero(mlx5_glue->devx_umem_dereg(mr->umem));
	mlx5_free(mr->buf);
	return -1;
}

/**
 * Destroy Send Queue used for ASO access.
 *
 * @param[in] sq
 *   ASO SQ to destroy.
 */
static void
mlx5_aso_destroy_sq(struct mlx5_aso_sq *sq)
{
	if (sq->wqe_umem) {
		mlx5_glue->devx_umem_dereg(sq->wqe_umem);
		sq->wqe_umem = NULL;
	}
	if (sq->umem_buf) {
		mlx5_free((void *)(uintptr_t)sq->umem_buf);
		sq->umem_buf = NULL;
	}
	if (sq->sq) {
		mlx5_devx_cmd_destroy(sq->sq);
		sq->sq = NULL;
	}
	if (sq->cq.cq)
		mlx5_aso_cq_destroy(&sq->cq);
	mlx5_aso_devx_dereg_mr(&sq->mr);
	memset(sq, 0, sizeof(*sq));
}

/**
 * Initialize Send Queue used for ASO access.
 *
 * @param[in] sq
 *   ASO SQ to initialize.
 */
static void
mlx5_aso_init_sq(struct mlx5_aso_sq *sq)
{
	volatile struct mlx5_aso_wqe *restrict wqe;
	int i;
	int size = 1 << sq->log_desc_n;
	uint64_t addr;

	/* All the next fields state should stay constant. */
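	/*
	 * Every WQE is bound once to its own slice of the MR buffer:
	 * MLX5_ASO_AGE_ACTIONS_PER_POOL hit bits, i.e. one bit per action of
	 * a single age pool. Only the per-request fields are written at
	 * enqueue time.
	 */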
	for (i = 0, wqe = &sq->wqes[0]; i < size; ++i, ++wqe) {
		wqe->general_cseg.sq_ds = rte_cpu_to_be_32((sq->sqn << 8) |
							   (sizeof(*wqe) >> 4));
		wqe->aso_cseg.lkey = rte_cpu_to_be_32(sq->mr.mkey->id);
		addr = (uint64_t)((uint64_t *)sq->mr.buf + i *
				  MLX5_ASO_AGE_ACTIONS_PER_POOL / 64);
		wqe->aso_cseg.va_h = rte_cpu_to_be_32((uint32_t)(addr >> 32));
		wqe->aso_cseg.va_l_r = rte_cpu_to_be_32((uint32_t)addr | 1u);
		wqe->aso_cseg.operand_masks = rte_cpu_to_be_32
			((ASO_OPER_LOGICAL_OR << ASO_CSEG_COND_OPER_OFFSET) |
			 (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_1_OPER_OFFSET) |
			 (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_0_OPER_OFFSET) |
			 (BYTEWISE_64BYTE << ASO_CSEG_DATA_MASK_MODE_OFFSET));
		wqe->aso_cseg.data_mask = RTE_BE64(UINT64_MAX);
	}
}

/**
 * Create Send Queue used for ASO access.
 *
 * @param[in] ctx
 *   Context returned from mlx5 open_device() glue function.
 * @param[in/out] sq
 *   Pointer to SQ to create.
 * @param[in] socket
 *   Socket to use for allocation.
 * @param[in] uar
 *   User Access Region object.
 * @param[in] pdn
 *   Protection Domain number to use.
 * @param[in] eqn
 *   EQ number.
 * @param[in] log_desc_n
 *   Log of number of descriptors in queue.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_aso_sq_create(void *ctx, struct mlx5_aso_sq *sq, int socket,
		   void *uar, uint32_t pdn,
		   uint32_t eqn, uint16_t log_desc_n)
{
	struct mlx5_devx_create_sq_attr attr = { 0 };
	struct mlx5_devx_modify_sq_attr modify_attr = { 0 };
	size_t pgsize = rte_mem_page_size();
	struct mlx5_devx_wq_attr *wq_attr = &attr.wq_attr;
	uint32_t sq_desc_n = 1 << log_desc_n;
	uint32_t wq_size = sizeof(struct mlx5_aso_wqe) * sq_desc_n;
	int ret;

	if (mlx5_aso_devx_reg_mr(ctx, (MLX5_ASO_AGE_ACTIONS_PER_POOL / 8) *
				 sq_desc_n, &sq->mr, socket, pdn))
		return -1;
	if (mlx5_aso_cq_create(ctx, &sq->cq, log_desc_n, socket,
			       mlx5_os_get_devx_uar_page_id(uar), eqn))
		goto error;
	sq->log_desc_n = log_desc_n;
	sq->umem_buf = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, wq_size +
				   sizeof(*sq->db_rec) * 2, 4096, socket);
	if (!sq->umem_buf) {
		DRV_LOG(ERR, "Can't allocate wqe buffer.");
		rte_errno = ENOMEM;
		goto error;
	}
	sq->wqe_umem = mlx5_os_umem_reg(ctx,
					(void *)(uintptr_t)sq->umem_buf,
					wq_size +
					sizeof(*sq->db_rec) * 2,
					IBV_ACCESS_LOCAL_WRITE);
	if (!sq->wqe_umem) {
		DRV_LOG(ERR, "Failed to register umem for SQ.");
		rte_errno = ENOMEM;
		goto error;
	}
	attr.state = MLX5_SQC_STATE_RST;
	attr.tis_lst_sz = 0;
	attr.tis_num = 0;
	attr.user_index = 0xFFFF;
	attr.cqn = sq->cq.cq->id;
	wq_attr->uar_page = mlx5_os_get_devx_uar_page_id(uar);
	wq_attr->pd = pdn;
	wq_attr->wq_type = MLX5_WQ_TYPE_CYCLIC;
	wq_attr->log_wq_pg_sz = rte_log2_u32(pgsize);
	wq_attr->wq_umem_id = mlx5_os_get_umem_id(sq->wqe_umem);
	wq_attr->wq_umem_offset = 0;
	wq_attr->wq_umem_valid = 1;
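	/*
	 * The WQ stride is one 64-byte basic block (log 6) and log_wq_sz
	 * counts basic blocks, so each ASO WQE spans two of them. The
	 * doorbell record shares the WQ umem and sits right after the WQE
	 * ring, at offset wq_size.
	 */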
	wq_attr->log_wq_stride = 6;
	wq_attr->log_wq_sz = rte_log2_u32(wq_size) - 6;
	wq_attr->dbr_umem_id = wq_attr->wq_umem_id;
	wq_attr->dbr_addr = wq_size;
	wq_attr->dbr_umem_valid = 1;
	sq->sq = mlx5_devx_cmd_create_sq(ctx, &attr);
	if (!sq->sq) {
		DRV_LOG(ERR, "Can't create sq object.");
		goto error;
	}
	modify_attr.state = MLX5_SQC_STATE_RDY;
	ret = mlx5_devx_cmd_modify_sq(sq->sq, &modify_attr);
	if (ret) {
		DRV_LOG(ERR, "Can't change sq state to ready.");
		goto error;
	}
	sq->pi = 0;
	sq->head = 0;
	sq->tail = 0;
	sq->sqn = sq->sq->id;
	sq->db_rec = RTE_PTR_ADD(sq->umem_buf, (uintptr_t)(wq_attr->dbr_addr));
	sq->uar_addr = mlx5_os_get_devx_uar_reg_addr(uar);
	mlx5_aso_init_sq(sq);
	return 0;
error:
	mlx5_aso_destroy_sq(sq);
	return -1;
}

/**
 * API to create and initialize Send Queue used for ASO access.
 *
 * @param[in] sh
 *   Pointer to shared device context.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_aso_queue_init(struct mlx5_dev_ctx_shared *sh)
{
	return mlx5_aso_sq_create(sh->ctx, &sh->aso_age_mng->aso_sq, 0,
				  sh->tx_uar, sh->pdn, sh->eqn,
				  MLX5_ASO_QUEUE_LOG_DESC);
}

/**
 * API to destroy Send Queue used for ASO access.
 *
 * @param[in] sh
 *   Pointer to shared device context.
 */
void
mlx5_aso_queue_uninit(struct mlx5_dev_ctx_shared *sh)
{
	mlx5_aso_destroy_sq(&sh->aso_age_mng->aso_sq);
}

/**
 * Write a burst of WQEs to ASO SQ.
 *
 * @param[in] mng
 *   ASO management data, contains the SQ.
 * @param[in] n
 *   Index of the last valid pool.
 *
 * @return
 *   Number of WQEs in burst.
 */
static uint16_t
mlx5_aso_sq_enqueue_burst(struct mlx5_aso_age_mng *mng, uint16_t n)
{
	volatile struct mlx5_aso_wqe *wqe;
	struct mlx5_aso_sq *sq = &mng->aso_sq;
	struct mlx5_aso_age_pool *pool;
	uint16_t size = 1 << sq->log_desc_n;
	uint16_t mask = size - 1;
	uint16_t max;
	uint16_t start_head = sq->head;

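	/*
	 * Post at most one WQE per pool still pending in this pass
	 * (n - sq->next), bounded by the free slots in the SQ ring
	 * (size - (head - tail)).
	 */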
	max = RTE_MIN(size - (uint16_t)(sq->head - sq->tail), n - sq->next);
	if (unlikely(!max))
		return 0;
	sq->elts[start_head & mask].burst_size = max;
	do {
		wqe = &sq->wqes[sq->head & mask];
		rte_prefetch0(&sq->wqes[(sq->head + 1) & mask]);
		/* Fill next WQE. */
		rte_spinlock_lock(&mng->resize_sl);
		pool = mng->pools[sq->next];
		rte_spinlock_unlock(&mng->resize_sl);
		sq->elts[sq->head & mask].pool = pool;
		wqe->general_cseg.misc =
			rte_cpu_to_be_32(((struct mlx5_devx_obj *)
					  (pool->flow_hit_aso_obj))->id);
		wqe->general_cseg.flags = RTE_BE32(MLX5_COMP_ONLY_FIRST_ERR <<
						   MLX5_COMP_MODE_OFFSET);
		wqe->general_cseg.opcode = rte_cpu_to_be_32
						(MLX5_OPCODE_ACCESS_ASO |
						 (ASO_OPC_MOD_FLOW_HIT <<
						  WQE_CSEG_OPC_MOD_OFFSET) |
						 (sq->pi <<
						  WQE_CSEG_WQE_INDEX_OFFSET));
		sq->pi += 2; /* Each WQE contains 2 WQEBB's. */
		sq->head++;
		sq->next++;
		max--;
	} while (max);
	wqe->general_cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS <<
					   MLX5_COMP_MODE_OFFSET);
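	/*
	 * Only the last WQE of the burst requests a completion; the others
	 * report only the first error. Publish the new producer index in the
	 * doorbell record, then ring the doorbell by writing the first
	 * 8 bytes of the last WQE to the UAR register. The barriers keep the
	 * WQE contents, the doorbell record and the UAR write ordered.
	 */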
	rte_io_wmb();
	sq->db_rec[MLX5_SND_DBR] = rte_cpu_to_be_32(sq->pi);
	rte_wmb();
	*sq->uar_addr = *(volatile uint64_t *)wqe; /* Assume 64 bit ARCH.*/
	rte_wmb();
	return sq->elts[start_head & mask].burst_size;
}

/**
 * Debug utility function. Dump contents of error CQE and WQE.
 *
 * @param[in] cqe
 *   Error CQE to dump.
 * @param[in] wqe
 *   Error WQE to dump.
 */
static void
mlx5_aso_dump_err_objs(volatile uint32_t *cqe, volatile uint32_t *wqe)
{
	int i;

	DRV_LOG(ERR, "Error cqe:");
	for (i = 0; i < 16; i += 4)
		DRV_LOG(ERR, "%08X %08X %08X %08X", cqe[i], cqe[i + 1],
			cqe[i + 2], cqe[i + 3]);
	DRV_LOG(ERR, "\nError wqe:");
	for (i = 0; i < (int)sizeof(struct mlx5_aso_wqe) / 4; i += 4)
		DRV_LOG(ERR, "%08X %08X %08X %08X", wqe[i], wqe[i + 1],
			wqe[i + 2], wqe[i + 3]);
}

/**
 * Handle case of error CQE.
 *
 * @param[in] sq
 *   ASO SQ to use.
 */
static void
mlx5_aso_cqe_err_handle(struct mlx5_aso_sq *sq)
{
	struct mlx5_aso_cq *cq = &sq->cq;
	uint32_t idx = cq->cq_ci & ((1 << cq->log_desc_n) - 1);
	volatile struct mlx5_err_cqe *cqe =
			(volatile struct mlx5_err_cqe *)&cq->cqes[idx];

	cq->errors++;
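	/* Locate the WQE the failed CQE points at and dump both objects. */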
	idx = rte_be_to_cpu_16(cqe->wqe_counter) & ((1u << sq->log_desc_n) - 1);
	mlx5_aso_dump_err_objs((volatile uint32_t *)cqe,
			       (volatile uint32_t *)&sq->wqes[idx]);
}

/**
 * Update ASO objects upon completion.
 *
 * @param[in] sh
 *   Shared device context.
 * @param[in] n
 *   Number of completed ASO objects.
 */
static void
mlx5_aso_age_action_update(struct mlx5_dev_ctx_shared *sh, uint16_t n)
{
	struct mlx5_aso_age_mng *mng = sh->aso_age_mng;
	struct mlx5_aso_sq *sq = &mng->aso_sq;
	struct mlx5_age_info *age_info;
	const uint16_t size = 1 << sq->log_desc_n;
	const uint16_t mask = size - 1;
	const uint64_t curr = MLX5_CURR_TIME_SEC;
	uint16_t expected = AGE_CANDIDATE;
	uint16_t i;

	for (i = 0; i < n; ++i) {
		uint16_t idx = (sq->tail + i) & mask;
		struct mlx5_aso_age_pool *pool = sq->elts[idx].pool;
		uint64_t diff = curr - pool->time_of_last_age_check;
		uint64_t *addr = sq->mr.buf;
		int j;

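		/*
		 * Each completed WQE queried one age pool; its hit bits
		 * start at a fixed offset in the MR buffer, one 64-bit word
		 * per 64 actions.
		 */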
		addr += idx * MLX5_ASO_AGE_ACTIONS_PER_POOL / 64;
		pool->time_of_last_age_check = curr;
		for (j = 0; j < MLX5_ASO_AGE_ACTIONS_PER_POOL; j++) {
			struct mlx5_aso_age_action *act = &pool->actions[j];
			struct mlx5_age_param *ap = &act->age_params;
			uint8_t byte;
			uint8_t offset;
			uint8_t *u8addr;
			uint8_t hit;

			if (__atomic_load_n(&ap->state, __ATOMIC_RELAXED) !=
					    AGE_CANDIDATE)
				continue;
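			/*
			 * One hit bit per action; the reported bitmap is
			 * byte-reversed relative to the action index, hence
			 * the mirrored byte offset below.
			 */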
			byte = 63 - (j / 8);
			offset = j % 8;
			u8addr = (uint8_t *)addr;
			hit = (u8addr[byte] >> offset) & 0x1;
			if (hit) {
				__atomic_store_n(&ap->sec_since_last_hit, 0,
						 __ATOMIC_RELAXED);
			} else {
				struct mlx5_priv *priv;

				__atomic_fetch_add(&ap->sec_since_last_hit,
						   diff, __ATOMIC_RELAXED);
				/* If timeout passed add to aged-out list. */
				if (ap->sec_since_last_hit <= ap->timeout)
					continue;
				priv =
				rte_eth_devices[ap->port_id].data->dev_private;
				age_info = GET_PORT_AGE_INFO(priv);
				rte_spinlock_lock(&age_info->aged_sl);
				if (__atomic_compare_exchange_n(&ap->state,
								&expected,
								AGE_TMOUT,
								false,
								__ATOMIC_RELAXED,
								__ATOMIC_RELAXED)) {
					LIST_INSERT_HEAD(&age_info->aged_aso,
							 act, next);
					MLX5_AGE_SET(age_info,
						     MLX5_AGE_EVENT_NEW);
				}
				rte_spinlock_unlock(&age_info->aged_sl);
			}
		}
	}
	mlx5_age_event_prepare(sh);
}
547 * Handle completions from WQEs sent to ASO SQ.
550 * Shared device context.
553 * Number of CQEs handled.
556 mlx5_aso_completion_handle(struct mlx5_dev_ctx_shared *sh)
558 struct mlx5_aso_age_mng *mng = sh->aso_age_mng;
559 struct mlx5_aso_sq *sq = &mng->aso_sq;
560 struct mlx5_aso_cq *cq = &sq->cq;
561 volatile struct mlx5_cqe *restrict cqe;
562 const unsigned int cq_size = 1 << cq->log_desc_n;
563 const unsigned int mask = cq_size - 1;
565 uint32_t next_idx = cq->cq_ci & mask;
566 const uint16_t max = (uint16_t)(sq->head - sq->tail);
		next_idx = (cq->cq_ci + 1) & mask;
		rte_prefetch0(&cq->cqes[next_idx]);
		cqe = &cq->cqes[idx];
		ret = check_cqe(cqe, cq_size, cq->cq_ci);
		/*
		 * Be sure owner read is done before any other cookie field or
		 * opaque field.
		 */
		rte_io_rmb();
		if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
			if (likely(ret == MLX5_CQE_STATUS_HW_OWN))
				break;
			mlx5_aso_cqe_err_handle(sq);
		} else {
			i += sq->elts[(sq->tail + i) & mask].burst_size;
		}
		cq->cq_ci++;
	} while (1);
	if (likely(i)) {
		mlx5_aso_age_action_update(sh, i);
		sq->tail += i;
		rte_io_wmb();
		cq->db_rec[0] = rte_cpu_to_be_32(cq->cq_ci);
	}
	return i;
}

/**
 * Periodically read CQEs and send WQEs to ASO SQ.
 *
 * @param[in] arg
 *   Shared device context containing the ASO SQ.
 */
static void
mlx5_flow_aso_alarm(void *arg)
{
	struct mlx5_dev_ctx_shared *sh = arg;
	struct mlx5_aso_sq *sq = &sh->aso_age_mng->aso_sq;
	uint32_t us = 100u;
	uint16_t n;

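	/*
	 * Each invocation handles pending completions and queues the next
	 * burst of pool queries. While a pass over the pools is in progress
	 * the alarm re-arms every 100us; once all pools have been queried
	 * the next invocation is scheduled a full second later.
	 */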
	rte_spinlock_lock(&sh->aso_age_mng->resize_sl);
	n = sh->aso_age_mng->next;
	rte_spinlock_unlock(&sh->aso_age_mng->resize_sl);
	mlx5_aso_completion_handle(sh);
	if (sq->next == n) {
		/* End of loop: wait 1 second. */
		us = US_PER_S;
		sq->next = 0;
	}
	mlx5_aso_sq_enqueue_burst(sh->aso_age_mng, n);
	if (rte_eal_alarm_set(us, mlx5_flow_aso_alarm, sh))
		DRV_LOG(ERR, "Cannot reinitialize aso alarm.");
}

/**
 * API to start ASO access using ASO SQ.
 *
 * @param[in] sh
 *   Pointer to shared device context.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_aso_queue_start(struct mlx5_dev_ctx_shared *sh)
{
	if (rte_eal_alarm_set(US_PER_S, mlx5_flow_aso_alarm, sh)) {
		DRV_LOG(ERR, "Cannot reinitialize ASO age alarm.");
		return -rte_errno;
	}
	return 0;
}
648 * API to stop ASO access using ASO SQ.
651 * Pointer to shared device context.
654 * 0 on success, a negative errno value otherwise and rte_errno is set.
657 mlx5_aso_queue_stop(struct mlx5_dev_ctx_shared *sh)
661 if (!sh->aso_age_mng->aso_sq.sq)
665 rte_eal_alarm_cancel(mlx5_flow_aso_alarm, sh);
666 if (rte_errno != EINPROGRESS)