1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2019 Mellanox Technologies, Ltd
6 #include <rte_malloc.h>
8 #include <rte_common.h>
9 #include <rte_sched_common.h>
12 #include <mlx5_common.h>
14 #include "mlx5_vdpa_utils.h"
15 #include "mlx5_vdpa.h"
/*
 * Release all memory-registration resources held by @priv.
 *
 * Walks priv->mr_list: for every entry the DevX mkey is destroyed and,
 * for direct (non-indirect) entries only, the associated umem is
 * deregistered; each entry is then removed from the list.  Afterwards
 * the list head is re-initialized and the live-migration wrapped mkey
 * and the null MR are torn down.
 *
 * NOTE(review): this listing is an extracted excerpt — the loop header,
 * the per-entry free, the guarding conditions around the lm_mr/null_mr
 * teardown and the closing braces are not visible here.  'next' being
 * captured before each SLIST_REMOVE() implies a safe-iteration loop.
 */
18 mlx5_vdpa_mem_dereg(struct mlx5_vdpa_priv *priv)
20 struct mlx5_vdpa_query_mr *entry;
21 struct mlx5_vdpa_query_mr *next;
23 entry = SLIST_FIRST(&priv->mr_list);
/* Save the successor before destroying/removing the current entry. */
25 next = SLIST_NEXT(entry, next);
26 claim_zero(mlx5_devx_cmd_destroy(entry->mkey));
/* Only direct mkeys own a registered umem; the indirect one does not. */
27 if (!entry->is_indirect)
28 claim_zero(mlx5_glue->devx_umem_dereg(entry->umem));
29 SLIST_REMOVE(&priv->mr_list, entry, mlx5_vdpa_query_mr, next);
33 SLIST_INIT(&priv->mr_list);
/* Presumably guarded by a validity check not visible in this excerpt. */
35 mlx5_os_wrapped_mkey_destroy(&priv->lm_mr);
37 claim_zero(mlx5_glue->dereg_mr(priv->null_mr));
/*
 * qsort() comparator: order vhost memory regions by ascending guest
 * physical address.
 *
 * NOTE(review): the return statements (and closing brace) are not
 * visible in this excerpt; the visible comparisons establish the
 * ordering key only.
 */
47 mlx5_vdpa_regions_addr_cmp(const void *a, const void *b)
49 const struct rte_vhost_mem_region *region_a = a;
50 const struct rte_vhost_mem_region *region_b = b;
52 if (region_a->guest_phys_addr < region_b->guest_phys_addr)
54 if (region_a->guest_phys_addr > region_b->guest_phys_addr)
/*
 * Number of KLM entries needed to cover @sz bytes when each entry maps
 * at most MLX5_MAX_KLM_BYTE_COUNT bytes (i.e. ceiling division of sz by
 * MLX5_MAX_KLM_BYTE_COUNT, via RTE_ALIGN_CEIL).
 */
59 #define KLM_NUM_MAX_ALIGN(sz) (RTE_ALIGN_CEIL(sz, MLX5_MAX_KLM_BYTE_COUNT) / \
60 MLX5_MAX_KLM_BYTE_COUNT)
63 * Allocate and sort the region list and choose indirect mkey mode:
64 * 1. Calculate GCD, guest memory size and indirect mkey entries num per mode.
65 * 2. Align GCD to the maximum allowed size(2G) and to be power of 2.
66 * 3. Decide the indirect mkey mode according to the following rules:
67 * a. If both KLM_FBS entries number and KLM entries number are bigger
68 * than the maximum allowed(MLX5_DEVX_MAX_KLM_ENTRIES) - error.
69 * b. KLM mode if KLM_FBS entries number is bigger than the maximum
70 * allowed(MLX5_DEVX_MAX_KLM_ENTRIES).
71 * c. KLM mode if GCD is smaller than the minimum allowed(4K).
72 * d. KLM mode if the total size of KLM entries is in one cache line
73 * and the total size of KLM_FBS entries is not in one cache line.
74 * e. Otherwise, KLM_FBS mode.
/*
 * Fetch the guest memory layout of @vid, sort it, and derive the
 * indirect-mkey parameters per the mode-selection rules documented
 * above this function.
 *
 * Output parameters:
 *   *mode        - MLX5_MKC_ACCESS_MODE_KLM or MLX5_MKC_ACCESS_MODE_KLM_FBS.
 *   *mem_size    - total guest span: end of the last sorted region minus
 *                  start of the first one (holes included).
 *   *gcd         - GCD of all region and hole sizes, capped to
 *                  MLX5_MAX_KLM_BYTE_COUNT and reduced to a power of 2.
 *   *entries_num - KLM entry count for the selected mode.
 *
 * Returns the rte_vhost_memory table on success (caller owns it);
 * presumably NULL on failure — the return statements are not visible in
 * this excerpt.
 *
 * NOTE(review): this listing is an extracted excerpt — several lines
 * (braces, the error-path returns, the first-iteration guard before the
 * hole computation, the else branch of the mode selection) are missing
 * from view; comments below describe only what the visible lines show.
 */
76 static struct rte_vhost_memory *
77 mlx5_vdpa_vhost_mem_regions_prepare(int vid, uint8_t *mode, uint64_t *mem_size,
78 uint64_t *gcd, uint32_t *entries_num)
80 struct rte_vhost_memory *mem;
82 uint64_t klm_entries_num = 0;
83 uint64_t klm_fbs_entries_num;
85 int ret = rte_vhost_get_mem_table(vid, &mem);
88 DRV_LOG(ERR, "Failed to get VM memory layout vid =%d.", vid);
/* Sort regions by ascending guest physical address (comparator above). */
92 qsort(mem->regions, mem->nregions, sizeof(mem->regions[0]),
93 mlx5_vdpa_regions_addr_cmp);
/* Span from the first region's start to the last region's end. */
94 *mem_size = (mem->regions[(mem->nregions - 1)].guest_phys_addr) +
95 (mem->regions[(mem->nregions - 1)].size) -
96 (mem->regions[0].guest_phys_addr);
98 for (i = 0; i < mem->nregions; ++i) {
99 DRV_LOG(INFO, "Region %u: HVA 0x%" PRIx64 ", GPA 0x%" PRIx64
100 ", size 0x%" PRIx64 ".", i,
101 mem->regions[i].host_user_addr,
102 mem->regions[i].guest_phys_addr, mem->regions[i].size);
/*
 * Hole between this region and the previous one; contributes to
 * both the GCD and the KLM entry count.  Presumably guarded by
 * an i > 0 check not visible in this excerpt.
 */
105 size = mem->regions[i].guest_phys_addr -
106 (mem->regions[i - 1].guest_phys_addr +
107 mem->regions[i - 1].size);
108 *gcd = rte_get_gcd64(*gcd, size);
109 klm_entries_num += KLM_NUM_MAX_ALIGN(size);
/* The region itself contributes to the GCD and KLM count as well. */
111 size = mem->regions[i].size;
112 *gcd = rte_get_gcd64(*gcd, size);
113 klm_entries_num += KLM_NUM_MAX_ALIGN(size);
/* Cap the GCD to the maximum single KLM entry size (2G). */
115 if (*gcd > MLX5_MAX_KLM_BYTE_COUNT)
116 *gcd = rte_get_gcd64(*gcd, MLX5_MAX_KLM_BYTE_COUNT);
/*
 * FBS mode needs a power-of-2 entity size: shrink the candidate to
 * the largest power of 2 that still divides the GCD evenly.
 */
117 if (!RTE_IS_POWER_OF_2(*gcd)) {
118 uint64_t candidate_gcd = rte_align64prevpow2(*gcd);
120 while (candidate_gcd > 1 && (*gcd % candidate_gcd))
122 DRV_LOG(DEBUG, "GCD 0x%" PRIx64 " is not power of 2. Adjusted "
123 "GCD is 0x%" PRIx64 ".", *gcd, candidate_gcd);
124 *gcd = candidate_gcd;
126 klm_fbs_entries_num = *mem_size / *gcd;
/*
 * KLM mode when: FBS entity is below the 4K minimum, or FBS would
 * overflow the device limit, or plain KLM fits in one cache line
 * while FBS does not (rules c/b/d from the comment above).
 */
127 if (*gcd < MLX5_MIN_KLM_FIXED_BUFFER_SIZE || klm_fbs_entries_num >
128 MLX5_DEVX_MAX_KLM_ENTRIES ||
129 ((klm_entries_num * sizeof(struct mlx5_klm)) <=
130 RTE_CACHE_LINE_SIZE && (klm_fbs_entries_num *
131 sizeof(struct mlx5_klm)) >
132 RTE_CACHE_LINE_SIZE)) {
133 *mode = MLX5_MKC_ACCESS_MODE_KLM;
134 *entries_num = klm_entries_num;
135 DRV_LOG(INFO, "Indirect mkey mode is KLM.");
/* Otherwise fixed-buffer-size mode (the else line is not visible). */
137 *mode = MLX5_MKC_ACCESS_MODE_KLM_FBS;
138 *entries_num = klm_fbs_entries_num;
139 DRV_LOG(INFO, "Indirect mkey mode is KLM Fixed Buffer Size.");
141 DRV_LOG(DEBUG, "Memory registration information: nregions = %u, "
142 "mem_size = 0x%" PRIx64 ", GCD = 0x%" PRIx64
143 ", klm_fbs_entries_num = 0x%" PRIx64 ", klm_entries_num = 0x%"
144 PRIx64 ".", mem->nregions, *mem_size, *gcd, klm_fbs_entries_num,
/* Rule a: both modes exceed the device limit - memory too fragmented. */
146 if (*entries_num > MLX5_DEVX_MAX_KLM_ENTRIES) {
147 DRV_LOG(ERR, "Failed to prepare memory of vid %d - memory is "
148 "too fragmented.", vid);
/*
 * Clamp a KLM entry size to the per-entry maximum
 * (MLX5_MAX_KLM_BYTE_COUNT, i.e. 2G per the comment below).
 */
155 #define KLM_SIZE_MAX_ALIGN(sz) ((sz) > MLX5_MAX_KLM_BYTE_COUNT ? \
156 MLX5_MAX_KLM_BYTE_COUNT : (sz))
159 * The target here is to group all the physical memory regions of the
160 * virtio device in one indirect mkey.
161 * For KLM Fixed Buffer Size mode (HW find the translation entry in one
162 * read according to the guest physical address):
163 * All the sub-direct mkeys of it must be in the same size, hence, each
164 * one of them should be in the GCD size of all the virtio memory
165 * regions and the holes between them.
166 * For KLM mode (each entry may be in different size so HW must iterate
168 * Each virtio memory region and each hole between them have one entry,
169 * just need to cover the maximum allowed size(2G) by splitting entries
170 * which their associated memory regions are bigger than 2G.
171 * It means that each virtio memory region may be mapped to more than
172 * one direct mkey in the 2 modes.
173 * All the holes of invalid memory between the virtio memory regions
174 * will be mapped to the null memory region for security.
/*
 * Register all guest memory of priv->vid in one indirect mkey.
 *
 * Steps visible in this excerpt:
 *   1. Derive mode/mem_size/gcd/entries_num via
 *      mlx5_vdpa_vhost_mem_regions_prepare().
 *   2. Allocate a null MR (backs the holes between regions).
 *   3. For each vhost region: register its HVA range as a umem, create
 *      a direct mkey over it, and append KLM entries covering both the
 *      hole before the region and the region itself.
 *   4. Create the indirect mkey over the whole span and record its
 *      index in priv->gpa_mkey_index.
 *
 * NOTE(review): this listing is an extracted excerpt — the return type,
 * error checks after several calls, the loop increments, the goto error
 * labels' structure and the final return are not visible; the trailing
 * lines (destroy/dereg/mem_dereg) are clearly the unwind path.
 */
177 mlx5_vdpa_mem_register(struct mlx5_vdpa_priv *priv)
179 struct mlx5_devx_mkey_attr mkey_attr;
180 struct mlx5_vdpa_query_mr *entry = NULL;
181 struct rte_vhost_mem_region *reg = NULL;
183 uint32_t entries_num = 0;
191 struct rte_vhost_memory *mem = mlx5_vdpa_vhost_mem_regions_prepare
192 (priv->vid, &mode, &mem_size, &gcd, &entries_num);
/*
 * NOTE(review): VLA sized by entries_num (bounded above by
 * MLX5_DEVX_MAX_KLM_ENTRIES in the prepare step); large values live on
 * the stack — confirm the bound keeps this within stack limits.
 */
193 struct mlx5_klm klm_array[entries_num];
/* Null MR: holes in guest memory are mapped to it for security. */
198 priv->null_mr = mlx5_glue->alloc_null_mr(priv->cdev->pd);
199 if (!priv->null_mr) {
200 DRV_LOG(ERR, "Failed to allocate null MR.");
204 DRV_LOG(DEBUG, "Dump fill Mkey = %u.", priv->null_mr->lkey);
205 memset(&mkey_attr, 0, sizeof(mkey_attr));
206 for (i = 0; i < mem->nregions; i++) {
207 reg = &mem->regions[i];
208 entry = rte_zmalloc(__func__, sizeof(*entry), 0);
211 DRV_LOG(ERR, "Failed to allocate mem entry memory.");
/* Register the region's host VA range so DevX can build a direct mkey. */
214 entry->umem = mlx5_glue->devx_umem_reg(priv->cdev->ctx,
215 (void *)(uintptr_t)reg->host_user_addr,
216 reg->size, IBV_ACCESS_LOCAL_WRITE);
218 DRV_LOG(ERR, "Failed to register Umem by Devx.");
/* Direct mkey addressed by guest physical address, backed by the umem. */
222 mkey_attr.addr = (uintptr_t)(reg->guest_phys_addr);
223 mkey_attr.size = reg->size;
224 mkey_attr.umem_id = entry->umem->umem_id;
225 mkey_attr.pd = priv->cdev->pdn;
226 mkey_attr.pg_access = 1;
227 entry->mkey = mlx5_devx_cmd_mkey_create(priv->cdev->ctx,
230 DRV_LOG(ERR, "Failed to create direct Mkey.");
234 entry->addr = (void *)(uintptr_t)(reg->host_user_addr);
235 entry->length = reg->size;
236 entry->is_indirect = 0;
/*
 * Hole between the previous region and this one; presumably inside
 * an i > 0 branch not visible in this excerpt.
 */
239 uint64_t empty_region_sz = reg->guest_phys_addr -
240 (mem->regions[i - 1].guest_phys_addr +
241 mem->regions[i - 1].size);
243 if (empty_region_sz > 0) {
244 sadd = mem->regions[i - 1].guest_phys_addr +
245 mem->regions[i - 1].size;
/* KLM: per-entry size capped at 2G; FBS: fixed GCD-sized entries. */
246 klm_size = mode == MLX5_MKC_ACCESS_MODE_KLM ?
247 KLM_SIZE_MAX_ALIGN(empty_region_sz) : gcd;
248 for (k = 0; k < empty_region_sz;
250 klm_array[klm_index].byte_count =
251 k + klm_size > empty_region_sz ?
252 empty_region_sz - k : klm_size;
/* Hole entries point at the null mkey (lkey line not fully visible). */
253 klm_array[klm_index].mkey =
255 klm_array[klm_index].address = sadd + k;
/* Entries for the region itself, pointing at its direct mkey. */
260 klm_size = mode == MLX5_MKC_ACCESS_MODE_KLM ?
261 KLM_SIZE_MAX_ALIGN(reg->size) : gcd;
262 for (k = 0; k < reg->size; k += klm_size) {
263 klm_array[klm_index].byte_count = k + klm_size >
264 reg->size ? reg->size - k : klm_size;
265 klm_array[klm_index].mkey = entry->mkey->id;
266 klm_array[klm_index].address = reg->guest_phys_addr + k;
269 SLIST_INSERT_HEAD(&priv->mr_list, entry, next);
/* Indirect mkey covering the whole guest span via klm_array. */
271 mkey_attr.addr = (uintptr_t)(mem->regions[0].guest_phys_addr);
272 mkey_attr.size = mem_size;
273 mkey_attr.pd = priv->cdev->pdn;
274 mkey_attr.umem_id = 0;
275 /* Must be zero for KLM mode. */
276 mkey_attr.log_entity_size = mode == MLX5_MKC_ACCESS_MODE_KLM_FBS ?
277 rte_log2_u64(gcd) : 0;
278 mkey_attr.pg_access = 0;
279 mkey_attr.klm_array = klm_array;
280 mkey_attr.klm_num = klm_index;
281 entry = rte_zmalloc(__func__, sizeof(*entry), 0);
283 DRV_LOG(ERR, "Failed to allocate memory for indirect entry.");
287 entry->mkey = mlx5_devx_cmd_mkey_create(priv->cdev->ctx, &mkey_attr);
289 DRV_LOG(ERR, "Failed to create indirect Mkey.");
293 entry->is_indirect = 1;
294 SLIST_INSERT_HEAD(&priv->mr_list, entry, next);
295 priv->gpa_mkey_index = entry->mkey->id;
/* Error-unwind path: undo the partially-built entry, then full dereg. */
300 mlx5_devx_cmd_destroy(entry->mkey);
302 mlx5_glue->devx_umem_dereg(entry->umem);
305 mlx5_vdpa_mem_dereg(priv);