4 * Copyright (c) 2017 Red Hat, Inc.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
10 * * Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * * Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
16 * * Neither the name of Intel Corporation nor the names of its
17 * contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 #ifdef RTE_LIBRTE_VHOST_NUMA
37 #include <rte_tailq.h>
/*
 * Node type shared by the two per-virtqueue lists handled in this file
 * (vq->iotlb_list and vq->iotlb_pending_list).
 *
 * Besides the list linkage, the functions in this file read and write
 * members named iova, uaddr, size and perm; their declarations are not
 * visible in this extract.
 */
42 struct vhost_iotlb_entry {
/* Linkage member named in every TAILQ macro call in this file. */
43 TAILQ_ENTRY(vhost_iotlb_entry) next;
/*
 * Element count passed to rte_mempool_create() in vhost_user_iotlb_init().
 * Both cached entries and pending misses are allocated from that one pool
 * (vq->iotlb_pool), so this bounds their combined number per virtqueue.
 */
51 #define IOTLB_CACHE_SIZE 2048
/*
 * Empty the pending-miss list of a virtqueue.
 *
 * While holding the pending-list write lock, every node found on
 * vq->iotlb_pending_list is unlinked and given back to vq->iotlb_pool.
 */
54 vhost_user_iotlb_pending_remove_all(struct vhost_virtqueue *vq)
56 struct vhost_iotlb_entry *node, *temp_node;
58 rte_rwlock_write_lock(&vq->iotlb_pending_lock);
/* The _SAFE iterator is used because node is unlinked inside the loop. */
60 TAILQ_FOREACH_SAFE(node, &vq->iotlb_pending_list, next, temp_node) {
61 TAILQ_REMOVE(&vq->iotlb_pending_list, node, next);
62 rte_mempool_put(vq->iotlb_pool, node);
65 rte_rwlock_write_unlock(&vq->iotlb_pending_lock);
/*
 * Search the pending-miss list for a node whose iova and perm are both
 * exactly equal to the requested values.
 *
 * The scan is done under the pending-list read lock. The rest of the
 * parameter list and what is reported for a hit or a miss are not visible
 * in this extract.
 */
69 vhost_user_iotlb_pending_miss(struct vhost_virtqueue *vq, uint64_t iova,
72 struct vhost_iotlb_entry *node;
75 rte_rwlock_read_lock(&vq->iotlb_pending_lock);
77 TAILQ_FOREACH(node, &vq->iotlb_pending_list, next) {
/* Exact match on both fields; perm is compared for equality, not as a mask. */
78 if ((node->iova == iova) && (node->perm == perm)) {
84 rte_rwlock_read_unlock(&vq->iotlb_pending_lock);
/*
 * Append a node to the tail of the pending-miss list.
 *
 * The node is taken from vq->iotlb_pool. The tests on ret are not visible in
 * this extract; going by the log messages, an empty pool leads to all pending
 * misses being dropped and one retry of the allocation, and a second failure
 * is logged at ERR level.
 *
 * NOTE(review): the stores that fill the new node from iova and perm are not
 * visible here; confirm they happen before the insertion.
 */
90 vhost_user_iotlb_pending_insert(struct vhost_virtqueue *vq,
91 uint64_t iova, uint8_t perm)
93 struct vhost_iotlb_entry *node;
96 ret = rte_mempool_get(vq->iotlb_pool, (void **)&node);
98 RTE_LOG(INFO, VHOST_CONFIG,
99 "IOTLB pool empty, clear pending misses\n");
/* Returns every pending node to the pool so that the retry below can succeed. */
100 vhost_user_iotlb_pending_remove_all(vq);
101 ret = rte_mempool_get(vq->iotlb_pool, (void **)&node);
103 RTE_LOG(ERR, VHOST_CONFIG, "IOTLB pool still empty, failure\n");
/* Only the list update itself is done under the pending-list write lock. */
111 rte_rwlock_write_lock(&vq->iotlb_pending_lock);
113 TAILQ_INSERT_TAIL(&vq->iotlb_pending_list, node, next);
115 rte_rwlock_write_unlock(&vq->iotlb_pending_lock);
/*
 * Remove pending misses under the pending-list write lock, releasing each
 * removed node to vq->iotlb_pool.
 *
 * Three tests are applied to every node: iova below the start of the range,
 * iova at or beyond iova + size, and perm containing a bit that is not set
 * in the perm argument. The statement guarded by each test is not visible in
 * this extract.
 * NOTE(review): presumably each test skips the node, so only nodes inside
 * [iova, iova + size) whose perm is covered by the perm argument are
 * removed; confirm.
 */
119 vhost_user_iotlb_pending_remove(struct vhost_virtqueue *vq,
120 uint64_t iova, uint64_t size, uint8_t perm)
122 struct vhost_iotlb_entry *node, *temp_node;
124 rte_rwlock_write_lock(&vq->iotlb_pending_lock);
126 TAILQ_FOREACH_SAFE(node, &vq->iotlb_pending_list, next, temp_node) {
127 if (node->iova < iova)
129 if (node->iova >= iova + size)
/* True when node->perm has at least one bit that perm lacks. */
131 if ((node->perm & perm) != node->perm)
133 TAILQ_REMOVE(&vq->iotlb_pending_list, node, next);
134 rte_mempool_put(vq->iotlb_pool, node);
137 rte_rwlock_write_unlock(&vq->iotlb_pending_lock);
/*
 * Empty the IOTLB cache list of a virtqueue.
 *
 * While holding the cache write lock, every node on vq->iotlb_list is
 * unlinked and given back to vq->iotlb_pool, then the entry counter is
 * reset to zero.
 */
141 vhost_user_iotlb_cache_remove_all(struct vhost_virtqueue *vq)
143 struct vhost_iotlb_entry *node, *temp_node;
145 rte_rwlock_write_lock(&vq->iotlb_lock);
147 TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
148 TAILQ_REMOVE(&vq->iotlb_list, node, next);
149 rte_mempool_put(vq->iotlb_pool, node);
/* Keep the counter in step with the now empty list. */
152 vq->iotlb_cache_nr = 0;
154 rte_rwlock_write_unlock(&vq->iotlb_lock);
/*
 * Release a cached node chosen with the help of rte_rand().
 *
 * Under the cache write lock, an index is drawn in the range
 * [0, vq->iotlb_cache_nr) and the cache list is walked; a node is unlinked,
 * returned to vq->iotlb_pool and the entry counter is decremented. How the
 * drawn index selects the node is not visible in this extract.
 */
158 vhost_user_iotlb_cache_random_evict(struct vhost_virtqueue *vq)
160 struct vhost_iotlb_entry *node, *temp_node;
163 rte_rwlock_write_lock(&vq->iotlb_lock);
/*
 * NOTE(review): this modulo divides by zero when vq->iotlb_cache_nr is 0.
 * No guard is visible here, and vhost_user_iotlb_cache_insert() calls this
 * function whenever the pool is empty; the pool is also consumed by pending
 * misses, so an empty cache with an exhausted pool looks reachable. Confirm.
 */
165 entry_idx = rte_rand() % vq->iotlb_cache_nr;
167 TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
169 TAILQ_REMOVE(&vq->iotlb_list, node, next);
170 rte_mempool_put(vq->iotlb_pool, node);
171 vq->iotlb_cache_nr--;
177 rte_rwlock_write_unlock(&vq->iotlb_lock);
/*
 * Add a translation (iova, uaddr, size, perm) to the IOTLB cache list.
 *
 * A node is taken from vq->iotlb_pool. The tests on ret are not visible in
 * this extract; going by the log messages, an empty pool triggers one random
 * eviction and one retry, and a second failure is logged at ERR level.
 *
 * Under the cache write lock the list is scanned: a node with the same iova
 * causes the new node to be returned to the pool, while the first node with a
 * larger iova gets the new node inserted in front of it. The tail insertion
 * covers the case where no such node exists. The control transfers after
 * those branches are not visible here.
 *
 * Matching pending misses are removed before the cache lock is released.
 */
181 vhost_user_iotlb_cache_insert(struct vhost_virtqueue *vq, uint64_t iova,
182 uint64_t uaddr, uint64_t size, uint8_t perm)
184 struct vhost_iotlb_entry *node, *new_node;
187 ret = rte_mempool_get(vq->iotlb_pool, (void **)&new_node);
189 RTE_LOG(DEBUG, VHOST_CONFIG, "IOTLB pool empty, evict one entry\n");
190 vhost_user_iotlb_cache_random_evict(vq);
191 ret = rte_mempool_get(vq->iotlb_pool, (void **)&new_node);
193 RTE_LOG(ERR, VHOST_CONFIG, "IOTLB pool still empty, failure\n");
/* The node is filled in before the lock is taken; it is not yet on any list. */
198 new_node->iova = iova;
199 new_node->uaddr = uaddr;
200 new_node->size = size;
201 new_node->perm = perm;
203 rte_rwlock_write_lock(&vq->iotlb_lock);
205 TAILQ_FOREACH(node, &vq->iotlb_list, next) {
207 * Entries must be invalidated before being updated.
208 * So if iova already in list, assume identical.
210 if (node->iova == new_node->iova) {
211 rte_mempool_put(vq->iotlb_pool, new_node);
213 } else if (node->iova > new_node->iova) {
/* Inserting before the first larger iova keeps the list in ascending iova order. */
214 TAILQ_INSERT_BEFORE(node, new_node, next);
215 vq->iotlb_cache_nr++;
220 TAILQ_INSERT_TAIL(&vq->iotlb_list, new_node, next);
221 vq->iotlb_cache_nr++;
/*
 * Called with iotlb_lock still held; the callee takes iotlb_pending_lock,
 * so the lock order here is iotlb_lock first, then iotlb_pending_lock.
 */
224 vhost_user_iotlb_pending_remove(vq, iova, size, perm);
226 rte_rwlock_write_unlock(&vq->iotlb_lock);
/*
 * Invalidate cached translations for the range starting at iova and spanning
 * size bytes.
 *
 * Under the cache write lock the list is walked; a node is unlinked, returned
 * to vq->iotlb_pool and the entry counter decremented when iova lies below
 * the end of that node (node->iova + node->size). The statement guarded by
 * the first test in the loop is not visible in this extract.
 * NOTE(review): presumably that test stops the walk once nodes start beyond
 * the range, relying on the list being sorted by iova; confirm.
 */
231 vhost_user_iotlb_cache_remove(struct vhost_virtqueue *vq,
232 uint64_t iova, uint64_t size)
234 struct vhost_iotlb_entry *node, *temp_node;
239 rte_rwlock_write_lock(&vq->iotlb_lock);
241 TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
/*
 * NOTE(review): the comparison is strict, so a node starting exactly at
 * iova + size does not satisfy it and goes on to the removal test below,
 * which it passes. Confirm whether removing that adjacent node is intended.
 */
243 if (unlikely(iova + size < node->iova))
246 if (iova < node->iova + node->size) {
247 TAILQ_REMOVE(&vq->iotlb_list, node, next);
248 rte_mempool_put(vq->iotlb_pool, node);
249 vq->iotlb_cache_nr--;
253 rte_rwlock_write_unlock(&vq->iotlb_lock);
/*
 * Translate iova through the cached entries and measure how much of the
 * requested length (read from the size pointer) is covered.
 *
 * The list is walked in order. For a node containing iova, the address is
 * computed as node->uaddr plus the offset of iova inside the node, the
 * bytes from that offset to the end of the node are added to mapped, and
 * iova is advanced to the end of the node so that a following node can
 * extend the mapping. The statements guarded by the tests in the loop, the
 * handling of a partially mapped request and the returned value are not
 * visible in this extract.
 *
 * NOTE(review): no lock is taken in the visible lines; presumably the caller
 * holds vq->iotlb_lock. Confirm against the callers.
 */
257 vhost_user_iotlb_cache_find(struct vhost_virtqueue *vq, uint64_t iova,
258 uint64_t *size, uint8_t perm)
260 struct vhost_iotlb_entry *node;
261 uint64_t offset, vva = 0, mapped = 0;
/* Zero-length request. */
263 if (unlikely(!*size))
266 TAILQ_FOREACH(node, &vq->iotlb_list, next) {
267 /* List sorted by iova */
268 if (unlikely(iova < node->iova))
/* iova lies at or past the end of this node. */
271 if (iova >= node->iova + node->size)
/* True when the node lacks at least one of the requested permission bits. */
274 if (unlikely((perm & node->perm) != perm)) {
279 offset = iova - node->iova;
281 vva = node->uaddr + offset;
283 mapped += node->size - offset;
/* Continue the lookup from the first byte after this node. */
284 iova = node->iova + node->size;
291 /* Only part of the requested chunk is mapped */
292 if (unlikely(mapped < *size))
/*
 * Set up, or reset, the IOTLB state of virtqueue vq_index of a device.
 *
 * Steps shown by the visible lines:
 *  - if the virtqueue already has a pool, all cached and pending nodes are
 *    dropped first;
 *  - with RTE_LIBRTE_VHOST_NUMA defined, get_mempolicy() is queried with the
 *    virtqueue address to fill in socket;
 *  - both rwlocks and both lists are (re)initialized;
 *  - a pool name is formatted, any existing pool with that name is looked up
 *    and freed, and a new pool of IOTLB_CACHE_SIZE elements of
 *    sizeof(struct vhost_iotlb_entry) bytes is created on socket;
 *  - a creation failure is logged at ERR level;
 *  - the cache entry counter is reset to zero.
 *
 * The values returned on success and failure are not visible in this extract.
 */
299 vhost_user_iotlb_init(struct virtio_net *dev, int vq_index)
301 char pool_name[RTE_MEMPOOL_NAMESIZE];
302 struct vhost_virtqueue *vq = dev->virtqueue[vq_index];
305 if (vq->iotlb_pool) {
307 * The cache has already been initialized,
308 * just drop all cached and pending entries.
310 vhost_user_iotlb_cache_remove_all(vq);
311 vhost_user_iotlb_pending_remove_all(vq);
314 #ifdef RTE_LIBRTE_VHOST_NUMA
/* MPOL_F_NODE | MPOL_F_ADDR asks for the node that backs the address vq. */
315 if (get_mempolicy(&socket, NULL, 0, vq, MPOL_F_NODE | MPOL_F_ADDR) != 0)
/*
 * NOTE(review): the locks and list heads are reinitialized even when the
 * virtqueue was already set up; this looks safe only if no other thread is
 * using them at this point. Confirm.
 */
319 rte_rwlock_init(&vq->iotlb_lock);
320 rte_rwlock_init(&vq->iotlb_pending_lock);
322 TAILQ_INIT(&vq->iotlb_list);
323 TAILQ_INIT(&vq->iotlb_pending_list);
/* The remaining snprintf() arguments are not visible in this extract. */
325 snprintf(pool_name, sizeof(pool_name), "iotlb_cache_%d_%d",
328 /* If already created, free it and recreate */
329 vq->iotlb_pool = rte_mempool_lookup(pool_name);
331 rte_mempool_free(vq->iotlb_pool);
/* The flag list continues on lines that are not visible in this extract. */
333 vq->iotlb_pool = rte_mempool_create(pool_name,
334 IOTLB_CACHE_SIZE, sizeof(struct vhost_iotlb_entry), 0,
335 0, 0, NULL, NULL, NULL, socket,
336 MEMPOOL_F_NO_CACHE_ALIGN |
339 if (!vq->iotlb_pool) {
340 RTE_LOG(ERR, VHOST_CONFIG,
341 "Failed to create IOTLB cache pool (%s)\n",
346 vq->iotlb_cache_nr = 0;