4 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 #include <sys/ioctl.h>
40 #include <xen/sys/gntalloc.h>
42 #include <rte_common.h>
43 #include <rte_mempool.h>
44 #include <rte_memory.h>
45 #include <rte_errno.h>
47 #include "rte_xen_lib.h"
48 #include "rte_eth_xenvirt.h"
/*
 * Result bundle that _create_mempool() returns by value.
 * NOTE(review): only the first member is visible in this excerpt; other code
 * in this file also accesses va, pg_num, gref_arr, mempool_idx and
 * start_index on this struct.
 */
57 struct _mempool_gntalloc_info {
58 struct rte_mempool *mp; /* passed to grant_gntalloc_mbuf_pool() by rte_mempool_gntalloc_create() */
/*
 * File-wide counter, initialised to -1. _create_mempool() stores the result
 * of rte_atomic32_add_return(&global_xenvirt_mempool_idx, 1) as mempool_idx.
 * NOTE(review): presumably the -1 start makes the first pool get index 0 -
 * confirm against the rte_atomic32_add_return() contract.
 */
68 static rte_atomic32_t global_xenvirt_mempool_idx = RTE_ATOMIC32_INIT(-1);
/*
 * Comparator handed to qsort() in _create_mempool(): orders struct _gntarr
 * entries by their pa member, returning the difference of the two pa values.
 * NOTE(review): the return type line is not visible in this excerpt. pa is
 * filled from a phys_addr_t array in _create_mempool(), so if this function
 * returns int the subtraction can be truncated or wrap, yielding a wrong sign
 * for addresses that are far apart. An explicit less-than / greater-than
 * comparison returning -1, 0 or 1 would avoid that - confirm against the
 * full definition.
 */
71 compare(const void *p1, const void *p2)
73 return ((const struct _gntarr *)p1)->pa - ((const struct _gntarr *)p2)->pa;
/*
 * Build a mempool on top of pages obtained through gntalloc().
 *
 * Visible steps:
 *  - size the region with rte_mempool_calc_obj_size() and
 *    rte_mempool_xmem_size(), and derive the page count from it;
 *  - allocate the pa_arr / gref_arr / gnt_arr bookkeeping arrays;
 *  - call gntalloc() and get_phys_map(), then sort the per-page records by
 *    physical address with qsort();
 *  - mmap() each page with MAP_FIXED at consecutive page offsets from the
 *    address returned by get_xen_virtual();
 *  - issue IOCTL_GNTALLOC_DEALLOC_GREF for the pages beyond what
 *    rte_mempool_xmem_usage() reports as needed;
 *  - call rte_mempool_xmem_create() on the remaining pages.
 *
 * The parameters name..flags are forwarded unchanged to
 * rte_mempool_xmem_create(). The result is returned by value in a
 * struct _mempool_gntalloc_info.
 *
 * NOTE(review): several lines of this function (braces, labels, some
 * declarations and error branches) are not visible in this excerpt, so the
 * notes below cover only what can be seen.
 */
77 static struct _mempool_gntalloc_info
78 _create_mempool(const char *name, unsigned elt_num, unsigned elt_size,
79 unsigned cache_size, unsigned private_data_size,
80 rte_mempool_ctor_t *mp_init, void *mp_init_arg,
81 rte_mempool_obj_cb_t *obj_init, void *obj_init_arg,
82 int socket_id, unsigned flags)
84 struct _mempool_gntalloc_info mgi;
85 struct rte_mempool *mp = NULL;
86 struct rte_mempool_objsz objsz;
87 uint32_t pg_num, rpg_num, pg_shift, pg_sz;
88 char *va, *orig_va, *uv; /* uv: from which, the pages could be freed */
89 ssize_t sz, usz; /* usz: unused size */
91 * for each page allocated through xen_gntalloc driver,
92 * gref_arr:stores grant references,
93 * pa_arr: stores physical address,
94 * gnt_arr: stores all meta dat
96 uint32_t *gref_arr = NULL;
97 phys_addr_t *pa_arr = NULL;
98 struct _gntarr *gnt_arr = NULL;
99 /* start index of the grant references, used for dealloc */
100 uint64_t start_index;
103 struct ioctl_gntalloc_dealloc_gref arg;
106 va = orig_va = uv = NULL;
107 pg_num = rpg_num = 0;
/* the page size must be a power of two because it is used as a shift below */
110 pg_sz = getpagesize();
111 if (rte_is_power_of_2(pg_sz) == 0) {
/* NOTE(review): presumably rte_bsf32() yields log2(pg_sz) here - confirm */
114 pg_shift = rte_bsf32(pg_sz);
116 rte_mempool_calc_obj_size(elt_size, flags, &objsz);
117 sz = rte_mempool_xmem_size(elt_num, objsz.total_size, pg_shift);
118 pg_num = sz >> pg_shift;
/* one bookkeeping entry per page in each of the three arrays */
120 pa_arr = calloc(pg_num, sizeof(pa_arr[0]));
121 gref_arr = calloc(pg_num, sizeof(gref_arr[0]));
122 gnt_arr = calloc(pg_num, sizeof(gnt_arr[0]));
123 if ((gnt_arr == NULL) || (gref_arr == NULL) || (pa_arr == NULL))
126 /* grant index is continuous in ascending order */
127 orig_va = gntalloc(sz, gref_arr, &start_index);
131 get_phys_map(orig_va, pa_arr, pg_num, pg_sz);
/*
 * Gather index, gref, pa and va of every page into gnt_arr so that all four
 * stay together when the array is sorted. The index advances by pg_sz per
 * page, starting at start_index.
 */
132 for (i = 0; i < pg_num; i++) {
133 gnt_arr[i].index = start_index + i * pg_sz;
134 gnt_arr[i].gref = gref_arr[i];
135 gnt_arr[i].pa = pa_arr[i];
136 gnt_arr[i].va = RTE_PTR_ADD(orig_va, i * pg_sz);
/* order the pages by pa, see compare() */
138 qsort(gnt_arr, pg_num, sizeof(struct _gntarr), compare);
140 va = get_xen_virtual(sz, pg_sz);
146 * map one by one, as index isn't continuous now.
147 * pg_num VMAs, doesn't linux has a limitation on this?
149 for (i = 0; i < pg_num; i++) {
150 /* update gref_arr and pa_arr after sort */
151 gref_arr[i] = gnt_arr[i].gref;
152 pa_arr[i] = gnt_arr[i].pa;
/* remap page i at its sorted position; the va recorded before the sort is overwritten */
153 gnt_arr[i].va = mmap(va + i * pg_sz, pg_sz, PROT_READ | PROT_WRITE,
154 MAP_SHARED | MAP_FIXED, gntalloc_fd, gnt_arr[i].index);
155 if ((gnt_arr[i].va == MAP_FAILED) || (gnt_arr[i].va != (va + i * pg_sz))) {
/*
 * NOTE(review): i is compared against the uint32_t pg_num above; if i is
 * itself unsigned (declaration not visible here), %u would match better
 * than %d.
 */
156 RTE_LOG(ERR, PMD, "failed to map %d pages\n", i);
162 * Check that allocated size is big enough to hold elt_num
163 * objects and a calcualte how many bytes are actually required.
/*
 * usz first receives the value returned by rte_mempool_xmem_usage(); a few
 * lines below it is overwritten with the number of bytes between uv and the
 * end of the mapping (va + sz).
 */
165 usz = rte_mempool_xmem_usage(va, elt_num, objsz.total_size, pa_arr, pg_num, pg_shift);
171 /* unmap unused pages if any */
172 uv = RTE_PTR_ADD(va, usz);
173 if ((usz = va + sz - uv) > 0) {
176 "%s(%s): unmap unused %zu of %zu "
177 "mmaped bytes @%p orig:%p\n",
178 __func__, name, usz, sz, uv, va);
/* index of the first page past the used region (integer division) */
180 i = (sz - usz) / pg_sz;
181 for (; i < pg_num; i++) {
183 arg.index = gnt_arr[i].index;
184 rv = ioctl(gntalloc_fd, IOCTL_GNTALLOC_DEALLOC_GREF, &arg);
186 /* shouldn't fail here */
/*
 * NOTE(review): PRIu64 followed by a literal x prints a decimal value with
 * an x suffix; PRIx64 may have been intended for pa - confirm.
 */
187 RTE_LOG(ERR, PMD, "va=%p pa=%"PRIu64"x index=%"PRIu64" %s\n",
190 arg.index, strerror(errno));
191 rte_panic("gntdealloc failed when freeing pages\n");
/* number of pages that remain in use after the trim above */
195 rpg_num = (sz - usz) >> pg_shift;
199 mp = rte_mempool_xmem_create(name, elt_num, elt_size,
200 cache_size, private_data_size,
201 mp_init, mp_init_arg,
202 obj_init, obj_init_arg,
203 socket_id, flags, va, pa_arr, rpg_num, pg_shift);
/*
 * NOTE(review): this dereferences mp; a NULL check on the result of
 * rte_mempool_xmem_create() is not visible in this excerpt - confirm one
 * exists.
 */
205 RTE_ASSERT(elt_num == mp->size);
208 mgi.pg_num = rpg_num;
209 mgi.gref_arr = gref_arr;
212 mgi.mempool_idx = rte_atomic32_add_return(&global_xenvirt_mempool_idx, 1);
213 mgi.start_index = start_index;
222 * unmap only, without deallocate grant reference.
223 * unused pages have already been unmaped,
224 * unmap twice will fail, but it is safe.
/*
 * NOTE(review): the loop below iterates with j but indexes gnt_arr with i,
 * so every pass unmaps the same entry and pages 0..i-1 are never unmapped.
 * gnt_arr[j] looks like the intended operand.
 */
227 for (j = 0; j < i; j++) {
229 munmap(gnt_arr[i].va, pg_sz);
/*
 * NOTE(review): the ioctl in the final loop below passes arg by value,
 * whereas the dealloc loop for unused pages earlier in this function passes
 * &arg for the same request. The by-value form looks like a bug; &arg is
 * presumably what the driver expects - confirm against the gntalloc ioctl
 * interface.
 */
239 /* some gref has already been de-allocated from the list in the driver,
240 * so dealloc one by one, and it is safe to deallocate twice
243 for (i = 0; i < pg_num; i++) {
244 arg.index = start_index + i * pg_sz;
245 rv = ioctl(gntalloc_fd, IOCTL_GNTALLOC_DEALLOC_GREF, arg);
253 rte_mempool_gntalloc_create(const char *name, unsigned elt_num, unsigned elt_size,
254 unsigned cache_size, unsigned private_data_size,
255 rte_mempool_ctor_t *mp_init, void *mp_init_arg,
256 rte_mempool_obj_cb_t *obj_init, void *obj_init_arg,
257 int socket_id, unsigned flags)
261 struct _mempool_gntalloc_info mgi;
262 struct ioctl_gntalloc_dealloc_gref arg;
263 int pg_sz = getpagesize();
265 mgi = _create_mempool(name, elt_num, elt_size,
266 cache_size, private_data_size,
267 mp_init, mp_init_arg,
268 obj_init, obj_init_arg,
271 rv = grant_gntalloc_mbuf_pool(mgi.mp,
281 * in _create_mempool, unused pages have already been unmapped, deallocagted
282 * unmap and dealloc the remained ones here.
284 munmap(mgi.va, pg_sz * mgi.pg_num);
285 for (i = 0; i < mgi.pg_num; i++) {
286 arg.index = mgi.start_index + i * pg_sz;
287 rv = ioctl(gntalloc_fd, IOCTL_GNTALLOC_DEALLOC_GREF, arg);