1 /* SPDX-License-Identifier: BSD-3-Clause
5 #include <rte_memory.h>
7 #include "dpaax_iova_table.h"
8 #include "dpaax_logs.h"
10 /* Global dpaax logger identifier */
13 /* Global table reference */
/* Singleton PA->VA translation table: allocated by dpaax_iova_table_populate()
 * and reset to NULL by dpaax_iova_table_depopulate(). NULL means translation
 * is unavailable (lookups/updates bail out early).
 */
14 struct dpaax_iova_table *dpaax_iova_table_p;
/* Forward declaration: walks existing memsegs and registers the EAL
 * mem-event callback (defined near the bottom of this file).
 */
16 static int dpaax_handle_memevents(void);
18 /* A structure representing the device-tree node available in /proc/device-tree.
25 /* A ntohll equivalent routine
26 * XXX: This is only applicable for 64 bit environment.
/* In-place conversion of the 8 bytes at 'arr' from big-endian (the byte order
 * used by the device-tree 'reg' property) to host order: the two 32-bit
 * halves are exchanged and each half is byte-swapped with ntohl().
 * NOTE(review): the statements that save the original first half before it
 * is overwritten are elided from this view — confirm a temporary is used
 * between lines 36 and 39 of the full source.
 */
29 rotate_8(unsigned char *arr)
33 uint32_t *second_half;
/* View the 8-byte buffer as two consecutive 32-bit words. */
35 first_half = (uint32_t *)(arr);
36 second_half = (uint32_t *)(arr + 4);
/* Exchange the halves (the saved copy of the first half is presumably
 * written into *second_half in an elided line).
 */
39 *first_half = *second_half;
/* Byte-swap each half from network (big-endian) to host order. */
42 *first_half = ntohl(*first_half);
43 *second_half = ntohl(*second_half);
47 * Memory layout for DPAAx platforms (LS1043, LS1046, LS1088, LS2088, LX2160)
48 * is populated by U-Boot and made available in the device tree:
49 * /proc/device-tree/memory@<address>/reg <= register.
50 * Entries are of the form:
51 * (<8 byte start addr><8 byte length>)(..more similar blocks of start,len>)..
54 * OUT populate number of entries found in memory node
56 * Pointer to array of reg_node elements, count size
/* Read and parse /proc/device-tree/memory@<addr>/reg into an array of
 * reg_node {addr, len} pairs (each pair is 16 big-endian bytes on disk,
 * converted to host order via rotate_8()).
 *
 * @param count [out] number of entries parsed from the memory node.
 * @return malloc'd array of 'count' reg_node elements; caller owns and must
 *         free() it. NULL on any failure (glob/open/fstat/read/parse).
 */
58 static struct reg_node *
59 read_memory_node(unsigned int *count)
64 struct stat statbuf = {0};
65 char file_data[MEM_NODE_FILE_LEN];
66 struct reg_node *nodes = NULL;
/* Locate the memory@<addr> node; fall back to the VM flavour of the glob
 * pattern if the first pattern matches nothing.
 */
70 ret = glob(MEM_NODE_PATH_GLOB, 0, NULL, &result);
72 ret = glob(MEM_NODE_PATH_GLOB_VM, 0, NULL, &result);
75 DPAAX_DEBUG("Unable to glob device-tree memory node (err: %d)",
/* Exactly one memory@<addr> node is expected. */
80 if (result.gl_pathc != 1) {
81 /* Either more than one memory@<addr> node found, or none.
82 * In either case, cannot work ahead.
84 DPAAX_DEBUG("Found (%zu) entries in device-tree. Not supported!",
89 DPAAX_DEBUG("Opening and parsing device-tree node: (%s)",
91 fd = open(result.gl_pathv[0], O_RDONLY);
93 DPAAX_DEBUG("Unable to open the device-tree node: (%s)(fd=%d)",
94 MEM_NODE_PATH_GLOB, fd);
98 /* Stat to get the file size */
99 ret = fstat(fd, &statbuf);
101 DPAAX_DEBUG("Unable to get device-tree memory node size.");
/* NOTE(review): st_size is off_t (signed); printing it with PRIu64 assumes
 * a 64-bit off_t ABI — confirm, or cast explicitly.
 */
105 DPAAX_DEBUG("Size of device-tree mem node: %" PRIu64, statbuf.st_size);
/* Oversized node: proceed with a truncated (clamped) read below, but warn. */
106 if (statbuf.st_size > MEM_NODE_FILE_LEN) {
107 DPAAX_DEBUG("More memory nodes available than assumed.");
108 DPAAX_DEBUG("System may not work properly!");
111 ret = read(fd, file_data, statbuf.st_size > MEM_NODE_FILE_LEN ?
112 MEM_NODE_FILE_LEN : statbuf.st_size);
114 DPAAX_DEBUG("Unable to read device-tree memory node: (%d)",
119 /* The reg node should be multiple of 16 bytes, 8 bytes each for addr
/* Each (addr,len) record is 16 bytes; the file size must divide evenly. */
122 *count = (statbuf.st_size / 16);
123 if ((*count) <= 0 || (statbuf.st_size % 16 != 0)) {
124 DPAAX_DEBUG("Invalid memory node values or count. (size=%" PRIu64 ")",
129 /* each entry is of 16 bytes, and size/16 is total count of entries */
130 nodes = malloc(sizeof(struct reg_node) * (*count));
132 DPAAX_DEBUG("Failure in allocating working memory.");
135 memset(nodes, 0, sizeof(struct reg_node) * (*count));
/* Copy each raw 16-byte record, then convert addr and len from big-endian
 * device-tree order to host order.
 */
137 for (i = 0, j = 0; i < (statbuf.st_size) && j < (*count); i += 16, j++) {
138 memcpy(&nodes[j], file_data + i, 16);
139 /* Rotate (ntohl) each 8 byte entry */
140 rotate_8((unsigned char *)(&(nodes[j].addr)));
141 rotate_8((unsigned char *)(&(nodes[j].len)));
144 DPAAX_DEBUG("Device-tree memory node data:");
146 DPAAX_DEBUG(" %08" PRIx64 " %08zu",
147 nodes[j].addr, nodes[j].len);
/* Build the global PA->VA translation table (dpaax_iova_table_p) from the
 * device-tree memory node: one dpaax_iovat_element per memory region, each
 * followed by a flat 'pages' array holding one uint64_t VA slot per
 * DPAAX_MEM_SPLIT-sized chunk of that region. Finally installs the EAL
 * mem-event handler so future alloc/free events keep the table current.
 * Return values are elided from this view; presumably 0 on success and
 * negative on failure — confirm against the full source.
 */
158 dpaax_iova_table_populate(void)
161 unsigned int i, node_count;
162 size_t tot_memory_size, total_table_size;
163 struct reg_node *nodes;
164 struct dpaax_iovat_element *entry;
166 /* dpaax_iova_table_p is a singleton - only one instance should be
/* Already populated: treat as benign (see comment below) and return early. */
169 if (dpaax_iova_table_p) {
170 DPAAX_DEBUG("Multiple allocation attempt for IOVA Table (%p)",
172 /* This can be an error case as well - some path not cleaning
173 * up table - but, for now, it is assumed that if IOVA Table
174 * pointer is valid, table is allocated.
179 nodes = read_memory_node(&node_count);
181 DPAAX_WARN("PA->VA translation not available;");
182 DPAAX_WARN("Expect performance impact.");
/* Sum the physical memory covered by all regions.
 * NOTE(review): the initialization of tot_memory_size is elided from this
 * view — confirm it is zeroed before this loop in the full source.
 */
187 for (i = 0; i < node_count; i++)
188 tot_memory_size += nodes[i].len;
190 DPAAX_DEBUG("Total available PA memory size: %zu", tot_memory_size);
192 /* Total table size = meta data + tot_memory_size/8 */
/* One table header + node_count element headers + one uint64_t VA slot per
 * DPAAX_MEM_SPLIT chunk of total memory, all in a single allocation.
 */
193 total_table_size = sizeof(struct dpaax_iova_table) +
194 (sizeof(struct dpaax_iovat_element) * node_count) +
195 ((tot_memory_size / DPAAX_MEM_SPLIT) * sizeof(uint64_t));
197 /* TODO: This memory doesn't need to shared but needs to be always
198 * pinned to RAM (no swap out) - using hugepage rather than malloc
200 dpaax_iova_table_p = rte_zmalloc(NULL, total_table_size, 0);
201 if (dpaax_iova_table_p == NULL) {
202 DPAAX_WARN("Unable to allocate memory for PA->VA Table;");
203 DPAAX_WARN("PA->VA translation not available;");
204 DPAAX_WARN("Expect performance impact.");
209 /* Initialize table */
210 dpaax_iova_table_p->count = node_count;
211 entry = dpaax_iova_table_p->entries;
213 DPAAX_DEBUG("IOVA Table entries: (entry start = %p)", (void *)entry);
214 DPAAX_DEBUG("\t(entry),(start),(len),(next)");
216 for (i = 0; i < node_count; i++) {
217 /* dpaax_iova_table_p
218 * | dpaax_iova_table_p->entries
222 * +------+------+-------+---+----------+---------+---
223 * |iova_ |entry | entry | | pages | pages |
224 * |table | 1 | 2 |...| entry 1 | entry2 |
225 * +-----'+.-----+-------+---+;---------+;--------+---
227 * `~~~~~~|~~~~~>pages /
231 entry[i].start = nodes[i].addr;
232 entry[i].len = nodes[i].len;
/* For i > 0 (branch structure elided): this entry's pages array starts
 * right after the previous entry's pages array.
 */
234 entry[i].pages = entry[i-1].pages +
235 ((entry[i-1].len/DPAAX_MEM_SPLIT));
/* First entry (elided branch): pages start immediately after the array of
 * element headers within the same allocation.
 */
237 entry[i].pages = (uint64_t *)((unsigned char *)entry +
238 (sizeof(struct dpaax_iovat_element) *
241 DPAAX_DEBUG("\t(%u),(%8"PRIx64"),(%8zu),(%8p)",
242 i, entry[i].start, entry[i].len, entry[i].pages);
245 /* Release memory associated with nodes array - not required now */
248 DPAAX_DEBUG("Adding mem-event handler");
249 ret = dpaax_handle_memevents();
251 DPAAX_ERR("Unable to add mem-event handler");
252 DPAAX_WARN("Cases with non-buffer pool mem won't work!");
/* Tear down the global PA->VA table. Safe to call when the table was never
 * populated (no-op on NULL).
 * NOTE(review): the table was allocated as a single rte_zmalloc() block with
 * dpaax_iova_table_p as its base, yet ->entries (an interior/flexible member)
 * is what is freed here — confirm rte_free(dpaax_iova_table_p) is not the
 * intended call.
 */
259 dpaax_iova_table_depopulate(void)
261 if (dpaax_iova_table_p == NULL)
264 rte_free(dpaax_iova_table_p->entries);
/* Mark translation unavailable for subsequent lookups/updates. */
265 dpaax_iova_table_p = NULL;
267 DPAAX_DEBUG("IOVA Table cleanedup");
/* Record the PA->VA mapping for [paddr, paddr+length) in the global table.
 * Both addresses are aligned down to DPAAX_MEM_SPLIT granularity and one
 * 'pages' slot is written per DPAAX_MEM_SPLIT chunk. Passing vaddr == 0 (as
 * the mem-event FREE path does) clears the slots. A paddr outside all
 * device-tree regions is logged but tolerated (see comment below).
 */
271 dpaax_iova_table_update(phys_addr_t paddr, void *vaddr, size_t length)
275 size_t req_length = length, e_offset;
276 struct dpaax_iovat_element *entry;
277 uintptr_t align_vaddr;
278 phys_addr_t align_paddr;
/* Table not populated: translation disabled, nothing to record. */
280 if (unlikely(dpaax_iova_table_p == NULL))
/* Align both addresses down to the table's page granularity. */
283 align_paddr = paddr & DPAAX_MEM_SPLIT_MASK;
284 align_vaddr = ((uintptr_t)vaddr & DPAAX_MEM_SPLIT_MASK);
286 /* Check if paddr is available in table */
287 entry = dpaax_iova_table_p->entries;
288 for (i = 0; i < dpaax_iova_table_p->count; i++) {
/* Entries are scanned in order; an address below the current entry's start
 * that wasn't matched earlier cannot belong to any region.
 */
289 if (align_paddr < entry[i].start) {
290 /* Address lower than start, but not found in previous
291 * iteration shouldn't exist.
293 DPAAX_ERR("Add: Incorrect entry for PA->VA Table"
294 "(%"PRIu64")", paddr);
295 DPAAX_ERR("Add: Lowest address: %"PRIu64"",
/* Past this entry's range: try the next region. */
300 if (align_paddr > (entry[i].start + entry[i].len))
303 /* align_paddr >= start && align_paddr < (start + len) */
/* Index of the DPAAX_MEM_SPLIT-sized chunk within this region. */
307 e_offset = ((align_paddr - entry[i].start) / DPAAX_MEM_SPLIT);
308 /* TODO: Whatif something already exists at this
309 * location - is that an error? For now, ignoring the
312 entry[i].pages[e_offset] = align_vaddr;
313 #ifdef RTE_COMMON_DPAAX_DEBUG
/* NOTE(review): %zu is used for align_vaddr (uintptr_t); strictly this
 * wants PRIxPTR/PRIuPTR — harmless only where size_t and uintptr_t match.
 */
314 DPAAX_DEBUG("Added: vaddr=%zu for Phy:%"PRIu64" at %zu"
315 " remaining len %zu", align_vaddr,
316 align_paddr, e_offset, req_length);
318 /* Incoming request can be larger than the
319 * DPAAX_MEM_SPLIT size - in which case, multiple
320 * entries in entry->pages[] are filled up.
/* Last (or only) chunk consumed: mapping fully recorded. */
322 if (req_length <= DPAAX_MEM_SPLIT)
/* Advance both addresses and fill the next slot on the following pass. */
324 align_paddr += DPAAX_MEM_SPLIT;
325 align_vaddr += DPAAX_MEM_SPLIT;
326 req_length -= DPAAX_MEM_SPLIT;
333 /* There might be case where the incoming physical address is
334 * beyond the address discovered in the memory node of
335 * device-tree. Specially if some malloc'd area is used by EAL
336 * and the memevent handlers passes that across. But, this is
337 * not necessarily an error.
339 DPAAX_DEBUG("Add: Unable to find slot for vaddr:(%p),"
344 #ifdef RTE_COMMON_DPAAX_DEBUG
345 DPAAX_DEBUG("Add: Found slot at (%"PRIu64")[(%zu)] for vaddr:(%p),"
346 " phy(%"PRIu64"), len(%zu)", entry[i].start, e_offset,
347 vaddr, paddr, length);
352 /* dpaax_iova_table_dump
353 * Dump the table, with its entries, on screen. Only works in Debug Mode
354 * Not for weak hearted - the tables can get quite large
/* Diagnostic dump of every table entry and each non-zero PA->VA page slot.
 * Requires the global log level to be at least RTE_LOG_DEBUG; otherwise
 * only an error hint is printed (see comment below on why).
 */
357 dpaax_iova_table_dump(void)
360 struct dpaax_iovat_element *entry;
362 /* In case DEBUG is not enabled, some 'if' conditions might misbehave
363 * as they have nothing else in them except a DPAAX_DEBUG() which if
364 * tuned out would leave 'if' naked.
366 if (rte_log_get_global_level() < RTE_LOG_DEBUG) {
367 DPAAX_ERR("Set log level to Debug for PA->Table dump!");
371 DPAAX_DEBUG(" === Start of PA->VA Translation Table ===");
372 if (dpaax_iova_table_p == NULL)
373 DPAAX_DEBUG("\tNULL");
375 entry = dpaax_iova_table_p->entries;
376 for (i = 0; i < dpaax_iova_table_p->count; i++) {
377 DPAAX_DEBUG("\t(%16i),(%16"PRIu64"),(%16zu),(%16p)",
378 i, entry[i].start, entry[i].len, entry[i].pages);
379 DPAAX_DEBUG("\t\t (PA), (VA)");
/* NOTE(review): 'entry->len' is entry[0].len — the inner loop bound looks
 * like it should be entry[i].len; as written, regions larger or smaller
 * than the first are walked with the wrong slot count.
 */
380 for (j = 0; j < (entry->len/DPAAX_MEM_SPLIT); j++) {
/* Skip slots with no VA recorded yet. */
381 if (entry[i].pages[j] == 0)
383 DPAAX_DEBUG("\t\t(%16"PRIx64"),(%16"PRIx64")",
384 (entry[i].start + (j * sizeof(uint64_t))),
388 DPAAX_DEBUG(" === End of PA->VA Translation Table ===");
/* EAL mem-event callback: keep the PA->VA table in sync with hugepage
 * allocations and frees. Walks the affected range memseg by memseg,
 * recording the mapping on ALLOC and clearing it (vaddr = 0) on FREE.
 * Registered from dpaax_handle_memevents().
 */
392 dpaax_memevent_cb(enum rte_mem_event type, const void *addr, size_t len,
393 void *arg __rte_unused)
395 struct rte_memseg_list *msl;
396 struct rte_memseg *ms;
397 size_t cur_len = 0, map_len = 0;
398 phys_addr_t phys_addr;
402 DPAAX_DEBUG("Called with addr=%p, len=%zu", addr, len);
404 msl = rte_mem_virt2memseg_list(addr);
/* One iteration per memseg covered by [addr, addr+len); cur_len is
 * presumably advanced by map_len in an elided line — confirm.
 */
406 while (cur_len < len) {
407 const void *va = RTE_PTR_ADD(addr, cur_len);
/* Resolve the memseg containing this VA, then its base PA and VA. */
409 ms = rte_mem_virt2memseg(va, msl);
410 phys_addr = rte_mem_virt2phy(ms->addr);
411 virt_addr = ms->addr;
413 #ifdef RTE_COMMON_DPAAX_DEBUG
414 DPAAX_DEBUG("Request for %s, va=%p, virt_addr=%p,"
415 "iova=%"PRIu64", map_len=%zu",
416 type == RTE_MEM_EVENT_ALLOC ?
418 va, virt_addr, phys_addr, map_len);
420 if (type == RTE_MEM_EVENT_ALLOC)
421 ret = dpaax_iova_table_update(phys_addr, virt_addr,
424 /* In case of mem_events for MEM_EVENT_FREE, complete
425 * hugepage is released and its PA entry is set to 0.
427 ret = dpaax_iova_table_update(phys_addr, 0, map_len);
/* Update failure is logged but does not abort processing the range. */
430 DPAAX_DEBUG("PA-Table entry update failed. "
431 "Map=%d, addr=%p, len=%zu, err:(%d)",
432 type, va, map_len, ret);
/* rte_memseg_contig_walk() callback: seed the PA->VA table with one entry
 * per already-allocated contiguous memseg chunk, before the mem-event
 * handler is installed (see dpaax_handle_memevents()).
 * NOTE(review): the return value of dpaax_iova_table_update() is ignored
 * here; a failed seed is silent — confirm this best-effort behavior is
 * intended.
 */
441 dpaax_memevent_walk_memsegs(const struct rte_memseg_list *msl __rte_unused,
442 const struct rte_memseg *ms, size_t len,
443 void *arg __rte_unused)
445 DPAAX_DEBUG("Walking for %p (pa=%"PRIu64") and len %zu",
446 ms->addr, ms->phys_addr, len);
447 dpaax_iova_table_update(rte_mem_virt2phy(ms->addr), ms->addr, len);
/* Populate the table with all memory EAL already owns, then register the
 * mem-event callback for future alloc/free events.
 *
 * @return result of rte_mem_event_callback_register() — 0 on success,
 *         negative on failure (propagated to dpaax_iova_table_populate()).
 */
452 dpaax_handle_memevents(void)
454 /* First, walk through all memsegs and pin them, before installing
455 * handler. This assures that all memseg which have already been
456 * identified/allocated by EAL, are already part of PA->VA Table. This
457 * is especially for cases where application allocates memory before
458 * the EAL or this is an externally allocated memory passed to EAL.
460 rte_memseg_contig_walk_thread_unsafe(dpaax_memevent_walk_memsegs, NULL);
462 return rte_mem_event_callback_register("dpaax_memevents_cb",
463 dpaax_memevent_cb, NULL);
/* Constructor-time log registration (enclosing RTE_INIT() block is elided
 * from this view): registers the "pmd.common.dpaax" log type and defaults
 * it to ERR level so DEBUG/WARN output stays quiet unless raised.
 */
468 dpaax_logger = rte_log_register("pmd.common.dpaax");
469 if (dpaax_logger >= 0)
470 rte_log_set_level(dpaax_logger, RTE_LOG_ERR);