#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>
+#include <string.h>
#include <unistd.h>
#include <sys/queue.h>
#include <rte_lcore.h>
#include <rte_cycles.h>
#include <rte_spinlock.h>
+#include <rte_atomic.h>
#include "test.h"
static rte_spinlock_t sl, sl_try;
static rte_spinlock_t sl_tab[RTE_MAX_LCORE];
static rte_spinlock_recursive_t slr;
-static unsigned count;
+static unsigned count = 0;
+
+static rte_atomic32_t synchro;
static int
test_spinlock_per_core(__attribute__((unused)) void *arg)
return 0;
}
-static volatile int count1, count2;
static rte_spinlock_t lk = RTE_SPINLOCK_INITIALIZER;
-static unsigned int max = 10000000; /* 10M */
-static volatile uint64_t looptime[RTE_MAX_LCORE];
+static uint64_t lock_count[RTE_MAX_LCORE] = {0};
+
+#define TIME_S 5
static int
-load_loop_fn(__attribute__((unused)) void *dummy)
+load_loop_fn(void *func_param)
{
- uint64_t end, begin;
+ uint64_t time_diff = 0, begin;
+ uint64_t hz = rte_get_hpet_hz();
+ uint64_t lcount = 0;
+ const int use_lock = *(int*)func_param;
+ const unsigned lcore = rte_lcore_id();
+
+ /* slave cores wait here until the master sets synchro */
+ if (lcore != rte_get_master_lcore())
+ while (rte_atomic32_read(&synchro) == 0);
+
begin = rte_get_hpet_cycles();
- unsigned int i = 0;
- for ( i = 0; i < max; i++) {
- rte_spinlock_lock(&lk);
- count1++;
- rte_spinlock_unlock(&lk);
- count2++;
+ while (time_diff / hz < TIME_S) {
+ if (use_lock)
+ rte_spinlock_lock(&lk);
+ lcount++;
+ if (use_lock)
+ rte_spinlock_unlock(&lk);
+ /* delay to make the lock duty cycle slightly more realistic */
+ rte_delay_us(1);
+ time_diff = rte_get_hpet_cycles() - begin;
}
- end = rte_get_hpet_cycles();
- looptime[rte_lcore_id()] = end - begin;
+ lock_count[lcore] = lcount;
return 0;
}
static int
-test_spinlock_load(void)
+test_spinlock_perf(void)
{
- if (rte_lcore_count()<= 1) {
- printf("no cores counted\n");
- return -1;
- }
- printf ("Running %u tests.......\n", max);
- printf ("Number of cores = %u\n", rte_lcore_count());
+ unsigned int i;
+ uint64_t total = 0;
+ int lock = 0;
+ const unsigned lcore = rte_lcore_id();
- rte_eal_mp_remote_launch(load_loop_fn, NULL , CALL_MASTER);
- rte_eal_mp_wait_lcore();
+ printf("\nTest with no lock on single core...\n");
+ load_loop_fn(&lock);
+ printf("Core [%u] count = %"PRIu64"\n", lcore, lock_count[lcore]);
+ memset(lock_count, 0, sizeof(lock_count));
- unsigned int k = 0;
- uint64_t avgtime = 0;
+ printf("\nTest with lock on single core...\n");
+ lock = 1;
+ load_loop_fn(&lock);
+ printf("Core [%u] count = %"PRIu64"\n", lcore, lock_count[lcore]);
+ memset(lock_count, 0, sizeof(lock_count));
- RTE_LCORE_FOREACH(k) {
- printf("Core [%u] time = %"PRIu64"\n", k, looptime[k]);
- avgtime += looptime[k];
- }
+ printf("\nTest with lock on %u cores...\n", rte_lcore_count());
- avgtime = avgtime / rte_lcore_count();
- printf("Average time = %"PRIu64"\n", avgtime);
+ /* Clear synchro and start slaves */
+ rte_atomic32_set(&synchro, 0);
+ rte_eal_mp_remote_launch(load_loop_fn, &lock, SKIP_MASTER);
- int check = 0;
- check = max * rte_lcore_count();
- if (count1 == check && count2 != check)
- printf("Passed Load test\n");
- else {
- printf("Failed load test\n");
- return -1;
+ /* set synchro and run the test on the master core too */
+ rte_atomic32_set(&synchro, 1);
+ load_loop_fn(&lock);
+
+ rte_eal_mp_wait_lcore();
+
+ RTE_LCORE_FOREACH(i) {
+ printf("Core [%u] count = %"PRIu64"\n", i, lock_count[i]);
+ total += lock_count[i];
}
+
+ printf("Total count = %"PRIu64"\n", total);
+
return 0;
}
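
The start barrier used above is worth isolating: the slaves spin on an rte_atomic32_t until the master has launched everything, so all cores begin the timed loop at (almost) the same instant. Below is a minimal sketch of that handshake on its own, not part of the patch; the names start_flag, worker and run_workers are invented for illustration.

#include <rte_atomic.h>
#include <rte_launch.h>

static rte_atomic32_t start_flag; /* cleared by the master, set once all slaves are launched */

/* body run on every slave lcore */
static int
worker(__attribute__((unused)) void *arg)
{
        /* spin until the master gives the go-ahead, so all cores start together */
        while (rte_atomic32_read(&start_flag) == 0)
                rte_pause();
        /* ... timed work would go here ... */
        return 0;
}

static void
run_workers(void)
{
        rte_atomic32_set(&start_flag, 0);                    /* arm the barrier */
        rte_eal_mp_remote_launch(worker, NULL, SKIP_MASTER); /* slaves block in worker() */
        rte_atomic32_set(&start_flag, 1);                    /* release them all at once */
        rte_eal_mp_wait_lcore();                             /* wait for every slave to finish */
}

Releasing all cores at once is what makes the per-core counts of the contended test comparable.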
rte_eal_mp_wait_lcore();
- if (test_spinlock_load()<0)
- return -1;
-
rte_spinlock_recursive_lock(&slr);
/*
rte_spinlock_recursive_unlock(&slr);
rte_spinlock_recursive_unlock(&slr);
+ if (test_spinlock_perf() < 0)
+ return -1;
+
return ret;
}
#include <rte_memzone.h>
#include <rte_tailq.h>
#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
#include <rte_log.h>
#include "eal_private.h"
for (i=0; i<RTE_MAX_MEMSEG; i++) {
if (mcfg->memseg[i].addr == NULL)
break;
- printf("phys:0x%"PRIx64", len:0x%"PRIx64", virt:%p, "
- "socket_id:%"PRId32"\n",
+
+ printf("Segment %u: phys:0x%"PRIx64", len:0x%"PRIx64", "
+ "virt:%p, socket_id:%"PRId32", "
+ "hugepage_sz:0x%"PRIx64", nchannel:%"PRIx32", "
+ "nrank:%"PRIx32"\n", i,
mcfg->memseg[i].phys_addr,
mcfg->memseg[i].len,
mcfg->memseg[i].addr,
- mcfg->memseg[i].socket_id);
+ mcfg->memseg[i].socket_id,
+ mcfg->memseg[i].hugepage_sz,
+ mcfg->memseg[i].nchannel,
+ mcfg->memseg[i].nrank);
}
}
#include <rte_memzone.h>
#include <rte_tailq.h>
#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
#include <rte_per_lcore.h>
#include <rte_errno.h>
#include <rte_string_fns.h>
#include "eal_private.h"
/* internal copy of free memory segments */
-static struct rte_memseg free_memseg[RTE_MAX_MEMSEG];
+static struct rte_memseg *free_memseg = NULL;
-/* pointer to last reserved memzone */
-static unsigned memzone_idx;
+static inline const struct rte_memzone *
+memzone_lookup_thread_unsafe(const char *name)
+{
+ const struct rte_mem_config *mcfg;
+ unsigned i = 0;
+
+ /* get pointer to global configuration */
+ mcfg = rte_eal_get_configuration()->mem_config;
+
+ /*
+ * the algorithm is not optimal (linear), but there are few
+ * zones and this function should be called at init only
+ */
+ for (i = 0; i < RTE_MAX_MEMZONE && mcfg->memzone[i].addr != NULL; i++) {
+ if (!strncmp(name, mcfg->memzone[i].name, RTE_MEMZONE_NAMESIZE))
+ return &mcfg->memzone[i];
+ }
+
+ return NULL;
+}
/*
* Return a pointer to a correctly filled memzone descriptor. If the
len, socket_id, flags, CACHE_LINE_SIZE);
}
-/*
- * Return a pointer to a correctly filled memzone descriptor (with a
- * specified alignment). If the allocation cannot be done, return NULL.
- */
-const struct rte_memzone *
-rte_memzone_reserve_aligned(const char *name, uint64_t len,
+static const struct rte_memzone *
+memzone_reserve_aligned_thread_unsafe(const char *name, uint64_t len,
int socket_id, unsigned flags, unsigned align)
{
- struct rte_config *config;
+ struct rte_mem_config *mcfg;
unsigned i = 0;
int memseg_idx = -1;
- uint64_t requested_len;
+ uint64_t addr_offset, requested_len;
uint64_t memseg_len = 0;
phys_addr_t memseg_physaddr;
void *memseg_addr;
- uintptr_t addr_offset;
-
- /* if secondary processes return error */
- if (rte_eal_process_type() == RTE_PROC_SECONDARY){
- RTE_LOG(ERR, EAL, "%s(): Not allowed in secondary process\n", __func__);
- rte_errno = E_RTE_SECONDARY;
- return NULL;
- }
-
- /* if alignment is not a power of two */
- if (!rte_is_power_of_2(align)) {
- RTE_LOG(ERR, EAL, "%s(): Invalid alignment: %u\n", __func__,
- align);
- rte_errno = EINVAL;
- return NULL;
- }
-
- /* alignment less than cache size is not allowed */
- if (align < CACHE_LINE_SIZE)
- align = CACHE_LINE_SIZE;
/* get pointer to global configuration */
- config = rte_eal_get_configuration();
+ mcfg = rte_eal_get_configuration()->mem_config;
/* no more room in config */
- if (memzone_idx >= RTE_MAX_MEMZONE) {
+ if (mcfg->memzone_idx >= RTE_MAX_MEMZONE) {
RTE_LOG(ERR, EAL, "%s(): No more room in config\n", __func__);
rte_errno = ENOSPC;
return NULL;
}
- /* both sizes cannot be explicitly called for */
- if ((flags & RTE_MEMZONE_1GB) && (flags & RTE_MEMZONE_2MB)) {
- rte_errno = EINVAL;
- return NULL;
- }
-
/* zone already exist */
- if (rte_memzone_lookup(name) != NULL) {
+ if ((memzone_lookup_thread_unsafe(name)) != NULL) {
RTE_LOG(DEBUG, EAL, "%s(): memzone <%s> already exists\n",
__func__, name);
rte_errno = EEXIST;
/* align length on cache boundary */
len += CACHE_LINE_MASK;
- len &= ~((uint64_t)CACHE_LINE_MASK);
+ len &= ~((uint64_t) CACHE_LINE_MASK);
+ /* save original length */
+ requested_len = len;
+ /* reserve extra space for future alignment */
+ if (len)
+ len += align;
- /* save requested length */
- requested_len = len;
/* find the smallest segment matching requirements */
for (i = 0; i < RTE_MAX_MEMSEG; i++) {
-
/* last segment */
if (free_memseg[i].addr == NULL)
break;
* try allocating again without the size parameter otherwise -fail.
*/
if ((flags & RTE_MEMZONE_SIZE_HINT_ONLY) &&
- ((flags & RTE_MEMZONE_1GB) || (flags & RTE_MEMZONE_2MB)))
- return rte_memzone_reserve_aligned(name, len - align,
+ ((flags & RTE_MEMZONE_1GB) || (flags & RTE_MEMZONE_2MB)))
+ return memzone_reserve_aligned_thread_unsafe(name, len - align,
socket_id, 0, align);
RTE_LOG(ERR, EAL, "%s(): No appropriate segment found\n", __func__);
}
/* get offset needed to adjust alignment */
- addr_offset = (uintptr_t) RTE_PTR_SUB(
- RTE_ALIGN_CEIL(free_memseg[memseg_idx].addr, (uintptr_t) align),
- (uintptr_t) free_memseg[memseg_idx].addr);
+ addr_offset = RTE_ALIGN_CEIL(free_memseg[memseg_idx].phys_addr, align) -
+ free_memseg[memseg_idx].phys_addr;
/* save aligned physical and virtual addresses */
memseg_physaddr = free_memseg[memseg_idx].phys_addr + addr_offset;
- memseg_addr = RTE_PTR_ADD(free_memseg[memseg_idx].addr, addr_offset);
+ memseg_addr = RTE_PTR_ADD(free_memseg[memseg_idx].addr, (uintptr_t) addr_offset);
/* if we are looking for a biggest memzone */
if (requested_len == 0)
(char *)free_memseg[memseg_idx].addr + len;
/* fill the zone in config */
- struct rte_memzone *mz = &config->mem_config->memzone[memzone_idx++];
+ struct rte_memzone *mz = &mcfg->memzone[mcfg->memzone_idx++];
rte_snprintf(mz->name, sizeof(mz->name), "%s", name);
mz->phys_addr = memseg_physaddr;
mz->addr = memseg_addr;
return mz;
}
+/*
+ * Return a pointer to a correctly filled memzone descriptor (with a
+ * specified alignment). If the allocation cannot be done, return NULL.
+ */
+const struct rte_memzone *
+rte_memzone_reserve_aligned(const char *name, uint64_t len,
+ int socket_id, unsigned flags, unsigned align)
+{
+ struct rte_mem_config *mcfg;
+ const struct rte_memzone *mz = NULL;
+
+ /* both sizes cannot be explicitly called for */
+ if ((flags & RTE_MEMZONE_1GB) && (flags & RTE_MEMZONE_2MB)) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ /* if alignment is not a power of two */
+ if (!rte_is_power_of_2(align)) {
+ RTE_LOG(ERR, EAL, "%s(): Invalid alignment: %u\n", __func__,
+ align);
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ /* alignment less than cache size is not allowed */
+ if (align < CACHE_LINE_SIZE)
+ align = CACHE_LINE_SIZE;
+
+ /* get pointer to global configuration */
+ mcfg = rte_eal_get_configuration()->mem_config;
+
+ rte_rwlock_write_lock(&mcfg->mlock);
+
+ mz = memzone_reserve_aligned_thread_unsafe(
+ name, len, socket_id, flags, align);
+
+ rte_rwlock_write_unlock(&mcfg->mlock);
+
+ return mz;
+}
+
/*
* Lookup for the memzone identified by the given name
*/
const struct rte_memzone *
rte_memzone_lookup(const char *name)
{
- const struct rte_mem_config *mcfg;
- unsigned i = 0;
+ struct rte_mem_config *mcfg;
+ const struct rte_memzone *memzone = NULL;
- /* get pointer to global configuration */
mcfg = rte_eal_get_configuration()->mem_config;
+
+ rte_rwlock_read_lock(&mcfg->mlock);
- /*
- * the algorithm is not optimal (linear), but there are few
- * zones and this function should be called at init only
- */
- for (i = 0; i < RTE_MAX_MEMZONE && mcfg->memzone[i].addr != NULL; i++) {
- if (!strncmp(name, mcfg->memzone[i].name, RTE_MEMZONE_NAMESIZE))
- return &mcfg->memzone[i];
- }
- return NULL;
+ memzone = memzone_lookup_thread_unsafe(name);
+
+ rte_rwlock_read_unlock(&mcfg->mlock);
+
+ return memzone;
}
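
Because reserve and lookup now both take mcfg->mlock, the common split where the primary process creates a zone and secondaries attach to it by name needs no extra locking in the application. A minimal usage sketch, not part of the patch; the zone name "APP_STATE" and struct app_state are invented for illustration.

#include <stdint.h>
#include <string.h>
#include <rte_memzone.h>
#include <rte_lcore.h>

struct app_state { /* hypothetical state shared between processes */
        uint64_t counter;
};

/* primary process: reserve a named zone once at init time */
static struct app_state *
app_state_create(void)
{
        const struct rte_memzone *mz;

        mz = rte_memzone_reserve("APP_STATE", sizeof(struct app_state),
                        rte_socket_id(), 0);
        if (mz == NULL)
                return NULL;
        memset(mz->addr, 0, sizeof(struct app_state));
        return mz->addr;
}

/* secondary process: attach to the same zone by name */
static struct app_state *
app_state_attach(void)
{
        const struct rte_memzone *mz = rte_memzone_lookup("APP_STATE");

        return mz != NULL ? mz->addr : NULL;
}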
/* Dump all reserved memory zones on console */
void
rte_memzone_dump(void)
{
- const struct rte_mem_config *mcfg;
+ struct rte_mem_config *mcfg;
unsigned i = 0;
/* get pointer to global configuration */
mcfg = rte_eal_get_configuration()->mem_config;
+ rte_rwlock_read_lock(&mcfg->mlock);
/* dump all zones */
for (i=0; i<RTE_MAX_MEMZONE; i++) {
if (mcfg->memzone[i].addr == NULL)
break;
- printf("name:<%s>, phys:0x%"PRIx64", len:0x%"PRIx64""
- ", virt:%p, socket_id:%"PRId32"\n",
+ printf("Zone %u: name:<%s>, phys:0x%"PRIx64", len:0x%"PRIx64""
+ ", virt:%p, socket_id:%"PRId32", flags:%"PRIx32"\n", i,
mcfg->memzone[i].name,
mcfg->memzone[i].phys_addr,
mcfg->memzone[i].len,
mcfg->memzone[i].addr,
- mcfg->memzone[i].socket_id);
+ mcfg->memzone[i].socket_id,
+ mcfg->memzone[i].flags);
}
+ rte_rwlock_read_unlock(&mcfg->mlock);
}
/*
int
rte_eal_memzone_init(void)
{
- struct rte_config *config;
+ struct rte_mem_config *mcfg;
const struct rte_memseg *memseg;
unsigned i = 0;
+ /* get pointer to global configuration */
+ mcfg = rte_eal_get_configuration()->mem_config;
+
+ /* mirror the runtime memsegs from config */
+ free_memseg = mcfg->free_memseg;
+
/* secondary processes don't need to initialise anything */
if (rte_eal_process_type() == RTE_PROC_SECONDARY)
return 0;
- /* get pointer to global configuration */
- config = rte_eal_get_configuration();
-
memseg = rte_eal_get_physmem_layout();
if (memseg == NULL) {
RTE_LOG(ERR, EAL, "%s(): Cannot get physical layout\n", __func__);
return -1;
}
+ rte_rwlock_write_lock(&mcfg->mlock);
+
/* duplicate the memsegs from config */
- memcpy(free_memseg, memseg, sizeof(free_memseg));
+ memcpy(free_memseg, memseg, sizeof(struct rte_memseg) * RTE_MAX_MEMSEG);
/* make all zones cache-aligned */
for (i=0; i<RTE_MAX_MEMSEG; i++) {
break;
if (memseg_sanitize(&free_memseg[i]) < 0) {
RTE_LOG(ERR, EAL, "%s(): Sanity check failed\n", __func__);
+ rte_rwlock_write_unlock(&mcfg->mlock);
return -1;
}
}
/* delete all zones */
- memzone_idx = 0;
- memset(config->mem_config->memzone, 0, sizeof(config->mem_config->memzone));
+ mcfg->memzone_idx = 0;
+ memset(mcfg->memzone, 0, sizeof(mcfg->memzone));
+
+ rte_rwlock_write_unlock(&mcfg->mlock);
return 0;
}
mcfg = rte_eal_get_configuration()->mem_config;
+ rte_rwlock_read_lock(&mcfg->qlock);
for (i=0; i < RTE_MAX_TAILQ; i++) {
const struct rte_tailq_head *tailq = &mcfg->tailq_head[i];
const struct rte_dummy_head *head = &tailq->tailq_head;
(rte_tailq_names[i] != NULL ? rte_tailq_names[i]:"nil"),
head->tqh_first, head->tqh_last);
}
+ rte_rwlock_read_unlock(&mcfg->qlock);
}
int
int rte_eal_init(int argc, char **argv);
/**
- * Utility macro to do a tailq 'INSERT' of rte_mem_config
+ * macro to get the tailq lock stored in mem_config
+ */
+#define RTE_EAL_TAILQ_RWLOCK (&rte_eal_get_configuration()->mem_config->qlock)
+
+/**
+ * macro to get the mempool lock in mem_config, shared by multiple instances
+ */
+#define RTE_EAL_MEMPOOL_RWLOCK (&rte_eal_get_configuration()->mem_config->mplock)
+
+
+/**
+ * Utility macro to do a thread-safe tailq 'INSERT' of rte_mem_config
*
* @param idx
* a kind of tailq define in enum rte_tailq_t
#define RTE_EAL_TAILQ_INSERT_TAIL(idx, type, elm) do { \
struct type *list; \
list = RTE_TAILQ_LOOKUP_BY_IDX(idx, type); \
+ rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK); \
TAILQ_INSERT_TAIL(list, elm, next); \
+ rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); \
} while (0)
/**
- * Utility macro to do a tailq 'REMOVE' of rte_mem_config
+ * Utility macro to do a thread-safe tailq 'REMOVE' of rte_mem_config
*
* @param idx
* a kind of tailq define in enum rte_tailq_t
#define RTE_EAL_TAILQ_REMOVE(idx, type, elm) do { \
struct type *list; \
list = RTE_TAILQ_LOOKUP_BY_IDX(idx, type); \
+ rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK); \
TAILQ_REMOVE(list, elm, next); \
+ rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); \
} while (0) \
#include <rte_tailq.h>
#include <rte_memory.h>
#include <rte_memzone.h>
+#include <rte_rwlock.h>
#ifdef __cplusplus
extern "C" {
* support, the memory details should be shared across instances
*/
struct rte_mem_config {
+ volatile uint32_t magic; /**< Magic number - Sanity check. */
+
/* memory topology */
uint32_t nchannel; /**< Number of channels (0 if unknown). */
uint32_t nrank; /**< Number of ranks (0 if unknown). */
+ /**
+ * Current lock nesting order:
+ * - qlock->mlock (ring/hash/lpm)
+ * - mplock->qlock->mlock (mempool)
+ * Note:
+ * *ALWAYS* take qlock before mlock when both locks are needed
+ */
+ rte_rwlock_t mlock; /**< only used by memzone LIB for thread-safe. */
+ rte_rwlock_t qlock; /**< used for tailq operation for thread safe. */
+ rte_rwlock_t mplock; /**< only used by mempool LIB for thread-safe. */
+
+ uint32_t memzone_idx; /**< Index of memzone */
+
/* memory segments and zones */
struct rte_memseg memseg[RTE_MAX_MEMSEG]; /**< Physmem descriptors. */
struct rte_memzone memzone[RTE_MAX_MEMZONE]; /**< Memzone descriptors. */
+ /* Runtime Physmem descriptors. */
+ struct rte_memseg free_memseg[RTE_MAX_MEMSEG];
+
struct rte_tailq_head tailq_head[RTE_MAX_TAILQ]; /**< Tailqs for objects */
} __attribute__((__packed__));
+inline static void
+rte_eal_mcfg_wait_complete(struct rte_mem_config *mcfg)
+{
+ /* wait until the shared mem_config has finished initialising */
+ while (mcfg->magic != RTE_MAGIC)
+ rte_pause();
+}
+
#ifdef __cplusplus
}
#endif
*/
#define __rte_cache_aligned __attribute__((__aligned__(CACHE_LINE_SIZE)))
-#ifndef __KERNEL__ /* so we can include this header in kernel modules */
typedef uint64_t phys_addr_t; /**< Physical address definition. */
-#endif
/**
* Physical memory segment descriptor.
#include <unistd.h>
#include <pthread.h>
#include <getopt.h>
-#include <fcntl.h>
+#include <sys/file.h>
#include <stddef.h>
#include <errno.h>
#include <limits.h>
#include <rte_launch.h>
#include <rte_tailq.h>
#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_log.h>
#include <rte_pci.h>
#include <rte_common.h>
#include <rte_version.h>
+#include <rte_atomic.h>
#include "eal_private.h"
#include "eal_thread.h"
}
rte_mem_cfg_addr = mmap(NULL, sizeof(*rte_config.mem_config),
- PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0);
+ PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0);
if (rte_mem_cfg_addr == MAP_FAILED){
rte_panic("Cannot mmap memory for rte_config\n");
}
+ memcpy(rte_mem_cfg_addr, &early_mem_config, sizeof(early_mem_config));
rte_config.mem_config = (struct rte_mem_config *) rte_mem_cfg_addr;
- memcpy(rte_config.mem_config, &early_mem_config,
- sizeof(early_mem_config));
}
/* attach to an existing shared memory config */
return;
if (mem_cfg_fd < 0){
- mem_cfg_fd = open(pathname, O_RDONLY);
+ mem_cfg_fd = open(pathname, O_RDWR);
if (mem_cfg_fd < 0)
rte_panic("Cannot open '%s' for rte_mem_config\n", pathname);
}
- rte_mem_cfg_addr = mmap(NULL, sizeof(*rte_config.mem_config), PROT_READ,
- MAP_SHARED, mem_cfg_fd, 0);
+ rte_mem_cfg_addr = mmap(NULL, sizeof(*rte_config.mem_config),
+ PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0);
close(mem_cfg_fd);
if (rte_mem_cfg_addr == MAP_FAILED)
rte_panic("Cannot mmap memory for rte_config\n");
break;
case RTE_PROC_SECONDARY:
rte_eal_config_attach();
+ rte_eal_mcfg_wait_complete(rte_config.mem_config);
break;
case RTE_PROC_AUTO:
case RTE_PROC_INVALID:
}
}
+/* Unlocks hugepage directories that were locked by eal_hugepage_info_init */
+static void
+eal_hugedirs_unlock(void)
+{
+ int i;
+
+ for (i = 0; i < MAX_HUGEPAGE_SIZES; i++) {
+ /* skip uninitialized */
+ if (internal_config.hugepage_info[i].lock_descriptor == 0)
+ continue;
+ /* unlock hugepage file */
+ flock(internal_config.hugepage_info[i].lock_descriptor, LOCK_UN);
+ close(internal_config.hugepage_info[i].lock_descriptor);
+ /* reset the field */
+ internal_config.hugepage_info[i].lock_descriptor = 0;
+ }
+}
+
/* display usage */
static void
eal_usage(const char *prgname)
for (i = 0; i < RTE_MAX_NUMA_NODES; i++)
internal_config.socket_mem[i] = 0;
+ /* zero out hugedir descriptors */
+ for (i = 0; i < MAX_HUGEPAGE_SIZES; i++)
+ internal_config.hugepage_info[i].lock_descriptor = 0;
+
while ((opt = getopt_long(argc, argvopt, "b:c:m:n:r:v",
lgopts, &option_index)) != EOF) {
"memory on local socket!\n");
}
+static int
+sync_func(__attribute__((unused)) void *arg)
+{
+ return 0;
+}
+
+inline static void
+rte_eal_mcfg_complete(void)
+{
+ /* ALL shared mem_config related INIT DONE */
+ if (rte_config.process_type == RTE_PROC_PRIMARY)
+ rte_config.mem_config->magic = RTE_MAGIC;
+}
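
rte_eal_mcfg_complete() above is the primary's half of a handshake with rte_eal_mcfg_wait_complete() in rte_eal_memconfig.h: the magic value is written only after every field of the shared rte_mem_config has been initialised, and a secondary attaching to the mapping spins until it sees that value. Below is a stripped-down sketch of the same publish/wait idea on an arbitrary shared structure; the names shared_cfg, CFG_MAGIC, cfg_publish and cfg_wait are invented for illustration.

#include <stdint.h>
#include <rte_atomic.h> /* rte_wmb(), rte_pause() */

#define CFG_MAGIC 0xC0FFEE01 /* invented sentinel for this sketch */

struct shared_cfg {
        volatile uint32_t magic; /* written last by the producer */
        uint32_t value;          /* payload initialised before magic */
};

/* producer (primary process): fill everything, then publish */
static void
cfg_publish(struct shared_cfg *cfg, uint32_t value)
{
        cfg->value = value;
        rte_wmb();               /* ensure the payload is visible before the flag */
        cfg->magic = CFG_MAGIC;
}

/* consumer (secondary process): do not touch the payload until it is published */
static void
cfg_wait(const struct shared_cfg *cfg)
{
        while (cfg->magic != CFG_MAGIC)
                rte_pause();
}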
+
/* Launch threads, called at application init(). */
int
rte_eal_init(int argc, char **argv)
{
int i, fctret, ret;
pthread_t thread_id;
+ static rte_atomic32_t run_once = RTE_ATOMIC32_INIT(0);
+
+ if (!rte_atomic32_test_and_set(&run_once))
+ return -1;
thread_id = pthread_self();
if (fctret < 0)
exit(1);
- if (eal_hugepage_info_init() < 0)
+ if (internal_config.no_hugetlbfs == 0 &&
+ internal_config.process_type != RTE_PROC_SECONDARY &&
+ eal_hugepage_info_init() < 0)
rte_panic("Cannot get hugepage information\n");
if (internal_config.memory == 0 && internal_config.force_sockets == 0) {
}
rte_srand(rte_rdtsc());
- rte_config_init();
+ rte_config_init();
+
if (rte_eal_cpu_init() < 0)
rte_panic("Cannot detect lcores\n");
if (rte_eal_memory_init() < 0)
rte_panic("Cannot init memory\n");
+ /* the directories are locked during eal_hugepage_info_init */
+ eal_hugedirs_unlock();
+
if (rte_eal_memzone_init() < 0)
rte_panic("Cannot init memzone\n");
eal_check_mem_on_local_socket();
+ rte_eal_mcfg_complete();
+
RTE_LCORE_FOREACH_SLAVE(i) {
/*
eal_thread_init_master(rte_config.master_lcore);
+ /*
+ * Launch a dummy function on all slave lcores, so that master lcore
+ * knows they are all ready when this function returns.
+ */
+ rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MASTER);
+ rte_eal_mp_wait_lcore();
+
return fctret;
}
#include <string.h>
#include <sys/types.h>
+#include <sys/file.h>
#include <dirent.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
+#include <fnmatch.h>
#include <inttypes.h>
#include <stdarg.h>
+#include <unistd.h>
#include <errno.h>
#include <sys/queue.h>
-#include "rte_memory.h"
-#include "rte_memzone.h"
-#include "rte_tailq.h"
-#include "rte_eal.h"
-#include "rte_launch.h"
-#include "rte_per_lcore.h"
-#include "rte_lcore.h"
-#include "rte_debug.h"
-#include "rte_log.h"
-#include "rte_common.h"
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_tailq.h>
+#include <rte_eal.h>
+#include <rte_launch.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_debug.h>
+#include <rte_log.h>
+#include <rte_common.h>
#include "rte_string_fns.h"
#include "eal_internal_cfg.h"
#include "eal_hugepages.h"
static int32_t
get_num_hugepages(const char *subdir)
{
- const char nr_hp_file[] = "nr_hugepages";
- char path[BUFSIZ];
- unsigned num_pages = 0;
+ char path[PATH_MAX];
+ long unsigned num_pages = 0;
+ const char *nr_hp_file;
+
+ /* if secondary process, just look at the number of hugepages,
+ * otherwise look at number of free hugepages */
+ if (internal_config.process_type == RTE_PROC_SECONDARY)
+ nr_hp_file = "nr_hugepages";
+ else
+ nr_hp_file = "free_hugepages";
rte_snprintf(path, sizeof(path), "%s/%s/%s",
sys_dir_path, subdir, nr_hp_file);
if (eal_parse_sysfs_value(path, &num_pages) < 0)
return 0;
- return num_pages;
+ if (num_pages == 0)
+ RTE_LOG(ERR, EAL, "Error - no free hugepages available!\n");
+
+ return (int32_t)num_pages;
}
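
get_num_hugepages() defers the actual read to eal_parse_sysfs_value(), whose implementation is not part of this hunk. For reference, a minimal equivalent that pulls one unsigned value out of a sysfs file (e.g. /sys/kernel/mm/hugepages/hugepages-2048kB/free_hugepages) could look like the sketch below; the name parse_sysfs_value_sketch is invented.

#include <stdio.h>
#include <stdlib.h>

static int
parse_sysfs_value_sketch(const char *filename, unsigned long *val)
{
        FILE *f = fopen(filename, "r");
        char buf[BUFSIZ];
        char *end = NULL;

        if (f == NULL)
                return -1;
        if (fgets(buf, sizeof(buf), f) == NULL) {
                fclose(f);
                return -1;
        }
        *val = strtoul(buf, &end, 0);
        /* reject empty or non-numeric content */
        if (buf[0] == '\0' || end == NULL || (*end != '\n' && *end != '\0')) {
                fclose(f);
                return -1;
        }
        fclose(f);
        return 0;
}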
static uint64_t
swap_hpi(struct hugepage_info *a, struct hugepage_info *b)
{
char buf[sizeof(*a)];
- memcpy(buf, a, sizeof(*a));
- memcpy(a, b, sizeof(*a));
- memcpy(b, buf, sizeof(*a));
+ memcpy(buf, a, sizeof(buf));
+ memcpy(a, b, sizeof(buf));
+ memcpy(b, buf, sizeof(buf));
+}
+
+/*
+ * Clear the hugepage directory of any leftover hugepage files,
+ * skipping any file that is locked (i.e. still in use by another
+ * DPDK process).
+ */
+static int
+clear_hugedir(const char * hugedir)
+{
+ DIR *dir;
+ struct dirent *dirent;
+ int dir_fd, fd, lck_result;
+ const char filter[] = "*map_*"; /* matches hugepage files */
+
+ /* open directory */
+ dir = opendir(hugedir);
+ if (!dir) {
+ RTE_LOG(INFO, EAL, "Unable to open hugepage directory %s\n",
+ hugedir);
+ goto error;
+ }
+ dir_fd = dirfd(dir);
+
+ dirent = readdir(dir);
+ if (!dirent) {
+ RTE_LOG(INFO, EAL, "Unable to read hugepage directory %s\n",
+ hugedir);
+ goto error;
+ }
+
+ while (dirent != NULL) {
+ /* skip files that don't match the hugepage pattern */
+ if (fnmatch(filter, dirent->d_name, 0) > 0) {
+ dirent = readdir(dir);
+ continue;
+ }
+
+ /* try and lock the file */
+ fd = openat(dir_fd, dirent->d_name, O_RDONLY);
+
+ /* skip to next file */
+ if (fd == -1) {
+ dirent = readdir(dir);
+ continue;
+ }
+
+ /* non-blocking lock */
+ lck_result = flock(fd, LOCK_EX | LOCK_NB);
+
+ /* if lock succeeds, unlock and remove the file */
+ if (lck_result != -1) {
+ flock(fd, LOCK_UN);
+ unlinkat(dir_fd, dirent->d_name, 0);
+ }
+ close(fd);
+ dirent = readdir(dir);
+ }
+
+ closedir(dir);
+ return 0;
+
+error:
+ if (dir)
+ closedir(dir);
+
+ RTE_LOG(INFO, EAL, "Error while clearing hugepage dir: %s\n",
+ strerror(errno));
+
+ return -1;
}
/*
(unsigned) get_num_hugepages(dirent->d_name),
(unsigned long long)hpi->hugepage_sz);
} else {
+ /* take an exclusive lock on the hugepage directory */
+ hpi->lock_descriptor = open(hpi->hugedir, O_RDONLY);
+
+ /* if blocking lock failed */
+ if (flock(hpi->lock_descriptor, LOCK_EX) == -1) {
+ RTE_LOG(CRIT, EAL, "Failed to lock hugepage directory!\n");
+ return -1;
+ }
+ /* clear out the hugepages dir from unused pages */
+ if (clear_hugedir(hpi->hugedir) == -1)
+ return -1;
+
/* for now, put all pages into socket 0,
* later they will be sorted */
hpi->num_pages[0] = get_num_hugepages(dirent->d_name);
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/queue.h>
-#include <fcntl.h>
+#include <sys/file.h>
#include <unistd.h>
#include <limits.h>
#include <errno.h>
#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <sys/resource.h>
#include <rte_log.h>
#include <rte_memory.h>
#include <rte_launch.h>
#include <rte_tailq.h>
#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_common.h>
}
}
+/*
+ * Increase limit for open files for current process
+ */
+static int
+increase_open_file_limit(void)
+{
+ struct rlimit limit;
+
+ /* read current limits */
+ if (getrlimit(RLIMIT_NOFILE, &limit) != 0) {
+ RTE_LOG(ERR, EAL, "Error reading resource limit: %s\n",
+ strerror(errno));
+ return -1;
+ }
+
+ /* if the soft limit is below the hard limit, just raise it */
+ if (limit.rlim_cur < limit.rlim_max) {
+ /* set soft limit to match hard limit */
+ limit.rlim_cur = limit.rlim_max;
+ } else {
+ /* the soft limit is already at the hard limit, so try to raise
+ * both soft and hard limit; this may fail when not running as root.
+ */
+ limit.rlim_cur *= 2;
+ limit.rlim_max *= 2;
+ }
+
+ /* set current resource limit */
+ if (setrlimit(RLIMIT_NOFILE, &limit) != 0) {
+ RTE_LOG(ERR, EAL, "Error increasing open files limit: %s\n",
+ strerror(errno));
+ return -1;
+ }
+
+ return 0;
+}
+
/*
* Try to mmap *size bytes in /dev/zero. If it is successful, return the
* pointer to the mmap'd area and keep *size unmodified. Else, retry
vma_len = hugepage_sz;
}
+ /* try to create hugepage file */
fd = open(hugepg_tbl[i].filepath, O_CREAT | O_RDWR, 0755);
if (fd < 0) {
RTE_LOG(ERR, EAL, "%s(): open failed: %s\n", __func__,
hugepg_tbl[i].final_va = virtaddr;
}
+ /* close the file descriptor, files will be locked later */
+ close(fd);
+
vma_addr = (char *)vma_addr + hugepage_sz;
vma_len -= hugepage_sz;
- close(fd);
}
return 0;
}
munmap(hp->final_va, hp->size);
hp->final_va = NULL;
}
+ /* otherwise lock the page and count it */
else {
+ /* try and open the hugepage file */
+ while ((fd = open(hp->filepath, O_CREAT | O_RDWR, 0755)) < 0) {
+ /* if we can't open due to resource limits */
+ if (errno == EMFILE) {
+ RTE_LOG(INFO, EAL, "Increasing open file limit\n");
+
+ /* if we manage to increase resource limit, try again */
+ if (increase_open_file_limit() == 0)
+ continue;
+ }
+ else
+ RTE_LOG(ERR, EAL, "%s(): open failed: %s\n", __func__,
+ strerror(errno));
+ return -1;
+ }
+ /* try and lock the hugepage */
+ if (flock(fd, LOCK_SH | LOCK_NB) == -1) {
+ RTE_LOG(ERR, EAL, "Locking hugepage file failed!\n");
+ close(fd);
+ return -1;
+ }
+ hp->page_lock = fd;
pages_found++;
}
} /* match page */
int socket_id; /**< NUMA socket ID */
int file_id; /**< the '%d' in HUGEFILE_FMT */
int memseg_id; /**< the memory segment to which page belongs */
- char filepath[MAX_HUGEPAGE_PATH]; /**< Path to backing file on filesystem */
+ char filepath[MAX_HUGEPAGE_PATH]; /**< path to backing file on filesystem */
+ int page_lock; /**< descriptor for hugepage file */
};
/**
const char *hugedir; /**< dir where hugetlbfs is mounted */
uint32_t num_pages[RTE_MAX_NUMA_NODES];
/**< number of hugepages of that size on each socket */
+ int lock_descriptor; /**< file descriptor for hugepage dir */
};
/**
#include <rte_string_fns.h>
#include <rte_cpuflags.h>
#include <rte_log.h>
+#include <rte_spinlock.h>
#include "rte_fbk_hash.h"
return NULL;
}
+ rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK);
TAILQ_FOREACH(h, fbk_hash_list, next) {
if (strncmp(name, h->name, RTE_FBK_HASH_NAMESIZE) == 0)
break;
}
+ rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK);
if (h == NULL)
rte_errno = ENOENT;
return h;
rte_snprintf(hash_name, sizeof(hash_name), "FBK_%s", params->name);
+ rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
+
/* guarantee there's no existing */
TAILQ_FOREACH(ht, fbk_hash_list, next) {
if (strncmp(params->name, ht->name, RTE_FBK_HASH_NAMESIZE) == 0)
break;
}
if (ht != NULL)
- return NULL;
+ goto exit;
/* Allocate memory for table. */
ht = (struct rte_fbk_hash_table *)rte_malloc_socket(hash_name, mem_size,
0, params->socket_id);
if (ht == NULL)
- return NULL;
+ goto exit;
+
memset(ht, 0, mem_size);
/* Set up hash table context. */
}
TAILQ_INSERT_TAIL(fbk_hash_list, ht, next);
+
+exit:
+ rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+
return ht;
}
#include <rte_string_fns.h>
#include <rte_cpuflags.h>
#include <rte_log.h>
+#include <rte_rwlock.h>
+#include <rte_spinlock.h>
#include "rte_hash.h"
return NULL;
}
+ rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK);
TAILQ_FOREACH(h, hash_list, next) {
if (strncmp(name, h->name, RTE_HASH_NAMESIZE) == 0)
break;
}
+ rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK);
+
if (h == NULL)
rte_errno = ENOENT;
return h;
/* Total memory required for hash context */
mem_size = hash_tbl_size + sig_tbl_size + key_tbl_size;
+ rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
+
/* guarantee there's no existing */
TAILQ_FOREACH(h, hash_list, next) {
if (strncmp(params->name, h->name, RTE_HASH_NAMESIZE) == 0)
break;
}
if (h != NULL)
- return NULL;
+ goto exit;
h = (struct rte_hash *)rte_zmalloc_socket(hash_name, mem_size,
CACHE_LINE_SIZE, params->socket_id);
if (h == NULL) {
RTE_LOG(ERR, HASH, "memory allocation failed\n");
- return NULL;
+ goto exit;
}
/* Setup hash context */
DEFAULT_HASH_FUNC : params->hash_func;
TAILQ_INSERT_TAIL(hash_list, h, next);
+
+exit:
+ rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+
return h;
}
#include <rte_per_lcore.h>
#include <rte_string_fns.h>
#include <rte_errno.h>
+#include <rte_rwlock.h>
+#include <rte_spinlock.h>
#include "rte_lpm.h"
return NULL;
}
+ rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK);
TAILQ_FOREACH(l, lpm_list, next) {
if (strncmp(name, l->name, RTE_LPM_NAMESIZE) == 0)
break;
}
+ rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK);
if (l == NULL)
rte_errno = ENOENT;
/* Determine the amount of memory to allocate. */
mem_size = sizeof(*lpm) + (sizeof(lpm->rules_tbl[0]) * max_rules);
+ rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
+
/* guarantee there's no existing */
TAILQ_FOREACH(lpm, lpm_list, next) {
if (strncmp(name, lpm->name, RTE_LPM_NAMESIZE) == 0)
break;
}
if (lpm != NULL)
- return NULL;
+ goto exit;
/* Allocate memory to store the LPM data structures. */
lpm = (struct rte_lpm *)rte_zmalloc_socket(mem_name, mem_size,
CACHE_LINE_SIZE, socket_id);
if (lpm == NULL) {
RTE_LOG(ERR, LPM, "LPM memory allocation failed\n");
- return NULL;
+ goto exit;
}
/* Save user arguments. */
TAILQ_INSERT_TAIL(lpm_list, lpm, next);
+exit:
+ rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+
return lpm;
}
#include <rte_ring.h>
#include <rte_errno.h>
#include <rte_string_fns.h>
+#include <rte_spinlock.h>
#include "rte_mempool.h"
if (flags & MEMPOOL_F_SC_GET)
rg_flags |= RING_F_SC_DEQ;
+ rte_rwlock_write_lock(RTE_EAL_MEMPOOL_RWLOCK);
+
/* allocate the ring that will be used to store objects */
/* Ring functions will return appropriate errors if we are
* running as a secondary process etc., so no checks made
rte_snprintf(rg_name, sizeof(rg_name), "MP_%s", name);
r = rte_ring_create(rg_name, rte_align32pow2(n+1), socket_id, rg_flags);
if (r == NULL)
- return NULL;
+ goto exit;
/*
* In header, we have at least the pointer to the pool, and
mempool_size = total_elt_size * n +
sizeof(struct rte_mempool) + private_data_size;
rte_snprintf(mz_name, sizeof(mz_name), "MP_%s", name);
+
mz = rte_memzone_reserve(mz_name, mempool_size, socket_id, mz_flags);
/*
* space for the as we cannot free it
*/
if (mz == NULL)
- return NULL;
+ goto exit;
/* init the mempool structure */
mp = mz->addr;
RTE_EAL_TAILQ_INSERT_TAIL(RTE_TAILQ_MEMPOOL, rte_mempool_list, mp);
+exit:
+ rte_rwlock_write_unlock(RTE_EAL_MEMPOOL_RWLOCK);
+
return mp;
}
{
mempool_audit_cache(mp);
mempool_audit_cookies(mp);
+
+ /* keep the compiler quiet when mempool debug is disabled and the cache size is 0 */
+ RTE_SET_USED(mp);
}
/* dump the status of the mempool on the console */
return;
}
+ rte_rwlock_read_lock(RTE_EAL_MEMPOOL_RWLOCK);
+
TAILQ_FOREACH(mp, mempool_list, next) {
rte_mempool_dump(mp);
}
+
+ rte_rwlock_read_unlock(RTE_EAL_MEMPOOL_RWLOCK);
}
/* search a mempool from its name */
return NULL;
}
+ rte_rwlock_read_lock(RTE_EAL_MEMPOOL_RWLOCK);
+
TAILQ_FOREACH(mp, mempool_list, next) {
if (strncmp(name, mp->name, RTE_MEMPOOL_NAMESIZE) == 0)
break;
}
+
+ rte_rwlock_read_unlock(RTE_EAL_MEMPOOL_RWLOCK);
+
if (mp == NULL)
rte_errno = ENOENT;
#include <rte_branch_prediction.h>
#include <rte_errno.h>
#include <rte_string_fns.h>
+#include <rte_spinlock.h>
#include "rte_ring.h"
rte_snprintf(mz_name, sizeof(mz_name), "RG_%s", name);
ring_size = count * sizeof(void *) + sizeof(struct rte_ring);
+ rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
+
/* reserve a memory zone for this ring. If we can't get rte_config or
* we are secondary process, the memzone_reserve function will set
* rte_errno for us appropriately - hence no check in this function */
r = NULL;
RTE_LOG(ERR, RING, "Cannot reserve memory\n");
}
+ rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
return r;
}
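
With creation serialised on RTE_EAL_TAILQ_RWLOCK, a ring created by the primary can be looked up by name from a secondary even while other creations are in flight. A minimal multi-process usage sketch, not part of the patch; the ring name "MP_RING" and both helpers are invented for illustration.

#include <rte_ring.h>
#include <rte_lcore.h>

/* primary process: create the shared ring once */
static struct rte_ring *
mp_ring_create(void)
{
        /* size must be a power of two; single producer / single consumer here */
        return rte_ring_create("MP_RING", 1024, rte_socket_id(),
                        RING_F_SP_ENQ | RING_F_SC_DEQ);
}

/* secondary process: find the ring by name and pull one object */
static void *
mp_ring_get(void)
{
        struct rte_ring *r = rte_ring_lookup("MP_RING");
        void *obj = NULL;

        if (r == NULL || rte_ring_dequeue(r, &obj) < 0)
                return NULL;
        return obj;
}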
return;
}
+ rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK);
+
TAILQ_FOREACH(mp, ring_list, next) {
rte_ring_dump(mp);
}
+
+ rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK);
}
/* search a ring from its name */
return NULL;
}
+ rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK);
+
TAILQ_FOREACH(r, ring_list, next) {
if (strncmp(name, r->name, RTE_RING_NAMESIZE) == 0)
break;
}
+ rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK);
+
if (r == NULL)
rte_errno = ENOENT;