/*-
* BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *
+ * Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
* All rights reserved.
- *
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
- *
+ *
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
- *
+ *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
+#include <rte_memzone.h>
+
+#define RTE_TAILQ_RING_NAME "RTE_RING"
enum rte_ring_queue_behavior {
RTE_RING_QUEUE_FIXED = 0, /* Enq/Deq a fixed number of items from a ring */
- RTE_RING_QUEUE_VARIABLE /* Enq/Deq as many items a possible from ring */
+ RTE_RING_QUEUE_VARIABLE /* Enq/Deq as many items as possible from ring */
};
#ifdef RTE_LIBRTE_RING_DEBUG
} __rte_cache_aligned;
#endif
-#define RTE_RING_NAMESIZE 32 /**< The maximum length of a ring name. */
#define RTE_RING_MZ_PREFIX "RG_"
+/** The maximum length of a ring name. */
+#define RTE_RING_NAMESIZE (RTE_MEMZONE_NAMESIZE - \
+ sizeof(RTE_RING_MZ_PREFIX) + 1)
+
+#ifndef RTE_RING_PAUSE_REP_COUNT
+#define RTE_RING_PAUSE_REP_COUNT 0 /**< Yield after pausing this many times;
+ * no yield if RTE_RING_PAUSE_REP_COUNT is 0. */
+#endif
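+/* The #ifndef guard above lets the count be overridden at build time,
+ * e.g. by adding -DRTE_RING_PAUSE_REP_COUNT=100 to CFLAGS. */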
+
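+/* On targets whose cache lines are smaller than 128 bytes, pad the producer
+ * and consumer metadata to two cache lines each; hardware prefetchers that
+ * fetch cache lines in adjacent pairs could otherwise pull one side's
+ * head/tail into the core running the other side and cause false sharing. */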
+#if RTE_CACHE_LINE_SIZE < 128
+#define PROD_ALIGN (RTE_CACHE_LINE_SIZE * 2)
+#define CONS_ALIGN (RTE_CACHE_LINE_SIZE * 2)
+#else
+#define PROD_ALIGN RTE_CACHE_LINE_SIZE
+#define CONS_ALIGN RTE_CACHE_LINE_SIZE
+#endif
+
+/* structure to hold a pair of head/tail values and other metadata */
+struct rte_ring_headtail {
+ volatile uint32_t head; /**< Prod/consumer head. */
+ volatile uint32_t tail; /**< Prod/consumer tail. */
+ uint32_t single; /**< True if single prod/cons */
+};
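+
+/* The structure above replaces the old separate prod/cons structs in
+ * struct rte_ring; the per-side sp_enqueue and sc_dequeue flags collapse
+ * into the shared `single` field. */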
/**
* An RTE ring structure.
* a problem.
*/
struct rte_ring {
- TAILQ_ENTRY(rte_ring) next; /**< Next in list. */
-
- char name[RTE_RING_NAMESIZE]; /**< Name of the ring. */
- int flags; /**< Flags supplied at creation. */
+ /*
+ * Note: this field keeps the RTE_MEMZONE_NAMESIZE size due to ABI
+ * compatibility requirements; it could be changed to RTE_RING_NAMESIZE
+ * the next time the ABI changes.
+ */
+ char name[RTE_MEMZONE_NAMESIZE]; /**< Name of the ring. */
+ int flags; /**< Flags supplied at creation. */
+ const struct rte_memzone *memzone;
+ /**< Memzone, if any, containing the rte_ring */
+ uint32_t size; /**< Size of ring. */
+ uint32_t mask; /**< Mask (size-1) of ring. */
+ uint32_t watermark; /**< Max items before EDQUOT in producer. */
/** Ring producer status. */
- struct prod {
- uint32_t watermark; /**< Maximum items before EDQUOT. */
- uint32_t sp_enqueue; /**< True, if single producer. */
- uint32_t size; /**< Size of ring. */
- uint32_t mask; /**< Mask (size-1) of ring. */
- volatile uint32_t head; /**< Producer head. */
- volatile uint32_t tail; /**< Producer tail. */
- } prod __rte_cache_aligned;
+ struct rte_ring_headtail prod __rte_aligned(PROD_ALIGN);
/** Ring consumer status. */
- struct cons {
- uint32_t sc_dequeue; /**< True, if single consumer. */
- uint32_t size; /**< Size of the ring. */
- uint32_t mask; /**< Mask (size-1) of ring. */
- volatile uint32_t head; /**< Consumer head. */
- volatile uint32_t tail; /**< Consumer tail. */
-#ifdef RTE_RING_SPLIT_PROD_CONS
- } cons __rte_cache_aligned;
-#else
- } cons;
-#endif
+ struct rte_ring_headtail cons __rte_aligned(CONS_ALIGN);
#ifdef RTE_LIBRTE_RING_DEBUG
struct rte_ring_debug_stats stats[RTE_MAX_LCORE];
#endif
- void * ring[0] __rte_cache_aligned; /**< Memory space of ring starts here.
- * not volatile so need to be careful
- * about compiler re-ordering */
+ void *ring[] __rte_cache_aligned; /**< Memory space of ring starts here.
+ * Not volatile, so be careful about
+ * compiler re-ordering. */
};
#define RING_F_SP_ENQ 0x0001 /**< The default enqueue is "single-producer". */
* The number to add to the object-oriented statistics.
*/
#ifdef RTE_LIBRTE_RING_DEBUG
-#define __RING_STAT_ADD(r, name, n) do { \
- unsigned __lcore_id = rte_lcore_id(); \
- r->stats[__lcore_id].name##_objs += n; \
- r->stats[__lcore_id].name##_bulk += 1; \
+#define __RING_STAT_ADD(r, name, n) do { \
+ unsigned __lcore_id = rte_lcore_id(); \
+ if (__lcore_id < RTE_MAX_LCORE) { \
+ r->stats[__lcore_id].name##_objs += n; \
+ r->stats[__lcore_id].name##_bulk += 1; \
+ } \
} while(0)
#else
#define __RING_STAT_ADD(r, name, n) do {} while(0)
* rte_errno set appropriately. Possible errno values include:
* - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure
* - E_RTE_SECONDARY - function was called from a secondary process instance
- * - E_RTE_NO_TAILQ - no tailq list could be got for the ring list
* - EINVAL - count provided is not a power of 2
* - ENOSPC - the maximum number of memzones has already been allocated
* - EEXIST - a memzone with the same name already exists
*/
struct rte_ring *rte_ring_create(const char *name, unsigned count,
int socket_id, unsigned flags);
+/**
+ * De-allocate all memory used by the ring.
+ *
+ * @param r
+ * Ring to free
+ */
+void rte_ring_free(struct rte_ring *r);
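+
+/*
+ * Illustrative round trip (a sketch, not part of the API): everything here
+ * except the ring calls themselves is a placeholder.
+ *
+ *   struct rte_ring *r = rte_ring_create("example_ring", 1024,
+ *                                        SOCKET_ID_ANY,
+ *                                        RING_F_SP_ENQ | RING_F_SC_DEQ);
+ *   void *obj = some_object;
+ *   if (r != NULL &&
+ *       rte_ring_enqueue(r, obj) == 0 &&
+ *       rte_ring_dequeue(r, &obj) == 0)
+ *       use(obj);                 hypothetical consumer of the object
+ *   rte_ring_free(r);
+ */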
/**
* Change the high water mark.
int rte_ring_set_water_mark(struct rte_ring *r, unsigned count);
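+/*
+ * Illustrative semantics, following the enqueue paths below: on a 1024-slot
+ * ring, rte_ring_set_water_mark(r, 512) makes any enqueue that leaves more
+ * than 512 entries in the ring report -EDQUOT (or OR RTE_RING_QUOT_EXCEED
+ * into the return value on burst calls) while still enqueuing the objects.
+ */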
/**
- * Dump the status of the ring to the console.
+ * Dump the status of the ring to a file.
*
* @param f
* A pointer to a file for output
*/
void rte_ring_dump(FILE *f, const struct rte_ring *r);
-/* the actual enqueue of pointers on the ring.
+/* the actual enqueue of pointers on the ring.
* Placed here since identical code needed in both
* single and multi producer enqueue functions */
#define ENQUEUE_PTRS() do { \
- const uint32_t size = r->prod.size; \
+ const uint32_t size = r->size; \
uint32_t idx = prod_head & mask; \
if (likely(idx + n < size)) { \
for (i = 0; i < (n & ((~(unsigned)0x3))); i+=4, idx+=4) { \
} \
} while(0)
-/* the actual copy of pointers on the ring to obj_table.
+/* the actual copy of pointers on the ring to obj_table.
* Placed here since identical code needed in both
* single and multi consumer dequeue functions */
#define DEQUEUE_PTRS() do { \
uint32_t idx = cons_head & mask; \
- const uint32_t size = r->cons.size; \
+ const uint32_t size = r->size; \
if (likely(idx + n < size)) { \
for (i = 0; i < (n & (~(unsigned)0x3)); i+=4, idx+=4) {\
obj_table[i] = r->ring[idx]; \
uint32_t cons_tail, free_entries;
const unsigned max = n;
int success;
- unsigned i;
- uint32_t mask = r->prod.mask;
+ unsigned i, rep = 0;
+ uint32_t mask = r->mask;
int ret;
+ /* Avoid the unnecessary cmpset operation below, which is also
+ * potentially harmful when n equals 0. */
+ if (n == 0)
+ return 0;
+
/* move prod.head atomically */
do {
/* Reset n to the initial burst count */
/* write entries in ring */
ENQUEUE_PTRS();
- rte_compiler_barrier();
+ rte_smp_wmb();
/* if we exceed the watermark */
- if (unlikely(((mask + 1) - free_entries + n) > r->prod.watermark)) {
+ if (unlikely(((mask + 1) - free_entries + n) > r->watermark)) {
ret = (behavior == RTE_RING_QUEUE_FIXED) ? -EDQUOT :
(int)(n | RTE_RING_QUOT_EXCEED);
__RING_STAT_ADD(r, enq_quota, n);
* If there are other enqueues in progress that preceded us,
* we need to wait for them to complete
*/
- while (unlikely(r->prod.tail != prod_head))
+ while (unlikely(r->prod.tail != prod_head)) {
rte_pause();
+ /* Set RTE_RING_PAUSE_REP_COUNT to avoid spinning too long while
+ * waiting for other threads to finish; it gives a preempted thread
+ * a chance to proceed with its ring enqueue operation. */
+ if (RTE_RING_PAUSE_REP_COUNT &&
+ ++rep == RTE_RING_PAUSE_REP_COUNT) {
+ rep = 0;
+ sched_yield();
+ }
+ }
r->prod.tail = prod_next;
return ret;
}
uint32_t prod_head, cons_tail;
uint32_t prod_next, free_entries;
unsigned i;
- uint32_t mask = r->prod.mask;
+ uint32_t mask = r->mask;
int ret;
prod_head = r->prod.head;
/* write entries in ring */
ENQUEUE_PTRS();
- rte_compiler_barrier();
+ rte_smp_wmb();
/* if we exceed the watermark */
- if (unlikely(((mask + 1) - free_entries + n) > r->prod.watermark)) {
+ if (unlikely(((mask + 1) - free_entries + n) > r->watermark)) {
ret = (behavior == RTE_RING_QUEUE_FIXED) ? -EDQUOT :
(int)(n | RTE_RING_QUOT_EXCEED);
__RING_STAT_ADD(r, enq_quota, n);
uint32_t cons_next, entries;
const unsigned max = n;
int success;
- unsigned i;
- uint32_t mask = r->prod.mask;
+ unsigned i, rep = 0;
+ uint32_t mask = r->mask;
+
+ /* Avoid the unnecessary cmpset operation below, which is also
+ * potentially harmful when n equals 0. */
+ if (n == 0)
+ return 0;
/* move cons.head atomically */
do {
/* copy in table */
DEQUEUE_PTRS();
- rte_compiler_barrier();
+ rte_smp_rmb();
/*
* If there are other dequeues in progress that preceded us,
* we need to wait for them to complete
*/
- while (unlikely(r->cons.tail != cons_head))
+ while (unlikely(r->cons.tail != cons_head)) {
rte_pause();
+ /* Set RTE_RING_PAUSE_REP_COUNT to avoid spinning too long while
+ * waiting for other threads to finish; it gives a preempted thread
+ * a chance to proceed with its ring dequeue operation. */
+ if (RTE_RING_PAUSE_REP_COUNT &&
+ ++rep == RTE_RING_PAUSE_REP_COUNT) {
+ rep = 0;
+ sched_yield();
+ }
+ }
__RING_STAT_ADD(r, deq_success, n);
r->cons.tail = cons_next;
uint32_t cons_head, prod_tail;
uint32_t cons_next, entries;
unsigned i;
- uint32_t mask = r->prod.mask;
+ uint32_t mask = r->mask;
cons_head = r->cons.head;
prod_tail = r->prod.tail;
/* copy in table */
DEQUEUE_PTRS();
- rte_compiler_barrier();
+ rte_smp_rmb();
__RING_STAT_ADD(r, deq_success, n);
r->cons.tail = cons_next;
rte_ring_enqueue_bulk(struct rte_ring *r, void * const *obj_table,
unsigned n)
{
- if (r->prod.sp_enqueue)
+ if (r->prod.single)
return rte_ring_sp_enqueue_bulk(r, obj_table, n);
else
return rte_ring_mp_enqueue_bulk(r, obj_table, n);
static inline int __attribute__((always_inline))
rte_ring_enqueue(struct rte_ring *r, void *obj)
{
- if (r->prod.sp_enqueue)
+ if (r->prod.single)
return rte_ring_sp_enqueue(r, obj);
else
return rte_ring_mp_enqueue(r, obj);
static inline int __attribute__((always_inline))
rte_ring_dequeue_bulk(struct rte_ring *r, void **obj_table, unsigned n)
{
- if (r->cons.sc_dequeue)
+ if (r->cons.single)
return rte_ring_sc_dequeue_bulk(r, obj_table, n);
else
return rte_ring_mc_dequeue_bulk(r, obj_table, n);
static inline int __attribute__((always_inline))
rte_ring_dequeue(struct rte_ring *r, void **obj_p)
{
- if (r->cons.sc_dequeue)
+ if (r->cons.single)
return rte_ring_sc_dequeue(r, obj_p);
else
return rte_ring_mc_dequeue(r, obj_p);
{
uint32_t prod_tail = r->prod.tail;
uint32_t cons_tail = r->cons.tail;
- return (((cons_tail - prod_tail - 1) & r->prod.mask) == 0);
+ return ((cons_tail - prod_tail - 1) & r->mask) == 0;
}
/**
{
uint32_t prod_tail = r->prod.tail;
uint32_t cons_tail = r->cons.tail;
- return ((prod_tail - cons_tail) & r->prod.mask);
+ return (prod_tail - cons_tail) & r->mask;
}
/**
{
uint32_t prod_tail = r->prod.tail;
uint32_t cons_tail = r->cons.tail;
- return ((cons_tail - prod_tail - 1) & r->prod.mask);
+ return (cons_tail - prod_tail - 1) & r->mask;
+}
+
+/**
+ * Return the size of the ring.
+ *
+ * @param r
+ * A pointer to the ring structure.
+ * @return
+ * The number of elements which can be stored in the ring.
+ */
+static inline unsigned int
+rte_ring_get_size(const struct rte_ring *r)
+{
+ return r->size;
+}
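+
+/*
+ * Illustrative arithmetic for the helpers above: with size = 8 (mask = 7),
+ * prod_tail = 9 and cons_tail = 6 give
+ *   count = (9 - 6) & 7 = 3
+ *   free  = (6 - 9 - 1) & 7 = 4
+ * count + free = 7 = size - 1: one slot is always left empty so that a full
+ * ring can be distinguished from an empty one.
+ */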
/**
* @return
* - n: Actual number of objects enqueued.
*/
-static inline int __attribute__((always_inline))
+static inline unsigned __attribute__((always_inline))
rte_ring_mp_enqueue_burst(struct rte_ring *r, void * const *obj_table,
unsigned n)
{
* @return
* - n: Actual number of objects enqueued.
*/
-static inline int __attribute__((always_inline))
+static inline unsigned __attribute__((always_inline))
rte_ring_sp_enqueue_burst(struct rte_ring *r, void * const *obj_table,
unsigned n)
{
* @return
* - n: Actual number of objects enqueued.
*/
-static inline int __attribute__((always_inline))
+static inline unsigned __attribute__((always_inline))
rte_ring_enqueue_burst(struct rte_ring *r, void * const *obj_table,
unsigned n)
{
- if (r->prod.sp_enqueue)
- return rte_ring_sp_enqueue_burst(r, obj_table, n);
+ if (r->prod.single)
+ return rte_ring_sp_enqueue_burst(r, obj_table, n);
else
- return rte_ring_mp_enqueue_burst(r, obj_table, n);
+ return rte_ring_mp_enqueue_burst(r, obj_table, n);
}
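+
+/*
+ * Illustrative burst enqueue (a sketch; pkts and nb are placeholders, and no
+ * watermark is assumed, since RTE_RING_QUOT_EXCEED would then be OR'd into
+ * the return value):
+ *
+ *   unsigned int sent = rte_ring_enqueue_burst(r, (void **)pkts, nb);
+ *   if (sent < nb)
+ *       drop_or_retry(&pkts[sent], nb - sent);   hypothetical fallback
+ */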
/**
* @return
* - n: Actual number of objects dequeued, 0 if ring is empty
*/
-static inline int __attribute__((always_inline))
+static inline unsigned __attribute__((always_inline))
rte_ring_mc_dequeue_burst(struct rte_ring *r, void **obj_table, unsigned n)
{
return __rte_ring_mc_do_dequeue(r, obj_table, n, RTE_RING_QUEUE_VARIABLE);
* @return
* - n: Actual number of objects dequeued, 0 if ring is empty
*/
-static inline int __attribute__((always_inline))
+static inline unsigned __attribute__((always_inline))
rte_ring_sc_dequeue_burst(struct rte_ring *r, void **obj_table, unsigned n)
{
return __rte_ring_sc_do_dequeue(r, obj_table, n, RTE_RING_QUEUE_VARIABLE);
* @param n
* The number of objects to dequeue from the ring to the obj_table.
* @return
- * - Number of objects dequeued, or a negative error code on error
+ * - Number of objects dequeued
*/
-static inline int __attribute__((always_inline))
+static inline unsigned __attribute__((always_inline))
rte_ring_dequeue_burst(struct rte_ring *r, void **obj_table, unsigned n)
{
- if (r->cons.sc_dequeue)
+ if (r->cons.single)
return rte_ring_sc_dequeue_burst(r, obj_table, n);
else
return rte_ring_mc_dequeue_burst(r, obj_table, n);