/*-
* BSD LICENSE
- *
- * Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
* All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
* are met:
- *
- * * Redistributions of source code must retain the above copyright
+ *
+ * * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
* distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
/*
extern "C" {
#endif
+#include <stdio.h>
+#include <sched.h>
#include <stdint.h>
#include <sys/queue.h>
#include <errno.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
+#define RTE_TAILQ_RING_NAME "RTE_RING"
+
enum rte_ring_queue_behavior {
RTE_RING_QUEUE_FIXED = 0, /* Enq/Deq a fixed number of items from a ring */
	RTE_RING_QUEUE_VARIABLE /* Enq/Deq as many items as possible from ring */
#endif
#define RTE_RING_NAMESIZE 32 /**< The maximum length of a ring name. */
+#define RTE_RING_MZ_PREFIX "RG_"
+
+#ifndef RTE_RING_PAUSE_REP_COUNT
+#define RTE_RING_PAUSE_REP_COUNT 0 /**< Yield after this many rte_pause() calls;
+                                    *   never yield if the value is 0. */
+#endif
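
Since the #ifndef guard above lets a build override the default, a minimal sketch of doing so (the value 64 is purely illustrative) could look like:

    /* Define before including rte_ring.h, or pass -DRTE_RING_PAUSE_REP_COUNT=64
     * in CFLAGS: a thread waiting on the ring tail then calls sched_yield()
     * after 64 rte_pause() iterations instead of spinning indefinitely. */
    #define RTE_RING_PAUSE_REP_COUNT 64
    #include <rte_ring.h>
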
+
+struct rte_memzone; /* forward declaration, so as not to require memzone.h */
/**
* An RTE ring structure.
* a problem.
*/
struct rte_ring {
- TAILQ_ENTRY(rte_ring) next; /**< Next in list. */
-
char name[RTE_RING_NAMESIZE]; /**< Name of the ring. */
int flags; /**< Flags supplied at creation. */
+ const struct rte_memzone *memzone;
+ /**< Memzone, if any, containing the rte_ring */
/** Ring producer status. */
struct prod {
#endif
void * ring[0] __rte_cache_aligned; /**< Memory space of ring starts here.
- * not volatile so need to be careful
- * about compiler re-ordering */
+ * not volatile so need to be careful
+ * about compiler re-ordering */
};
-/* dummy assembly operation to prevent compiler re-ordering of instructions */
-#define COMPILER_BARRIER() do { asm volatile("" ::: "memory"); } while(0)
-
#define RING_F_SP_ENQ 0x0001 /**< The default enqueue is "single-producer". */
#define RING_F_SC_DEQ 0x0002 /**< The default dequeue is "single-consumer". */
#define RTE_RING_QUOT_EXCEED (1 << 31) /**< Quota exceed for burst ops */
* The number to add to the object-oriented statistics.
*/
#ifdef RTE_LIBRTE_RING_DEBUG
-#define __RING_STAT_ADD(r, name, n) do { \
- unsigned __lcore_id = rte_lcore_id(); \
- r->stats[__lcore_id].name##_objs += n; \
- r->stats[__lcore_id].name##_bulk += 1; \
+#define __RING_STAT_ADD(r, name, n) do { \
+ unsigned __lcore_id = rte_lcore_id(); \
+ if (__lcore_id < RTE_MAX_LCORE) { \
+ r->stats[__lcore_id].name##_objs += n; \
+ r->stats[__lcore_id].name##_bulk += 1; \
+ } \
} while(0)
#else
#define __RING_STAT_ADD(r, name, n) do {} while(0)
#endif
+/**
+ * Calculate the memory size needed for a ring
+ *
+ * This function returns the number of bytes needed for a ring, given
+ * the number of elements in it. This value is the sum of the size of
+ * the structure rte_ring and the size of the memory needed for the
+ * object pointers. The value is aligned to the cache line size.
+ *
+ * @param count
+ * The number of elements in the ring (must be a power of 2).
+ * @return
+ * - The memory size needed for the ring on success.
+ * - -EINVAL if count is not a power of 2.
+ */
+ssize_t rte_ring_get_memsize(unsigned count);
+
+/**
+ * Initialize a ring structure.
+ *
+ * Initialize a ring structure in the memory pointed to by "r". The size of the
+ * memory area must be large enough to store the ring structure and the
+ * object table. It is advised to use rte_ring_get_memsize() to get the
+ * appropriate size.
+ *
+ * The ring size is set to *count*, which must be a power of two. Water
+ * marking is disabled by default. The real usable ring size is
+ * *count-1* instead of *count* to differentiate a free ring from an
+ * empty ring.
+ *
+ * The ring is not added to the RTE_TAILQ_RING global list, because the
+ * memory given by the caller may not be shareable among DPDK
+ * processes.
+ *
+ * @param r
+ * The pointer to the ring structure followed by the objects table.
+ * @param name
+ * The name of the ring.
+ * @param count
+ * The number of elements in the ring (must be a power of 2).
+ * @param flags
+ * An OR of the following:
+ * - RING_F_SP_ENQ: If this flag is set, the default behavior when
+ * using ``rte_ring_enqueue()`` or ``rte_ring_enqueue_bulk()``
+ *     is "single-producer". Otherwise, it is "multi-producer".
+ * - RING_F_SC_DEQ: If this flag is set, the default behavior when
+ * using ``rte_ring_dequeue()`` or ``rte_ring_dequeue_bulk()``
+ *     is "single-consumer". Otherwise, it is "multi-consumer".
+ * @return
+ * 0 on success, or a negative value on error.
+ */
+int rte_ring_init(struct rte_ring *r, const char *name, unsigned count,
+ unsigned flags);
+
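
A rough usage sketch of the two new functions (the element count, the malloc() backing store, the ring name "example_ring" and the helper name make_private_ring() are assumptions made for illustration only):

    #include <stdlib.h>
    #include <sys/types.h>
    #include <rte_ring.h>

    static struct rte_ring *
    make_private_ring(void)
    {
    	ssize_t sz = rte_ring_get_memsize(1024);  /* -EINVAL if not a power of 2 */
    	if (sz < 0)
    		return NULL;
    	struct rte_ring *r = malloc(sz);          /* any memory of at least sz bytes */
    	if (r == NULL)
    		return NULL;
    	if (rte_ring_init(r, "example_ring", 1024,
    			RING_F_SP_ENQ | RING_F_SC_DEQ) != 0) {
    		free(r);
    		return NULL;
    	}
    	return r;  /* usable capacity is count - 1, i.e. 1023 */
    }
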
/**
* Create a new ring named *name* in memory.
*
- * This function uses ``memzone_reserve()`` to allocate memory. Its size is
- * set to *count*, which must be a power of two. Water marking is
- * disabled by default.
- * Note that the real usable ring size is *count-1* instead of
- * *count*.
+ * This function uses ``memzone_reserve()`` to allocate memory. Then it
+ * calls rte_ring_init() to initialize an empty ring.
+ *
+ * The new ring size is set to *count*, which must be a power of
+ * two. Water marking is disabled by default. The real usable ring size
+ * is *count-1* instead of *count* to differentiate a free ring from an
+ * empty ring.
+ *
+ * The new ring is added to the RTE_TAILQ_RING list.
*
* @param name
* The name of the ring.
* rte_errno set appropriately. Possible errno values include:
* - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure
* - E_RTE_SECONDARY - function was called from a secondary process instance
- * - E_RTE_NO_TAILQ - no tailq list could be got for the ring list
* - EINVAL - count provided is not a power of 2
* - ENOSPC - the maximum number of memzones has already been allocated
* - EEXIST - a memzone with the same name already exists
*/
struct rte_ring *rte_ring_create(const char *name, unsigned count,
int socket_id, unsigned flags);
+/**
+ * De-allocate all memory used by the ring.
+ *
+ * @param r
+ * Ring to free
+ */
+void rte_ring_free(struct rte_ring *r);
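
A minimal sketch of the memzone-backed path, pairing rte_ring_create() with the new rte_ring_free() (the ring name, count, use of SOCKET_ID_ANY and the helper name make_shared_ring() are illustrative assumptions; on failure the cause is reported through rte_errno):

    #include <stdio.h>
    #include <rte_ring.h>
    #include <rte_errno.h>
    #include <rte_memory.h>   /* SOCKET_ID_ANY */

    static struct rte_ring *
    make_shared_ring(void)
    {
    	struct rte_ring *r = rte_ring_create("shared_ring", 1024, SOCKET_ID_ANY,
    			RING_F_SP_ENQ | RING_F_SC_DEQ);
    	if (r == NULL)
    		fprintf(stderr, "rte_ring_create failed: %d\n", rte_errno);
    	return r;
    }

    /* ... and when the ring is no longer needed: rte_ring_free(r); */
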
/**
* Change the high water mark.
/**
* Dump the status of the ring to the console.
*
+ * @param f
+ * A pointer to a file for output
* @param r
* A pointer to the ring structure.
*/
-void rte_ring_dump(const struct rte_ring *r);
+void rte_ring_dump(FILE *f, const struct rte_ring *r);
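
With the new FILE * argument a caller simply passes the desired stream; for instance (assuming r points to an existing ring, and the log-file path is illustrative):

    rte_ring_dump(stdout, r);                        /* same output as before, on stdout */

    FILE *log = fopen("/tmp/ring_state.txt", "w");   /* hypothetical destination */
    if (log != NULL) {
    	rte_ring_dump(log, r);
    	fclose(log);
    }
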
-/* the actual enqueue of pointers on the ring.
+/* the actual enqueue of pointers on the ring.
* Placed here since identical code needed in both
* single and multi producer enqueue functions */
#define ENQUEUE_PTRS() do { \
} \
} while(0)
-/* the actual copy of pointers on the ring to obj_table.
+/* the actual copy of pointers on the ring to obj_table.
* Placed here since identical code needed in both
* single and multi consumer dequeue functions */
#define DEQUEUE_PTRS() do { \
uint32_t cons_tail, free_entries;
const unsigned max = n;
int success;
- unsigned i;
+ unsigned i, rep = 0;
uint32_t mask = r->prod.mask;
int ret;
+ /* Avoid the unnecessary cmpset operation below, which is also
+ * potentially harmful when n equals 0. */
+ if (n == 0)
+ return 0;
+
/* move prod.head atomically */
do {
/* Reset n to the initial burst count */
/* write entries in ring */
ENQUEUE_PTRS();
- COMPILER_BARRIER();
+ rte_smp_wmb();
/* if we exceed the watermark */
if (unlikely(((mask + 1) - free_entries + n) > r->prod.watermark)) {
}
/*
- * If there are other enqueues in progress that preceeded us,
+ * If there are other enqueues in progress that preceded us,
* we need to wait for them to complete
*/
- while (unlikely(r->prod.tail != prod_head))
+ while (unlikely(r->prod.tail != prod_head)) {
rte_pause();
+		/* Set RTE_RING_PAUSE_REP_COUNT to avoid spinning too long while
+		 * waiting for other threads to finish. It gives a pre-empted
+		 * thread a chance to proceed and finish its ring enqueue. */
+ if (RTE_RING_PAUSE_REP_COUNT &&
+ ++rep == RTE_RING_PAUSE_REP_COUNT) {
+ rep = 0;
+ sched_yield();
+ }
+ }
r->prod.tail = prod_next;
return ret;
}
/* write entries in ring */
ENQUEUE_PTRS();
- COMPILER_BARRIER();
+ rte_smp_wmb();
/* if we exceed the watermark */
if (unlikely(((mask + 1) - free_entries + n) > r->prod.watermark)) {
uint32_t cons_next, entries;
const unsigned max = n;
int success;
- unsigned i;
+ unsigned i, rep = 0;
uint32_t mask = r->prod.mask;
+ /* Avoid the unnecessary cmpset operation below, which is also
+ * potentially harmful when n equals 0. */
+ if (n == 0)
+ return 0;
+
/* move cons.head atomically */
do {
/* Restore n as it may change every loop */
/* copy in table */
DEQUEUE_PTRS();
- COMPILER_BARRIER();
+ rte_smp_rmb();
/*
* If there are other dequeues in progress that preceded us,
* we need to wait for them to complete
*/
- while (unlikely(r->cons.tail != cons_head))
+ while (unlikely(r->cons.tail != cons_head)) {
rte_pause();
+		/* Set RTE_RING_PAUSE_REP_COUNT to avoid spinning too long while
+		 * waiting for other threads to finish. It gives a pre-empted
+		 * thread a chance to proceed and finish its ring dequeue. */
+ if (RTE_RING_PAUSE_REP_COUNT &&
+ ++rep == RTE_RING_PAUSE_REP_COUNT) {
+ rep = 0;
+ sched_yield();
+ }
+ }
__RING_STAT_ADD(r, deq_success, n);
r->cons.tail = cons_next;
/* copy in table */
DEQUEUE_PTRS();
- COMPILER_BARRIER();
+ rte_smp_rmb();
__RING_STAT_ADD(r, deq_success, n);
r->cons.tail = cons_next;
{
uint32_t prod_tail = r->prod.tail;
uint32_t cons_tail = r->cons.tail;
- return (((cons_tail - prod_tail - 1) & r->prod.mask) == 0);
+ return ((cons_tail - prod_tail - 1) & r->prod.mask) == 0;
}
/**
{
uint32_t prod_tail = r->prod.tail;
uint32_t cons_tail = r->cons.tail;
- return ((prod_tail - cons_tail) & r->prod.mask);
+ return (prod_tail - cons_tail) & r->prod.mask;
}
/**
{
uint32_t prod_tail = r->prod.tail;
uint32_t cons_tail = r->cons.tail;
- return ((cons_tail - prod_tail - 1) & r->prod.mask);
+ return (cons_tail - prod_tail - 1) & r->prod.mask;
}
/**
* Dump the status of all rings on the console
+ *
+ * @param f
+ * A pointer to a file for output
*/
-void rte_ring_list_dump(void);
+void rte_ring_list_dump(FILE *f);
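
The same convention applies here; for example, dumping every registered ring to stderr:

    rte_ring_list_dump(stderr);   /* walks the RTE_TAILQ_RING list and dumps each ring */
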
/**
* Search a ring from its name
* @return
* - n: Actual number of objects enqueued.
*/
-static inline int __attribute__((always_inline))
+static inline unsigned __attribute__((always_inline))
rte_ring_mp_enqueue_burst(struct rte_ring *r, void * const *obj_table,
unsigned n)
{
* @return
* - n: Actual number of objects enqueued.
*/
-static inline int __attribute__((always_inline))
+static inline unsigned __attribute__((always_inline))
rte_ring_sp_enqueue_burst(struct rte_ring *r, void * const *obj_table,
unsigned n)
{
* @return
* - n: Actual number of objects enqueued.
*/
-static inline int __attribute__((always_inline))
+static inline unsigned __attribute__((always_inline))
rte_ring_enqueue_burst(struct rte_ring *r, void * const *obj_table,
unsigned n)
{
if (r->prod.sp_enqueue)
- return rte_ring_sp_enqueue_burst(r, obj_table, n);
+ return rte_ring_sp_enqueue_burst(r, obj_table, n);
else
- return rte_ring_mp_enqueue_burst(r, obj_table, n);
+ return rte_ring_mp_enqueue_burst(r, obj_table, n);
}
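
Because the burst helpers now return an unsigned count rather than an int, callers test for a shortfall instead of a negative error code. A sketch (src and dst are assumed to be existing rings; handle_leftover() is a hypothetical helper):

    void *objs[32];
    unsigned nb_rx, nb_tx;

    nb_rx = rte_ring_dequeue_burst(src, objs, 32);       /* 0 when src is empty */
    nb_tx = rte_ring_enqueue_burst(dst, objs, nb_rx);    /* may enqueue fewer than nb_rx */
    if (nb_tx < nb_rx)
    	handle_leftover(&objs[nb_tx], nb_rx - nb_tx);    /* retry or drop the rest */
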
/**
* @return
* - n: Actual number of objects dequeued, 0 if ring is empty
*/
-static inline int __attribute__((always_inline))
+static inline unsigned __attribute__((always_inline))
rte_ring_mc_dequeue_burst(struct rte_ring *r, void **obj_table, unsigned n)
{
return __rte_ring_mc_do_dequeue(r, obj_table, n, RTE_RING_QUEUE_VARIABLE);
* @return
* - n: Actual number of objects dequeued, 0 if ring is empty
*/
-static inline int __attribute__((always_inline))
+static inline unsigned __attribute__((always_inline))
rte_ring_sc_dequeue_burst(struct rte_ring *r, void **obj_table, unsigned n)
{
return __rte_ring_sc_do_dequeue(r, obj_table, n, RTE_RING_QUEUE_VARIABLE);
* @param n
* The number of objects to dequeue from the ring to the obj_table.
* @return
- * - Number of objects dequeued, or a negative error code on error
+ * - Number of objects dequeued
*/
-static inline int __attribute__((always_inline))
+static inline unsigned __attribute__((always_inline))
rte_ring_dequeue_burst(struct rte_ring *r, void **obj_table, unsigned n)
{
if (r->cons.sc_dequeue)