}
retptr64 = &(buf->retptr64[0]);
- /* Spin while handshake bits are set (scheduler clears it) */
- while (unlikely(*retptr64 & RTE_DISTRIB_GET_BUF)) {
+ /* Spin while handshake bits are set (scheduler clears it).
+ * Sync with distributor on GET_BUF flag.
+ */
+ while (unlikely(__atomic_load_n(retptr64, __ATOMIC_ACQUIRE)
+ & RTE_DISTRIB_GET_BUF)) {
rte_pause();
uint64_t t = rte_rdtsc()+100;
/*
* Finally, set the GET_BUF to signal to distributor that cache
* line is ready for processing
+ * Sync with distributor to release retptrs
*/
- *retptr64 |= RTE_DISTRIB_GET_BUF;
+ __atomic_store_n(retptr64, *retptr64 | RTE_DISTRIB_GET_BUF,
+ __ATOMIC_RELEASE);
}
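
The hunk above converts the worker's request path to a single-word mailbox handshake: spin with acquire loads until the peer has cleared the flag, then publish the payload and raise the flag with one release store. The standalone sketch below reproduces that shape with the same GCC __atomic builtins; the slot layout, the SKETCH_* names and the bare busy-wait (where the real code calls rte_pause()) are illustrative assumptions, not DPDK API.

#include <inttypes.h>
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

#define SKETCH_GET_BUF   1ULL      /* handshake bit kept in the low bits */
#define SKETCH_FLAG_BITS 4         /* payload lives above the flag bits  */

static uint64_t slot;              /* shared single-word "mailbox"       */

/* Requester: wait until the peer has cleared the flag, then publish. */
static void *requester(void *arg)
{
	uint64_t payload = (uint64_t)(uintptr_t)arg;

	/* Spin while the handshake bit is still set (the peer clears it). */
	while (__atomic_load_n(&slot, __ATOMIC_ACQUIRE) & SKETCH_GET_BUF)
		;                  /* the real code calls rte_pause() here */

	/* Publish the payload and raise the flag in one release store. */
	__atomic_store_n(&slot,
			(payload << SKETCH_FLAG_BITS) | SKETCH_GET_BUF,
			__ATOMIC_RELEASE);
	return NULL;
}

/* Responder: wait for the flag, consume the payload, clear the flag. */
static void *responder(void *arg)
{
	uint64_t v;

	(void)arg;
	while (!((v = __atomic_load_n(&slot, __ATOMIC_ACQUIRE))
			& SKETCH_GET_BUF))
		;

	printf("got payload %" PRIu64 "\n", v >> SKETCH_FLAG_BITS);

	/* Hand the slot back with a release store, the way the scheduler
	 * clears GET_BUF for the worker in the library.
	 */
	__atomic_store_n(&slot, 0, __ATOMIC_RELEASE);
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, requester, (void *)(uintptr_t)42);
	pthread_create(&b, NULL, responder, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	return 0;
}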
BIND_DEFAULT_SYMBOL(rte_distributor_request_pkt, _v1705, 17.05);
MAP_STATIC_SYMBOL(void rte_distributor_request_pkt(struct rte_distributor *d,
return (pkts[0]) ? 1 : 0;
}
- /* If bit is set, return */
- if (buf->bufptr64[0] & RTE_DISTRIB_GET_BUF)
+ /* If bit is set, return
+ * Sync with distributor to acquire bufptrs
+ */
+ if (__atomic_load_n(&(buf->bufptr64[0]), __ATOMIC_ACQUIRE)
+ & RTE_DISTRIB_GET_BUF)
return -1;
/* since bufptr64 is signed, this should be an arithmetic shift */
* so now we've got the contents of the cacheline into an array of
* mbuf pointers, so toggle the bit so scheduler can start working
* on the next cacheline while we're working.
+ * Sync with distributor on GET_BUF flag. Release bufptrs.
*/
- buf->bufptr64[0] |= RTE_DISTRIB_GET_BUF;
+ __atomic_store_n(&(buf->bufptr64[0]),
+ buf->bufptr64[0] | RTE_DISTRIB_GET_BUF, __ATOMIC_RELEASE);
return count;
}
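
The poll path above is the non-blocking half of the same handshake: test the flag with one acquire load, bail out if the distributor still owns the line, otherwise read the contents and flip the flag back with a release store. A compilable sketch of that shape; sketch_try_poll() and the SKETCH_* constants are invented names, and the non-atomic read-modify-write on the flag word mirrors the code above, which is only safe because each side writes the word exclusively while it owns it.

#include <stdint.h>

#define SKETCH_GET_BUF   1ULL
#define SKETCH_FLAG_BITS 4

static uint64_t sketch_slot;

/* Returns -1 while the peer still owns the slot, 0 once a value is read. */
static int sketch_try_poll(uint64_t *payload)
{
	uint64_t v = __atomic_load_n(&sketch_slot, __ATOMIC_ACQUIRE);

	/* Peer has not handed the slot over yet: tell the caller to retry. */
	if (v & SKETCH_GET_BUF)
		return -1;

	*payload = v >> SKETCH_FLAG_BITS;

	/* Ask for more.  "v | SKETCH_GET_BUF" is a plain read-modify-write,
	 * safe here because the peer never writes the word while the flag
	 * is clear, so this thread is the only writer at this point.  The
	 * release keeps the read above from drifting past the hand-over.
	 */
	__atomic_store_n(&sketch_slot, v | SKETCH_GET_BUF, __ATOMIC_RELEASE);
	return 0;
}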
return -EINVAL;
}
+ /* Sync with distributor to acquire retptrs */
+ __atomic_thread_fence(__ATOMIC_ACQUIRE);
for (i = 0; i < RTE_DIST_BURST_SIZE; i++)
/* Switch off the return bit first */
buf->retptr64[i] &= ~RTE_DISTRIB_RETURN_BUF;
buf->retptr64[i] = (((int64_t)(uintptr_t)oldpkt[i]) <<
RTE_DISTRIB_FLAG_BITS) | RTE_DISTRIB_RETURN_BUF;
- /* set the GET_BUF but even if we got no returns */
- buf->retptr64[0] |= RTE_DISTRIB_GET_BUF;
+ /* set the GET_BUF bit even if we got no returns.
+ * Sync with distributor on GET_BUF flag. Release retptrs.
+ */
+ __atomic_store_n(&(buf->retptr64[0]),
+ buf->retptr64[0] | RTE_DISTRIB_GET_BUF, __ATOMIC_RELEASE);
return 0;
}
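
The __atomic_thread_fence(__ATOMIC_ACQUIRE) added above stands in for acquire semantics on a spin loop whose loads can stay relaxed: a single fence after the loop orders everything that follows after the load that finally observed the flag. A sketch of that idiom, with invented names; as in the return path above, the waiting side goes on to overwrite the handed-back slots.

#include <stdint.h>

#define SKETCH_GET_BUF 1ULL

static uint64_t sketch_flag;
static uint64_t sketch_ret[8];   /* slots owned by whoever holds the flag */

static void sketch_wait_and_refill(uint64_t v)
{
	/* Relaxed loads are enough while we are only watching the bit. */
	while (__atomic_load_n(&sketch_flag, __ATOMIC_RELAXED)
			& SKETCH_GET_BUF)
		;                       /* the real code calls rte_pause() */

	/* One acquire fence pairs with the peer's release store of the
	 * cleared flag: the stores below cannot be hoisted above the load
	 * that saw the flag drop, so the slots are safe to overwrite.
	 */
	__atomic_thread_fence(__ATOMIC_ACQUIRE);

	sketch_ret[0] = v;
}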
unsigned int count = 0;
unsigned int i;
- if (buf->retptr64[0] & RTE_DISTRIB_GET_BUF) {
+ /* Sync on GET_BUF flag. Acquire retptrs. */
+ if (__atomic_load_n(&(buf->retptr64[0]), __ATOMIC_ACQUIRE)
+ & RTE_DISTRIB_GET_BUF) {
for (i = 0; i < RTE_DIST_BURST_SIZE; i++) {
if (buf->retptr64[i] & RTE_DISTRIB_RETURN_BUF) {
oldbuf = ((uintptr_t)(buf->retptr64[i] >>
}
d->returns.start = ret_start;
d->returns.count = ret_count;
- /* Clear for the worker to populate with more returns */
- buf->retptr64[0] = 0;
+ /* Clear for the worker to populate with more returns.
+ * Sync with worker on GET_BUF flag. Release retptrs.
+ */
+ __atomic_store_n(&(buf->retptr64[0]), 0, __ATOMIC_RELEASE);
}
return count;
}
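
handle_returns() above also illustrates why no lock is needed: every worker has its own cache-line-sized mailbox, so the scheduler just sweeps the per-worker flags with acquire loads and recycles each line with a release store of 0. A sketch of that sweep under the assumption of fixed-size 8-word mailboxes; struct sketch_mailbox and sketch_sweep() are illustrative names only.

#include <stdalign.h>
#include <stdint.h>

#define SKETCH_GET_BUF 1ULL
#define SKETCH_WORKERS 4

/* One cache-line-aligned mailbox per worker, so the only data shared
 * between the scheduler and a given worker is that worker's own line.
 */
struct sketch_mailbox {
	alignas(64) uint64_t word[8];
};

static struct sketch_mailbox sketch_box[SKETCH_WORKERS];

/* Scheduler-side sweep: harvest every mailbox whose flag is raised. */
static unsigned int sketch_sweep(uint64_t out[][8])
{
	unsigned int w, i, n = 0;

	for (w = 0; w < SKETCH_WORKERS; w++) {
		/* Acquire pairs with the worker's release store on word 0. */
		if (!(__atomic_load_n(&sketch_box[w].word[0], __ATOMIC_ACQUIRE)
				& SKETCH_GET_BUF))
			continue;

		for (i = 0; i < 8; i++)
			out[n][i] = sketch_box[w].word[i];
		n++;

		/* Give the line back; the release keeps the reads above it. */
		__atomic_store_n(&sketch_box[w].word[0], 0, __ATOMIC_RELEASE);
	}
	return n;
}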
struct rte_distributor_buffer *buf = &(d->bufs[wkr]);
unsigned int i;
- while (!(d->bufs[wkr].bufptr64[0] & RTE_DISTRIB_GET_BUF))
+ /* Sync with worker on GET_BUF flag */
+ while (!(__atomic_load_n(&(d->bufs[wkr].bufptr64[0]), __ATOMIC_ACQUIRE)
+ & RTE_DISTRIB_GET_BUF))
rte_pause();
handle_returns(d, wkr);
d->backlog[wkr].count = 0;
- /* Clear the GET bit */
- buf->bufptr64[0] &= ~RTE_DISTRIB_GET_BUF;
+ /* Clear the GET bit.
+ * Sync with worker on GET_BUF flag. Release bufptrs.
+ */
+ __atomic_store_n(&(buf->bufptr64[0]),
+ buf->bufptr64[0] & ~RTE_DISTRIB_GET_BUF, __ATOMIC_RELEASE);
return buf->count;
}
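
release() above is the scheduler-side mirror of the worker handshake: wait with acquire loads until the worker has raised GET_BUF to ask for work, fill the rest of the cache line with plain stores, then clear the flag with one release store so none of those payload stores can slip past the hand-over. A sketch of that direction with invented SKETCH_* names:

#include <stdint.h>

#define SKETCH_GET_BUF 1ULL
#define SKETCH_BURST   8

static uint64_t sketch_line[SKETCH_BURST];   /* word 0 carries the flag */

static void sketch_hand_over(const uint64_t *pkts, unsigned int n)
{
	unsigned int i;

	/* Wait until the worker has asked for more (flag raised). */
	while (!(__atomic_load_n(&sketch_line[0], __ATOMIC_ACQUIRE)
			& SKETCH_GET_BUF))
		;                       /* the real code calls rte_pause() */

	/* Plain stores are fine: the worker will not look at these words
	 * until it sees the flag cleared below.
	 */
	for (i = 0; i < n && i < SKETCH_BURST - 1; i++)
		sketch_line[i + 1] = pkts[i];

	/* Clear the flag last; the release store keeps the payload stores
	 * ahead of it, so the worker's acquire load sees a complete burst.
	 */
	__atomic_store_n(&sketch_line[0],
			sketch_line[0] & ~SKETCH_GET_BUF, __ATOMIC_RELEASE);
}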
if (unlikely(num_mbufs == 0)) {
/* Flush out all non-full cache-lines to workers. */
for (wid = 0 ; wid < d->num_workers; wid++) {
- if (d->bufs[wid].bufptr64[0] & RTE_DISTRIB_GET_BUF) {
+ /* Sync with worker on GET_BUF flag. */
+ if (__atomic_load_n(&(d->bufs[wid].bufptr64[0]),
+ __ATOMIC_ACQUIRE) & RTE_DISTRIB_GET_BUF) {
release(d, wid);
handle_returns(d, wid);
}
uint16_t matches[RTE_DIST_BURST_SIZE];
unsigned int pkts;
- if (d->bufs[wkr].bufptr64[0] & RTE_DISTRIB_GET_BUF)
+ /* Sync with worker on GET_BUF flag. */
+ if (__atomic_load_n(&(d->bufs[wkr].bufptr64[0]),
+ __ATOMIC_ACQUIRE) & RTE_DISTRIB_GET_BUF)
d->bufs[wkr].count = 0;
if ((num_mbufs - next_idx) < RTE_DIST_BURST_SIZE)
/* Flush out all non-full cache-lines to workers. */
for (wid = 0 ; wid < d->num_workers; wid++)
- if ((d->bufs[wid].bufptr64[0] & RTE_DISTRIB_GET_BUF))
+ /* Sync with worker on GET_BUF flag. */
+ if ((__atomic_load_n(&(d->bufs[wid].bufptr64[0]),
+ __ATOMIC_ACQUIRE) & RTE_DISTRIB_GET_BUF))
release(d, wid);
return num_mbufs;
/* throw away returns, so workers can exit */
for (wkr = 0; wkr < d->num_workers; wkr++)
- d->bufs[wkr].retptr64[0] = 0;
+ /* Sync with worker. Release retptrs. */
+ __atomic_store_n(&(d->bufs[wkr].retptr64[0]), 0,
+ __ATOMIC_RELEASE);
}
BIND_DEFAULT_SYMBOL(rte_distributor_clear_returns, _v1705, 17.05);
MAP_STATIC_SYMBOL(void rte_distributor_clear_returns(struct rte_distributor *d),
union rte_distributor_buffer_v20 *buf = &d->bufs[worker_id];
int64_t req = (((int64_t)(uintptr_t)oldpkt) << RTE_DISTRIB_FLAG_BITS)
| RTE_DISTRIB_GET_BUF;
- while (unlikely(buf->bufptr64 & RTE_DISTRIB_FLAGS_MASK))
+ while (unlikely(__atomic_load_n(&buf->bufptr64, __ATOMIC_RELAXED)
+ & RTE_DISTRIB_FLAGS_MASK))
rte_pause();
- buf->bufptr64 = req;
+
+ /* Sync with distributor on GET_BUF flag. */
+ __atomic_store_n(&(buf->bufptr64), req, __ATOMIC_RELEASE);
}
VERSION_SYMBOL(rte_distributor_request_pkt, _v20, 2.0);
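
In the v20 path each worker shares a single 64-bit word with the distributor, so a request is "pack the old mbuf pointer next to the flag bits and store the whole word at once". The sketch below shows that packing with a generic pointer; SKETCH_FLAG_BITS, sketch_publish() and sketch_take() are illustrative, and like the original it assumes the pointer value fits in the bits above the flag field.

#include <stdint.h>

#define SKETCH_FLAG_BITS 4
#define SKETCH_GET_BUF   1ULL

static int64_t sketch_word;

static void sketch_publish(void *obj)
{
	int64_t req = (((int64_t)(uintptr_t)obj) << SKETCH_FLAG_BITS)
			| SKETCH_GET_BUF;

	/* One release store publishes the pointer and the flag together. */
	__atomic_store_n(&sketch_word, req, __ATOMIC_RELEASE);
}

static void *sketch_take(void)
{
	int64_t v = __atomic_load_n(&sketch_word, __ATOMIC_ACQUIRE);

	if (!(v & SKETCH_GET_BUF))
		return NULL;

	/* Arithmetic shift drops the flag bits and restores the pointer. */
	return (void *)(uintptr_t)(v >> SKETCH_FLAG_BITS);
}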
unsigned worker_id)
{
union rte_distributor_buffer_v20 *buf = &d->bufs[worker_id];
- if (buf->bufptr64 & RTE_DISTRIB_GET_BUF)
+ /* Sync with distributor. Acquire bufptr64. */
+ if (__atomic_load_n(&buf->bufptr64, __ATOMIC_ACQUIRE)
+ & RTE_DISTRIB_GET_BUF)
return NULL;
/* since bufptr64 is signed, this should be an arithmetic shift */
union rte_distributor_buffer_v20 *buf = &d->bufs[worker_id];
uint64_t req = (((int64_t)(uintptr_t)oldpkt) << RTE_DISTRIB_FLAG_BITS)
| RTE_DISTRIB_RETURN_BUF;
- buf->bufptr64 = req;
+ /* Sync with distributor on RETURN_BUF flag. */
+ __atomic_store_n(&(buf->bufptr64), req, __ATOMIC_RELEASE);
return 0;
}
VERSION_SYMBOL(rte_distributor_return_pkt, _v20, 2.0);
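
On the scheduler side the same packed word is read back with an acquire load, the flag bits decide what the worker asked for, and the word is recycled with a release store (either a new packed value or 0, as in the hunks that follow). A small sketch of that consume-and-recycle step; the SKETCH_* names are invented and only a simplified subset of the real branching is shown.

#include <stdint.h>

#define SKETCH_GET_BUF    1ULL
#define SKETCH_RETURN_BUF 2ULL
#define SKETCH_FLAG_BITS  4

static int64_t sketch_word;

/* Scheduler-side step: see what the worker asked for, recycle the word. */
static void *sketch_consume(void)
{
	/* Acquire pairs with the worker's release store of the packed word. */
	int64_t v = __atomic_load_n(&sketch_word, __ATOMIC_ACQUIRE);
	void *oldptr = NULL;

	if (v & SKETCH_RETURN_BUF) {
		/* Worker is handing a pointer back and wants nothing new:
		 * take the pointer, then reset the word with a release store.
		 */
		oldptr = (void *)(uintptr_t)(v >> SKETCH_FLAG_BITS);
		__atomic_store_n(&sketch_word, 0, __ATOMIC_RELEASE);
	} else if (v & SKETCH_GET_BUF) {
		/* Worker wants work; this sketch just parks the flag again,
		 * where the real code would pop its backlog and store a
		 * freshly packed mbuf pointer instead.
		 */
		__atomic_store_n(&sketch_word, (int64_t)SKETCH_GET_BUF,
				__ATOMIC_RELEASE);
	}
	return oldptr;
}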
{
d->in_flight_tags[wkr] = 0;
d->in_flight_bitmask &= ~(1UL << wkr);
- d->bufs[wkr].bufptr64 = 0;
+ /* Sync with worker. Release bufptr64. */
+ __atomic_store_n(&(d->bufs[wkr].bufptr64), 0, __ATOMIC_RELEASE);
if (unlikely(d->backlog[wkr].count != 0)) {
/* On return of a packet, we need to move the
* queued packets for this core elsewhere.
ret_count = d->returns.count;
for (wkr = 0; wkr < d->num_workers; wkr++) {
-
- const int64_t data = d->bufs[wkr].bufptr64;
uintptr_t oldbuf = 0;
+ /* Sync with worker. Acquire bufptr64. */
+ const int64_t data = __atomic_load_n(&(d->bufs[wkr].bufptr64),
+ __ATOMIC_ACQUIRE);
if (data & RTE_DISTRIB_GET_BUF) {
flushed++;
if (d->backlog[wkr].count)
- d->bufs[wkr].bufptr64 =
- backlog_pop(&d->backlog[wkr]);
+ /* Sync with worker. Release bufptr64. */
+ __atomic_store_n(&(d->bufs[wkr].bufptr64),
+ backlog_pop(&d->backlog[wkr]),
+ __ATOMIC_RELEASE);
else {
- d->bufs[wkr].bufptr64 = RTE_DISTRIB_GET_BUF;
+ /* Sync with worker on GET_BUF flag. */
+ __atomic_store_n(&(d->bufs[wkr].bufptr64),
+ RTE_DISTRIB_GET_BUF,
+ __ATOMIC_RELEASE);
d->in_flight_tags[wkr] = 0;
d->in_flight_bitmask &= ~(1UL << wkr);
}
return process_returns(d);
while (next_idx < num_mbufs || next_mb != NULL) {
-
- int64_t data = d->bufs[wkr].bufptr64;
uintptr_t oldbuf = 0;
+ /* Sync with worker. Acquire bufptr64. */
+ int64_t data = __atomic_load_n(&(d->bufs[wkr].bufptr64),
+ __ATOMIC_ACQUIRE);
if (!next_mb) {
next_mb = mbufs[next_idx++];
(d->backlog[wkr].count || next_mb)) {
if (d->backlog[wkr].count)
- d->bufs[wkr].bufptr64 =
- backlog_pop(&d->backlog[wkr]);
+ /* Sync with worker. Release bufptr64. */
+ __atomic_store_n(&(d->bufs[wkr].bufptr64),
+ backlog_pop(&d->backlog[wkr]),
+ __ATOMIC_RELEASE);
else {
- d->bufs[wkr].bufptr64 = next_value;
+ /* Sync with worker. Release bufptr64. */
+ __atomic_store_n(&(d->bufs[wkr].bufptr64),
+ next_value,
+ __ATOMIC_RELEASE);
d->in_flight_tags[wkr] = new_tag;
d->in_flight_bitmask |= (1UL << wkr);
next_mb = NULL;
* if they are ready */
for (wkr = 0; wkr < d->num_workers; wkr++)
if (d->backlog[wkr].count &&
- (d->bufs[wkr].bufptr64 & RTE_DISTRIB_GET_BUF)) {
+ /* Sync with worker. Acquire bufptr64. */
+ (__atomic_load_n(&(d->bufs[wkr].bufptr64),
+ __ATOMIC_ACQUIRE) & RTE_DISTRIB_GET_BUF)) {
int64_t oldbuf = d->bufs[wkr].bufptr64 >>
RTE_DISTRIB_FLAG_BITS;
+
store_return(oldbuf, d, &ret_start, &ret_count);
- d->bufs[wkr].bufptr64 = backlog_pop(&d->backlog[wkr]);
+ /* Sync with worker. Release bufptr64. */
+ __atomic_store_n(&(d->bufs[wkr].bufptr64),
+ backlog_pop(&d->backlog[wkr]),
+ __ATOMIC_RELEASE);
}
d->returns.start = ret_start;