These wrappers must be used for patches that need to be merged in 20.08
onwards. This change will not introduce any performance degradation.
-* rte_cio_*mb: Since the IO barriers for ARMv8 platforms are relaxed from DSB
- to DMB, rte_cio_*mb APIs provide the same functionality as rte_io_*mb
- APIs (taking all platforms into consideration). rte_io_*mb APIs should be
- used in the place of rte_cio_*mb APIs. The rte_cio_*mb APIs will be
- deprecated in 20.11 release.
-
* igb_uio: With a view to reducing the kernel dependency in the main tree,
  as a first step, the Technical Board decided to move the ``igb_uio``
  kernel module to the dpdk-kmods repository, in the /linux/igb_uio/ directory.
* eal: Made the ``rte_dev_event`` structure private to the EAL as no public API
used it.
+* eal: The ``rte_cio_rmb()`` and ``rte_cio_wmb()`` APIs, deprecated since
+  20.08, are removed in this release; use ``rte_io_rmb()`` and ``rte_io_wmb()``
+  instead (a short migration sketch follows these notes).
+
* mem: Removed the unioned field ``phys_addr`` from
the structures ``rte_memseg`` and ``rte_memzone``.
The field ``iova`` remains from the old unions.
if (unlikely((op_owner != (!!(idx))) || (op_code == MLX5_CQE_INVALID)))
return MLX5_CQE_STATUS_HW_OWN;
- rte_cio_rmb();
+ rte_io_rmb();
if (unlikely(op_code == MLX5_CQE_RESP_ERR ||
op_code == MLX5_CQE_REQ_ERR))
return MLX5_CQE_STATUS_ERR;
* buffer immediately, a DMB is not required to push out
* LMTSTs.
*/
- rte_cio_wmb();
+ rte_io_wmb();
lmt_status = otx2_lmt_submit(qp->lf_nq_reg);
} while (lmt_status == 0);
inst.u64[3] = 0;
inst.res_addr = rte_mempool_virt2iova(res);
- rte_cio_wmb();
+ rte_io_wmb();
do {
/* Copy CPT command to LMTLINE */
otx2_err("Request timed out");
return -ETIMEDOUT;
}
- rte_cio_rmb();
+ rte_io_rmb();
}
if (unlikely(res->compcode != CPT_9X_COMP_E_GOOD)) {
switch (ev->sched_type) {
case SSO_SYNC_ORDERED:
ssows_swtag_norm(ws, ev->event, SSO_SYNC_ATOMIC);
- rte_cio_wmb();
+ rte_io_wmb();
ssows_swtag_wait(ws);
break;
case SSO_SYNC_UNTAGGED:
ssows_swtag_full(ws, ev->u64, ev->event, SSO_SYNC_ATOMIC,
ev->queue_id);
- rte_cio_wmb();
+ rte_io_wmb();
ssows_swtag_wait(ws);
break;
case SSO_SYNC_ATOMIC:
- rte_cio_wmb();
+ rte_io_wmb();
break;
}
if (wait_flag)
otx2_ssogws_head_wait(ws);
- rte_cio_wmb();
+ rte_io_wmb();
}
static __rte_always_inline const struct otx2_eth_txq *
/* Poll for the valid bit */
for (i = 0; i < timeout; i++) {
/* Sanity check on the resp->resp_len */
- rte_cio_rmb();
+ rte_io_rmb();
if (resp->resp_len && resp->resp_len <= bp->max_resp_len) {
/* Last byte of resp contains the valid key */
valid = (uint8_t *)resp + resp->resp_len - 1;
static inline void bnxt_db_write(struct bnxt_db_info *db, uint32_t idx)
{
- rte_cio_wmb();
+ rte_io_wmb();
if (db->db_64)
rte_write64_relaxed(db->db_key64 | idx, db->doorbell);
if (unlikely(!cpr->cp_db.db_64))
return;
- rte_cio_wmb();
+ rte_io_wmb();
rte_write64_relaxed(cpr->cp_db.db_key64 | DBR_TYPE_NQ |
RING_CMP(cpr->cp_ring_struct, cpr->cp_raw_cons),
cpr->cp_db.doorbell);
if (unlikely(!cpr->cp_db.db_64))
return;
- rte_cio_wmb();
+ rte_io_wmb();
rte_write64_relaxed(cpr->cp_db.db_key64 | DBR_TYPE_NQ_ARM |
RING_CMP(cpr->cp_ring_struct, cpr->cp_raw_cons),
cpr->cp_db.doorbell);
* reverse order to ensure consistent state.
*/
rxcmp1[3] = vld1q_u32((void *)&cpr->cp_desc_ring[cons + 7]);
- rte_cio_rmb();
+ rte_io_rmb();
rxcmp[3] = vld1q_u32((void *)&cpr->cp_desc_ring[cons + 6]);
rxcmp1[2] = vld1q_u32((void *)&cpr->cp_desc_ring[cons + 5]);
- rte_cio_rmb();
+ rte_io_rmb();
rxcmp[2] = vld1q_u32((void *)&cpr->cp_desc_ring[cons + 4]);
t1 = vreinterpretq_u64_u32(vzip2q_u32(rxcmp1[2], rxcmp1[3]));
rxcmp1[1] = vld1q_u32((void *)&cpr->cp_desc_ring[cons + 3]);
- rte_cio_rmb();
+ rte_io_rmb();
rxcmp[1] = vld1q_u32((void *)&cpr->cp_desc_ring[cons + 2]);
rxcmp1[0] = vld1q_u32((void *)&cpr->cp_desc_ring[cons + 1]);
- rte_cio_rmb();
+ rte_io_rmb();
rxcmp[0] = vld1q_u32((void *)&cpr->cp_desc_ring[cons + 0]);
t0 = vreinterpretq_u64_u32(vzip2q_u32(rxcmp1[0], rxcmp1[1]));
tx_desc->lower.data = rte_cpu_to_le_32(txd_lower | size);
tx_desc->upper.data = 0;
- rte_cio_wmb();
+ rte_io_wmb();
txq->tx_tail++;
if (txq->tx_tail == txq->nb_tx_desc)
txq->tx_tail = 0;
(unsigned) txq->port_id, (unsigned) txq->queue_id,
(unsigned) tx_id, (unsigned) nb_tx);
- rte_cio_wmb();
+ rte_io_wmb();
I40E_PCI_REG_WRITE_RELAXED(txq->qtx_tail, tx_id);
txq->tx_tail = tx_id;
rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
(rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
- rte_cio_wmb();
+ rte_io_wmb();
/* Update the tail pointer on the NIC */
I40E_PCI_REG_WRITE_RELAXED(rxq->qrx_tail, rx_id);
}
txq->tx_tail = tx_id;
- rte_cio_wmb();
+ rte_io_wmb();
I40E_PCI_REG_WRITE_RELAXED(txq->qtx_tail, tx_id);
return nb_pkts;
pool->raw = pool->raw_hw;
rte_spinlock_unlock(&pool->sl);
/* Be sure the new raw counters data is updated in memory. */
- rte_cio_wmb();
+ rte_io_wmb();
if (!TAILQ_EMPTY(&pool->counters[query_gen])) {
rte_spinlock_lock(&cont->csl);
TAILQ_CONCAT(&cont->counters,
cont->last_pool_idx = pool->index;
}
/* Pool initialization must be updated before host thread access. */
- rte_cio_wmb();
+ rte_io_wmb();
rte_atomic16_add(&cont->n_valid, 1);
return pool;
}
cqe->op_own = MLX5_CQE_INVALIDATE;
}
/* Resync CQE and WQE (WQ in RESET state). */
- rte_cio_wmb();
+ rte_io_wmb();
*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
- rte_cio_wmb();
+ rte_io_wmb();
*rxq->rq_db = rte_cpu_to_be_32(0);
- rte_cio_wmb();
+ rte_io_wmb();
}
/**
rte_errno = errno;
return ret;
}
- rte_cio_wmb();
+ rte_io_wmb();
*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
- rte_cio_wmb();
- /* Reset RQ consumer before moving queue to READY state. */
+ rte_io_wmb();
+ /* Reset RQ consumer before moving queue to READY state. */
*rxq->rq_db = rte_cpu_to_be_32(0);
- rte_cio_wmb();
+ rte_io_wmb();
ret = priv->obj_ops.rxq_obj_modify(rxq_ctrl->obj, true);
if (ret) {
DRV_LOG(ERR, "Cannot change Rx WQ state to READY: %s",
};
/* Update doorbell counter. */
rxq->rq_ci = wqe_n >> rxq->sges_n;
- rte_cio_wmb();
+ rte_io_wmb();
*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
}
case MLX5_RXQ_ERR_STATE_NEED_READY:
ret = check_cqe(u.cqe, cqe_n, rxq->cq_ci);
if (ret == MLX5_CQE_STATUS_HW_OWN) {
- rte_cio_wmb();
+ rte_io_wmb();
*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
- rte_cio_wmb();
+ rte_io_wmb();
/*
* The RQ consumer index must be zeroed while moving
* from RESET state to RDY state.
*/
*rxq->rq_db = rte_cpu_to_be_32(0);
- rte_cio_wmb();
+ rte_io_wmb();
sm.is_wq = 1;
sm.queue_id = rxq->idx;
sm.state = IBV_WQS_RDY;
return 0;
/* Update the consumer index. */
rxq->rq_ci = rq_ci >> sges_n;
- rte_cio_wmb();
+ rte_io_wmb();
*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
- rte_cio_wmb();
+ rte_io_wmb();
*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
#ifdef MLX5_PMD_SOFT_COUNTERS
/* Increment packets counter. */
out:
/* Update the consumer indexes. */
rxq->consumed_strd = consumed_strd;
- rte_cio_wmb();
+ rte_io_wmb();
*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
if (rq_ci != rxq->rq_ci) {
rxq->rq_ci = rq_ci;
- rte_cio_wmb();
+ rte_io_wmb();
*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
}
#ifdef MLX5_PMD_SOFT_COUNTERS
uint64_t *dst = MLX5_TX_BFREG(txq);
volatile uint64_t *src = ((volatile uint64_t *)wqe);
- rte_cio_wmb();
+ rte_io_wmb();
*txq->qp_db = rte_cpu_to_be_32(txq->wqe_ci);
/* Ensure ordering between DB record and BF copy. */
rte_wmb();
elts_idx = rxq->rq_ci & q_mask;
for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
(*rxq->elts)[elts_idx + i] = &rxq->fake_mbuf;
- rte_cio_wmb();
+ rte_io_wmb();
*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
}
/* B.2 copy mbuf pointers. */
*(vector unsigned char *)&pkts[pos] = mbp1;
*(vector unsigned char *)&pkts[pos + 2] = mbp2;
- rte_cio_rmb();
+ rte_io_rmb();
/* C.1 load remaining CQE data and extract necessary fields. */
cqe_tmp2 = *(vector unsigned char *)
/* B.0 (CQE 0) load a block having op_own. */
c0 = vld1q_u64((uint64_t *)(p0 + 48));
/* Synchronize for loading the rest of blocks. */
- rte_cio_rmb();
+ rte_io_rmb();
/* Prefetch next 4 CQEs. */
if (pkts_n - pos >= 2 * MLX5_VPMD_DESCS_PER_LOOP) {
unsigned int next = pos + MLX5_VPMD_DESCS_PER_LOOP;
rxq->decompressed -= n;
}
}
- rte_cio_wmb();
+ rte_io_wmb();
*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
*no_cq = !rcvd_pkt;
return rcvd_pkt;
/* B.2 copy mbuf pointers. */
_mm_storeu_si128((__m128i *)&pkts[pos], mbp1);
_mm_storeu_si128((__m128i *)&pkts[pos + 2], mbp2);
- rte_cio_rmb();
+ rte_io_rmb();
/* C.1 load remained CQE data and extract necessary fields. */
cqe_tmp2 = _mm_load_si128((__m128i *)&cq[pos + p3]);
cqe_tmp1 = _mm_load_si128((__m128i *)&cq[pos + p2]);
cqe->op_own = MLX5_CQE_INVALIDATE;
}
/* Resync CQE and WQE (WQ in reset state). */
- rte_cio_wmb();
+ rte_io_wmb();
*txq->cq_db = rte_cpu_to_be_32(txq->cq_ci);
- rte_cio_wmb();
+ rte_io_wmb();
}
/**
struct octeontx_txq *txq = tx_queue;
octeontx_dq_t *dq = &txq->dq;
uint16_t count = 0, nb_desc;
- rte_cio_wmb();
+ rte_io_wmb();
while (count < nb_pkts) {
if (unlikely(*((volatile int64_t *)dq->fc_status_va) < 0))
timeout = rte_get_timer_cycles() + 5 * rte_get_timer_hz();
- rte_cio_wmb();
+ rte_io_wmb();
do {
otx2_lmt_mov(qp->lmtline, &inst, 2);
sess->ip_id++;
sess->esn++;
- rte_cio_wmb();
+ rte_io_wmb();
do {
otx2_lmt_mov(sess->cpt_lmtline, &inst, 2);
rxq->head = head;
rxq->available -= packets;
- rte_cio_wmb();
+ rte_io_wmb();
/* Free all the CQs that we've processed */
otx2_write64((rxq->wdata | packets), rxq->cq_door);
}
/* Lets commit any changes in the packet */
- rte_cio_wmb();
+ rte_io_wmb();
for (i = 0; i < pkts; i++) {
otx2_nix_xmit_prepare(tx_pkts[i], cmd, flags);
}
/* Lets commit any changes in the packet */
- rte_cio_wmb();
+ rte_io_wmb();
for (i = 0; i < pkts; i++) {
otx2_nix_xmit_prepare(tx_pkts[i], cmd, flags);
txq->fc_cache_pkts -= pkts;
/* Lets commit any changes in the packet */
- rte_cio_wmb();
+ rte_io_wmb();
senddesc01_w0 = vld1q_dup_u64(&txq->cmd[0]);
senddesc23_w0 = senddesc01_w0;
for (i = 0; i < num; i++) {
used_idx = vq->vq_used_cons_idx;
- /* desc_is_used has a load-acquire or rte_cio_rmb inside
+ /* desc_is_used has a load-acquire or rte_io_rmb inside
* and wait for used desc in virtqueue.
*/
if (!desc_is_used(&desc[used_idx], vq))
if (unlikely(nb_pkts < RTE_VIRTIO_DESC_PER_LOOP))
return 0;
- /* virtqueue_nused has a load-acquire or rte_cio_rmb inside */
+ /* virtqueue_nused has a load-acquire or rte_io_rmb inside */
nb_used = virtqueue_nused(vq);
if (unlikely(nb_used == 0))
if (weak_barriers)
rte_smp_rmb();
else
- rte_cio_rmb();
+ rte_io_rmb();
}
static inline void
if (weak_barriers)
rte_smp_wmb();
else
- rte_cio_wmb();
+ rte_io_wmb();
}
static inline uint16_t
if (weak_barriers) {
/* x86 prefers to using rte_smp_rmb over __atomic_load_n as it reports
* a better perf(~1.5%), which comes from the saved branch by the compiler.
- * The if and else branch are identical with the smp and cio barriers both
+ * The if and else branches are identical with the smp and io barriers both
* defined as compiler barriers on x86.
*/
#ifdef RTE_ARCH_X86_64
#endif
} else {
flags = dp->flags;
- rte_cio_rmb();
+ rte_io_rmb();
}
return flags;
if (weak_barriers) {
/* x86 prefers to using rte_smp_wmb over __atomic_store_n as it reports
* a better perf(~1.5%), which comes from the saved branch by the compiler.
- * The if and else branch are identical with the smp and cio barriers both
+ * The if and else branches are identical with the smp and io barriers both
* defined as compiler barriers on x86.
*/
#ifdef RTE_ARCH_X86_64
__atomic_store_n(&dp->flags, flags, __ATOMIC_RELEASE);
#endif
} else {
- rte_cio_wmb();
+ rte_io_wmb();
dp->flags = flags;
}
}
return VTNET_TQ;
}
-/* virtqueue_nused has load-acquire or rte_cio_rmb insed */
+/* virtqueue_nused has load-acquire or rte_io_rmb inside */
static inline uint16_t
virtqueue_nused(const struct virtqueue *vq)
{
* x86 prefers to using rte_smp_rmb over __atomic_load_n as it
* reports a slightly better perf, which comes from the saved
* branch by the compiler.
- * The if and else branches are identical with the smp and cio
+ * The if and else branches are identical with the smp and io
* barriers both defined as compiler barriers on x86.
*/
#ifdef RTE_ARCH_X86_64
#endif
} else {
idx = vq->vq_split.ring.used->idx;
- rte_cio_rmb();
+ rte_io_rmb();
}
return idx - vq->vq_used_cons_idx;
}
* it reports a slightly better perf, which comes from the
* saved branch by the compiler.
* The if and else branches are identical with the smp and
- * cio barriers both defined as compiler barriers on x86.
+ * io barriers both defined as compiler barriers on x86.
*/
#ifdef RTE_ARCH_X86_64
rte_smp_wmb();
vq->vq_avail_idx, __ATOMIC_RELEASE);
#endif
} else {
- rte_cio_wmb();
+ rte_io_wmb();
vq->vq_split.ring.avail->idx = vq->vq_avail_idx;
}
}
struct vq_desc_extra *dxp;
used_idx = vq->vq_used_cons_idx;
- /* desc_is_used has a load-acquire or rte_cio_rmb inside
+ /* desc_is_used has a load-acquire or rte_io_rmb inside
* and wait for used desc in virtqueue.
*/
while (num > 0 && desc_is_used(&desc[used_idx], vq)) {
struct vq_desc_extra *dxp;
used_idx = vq->vq_used_cons_idx;
- /* desc_is_used has a load-acquire or rte_cio_rmb inside
+ /* desc_is_used has a load-acquire or rte_io_rmb inside
* and wait for used desc in virtqueue.
*/
while (num-- && desc_is_used(&desc[used_idx], vq)) {
otx2_write64(iq->fill_cnt, iq->doorbell_reg);
/* Make sure doorbell writes observed by HW */
- rte_cio_wmb();
+ rte_io_wmb();
iq->fill_cnt = 0;
}
/* Ack the h/w with no# of pkts read by Host */
rte_write32(pkts, droq->pkts_sent_reg);
- rte_cio_wmb();
+ rte_io_wmb();
droq->last_pkt_count -= pkts;
((struct mlx5_wqe_ctrl_seg *)wqe)->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
uint64_t *doorbell_addr =
(uint64_t *)((uint8_t *)uar->base_addr + 0x800);
- rte_cio_wmb();
+ rte_io_wmb();
sq->dbr[MLX5_SND_DBR] = rte_cpu_to_be_32((sq->db_pi + 1) &
MLX5_REGEX_MAX_WQE_INDEX);
rte_wmb();
next_cqe_offset = (cq->ci & (cq_size_get(cq) - 1));
cqe = (volatile struct mlx5_cqe *)(cq->cqe + next_cqe_offset);
- rte_cio_wmb();
+ rte_io_wmb();
int ret = check_cqe(cqe, cq_size_get(cq), cq->ci);
#define rte_io_rmb() rte_rmb()
-#define rte_cio_wmb() rte_wmb()
-
-#define rte_cio_rmb() rte_rmb()
-
static __rte_always_inline void
rte_atomic_thread_fence(int memory_order)
{
#define rte_io_rmb() rte_rmb()
-#define rte_cio_wmb() rte_wmb()
-
-#define rte_cio_rmb() rte_rmb()
-
static __rte_always_inline void
rte_atomic_thread_fence(int memory_order)
{
static inline void rte_io_rmb(void);
///@}
-/** @name Coherent I/O Memory Barrier
- *
- * Coherent I/O memory barrier is a lightweight version of I/O memory
- * barriers which are system-wide data synchronization barriers. This
- * is for only coherent memory domain between lcore and I/O device but
- * it is same as the I/O memory barriers in most of architectures.
- * However, some architecture provides even lighter barriers which are
- * somewhere in between I/O memory barriers and SMP memory barriers.
- * For example, in case of ARMv8, DMB(data memory barrier) instruction
- * can have different shareability domains - inner-shareable and
- * outer-shareable. And inner-shareable DMB fits for SMP memory
- * barriers and outer-shareable DMB for coherent I/O memory barriers,
- * which acts on coherent memory.
- *
- * In most cases, I/O memory barriers are safer but if operations are
- * on coherent memory instead of incoherent MMIO region of a device,
- * then coherent I/O memory barriers can be used and this could bring
- * performance gain depending on architectures.
- */
-///@{
-/**
- * Write memory barrier for coherent memory between lcore and I/O device
- *
- * Guarantees that the STORE operations on coherent memory that
- * precede the rte_cio_wmb() call are visible to I/O device before the
- * STORE operations that follow it.
- */
-static inline void rte_cio_wmb(void);
-
-/**
- * Read memory barrier for coherent memory between lcore and I/O device
- *
- * Guarantees that the LOAD operations on coherent memory updated by
- * I/O device that precede the rte_cio_rmb() call are visible to CPU
- * before the LOAD operations that follow it.
- */
-static inline void rte_cio_rmb(void);
-///@}
-
#endif /* __DOXYGEN__ */
/**
#define rte_io_rmb() rte_rmb()
-#define rte_cio_wmb() rte_wmb()
-
-#define rte_cio_rmb() rte_rmb()
-
static __rte_always_inline void
rte_atomic_thread_fence(int memory_order)
{
#define rte_io_rmb() rte_compiler_barrier()
-#define rte_cio_wmb() rte_compiler_barrier()
-
-#define rte_cio_rmb() rte_compiler_barrier()
-
/**
* Synchronization fence between threads based on the specified memory order.
*