if (weak_barriers)
rte_smp_rmb();
else
- rte_cio_rmb();
+ rte_io_rmb();
}
static inline void
if (weak_barriers)
rte_smp_wmb();
else
- rte_cio_wmb();
+ rte_io_wmb();
}
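For context, weak_barriers is typically set when the device does not require platform I/O ordering (e.g. a para-virtualized vhost backend), so an SMP-scoped barrier is enough, while a hardware virtio device needs the stronger rte_io_* barriers. A minimal sketch of the selection pattern, assuming a DPDK build environment; the demo_ function name is illustrative, not the driver's:

/* Sketch only: shows why the helper branches on weak_barriers. */
#include <stdint.h>
#include <rte_atomic.h>
#include <rte_io.h>

static inline void
demo_virtio_rmb(uint8_t weak_barriers)
{
	if (weak_barriers)
		rte_smp_rmb();	/* ordering against other CPUs is enough */
	else
		rte_io_rmb();	/* full I/O ordering for a hardware device */
}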
static inline uint16_t
if (weak_barriers) {
/* x86 prefers using rte_smp_rmb over __atomic_load_n as it reports
 * a better perf (~1.5%), which comes from the saved branch by the compiler.
- * The if and else branch are identical with the smp and cio barriers both
+ * The if and else branch are identical with the smp and io barriers both
* defined as compiler barriers on x86.
*/
#ifdef RTE_ARCH_X86_64
#endif
} else {
flags = dp->flags;
- rte_cio_rmb();
+ rte_io_rmb();
}
return flags;
if (weak_barriers) {
/* x86 prefers using rte_smp_wmb over __atomic_store_n as it reports
 * a better perf (~1.5%), which comes from the saved branch by the compiler.
- * The if and else branch are identical with the smp and cio barriers both
+ * The if and else branch are identical with the smp and io barriers both
* defined as compiler barriers on x86.
*/
#ifdef RTE_ARCH_X86_64
__atomic_store_n(&dp->flags, flags, __ATOMIC_RELEASE);
#endif
} else {
- rte_cio_wmb();
+ rte_io_wmb();
dp->flags = flags;
}
}
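The weak-barriers branches above replace an explicit fence with an acquire/release pairing on the descriptor's flags word: the writer releases flags after filling the rest of the descriptor, and the reader acquires flags before reading the other fields. A self-contained sketch of that pairing over a simplified descriptor type (the demo_ names are not the driver's):

#include <stdint.h>

struct demo_desc {
	uint64_t addr;
	uint32_t len;
	uint16_t id;
	uint16_t flags;
};

/* writer: all earlier stores to *dp become visible before flags */
static inline void
demo_store_flags(struct demo_desc *dp, uint16_t flags)
{
	__atomic_store_n(&dp->flags, flags, __ATOMIC_RELEASE);
}

/* reader: later loads from *dp cannot be hoisted above this load */
static inline uint16_t
demo_fetch_flags(const struct demo_desc *dp)
{
	return __atomic_load_n(&dp->flags, __ATOMIC_ACQUIRE);
}

int main(void)
{
	struct demo_desc d = { .addr = 0x1000, .len = 64, .id = 0, .flags = 0 };

	demo_store_flags(&d, 0x80);		/* publish the descriptor */
	return demo_fetch_flags(&d) != 0x80;	/* observer sees it */
}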
#define VIRTIO_MAX_TX_INDIRECT 8
struct virtio_tx_region {
struct virtio_net_hdr_mrg_rxbuf tx_hdr;
- struct vring_desc tx_indir[VIRTIO_MAX_TX_INDIRECT]
- __rte_aligned(16);
+ union {
+ struct vring_desc tx_indir[VIRTIO_MAX_TX_INDIRECT];
+ struct vring_packed_desc
+ tx_packed_indir[VIRTIO_MAX_TX_INDIRECT];
+ } __rte_aligned(16);
};
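Both descriptor layouts are 16 bytes, so overlaying the packed indirect table on the existing split one through a union keeps the per-slot reserved region at its original size. A self-contained check with simplified stand-in types (the demo_ names are not DPDK's):

#include <stdint.h>
#include <stdio.h>

#define DEMO_MAX_TX_INDIRECT 8

struct demo_split_desc  { uint64_t addr; uint32_t len; uint16_t flags; uint16_t next; };
struct demo_packed_desc { uint64_t addr; uint32_t len; uint16_t id; uint16_t flags; };

struct demo_tx_region {
	/* virtio_net_hdr_mrg_rxbuf omitted for brevity */
	union {
		struct demo_split_desc  tx_indir[DEMO_MAX_TX_INDIRECT];
		struct demo_packed_desc tx_packed_indir[DEMO_MAX_TX_INDIRECT];
	} __attribute__((aligned(16)));
};

int main(void)
{
	/* both union members are 8 * 16 bytes, so the union adds no space */
	printf("split=%zu packed=%zu region=%zu\n",
	       sizeof(struct demo_split_desc),
	       sizeof(struct demo_packed_desc),
	       sizeof(struct demo_tx_region));
	return 0;
}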
static inline int
dp[i].next = VQ_RING_DESC_CHAIN_END;
}
+static inline void
+vring_desc_init_indirect_packed(struct vring_packed_desc *dp, int n)
+{
+ int i;
+ for (i = 0; i < n; i++) {
+ dp[i].id = (uint16_t)i;
+ dp[i].flags = VRING_DESC_F_WRITE;
+ }
+}
+
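The new helper pre-initializes every entry of a slot's packed indirect table (id plus a flags template), much as the split-ring init helper just above chains the next pointers; the per-packet fields are then filled in on the transmit path ("loop below will fill in rest of the indirect elements"). A self-contained demo of the resulting table contents, using simplified local types rather than the driver's:

#include <stdint.h>
#include <stdio.h>

#define DEMO_DESC_F_WRITE	2
#define DEMO_MAX_TX_INDIRECT	8

struct demo_packed_desc { uint64_t addr; uint32_t len; uint16_t id; uint16_t flags; };

/* same logic as vring_desc_init_indirect_packed above, over a local type */
static void
demo_init_indirect_packed(struct demo_packed_desc *dp, int n)
{
	int i;

	for (i = 0; i < n; i++) {
		dp[i].id = (uint16_t)i;
		dp[i].flags = DEMO_DESC_F_WRITE;
	}
}

int main(void)
{
	struct demo_packed_desc tbl[DEMO_MAX_TX_INDIRECT] = { 0 };
	int i;

	/* done once per Tx slot when the reserved region is set up */
	demo_init_indirect_packed(tbl, DEMO_MAX_TX_INDIRECT);

	for (i = 0; i < DEMO_MAX_TX_INDIRECT; i++)
		printf("desc %d: id=%d flags=%d\n", i, tbl[i].id, tbl[i].flags);
	return 0;
}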
/**
* Tell the backend not to interrupt us. Implementation for packed virtqueues.
*/
return VTNET_TQ;
}
-/* virtqueue_nused has load-acquire or rte_cio_rmb insed */
+/* virtqueue_nused has load-acquire or rte_io_rmb inside */
static inline uint16_t
virtqueue_nused(const struct virtqueue *vq)
{
* x86 prefers using rte_smp_rmb over __atomic_load_n as it
* reports a slightly better perf, which comes from the saved
* branch by the compiler.
- * The if and else branches are identical with the smp and cio
+ * The if and else branches are identical with the smp and io
* barriers both defined as compiler barriers on x86.
*/
#ifdef RTE_ARCH_X86_64
#endif
} else {
idx = vq->vq_split.ring.used->idx;
- rte_cio_rmb();
+ rte_io_rmb();
}
return idx - vq->vq_used_cons_idx;
}
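vq_used_cons_idx and the ring's used index are free-running 16-bit counters, so the subtraction above stays correct even after the indices wrap past 65535. A tiny self-contained example of that modular arithmetic:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint16_t used_idx = 3;			/* device has wrapped past 0 */
	uint16_t used_cons_idx = 65531;		/* driver has not caught up */

	/* unsigned 16-bit subtraction: (3 - 65531) mod 2^16 == 8 */
	uint16_t nused = (uint16_t)(used_idx - used_cons_idx);

	printf("%u descriptors pending\n", (unsigned)nused);	/* prints 8 */
	return 0;
}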
* it reports a slightly better perf, which comes from the
* saved branch by the compiler.
* The if and else branches are identical with the smp and
- * cio barriers both defined as compiler barriers on x86.
+ * io barriers both defined as compiler barriers on x86.
*/
#ifdef RTE_ARCH_X86_64
rte_smp_wmb();
vq->vq_avail_idx, __ATOMIC_RELEASE);
#endif
} else {
- rte_cio_wmb();
+ rte_io_wmb();
vq->vq_split.ring.avail->idx = vq->vq_avail_idx;
}
}
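The split-ring publish above follows the same idea: the avail ring entries are written first, then the index is exposed, either with a release store (weak barriers) or behind rte_io_wmb for a hardware device. A sketch of that publish step with simplified types; the rte_* calls assume a DPDK build environment and the demo_ names are illustrative:

#include <stdint.h>
#include <rte_atomic.h>
#include <rte_io.h>

struct demo_avail { uint16_t flags; uint16_t idx; uint16_t ring[256]; };

static inline void
demo_publish_avail_idx(struct demo_avail *avail, uint16_t new_idx,
		uint8_t weak_barriers)
{
	if (weak_barriers) {
		/* release store: earlier ring[] writes visible before idx */
		__atomic_store_n(&avail->idx, new_idx, __ATOMIC_RELEASE);
	} else {
		rte_io_wmb();	/* order ring[] writes ahead of idx for the device */
		avail->idx = new_idx;
	}
}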
/* avoid write operation when unnecessary, to lessen cache issues */
#define ASSIGN_UNLESS_EQUAL(var, val) do { \
- typeof(var) var_ = (var); \
- typeof(val) val_ = (val); \
- if ((var_) != (val_)) \
- (var_) = (val_); \
+ typeof(var) *const var_ = &(var); \
+ typeof(val) const val_ = (val); \
+ if (*var_ != val_) \
+ *var_ = val_; \
} while (0)
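The rewritten macro also fixes a real bug visible in the removed lines: the old body copied var into a local and assigned the new value to that copy, so the caller's field was never updated. The new form takes the address instead, still evaluates var and val only once, and still skips the store when the value is unchanged. A self-contained usage sketch (demo_net_hdr is a made-up type):

#include <stdint.h>
#include <stdio.h>

#define ASSIGN_UNLESS_EQUAL(var, val) do {		\
	typeof(var) *const var_ = &(var);		\
	typeof(val) const val_ = (val);			\
	if (*var_ != val_)				\
		*var_ = val_;				\
} while (0)

struct demo_net_hdr { uint16_t csum_start; uint16_t csum_offset; };

int main(void)
{
	struct demo_net_hdr hdr = { .csum_start = 34, .csum_offset = 16 };

	/* value differs: one store is issued */
	ASSIGN_UNLESS_EQUAL(hdr.csum_start, 0);
	/* value already equal: the store (and cache-line dirtying) is skipped */
	ASSIGN_UNLESS_EQUAL(hdr.csum_offset, 16);

	printf("%d %d\n", hdr.csum_start, hdr.csum_offset);	/* 0 16 */
	return 0;
}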
#define virtqueue_clear_net_hdr(hdr) do { \
static inline void
virtqueue_enqueue_xmit_packed(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
- uint16_t needed, int can_push, int in_order)
+ uint16_t needed, int use_indirect, int can_push,
+ int in_order)
{
struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
struct vq_desc_extra *dxp;
struct virtio_net_hdr *hdr;
uint16_t prev;
bool prepend_header = false;
+ uint16_t seg_num = cookie->nb_segs;
id = in_order ? vq->vq_avail_idx : vq->vq_desc_head_idx;
/* if offload disabled, it is not zeroed below, do it now */
if (!vq->hw->has_tx_offload)
virtqueue_clear_net_hdr(hdr);
+ } else if (use_indirect) {
+ /* setup tx ring slot to point to indirect
+ * descriptor list stored in reserved region.
+ *
+ * the first slot in indirect ring is already preset
+ * to point to the header in reserved region
+ */
+ start_dp[idx].addr = txvq->virtio_net_hdr_mem +
+ RTE_PTR_DIFF(&txr[idx].tx_packed_indir, txr);
+ start_dp[idx].len = (seg_num + 1) *
+ sizeof(struct vring_packed_desc);
+ /* reset flags for indirect desc */
+ head_flags = VRING_DESC_F_INDIRECT;
+ head_flags |= vq->vq_packed.cached_flags;
+ hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
+
+ /* loop below will fill in rest of the indirect elements */
+ start_dp = txr[idx].tx_packed_indir;
+ idx = 1;
} else {
/* setup first tx ring slot to point to header
* stored in reserved region.
start_dp[prev].id = id;
+ if (use_indirect) {
+ idx = head_idx;
+ if (++idx >= vq->vq_nentries) {
+ idx -= vq->vq_nentries;
+ vq->vq_packed.cached_flags ^=
+ VRING_PACKED_DESC_F_AVAIL_USED;
+ }
+ }
+
vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
vq->vq_avail_idx = idx;
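The wrap handling added here for the indirect case follows the packed-ring rule used throughout the driver: whenever a ring index passes vq_nentries it is reduced by the ring size and the cached AVAIL/USED flag bits are toggled, since the meaning of those bits inverts on every pass over the ring. A self-contained sketch of that wrap step (demo_ types, names and values are illustrative):

#include <stdint.h>
#include <stdio.h>

#define DEMO_DESC_F_AVAIL	(1 << 7)
#define DEMO_DESC_F_USED	(1 << 15)
#define DEMO_DESC_F_AVAIL_USED	(DEMO_DESC_F_AVAIL | DEMO_DESC_F_USED)

struct demo_vq {
	uint16_t vq_nentries;
	uint16_t vq_avail_idx;
	uint16_t cached_flags;	/* flags template for newly exposed descriptors */
};

/* advance the avail index by n slots (n <= vq_nentries, as in the driver),
 * wrapping and flipping the cached AVAIL/USED bits when we pass the end */
static void
demo_advance_avail(struct demo_vq *vq, uint16_t n)
{
	vq->vq_avail_idx += n;
	if (vq->vq_avail_idx >= vq->vq_nentries) {
		vq->vq_avail_idx -= vq->vq_nentries;
		vq->cached_flags ^= DEMO_DESC_F_AVAIL_USED;
	}
}

int main(void)
{
	struct demo_vq vq = {
		.vq_nentries = 256,
		.vq_avail_idx = 255,
		.cached_flags = DEMO_DESC_F_AVAIL,
	};

	demo_advance_avail(&vq, 2);	/* crosses the end: index wraps, flags flip */
	printf("idx=%u flags=0x%x\n",
	       (unsigned)vq.vq_avail_idx, (unsigned)vq.cached_flags);
	return 0;
}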
struct vq_desc_extra *dxp;
used_idx = vq->vq_used_cons_idx;
- /* desc_is_used has a load-acquire or rte_cio_rmb inside
+ /* desc_is_used has a load-acquire or rte_io_rmb inside
* and wait for used desc in virtqueue.
*/
while (num > 0 && desc_is_used(&desc[used_idx], vq)) {
struct vq_desc_extra *dxp;
used_idx = vq->vq_used_cons_idx;
- /* desc_is_used has a load-acquire or rte_cio_rmb inside
+ /* desc_is_used has a load-acquire or rte_io_rmb inside
* and wait for used desc in virtqueue.
*/
while (num-- && desc_is_used(&desc[used_idx], vq)) {
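For reference, a packed-ring descriptor counts as used when its AVAIL and USED flag bits agree with each other and with the queue's used wrap counter; the flags load is the acquire (or rte_io_rmb-ordered) access the comments above refer to, so the rest of the descriptor is read only after it is marked used. A simplified, self-contained sketch of that check (not the driver's exact code; the bit positions follow the virtio 1.1 spec):

#include <stdbool.h>
#include <stdint.h>

#define DEMO_DESC_F_AVAIL	(1 << 7)
#define DEMO_DESC_F_USED	(1 << 15)

struct demo_packed_desc { uint64_t addr; uint32_t len; uint16_t id; uint16_t flags; };

static bool
demo_desc_is_used(const struct demo_packed_desc *desc, bool used_wrap_counter)
{
	/* acquire load: later reads of addr/len/id see the device's writes */
	uint16_t flags = __atomic_load_n(&desc->flags, __ATOMIC_ACQUIRE);
	bool avail = !!(flags & DEMO_DESC_F_AVAIL);
	bool used = !!(flags & DEMO_DESC_F_USED);

	return avail == used && used == used_wrap_counter;
}

int main(void)
{
	struct demo_packed_desc d = { .flags = DEMO_DESC_F_AVAIL | DEMO_DESC_F_USED };

	/* with wrap counter 1, both bits set means the device completed it */
	return !demo_desc_is_used(&d, true);
}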