/*
* Per virtio_ring.h in Linux.
* For virtio_pci on SMP, we don't need to order with respect to MMIO
- * accesses through relaxed memory I/O windows, so smp_mb() et al are
+ * accesses through relaxed memory I/O windows, so thread_fence is
* sufficient.
*
* For using virtio to talk to real devices (eg. vDPA) we do need real
virtio_mb(uint8_t weak_barriers)
{
if (weak_barriers)
- rte_smp_mb();
+ rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
else
rte_mb();
}
virtio_rmb(uint8_t weak_barriers)
{
if (weak_barriers)
- rte_smp_rmb();
+ rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
else
rte_io_rmb();
}
virtio_wmb(uint8_t weak_barriers)
{
if (weak_barriers)
- rte_smp_wmb();
+ rte_atomic_thread_fence(__ATOMIC_RELEASE);
else
rte_io_wmb();
}
uint16_t flags;
if (weak_barriers) {
-/* x86 prefers to using rte_smp_rmb over __atomic_load_n as it reports
+/* x86 prefers using rte_io_rmb over __atomic_load_n as it reports
* a better perf(~1.5%), which comes from the saved branch by the compiler.
- * The if and else branch are identical with the smp and io barriers both
- * defined as compiler barriers on x86.
+ * The if and else branches are identical on all platforms except Arm.
*/
-#ifdef RTE_ARCH_X86_64
- flags = dp->flags;
- rte_smp_rmb();
-#else
+#ifdef RTE_ARCH_ARM
flags = __atomic_load_n(&dp->flags, __ATOMIC_ACQUIRE);
+#else
+ flags = dp->flags;
+ rte_io_rmb();
#endif
} else {
flags = dp->flags;
uint16_t flags, uint8_t weak_barriers)
{
if (weak_barriers) {
-/* x86 prefers to using rte_smp_wmb over __atomic_store_n as it reports
+/* x86 prefers using rte_io_wmb over __atomic_store_n as it reports
* a better perf(~1.5%), which comes from the saved branch by the compiler.
- * The if and else branch are identical with the smp and io barriers both
- * defined as compiler barriers on x86.
+ * The if and else branches are identical on all platforms except Arm.
*/
-#ifdef RTE_ARCH_X86_64
- rte_smp_wmb();
- dp->flags = flags;
-#else
+#ifdef RTE_ARCH_ARM
__atomic_store_n(&dp->flags, flags, __ATOMIC_RELEASE);
+#else
+ rte_io_wmb();
+ dp->flags = flags;
#endif
} else {
rte_io_wmb();
dp->flags = flags;
}
}
+
#ifdef RTE_PMD_PACKET_PREFETCH
#define rte_packet_prefetch(p) rte_prefetch1(p)
#else
#define VIRTIO_MAX_TX_INDIRECT 8
struct virtio_tx_region {
struct virtio_net_hdr_mrg_rxbuf tx_hdr;
- struct vring_desc tx_indir[VIRTIO_MAX_TX_INDIRECT]
- __rte_aligned(16);
+ union {
+ struct vring_desc tx_indir[VIRTIO_MAX_TX_INDIRECT];
+ struct vring_packed_desc
+ tx_packed_indir[VIRTIO_MAX_TX_INDIRECT];
+ } __rte_aligned(16);
};
static inline int
dp[i].next = VQ_RING_DESC_CHAIN_END;
}
+static inline void
+vring_desc_init_indirect_packed(struct vring_packed_desc *dp, int n)
+{
+ int i;
+ for (i = 0; i < n; i++) { /* preset each of the first n entries of dp */
+ dp[i].id = (uint16_t)i; /* id is the entry's own index in the table */
+ dp[i].flags = VRING_DESC_F_WRITE; /* flags start as VRING_DESC_F_WRITE only */
+ }
+}
+
/**
* Tell the backend not to interrupt us. Implementation for packed virtqueues.
*/
static inline void
virtqueue_enqueue_xmit_packed(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
- uint16_t needed, int can_push, int in_order)
+ uint16_t needed, int use_indirect, int can_push,
+ int in_order)
{
struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
struct vq_desc_extra *dxp;
struct virtio_net_hdr *hdr;
uint16_t prev;
bool prepend_header = false;
+ uint16_t seg_num = cookie->nb_segs;
id = in_order ? vq->vq_avail_idx : vq->vq_desc_head_idx;
/* if offload disabled, it is not zeroed below, do it now */
if (!vq->hw->has_tx_offload)
virtqueue_clear_net_hdr(hdr);
+ } else if (use_indirect) {
+ /* setup tx ring slot to point to indirect
+ * descriptor list stored in reserved region.
+ *
+ * the first slot in indirect ring is already preset
+ * to point to the header in reserved region
+ */
+ start_dp[idx].addr = txvq->virtio_net_hdr_mem +
+ RTE_PTR_DIFF(&txr[idx].tx_packed_indir, txr);
+ start_dp[idx].len = (seg_num + 1) *
+ sizeof(struct vring_packed_desc);
+ /* reset flags for indirect desc */
+ head_flags = VRING_DESC_F_INDIRECT;
+ head_flags |= vq->vq_packed.cached_flags;
+ hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
+
+ /* loop below will fill in rest of the indirect elements */
+ start_dp = txr[idx].tx_packed_indir;
+ idx = 1;
} else {
/* setup first tx ring slot to point to header
* stored in reserved region.
start_dp[prev].id = id;
+ if (use_indirect) {
+ idx = head_idx;
+ if (++idx >= vq->vq_nentries) {
+ idx -= vq->vq_nentries;
+ vq->vq_packed.cached_flags ^=
+ VRING_PACKED_DESC_F_AVAIL_USED;
+ }
+ }
+
vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
vq->vq_avail_idx = idx;