From: David Christensen Date: Wed, 14 Aug 2019 17:49:53 +0000 (-0500) Subject: net/virtio: add Altivec Rx X-Git-Url: http://git.droids-corp.org/?a=commitdiff_plain;h=52b5a707e6ca;p=dpdk.git net/virtio: add Altivec Rx Added the file virtio_rxtx_simple_altivec.c which implements Altivec code for the virtio vectorized RX functions. Updated the various build files. Signed-off-by: David Christensen Reviewed-by: Maxime Coquelin --- diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile index 6c2c9967bf..5144e7cc4a 100644 --- a/drivers/net/virtio/Makefile +++ b/drivers/net/virtio/Makefile @@ -33,6 +33,8 @@ SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx_simple.c ifeq ($(CONFIG_RTE_ARCH_X86),y) SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx_simple_sse.c +else ifeq ($(CONFIG_RTE_ARCH_PPC_64),y) +SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx_simple_altivec.c else ifneq ($(filter y,$(CONFIG_RTE_ARCH_ARM) $(CONFIG_RTE_ARCH_ARM64)),) SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx_simple_neon.c endif diff --git a/drivers/net/virtio/meson.build b/drivers/net/virtio/meson.build index 794905401d..04c7fdf25d 100644 --- a/drivers/net/virtio/meson.build +++ b/drivers/net/virtio/meson.build @@ -11,6 +11,8 @@ deps += ['kvargs', 'bus_pci'] if arch_subdir == 'x86' sources += files('virtio_rxtx_simple_sse.c') +elif arch_subdir == 'ppc_64' + sources += files('virtio_rxtx_simple_altivec.c') elif arch_subdir == 'arm' and host_machine.cpu_family().startswith('aarch64') sources += files('virtio_rxtx_simple_neon.c') endif diff --git a/drivers/net/virtio/virtio_rxtx_simple_altivec.c b/drivers/net/virtio/virtio_rxtx_simple_altivec.c new file mode 100644 index 0000000000..1cd732dec3 --- /dev/null +++ b/drivers/net/virtio/virtio_rxtx_simple_altivec.c @@ -0,0 +1,203 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2015 Intel Corporation + * Copyright(C) 2019 IBM Corporation + */ + +#include +#include +#include +#include +#include + +#include + 
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "virtio_rxtx_simple.h"
+
+/* Number of used-ring entries handled by one iteration of the Rx loop. */
+#define RTE_VIRTIO_DESC_PER_LOOP 8
+
+/*
+ * virtio vPMD receive routine (Altivec/VSX), only accepts
+ * nb_pkts >= RTE_VIRTIO_DESC_PER_LOOP.
+ *
+ * This routine is for non-mergeable RX, one desc for each guest buffer.
+ * This routine is based on the RX ring layout optimization. Each entry in the
+ * avail ring points to the desc with the same index in the desc ring and this
+ * will never be changed in the driver.
+ *
+ * rx_queue: the struct virtnet_rx to receive from.
+ * rx_pkts:  output array of mbuf pointers. Every loop iteration stores
+ *           RTE_VIRTIO_DESC_PER_LOOP pointers, even when fewer entries are
+ *           finally counted as received.
+ * nb_pkts:  capacity of rx_pkts; rounded down to a multiple of
+ *           RTE_VIRTIO_DESC_PER_LOOP before use.
+ *
+ * Returns the number of packets stored at the start of rx_pkts. Returns 0
+ * when the device is not started, when nb_pkts < RTE_VIRTIO_DESC_PER_LOOP,
+ * or when the used ring holds no new entry.
+ */
+uint16_t
+virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
+	uint16_t nb_pkts)
+{
+	struct virtnet_rx *rxvq = rx_queue;
+	struct virtqueue *vq = rxvq->vq;
+	struct virtio_hw *hw = vq->hw;
+	uint16_t nb_used;
+	uint16_t desc_idx;
+	struct vring_used_elem *rused;
+	struct rte_mbuf **sw_ring;
+	struct rte_mbuf **sw_ring_end;
+	uint16_t nb_pkts_received = 0;
+	const vector unsigned char zero = {0};
+
+	/*
+	 * Each 16-byte load from the used ring covers two consecutive
+	 * elements (the loop below advances rused by 2 per load).
+	 * shuf_msk1 picks bytes 4-5 of the vector (first element) and
+	 * shuf_msk2 picks bytes 12-13 (second element), placing them in the
+	 * pkt_len and data_len positions of rx_descriptor_fields1. Indexes
+	 * of 0xFF select from the 'zero' operand of vec_perm, i.e. yield 0.
+	 */
+	const vector unsigned char shuf_msk1 = {
+		0xFF, 0xFF, 0xFF, 0xFF,	/* packet type */
+		4, 5, 0xFF, 0xFF,	/* pkt len */
+		4, 5,			/* dat len */
+		0xFF, 0xFF,		/* vlan tci */
+		0xFF, 0xFF, 0xFF, 0xFF
+	};
+
+	const vector unsigned char shuf_msk2 = {
+		0xFF, 0xFF, 0xFF, 0xFF,	/* packet type */
+		12, 13, 0xFF, 0xFF,	/* pkt len */
+		12, 13,			/* dat len */
+		0xFF, 0xFF,		/* vlan tci */
+		0xFF, 0xFF, 0xFF, 0xFF
+	};
+
+	/*
+	 * Subtract the virtio-net header length from the two length fields.
+	 * Adding the 16-bit negated header size to the pkt_len and data_len
+	 * lanes performs the subtraction; all other lanes are left untouched.
+	 * In which case do we need the header length in used->len ?
+	 */
+	const vector unsigned short len_adjust = {
+		0, 0,
+		(uint16_t)-hw->vtnet_hdr_size, 0,
+		(uint16_t)-hw->vtnet_hdr_size, 0,
+		0, 0
+	};
+
+	if (unlikely(hw->started == 0))
+		return nb_pkts_received;
+
+	if (unlikely(nb_pkts < RTE_VIRTIO_DESC_PER_LOOP))
+		return 0;
+
+	nb_used = VIRTQUEUE_NUSED(vq);
+
+	/*
+	 * The used-ring entries must not be loaded before the used index
+	 * that publishes them. A compiler-only barrier does not stop the CPU
+	 * from reordering these loads on a weakly ordered architecture such
+	 * as POWER, so a real read barrier is required here.
+	 */
+	rte_rmb();
+
+	if (unlikely(nb_used == 0))
+		return 0;
+
+	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_VIRTIO_DESC_PER_LOOP);
+	nb_used = RTE_MIN(nb_used, nb_pkts);
+
+	desc_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
+	rused = &vq->vq_split.ring.used->ring[desc_idx];
+	sw_ring = &vq->sw_ring[desc_idx];
+	sw_ring_end = &vq->sw_ring[vq->vq_nentries];
+
+	rte_prefetch0(rused);
+
+	/* Refill the ring and notify the host once enough slots are free. */
+	if (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) {
+		virtio_rxq_rearm_vec(rxvq);
+		if (unlikely(virtqueue_kick_prepare(vq)))
+			virtqueue_notify(vq);
+	}
+
+	/*
+	 * NOTE(review): nb_used is decremented while nb_pkts_received is
+	 * incremented, so the loop condition can become false while used
+	 * entries remain; those are picked up by a later call.
+	 */
+	for (nb_pkts_received = 0;
+		nb_pkts_received < nb_used;) {
+		vector unsigned char desc[RTE_VIRTIO_DESC_PER_LOOP / 2];
+		vector unsigned char mbp[RTE_VIRTIO_DESC_PER_LOOP / 2];
+		vector unsigned char pkt_mb[RTE_VIRTIO_DESC_PER_LOOP];
+
+		/*
+		 * Load the mbuf pointers and used elements two at a time and
+		 * copy the mbuf pointers straight into the caller's array.
+		 */
+		mbp[0] = vec_vsx_ld(0, (unsigned char const *)(sw_ring + 0));
+		desc[0] = vec_vsx_ld(0, (unsigned char const *)(rused + 0));
+		*(vector unsigned char *)&rx_pkts[0] = mbp[0];
+
+		mbp[1] = vec_vsx_ld(0, (unsigned char const *)(sw_ring + 2));
+		desc[1] = vec_vsx_ld(0, (unsigned char const *)(rused + 2));
+		*(vector unsigned char *)&rx_pkts[2] = mbp[1];
+
+		mbp[2] = vec_vsx_ld(0, (unsigned char const *)(sw_ring + 4));
+		desc[2] = vec_vsx_ld(0, (unsigned char const *)(rused + 4));
+		*(vector unsigned char *)&rx_pkts[4] = mbp[2];
+
+		mbp[3] = vec_vsx_ld(0, (unsigned char const *)(sw_ring + 6));
+		desc[3] = vec_vsx_ld(0, (unsigned char const *)(rused + 6));
+		*(vector unsigned char *)&rx_pkts[6] = mbp[3];
+
+		/*
+		 * For each pair: shuffle the length into place, subtract the
+		 * header size, and store into the mbuf's
+		 * rx_descriptor_fields1.
+		 */
+		pkt_mb[0] = vec_perm(desc[0], zero, shuf_msk1);
+		pkt_mb[1] = vec_perm(desc[0], zero, shuf_msk2);
+		pkt_mb[0] = (vector unsigned char)
+			((vector unsigned short)pkt_mb[0] + len_adjust);
+		pkt_mb[1] = (vector unsigned char)
+			((vector unsigned short)pkt_mb[1] + len_adjust);
+		*(vector unsigned char *)&rx_pkts[0]->rx_descriptor_fields1 =
+			pkt_mb[0];
+		*(vector unsigned char *)&rx_pkts[1]->rx_descriptor_fields1 =
+			pkt_mb[1];
+
+		pkt_mb[2] = vec_perm(desc[1], zero, shuf_msk1);
+		pkt_mb[3] = vec_perm(desc[1], zero, shuf_msk2);
+		pkt_mb[2] = (vector unsigned char)
+			((vector unsigned short)pkt_mb[2] + len_adjust);
+		pkt_mb[3] = (vector unsigned char)
+			((vector unsigned short)pkt_mb[3] + len_adjust);
+		*(vector unsigned char *)&rx_pkts[2]->rx_descriptor_fields1 =
+			pkt_mb[2];
+		*(vector unsigned char *)&rx_pkts[3]->rx_descriptor_fields1 =
+			pkt_mb[3];
+
+		pkt_mb[4] = vec_perm(desc[2], zero, shuf_msk1);
+		pkt_mb[5] = vec_perm(desc[2], zero, shuf_msk2);
+		pkt_mb[4] = (vector unsigned char)
+			((vector unsigned short)pkt_mb[4] + len_adjust);
+		pkt_mb[5] = (vector unsigned char)
+			((vector unsigned short)pkt_mb[5] + len_adjust);
+		*(vector unsigned char *)&rx_pkts[4]->rx_descriptor_fields1 =
+			pkt_mb[4];
+		*(vector unsigned char *)&rx_pkts[5]->rx_descriptor_fields1 =
+			pkt_mb[5];
+
+		pkt_mb[6] = vec_perm(desc[3], zero, shuf_msk1);
+		pkt_mb[7] = vec_perm(desc[3], zero, shuf_msk2);
+		pkt_mb[6] = (vector unsigned char)
+			((vector unsigned short)pkt_mb[6] + len_adjust);
+		pkt_mb[7] = (vector unsigned char)
+			((vector unsigned short)pkt_mb[7] + len_adjust);
+		*(vector unsigned char *)&rx_pkts[6]->rx_descriptor_fields1 =
+			pkt_mb[6];
+		*(vector unsigned char *)&rx_pkts[7]->rx_descriptor_fields1 =
+			pkt_mb[7];
+
+		if (unlikely(nb_used <= RTE_VIRTIO_DESC_PER_LOOP)) {
+			/*
+			 * Last batch: count only the entries that are really
+			 * used and that lie before the end of the ring.
+			 */
+			if (sw_ring + nb_used <= sw_ring_end)
+				nb_pkts_received += nb_used;
+			else
+				nb_pkts_received += sw_ring_end - sw_ring;
+			break;
+		} else {
+			if (unlikely(sw_ring + RTE_VIRTIO_DESC_PER_LOOP >=
+				sw_ring_end)) {
+				/* Reached the end of the ring: stop here. */
+				nb_pkts_received += sw_ring_end - sw_ring;
+				break;
+			} else {
+				nb_pkts_received += RTE_VIRTIO_DESC_PER_LOOP;
+
+				rx_pkts += RTE_VIRTIO_DESC_PER_LOOP;
+				sw_ring += RTE_VIRTIO_DESC_PER_LOOP;
+				rused += RTE_VIRTIO_DESC_PER_LOOP;
+				nb_used -= RTE_VIRTIO_DESC_PER_LOOP;
+			}
+		}
+	}
+
+	vq->vq_used_cons_idx += nb_pkts_received;
+	vq->vq_free_cnt += nb_pkts_received;
+	rxvq->stats.packets += nb_pkts_received;
+	return nb_pkts_received;
+}