-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2015 Intel Corporation
*/
#include <stdint.h>
+#include <limits.h>
-#include <rte_ethdev.h>
+#include <rte_ethdev_driver.h>
#include <rte_malloc.h>
+#include <rte_vect.h>
#include "ixgbe_ethdev.h"
#include "ixgbe_rxtx.h"
#include "ixgbe_rxtx_vec_common.h"
-#include <arm_neon.h>
-
#pragma GCC diagnostic ignored "-Wcast-qual"
static inline void
* Data to be rearmed is 6 bytes long.
*/
vst1_u8((uint8_t *)&mb0->rearm_data, p);
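+	/* buf_iova is the IO virtual (bus) address the NIC DMAs into;
+	 * offset past the headroom to reach the packet data area
+	 */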
- paddr = mb0->buf_physaddr + RTE_PKTMBUF_HEADROOM;
+ paddr = mb0->buf_iova + RTE_PKTMBUF_HEADROOM;
dma_addr0 = vsetq_lane_u64(paddr, zero, 0);
/* flush desc with pa dma_addr */
vst1q_u64((uint64_t *)&rxdp++->read, dma_addr0);
vst1_u8((uint8_t *)&mb1->rearm_data, p);
- paddr = mb1->buf_physaddr + RTE_PKTMBUF_HEADROOM;
+ paddr = mb1->buf_iova + RTE_PKTMBUF_HEADROOM;
dma_addr1 = vsetq_lane_u64(paddr, zero, 0);
vst1q_u64((uint64_t *)&rxdp++->read, dma_addr1);
}
IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
}
-/* Handling the offload flags (olflags) field takes computation
- * time when receiving packets. Therefore we provide a flag to disable
- * the processing of the olflags field when they are not needed. This
- * gives improved performance, at the cost of losing the offload info
- * in the received packet
- */
-#ifdef RTE_IXGBE_RX_OLFLAGS_ENABLE
-
#define VTAG_SHIFT (3)
static inline void
} vol;
const uint8x16_t pkttype_msk = {
- PKT_RX_VLAN_PKT, PKT_RX_VLAN_PKT,
- PKT_RX_VLAN_PKT, PKT_RX_VLAN_PKT,
+ PKT_RX_VLAN, PKT_RX_VLAN,
+ PKT_RX_VLAN, PKT_RX_VLAN,
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00};
rx_pkts[2]->ol_flags = vol.e[2];
rx_pkts[3]->ol_flags = vol.e[3];
}
-#else
-#define desc_to_olflags_v(sterr_tmp1, sterr_tmp2, staterr, rx_pkts)
-#endif
-/*
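+/* EOP (end of packet) is bit 1 of each per-descriptor status byte;
+ * 0x02020202 selects it in all four packed status bytes at once.
+ */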
+#define IXGBE_VPMD_DESC_EOP_MASK 0x02020202
+#define IXGBE_UINT8_BIT (CHAR_BIT * sizeof(uint8_t))
+
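+/* Resolve an mbuf packet type from the descriptor pkt_info field.
+ * A set ETQF bit means the field holds an EtherType-filter match
+ * rather than a packet type, so the type cannot be reported; the
+ * tunnel bit selects the tunnel ptype lookup table.
+ */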
+static inline uint32_t
+get_packet_type(uint32_t pkt_info,
+ uint32_t etqf_check,
+ uint32_t tunnel_check)
+{
+ if (etqf_check)
+ return RTE_PTYPE_UNKNOWN;
+
+ if (tunnel_check) {
+ pkt_info &= IXGBE_PACKET_TYPE_MASK_TUNNEL;
+ return ptype_table_tn[pkt_info];
+ }
+
+ pkt_info &= IXGBE_PACKET_TYPE_MASK_82599;
+ return ptype_table[pkt_info];
+}
+
+static inline void
+desc_to_ptype_v(uint64x2_t descs[4], uint16_t pkt_type_mask,
+ struct rte_mbuf **rx_pkts)
+{
+ uint32x4_t etqf_check, tunnel_check;
+ uint32x4_t etqf_mask = vdupq_n_u32(0x8000);
+ uint32x4_t tunnel_mask = vdupq_n_u32(0x10000);
+ uint32x4_t ptype_mask = vdupq_n_u32((uint32_t)pkt_type_mask);
+ uint32x4_t ptype0 = vzipq_u32(vreinterpretq_u32_u64(descs[0]),
+ vreinterpretq_u32_u64(descs[2])).val[0];
+ uint32x4_t ptype1 = vzipq_u32(vreinterpretq_u32_u64(descs[1]),
+ vreinterpretq_u32_u64(descs[3])).val[0];
+
+	/* interleave low 32 bits again: the four pkt_info words now
+	 * sit in one NEON register, in descriptor order
+	 */
+ ptype0 = vzipq_u32(ptype0, ptype1).val[0];
+
+ /* mask etqf bits */
+ etqf_check = vandq_u32(ptype0, etqf_mask);
+ /* mask tunnel bits */
+ tunnel_check = vandq_u32(ptype0, tunnel_mask);
+
+ /* shift right by IXGBE_PACKET_TYPE_SHIFT, and apply ptype mask */
+ ptype0 = vandq_u32(vshrq_n_u32(ptype0, IXGBE_PACKET_TYPE_SHIFT),
+ ptype_mask);
+
+ rx_pkts[0]->packet_type =
+ get_packet_type(vgetq_lane_u32(ptype0, 0),
+ vgetq_lane_u32(etqf_check, 0),
+ vgetq_lane_u32(tunnel_check, 0));
+ rx_pkts[1]->packet_type =
+ get_packet_type(vgetq_lane_u32(ptype0, 1),
+ vgetq_lane_u32(etqf_check, 1),
+ vgetq_lane_u32(tunnel_check, 1));
+ rx_pkts[2]->packet_type =
+ get_packet_type(vgetq_lane_u32(ptype0, 2),
+ vgetq_lane_u32(etqf_check, 2),
+ vgetq_lane_u32(tunnel_check, 2));
+ rx_pkts[3]->packet_type =
+ get_packet_type(vgetq_lane_u32(ptype0, 3),
+ vgetq_lane_u32(etqf_check, 3),
+ vgetq_lane_u32(tunnel_check, 3));
+}
+
+/**
 * vPMD raw receive routine; only accepts nb_pkts >= RTE_IXGBE_DESCS_PER_LOOP
*
* Notice:
 * - if nb_pkts < RTE_IXGBE_DESCS_PER_LOOP, no packets are returned
- * - nb_pkts > RTE_IXGBE_MAX_RX_BURST, only scan RTE_IXGBE_MAX_RX_BURST
- * numbers of DD bit
 * - nb_pkts is floor-aligned to a multiple of RTE_IXGBE_DESCS_PER_LOOP
- * - don't support ol_flags for rss and csum err
*/
-
-#define IXGBE_VPMD_DESC_DD_MASK 0x01010101
-#define IXGBE_VPMD_DESC_EOP_MASK 0x02020202
-
static inline uint16_t
_recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
uint16_t nb_pkts, uint8_t *split_packet)
uint16x8_t crc_adjust = {0, 0, rxq->crc_len, 0,
rxq->crc_len, 0, 0, 0};
- /* nb_pkts shall be less equal than RTE_IXGBE_MAX_RX_BURST */
- nb_pkts = RTE_MIN(nb_pkts, RTE_IXGBE_MAX_RX_BURST);
-
/* nb_pkts has to be floor-aligned to RTE_IXGBE_DESCS_PER_LOOP */
nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_IXGBE_DESCS_PER_LOOP);
uint64x2_t mbp1, mbp2;
uint8x16_t staterr;
uint16x8_t tmp;
- uint32_t var = 0;
uint32_t stat;
- /* B.1 load 1 mbuf point */
+		/* B.1 load 2 mbuf pointers */
mbp1 = vld1q_u64((uint64_t *)&sw_ring[pos]);
		/* B.2 copy 2 mbuf pointers into rx_pkts */
vst1q_u64((uint64_t *)&rx_pkts[pos], mbp1);
- /* B.1 load 1 mbuf point */
+		/* B.1 load 2 mbuf pointers */
mbp2 = vld1q_u64((uint64_t *)&sw_ring[pos + 2]);
/* A. load 4 pkts descs */
descs[1] = vld1q_u64((uint64_t *)(rxdp + 1));
descs[2] = vld1q_u64((uint64_t *)(rxdp + 2));
descs[3] = vld1q_u64((uint64_t *)(rxdp + 3));
- rte_smp_rmb();
		/* B.2 copy 2 mbuf pointers into rx_pkts */
vst1q_u64((uint64_t *)&rx_pkts[pos + 2], mbp2);
/* C.2 get 4 pkts staterr value */
staterr = vzipq_u8(sterr_tmp1.val[1], sterr_tmp2.val[1]).val[0];
- stat = vgetq_lane_u32(vreinterpretq_u32_u8(staterr), 0);
/* set ol_flags with vlan packet type */
desc_to_olflags_v(sterr_tmp1, sterr_tmp2, staterr,
/* C* extract and record EOP bit */
if (split_packet) {
+ stat = vgetq_lane_u32(vreinterpretq_u32_u8(staterr), 0);
/* and with mask to extract bits, flipping 1-0 */
*(int *)split_packet = ~stat & IXGBE_VPMD_DESC_EOP_MASK;
split_packet += RTE_IXGBE_DESCS_PER_LOOP;
}
+ /* C.4 expand DD bit to saturate UINT8 */
+ staterr = vshlq_n_u8(staterr, IXGBE_UINT8_BIT - 1);
+ staterr = vreinterpretq_u8_s8
+ (vshrq_n_s8(vreinterpretq_s8_u8(staterr),
+ IXGBE_UINT8_BIT - 1));
+ stat = ~vgetq_lane_u32(vreinterpretq_u32_u8(staterr), 0);
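+		/* each byte lane of staterr is now 0xff if its DD bit was
+		 * set and 0x00 otherwise, i.e. per lane:
+		 *   lane = (lane & IXGBE_RXDADV_STAT_DD) ? 0xff : 0x00;
+		 * so in the inverted stat a completed descriptor reads as
+		 * a zero byte
+		 */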
+
rte_prefetch_non_temporal(rxdp + RTE_IXGBE_DESCS_PER_LOOP);
/* D.3 copy final 1,2 data to rx_pkts */
vst1q_u8((uint8_t *)&rx_pkts[pos]->rx_descriptor_fields1,
pkt_mb1);
- stat &= IXGBE_VPMD_DESC_DD_MASK;
+ desc_to_ptype_v(descs, rxq->pkt_type_mask, &rx_pkts[pos]);
- /* C.4 calc avaialbe number of desc */
- if (likely(stat != IXGBE_VPMD_DESC_DD_MASK)) {
- while (stat & 0x01) {
- ++var;
- stat = stat >> 8;
- }
- nb_pkts_recd += var;
- break;
- } else {
+ /* C.5 calc available number of desc */
+ if (unlikely(stat == 0)) {
nb_pkts_recd += RTE_IXGBE_DESCS_PER_LOOP;
+ } else {
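+			/* completed descriptors contribute 8 trailing
+			 * zero bits each, so ctz counts them in one step
+			 */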
+ nb_pkts_recd += __builtin_ctz(stat) / IXGBE_UINT8_BIT;
+ break;
}
}
return nb_pkts_recd;
}
-/*
+/**
 * vPMD receive routine; only accepts nb_pkts >= RTE_IXGBE_DESCS_PER_LOOP
*
* Notice:
 * - if nb_pkts < RTE_IXGBE_DESCS_PER_LOOP, no packets are returned
- * - nb_pkts > RTE_IXGBE_MAX_RX_BURST, only scan RTE_IXGBE_MAX_RX_BURST
- * numbers of DD bit
 * - nb_pkts is floor-aligned to a multiple of RTE_IXGBE_DESCS_PER_LOOP
 * - RSS and checksum-error ol_flags are not supported
*/
return _recv_raw_pkts_vec(rx_queue, rx_pkts, nb_pkts, NULL);
}
-/*
+/**
* vPMD receive routine that reassembles scattered packets
*
* Notice:
 * - RSS and checksum-error ol_flags are not supported
 * - if nb_pkts < RTE_IXGBE_DESCS_PER_LOOP, no packets are returned
- * - nb_pkts > RTE_IXGBE_MAX_RX_BURST, only scan RTE_IXGBE_MAX_RX_BURST
- * numbers of DD bit
 * - nb_pkts is floor-aligned to a multiple of RTE_IXGBE_DESCS_PER_LOOP
*/
-uint16_t
-ixgbe_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
- uint16_t nb_pkts)
+static uint16_t
+ixgbe_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts)
{
struct ixgbe_rx_queue *rxq = rx_queue;
uint8_t split_flags[RTE_IXGBE_MAX_RX_BURST] = {0};
i++;
if (i == nb_bufs)
return nb_bufs;
+ rxq->pkt_first_seg = rx_pkts[i];
}
return i + reassemble_packets(rxq, &rx_pkts[i], nb_bufs - i,
&split_flags[i]);
}
+/**
+ * vPMD receive routine that reassembles scattered packets.
+ */
+uint16_t
+ixgbe_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts)
+{
+ uint16_t retval = 0;
+
+ while (nb_pkts > RTE_IXGBE_MAX_RX_BURST) {
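+		/* cap each call at RTE_IXGBE_MAX_RX_BURST, the size of
+		 * the split_flags array in ixgbe_recv_scattered_burst_vec()
+		 */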
+ uint16_t burst;
+
+ burst = ixgbe_recv_scattered_burst_vec(rx_queue,
+ rx_pkts + retval,
+ RTE_IXGBE_MAX_RX_BURST);
+ retval += burst;
+ nb_pkts -= burst;
+ if (burst < RTE_IXGBE_MAX_RX_BURST)
+ return retval;
+ }
+
+ return retval + ixgbe_recv_scattered_burst_vec(rx_queue,
+ rx_pkts + retval,
+ nb_pkts);
+}
+
static inline void
vtx1(volatile union ixgbe_adv_tx_desc *txdp,
struct rte_mbuf *pkt, uint64_t flags)
{
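+	/* low qword: DMA address of the packet data; high qword packs
+	 * PAYLEN (bits 46-63), command flags and the data length
+	 */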
uint64x2_t descriptor = {
- pkt->buf_physaddr + pkt->data_off,
+ pkt->buf_iova + pkt->data_off,
(uint64_t)pkt->pkt_len << 46 | flags | pkt->data_len};
vst1q_u64((uint64_t *)&txdp->read, descriptor);
return nb_pkts;
}
-static void __attribute__((cold))
+static void __rte_cold
ixgbe_tx_queue_release_mbufs_vec(struct ixgbe_tx_queue *txq)
{
_ixgbe_tx_queue_release_mbufs_vec(txq);
}
-void __attribute__((cold))
+void __rte_cold
ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue *rxq)
{
_ixgbe_rx_queue_release_mbufs_vec(rxq);
}
-static void __attribute__((cold))
+static void __rte_cold
ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
{
_ixgbe_tx_free_swring_vec(txq);
}
-static void __attribute__((cold))
+static void __rte_cold
ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
{
_ixgbe_reset_tx_queue_vec(txq);
.reset = ixgbe_reset_tx_queue,
};
-int __attribute__((cold))
+int __rte_cold
ixgbe_rxq_vec_setup(struct ixgbe_rx_queue *rxq)
{
return ixgbe_rxq_vec_setup_default(rxq);
}
-int __attribute__((cold))
+int __rte_cold
ixgbe_txq_vec_setup(struct ixgbe_tx_queue *txq)
{
return ixgbe_txq_vec_setup_default(txq, &vec_txq_ops);
}
-int __attribute__((cold))
+int __rte_cold
ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev *dev)
{
struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
/* no csum error report support */
- if (rxmode->hw_ip_checksum == 1)
+ if (rxmode->offloads & DEV_RX_OFFLOAD_CHECKSUM)
return -1;
return ixgbe_rx_vec_dev_conf_condition_check_default(dev);