ICE_HASH_CFG_RESET(&pf->gtpu_hash_ctx.ipv6_tcp);
}
+static uint64_t
+ice_get_supported_rxdid(struct ice_hw *hw)
+{
+ uint64_t supported_rxdid = 0; /* bitmap for supported RXDID */
+ uint32_t regval;
+ int i;
+
+ supported_rxdid |= BIT(ICE_RXDID_LEGACY_1);
+
+ for (i = ICE_RXDID_FLEX_NIC; i < ICE_FLEX_DESC_RXDID_MAX_NUM; i++) {
+ regval = ICE_READ_REG(hw, GLFLXP_RXDID_FLAGS(i, 0));
+ if ((regval >> GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_S)
+ & GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_M)
+ supported_rxdid |= BIT(i);
+ }
+ return supported_rxdid;
+}
+
static int
ice_dev_init(struct rte_eth_dev *dev)
{
return ret;
}
+ pf->supported_rxdid = ice_get_supported_rxdid(hw);
+
return 0;
err_pf_setup:
#define ICE_RXTX_BYTES_HIGH(bytes) ((bytes) & ~ICE_40_BIT_MASK)
#define ICE_RXTX_BYTES_LOW(bytes) ((bytes) & ICE_40_BIT_MASK)
+/* Max number of flexible descriptor rxdid */
+#define ICE_FLEX_DESC_RXDID_MAX_NUM 64
+
/* DDP package type */
enum ice_pkg_type {
ICE_PKG_TYPE_UNKNOWN,
bool init_link_up;
uint64_t old_rx_bytes;
uint64_t old_tx_bytes;
+ uint64_t supported_rxdid; /* bitmap for supported RXDID */
};
#define ICE_MAX_QUEUE_NUM 2048
ice_proto_xtr_type_to_rxdid(uint8_t xtr_type)
{
static uint8_t rxdid_map[] = {
- [PROTO_XTR_NONE] = ICE_RXDID_COMMS_GENERIC,
+ [PROTO_XTR_NONE] = ICE_RXDID_COMMS_OVS,
[PROTO_XTR_VLAN] = ICE_RXDID_COMMS_AUX_VLAN,
[PROTO_XTR_IPV4] = ICE_RXDID_COMMS_AUX_IPV4,
[PROTO_XTR_IPV6] = ICE_RXDID_COMMS_AUX_IPV6,
};
return xtr_type < RTE_DIM(rxdid_map) ?
- rxdid_map[xtr_type] : ICE_RXDID_COMMS_GENERIC;
+ rxdid_map[xtr_type] : ICE_RXDID_COMMS_OVS;
}
static enum ice_status
{
struct ice_vsi *vsi = rxq->vsi;
struct ice_hw *hw = ICE_VSI_TO_HW(vsi);
+ struct ice_pf *pf = ICE_VSI_TO_PF(vsi);
struct rte_eth_dev *dev = ICE_VSI_TO_ETH_DEV(rxq->vsi);
struct ice_rlan_ctx rx_ctx;
enum ice_status err;
uint16_t buf_size, len;
struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
- uint32_t rxdid = ICE_RXDID_COMMS_GENERIC;
+ uint32_t rxdid = ICE_RXDID_COMMS_OVS;
uint32_t regval;
/* Set buffer size as the head split is disabled. */
PMD_DRV_LOG(DEBUG, "Port (%u) - Rx queue (%u) is set with RXDID : %u",
rxq->port_id, rxq->queue_id, rxdid);
+ if (!(pf->supported_rxdid & BIT(rxdid))) {
+ PMD_DRV_LOG(ERR, "currently package doesn't support RXDID (%u)",
+ rxdid);
+ return -EINVAL;
+ }
+
/* Enable Flexible Descriptors in the queue context which
* allows this driver to select a specific receive descriptor format
*/
static void
ice_rxd_to_proto_xtr(struct rte_mbuf *mb,
- volatile struct ice_32b_rx_flex_desc_comms *desc)
+ volatile struct ice_32b_rx_flex_desc_comms_ovs *desc)
{
uint16_t stat_err = rte_le_to_cpu_16(desc->status_error1);
uint32_t metadata = 0;
ice_rxd_to_pkt_fields(struct rte_mbuf *mb,
volatile union ice_rx_flex_desc *rxdp)
{
- volatile struct ice_32b_rx_flex_desc_comms *desc =
- (volatile struct ice_32b_rx_flex_desc_comms *)rxdp;
+ volatile struct ice_32b_rx_flex_desc_comms_ovs *desc =
+ (volatile struct ice_32b_rx_flex_desc_comms_ovs *)rxdp;
+#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
uint16_t stat_err;
stat_err = rte_le_to_cpu_16(desc->status_error0);
mb->ol_flags |= PKT_RX_RSS_HASH;
mb->hash.rss = rte_le_to_cpu_32(desc->rss_hash);
}
+#endif
-#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
if (desc->flow_id != 0xFFFFFFFF) {
mb->ol_flags |= PKT_RX_FDIR | PKT_RX_FDIR_ID;
mb->hash.fdir.hi = rte_le_to_cpu_32(desc->flow_id);
}
+#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
if (unlikely(rte_net_ice_dynf_proto_xtr_metadata_avail()))
ice_rxd_to_proto_xtr(mb, desc);
#endif
#define ICE_FDIR_PKT_LEN 512
+#define ICE_RXDID_COMMS_OVS 22
+
typedef void (*ice_rx_release_mbufs_t)(struct ice_rx_queue *rxq);
typedef void (*ice_tx_release_mbufs_t)(struct ice_tx_queue *txq);
};
};
+/* Rx Flex Descriptor for Comms Package Profile
+ * RxDID Profile ID 22 (swap Hash and FlowID)
+ * Flex-field 0: Flow ID lower 16-bits
+ * Flex-field 1: Flow ID upper 16-bits
+ * Flex-field 2: RSS hash lower 16-bits
+ * Flex-field 3: RSS hash upper 16-bits
+ * Flex-field 4: AUX0
+ * Flex-field 5: AUX1
+ */
+struct ice_32b_rx_flex_desc_comms_ovs {
+ /* Qword 0 */
+ u8 rxdid;
+ u8 mir_id_umb_cast;
+ __le16 ptype_flexi_flags0;
+ __le16 pkt_len;
+ __le16 hdr_len_sph_flex_flags1;
+
+ /* Qword 1 */
+ __le16 status_error0;
+ __le16 l2tag1;
+ __le32 flow_id;
+
+ /* Qword 2 */
+ __le16 status_error1;
+ u8 flexi_flags2;
+ u8 ts_low;
+ __le16 l2tag2_1st;
+ __le16 l2tag2_2nd;
+
+ /* Qword 3 */
+ __le32 rss_hash;
+ union {
+ struct {
+ __le16 aux0;
+ __le16 aux1;
+ } flex;
+ __le32 ts_high;
+ } flex_ts;
+};
+
int ice_rx_queue_setup(struct rte_eth_dev *dev,
uint16_t queue_idx,
uint16_t nb_desc,
const __m256i shuf_msk =
_mm256_set_epi8
(/* first descriptor */
- 15, 14,
- 13, 12, /* octet 12~15, 32 bits rss */
+ 0xFF, 0xFF,
+ 0xFF, 0xFF, /* rss hash parsed separately */
11, 10, /* octet 10~11, 16 bits vlan_macip */
5, 4, /* octet 4~5, 16 bits data_len */
0xFF, 0xFF, /* skip hi 16 bits pkt_len, zero out */
0xFF, 0xFF, /* pkt_type set as unknown */
0xFF, 0xFF, /*pkt_type set as unknown */
/* second descriptor */
- 15, 14,
- 13, 12, /* octet 12~15, 32 bits rss */
+ 0xFF, 0xFF,
+ 0xFF, 0xFF, /* rss hash parsed separately */
11, 10, /* octet 10~11, 16 bits vlan_macip */
5, 4, /* octet 4~5, 16 bits data_len */
0xFF, 0xFF, /* skip hi 16 bits pkt_len, zero out */
/* merge flags */
const __m256i mbuf_flags = _mm256_or_si256(l3_l4_flags,
rss_vlan_flags);
+
+#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
+ /**
+ * needs to load 2nd 16B of each desc for RSS hash parsing,
+ * will cause performance drop to get into this context.
+ */
+ if (rxq->vsi->adapter->eth_dev->data->dev_conf.rxmode.offloads &
+ DEV_RX_OFFLOAD_RSS_HASH) {
+ /* load bottom half of every 32B desc */
+ const __m128i raw_desc_bh7 =
+ _mm_load_si128
+ ((void *)(&rxdp[7].wb.status_error1));
+ rte_compiler_barrier();
+ const __m128i raw_desc_bh6 =
+ _mm_load_si128
+ ((void *)(&rxdp[6].wb.status_error1));
+ rte_compiler_barrier();
+ const __m128i raw_desc_bh5 =
+ _mm_load_si128
+ ((void *)(&rxdp[5].wb.status_error1));
+ rte_compiler_barrier();
+ const __m128i raw_desc_bh4 =
+ _mm_load_si128
+ ((void *)(&rxdp[4].wb.status_error1));
+ rte_compiler_barrier();
+ const __m128i raw_desc_bh3 =
+ _mm_load_si128
+ ((void *)(&rxdp[3].wb.status_error1));
+ rte_compiler_barrier();
+ const __m128i raw_desc_bh2 =
+ _mm_load_si128
+ ((void *)(&rxdp[2].wb.status_error1));
+ rte_compiler_barrier();
+ const __m128i raw_desc_bh1 =
+ _mm_load_si128
+ ((void *)(&rxdp[1].wb.status_error1));
+ rte_compiler_barrier();
+ const __m128i raw_desc_bh0 =
+ _mm_load_si128
+ ((void *)(&rxdp[0].wb.status_error1));
+
+ __m256i raw_desc_bh6_7 =
+ _mm256_inserti128_si256
+ (_mm256_castsi128_si256(raw_desc_bh6),
+ raw_desc_bh7, 1);
+ __m256i raw_desc_bh4_5 =
+ _mm256_inserti128_si256
+ (_mm256_castsi128_si256(raw_desc_bh4),
+ raw_desc_bh5, 1);
+ __m256i raw_desc_bh2_3 =
+ _mm256_inserti128_si256
+ (_mm256_castsi128_si256(raw_desc_bh2),
+ raw_desc_bh3, 1);
+ __m256i raw_desc_bh0_1 =
+ _mm256_inserti128_si256
+ (_mm256_castsi128_si256(raw_desc_bh0),
+ raw_desc_bh1, 1);
+
+ /**
+ * to shift the 32b RSS hash value to the
+ * highest 32b of each 128b before mask
+ */
+ __m256i rss_hash6_7 =
+ _mm256_slli_epi64(raw_desc_bh6_7, 32);
+ __m256i rss_hash4_5 =
+ _mm256_slli_epi64(raw_desc_bh4_5, 32);
+ __m256i rss_hash2_3 =
+ _mm256_slli_epi64(raw_desc_bh2_3, 32);
+ __m256i rss_hash0_1 =
+ _mm256_slli_epi64(raw_desc_bh0_1, 32);
+
+ __m256i rss_hash_msk =
+ _mm256_set_epi32(0xFFFFFFFF, 0, 0, 0,
+ 0xFFFFFFFF, 0, 0, 0);
+
+ rss_hash6_7 = _mm256_and_si256
+ (rss_hash6_7, rss_hash_msk);
+ rss_hash4_5 = _mm256_and_si256
+ (rss_hash4_5, rss_hash_msk);
+ rss_hash2_3 = _mm256_and_si256
+ (rss_hash2_3, rss_hash_msk);
+ rss_hash0_1 = _mm256_and_si256
+ (rss_hash0_1, rss_hash_msk);
+
+ mb6_7 = _mm256_or_si256(mb6_7, rss_hash6_7);
+ mb4_5 = _mm256_or_si256(mb4_5, rss_hash4_5);
+ mb2_3 = _mm256_or_si256(mb2_3, rss_hash2_3);
+ mb0_1 = _mm256_or_si256(mb0_1, rss_hash0_1);
+ } /* if() on RSS hash parsing */
+#endif
/**
* At this point, we have the 8 sets of flags in the low 16-bits
* of each 32-bit value in vlan0.
const __m128i zero = _mm_setzero_si128();
/* mask to shuffle from desc. to mbuf */
const __m128i shuf_msk = _mm_set_epi8
- (15, 14, 13, 12, /* octet 12~15, 32 bits rss */
+ (0xFF, 0xFF,
+ 0xFF, 0xFF, /* rss hash parsed separately */
11, 10, /* octet 10~11, 16 bits vlan_macip */
5, 4, /* octet 4~5, 16 bits data_len */
0xFF, 0xFF, /* skip high 16 bits pkt_len, zero out */
pos += ICE_DESCS_PER_LOOP,
rxdp += ICE_DESCS_PER_LOOP) {
__m128i descs[ICE_DESCS_PER_LOOP];
- __m128i pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4;
+ __m128i pkt_mb0, pkt_mb1, pkt_mb2, pkt_mb3;
__m128i staterr, sterr_tmp1, sterr_tmp2;
/* 2 64 bit or 4 32 bit mbuf pointers in one XMM reg. */
__m128i mbp1;
rte_compiler_barrier();
/* D.1 pkt 3,4 convert format from desc to pktmbuf */
- pkt_mb4 = _mm_shuffle_epi8(descs[3], shuf_msk);
- pkt_mb3 = _mm_shuffle_epi8(descs[2], shuf_msk);
+ pkt_mb3 = _mm_shuffle_epi8(descs[3], shuf_msk);
+ pkt_mb2 = _mm_shuffle_epi8(descs[2], shuf_msk);
+
+ /* D.1 pkt 1,2 convert format from desc to pktmbuf */
+ pkt_mb1 = _mm_shuffle_epi8(descs[1], shuf_msk);
+ pkt_mb0 = _mm_shuffle_epi8(descs[0], shuf_msk);
/* C.1 4=>2 filter staterr info only */
sterr_tmp2 = _mm_unpackhi_epi32(descs[3], descs[2]);
ice_rx_desc_to_olflags_v(rxq, descs, &rx_pkts[pos]);
/* D.2 pkt 3,4 set in_port/nb_seg and remove crc */
- pkt_mb4 = _mm_add_epi16(pkt_mb4, crc_adjust);
pkt_mb3 = _mm_add_epi16(pkt_mb3, crc_adjust);
+ pkt_mb2 = _mm_add_epi16(pkt_mb2, crc_adjust);
- /* D.1 pkt 1,2 convert format from desc to pktmbuf */
- pkt_mb2 = _mm_shuffle_epi8(descs[1], shuf_msk);
- pkt_mb1 = _mm_shuffle_epi8(descs[0], shuf_msk);
+ /* D.2 pkt 1,2 set in_port/nb_seg and remove crc */
+ pkt_mb1 = _mm_add_epi16(pkt_mb1, crc_adjust);
+ pkt_mb0 = _mm_add_epi16(pkt_mb0, crc_adjust);
+
+#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
+ /**
+ * needs to load 2nd 16B of each desc for RSS hash parsing,
+ * will cause performance drop to get into this context.
+ */
+ if (rxq->vsi->adapter->eth_dev->data->dev_conf.rxmode.offloads &
+ DEV_RX_OFFLOAD_RSS_HASH) {
+ /* load bottom half of every 32B desc */
+ const __m128i raw_desc_bh3 =
+ _mm_load_si128
+ ((void *)(&rxdp[3].wb.status_error1));
+ rte_compiler_barrier();
+ const __m128i raw_desc_bh2 =
+ _mm_load_si128
+ ((void *)(&rxdp[2].wb.status_error1));
+ rte_compiler_barrier();
+ const __m128i raw_desc_bh1 =
+ _mm_load_si128
+ ((void *)(&rxdp[1].wb.status_error1));
+ rte_compiler_barrier();
+ const __m128i raw_desc_bh0 =
+ _mm_load_si128
+ ((void *)(&rxdp[0].wb.status_error1));
+
+ /**
+ * to shift the 32b RSS hash value to the
+ * highest 32b of each 128b before mask
+ */
+ __m128i rss_hash3 =
+ _mm_slli_epi64(raw_desc_bh3, 32);
+ __m128i rss_hash2 =
+ _mm_slli_epi64(raw_desc_bh2, 32);
+ __m128i rss_hash1 =
+ _mm_slli_epi64(raw_desc_bh1, 32);
+ __m128i rss_hash0 =
+ _mm_slli_epi64(raw_desc_bh0, 32);
+
+ __m128i rss_hash_msk =
+ _mm_set_epi32(0xFFFFFFFF, 0, 0, 0);
+
+ rss_hash3 = _mm_and_si128
+ (rss_hash3, rss_hash_msk);
+ rss_hash2 = _mm_and_si128
+ (rss_hash2, rss_hash_msk);
+ rss_hash1 = _mm_and_si128
+ (rss_hash1, rss_hash_msk);
+ rss_hash0 = _mm_and_si128
+ (rss_hash0, rss_hash_msk);
+
+ pkt_mb3 = _mm_or_si128(pkt_mb3, rss_hash3);
+ pkt_mb2 = _mm_or_si128(pkt_mb2, rss_hash2);
+ pkt_mb1 = _mm_or_si128(pkt_mb1, rss_hash1);
+ pkt_mb0 = _mm_or_si128(pkt_mb0, rss_hash0);
+ } /* if() on RSS hash parsing */
+#endif
/* C.2 get 4 pkts staterr value */
staterr = _mm_unpacklo_epi32(sterr_tmp1, sterr_tmp2);
/* D.3 copy final 3,4 data to rx_pkts */
_mm_storeu_si128
((void *)&rx_pkts[pos + 3]->rx_descriptor_fields1,
- pkt_mb4);
+ pkt_mb3);
_mm_storeu_si128
((void *)&rx_pkts[pos + 2]->rx_descriptor_fields1,
- pkt_mb3);
-
- /* D.2 pkt 1,2 set in_port/nb_seg and remove crc */
- pkt_mb2 = _mm_add_epi16(pkt_mb2, crc_adjust);
- pkt_mb1 = _mm_add_epi16(pkt_mb1, crc_adjust);
+ pkt_mb2);
/* C* extract and record EOP bit */
if (split_packet) {
/* D.3 copy final 1,2 data to rx_pkts */
_mm_storeu_si128
((void *)&rx_pkts[pos + 1]->rx_descriptor_fields1,
- pkt_mb2);
+ pkt_mb1);
_mm_storeu_si128((void *)&rx_pkts[pos]->rx_descriptor_fields1,
- pkt_mb1);
+ pkt_mb0);
ice_rx_desc_to_ptype_v(descs, &rx_pkts[pos], ptype_tbl);
/* C.4 calc avaialbe number of desc */
var = __builtin_popcountll(_mm_cvtsi128_si64(staterr));