#include <rte_ether.h>
#include <rte_mbuf.h>
+#ifdef __ARM64_NEON__
+#include <arm_neon.h>
+#endif
#include "base/hinic_compat.h"
#include "base/hinic_pmd_hwdev.h"
#define HINIC_GET_RQ_FREE_WQEBBS(rxq) ((rxq)->wq->delta - 1)
+/* rxq cqe done and status bit */
+#define HINIC_GET_RX_DONE_BE(status) \
+ ((status) & 0x80U)
+
#define HINIC_RX_CSUM_OFFLOAD_EN 0xFFF
+#define RQ_CQE_SGE_VLAN_SHIFT 0
+#define RQ_CQE_SGE_LEN_SHIFT 16
+
+#define RQ_CQE_SGE_VLAN_MASK 0xFFFFU
+#define RQ_CQE_SGE_LEN_MASK 0xFFFFU
+
+#define RQ_CQE_SGE_GET(val, member) \
+ (((val) >> RQ_CQE_SGE_##member##_SHIFT) & RQ_CQE_SGE_##member##_MASK)
+
+#define HINIC_GET_RX_VLAN_TAG(vlan_len) \
+ RQ_CQE_SGE_GET(vlan_len, VLAN)
+
+#define HINIC_GET_RX_PKT_LEN(vlan_len) \
+ RQ_CQE_SGE_GET(vlan_len, LEN)
+
+#define RQ_CQE_STATUS_CSUM_ERR_SHIFT 0
+#define RQ_CQE_STATUS_NUM_LRO_SHIFT 16
+#define RQ_CQE_STATUS_LRO_PUSH_SHIFT 25
+#define RQ_CQE_STATUS_LRO_ENTER_SHIFT 26
+#define RQ_CQE_STATUS_LRO_INTR_SHIFT 27
+
+#define RQ_CQE_STATUS_BP_EN_SHIFT 30
+#define RQ_CQE_STATUS_RXDONE_SHIFT 31
+#define RQ_CQE_STATUS_FLUSH_SHIFT 28
+
+#define RQ_CQE_STATUS_CSUM_ERR_MASK 0xFFFFU
+#define RQ_CQE_STATUS_NUM_LRO_MASK 0xFFU
+#define RQ_CQE_STATUS_LRO_PUSH_MASK 0X1U
+#define RQ_CQE_STATUS_LRO_ENTER_MASK 0X1U
+#define RQ_CQE_STATUS_LRO_INTR_MASK 0X1U
+#define RQ_CQE_STATUS_BP_EN_MASK 0X1U
+#define RQ_CQE_STATUS_RXDONE_MASK 0x1U
+#define RQ_CQE_STATUS_FLUSH_MASK 0x1U
+
+#define RQ_CQE_STATUS_GET(val, member) \
+ (((val) >> RQ_CQE_STATUS_##member##_SHIFT) & \
+ RQ_CQE_STATUS_##member##_MASK)
+
+#define RQ_CQE_STATUS_CLEAR(val, member) \
+ ((val) & (~(RQ_CQE_STATUS_##member##_MASK << \
+ RQ_CQE_STATUS_##member##_SHIFT)))
+
+#define HINIC_GET_RX_CSUM_ERR(status) \
+ RQ_CQE_STATUS_GET(status, CSUM_ERR)
+
+#define HINIC_GET_RX_DONE(status) \
+ RQ_CQE_STATUS_GET(status, RXDONE)
+
+#define HINIC_GET_RX_FLUSH(status) \
+ RQ_CQE_STATUS_GET(status, FLUSH)
+
+#define HINIC_GET_RX_BP_EN(status) \
+ RQ_CQE_STATUS_GET(status, BP_EN)
+
+#define HINIC_GET_RX_NUM_LRO(status) \
+ RQ_CQE_STATUS_GET(status, NUM_LRO)
+
/* RQ_CTRL */
#define RQ_CTRL_BUFDESC_SECT_LEN_SHIFT 0
#define RQ_CTRL_COMPLETE_FORMAT_SHIFT 15
#define RQ_CTRL_CLEAR(val, member) \
((val) & (~(RQ_CTRL_##member##_MASK << RQ_CTRL_##member##_SHIFT)))
+#define RQ_CQE_PKT_NUM_SHIFT 1
+#define RQ_CQE_PKT_FIRST_LEN_SHIFT 19
+#define RQ_CQE_PKT_LAST_LEN_SHIFT 6
+#define RQ_CQE_SUPER_CQE_EN_SHIFT 0
+
+#define RQ_CQE_PKT_FIRST_LEN_MASK 0x1FFFU
+#define RQ_CQE_PKT_LAST_LEN_MASK 0x1FFFU
+#define RQ_CQE_PKT_NUM_MASK 0x1FU
+#define RQ_CQE_SUPER_CQE_EN_MASK 0x1
+
+#define RQ_CQE_PKT_NUM_GET(val, member) \
+ (((val) >> RQ_CQE_PKT_##member##_SHIFT) & RQ_CQE_PKT_##member##_MASK)
+
+#define HINIC_GET_RQ_CQE_PKT_NUM(pkt_info) RQ_CQE_PKT_NUM_GET(pkt_info, NUM)
+
+#define RQ_CQE_SUPER_CQE_EN_GET(val, member) \
+ (((val) >> RQ_CQE_##member##_SHIFT) & RQ_CQE_##member##_MASK)
+
+#define HINIC_GET_SUPER_CQE_EN(pkt_info) \
+ RQ_CQE_SUPER_CQE_EN_GET(pkt_info, SUPER_CQE_EN)
+
+#define RQ_CQE_OFFOLAD_TYPE_VLAN_EN_SHIFT 21
+#define RQ_CQE_OFFOLAD_TYPE_VLAN_EN_MASK 0x1U
+
+#define RQ_CQE_OFFOLAD_TYPE_PKT_TYPE_SHIFT 0
+#define RQ_CQE_OFFOLAD_TYPE_PKT_TYPE_MASK 0xFFFU
+
+#define RQ_CQE_OFFOLAD_TYPE_PKT_UMBCAST_SHIFT 19
+#define RQ_CQE_OFFOLAD_TYPE_PKT_UMBCAST_MASK 0x3U
+
+#define RQ_CQE_OFFOLAD_TYPE_RSS_TYPE_SHIFT 24
+#define RQ_CQE_OFFOLAD_TYPE_RSS_TYPE_MASK 0xFFU
+
+#define RQ_CQE_OFFOLAD_TYPE_GET(val, member) (((val) >> \
+ RQ_CQE_OFFOLAD_TYPE_##member##_SHIFT) & \
+ RQ_CQE_OFFOLAD_TYPE_##member##_MASK)
+
+#define HINIC_GET_RX_VLAN_OFFLOAD_EN(offload_type) \
+ RQ_CQE_OFFOLAD_TYPE_GET(offload_type, VLAN_EN)
+
+#define HINIC_GET_RSS_TYPES(offload_type) \
+ RQ_CQE_OFFOLAD_TYPE_GET(offload_type, RSS_TYPE)
+
+#define HINIC_GET_RX_PKT_TYPE(offload_type) \
+ RQ_CQE_OFFOLAD_TYPE_GET(offload_type, PKT_TYPE)
+
+#define HINIC_GET_RX_PKT_UMBCAST(offload_type) \
+ RQ_CQE_OFFOLAD_TYPE_GET(offload_type, PKT_UMBCAST)
+
+#define RQ_CQE_STATUS_CSUM_BYPASS_VAL 0x80U
+#define RQ_CQE_STATUS_CSUM_ERR_IP_MASK 0x39U
+#define RQ_CQE_STATUS_CSUM_ERR_L4_MASK 0x46U
+#define RQ_CQE_STATUS_CSUM_ERR_OTHER 0x100U
+
+#define HINIC_CSUM_ERR_BYPASSED(csum_err) \
+ ((csum_err) == RQ_CQE_STATUS_CSUM_BYPASS_VAL)
+
+#define HINIC_CSUM_ERR_IP(csum_err) \
+ ((csum_err) & RQ_CQE_STATUS_CSUM_ERR_IP_MASK)
+
+#define HINIC_CSUM_ERR_L4(csum_err) \
+ ((csum_err) & RQ_CQE_STATUS_CSUM_ERR_L4_MASK)
+
+#define HINIC_CSUM_ERR_OTHER(csum_err) \
+ ((csum_err) == RQ_CQE_STATUS_CSUM_ERR_OTHER)
+
void hinic_get_func_rx_buf_size(struct hinic_nic_dev *nic_dev)
{
buf_desc->addr_low = lower_32_bits(buf_addr);
}
+/*
+ * Copy the software counters of an rx queue into a caller supplied buffer.
+ *
+ * rxq:   queue whose rxq_stats is read
+ * stats: destination of the copy
+ *
+ * Nothing is copied when either pointer is NULL.
+ */
+void hinic_rxq_get_stats(struct hinic_rxq *rxq, struct hinic_rxq_stats *stats)
+{
+	if (rxq != NULL && stats != NULL)
+		memcpy(stats, &rxq->rxq_stats, sizeof(rxq->rxq_stats));
+}
+
+/* Zero all counters held in rxq->rxq_stats; a NULL rxq is ignored. */
+void hinic_rxq_stats_reset(struct hinic_rxq *rxq)
+{
+	if (!rxq)
+		return;
+
+	memset(&rxq->rxq_stats, 0, sizeof(rxq->rxq_stats));
+}
+
static int hinic_rx_alloc_cqe(struct hinic_rxq *rxq)
{
size_t cqe_mem_size;
hinic_free_all_rx_skbs(nic_dev->rxqs[q_id]);
}
+/*
+ * Append the follow-up receive buffers of a multi-buffer packet to head_skb.
+ *
+ * rxq:            receive queue the packet arrived on
+ * head_skb:       first mbuf of the packet; its next, pkt_len and nb_segs
+ *                 are updated here
+ * remain_pkt_len: number of packet bytes not yet covered by head_skb
+ *
+ * Each pass takes the mbuf at the current local ci, advances the local ci
+ * by one and gives the mbuf at most rxq->buf_len bytes of the remainder.
+ */
+static void hinic_recv_jumbo_pkt(struct hinic_rxq *rxq,
+				 struct rte_mbuf *head_skb,
+				 u32 remain_pkt_len)
+{
+	struct hinic_nic_dev *nic_dev = rxq->nic_dev;
+	struct hinic_rx_info *info;
+	struct rte_mbuf *tail = head_skb;
+	struct rte_mbuf *seg;
+	u16 ci, buf_len = rxq->buf_len;
+	u32 seg_len;
+
+	for (; remain_pkt_len > 0; remain_pkt_len -= seg_len) {
+		ci = hinic_get_rq_local_ci(nic_dev->hwdev, rxq->q_id);
+		info = &rxq->rx_info[ci];
+
+		hinic_update_rq_local_ci(nic_dev->hwdev, rxq->q_id, 1);
+
+		/* a segment never carries more than one rx buffer */
+		seg_len = remain_pkt_len;
+		if (seg_len > buf_len)
+			seg_len = buf_len;
+
+		seg = info->mbuf;
+		seg->data_len = (u16)seg_len;
+		seg->next = NULL;
+
+		/* tail starts as head_skb, so the first segment hangs off it */
+		tail->next = seg;
+		tail = seg;
+
+		head_skb->nb_segs++;
+		head_skb->pkt_len += seg->data_len;
+	}
+}
+
static void hinic_rss_deinit(struct hinic_nic_dev *nic_dev)
{
u8 prio_tc[HINIC_DCB_UP_MAX] = {0};
}
}
+/*
+ * Convert the first 16 bytes of a receive cqe from big endian.
+ *
+ * dst_le32: destination buffer for the converted words
+ * src_be32: cqe as written by the device (volatile)
+ *
+ * The SSE and NEON variants reverse the bytes of each 32-bit word with one
+ * shuffle; the generic variant converts four words with rte_be_to_cpu_32().
+ */
+static inline void hinic_rq_cqe_be_to_cpu32(void *dst_le32,
+					    volatile void *src_be32)
+{
+#if defined(__X86_64_SSE__)
+	const __m128i bswap32 = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11,
+					     4, 5, 6, 7, 0, 1, 2, 3);
+	volatile __m128i *src = (volatile __m128i *)src_be32;
+	__m128i *dst = (__m128i *)dst_le32;
+
+	/* l2nic just use first 128 bits */
+	dst[0] = _mm_shuffle_epi8(src[0], bswap32);
+#elif defined(__ARM64_NEON__)
+	const uint8x16_t bswap32 = {3, 2, 1, 0, 7, 6, 5, 4,
+				    11, 10, 9, 8, 15, 14, 13, 12};
+	volatile uint8x16_t *src = (volatile uint8x16_t *)src_be32;
+	uint8x16_t *dst = (uint8x16_t *)dst_le32;
+
+	/* l2nic just use first 128 bits */
+	dst[0] = vqtbl1q_u8(src[0], bswap32);
+#else
+#define HINIC_L2NIC_RQ_CQE_USED 4 /* 4Bytes unit */
+	volatile u32 *src = (volatile u32 *)src_be32;
+	u32 *dst = (u32 *)dst_le32;
+	u32 word;
+
+	for (word = 0; word < HINIC_L2NIC_RQ_CQE_USED; word++)
+		dst[word] = rte_be_to_cpu_32(src[word]);
+#endif
+}
+
+/*
+ * Report the rss hash of a received packet.
+ *
+ * offload_type: offload_type word of the cqe
+ * cqe_hass_val: hash value taken from the cqe
+ * rss_hash:     written with cqe_hass_val when the rss type field is non-zero
+ *
+ * Returns PKT_RX_RSS_HASH when the hash was stored, 0 otherwise.
+ */
+static inline uint64_t hinic_rx_rss_hash(uint32_t offload_type,
+					 uint32_t cqe_hass_val,
+					 uint32_t *rss_hash)
+{
+	if (unlikely(HINIC_GET_RSS_TYPES(offload_type) == 0))
+		return 0;
+
+	*rss_hash = cqe_hass_val;
+	return PKT_RX_RSS_HASH;
+}
+
+/*
+ * Translate the checksum error field of a cqe status word into mbuf
+ * ol_flags.
+ *
+ * status: converted cqe status word
+ * rxq:    queue whose error counter is bumped when an error is reported
+ *
+ * Returns the checksum related PKT_RX_* flags for the packet.
+ */
+static inline uint64_t hinic_rx_csum(uint32_t status, struct hinic_rxq *rxq)
+{
+	const uint32_t err = HINIC_GET_RX_CSUM_ERR(status);
+	uint64_t flags;
+
+	/* most case checksum is ok */
+	if (likely(err == 0))
+		return (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD);
+
+	/* If BYPASS bit set, all other status indications should be ignored */
+	if (unlikely(HINIC_CSUM_ERR_BYPASSED(err)))
+		return PKT_RX_IP_CKSUM_UNKNOWN;
+
+	/* every remaining case counts as a receive error */
+	rxq->rxq_stats.errors++;
+
+	/* the "other" error code replaces the per-layer verdicts */
+	if (unlikely(HINIC_CSUM_ERR_OTHER(err)))
+		return PKT_RX_L4_CKSUM_NONE;
+
+	flags = HINIC_CSUM_ERR_IP(err) ?
+		PKT_RX_IP_CKSUM_BAD : PKT_RX_IP_CKSUM_GOOD;
+	flags |= HINIC_CSUM_ERR_L4(err) ?
+		PKT_RX_L4_CKSUM_BAD : PKT_RX_L4_CKSUM_GOOD;
+
+	return flags;
+}
+
+/*
+ * Extract the vlan tag reported in a cqe.
+ *
+ * offload_type: offload_type word of the cqe (carries the vlan enable bit)
+ * vlan_len:     vlan_len word of the cqe (carries the tag)
+ * vlan_tci:     receives the tag, or 0 when no tag is reported
+ *
+ * Returns PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED when the enable bit is set and
+ * the tag is non-zero, 0 otherwise.
+ */
+static inline uint64_t hinic_rx_vlan(uint32_t offload_type, uint32_t vlan_len,
+				     uint16_t *vlan_tci)
+{
+	const uint16_t tag = HINIC_GET_RX_VLAN_TAG(vlan_len);
+
+	if (HINIC_GET_RX_VLAN_OFFLOAD_EN(offload_type) && tag != 0) {
+		*vlan_tci = tag;
+		return PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED;
+	}
+
+	*vlan_tci = 0;
+	return 0;
+}
+
+/*
+ * Allocate exp_mbuf_cnt mbufs from rxq->mb_pool in one call.
+ *
+ * rxq:          queue owning the mempool and the rx_nombuf counter
+ * mbufs:        array that receives the allocated mbuf pointers
+ * exp_mbuf_cnt: number of mbufs requested
+ *
+ * Returns exp_mbuf_cnt on success. On failure returns 0 and adds
+ * exp_mbuf_cnt to rxq_stats.rx_nombuf.
+ */
+static inline u32 hinic_rx_alloc_mbuf_bulk(struct hinic_rxq *rxq,
+					   struct rte_mbuf **mbufs,
+					   u32 exp_mbuf_cnt)
+{
+	int err = rte_pktmbuf_alloc_bulk(rxq->mb_pool, mbufs, exp_mbuf_cnt);
+
+	if (unlikely(err != HINIC_OK)) {
+		rxq->rxq_stats.rx_nombuf += exp_mbuf_cnt;
+		return 0;
+	}
+
+	return exp_mbuf_cnt;
+}
+
static struct rte_mbuf *hinic_rx_alloc_mbuf(struct hinic_rxq *rxq,
dma_addr_t *dma_addr)
{
return mbuf;
}
+/*
+ * Refill the receive queue with new mbufs.
+ *
+ * Does nothing until the number of free wqebbs reaches rxq->rx_free_thresh.
+ * One call refills at most up to the end of the ring (q_depth - pi). The
+ * mbufs are allocated in bulk straight into rxq->rx_info[pi..], their DMA
+ * addresses are written big endian into the wqes, and the hardware pi is
+ * updated after a write barrier.
+ */
+static inline void hinic_rearm_rxq_mbuf(struct hinic_rxq *rxq)
+{
+	struct rte_mbuf **new_mbufs;
+	struct hinic_rq_wqe *wqe;
+	dma_addr_t iova;
+	u32 idx, nr_free, nr_want, nr_got;
+	u16 pi;
+
+	/* check free wqebb for rearm */
+	nr_free = HINIC_GET_RQ_FREE_WQEBBS(rxq);
+	if (unlikely(nr_free < rxq->rx_free_thresh))
+		return;
+
+	/* rx_info[pi] onwards is used as the mbuf pointer array */
+	pi = HINIC_GET_RQ_LOCAL_PI(rxq);
+	new_mbufs = (struct rte_mbuf **)(&rxq->rx_info[pi]);
+
+	/* do not run past the end of the ring in one pass */
+	nr_want = rxq->q_depth - pi;
+	if (nr_want > nr_free)
+		nr_want = nr_free;
+
+	/* alloc mbuf in bulk */
+	nr_got = hinic_rx_alloc_mbuf_bulk(rxq, new_mbufs, nr_want);
+	if (unlikely(nr_got == 0))
+		return;
+
+	/* write the buffer addresses into the wqes */
+	wqe = WQ_WQE_ADDR(rxq->wq, (u32)pi);
+	for (idx = 0; idx < nr_got; idx++) {
+		iova = rte_mbuf_data_iova_default(new_mbufs[idx]);
+		wqe[idx].buf_desc.addr_high =
+			cpu_to_be32(upper_32_bits(iova));
+		wqe[idx].buf_desc.addr_low =
+			cpu_to_be32(lower_32_bits(iova));
+	}
+	rxq->wq->delta -= nr_got;
+	rxq->wq->prod_idx += nr_got;
+
+	/* wqe writes must be visible before the hw pi update */
+	rte_wmb();
+	HINIC_UPDATE_RQ_HW_PI(rxq, pi + nr_got);
+}
+
void hinic_rx_alloc_pkts(struct hinic_rxq *rxq)
{
struct hinic_nic_dev *nic_dev = rxq->nic_dev;
HINIC_UPDATE_RQ_HW_PI(rxq, pi + 1);
}
}
+/*
+ * Burst receive: poll completed cqes starting at the local consumer index
+ * and return up to nb_pkts mbufs through rx_pkts.
+ *
+ * For every cqe whose done bit is set the function converts the cqe with
+ * hinic_rq_cqe_be_to_cpu32(), sets the mbuf lengths, chains further buffers
+ * through hinic_recv_jumbo_pkt() when the packet is longer than
+ * rxq->buf_len, fills the vlan/checksum/rss fields and clears the cqe
+ * status. Afterwards it advances the local ci, updates rxq_stats and calls
+ * hinic_rearm_rxq_mbuf().
+ *
+ * rx_queue: the struct hinic_rxq to poll
+ * rx_pkts:  output array, written at indexes 0 .. return value - 1
+ * nb_pkts:  maximum number of packets to return
+ *
+ * Returns the number of packets stored in rx_pkts.
+ */
+u16 hinic_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, u16 nb_pkts)
+{
+ struct rte_mbuf *rxm;
+ struct hinic_rxq *rxq = rx_queue;
+ struct hinic_rx_info *rx_info;
+ volatile struct hinic_rq_cqe *rx_cqe;
+ u16 rx_buf_len, pkts = 0;
+ u16 sw_ci, ci_mask, wqebb_cnt = 0;
+ u32 pkt_len, status, vlan_len;
+ u64 rx_bytes = 0;
+ struct hinic_rq_cqe cqe;
+ u32 offload_type, rss_hash;
+
+ rx_buf_len = rxq->buf_len;
+
+ /* 1. get polling start ci */
+ ci_mask = HINIC_GET_RQ_WQE_MASK(rxq);
+ sw_ci = HINIC_GET_RQ_LOCAL_CI(rxq);
+
+ while (pkts < nb_pkts) {
+ /* 2. current ci is done; the done bit is tested on the raw status
+ * word, before the cqe is converted below
+ */
+ rx_cqe = &rxq->rx_cqe[sw_ci];
+ status = rx_cqe->status;
+ if (!HINIC_GET_RX_DONE_BE(status))
+ break;
+
+ /* read other cqe member after status */
+ rte_rmb();
+
+ /* convert cqe and get packet length */
+ hinic_rq_cqe_be_to_cpu32(&cqe, (volatile void *)rx_cqe);
+ vlan_len = cqe.vlan_len;
+
+ rx_info = &rxq->rx_info[sw_ci];
+ rxm = rx_info->mbuf;
+
+ /* 3. next ci point and prefetch */
+ sw_ci++;
+ sw_ci &= ci_mask;
+
+ /* prefetch next mbuf first 64B */
+ rte_prefetch0(rxq->rx_info[sw_ci].mbuf);
+
+ /* 4. jumbo frame process: a packet longer than one rx buffer
+ * continues in the mbufs of the following ci entries
+ */
+ pkt_len = HINIC_GET_RX_PKT_LEN(vlan_len);
+ if (likely(pkt_len <= rx_buf_len)) {
+ rxm->data_len = pkt_len;
+ rxm->pkt_len = pkt_len;
+ wqebb_cnt++;
+ } else {
+ rxm->data_len = rx_buf_len;
+ rxm->pkt_len = rx_buf_len;
+
+ /* if jumbo use multi-wqebb update ci,
+ * recv_jumbo_pkt will also update ci: the wqebbs counted so
+ * far plus this one are committed to the local ci first, and
+ * sw_ci is read back from the queue afterwards
+ */
+ HINIC_UPDATE_RQ_LOCAL_CI(rxq, wqebb_cnt + 1);
+ wqebb_cnt = 0;
+ hinic_recv_jumbo_pkt(rxq, rxm, pkt_len - rx_buf_len);
+ sw_ci = HINIC_GET_RQ_LOCAL_CI(rxq);
+ }
+
+ /* 5. vlan/checksum/rss/pkt_type/gro offload */
+ rxm->data_off = RTE_PKTMBUF_HEADROOM;
+ rxm->port = rxq->port_id;
+ offload_type = cqe.offload_type;
+
+ /* vlan offload */
+ rxm->ol_flags |= hinic_rx_vlan(offload_type, vlan_len,
+ &rxm->vlan_tci);
+
+ /* checksum offload */
+ rxm->ol_flags |= hinic_rx_csum(cqe.status, rxq);
+
+ /* rss hash offload */
+ rss_hash = cqe.rss_hash;
+ rxm->ol_flags |= hinic_rx_rss_hash(offload_type, rss_hash,
+ &rxm->hash.rss);
+
+ /* 6. clear done bit so this cqe is not taken as completed again */
+ rx_cqe->status = 0;
+
+ rx_bytes += pkt_len;
+ rx_pkts[pkts++] = rxm;
+ }
+
+ if (pkts) {
+ /* 7. update ci with the wqebbs not yet committed in step 4 */
+ HINIC_UPDATE_RQ_LOCAL_CI(rxq, wqebb_cnt);
+
+ /* do packet stats */
+ rxq->rxq_stats.packets += pkts;
+ rxq->rxq_stats.bytes += rx_bytes;
+ }
+ rxq->rxq_stats.burst_pkts = pkts;
+
+ /* 8. rearm mbuf to rxq */
+ hinic_rearm_rxq_mbuf(rxq);
+
+ return pkts;
+}
#include <rte_sctp.h>
#include <rte_udp.h>
#include <rte_ip.h>
+#ifdef __ARM64_NEON__
+#include <arm_neon.h>
+#endif
#include "base/hinic_compat.h"
#include "base/hinic_pmd_hwdev.h"
#include "hinic_pmd_ethdev.h"
#include "hinic_pmd_tx.h"
+/* packet header and tx offload info */
+#define VXLANLEN 8
+#define MAX_PLD_OFFSET 221
+#define MAX_SINGLE_SGE_SIZE 65536
+#define TSO_ENABLE 1
+#define TX_MSS_DEFAULT 0x3E00
+#define TX_MSS_MIN 0x50
+
+#define HINIC_NONTSO_PKT_MAX_SGE 17 /* non-tso max sge 17 */
+#define HINIC_NONTSO_SEG_NUM_INVALID(num) \
+ ((num) > HINIC_NONTSO_PKT_MAX_SGE)
+
+#define HINIC_TSO_PKT_MAX_SGE 127 /* tso max sge 127 */
+#define HINIC_TSO_SEG_NUM_INVALID(num) ((num) > HINIC_TSO_PKT_MAX_SGE)
+
+/*
+ * Size of nr_descs buffer descriptors as passed through SIZE_8BYTES().
+ * sizeof(struct hinic_sq_bufdesc) == 16, shift 4.
+ * The argument is parenthesized so that the cast applies to the whole
+ * expression the caller passes in.
+ */
+#define HINIC_BUF_DESC_SIZE(nr_descs) (SIZE_8BYTES(((u32)(nr_descs)) << 4))
+
+#define MASKED_SQ_IDX(sq, idx) ((idx) & (sq)->wq->mask)
+
+/* SQ_CTRL */
+#define SQ_CTRL_BUFDESC_SECT_LEN_SHIFT 0
+#define SQ_CTRL_TASKSECT_LEN_SHIFT 16
+#define SQ_CTRL_DATA_FORMAT_SHIFT 22
+#define SQ_CTRL_LEN_SHIFT 29
+#define SQ_CTRL_OWNER_SHIFT 31
+
+#define SQ_CTRL_BUFDESC_SECT_LEN_MASK 0xFFU
+#define SQ_CTRL_TASKSECT_LEN_MASK 0x1FU
+#define SQ_CTRL_DATA_FORMAT_MASK 0x1U
+#define SQ_CTRL_LEN_MASK 0x3U
+#define SQ_CTRL_OWNER_MASK 0x1U
+
+#define SQ_CTRL_SET(val, member) \
+ (((val) & SQ_CTRL_##member##_MASK) << SQ_CTRL_##member##_SHIFT)
+
+#define SQ_CTRL_QUEUE_INFO_PLDOFF_SHIFT 2
+#define SQ_CTRL_QUEUE_INFO_UFO_SHIFT 10
+#define SQ_CTRL_QUEUE_INFO_TSO_SHIFT 11
+#define SQ_CTRL_QUEUE_INFO_TCPUDP_CS_SHIFT 12
+#define SQ_CTRL_QUEUE_INFO_MSS_SHIFT 13
+#define SQ_CTRL_QUEUE_INFO_SCTP_SHIFT 27
+#define SQ_CTRL_QUEUE_INFO_UC_SHIFT 28
+#define SQ_CTRL_QUEUE_INFO_PRI_SHIFT 29
+
+#define SQ_CTRL_QUEUE_INFO_PLDOFF_MASK 0xFFU
+#define SQ_CTRL_QUEUE_INFO_UFO_MASK 0x1U
+#define SQ_CTRL_QUEUE_INFO_TSO_MASK 0x1U
+#define SQ_CTRL_QUEUE_INFO_TCPUDP_CS_MASK 0x1U
+#define SQ_CTRL_QUEUE_INFO_MSS_MASK 0x3FFFU
+#define SQ_CTRL_QUEUE_INFO_SCTP_MASK 0x1U
+#define SQ_CTRL_QUEUE_INFO_UC_MASK 0x1U
+#define SQ_CTRL_QUEUE_INFO_PRI_MASK 0x7U
+
+#define SQ_CTRL_QUEUE_INFO_SET(val, member) \
+ (((u32)(val) & SQ_CTRL_QUEUE_INFO_##member##_MASK) << \
+ SQ_CTRL_QUEUE_INFO_##member##_SHIFT)
+
+#define SQ_CTRL_QUEUE_INFO_GET(val, member) \
+ (((val) >> SQ_CTRL_QUEUE_INFO_##member##_SHIFT) & \
+ SQ_CTRL_QUEUE_INFO_##member##_MASK)
+
+#define SQ_CTRL_QUEUE_INFO_CLEAR(val, member) \
+ ((val) & (~(SQ_CTRL_QUEUE_INFO_##member##_MASK << \
+ SQ_CTRL_QUEUE_INFO_##member##_SHIFT)))
+
+#define SQ_TASK_INFO0_L2HDR_LEN_SHIFT 0
+#define SQ_TASK_INFO0_L4OFFLOAD_SHIFT 8
+#define SQ_TASK_INFO0_INNER_L3TYPE_SHIFT 10
+#define SQ_TASK_INFO0_VLAN_OFFLOAD_SHIFT 12
+#define SQ_TASK_INFO0_PARSE_FLAG_SHIFT 13
+#define SQ_TASK_INFO0_UFO_AVD_SHIFT 14
+#define SQ_TASK_INFO0_TSO_UFO_SHIFT 15
+#define SQ_TASK_INFO0_VLAN_TAG_SHIFT 16
+
+#define SQ_TASK_INFO0_L2HDR_LEN_MASK 0xFFU
+#define SQ_TASK_INFO0_L4OFFLOAD_MASK 0x3U
+#define SQ_TASK_INFO0_INNER_L3TYPE_MASK 0x3U
+#define SQ_TASK_INFO0_VLAN_OFFLOAD_MASK 0x1U
+#define SQ_TASK_INFO0_PARSE_FLAG_MASK 0x1U
+#define SQ_TASK_INFO0_UFO_AVD_MASK 0x1U
+#define SQ_TASK_INFO0_TSO_UFO_MASK 0x1U
+#define SQ_TASK_INFO0_VLAN_TAG_MASK 0xFFFFU
+
+#define SQ_TASK_INFO0_SET(val, member) \
+ (((u32)(val) & SQ_TASK_INFO0_##member##_MASK) << \
+ SQ_TASK_INFO0_##member##_SHIFT)
+
+#define SQ_TASK_INFO1_MD_TYPE_SHIFT 8
+#define SQ_TASK_INFO1_INNER_L4LEN_SHIFT 16
+#define SQ_TASK_INFO1_INNER_L3LEN_SHIFT 24
+
+#define SQ_TASK_INFO1_MD_TYPE_MASK 0xFFU
+#define SQ_TASK_INFO1_INNER_L4LEN_MASK 0xFFU
+#define SQ_TASK_INFO1_INNER_L3LEN_MASK 0xFFU
+
+#define SQ_TASK_INFO1_SET(val, member) \
+ (((val) & SQ_TASK_INFO1_##member##_MASK) << \
+ SQ_TASK_INFO1_##member##_SHIFT)
+
+#define SQ_TASK_INFO2_TUNNEL_L4LEN_SHIFT 0
+#define SQ_TASK_INFO2_OUTER_L3LEN_SHIFT 8
+#define SQ_TASK_INFO2_TUNNEL_L4TYPE_SHIFT 16
+#define SQ_TASK_INFO2_OUTER_L3TYPE_SHIFT 24
+
+#define SQ_TASK_INFO2_TUNNEL_L4LEN_MASK 0xFFU
+#define SQ_TASK_INFO2_OUTER_L3LEN_MASK 0xFFU
+#define SQ_TASK_INFO2_TUNNEL_L4TYPE_MASK 0x7U
+#define SQ_TASK_INFO2_OUTER_L3TYPE_MASK 0x3U
+
+#define SQ_TASK_INFO2_SET(val, member) \
+ (((val) & SQ_TASK_INFO2_##member##_MASK) << \
+ SQ_TASK_INFO2_##member##_SHIFT)
+
+#define SQ_TASK_INFO4_L2TYPE_SHIFT 31
+
+#define SQ_TASK_INFO4_L2TYPE_MASK 0x1U
+
+#define SQ_TASK_INFO4_SET(val, member) \
+ (((u32)(val) & SQ_TASK_INFO4_##member##_MASK) << \
+ SQ_TASK_INFO4_##member##_SHIFT)
+
+/* SQ_DB */
+#define SQ_DB_OFF 0x00000800
+#define SQ_DB_INFO_HI_PI_SHIFT 0
+#define SQ_DB_INFO_QID_SHIFT 8
+#define SQ_DB_INFO_CFLAG_SHIFT 23
+#define SQ_DB_INFO_COS_SHIFT 24
+#define SQ_DB_INFO_TYPE_SHIFT 27
+
+#define SQ_DB_INFO_HI_PI_MASK 0xFFU
+#define SQ_DB_INFO_QID_MASK 0x3FFU
+#define SQ_DB_INFO_CFLAG_MASK 0x1U
+#define SQ_DB_INFO_COS_MASK 0x7U
+#define SQ_DB_INFO_TYPE_MASK 0x1FU
+#define SQ_DB_INFO_SET(val, member) \
+ (((u32)(val) & SQ_DB_INFO_##member##_MASK) << \
+ SQ_DB_INFO_##member##_SHIFT)
+
+#define SQ_DB 1
+#define SQ_CFLAG_DP 0 /* CFLAG_DATA_PATH */
+
+#define SQ_DB_PI_LOW_MASK 0xFF
+#define SQ_DB_PI_LOW(pi) ((pi) & SQ_DB_PI_LOW_MASK)
+#define SQ_DB_PI_HI_SHIFT 8
+#define SQ_DB_PI_HIGH(pi) ((pi) >> SQ_DB_PI_HI_SHIFT)
+#define SQ_DB_ADDR(sq, pi) \
+ ((u64 *)((u8 __iomem *)((sq)->db_addr) + SQ_DB_OFF) + SQ_DB_PI_LOW(pi))
+
+/* txq wq operations */
+#define HINIC_GET_SQ_WQE_MASK(txq) ((txq)->wq->mask)
+
+#define HINIC_GET_SQ_HW_CI(txq) \
+ ((be16_to_cpu(*(txq)->cons_idx_addr)) & HINIC_GET_SQ_WQE_MASK(txq))
+
+#define HINIC_GET_SQ_LOCAL_CI(txq) \
+ (((txq)->wq->cons_idx) & HINIC_GET_SQ_WQE_MASK(txq))
+
+#define HINIC_UPDATE_SQ_LOCAL_CI(txq, wqebb_cnt) \
+ do { \
+ (txq)->wq->cons_idx += wqebb_cnt; \
+ (txq)->wq->delta += wqebb_cnt; \
+ } while (0)
+
+#define HINIC_GET_SQ_FREE_WQEBBS(txq) ((txq)->wq->delta - 1)
+
+#define HINIC_IS_SQ_EMPTY(txq) (((txq)->wq->delta) == ((txq)->q_depth))
+
+#define BUF_DESC_SIZE_SHIFT 4
+
+#define HINIC_SQ_WQE_SIZE(num_sge) \
+ (sizeof(struct hinic_sq_ctrl) + sizeof(struct hinic_sq_task) + \
+ (unsigned int)((num_sge) << BUF_DESC_SIZE_SHIFT))
+
+/*
+ * Number of wqebbs needed by a wqe carrying num_sge buffer descriptors:
+ * the wqe size is aligned up to HINIC_SQ_WQEBB_SIZE and shifted down by
+ * HINIC_SQ_WQEBB_SHIFT. The argument and the whole expansion are
+ * parenthesized so the macro is safe inside larger expressions.
+ */
+#define HINIC_SQ_WQEBB_CNT(num_sge) \
+	((int)(ALIGN(HINIC_SQ_WQE_SIZE((u32)(num_sge)), \
+		     HINIC_SQ_WQEBB_SIZE) >> HINIC_SQ_WQEBB_SHIFT))
+
+
+/*
+ * Convert nr_wqebb send queue wqebbs in place to big endian 32-bit words.
+ *
+ * data:     start of the first wqebb
+ * nr_wqebb: number of consecutive wqebbs to convert
+ *
+ * The SSE and NEON variants treat each wqebb as four 16-byte lines and
+ * reverse the bytes of every 32-bit word with one shuffle per line; the
+ * generic variant defers to hinic_cpu_to_be32().
+ */
+static inline void hinic_sq_wqe_cpu_to_be32(void *data, int nr_wqebb)
+{
+#if defined(__X86_64_SSE__)
+	const __m128i bswap32 = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11,
+					     4, 5, 6, 7, 0, 1, 2, 3);
+	__m128i *line = (__m128i *)data;
+	int bb, lane;
+
+	for (bb = 0; bb < nr_wqebb; bb++, line += 4) {
+		/* convert 64B wqebb using 4 SSE instructions */
+		for (lane = 0; lane < 4; lane++)
+			line[lane] = _mm_shuffle_epi8(line[lane], bswap32);
+	}
+#elif defined(__ARM64_NEON__)
+	const uint8x16_t bswap32 = {3, 2, 1, 0, 7, 6, 5, 4,
+				    11, 10, 9, 8, 15, 14, 13, 12};
+	uint8x16_t *line = (uint8x16_t *)data;
+	int bb, lane;
+
+	for (bb = 0; bb < nr_wqebb; bb++, line += 4) {
+		for (lane = 0; lane < 4; lane++)
+			line[lane] = vqtbl1q_u8(line[lane], bswap32);
+	}
+#else
+	hinic_cpu_to_be32(data, nr_wqebb * HINIC_SQ_WQEBB_SIZE);
+#endif
+}
+
+/*
+ * Convert nr_sge buffer descriptors in place to big endian 32-bit words.
+ *
+ * data:   start of the first descriptor
+ * nr_sge: number of consecutive descriptors to convert
+ *
+ * The SSE and NEON variants handle one descriptor as a single 16-byte
+ * vector; the generic variant defers to hinic_cpu_to_be32().
+ */
+static inline void hinic_sge_cpu_to_be32(void *data, int nr_sge)
+{
+#if defined(__X86_64_SSE__)
+	const __m128i bswap32 = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11,
+					     4, 5, 6, 7, 0, 1, 2, 3);
+	__m128i *sge = (__m128i *)data;
+	int n;
+
+	/* convert 16B sge using 1 SSE instructions */
+	for (n = 0; n < nr_sge; n++)
+		sge[n] = _mm_shuffle_epi8(sge[n], bswap32);
+#elif defined(__ARM64_NEON__)
+	const uint8x16_t bswap32 = {3, 2, 1, 0, 7, 6, 5, 4,
+				    11, 10, 9, 8, 15, 14, 13, 12};
+	uint8x16_t *sge = (uint8x16_t *)data;
+	int n;
+
+	for (n = 0; n < nr_sge; n++)
+		sge[n] = vqtbl1q_u8(sge[n], bswap32);
+#else
+	hinic_cpu_to_be32(data, nr_sge * sizeof(struct hinic_sq_bufdesc));
+#endif
+}
+
+/*
+ * Copy the software counters of a tx queue into a caller supplied buffer.
+ *
+ * txq:   queue whose txq_stats is read
+ * stats: destination of the copy
+ *
+ * Logs an error and copies nothing when either pointer is NULL.
+ */
+void hinic_txq_get_stats(struct hinic_txq *txq, struct hinic_txq_stats *stats)
+{
+	if (txq == NULL || stats == NULL) {
+		PMD_DRV_LOG(ERR, "Txq or stats is NULL");
+		return;
+	}
+
+	memcpy(stats, &txq->txq_stats, sizeof(txq->txq_stats));
+}
+
+/* Zero all counters held in txq->txq_stats; a NULL txq is ignored. */
+void hinic_txq_stats_reset(struct hinic_txq *txq)
+{
+	if (!txq)
+		return;
+
+	memset(&txq->txq_stats, 0, sizeof(txq->txq_stats));
+}
+
+/*
+ * Merge the data of sge_cnt chained segments into one newly allocated mbuf.
+ *
+ * nic_dev: device whose cpy_mpool provides the destination mbuf
+ * mbuf:    first segment to copy
+ * sge_cnt: number of segments to copy, following mbuf->next
+ *
+ * The destination is filled from the very start of its buffer
+ * (data_off = 0) and data_len/pkt_len are set to the number of bytes
+ * copied.
+ *
+ * Returns the new mbuf, or NULL when there is no copy pool, the allocation
+ * fails, the chain ends before sge_cnt segments were seen, or the data does
+ * not fit into the destination buffer. In the last two cases the
+ * destination mbuf is freed before returning.
+ */
+static inline struct rte_mbuf *hinic_copy_tx_mbuf(struct hinic_nic_dev *nic_dev,
+						  struct rte_mbuf *mbuf,
+						  u16 sge_cnt)
+{
+	struct rte_mbuf *dst_mbuf;
+	u32 offset = 0;
+	u16 i;
+
+	if (unlikely(!nic_dev->cpy_mpool))
+		return NULL;
+
+	dst_mbuf = rte_pktmbuf_alloc(nic_dev->cpy_mpool);
+	if (unlikely(!dst_mbuf))
+		return NULL;
+
+	dst_mbuf->data_off = 0;
+	dst_mbuf->data_len = 0;
+	for (i = 0; i < sge_cnt; i++) {
+		/* never read past the chain or write past the buffer */
+		if (unlikely(mbuf == NULL ||
+			     offset + mbuf->data_len > dst_mbuf->buf_len)) {
+			rte_pktmbuf_free(dst_mbuf);
+			return NULL;
+		}
+
+		rte_memcpy((char *)dst_mbuf->buf_addr + offset,
+			   (char *)mbuf->buf_addr + mbuf->data_off,
+			   mbuf->data_len);
+		dst_mbuf->data_len += mbuf->data_len;
+		offset += mbuf->data_len;
+		mbuf = mbuf->next;
+	}
+
+	/* single segment mbuf: packet length equals segment length */
+	dst_mbuf->pkt_len = dst_mbuf->data_len;
+
+	return dst_mbuf;
+}
+/*
+ * Fill the buffer descriptors (sges) of one sq wqe from an mbuf chain.
+ *
+ * The first (sge_cnt - cpy_mbuf_cnt) segments are mapped directly. When
+ * sqe_info->around is set, descriptors are written up to
+ * txq->sq_bot_sge_addr and then continue at txq->sq_head_addr; only the
+ * descriptors placed at the head are converted to big endian here.
+ * When cpy_mbuf_cnt is non-zero the remaining segments are merged into one
+ * mbuf by hinic_copy_tx_mbuf(); that mbuf is recorded in
+ * tx_info[pi].cpy_mbuf and mapped as the last sge.
+ *
+ * txq:      send queue the wqe belongs to
+ * mbuf:     first segment of the packet
+ * sges:     first buffer descriptor of the wqe
+ * sqe_info: sge_cnt, cpy_mbuf_cnt, around and pi are read
+ *
+ * Returns false only when hinic_copy_tx_mbuf() fails, true otherwise.
+ */
+static inline bool hinic_mbuf_dma_map_sge(struct hinic_txq *txq,
+ struct rte_mbuf *mbuf,
+ struct hinic_sq_bufdesc *sges,
+ struct hinic_wqe_info *sqe_info)
+{
+ dma_addr_t dma_addr;
+ u16 i, around_sges;
+ u16 nb_segs = sqe_info->sge_cnt - sqe_info->cpy_mbuf_cnt;
+ u16 real_nb_segs = mbuf->nb_segs;
+ struct hinic_sq_bufdesc *sge_idx = sges;
+
+ if (unlikely(sqe_info->around)) {
+ /* parts of wqe is in sq bottom while parts
+ * of wqe is in sq head
+ */
+ i = 0;
+ for (sge_idx = sges; (u64)sge_idx <= txq->sq_bot_sge_addr;
+ sge_idx++) {
+ dma_addr = rte_mbuf_data_iova(mbuf);
+ hinic_set_sge((struct hinic_sge *)sge_idx, dma_addr,
+ mbuf->data_len);
+ mbuf = mbuf->next;
+ i++;
+ }
+
+ around_sges = nb_segs - i;
+ sge_idx = (struct hinic_sq_bufdesc *)
+ ((void *)txq->sq_head_addr);
+ for (; i < nb_segs; i++) {
+ dma_addr = rte_mbuf_data_iova(mbuf);
+ hinic_set_sge((struct hinic_sge *)sge_idx, dma_addr,
+ mbuf->data_len);
+ mbuf = mbuf->next;
+ sge_idx++;
+ }
+
+ /* convert sges at head to big endian */
+ hinic_sge_cpu_to_be32((void *)txq->sq_head_addr, around_sges);
+ } else {
+ /* wqe is in continuous space */
+ for (i = 0; i < nb_segs; i++) {
+ dma_addr = rte_mbuf_data_iova(mbuf);
+ hinic_set_sge((struct hinic_sge *)sge_idx, dma_addr,
+ mbuf->data_len);
+ mbuf = mbuf->next;
+ sge_idx++;
+ }
+ }
+
+ /* for now: support non-tso over 17 sge, copy the last 2 mbuf */
+ if (unlikely(sqe_info->cpy_mbuf_cnt != 0)) {
+ /* copy invalid mbuf segs to a valid buffer, lost performance;
+ * the counter is bumped even if the copy below fails
+ */
+ txq->txq_stats.cpy_pkts += 1;
+ mbuf = hinic_copy_tx_mbuf(txq->nic_dev, mbuf,
+ real_nb_segs - nb_segs);
+ if (unlikely(!mbuf))
+ return false;
+
+ txq->tx_info[sqe_info->pi].cpy_mbuf = mbuf;
+
+ /* deal with the last mbuf: the merged copy becomes the final sge */
+ dma_addr = rte_mbuf_data_iova(mbuf);
+ hinic_set_sge((struct hinic_sge *)sge_idx, dma_addr,
+ mbuf->data_len);
+ if (unlikely(sqe_info->around))
+ hinic_sge_cpu_to_be32((void *)sge_idx, 1);
+ }
+
+ return true;
+}
+
+/*
+ * Fill the control section of a send queue wqe.
+ *
+ * ctrl:       control section to write (ctrl_fmt and queue_info)
+ * queue_info: queue_info value prepared by the offload code
+ * nr_descs:   number of buffer descriptors in the wqe
+ * owner:      owner bit for this wqe
+ *
+ * The UC bit is always set. An MSS field of 0 is replaced by
+ * TX_MSS_DEFAULT and a non-zero MSS below TX_MSS_MIN is raised to
+ * TX_MSS_MIN.
+ */
+static inline void hinic_fill_sq_wqe_header(struct hinic_sq_ctrl *ctrl,
+					    u32 queue_info, int nr_descs,
+					    u8 owner)
+{
+	const u32 ctrl_len = SIZE_8BYTES(sizeof(struct hinic_sq_ctrl));
+	const u32 task_len = SIZE_8BYTES(sizeof(struct hinic_sq_task));
+	const u32 desc_len = HINIC_BUF_DESC_SIZE(nr_descs);
+	u32 mss;
+
+	ctrl->ctrl_fmt = SQ_CTRL_SET(desc_len, BUFDESC_SECT_LEN) |
+			 SQ_CTRL_SET(task_len, TASKSECT_LEN) |
+			 SQ_CTRL_SET(SQ_NORMAL_WQE, DATA_FORMAT) |
+			 SQ_CTRL_SET(ctrl_len, LEN) |
+			 SQ_CTRL_SET(owner, OWNER);
+
+	ctrl->queue_info = queue_info | SQ_CTRL_QUEUE_INFO_SET(1U, UC);
+
+	mss = SQ_CTRL_QUEUE_INFO_GET(ctrl->queue_info, MSS);
+	if (mss == 0) {
+		ctrl->queue_info |=
+			SQ_CTRL_QUEUE_INFO_SET(TX_MSS_DEFAULT, MSS);
+	} else if (mss < TX_MSS_MIN) {
+		/* mss should not be less than 80 */
+		ctrl->queue_info =
+			SQ_CTRL_QUEUE_INFO_CLEAR(ctrl->queue_info, MSS) |
+			SQ_CTRL_QUEUE_INFO_SET(TX_MSS_MIN, MSS);
+	}
+}
+/*
+ * Check whether the segment layout of an mbuf chain can be sent as is and
+ * record how many sges the wqe needs.
+ *
+ * Chains with fewer than HINIC_NONTSO_PKT_MAX_SGE segments are accepted
+ * unchanged (sge_cnt = nb_segs). Longer chains are scanned in windows of
+ * segments: inside each window the accumulated data_len has to reach
+ * limit_len, which is tso_segsz + payload_offset for the first window and
+ * tso_segsz afterwards. When a window does not reach the limit, the rest of
+ * the packet is marked to be copied into one mbuf (cpy_mbuf_cnt = 1),
+ * provided it is not larger than HINIC_COPY_MBUF_SIZE.
+ *
+ * mbuf:      first segment of the packet
+ * poff_info: payload_offset is read
+ * sqe_info:  sge_cnt is written on every true return, cpy_mbuf_cnt only
+ *            when a copy is required
+ *
+ * Returns false when a copy would be needed but the remaining data exceeds
+ * HINIC_COPY_MBUF_SIZE, true otherwise.
+ */
+static inline bool hinic_is_tso_sge_valid(struct rte_mbuf *mbuf,
+ struct hinic_tx_offload_info
+ *poff_info,
+ struct hinic_wqe_info *sqe_info)
+{
+ u32 total_len, limit_len, checked_len, left_len;
+ u32 i, first_mss_sges, left_sges;
+ struct rte_mbuf *mbuf_head, *mbuf_pre;
+
+ left_sges = mbuf->nb_segs;
+ mbuf_head = mbuf;
+
+ /* tso sge number validation */
+ if (unlikely(left_sges >= HINIC_NONTSO_PKT_MAX_SGE)) {
+ checked_len = 0;
+ limit_len = mbuf->tso_segsz + poff_info->payload_offset;
+ first_mss_sges = HINIC_NONTSO_PKT_MAX_SGE;
+
+ /* every run of 17 consecutive mbuf segments must pass one check */
+ while (left_sges >= HINIC_NONTSO_PKT_MAX_SGE) {
+ /* total len of the mbufs in this window (17 for the first
+ * window, 16 afterwards) must equal or exceed limit_len
+ */
+ total_len = 0;
+ for (i = 0; i < first_mss_sges; i++) {
+ total_len += mbuf->data_len;
+ mbuf_pre = mbuf;
+ mbuf = mbuf->next;
+ if (total_len >= limit_len) {
+ limit_len = mbuf_head->tso_segsz;
+ break;
+ }
+ }
+
+ checked_len += total_len;
+
+ /* try to copy if not valid: the loop ran to completion, so the
+ * window never reached limit_len
+ */
+ if (unlikely(first_mss_sges == i)) {
+ left_sges -= first_mss_sges;
+ checked_len -= mbuf_pre->data_len;
+
+ left_len = mbuf_head->pkt_len - checked_len;
+ if (left_len > HINIC_COPY_MBUF_SIZE)
+ return false;
+
+ sqe_info->sge_cnt = mbuf_head->nb_segs -
+ left_sges;
+ sqe_info->cpy_mbuf_cnt = 1;
+
+ return true;
+ }
+ first_mss_sges = (HINIC_NONTSO_PKT_MAX_SGE - 1);
+
+ /* continue next 16 mbufs */
+ left_sges -= (i + 1);
+ } /* end of while */
+ }
+
+ sqe_info->sge_cnt = mbuf_head->nb_segs;
+ return true;
+}
+
+/*
+ * Write the header length and checksum offload fields of a wqe.
+ *
+ * task:       task section; pkt_info0/1/2 are OR-ed with the new fields
+ * queue_info: queue_info word; payload offset, tcp/udp checksum and sctp
+ *             bits are OR-ed in
+ * poff_info:  parsed offload description of the packet
+ *
+ * For TUNNEL_UDP_NO_CSUM packets the outer l2 length is used as l2 header
+ * length and the outer l3 / tunnel fields are filled in pkt_info2;
+ * otherwise the inner l2 length is used.
+ */
+static inline void
+hinic_set_l4_csum_info(struct hinic_sq_task *task,
+		u32 *queue_info, struct hinic_tx_offload_info *poff_info)
+{
+	const u32 tcp_udp_cs = poff_info->inner_l4_tcp_udp;
+	u32 sctp = 0;
+	u16 l2hdr_len;
+
+	if (unlikely(poff_info->inner_l4_type == SCTP_OFFLOAD_ENABLE))
+		sctp = 1;
+
+	if (poff_info->tunnel_type != TUNNEL_UDP_NO_CSUM) {
+		l2hdr_len = poff_info->inner_l2_len;
+	} else {
+		l2hdr_len = poff_info->outer_l2_len;
+
+		task->pkt_info2 |=
+		SQ_TASK_INFO2_SET(poff_info->outer_l3_type, OUTER_L3TYPE) |
+		SQ_TASK_INFO2_SET(poff_info->outer_l3_len, OUTER_L3LEN) |
+		SQ_TASK_INFO2_SET(poff_info->tunnel_type, TUNNEL_L4TYPE) |
+		SQ_TASK_INFO2_SET(poff_info->tunnel_length, TUNNEL_L4LEN);
+	}
+
+	task->pkt_info0 |=
+		SQ_TASK_INFO0_SET(l2hdr_len, L2HDR_LEN) |
+		SQ_TASK_INFO0_SET(poff_info->inner_l3_type, INNER_L3TYPE) |
+		SQ_TASK_INFO0_SET(poff_info->inner_l4_type, L4OFFLOAD);
+
+	task->pkt_info1 |=
+		SQ_TASK_INFO1_SET(poff_info->inner_l3_len, INNER_L3LEN) |
+		SQ_TASK_INFO1_SET(poff_info->inner_l4_len, INNER_L4LEN);
+
+	*queue_info |=
+		SQ_CTRL_QUEUE_INFO_SET(poff_info->payload_offset, PLDOFF) |
+		SQ_CTRL_QUEUE_INFO_SET(tcp_udp_cs, TCPUDP_CS) |
+		SQ_CTRL_QUEUE_INFO_SET(sctp, SCTP);
+}
+
+/*
+ * Write the offload fields of a TSO wqe.
+ *
+ * task:       task section of the wqe
+ * queue_info: queue_info word; TSO bit is set and the MSS field is
+ *             replaced by mbuf->tso_segsz
+ * mbuf:       packet, only tso_segsz is read
+ * poff_info:  parsed offload description of the packet
+ *
+ * The checksum fields are filled first by hinic_set_l4_csum_info().
+ */
+static inline void
+hinic_set_tso_info(struct hinic_sq_task *task,
+		u32 *queue_info, struct rte_mbuf *mbuf,
+		struct hinic_tx_offload_info *poff_info)
+{
+	u32 qinfo;
+
+	hinic_set_l4_csum_info(task, queue_info, poff_info);
+
+	/* wqe for tso */
+	task->pkt_info0 |=
+		SQ_TASK_INFO0_SET(poff_info->inner_l3_type, INNER_L3TYPE) |
+		SQ_TASK_INFO0_SET(TSO_ENABLE, TSO_UFO);
+
+	qinfo = *queue_info | SQ_CTRL_QUEUE_INFO_SET(TSO_ENABLE, TSO);
+
+	/* qsf was initialized in prepare_sq_wqe */
+	qinfo = SQ_CTRL_QUEUE_INFO_CLEAR(qinfo, MSS);
+	qinfo |= SQ_CTRL_QUEUE_INFO_SET(mbuf->tso_segsz, MSS);
+
+	*queue_info = qinfo;
+}
+
+/*
+ * Request vlan insertion for a wqe.
+ *
+ * task:       task section; vlan tag and vlan offload bit are OR-ed into
+ *             pkt_info0
+ * queue_info: queue_info word; vlan_pri is OR-ed into the PRI field
+ * vlan_tag:   tag to insert
+ * vlan_pri:   priority value for the PRI field
+ */
+static inline void
+hinic_set_vlan_tx_offload(struct hinic_sq_task *task,
+			u32 *queue_info, u16 vlan_tag, u16 vlan_pri)
+{
+	task->pkt_info0 |= SQ_TASK_INFO0_SET(1U, VLAN_OFFLOAD);
+	task->pkt_info0 |= SQ_TASK_INFO0_SET(vlan_tag, VLAN_TAG);
+
+	*queue_info |= SQ_CTRL_QUEUE_INFO_SET(vlan_pri, PRI);
+}
+
+/*
+ * Fill the offload related fields of a wqe from the mbuf ol_flags.
+ *
+ * mbuf:        packet; ol_flags, vlan_tci (and tso_segsz for TSO) are read
+ * task:        task section; pkt_info0..2 are cleared and refilled
+ * queue_info:  queue_info word updated by the selected offloads
+ * tx_off_info: parsed offload description of the packet
+ */
+static inline void
+hinic_fill_tx_offload_info(struct rte_mbuf *mbuf,
+		struct hinic_sq_task *task, u32 *queue_info,
+		struct hinic_tx_offload_info *tx_off_info)
+{
+	const uint64_t flags = mbuf->ol_flags;
+	u16 tci;
+
+	/* clear DW0~2 of task section for offload */
+	task->pkt_info2 = 0;
+	task->pkt_info1 = 0;
+	task->pkt_info0 = 0;
+
+	/* Base VLAN */
+	if (unlikely(flags & PKT_TX_VLAN_PKT)) {
+		tci = mbuf->vlan_tci;
+		hinic_set_vlan_tx_offload(task, queue_info, tci,
+					  tci >> VLAN_PRIO_SHIFT);
+	}
+
+	/* neither checksum nor tso requested: nothing more to fill */
+	if (unlikely(!(flags & HINIC_TX_CKSUM_OFFLOAD_MASK)))
+		return;
+
+	if (flags & PKT_TX_TCP_SEG) {
+		/* set tso info for task and qsf */
+		hinic_set_tso_info(task, queue_info, mbuf, tx_off_info);
+	} else {
+		/* just support l4 checksum offload */
+		hinic_set_l4_csum_info(task, queue_info, tx_off_info);
+	}
+}
+/*
+ * Release the mbufs of wqes that hardware has finished with.
+ *
+ * Walks at most txq->tx_free_thresh tx_info entries starting at the local
+ * ci and stops as soon as the hardware ci has not passed a whole wqe.
+ * For each completed entry the copy mbuf (if any) is freed; single-segment
+ * mbufs are collected in mbuf_free[] and handed back with
+ * rte_mempool_put_bulk(), multi-segment mbufs are freed with
+ * rte_pktmbuf_free(). Finally the local ci and delta are advanced by the
+ * number of wqebbs reclaimed.
+ */
+static inline void hinic_xmit_mbuf_cleanup(struct hinic_txq *txq)
+{
+ struct hinic_tx_info *tx_info;
+ struct rte_mbuf *mbuf, *m, *mbuf_free[HINIC_MAX_TX_FREE_BULK];
+ int i, nb_free = 0;
+ u16 hw_ci, sw_ci, sq_mask;
+ int wqebb_cnt = 0;
+
+ hw_ci = HINIC_GET_SQ_HW_CI(txq);
+ sw_ci = HINIC_GET_SQ_LOCAL_CI(txq);
+ sq_mask = HINIC_GET_SQ_WQE_MASK(txq);
+
+ for (i = 0; i < txq->tx_free_thresh; ++i) {
+ tx_info = &txq->tx_info[sw_ci];
+ if (hw_ci == sw_ci ||
+ (((hw_ci - sw_ci) & sq_mask) < tx_info->wqebb_cnt))
+ break; /* hardware has not consumed this whole wqe yet */
+
+ sw_ci = (sw_ci + tx_info->wqebb_cnt) & sq_mask;
+
+ if (unlikely(tx_info->cpy_mbuf != NULL)) {
+ rte_pktmbuf_free(tx_info->cpy_mbuf);
+ tx_info->cpy_mbuf = NULL;
+ }
+
+ wqebb_cnt += tx_info->wqebb_cnt;
+ mbuf = tx_info->mbuf;
+
+ if (likely(mbuf->nb_segs == 1)) {
+ m = rte_pktmbuf_prefree_seg(mbuf);
+ tx_info->mbuf = NULL;
+
+ if (unlikely(m == NULL))
+ continue; /* prefree_seg gave nothing back to release */
+
+ mbuf_free[nb_free++] = m;
+ if (unlikely(m->pool != mbuf_free[0]->pool || /* pool changed */
+ nb_free >= HINIC_MAX_TX_FREE_BULK)) { /* or batch is full */
+ rte_mempool_put_bulk(mbuf_free[0]->pool,
+ (void **)mbuf_free, (nb_free - 1)); /* all but m */
+ nb_free = 0;
+ mbuf_free[nb_free++] = m; /* m starts the next batch */
+ }
+ } else {
+ rte_pktmbuf_free(mbuf);
+ tx_info->mbuf = NULL;
+ }
+ }
+
+ if (nb_free > 0)
+ rte_mempool_put_bulk(mbuf_free[0]->pool, (void **)mbuf_free,
+ nb_free);
+
+ HINIC_UPDATE_SQ_LOCAL_CI(txq, wqebb_cnt);
+}
+
+/*
+ * Reserve wqebb_cnt wqebbs on the send queue and return the first one.
+ *
+ * txq:       send queue to reserve from
+ * wqebb_cnt: number of wqebbs needed by the wqe
+ * wqe_info:  receives the start pi, the owner bit valid for this wqe,
+ *            whether the wqe wraps (around) and how many of its wqebbs lie
+ *            before the end of the ring (seq_wqebbs)
+ *
+ * prod_idx and delta of the work queue are updated, and sq->owner is
+ * toggled when the reservation reaches or passes the end of the ring.
+ *
+ * Returns the address of the wqe at the start pi.
+ */
+static inline struct hinic_sq_wqe *
+hinic_get_sq_wqe(struct hinic_txq *txq, int wqebb_cnt,
+		struct hinic_wqe_info *wqe_info)
+{
+	struct hinic_wq *wq = txq->wq;
+	struct hinic_sq *sq = txq->sq;
+	const u32 start = MASKED_WQE_IDX(wq, wq->prod_idx);
+	const u32 end = start + wqebb_cnt;
+
+	/* update next pi and delta */
+	wq->delta -= wqebb_cnt;
+	wq->prod_idx += wqebb_cnt;
+
+	/* the owner bit reported is the one in effect before any toggle */
+	wqe_info->pi = start;
+	wqe_info->owner = sq->owner;
+
+	if (unlikely(end > txq->q_depth)) {
+		/* wqe continues at the head of the ring */
+		wqe_info->around = 1;
+		wqe_info->seq_wqebbs = (u16)(txq->q_depth - start);
+	} else {
+		wqe_info->around = 0;
+		wqe_info->seq_wqebbs = wqebb_cnt;
+	}
+
+	/* update owner of next prod_idx */
+	if (unlikely(end >= txq->q_depth))
+		sq->owner = !sq->owner;
+
+	return (struct hinic_sq_wqe *)WQ_WQE_ADDR(wq, start);
+}
+
+/*
+ * Sanity check the tx offload request carried by an mbuf.
+ *
+ * m: packet to check; ol_flags, header lengths and tso_segsz are read
+ *
+ * Returns 0 when the request is acceptable, -ENOTSUP for a tunnel type
+ * other than vxlan or for headers that are not fully contained in the
+ * first segment, and -EINVAL for inconsistent flag combinations.
+ */
+static inline int
+hinic_validate_tx_offload(const struct rte_mbuf *m)
+{
+	const uint64_t flags = m->ol_flags;
+	const int is_tso = (flags & PKT_TX_TCP_SEG) != 0;
+	uint64_t hdr_len = m->l2_len;
+
+	/* just support vxlan offload */
+	if ((flags & PKT_TX_TUNNEL_MASK) && !(flags & PKT_TX_TUNNEL_VXLAN))
+		return -ENOTSUP;
+
+	if (flags & PKT_TX_OUTER_IP_CKSUM)
+		hdr_len += m->outer_l2_len + m->outer_l3_len;
+
+	/* Headers are fragmented */
+	if (rte_pktmbuf_data_len(m) < hdr_len + m->l3_len + m->l4_len)
+		return -ENOTSUP;
+
+	/* IP checksum can be counted only for IPv4 packet */
+	if ((flags & PKT_TX_IP_CKSUM) && (flags & PKT_TX_IPV6))
+		return -EINVAL;
+
+	/* IP type not set when required */
+	if ((flags & (PKT_TX_L4_MASK | PKT_TX_TCP_SEG)) &&
+	    !(flags & (PKT_TX_IPV4 | PKT_TX_IPV6)))
+		return -EINVAL;
+
+	/* Check requirements for TSO packet */
+	if (is_tso && m->tso_segsz == 0)
+		return -EINVAL;
+	if (is_tso && (flags & PKT_TX_IPV4) && !(flags & PKT_TX_IP_CKSUM))
+		return -EINVAL;
+
+	/* PKT_TX_OUTER_IP_CKSUM set for non outer IPv4 packet. */
+	if ((flags & PKT_TX_OUTER_IP_CKSUM) && !(flags & PKT_TX_OUTER_IPV4))
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * Raw (unreduced-complement) checksum of the IPv4 pseudo header built from
+ * ipv4_hdr. When PKT_TX_TCP_SEG is set in ol_flags the L4 length field of
+ * the pseudo header is left as 0; otherwise it is total_length minus the
+ * IPv4 header length.
+ */
+static inline uint16_t
+hinic_ipv4_phdr_cksum(const struct rte_ipv4_hdr *ipv4_hdr, uint64_t ol_flags)
+{
+	struct ipv4_psd_header {
+		uint32_t src_addr; /* source address, as stored in the header */
+		uint32_t dst_addr; /* destination address, as stored */
+		uint8_t zero;      /* always 0 */
+		uint8_t proto;     /* L4 protocol number */
+		uint16_t len;      /* L4 length, big endian */
+	} pseudo;
+	uint16_t l4_len_be = 0;
+	uint8_t hdr_bytes;
+
+	if (!(ol_flags & PKT_TX_TCP_SEG)) {
+		/* IHL is the low nibble of version_ihl, in 4-byte units */
+		hdr_bytes = (ipv4_hdr->version_ihl & 0xF) << 2;
+		l4_len_be = rte_cpu_to_be_16(
+			rte_be_to_cpu_16(ipv4_hdr->total_length) - hdr_bytes);
+	}
+
+	pseudo.src_addr = ipv4_hdr->src_addr;
+	pseudo.dst_addr = ipv4_hdr->dst_addr;
+	pseudo.zero = 0;
+	pseudo.proto = ipv4_hdr->next_proto_id;
+	pseudo.len = l4_len_be;
+
+	return rte_raw_cksum(&pseudo, sizeof(pseudo));
+}
+
+/*
+ * Reduced raw checksum of the IPv6 pseudo header built from ipv6_hdr:
+ * source and destination addresses followed by a length word and a
+ * protocol word. When PKT_TX_TCP_SEG is set in ol_flags the length word is
+ * 0; otherwise it is the header's payload_len as stored.
+ */
+static inline uint16_t
+hinic_ipv6_phdr_cksum(const struct rte_ipv6_hdr *ipv6_hdr, uint64_t ol_flags)
+{
+	uint32_t sum;
+	struct {
+		uint32_t len;   /* L4 length. */
+		uint32_t proto; /* L4 protocol - top 3 bytes must be zero */
+	} psd_hdr;
+
+	/* Widen before shifting: proto is promoted to int, and shifting a
+	 * value >= 128 left by 24 is not representable in int (undefined
+	 * behaviour).
+	 * NOTE(review): this places proto in the last byte in memory only on
+	 * a little-endian host - confirm if big-endian targets matter.
+	 */
+	psd_hdr.proto = (uint32_t)ipv6_hdr->proto << 24;
+	if (ol_flags & PKT_TX_TCP_SEG)
+		psd_hdr.len = 0;
+	else
+		psd_hdr.len = ipv6_hdr->payload_len;
+
+	/* src_addr and dst_addr are summed as one contiguous region */
+	sum = __rte_raw_cksum(ipv6_hdr->src_addr,
+		sizeof(ipv6_hdr->src_addr) + sizeof(ipv6_hdr->dst_addr), 0);
+	sum = __rte_raw_cksum(&psd_hdr, sizeof(psd_hdr), sum);
+	return __rte_raw_cksum_reduce(sum);
+}
+
+/*
+ * Translate the offload flags of mbuf m into off_info and seed the packet's
+ * L4 checksum fields with pseudo-header checksums.
+ *
+ * Returns 0 immediately when no flag in HINIC_TX_CKSUM_OFFLOAD_MASK is set
+ * (off_info is then left untouched). Returns -EINVAL when
+ * hinic_validate_tx_offload() rejects the mbuf or when the computed
+ * payload offset exceeds MAX_PLD_OFFSET. Otherwise returns 0 after filling
+ * off_info.
+ *
+ * Side effects on packet data: IPv4 hdr_checksum may be zeroed, and the
+ * UDP dgram_cksum / TCP cksum fields are overwritten with the
+ * pseudo-header checksum.
+ */
+static inline int
+hinic_tx_offload_pkt_prepare(struct rte_mbuf *m,
+ struct hinic_tx_offload_info *off_info)
+{
+ struct rte_ipv4_hdr *ipv4_hdr;
+ struct rte_ipv6_hdr *ipv6_hdr;
+ struct rte_tcp_hdr *tcp_hdr;
+ struct rte_udp_hdr *udp_hdr;
+ struct rte_ether_hdr *eth_hdr;
+ struct rte_vlan_hdr *vlan_hdr;
+ u16 eth_type = 0;
+ uint64_t inner_l3_offset = m->l2_len;
+ uint64_t ol_flags = m->ol_flags;
+
+ /* Does packet set any of available offloads */
+ if (!(ol_flags & HINIC_TX_CKSUM_OFFLOAD_MASK))
+ return 0;
+
+ /* any validation failure is reported to the caller as -EINVAL */
+ if (unlikely(hinic_validate_tx_offload(m)))
+ return -EINVAL;
+
+ if ((ol_flags & PKT_TX_OUTER_IP_CKSUM) ||
+ (ol_flags & PKT_TX_OUTER_IPV6) ||
+ (ol_flags & PKT_TX_TUNNEL_VXLAN)) {
+ /* tunnelled: inner L3 starts after the outer L2/L3 and l2_len */
+ inner_l3_offset += m->outer_l2_len + m->outer_l3_len;
+ off_info->outer_l2_len = m->outer_l2_len;
+ off_info->outer_l3_len = m->outer_l3_len;
+ /* only VXLAN tunnelling is supported: l2_len is treated as
+ * outer UDP header + VXLAN header + inner L2 header
+ */
+ off_info->inner_l2_len = m->l2_len - VXLANLEN -
+ sizeof(struct rte_udp_hdr);
+ off_info->inner_l3_len = m->l3_len;
+ off_info->inner_l4_len = m->l4_len;
+ off_info->tunnel_length = m->l2_len;
+ off_info->payload_offset = m->outer_l2_len +
+ m->outer_l3_len + m->l2_len + m->l3_len;
+ off_info->tunnel_type = TUNNEL_UDP_NO_CSUM;
+ } else {
+ off_info->inner_l2_len = m->l2_len;
+ off_info->inner_l3_len = m->l3_len;
+ off_info->inner_l4_len = m->l4_len;
+ off_info->tunnel_type = NOT_TUNNEL;
+ off_info->payload_offset = m->l2_len + m->l3_len;
+ }
+
+ /* for everything except SCTP and UDP checksum offload the payload
+ * offset also skips the L4 header
+ */
+ if (((ol_flags & PKT_TX_L4_MASK) != PKT_TX_SCTP_CKSUM) &&
+ ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_UDP_CKSUM))
+ off_info->payload_offset += m->l4_len;
+
+ /* invalid udp or tcp header */
+ if (unlikely(off_info->payload_offset > MAX_PLD_OFFSET))
+ return -EINVAL;
+
+ /* Process outer udp pseudo-header checksum */
+ if ((ol_flags & PKT_TX_TUNNEL_VXLAN) && ((ol_flags & PKT_TX_TCP_SEG) ||
+ (ol_flags & PKT_TX_OUTER_IP_CKSUM) ||
+ (ol_flags & PKT_TX_OUTER_IPV6))) {
+ off_info->tunnel_type = TUNNEL_UDP_CSUM;
+
+ /* inner_l4_tcp_udp must be set so that the outer udp checksum
+ * is calculated for vxlan packets without inner l3 and l4
+ * offload; the inner L4 branches below may overwrite it
+ */
+ off_info->inner_l4_tcp_udp = 1;
+
+ /* read the outer ethertype, looking through one VLAN tag */
+ eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
+ eth_type = rte_be_to_cpu_16(eth_hdr->ether_type);
+
+ if (eth_type == RTE_ETHER_TYPE_VLAN) {
+ vlan_hdr = (struct rte_vlan_hdr *)(eth_hdr + 1);
+ eth_type = rte_be_to_cpu_16(vlan_hdr->eth_proto);
+ }
+
+ if (eth_type == RTE_ETHER_TYPE_IPV4) {
+ ipv4_hdr =
+ rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *,
+ m->outer_l2_len);
+ off_info->outer_l3_type = IPV4_PKT_WITH_CHKSUM_OFFLOAD;
+ ipv4_hdr->hdr_checksum = 0;
+
+ /* outer UDP header follows the outer IPv4 header */
+ udp_hdr = (struct rte_udp_hdr *)((char *)ipv4_hdr +
+ m->outer_l3_len);
+ udp_hdr->dgram_cksum =
+ hinic_ipv4_phdr_cksum(ipv4_hdr, ol_flags);
+ } else if (eth_type == RTE_ETHER_TYPE_IPV6) {
+ off_info->outer_l3_type = IPV6_PKT;
+ ipv6_hdr =
+ rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *,
+ m->outer_l2_len);
+
+ udp_hdr =
+ rte_pktmbuf_mtod_offset(m, struct rte_udp_hdr *,
+ (m->outer_l2_len +
+ m->outer_l3_len));
+ udp_hdr->dgram_cksum =
+ hinic_ipv6_phdr_cksum(ipv6_hdr, ol_flags);
+ }
+ }
+
+ /* inner L3 type; left at its prior value when neither flag is set */
+ if (ol_flags & PKT_TX_IPV4)
+ off_info->inner_l3_type = (ol_flags & PKT_TX_IP_CKSUM) ?
+ IPV4_PKT_WITH_CHKSUM_OFFLOAD :
+ IPV4_PKT_NO_CHKSUM_OFFLOAD;
+ else if (ol_flags & PKT_TX_IPV6)
+ off_info->inner_l3_type = IPV6_PKT;
+
+ /* Process the pseudo-header checksum */
+ if ((ol_flags & PKT_TX_L4_MASK) == PKT_TX_UDP_CKSUM) {
+ if (ol_flags & PKT_TX_IPV4) {
+ ipv4_hdr =
+ rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *,
+ inner_l3_offset);
+
+ if (ol_flags & PKT_TX_IP_CKSUM)
+ ipv4_hdr->hdr_checksum = 0;
+
+ udp_hdr = (struct rte_udp_hdr *)((char *)ipv4_hdr +
+ m->l3_len);
+ udp_hdr->dgram_cksum =
+ hinic_ipv4_phdr_cksum(ipv4_hdr, ol_flags);
+ } else {
+ /* not IPv4: the header is handled as IPv6 */
+ ipv6_hdr =
+ rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *,
+ inner_l3_offset);
+
+ udp_hdr =
+ rte_pktmbuf_mtod_offset(m, struct rte_udp_hdr *,
+ (inner_l3_offset + m->l3_len));
+ udp_hdr->dgram_cksum =
+ hinic_ipv6_phdr_cksum(ipv6_hdr, ol_flags);
+ }
+
+ off_info->inner_l4_type = UDP_OFFLOAD_ENABLE;
+ off_info->inner_l4_tcp_udp = 1;
+ off_info->inner_l4_len = sizeof(struct rte_udp_hdr);
+ } else if (((ol_flags & PKT_TX_L4_MASK) == PKT_TX_TCP_CKSUM) ||
+ (ol_flags & PKT_TX_TCP_SEG)) {
+ if (ol_flags & PKT_TX_IPV4) {
+ ipv4_hdr =
+ rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *,
+ inner_l3_offset);
+
+ if (ol_flags & PKT_TX_IP_CKSUM)
+ ipv4_hdr->hdr_checksum = 0;
+
+ /* TCP pseudo-header checksum; the helper leaves the
+ * length field as 0 when PKT_TX_TCP_SEG is set
+ */
+ tcp_hdr = (struct rte_tcp_hdr *)((char *)ipv4_hdr +
+ m->l3_len);
+ tcp_hdr->cksum =
+ hinic_ipv4_phdr_cksum(ipv4_hdr, ol_flags);
+ } else {
+ ipv6_hdr =
+ rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *,
+ inner_l3_offset);
+ /* TCP pseudo-header checksum; the helper leaves the
+ * length field as 0 when PKT_TX_TCP_SEG is set
+ */
+ tcp_hdr =
+ rte_pktmbuf_mtod_offset(m, struct rte_tcp_hdr *,
+ (inner_l3_offset + m->l3_len));
+ tcp_hdr->cksum =
+ hinic_ipv6_phdr_cksum(ipv6_hdr, ol_flags);
+ }
+
+ off_info->inner_l4_type = TCP_OFFLOAD_ENABLE;
+ off_info->inner_l4_tcp_udp = 1;
+ } else if ((ol_flags & PKT_TX_L4_MASK) == PKT_TX_SCTP_CKSUM) {
+ /* SCTP: no pseudo-header checksum is written to the packet */
+ off_info->inner_l4_type = SCTP_OFFLOAD_ENABLE;
+ off_info->inner_l4_tcp_udp = 0;
+ off_info->inner_l4_len = sizeof(struct rte_sctp_hdr);
+ }
+
+ return 0;
+}
+
+/*
+ * Derive the offload description and the SGE layout for one TX packet.
+ *
+ * off_info is cleared and then filled by hinic_tx_offload_pkt_prepare().
+ * sqe_info->cpy_mbuf_cnt is reset to 0 and set to 1 when the trailing
+ * segments of an over-segmented non-TSO packet have to be copied into a
+ * single buffer.
+ *
+ * Returns true when the packet can be queued, false when it has to be
+ * rejected (offload preparation failed, packet too long, too many
+ * segments, or invalid TSO segment layout).
+ */
+static inline bool hinic_get_sge_txoff_info(struct rte_mbuf *mbuf_pkt,
+					    struct hinic_wqe_info *sqe_info,
+					    struct hinic_tx_offload_info
+					    *off_info)
+{
+	u16 seg_cnt = mbuf_pkt->nb_segs;
+	u16 head_len, left;
+	struct rte_mbuf *seg;
+
+	memset(off_info, 0, sizeof(*off_info));
+
+	if (unlikely(hinic_tx_offload_pkt_prepare(mbuf_pkt, off_info) != 0))
+		return false;
+
+	sqe_info->cpy_mbuf_cnt = 0;
+
+	/* tso mbuf */
+	if (unlikely(mbuf_pkt->ol_flags & PKT_TX_TCP_SEG)) {
+		/* too many mbuf segs */
+		if (unlikely(HINIC_TSO_SEG_NUM_INVALID(seg_cnt)))
+			return false;
+
+		/* check tso mbuf segs are valid or not */
+		if (unlikely(!hinic_is_tso_sge_valid(mbuf_pkt, off_info,
+						     sqe_info)))
+			return false;
+
+		return true;
+	}
+
+	/* non tso packet length is bounded by MAX_SINGLE_SGE_SIZE */
+	if (unlikely(mbuf_pkt->pkt_len > MAX_SINGLE_SGE_SIZE))
+		return false;
+
+	/* common case: segment count is acceptable as is */
+	if (likely(!HINIC_NONTSO_SEG_NUM_INVALID(seg_cnt))) {
+		sqe_info->sge_cnt = seg_cnt;
+		return true;
+	}
+
+	/* too many segments: everything after the first
+	 * (HINIC_NONTSO_PKT_MAX_SGE - 1) segments must fit in one copy
+	 * buffer, so add up the length of those leading segments
+	 */
+	head_len = 0;
+	seg = mbuf_pkt;
+	for (left = HINIC_NONTSO_PKT_MAX_SGE - 1; left > 0; left--) {
+		head_len += seg->data_len;
+		seg = seg->next;
+	}
+
+	/* the remainder must not exceed HINIC_COPY_MBUF_SIZE */
+	if ((u32)(head_len + (u16)HINIC_COPY_MBUF_SIZE) < mbuf_pkt->pkt_len)
+		return false;
+
+	sqe_info->sge_cnt = HINIC_NONTSO_PKT_MAX_SGE;
+	sqe_info->cpy_mbuf_cnt = 1;
+	return true;
+}
+
+/*
+ * Ring the doorbell of send queue sq for class of service cos.
+ *
+ * The doorbell word is assembled from the high part of the masked producer
+ * index, the SQ doorbell type, the data-path flag, cos and the queue id,
+ * converted to big endian, and written to the doorbell address selected
+ * by the masked producer index. A write barrier precedes the doorbell
+ * write.
+ */
+static inline void hinic_sq_write_db(struct hinic_sq *sq, int cos)
+{
+	struct hinic_sq_db db;
+	u16 pi = MASKED_SQ_IDX(sq, sq->wq->prod_idx);
+	u32 pi_hi = SQ_DB_PI_HIGH(pi);
+
+	db.db_info = SQ_DB_INFO_SET(pi_hi, HI_PI) |
+		     SQ_DB_INFO_SET(SQ_DB, TYPE) |
+		     SQ_DB_INFO_SET(SQ_CFLAG_DP, CFLAG) |
+		     SQ_DB_INFO_SET(cos, COS) |
+		     SQ_DB_INFO_SET(sq->q_id, QID);
+
+	/* the doorbell word is handed to HW in big endian format */
+	db.db_info = cpu_to_be32(db.db_info);
+
+	/* all earlier stores must be issued before the doorbell write */
+	rte_wmb();
+	writel(db.db_info, SQ_DB_ADDR(sq, pi));
+}
+
+/*
+ * Burst transmit routine.
+ *
+ * tx_queue is used as a struct hinic_txq. Up to nb_pkts mbufs from tx_pkts
+ * are turned into send-queue WQEs in order; processing stops at the first
+ * packet that cannot be queued (offload/SGE parsing fails, not enough free
+ * WQEBBs even after a cleanup pass, or SGE mapping fails). The doorbell is
+ * written once, after the loop, if at least one packet was queued.
+ *
+ * Returns the number of packets queued; packets from that index on are
+ * not consumed by this function.
+ */
+u16 hinic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, u16 nb_pkts)
+{
+ int free_wqebb_cnt, wqe_wqebb_cnt;
+ u32 queue_info, tx_bytes = 0;
+ u16 nb_tx;
+ struct hinic_wqe_info sqe_info;
+ struct hinic_tx_offload_info off_info;
+ struct rte_mbuf *mbuf_pkt;
+ struct hinic_txq *txq = tx_queue;
+ struct hinic_tx_info *tx_info;
+ struct hinic_sq_wqe *sq_wqe;
+ struct hinic_sq_task *task;
+
+ /* reclaim tx mbuf before xmit new packet */
+ if (HINIC_GET_SQ_FREE_WQEBBS(txq) < txq->tx_free_thresh)
+ hinic_xmit_mbuf_cleanup(txq);
+
+ /* tx loop routine */
+ for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
+ mbuf_pkt = *tx_pkts++;
+ queue_info = 0;
+
+ /* 1. parse sge and tx offload info from mbuf */
+ if (unlikely(!hinic_get_sge_txoff_info(mbuf_pkt,
+ &sqe_info, &off_info))) {
+ txq->txq_stats.off_errs++;
+ break;
+ }
+
+ /* 2. try to get enough wqebb */
+ wqe_wqebb_cnt = HINIC_SQ_WQEBB_CNT(sqe_info.sge_cnt);
+ free_wqebb_cnt = HINIC_GET_SQ_FREE_WQEBBS(txq);
+ if (unlikely(wqe_wqebb_cnt > free_wqebb_cnt)) {
+ /* reclaim again */
+ hinic_xmit_mbuf_cleanup(txq);
+ free_wqebb_cnt = HINIC_GET_SQ_FREE_WQEBBS(txq);
+ if (unlikely(wqe_wqebb_cnt > free_wqebb_cnt)) {
+ /* count this and all remaining packets as busy */
+ txq->txq_stats.tx_busy += (nb_pkts - nb_tx);
+ break;
+ }
+ }
+
+ /* 3. get sq tail wqe address from wqe_page,
+ * sq have enough wqebb for this packet
+ */
+ sq_wqe = hinic_get_sq_wqe(txq, wqe_wqebb_cnt, &sqe_info);
+
+ /* 4. fill sq wqe sge section */
+ if (unlikely(!hinic_mbuf_dma_map_sge(txq, mbuf_pkt,
+ sq_wqe->buf_descs,
+ &sqe_info))) {
+ /* give the WQEBBs reserved in step 3 back to the queue */
+ hinic_return_sq_wqe(txq->nic_dev->hwdev, txq->q_id,
+ wqe_wqebb_cnt, sqe_info.owner);
+ txq->txq_stats.off_errs++;
+ break;
+ }
+
+ /* 5. fill sq wqe task section and queue info */
+ task = &sq_wqe->task;
+
+ /* tx packet offload configure */
+ hinic_fill_tx_offload_info(mbuf_pkt, task, &queue_info,
+ &off_info);
+
+ /* 6. record tx info, indexed by the WQE's start producer index */
+ tx_info = &txq->tx_info[sqe_info.pi];
+ tx_info->mbuf = mbuf_pkt;
+ tx_info->wqebb_cnt = wqe_wqebb_cnt;
+
+ /* 7. fill sq wqe header section */
+ hinic_fill_sq_wqe_header(&sq_wqe->ctrl, queue_info,
+ sqe_info.sge_cnt, sqe_info.owner);
+
+ /* 8. convert continuous or bottom wqe byte order to big endian */
+ hinic_sq_wqe_cpu_to_be32(sq_wqe, sqe_info.seq_wqebbs);
+
+ tx_bytes += mbuf_pkt->pkt_len;
+ }
+
+ /* 9. write sq doorbell in burst mode */
+ if (nb_tx) {
+ hinic_sq_write_db(txq->sq, txq->cos);
+
+ txq->txq_stats.packets += nb_tx;
+ txq->txq_stats.bytes += tx_bytes;
+ }
+ /* burst size of the latest call, recorded even when it is 0 */
+ txq->txq_stats.burst_pkts = nb_tx;
+
+ return nb_tx;
+}
void hinic_free_all_tx_skbs(struct hinic_txq *txq)
{