985540a9352f2adb3b1067d4dfea6a2b2597b536
[dpdk.git] / drivers / net / hinic / hinic_pmd_tx.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2017 Huawei Technologies Co., Ltd
3  */
4
5 #include <rte_mbuf.h>
6 #include <rte_tcp.h>
7 #include <rte_sctp.h>
8 #include <rte_udp.h>
9 #include <rte_ip.h>
10 #ifdef __ARM64_NEON__
11 #include <arm_neon.h>
12 #endif
13
14 #include "base/hinic_compat.h"
15 #include "base/hinic_pmd_hwdev.h"
16 #include "base/hinic_pmd_hwif.h"
17 #include "base/hinic_pmd_wq.h"
18 #include "base/hinic_pmd_nicio.h"
19 #include "hinic_pmd_ethdev.h"
20 #include "hinic_pmd_tx.h"
21
/* packet header and tx offload info */
#define ETHER_LEN_NO_VLAN		14
#define ETHER_LEN_WITH_VLAN		18
#define HEADER_LEN_OFFSET		2
#define VXLANLEN			8
#define MAX_PLD_OFFSET			221
#define MAX_SINGLE_SGE_SIZE		65536
#define TSO_ENABLE			1
#define TX_MSS_DEFAULT			0x3E00
#define TX_MSS_MIN			0x50

#define HINIC_NONTSO_PKT_MAX_SGE		17	/* non-tso max sge 17 */
/* true when a non-TSO packet has more segments than one WQE can describe */
#define HINIC_NONTSO_SEG_NUM_INVALID(num)	\
			((num) > HINIC_NONTSO_PKT_MAX_SGE)

#define HINIC_TSO_PKT_MAX_SGE			127	/* tso max sge 127 */
/* true when a TSO packet has more segments than one WQE can describe */
#define HINIC_TSO_SEG_NUM_INVALID(num)		((num) > HINIC_TSO_PKT_MAX_SGE)

#define HINIC_TX_OUTER_CHECKSUM_FLAG_SET	1
#define HINIC_TX_OUTER_CHECKSUM_FLAG_NO_SET	0

/*
 * Length of the buffer descriptor section for nr_descs descriptors, passed
 * through SIZE_8BYTES (defined elsewhere).
 * sizeof(struct hinic_sq_bufdesc) == 16, hence the shift by 4.
 * The argument is parenthesized before the cast so that a compound
 * expression is converted as a whole.
 */
#define HINIC_BUF_DESC_SIZE(nr_descs)	(SIZE_8BYTES(((u32)(nr_descs)) << 4))

/* wrap idx into the send queue ring using the work queue mask */
#define MASKED_SQ_IDX(sq, idx)		((idx) & (sq)->wq->mask)
47
/*
 * SQ_CTRL: shift/mask pairs for the fields packed into the 32-bit ctrl_fmt
 * word of a send queue WQE control section.
 */
#define SQ_CTRL_BUFDESC_SECT_LEN_SHIFT		0
#define SQ_CTRL_TASKSECT_LEN_SHIFT		16
#define SQ_CTRL_DATA_FORMAT_SHIFT		22
#define SQ_CTRL_LEN_SHIFT			29
#define SQ_CTRL_OWNER_SHIFT			31

#define SQ_CTRL_BUFDESC_SECT_LEN_MASK		0xFFU
#define SQ_CTRL_TASKSECT_LEN_MASK		0x1FU
#define SQ_CTRL_DATA_FORMAT_MASK		0x1U
#define SQ_CTRL_LEN_MASK			0x3U
#define SQ_CTRL_OWNER_MASK			0x1U

/* mask val to the width of "member" and move it to its bit position */
#define SQ_CTRL_SET(val, member)	\
	(((u32)(val) & SQ_CTRL_##member##_MASK) << SQ_CTRL_##member##_SHIFT)

/* fields of the 32-bit queue_info word of the control section */
#define SQ_CTRL_QUEUE_INFO_PLDOFF_SHIFT		2
#define SQ_CTRL_QUEUE_INFO_UFO_SHIFT		10
#define SQ_CTRL_QUEUE_INFO_TSO_SHIFT		11
#define SQ_CTRL_QUEUE_INFO_TCPUDP_CS_SHIFT	12
#define SQ_CTRL_QUEUE_INFO_MSS_SHIFT		13
#define SQ_CTRL_QUEUE_INFO_SCTP_SHIFT		27
#define SQ_CTRL_QUEUE_INFO_UC_SHIFT		28
#define SQ_CTRL_QUEUE_INFO_PRI_SHIFT		29

#define SQ_CTRL_QUEUE_INFO_PLDOFF_MASK		0xFFU
#define SQ_CTRL_QUEUE_INFO_UFO_MASK		0x1U
#define SQ_CTRL_QUEUE_INFO_TSO_MASK		0x1U
#define SQ_CTRL_QUEUE_INFO_TCPUDP_CS_MASK	0x1U
#define SQ_CTRL_QUEUE_INFO_MSS_MASK		0x3FFFU
#define SQ_CTRL_QUEUE_INFO_SCTP_MASK		0x1U
#define SQ_CTRL_QUEUE_INFO_UC_MASK		0x1U
#define SQ_CTRL_QUEUE_INFO_PRI_MASK		0x7U

/* build the queue_info bits for one field */
#define SQ_CTRL_QUEUE_INFO_SET(val, member)	\
	(((u32)(val) & SQ_CTRL_QUEUE_INFO_##member##_MASK) <<	\
			SQ_CTRL_QUEUE_INFO_##member##_SHIFT)

/* extract one field from a queue_info word */
#define SQ_CTRL_QUEUE_INFO_GET(val, member)	\
	(((val) >> SQ_CTRL_QUEUE_INFO_##member##_SHIFT) &	\
			SQ_CTRL_QUEUE_INFO_##member##_MASK)

/* return val with the bits of one field zeroed */
#define SQ_CTRL_QUEUE_INFO_CLEAR(val, member)	\
	((val) & (~(SQ_CTRL_QUEUE_INFO_##member##_MASK << \
			SQ_CTRL_QUEUE_INFO_##member##_SHIFT)))

/* fields of the task section word pkt_info0 */
#define SQ_TASK_INFO0_L2HDR_LEN_SHIFT		0
#define SQ_TASK_INFO0_L4OFFLOAD_SHIFT		8
#define SQ_TASK_INFO0_INNER_L3TYPE_SHIFT	10
#define SQ_TASK_INFO0_VLAN_OFFLOAD_SHIFT	12
#define SQ_TASK_INFO0_PARSE_FLAG_SHIFT		13
#define SQ_TASK_INFO0_UFO_AVD_SHIFT		14
#define SQ_TASK_INFO0_TSO_UFO_SHIFT		15
#define SQ_TASK_INFO0_VLAN_TAG_SHIFT		16

#define SQ_TASK_INFO0_L2HDR_LEN_MASK		0xFFU
#define SQ_TASK_INFO0_L4OFFLOAD_MASK		0x3U
#define SQ_TASK_INFO0_INNER_L3TYPE_MASK		0x3U
#define SQ_TASK_INFO0_VLAN_OFFLOAD_MASK		0x1U
#define SQ_TASK_INFO0_PARSE_FLAG_MASK		0x1U
#define SQ_TASK_INFO0_UFO_AVD_MASK		0x1U
#define SQ_TASK_INFO0_TSO_UFO_MASK		0x1U
#define SQ_TASK_INFO0_VLAN_TAG_MASK		0xFFFFU

#define SQ_TASK_INFO0_SET(val, member)			\
	(((u32)(val) & SQ_TASK_INFO0_##member##_MASK) <<	\
			SQ_TASK_INFO0_##member##_SHIFT)

/* fields of the task section word pkt_info1 */
#define SQ_TASK_INFO1_MD_TYPE_SHIFT		8
#define SQ_TASK_INFO1_INNER_L4LEN_SHIFT		16
#define SQ_TASK_INFO1_INNER_L3LEN_SHIFT		24

#define SQ_TASK_INFO1_MD_TYPE_MASK		0xFFU
#define SQ_TASK_INFO1_INNER_L4LEN_MASK		0xFFU
#define SQ_TASK_INFO1_INNER_L3LEN_MASK		0xFFU

#define SQ_TASK_INFO1_SET(val, member)			\
	(((u32)(val) & SQ_TASK_INFO1_##member##_MASK) <<	\
			SQ_TASK_INFO1_##member##_SHIFT)

/* fields of the task section word pkt_info2 */
#define SQ_TASK_INFO2_TUNNEL_L4LEN_SHIFT	0
#define SQ_TASK_INFO2_OUTER_L3LEN_SHIFT		8
#define SQ_TASK_INFO2_TUNNEL_L4TYPE_SHIFT	16
#define SQ_TASK_INFO2_OUTER_L3TYPE_SHIFT	24

#define SQ_TASK_INFO2_TUNNEL_L4LEN_MASK		0xFFU
#define SQ_TASK_INFO2_OUTER_L3LEN_MASK		0xFFU
#define SQ_TASK_INFO2_TUNNEL_L4TYPE_MASK	0x7U
#define SQ_TASK_INFO2_OUTER_L3TYPE_MASK		0x3U

#define SQ_TASK_INFO2_SET(val, member)			\
	(((u32)(val) & SQ_TASK_INFO2_##member##_MASK) <<	\
			SQ_TASK_INFO2_##member##_SHIFT)

/* fields of the task section word addressed by the INFO4 macros */
#define SQ_TASK_INFO4_L2TYPE_SHIFT		31

#define SQ_TASK_INFO4_L2TYPE_MASK		0x1U

#define SQ_TASK_INFO4_SET(val, member)		\
	(((u32)(val) & SQ_TASK_INFO4_##member##_MASK) << \
			SQ_TASK_INFO4_##member##_SHIFT)
149
/* SQ_DB: shift/mask pairs for the fields of the send queue doorbell info */
#define SQ_DB_OFF				0x00000800
#define SQ_DB_INFO_HI_PI_SHIFT			0
#define SQ_DB_INFO_QID_SHIFT			8
#define SQ_DB_INFO_CFLAG_SHIFT			23
#define SQ_DB_INFO_COS_SHIFT			24
#define SQ_DB_INFO_TYPE_SHIFT			27

#define SQ_DB_INFO_HI_PI_MASK			0xFFU
#define SQ_DB_INFO_QID_MASK			0x3FFU
#define SQ_DB_INFO_CFLAG_MASK			0x1U
#define SQ_DB_INFO_COS_MASK			0x7U
#define SQ_DB_INFO_TYPE_MASK			0x1FU

/* mask val to the width of "member" and move it to its bit position */
#define SQ_DB_INFO_SET(val, member)		\
	(((u32)(val) & SQ_DB_INFO_##member##_MASK) <<	\
			SQ_DB_INFO_##member##_SHIFT)

#define SQ_DB					1
#define SQ_CFLAG_DP				0	/* CFLAG_DATA_PATH */

/* the producer index is split: low 8 bits pick the slot, the rest is "high" */
#define SQ_DB_PI_LOW_MASK			0xFF
#define SQ_DB_PI_LOW(pi)			((pi) & SQ_DB_PI_LOW_MASK)
#define SQ_DB_PI_HI_SHIFT			8
#define SQ_DB_PI_HIGH(pi)			((pi) >> SQ_DB_PI_HI_SHIFT)
/* 64-bit doorbell slot: db_addr + SQ_DB_OFF bytes, indexed by the low pi bits */
#define SQ_DB_ADDR(sq, pi)		\
	((u64 *)((u8 __iomem *)((sq)->db_addr) + SQ_DB_OFF) + SQ_DB_PI_LOW(pi))

/* txq wq operations */
#define HINIC_GET_SQ_WQE_MASK(txq)		((txq)->wq->mask)

/* consumer index read through cons_idx_addr (big endian), wrapped to the ring */
#define HINIC_GET_SQ_HW_CI(txq)	\
	((be16_to_cpu(*(txq)->cons_idx_addr)) & HINIC_GET_SQ_WQE_MASK(txq))

/* software consumer index, wrapped to the ring */
#define HINIC_GET_SQ_LOCAL_CI(txq)	\
	(((txq)->wq->cons_idx) & HINIC_GET_SQ_WQE_MASK(txq))

/* advance the software consumer index and add wqebb_cnt back to delta */
#define HINIC_UPDATE_SQ_LOCAL_CI(txq, wqebb_cnt)	\
	do {						\
		(txq)->wq->cons_idx += (wqebb_cnt);	\
		(txq)->wq->delta += (wqebb_cnt);	\
	} while (0)

/* one less than delta is reported as free */
#define HINIC_GET_SQ_FREE_WQEBBS(txq)	((txq)->wq->delta - 1)

#define HINIC_IS_SQ_EMPTY(txq)	(((txq)->wq->delta) == ((txq)->q_depth))

#define BUF_DESC_SIZE_SHIFT		4

/* bytes of a WQE carrying num_sge buffer descriptors */
#define HINIC_SQ_WQE_SIZE(num_sge)		\
	(sizeof(struct hinic_sq_ctrl) + sizeof(struct hinic_sq_task) +	\
			(unsigned int)((num_sge) << BUF_DESC_SIZE_SHIFT))

/* number of WQEBBs needed for a WQE with num_sge buffer descriptors */
#define HINIC_SQ_WQEBB_CNT(num_sge)	\
	((int)(ALIGN(HINIC_SQ_WQE_SIZE((u32)(num_sge)), \
			HINIC_SQ_WQEBB_SIZE) >> HINIC_SQ_WQEBB_SHIFT))
205
206
207 static inline void hinic_sq_wqe_cpu_to_be32(void *data, int nr_wqebb)
208 {
209 #if defined(__X86_64_SSE__)
210         int i;
211         __m128i *wqe_line = (__m128i *)data;
212         __m128i shuf_mask = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10,
213                                         11, 4, 5, 6, 7, 0, 1, 2, 3);
214
215         for (i = 0; i < nr_wqebb; i++) {
216                 /* convert 64B wqebb using 4 SSE instructions */
217                 wqe_line[0] = _mm_shuffle_epi8(wqe_line[0], shuf_mask);
218                 wqe_line[1] = _mm_shuffle_epi8(wqe_line[1], shuf_mask);
219                 wqe_line[2] = _mm_shuffle_epi8(wqe_line[2], shuf_mask);
220                 wqe_line[3] = _mm_shuffle_epi8(wqe_line[3], shuf_mask);
221                 wqe_line += 4;
222         }
223 #elif defined(__ARM64_NEON__)
224         int i;
225         uint8x16_t *wqe_line = (uint8x16_t *)data;
226         const uint8x16_t shuf_mask = {3, 2, 1, 0, 7, 6, 5, 4, 11, 10,
227                                         9, 8, 15, 14, 13, 12};
228
229         for (i = 0; i < nr_wqebb; i++) {
230                 wqe_line[0] = vqtbl1q_u8(wqe_line[0], shuf_mask);
231                 wqe_line[1] = vqtbl1q_u8(wqe_line[1], shuf_mask);
232                 wqe_line[2] = vqtbl1q_u8(wqe_line[2], shuf_mask);
233                 wqe_line[3] = vqtbl1q_u8(wqe_line[3], shuf_mask);
234                 wqe_line += 4;
235         }
236 #else
237         hinic_cpu_to_be32(data, nr_wqebb * HINIC_SQ_WQEBB_SIZE);
238 #endif
239 }
240
241 static inline void hinic_sge_cpu_to_be32(void *data, int nr_sge)
242 {
243 #if defined(__X86_64_SSE__)
244         int i;
245         __m128i *sge_line = (__m128i *)data;
246         __m128i shuf_mask = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10,
247                                         11, 4, 5, 6, 7, 0, 1, 2, 3);
248
249         for (i = 0; i < nr_sge; i++) {
250                 /* convert 16B sge using 1 SSE instructions */
251                 *sge_line = _mm_shuffle_epi8(*sge_line, shuf_mask);
252                 sge_line++;
253         }
254 #elif defined(__ARM64_NEON__)
255         int i;
256         uint8x16_t *sge_line = (uint8x16_t *)data;
257         const uint8x16_t shuf_mask = {3, 2, 1, 0, 7, 6, 5, 4, 11, 10,
258                                         9, 8, 15, 14, 13, 12};
259
260         for (i = 0; i < nr_sge; i++) {
261                 *sge_line = vqtbl1q_u8(*sge_line, shuf_mask);
262                 sge_line++;
263         }
264 #else
265         hinic_cpu_to_be32(data, nr_sge * sizeof(struct hinic_sq_bufdesc));
266 #endif
267 }
268
269 void hinic_txq_get_stats(struct hinic_txq *txq, struct hinic_txq_stats *stats)
270 {
271         if (!txq || !stats) {
272                 PMD_DRV_LOG(ERR, "Txq or stats is NULL");
273                 return;
274         }
275
276         memcpy(stats, &txq->txq_stats, sizeof(txq->txq_stats));
277 }
278
279 void hinic_txq_stats_reset(struct hinic_txq *txq)
280 {
281         struct hinic_txq_stats *txq_stats;
282
283         if (txq == NULL)
284                 return;
285
286         txq_stats = &txq->txq_stats;
287         memset(txq_stats, 0, sizeof(*txq_stats));
288 }
289
290 static inline struct rte_mbuf *hinic_copy_tx_mbuf(struct hinic_nic_dev *nic_dev,
291                                                   struct rte_mbuf *mbuf,
292                                                   u16 sge_cnt)
293 {
294         struct rte_mbuf *dst_mbuf;
295         u32 offset = 0;
296         u16 i;
297
298         if (unlikely(!nic_dev->cpy_mpool))
299                 return NULL;
300
301         dst_mbuf = rte_pktmbuf_alloc(nic_dev->cpy_mpool);
302         if (unlikely(!dst_mbuf))
303                 return NULL;
304
305         dst_mbuf->data_off = 0;
306         for (i = 0; i < sge_cnt; i++) {
307                 rte_memcpy((char *)dst_mbuf->buf_addr + offset,
308                            (char *)mbuf->buf_addr + mbuf->data_off,
309                            mbuf->data_len);
310                 dst_mbuf->data_len += mbuf->data_len;
311                 offset += mbuf->data_len;
312                 mbuf = mbuf->next;
313         }
314
315         return dst_mbuf;
316 }
317
318 static inline bool hinic_mbuf_dma_map_sge(struct hinic_txq *txq,
319                                           struct rte_mbuf *mbuf,
320                                           struct hinic_sq_bufdesc *sges,
321                                           struct hinic_wqe_info *sqe_info)
322 {
323         dma_addr_t dma_addr;
324         u16 i, around_sges;
325         u16 nb_segs = sqe_info->sge_cnt - sqe_info->cpy_mbuf_cnt;
326         u16 real_nb_segs = mbuf->nb_segs;
327         struct hinic_sq_bufdesc *sge_idx = sges;
328
329         if (unlikely(sqe_info->around)) {
330                 /* parts of wqe is in sq bottom while parts
331                  * of wqe is in sq head
332                  */
333                 i = 0;
334                 for (sge_idx = sges; (u64)sge_idx <= txq->sq_bot_sge_addr;
335                      sge_idx++) {
336                         dma_addr = rte_mbuf_data_iova(mbuf);
337                         hinic_set_sge((struct hinic_sge *)sge_idx, dma_addr,
338                                       mbuf->data_len);
339                         mbuf = mbuf->next;
340                         i++;
341                 }
342
343                 around_sges = nb_segs - i;
344                 sge_idx = (struct hinic_sq_bufdesc *)
345                                 ((void *)txq->sq_head_addr);
346                 for (; i < nb_segs; i++) {
347                         dma_addr = rte_mbuf_data_iova(mbuf);
348                         hinic_set_sge((struct hinic_sge *)sge_idx, dma_addr,
349                                       mbuf->data_len);
350                         mbuf = mbuf->next;
351                         sge_idx++;
352                 }
353
354                 /* covert sges at head to big endian */
355                 hinic_sge_cpu_to_be32((void *)txq->sq_head_addr, around_sges);
356         } else {
357                 /* wqe is in continuous space */
358                 for (i = 0; i < nb_segs; i++) {
359                         dma_addr = rte_mbuf_data_iova(mbuf);
360                         hinic_set_sge((struct hinic_sge *)sge_idx, dma_addr,
361                                       mbuf->data_len);
362                         mbuf = mbuf->next;
363                         sge_idx++;
364                 }
365         }
366
367         /* for now: support non-tso over 17 sge, copy the last 2 mbuf */
368         if (unlikely(sqe_info->cpy_mbuf_cnt != 0)) {
369                 /* copy invalid mbuf segs to a valid buffer, lost performance */
370                 txq->txq_stats.cpy_pkts += 1;
371                 mbuf = hinic_copy_tx_mbuf(txq->nic_dev, mbuf,
372                                           real_nb_segs - nb_segs);
373                 if (unlikely(!mbuf))
374                         return false;
375
376                 txq->tx_info[sqe_info->pi].cpy_mbuf = mbuf;
377
378                 /* deal with the last mbuf */
379                 dma_addr = rte_mbuf_data_iova(mbuf);
380                 hinic_set_sge((struct hinic_sge *)sge_idx, dma_addr,
381                               mbuf->data_len);
382                 if (unlikely(sqe_info->around))
383                         hinic_sge_cpu_to_be32((void *)sge_idx, 1);
384         }
385
386         return true;
387 }
388
389 static inline void hinic_fill_sq_wqe_header(struct hinic_sq_ctrl *ctrl,
390                                             u32 queue_info, int nr_descs,
391                                             u8 owner)
392 {
393         u32 ctrl_size, task_size, bufdesc_size;
394
395         ctrl_size = SIZE_8BYTES(sizeof(struct hinic_sq_ctrl));
396         task_size = SIZE_8BYTES(sizeof(struct hinic_sq_task));
397         bufdesc_size = HINIC_BUF_DESC_SIZE(nr_descs);
398
399         ctrl->ctrl_fmt = SQ_CTRL_SET(bufdesc_size, BUFDESC_SECT_LEN) |
400                         SQ_CTRL_SET(task_size, TASKSECT_LEN)    |
401                         SQ_CTRL_SET(SQ_NORMAL_WQE, DATA_FORMAT) |
402                         SQ_CTRL_SET(ctrl_size, LEN)             |
403                         SQ_CTRL_SET(owner, OWNER);
404
405         ctrl->queue_info = queue_info;
406         ctrl->queue_info |= SQ_CTRL_QUEUE_INFO_SET(1U, UC);
407
408         if (!SQ_CTRL_QUEUE_INFO_GET(ctrl->queue_info, MSS)) {
409                 ctrl->queue_info |=
410                         SQ_CTRL_QUEUE_INFO_SET(TX_MSS_DEFAULT, MSS);
411         } else if (SQ_CTRL_QUEUE_INFO_GET(ctrl->queue_info, MSS) < TX_MSS_MIN) {
412                 /* mss should not be less than 80 */
413                 ctrl->queue_info =
414                                 SQ_CTRL_QUEUE_INFO_CLEAR(ctrl->queue_info, MSS);
415                 ctrl->queue_info |= SQ_CTRL_QUEUE_INFO_SET(TX_MSS_MIN, MSS);
416         }
417 }
418
419 static inline bool hinic_is_tso_sge_valid(struct rte_mbuf *mbuf,
420                                           struct hinic_tx_offload_info
421                                           *poff_info,
422                                           struct hinic_wqe_info *sqe_info)
423 {
424         u32 total_len, limit_len, checked_len, left_len;
425         u32 i, first_mss_sges, left_sges;
426         struct rte_mbuf *mbuf_head, *mbuf_pre;
427
428         left_sges = mbuf->nb_segs;
429         mbuf_head = mbuf;
430
431         /* tso sge number validation */
432         if (unlikely(left_sges >= HINIC_NONTSO_PKT_MAX_SGE)) {
433                 checked_len = 0;
434                 limit_len = mbuf->tso_segsz + poff_info->payload_offset;
435                 first_mss_sges = HINIC_NONTSO_PKT_MAX_SGE;
436
437                 /* each continues 17 mbufs segmust do one check */
438                 while (left_sges >= HINIC_NONTSO_PKT_MAX_SGE) {
439                         /* total len of first 16 mbufs must equal
440                          * or more than limit_len
441                          */
442                         total_len = 0;
443                         for (i = 0; i < first_mss_sges; i++) {
444                                 total_len += mbuf->data_len;
445                                 mbuf_pre = mbuf;
446                                 mbuf = mbuf->next;
447                                 if (total_len >= limit_len) {
448                                         limit_len = mbuf_head->tso_segsz;
449                                         break;
450                                 }
451                         }
452
453                         checked_len += total_len;
454
455                         /* try to copy if not valid */
456                         if (unlikely(first_mss_sges == i)) {
457                                 left_sges -= first_mss_sges;
458                                 checked_len -= mbuf_pre->data_len;
459
460                                 left_len = mbuf_head->pkt_len - checked_len;
461                                 if (left_len > HINIC_COPY_MBUF_SIZE)
462                                         return false;
463
464                                 sqe_info->sge_cnt = mbuf_head->nb_segs -
465                                                         left_sges;
466                                 sqe_info->cpy_mbuf_cnt = 1;
467
468                                 return true;
469                         }
470                         first_mss_sges = (HINIC_NONTSO_PKT_MAX_SGE - 1);
471
472                         /* continue next 16 mbufs */
473                         left_sges -= (i + 1);
474                 } /* end of while */
475         }
476
477         sqe_info->sge_cnt = mbuf_head->nb_segs;
478         return true;
479 }
480
481 static inline void
482 hinic_set_l4_csum_info(struct hinic_sq_task *task,
483                 u32 *queue_info, struct hinic_tx_offload_info *poff_info)
484 {
485         u32 tcp_udp_cs, sctp = 0;
486         u16 l2hdr_len;
487
488         if (unlikely(poff_info->inner_l4_type == SCTP_OFFLOAD_ENABLE))
489                 sctp = 1;
490
491         tcp_udp_cs = poff_info->inner_l4_tcp_udp;
492
493         if (poff_info->tunnel_type == TUNNEL_UDP_CSUM ||
494             poff_info->tunnel_type == TUNNEL_UDP_NO_CSUM) {
495                 l2hdr_len =  poff_info->outer_l2_len;
496
497                 task->pkt_info2 |=
498                 SQ_TASK_INFO2_SET(poff_info->outer_l3_type, OUTER_L3TYPE) |
499                 SQ_TASK_INFO2_SET(poff_info->outer_l3_len, OUTER_L3LEN);
500                 task->pkt_info2 |=
501                 SQ_TASK_INFO2_SET(poff_info->tunnel_type, TUNNEL_L4TYPE) |
502                 SQ_TASK_INFO2_SET(poff_info->tunnel_length, TUNNEL_L4LEN);
503         } else {
504                 l2hdr_len = poff_info->inner_l2_len;
505         }
506
507         task->pkt_info0 |= SQ_TASK_INFO0_SET(l2hdr_len, L2HDR_LEN);
508         task->pkt_info1 |=
509                 SQ_TASK_INFO1_SET(poff_info->inner_l3_len, INNER_L3LEN);
510         task->pkt_info0 |=
511                 SQ_TASK_INFO0_SET(poff_info->inner_l3_type, INNER_L3TYPE);
512         task->pkt_info1 |=
513                 SQ_TASK_INFO1_SET(poff_info->inner_l4_len, INNER_L4LEN);
514         task->pkt_info0 |=
515                 SQ_TASK_INFO0_SET(poff_info->inner_l4_type, L4OFFLOAD);
516         *queue_info |=
517                 SQ_CTRL_QUEUE_INFO_SET(poff_info->payload_offset, PLDOFF) |
518                 SQ_CTRL_QUEUE_INFO_SET(tcp_udp_cs, TCPUDP_CS) |
519                 SQ_CTRL_QUEUE_INFO_SET(sctp, SCTP);
520 }
521
522 static inline void
523 hinic_set_tso_info(struct hinic_sq_task *task,
524                 u32 *queue_info, struct rte_mbuf *mbuf,
525                 struct hinic_tx_offload_info *poff_info)
526 {
527         hinic_set_l4_csum_info(task, queue_info, poff_info);
528
529         /* wqe for tso */
530         task->pkt_info0 |=
531                 SQ_TASK_INFO0_SET(poff_info->inner_l3_type, INNER_L3TYPE);
532         task->pkt_info0 |= SQ_TASK_INFO0_SET(TSO_ENABLE, TSO_UFO);
533         *queue_info |= SQ_CTRL_QUEUE_INFO_SET(TSO_ENABLE, TSO);
534         /* qsf was initialized in prepare_sq_wqe */
535         *queue_info = SQ_CTRL_QUEUE_INFO_CLEAR(*queue_info, MSS);
536         *queue_info |= SQ_CTRL_QUEUE_INFO_SET(mbuf->tso_segsz, MSS);
537 }
538
539 static inline void
540 hinic_set_vlan_tx_offload(struct hinic_sq_task *task,
541                         u32 *queue_info, u16 vlan_tag, u16 vlan_pri)
542 {
543         task->pkt_info0 |= SQ_TASK_INFO0_SET(vlan_tag, VLAN_TAG) |
544                                 SQ_TASK_INFO0_SET(1U, VLAN_OFFLOAD);
545
546         *queue_info |= SQ_CTRL_QUEUE_INFO_SET(vlan_pri, PRI);
547 }
548
549 static inline void
550 hinic_fill_tx_offload_info(struct rte_mbuf *mbuf,
551                 struct hinic_sq_task *task, u32 *queue_info,
552                 struct hinic_tx_offload_info *tx_off_info)
553 {
554         u16 vlan_tag;
555         uint64_t ol_flags = mbuf->ol_flags;
556
557         /* clear DW0~2 of task section for offload */
558         task->pkt_info0 = 0;
559         task->pkt_info1 = 0;
560         task->pkt_info2 = 0;
561
562         /* Base VLAN */
563         if (unlikely(ol_flags & PKT_TX_VLAN_PKT)) {
564                 vlan_tag = mbuf->vlan_tci;
565                 hinic_set_vlan_tx_offload(task, queue_info, vlan_tag,
566                                           vlan_tag >> VLAN_PRIO_SHIFT);
567         }
568
569         /* non checksum or tso */
570         if (unlikely(!(ol_flags & HINIC_TX_CKSUM_OFFLOAD_MASK)))
571                 return;
572
573         if ((ol_flags & PKT_TX_TCP_SEG))
574                 /* set tso info for task and qsf */
575                 hinic_set_tso_info(task, queue_info, mbuf, tx_off_info);
576         else /* just support l4 checksum offload */
577                 hinic_set_l4_csum_info(task, queue_info, tx_off_info);
578 }
579
580 static inline void hinic_xmit_mbuf_cleanup(struct hinic_txq *txq)
581 {
582         struct hinic_tx_info *tx_info;
583         struct rte_mbuf *mbuf, *m, *mbuf_free[HINIC_MAX_TX_FREE_BULK];
584         int i, nb_free = 0;
585         u16 hw_ci, sw_ci, sq_mask;
586         int wqebb_cnt = 0;
587
588         hw_ci = HINIC_GET_SQ_HW_CI(txq);
589         sw_ci = HINIC_GET_SQ_LOCAL_CI(txq);
590         sq_mask = HINIC_GET_SQ_WQE_MASK(txq);
591
592         for (i = 0; i < txq->tx_free_thresh; ++i) {
593                 tx_info = &txq->tx_info[sw_ci];
594                 if (hw_ci == sw_ci ||
595                         (((hw_ci - sw_ci) & sq_mask) < tx_info->wqebb_cnt))
596                         break;
597
598                 sw_ci = (sw_ci + tx_info->wqebb_cnt) & sq_mask;
599
600                 if (unlikely(tx_info->cpy_mbuf != NULL)) {
601                         rte_pktmbuf_free(tx_info->cpy_mbuf);
602                         tx_info->cpy_mbuf = NULL;
603                 }
604
605                 wqebb_cnt += tx_info->wqebb_cnt;
606                 mbuf = tx_info->mbuf;
607
608                 if (likely(mbuf->nb_segs == 1)) {
609                         m = rte_pktmbuf_prefree_seg(mbuf);
610                         tx_info->mbuf = NULL;
611
612                         if (unlikely(m == NULL))
613                                 continue;
614
615                         mbuf_free[nb_free++] = m;
616                         if (unlikely(m->pool != mbuf_free[0]->pool ||
617                                 nb_free >= HINIC_MAX_TX_FREE_BULK)) {
618                                 rte_mempool_put_bulk(mbuf_free[0]->pool,
619                                         (void **)mbuf_free, (nb_free - 1));
620                                 nb_free = 0;
621                                 mbuf_free[nb_free++] = m;
622                         }
623                 } else {
624                         rte_pktmbuf_free(mbuf);
625                         tx_info->mbuf = NULL;
626                 }
627         }
628
629         if (nb_free > 0)
630                 rte_mempool_put_bulk(mbuf_free[0]->pool, (void **)mbuf_free,
631                                      nb_free);
632
633         HINIC_UPDATE_SQ_LOCAL_CI(txq, wqebb_cnt);
634 }
635
636 static inline struct hinic_sq_wqe *
637 hinic_get_sq_wqe(struct hinic_txq *txq, int wqebb_cnt,
638                 struct hinic_wqe_info *wqe_info)
639 {
640         u32 cur_pi, end_pi;
641         u16 remain_wqebbs;
642         struct hinic_sq *sq = txq->sq;
643         struct hinic_wq *wq = txq->wq;
644
645         /* record current pi */
646         cur_pi = MASKED_WQE_IDX(wq, wq->prod_idx);
647         end_pi = cur_pi + wqebb_cnt;
648
649         /* update next pi and delta */
650         wq->prod_idx += wqebb_cnt;
651         wq->delta -= wqebb_cnt;
652
653         /* return current pi and owner */
654         wqe_info->pi = cur_pi;
655         wqe_info->owner = sq->owner;
656         wqe_info->around = 0;
657         wqe_info->seq_wqebbs = wqebb_cnt;
658
659         if (unlikely(end_pi >= txq->q_depth)) {
660                 /* update owner of next prod_idx */
661                 sq->owner = !sq->owner;
662
663                 /* turn around to head */
664                 if (unlikely(end_pi > txq->q_depth)) {
665                         wqe_info->around = 1;
666                         remain_wqebbs = txq->q_depth - cur_pi;
667                         wqe_info->seq_wqebbs = remain_wqebbs;
668                 }
669         }
670
671         return (struct hinic_sq_wqe *)WQ_WQE_ADDR(wq, cur_pi);
672 }
673
674 static inline uint16_t
675 hinic_ipv4_phdr_cksum(const struct rte_ipv4_hdr *ipv4_hdr, uint64_t ol_flags)
676 {
677         struct ipv4_psd_header {
678                 uint32_t src_addr; /* IP address of source host. */
679                 uint32_t dst_addr; /* IP address of destination host. */
680                 uint8_t  zero;     /* zero. */
681                 uint8_t  proto;    /* L4 protocol type. */
682                 uint16_t len;      /* L4 length. */
683         } psd_hdr;
684         uint8_t ihl;
685
686         psd_hdr.src_addr = ipv4_hdr->src_addr;
687         psd_hdr.dst_addr = ipv4_hdr->dst_addr;
688         psd_hdr.zero = 0;
689         psd_hdr.proto = ipv4_hdr->next_proto_id;
690         if (ol_flags & PKT_TX_TCP_SEG) {
691                 psd_hdr.len = 0;
692         } else {
693                 /* ipv4_hdr->version_ihl is uint8_t big endian, ihl locates
694                  * lower 4 bits and unit is 4 bytes
695                  */
696                 ihl = (ipv4_hdr->version_ihl & 0xF) << 2;
697                 psd_hdr.len =
698                 rte_cpu_to_be_16(rte_be_to_cpu_16(ipv4_hdr->total_length) -
699                                  ihl);
700         }
701         return rte_raw_cksum(&psd_hdr, sizeof(psd_hdr));
702 }
703
704 static inline uint16_t
705 hinic_ipv6_phdr_cksum(const struct rte_ipv6_hdr *ipv6_hdr, uint64_t ol_flags)
706 {
707         uint32_t sum;
708         struct {
709                 uint32_t len;   /* L4 length. */
710                 uint32_t proto; /* L4 protocol - top 3 bytes must be zero */
711         } psd_hdr;
712
713         psd_hdr.proto = (ipv6_hdr->proto << 24);
714         if (ol_flags & PKT_TX_TCP_SEG)
715                 psd_hdr.len = 0;
716         else
717                 psd_hdr.len = ipv6_hdr->payload_len;
718
719         sum = __rte_raw_cksum(ipv6_hdr->src_addr,
720                 sizeof(ipv6_hdr->src_addr) + sizeof(ipv6_hdr->dst_addr), 0);
721         sum = __rte_raw_cksum(&psd_hdr, sizeof(psd_hdr), sum);
722         return __rte_raw_cksum_reduce(sum);
723 }
724
725 static inline void
726 hinic_get_pld_offset(struct rte_mbuf *m, struct hinic_tx_offload_info *off_info,
727                      int outer_cs_flag)
728 {
729         uint64_t ol_flags = m->ol_flags;
730
731         if (outer_cs_flag == 1) {
732                 if ((ol_flags & PKT_TX_UDP_CKSUM) == PKT_TX_UDP_CKSUM) {
733                         off_info->payload_offset = m->outer_l2_len +
734                                 m->outer_l3_len + m->l2_len + m->l3_len;
735                 } else if ((ol_flags & PKT_TX_TCP_CKSUM) ||
736                                 (ol_flags & PKT_TX_TCP_SEG)) {
737                         off_info->payload_offset = m->outer_l2_len +
738                                         m->outer_l3_len + m->l2_len +
739                                         m->l3_len + m->l4_len;
740                 }
741         } else {
742                 if ((ol_flags & PKT_TX_UDP_CKSUM) == PKT_TX_UDP_CKSUM) {
743                         off_info->payload_offset = m->l2_len + m->l3_len;
744                 } else if ((ol_flags & PKT_TX_TCP_CKSUM) ||
745                         (ol_flags & PKT_TX_TCP_SEG)) {
746                         off_info->payload_offset = m->l2_len + m->l3_len +
747                                                    m->l4_len;
748                 }
749         }
750 }
751
752 static inline void
753 hinic_analyze_tx_info(struct rte_mbuf *mbuf,
754                       struct hinic_tx_offload_info *off_info)
755 {
756         struct rte_ether_hdr *eth_hdr;
757         struct rte_vlan_hdr *vlan_hdr;
758         struct rte_ipv4_hdr *ip4h;
759         u16 pkt_type;
760         u8 *hdr;
761
762         hdr = (u8 *)rte_pktmbuf_mtod(mbuf, u8*);
763         eth_hdr = (struct rte_ether_hdr *)hdr;
764         pkt_type = rte_be_to_cpu_16(eth_hdr->ether_type);
765
766         if (pkt_type == RTE_ETHER_TYPE_VLAN) {
767                 off_info->outer_l2_len = ETHER_LEN_WITH_VLAN;
768                 vlan_hdr = (struct rte_vlan_hdr *)(hdr + 1);
769                 pkt_type = rte_be_to_cpu_16(vlan_hdr->eth_proto);
770         } else {
771                 off_info->outer_l2_len = ETHER_LEN_NO_VLAN;
772         }
773
774         if (pkt_type == RTE_ETHER_TYPE_IPV4) {
775                 ip4h = (struct rte_ipv4_hdr *)(hdr + off_info->outer_l2_len);
776                 off_info->outer_l3_len = (ip4h->version_ihl & 0xf) <<
777                                         HEADER_LEN_OFFSET;
778         } else if (pkt_type == RTE_ETHER_TYPE_IPV6) {
779                 /* not support ipv6 extension header */
780                 off_info->outer_l3_len = sizeof(struct rte_ipv6_hdr);
781         }
782 }
783
784 static inline int
785 hinic_tx_offload_pkt_prepare(struct rte_mbuf *m,
786                                 struct hinic_tx_offload_info *off_info)
787 {
788         struct rte_ipv4_hdr *ipv4_hdr;
789         struct rte_ipv6_hdr *ipv6_hdr;
790         struct rte_tcp_hdr *tcp_hdr;
791         struct rte_udp_hdr *udp_hdr;
792         struct rte_ether_hdr *eth_hdr;
793         struct rte_vlan_hdr *vlan_hdr;
794         u16 eth_type = 0;
795         uint64_t inner_l3_offset;
796         uint64_t ol_flags = m->ol_flags;
797
798         /* Check if the packets set available offload flags */
799         if (!(ol_flags & HINIC_TX_CKSUM_OFFLOAD_MASK))
800                 return 0;
801
802         /* Support only vxlan offload */
803         if ((ol_flags & PKT_TX_TUNNEL_MASK) &&
804             !(ol_flags & PKT_TX_TUNNEL_VXLAN))
805                 return -ENOTSUP;
806
807 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
808         if (rte_validate_tx_offload(m) != 0)
809                 return -EINVAL;
810 #endif
811
812         if (ol_flags & PKT_TX_TUNNEL_VXLAN) {
813                 if ((ol_flags & PKT_TX_OUTER_IP_CKSUM) ||
814                     (ol_flags & PKT_TX_OUTER_IPV6) ||
815                     (ol_flags & PKT_TX_TCP_SEG)) {
816                         inner_l3_offset = m->l2_len + m->outer_l2_len +
817                                 m->outer_l3_len;
818                         off_info->outer_l2_len = m->outer_l2_len;
819                         off_info->outer_l3_len = m->outer_l3_len;
820                         /* just support vxlan tunneling pkt */
821                         off_info->inner_l2_len = m->l2_len - VXLANLEN -
822                                 sizeof(*udp_hdr);
823                         off_info->inner_l3_len = m->l3_len;
824                         off_info->inner_l4_len = m->l4_len;
825                         off_info->tunnel_length = m->l2_len;
826                         off_info->tunnel_type = TUNNEL_UDP_NO_CSUM;
827
828                         hinic_get_pld_offset(m, off_info,
829                                              HINIC_TX_OUTER_CHECKSUM_FLAG_SET);
830                 } else {
831                         inner_l3_offset = m->l2_len;
832                         hinic_analyze_tx_info(m, off_info);
833                         /* just support vxlan tunneling pkt */
834                         off_info->inner_l2_len = m->l2_len - VXLANLEN -
835                                 sizeof(*udp_hdr) - off_info->outer_l2_len -
836                                 off_info->outer_l3_len;
837                         off_info->inner_l3_len = m->l3_len;
838                         off_info->inner_l4_len = m->l4_len;
839                         off_info->tunnel_length = m->l2_len -
840                                 off_info->outer_l2_len - off_info->outer_l3_len;
841                         off_info->tunnel_type = TUNNEL_UDP_NO_CSUM;
842
843                         hinic_get_pld_offset(m, off_info,
844                                 HINIC_TX_OUTER_CHECKSUM_FLAG_NO_SET);
845                 }
846         } else {
847                 inner_l3_offset = m->l2_len;
848                 off_info->inner_l2_len = m->l2_len;
849                 off_info->inner_l3_len = m->l3_len;
850                 off_info->inner_l4_len = m->l4_len;
851                 off_info->tunnel_type = NOT_TUNNEL;
852
853                 hinic_get_pld_offset(m, off_info,
854                                      HINIC_TX_OUTER_CHECKSUM_FLAG_NO_SET);
855         }
856
857         /* invalid udp or tcp header */
858         if (unlikely(off_info->payload_offset > MAX_PLD_OFFSET))
859                 return -EINVAL;
860
861         /* Process outter udp pseudo-header checksum */
862         if ((ol_flags & PKT_TX_TUNNEL_VXLAN) && ((ol_flags & PKT_TX_TCP_SEG) ||
863                         (ol_flags & PKT_TX_OUTER_IP_CKSUM) ||
864                         (ol_flags & PKT_TX_OUTER_IPV6))) {
865
866                 /* inner_l4_tcp_udp csum should be setted to calculate outter
867                  * udp checksum when vxlan packets without inner l3 and l4
868                  */
869                 off_info->inner_l4_tcp_udp = 1;
870
871                 eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
872                 eth_type = rte_be_to_cpu_16(eth_hdr->ether_type);
873
874                 if (eth_type == RTE_ETHER_TYPE_VLAN) {
875                         vlan_hdr = (struct rte_vlan_hdr *)(eth_hdr + 1);
876                         eth_type = rte_be_to_cpu_16(vlan_hdr->eth_proto);
877                 }
878
879                 if (eth_type == RTE_ETHER_TYPE_IPV4) {
880                         ipv4_hdr =
881                         rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *,
882                                                 m->outer_l2_len);
883                         off_info->outer_l3_type = IPV4_PKT_WITH_CHKSUM_OFFLOAD;
884                         ipv4_hdr->hdr_checksum = 0;
885
886                         udp_hdr = (struct rte_udp_hdr *)((char *)ipv4_hdr +
887                                                         m->outer_l3_len);
888                         udp_hdr->dgram_cksum = 0;
889                 } else if (eth_type == RTE_ETHER_TYPE_IPV6) {
890                         off_info->outer_l3_type = IPV6_PKT;
891                         ipv6_hdr =
892                         rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *,
893                                                 m->outer_l2_len);
894
895                         udp_hdr =
896                         rte_pktmbuf_mtod_offset(m, struct rte_udp_hdr *,
897                                                 (m->outer_l2_len +
898                                                 m->outer_l3_len));
899                         udp_hdr->dgram_cksum = 0;
900                 }
901         } else if (ol_flags & PKT_TX_OUTER_IPV4) {
902                 off_info->tunnel_type = TUNNEL_UDP_NO_CSUM;
903                 off_info->inner_l4_tcp_udp = 1;
904                 off_info->outer_l3_type = IPV4_PKT_NO_CHKSUM_OFFLOAD;
905         }
906
907         if (ol_flags & PKT_TX_IPV4)
908                 off_info->inner_l3_type = (ol_flags & PKT_TX_IP_CKSUM) ?
909                                         IPV4_PKT_WITH_CHKSUM_OFFLOAD :
910                                         IPV4_PKT_NO_CHKSUM_OFFLOAD;
911         else if (ol_flags & PKT_TX_IPV6)
912                 off_info->inner_l3_type = IPV6_PKT;
913
914         /* Process the pseudo-header checksum */
915         if ((ol_flags & PKT_TX_L4_MASK) == PKT_TX_UDP_CKSUM) {
916                 if (ol_flags & PKT_TX_IPV4) {
917                         ipv4_hdr =
918                         rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *,
919                                                 inner_l3_offset);
920
921                         if (ol_flags & PKT_TX_IP_CKSUM)
922                                 ipv4_hdr->hdr_checksum = 0;
923
924                         udp_hdr = (struct rte_udp_hdr *)((char *)ipv4_hdr +
925                                                                 m->l3_len);
926                         udp_hdr->dgram_cksum =
927                                 hinic_ipv4_phdr_cksum(ipv4_hdr, ol_flags);
928                 } else {
929                         ipv6_hdr =
930                         rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *,
931                                                 inner_l3_offset);
932
933                         udp_hdr =
934                         rte_pktmbuf_mtod_offset(m, struct rte_udp_hdr *,
935                                                 (inner_l3_offset + m->l3_len));
936                         udp_hdr->dgram_cksum =
937                                 hinic_ipv6_phdr_cksum(ipv6_hdr, ol_flags);
938                 }
939
940                 off_info->inner_l4_type = UDP_OFFLOAD_ENABLE;
941                 off_info->inner_l4_tcp_udp = 1;
942         } else if (((ol_flags & PKT_TX_L4_MASK) == PKT_TX_TCP_CKSUM) ||
943                         (ol_flags & PKT_TX_TCP_SEG)) {
944                 if (ol_flags & PKT_TX_IPV4) {
945                         ipv4_hdr =
946                         rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *,
947                                                 inner_l3_offset);
948
949                         if (ol_flags & PKT_TX_IP_CKSUM)
950                                 ipv4_hdr->hdr_checksum = 0;
951
952                         /* non-TSO tcp */
953                         tcp_hdr = (struct rte_tcp_hdr *)((char *)ipv4_hdr +
954                                                                 m->l3_len);
955                         tcp_hdr->cksum =
956                                 hinic_ipv4_phdr_cksum(ipv4_hdr, ol_flags);
957                 } else {
958                         ipv6_hdr =
959                         rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *,
960                                                 inner_l3_offset);
961                         /* non-TSO tcp */
962                         tcp_hdr =
963                         rte_pktmbuf_mtod_offset(m, struct rte_tcp_hdr *,
964                                                 (inner_l3_offset + m->l3_len));
965                         tcp_hdr->cksum =
966                                 hinic_ipv6_phdr_cksum(ipv6_hdr, ol_flags);
967                 }
968
969                 off_info->inner_l4_type = TCP_OFFLOAD_ENABLE;
970                 off_info->inner_l4_tcp_udp = 1;
971         } else if ((ol_flags & PKT_TX_L4_MASK) == PKT_TX_SCTP_CKSUM) {
972                 off_info->inner_l4_type = SCTP_OFFLOAD_ENABLE;
973                 off_info->inner_l4_tcp_udp = 0;
974                 off_info->inner_l4_len = sizeof(struct rte_sctp_hdr);
975         }
976
977         return 0;
978 }
979
980 static inline bool hinic_get_sge_txoff_info(struct rte_mbuf *mbuf_pkt,
981                                             struct hinic_wqe_info *sqe_info,
982                                             struct hinic_tx_offload_info
983                                             *off_info)
984 {
985         u16  i, total_len, sge_cnt = mbuf_pkt->nb_segs;
986         struct rte_mbuf *mbuf;
987         int ret;
988
989         memset(off_info, 0, sizeof(*off_info));
990
991         ret = hinic_tx_offload_pkt_prepare(mbuf_pkt, off_info);
992         if (unlikely(ret))
993                 return false;
994
995         sqe_info->cpy_mbuf_cnt = 0;
996
997         /* non tso mbuf */
998         if (likely(!(mbuf_pkt->ol_flags & PKT_TX_TCP_SEG))) {
999                 if (unlikely(mbuf_pkt->pkt_len > MAX_SINGLE_SGE_SIZE)) {
1000                         /* non tso packet len must less than 64KB */
1001                         return false;
1002                 } else if (unlikely(HINIC_NONTSO_SEG_NUM_INVALID(sge_cnt))) {
1003                         /* non tso packet buffer number must less than 17
1004                          * the mbuf segs more than 17 must copy to one buffer
1005                          */
1006                         total_len = 0;
1007                         mbuf = mbuf_pkt;
1008                         for (i = 0; i < (HINIC_NONTSO_PKT_MAX_SGE - 1) ; i++) {
1009                                 total_len += mbuf->data_len;
1010                                 mbuf = mbuf->next;
1011                         }
1012
1013                         /* default support copy total 4k mbuf segs */
1014                         if ((u32)(total_len + (u16)HINIC_COPY_MBUF_SIZE) <
1015                                   mbuf_pkt->pkt_len)
1016                                 return false;
1017
1018                         sqe_info->sge_cnt = HINIC_NONTSO_PKT_MAX_SGE;
1019                         sqe_info->cpy_mbuf_cnt = 1;
1020                         return true;
1021                 }
1022
1023                 /* valid non tso mbuf */
1024                 sqe_info->sge_cnt = sge_cnt;
1025         } else {
1026                 /* tso mbuf */
1027                 if (unlikely(HINIC_TSO_SEG_NUM_INVALID(sge_cnt)))
1028                         /* too many mbuf segs */
1029                         return false;
1030
1031                 /* check tso mbuf segs are valid or not */
1032                 if (unlikely(!hinic_is_tso_sge_valid(mbuf_pkt,
1033                              off_info, sqe_info)))
1034                         return false;
1035         }
1036
1037         return true;
1038 }
1039
1040 static inline void hinic_sq_write_db(struct hinic_sq *sq, int cos)
1041 {
1042         u16 prod_idx;
1043         u32 hi_prod_idx;
1044         struct hinic_sq_db sq_db;
1045
1046         prod_idx = MASKED_SQ_IDX(sq, sq->wq->prod_idx);
1047         hi_prod_idx = SQ_DB_PI_HIGH(prod_idx);
1048
1049         sq_db.db_info = SQ_DB_INFO_SET(hi_prod_idx, HI_PI) |
1050                         SQ_DB_INFO_SET(SQ_DB, TYPE) |
1051                         SQ_DB_INFO_SET(SQ_CFLAG_DP, CFLAG) |
1052                         SQ_DB_INFO_SET(cos, COS) |
1053                         SQ_DB_INFO_SET(sq->q_id, QID);
1054
1055         /* Data should be written to HW in Big Endian Format */
1056         sq_db.db_info = cpu_to_be32(sq_db.db_info);
1057
1058         /* Write all before the doorbell */
1059         rte_wmb();
1060         writel(sq_db.db_info, SQ_DB_ADDR(sq, prod_idx));
1061 }
1062
1063 u16 hinic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, u16 nb_pkts)
1064 {
1065         int free_wqebb_cnt, wqe_wqebb_cnt;
1066         u32 queue_info, tx_bytes = 0;
1067         u16 nb_tx;
1068         struct hinic_wqe_info sqe_info;
1069         struct hinic_tx_offload_info off_info;
1070         struct rte_mbuf *mbuf_pkt;
1071         struct hinic_txq *txq = tx_queue;
1072         struct hinic_tx_info *tx_info;
1073         struct hinic_sq_wqe *sq_wqe;
1074         struct hinic_sq_task *task;
1075
1076         /* reclaim tx mbuf before xmit new packet */
1077         if (HINIC_GET_SQ_FREE_WQEBBS(txq) < txq->tx_free_thresh)
1078                 hinic_xmit_mbuf_cleanup(txq);
1079
1080         /* tx loop routine */
1081         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
1082                 mbuf_pkt = *tx_pkts++;
1083                 queue_info = 0;
1084
1085                 /* 1. parse sge and tx offlod info from mbuf */
1086                 if (unlikely(!hinic_get_sge_txoff_info(mbuf_pkt,
1087                                                        &sqe_info, &off_info))) {
1088                         txq->txq_stats.off_errs++;
1089                         break;
1090                 }
1091
1092                 /* 2. try to get enough wqebb */
1093                 wqe_wqebb_cnt = HINIC_SQ_WQEBB_CNT(sqe_info.sge_cnt);
1094                 free_wqebb_cnt = HINIC_GET_SQ_FREE_WQEBBS(txq);
1095                 if (unlikely(wqe_wqebb_cnt > free_wqebb_cnt)) {
1096                         /* reclaim again */
1097                         hinic_xmit_mbuf_cleanup(txq);
1098                         free_wqebb_cnt = HINIC_GET_SQ_FREE_WQEBBS(txq);
1099                         if (unlikely(wqe_wqebb_cnt > free_wqebb_cnt)) {
1100                                 txq->txq_stats.tx_busy += (nb_pkts - nb_tx);
1101                                 break;
1102                         }
1103                 }
1104
1105                 /* 3. get sq tail wqe address from wqe_page,
1106                  * sq have enough wqebb for this packet
1107                  */
1108                 sq_wqe = hinic_get_sq_wqe(txq, wqe_wqebb_cnt, &sqe_info);
1109
1110                 /* 4. fill sq wqe sge section */
1111                 if (unlikely(!hinic_mbuf_dma_map_sge(txq, mbuf_pkt,
1112                                                      sq_wqe->buf_descs,
1113                                                      &sqe_info))) {
1114                         hinic_return_sq_wqe(txq->nic_dev->hwdev, txq->q_id,
1115                                             wqe_wqebb_cnt, sqe_info.owner);
1116                         txq->txq_stats.off_errs++;
1117                         break;
1118                 }
1119
1120                 /* 5. fill sq wqe task section and queue info */
1121                 task = &sq_wqe->task;
1122
1123                 /* tx packet offload configure */
1124                 hinic_fill_tx_offload_info(mbuf_pkt, task, &queue_info,
1125                                            &off_info);
1126
1127                 /* 6. record tx info */
1128                 tx_info = &txq->tx_info[sqe_info.pi];
1129                 tx_info->mbuf = mbuf_pkt;
1130                 tx_info->wqebb_cnt = wqe_wqebb_cnt;
1131
1132                 /* 7. fill sq wqe header section */
1133                 hinic_fill_sq_wqe_header(&sq_wqe->ctrl, queue_info,
1134                                          sqe_info.sge_cnt, sqe_info.owner);
1135
1136                 /* 8.convert continue or bottom wqe byteorder to big endian */
1137                 hinic_sq_wqe_cpu_to_be32(sq_wqe, sqe_info.seq_wqebbs);
1138
1139                 tx_bytes += mbuf_pkt->pkt_len;
1140         }
1141
1142         /* 9. write sq doorbell in burst mode */
1143         if (nb_tx) {
1144                 hinic_sq_write_db(txq->sq, txq->cos);
1145
1146                 txq->txq_stats.packets += nb_tx;
1147                 txq->txq_stats.bytes += tx_bytes;
1148         }
1149         txq->txq_stats.burst_pkts = nb_tx;
1150
1151         return nb_tx;
1152 }
1153
1154 void hinic_free_all_tx_mbufs(struct hinic_txq *txq)
1155 {
1156         u16 ci;
1157         struct hinic_nic_dev *nic_dev = txq->nic_dev;
1158         struct hinic_tx_info *tx_info;
1159         int free_wqebbs = hinic_get_sq_free_wqebbs(nic_dev->hwdev,
1160                                                    txq->q_id) + 1;
1161
1162         while (free_wqebbs < txq->q_depth) {
1163                 ci = hinic_get_sq_local_ci(nic_dev->hwdev, txq->q_id);
1164
1165                 tx_info = &txq->tx_info[ci];
1166
1167                 if (unlikely(tx_info->cpy_mbuf != NULL)) {
1168                         rte_pktmbuf_free(tx_info->cpy_mbuf);
1169                         tx_info->cpy_mbuf = NULL;
1170                 }
1171
1172                 rte_pktmbuf_free(tx_info->mbuf);
1173                 hinic_update_sq_local_ci(nic_dev->hwdev, txq->q_id,
1174                                          tx_info->wqebb_cnt);
1175
1176                 free_wqebbs += tx_info->wqebb_cnt;
1177                 tx_info->mbuf = NULL;
1178         }
1179 }
1180
1181 void hinic_free_all_tx_resources(struct rte_eth_dev *eth_dev)
1182 {
1183         u16 q_id;
1184         struct hinic_nic_dev *nic_dev =
1185                                 HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(eth_dev);
1186
1187         for (q_id = 0; q_id < nic_dev->num_sq; q_id++) {
1188                 eth_dev->data->tx_queues[q_id] = NULL;
1189
1190                 if (nic_dev->txqs[q_id] == NULL)
1191                         continue;
1192
1193                 /* stop tx queue free tx mbuf */
1194                 hinic_free_all_tx_mbufs(nic_dev->txqs[q_id]);
1195                 hinic_free_tx_resources(nic_dev->txqs[q_id]);
1196
1197                 /* free txq */
1198                 kfree(nic_dev->txqs[q_id]);
1199                 nic_dev->txqs[q_id] = NULL;
1200         }
1201 }
1202
1203 void hinic_free_all_tx_mbuf(struct rte_eth_dev *eth_dev)
1204 {
1205         u16 q_id;
1206         struct hinic_nic_dev *nic_dev =
1207                                 HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(eth_dev);
1208
1209         for (q_id = 0; q_id < nic_dev->num_sq; q_id++)
1210                 /* stop tx queue free tx mbuf */
1211                 hinic_free_all_tx_mbufs(nic_dev->txqs[q_id]);
1212 }
1213
1214 int hinic_setup_tx_resources(struct hinic_txq *txq)
1215 {
1216         u64 tx_info_sz;
1217
1218         tx_info_sz = txq->q_depth * sizeof(*txq->tx_info);
1219         txq->tx_info = kzalloc_aligned(tx_info_sz, GFP_KERNEL);
1220         if (!txq->tx_info)
1221                 return -ENOMEM;
1222
1223         return HINIC_OK;
1224 }
1225
1226 void hinic_free_tx_resources(struct hinic_txq *txq)
1227 {
1228         if (txq->tx_info == NULL)
1229                 return;
1230
1231         kfree(txq->tx_info);
1232         txq->tx_info = NULL;
1233 }
1234
1235 int hinic_create_sq(struct hinic_hwdev *hwdev, u16 q_id, u16 sq_depth)
1236 {
1237         int err;
1238         struct hinic_nic_io *nic_io = hwdev->nic_io;
1239         struct hinic_qp *qp = &nic_io->qps[q_id];
1240         struct hinic_sq *sq = &qp->sq;
1241         void __iomem *db_addr;
1242         volatile u32 *ci_addr;
1243
1244         sq->sq_depth = sq_depth;
1245         nic_io->sq_depth = sq_depth;
1246
1247         /* alloc wq */
1248         err = hinic_wq_allocate(nic_io->hwdev, &nic_io->sq_wq[q_id],
1249                                 HINIC_SQ_WQEBB_SHIFT, nic_io->sq_depth);
1250         if (err) {
1251                 PMD_DRV_LOG(ERR, "Failed to allocate WQ for SQ");
1252                 return err;
1253         }
1254
1255         /* alloc sq doorbell space */
1256         err = hinic_alloc_db_addr(nic_io->hwdev, &db_addr);
1257         if (err) {
1258                 PMD_DRV_LOG(ERR, "Failed to init db addr");
1259                 goto alloc_db_err;
1260         }
1261
1262         /* clear hardware ci */
1263         ci_addr = (volatile u32 *)HINIC_CI_VADDR(nic_io->ci_vaddr_base, q_id);
1264         *ci_addr = 0;
1265
1266         sq->q_id = q_id;
1267         sq->wq = &nic_io->sq_wq[q_id];
1268         sq->owner = 1;
1269         sq->cons_idx_addr = (volatile u16 *)ci_addr;
1270         sq->db_addr = db_addr;
1271
1272         return HINIC_OK;
1273
1274 alloc_db_err:
1275         hinic_wq_free(nic_io->hwdev, &nic_io->sq_wq[q_id]);
1276
1277         return err;
1278 }
1279
1280 void hinic_destroy_sq(struct hinic_hwdev *hwdev, u16 q_id)
1281 {
1282         struct hinic_nic_io *nic_io;
1283         struct hinic_qp *qp;
1284
1285         nic_io = hwdev->nic_io;
1286         qp = &nic_io->qps[q_id];
1287
1288         if (qp->sq.wq == NULL)
1289                 return;
1290
1291         hinic_free_db_addr(nic_io->hwdev, qp->sq.db_addr);
1292         hinic_wq_free(nic_io->hwdev, qp->sq.wq);
1293         qp->sq.wq = NULL;
1294 }