4d999678fa32717295fea4b8a2b7175589614554
[dpdk.git] / drivers / net / hinic / hinic_pmd_tx.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2017 Huawei Technologies Co., Ltd
3  */
4
5 #include <rte_mbuf.h>
6 #include <rte_tcp.h>
7 #include <rte_sctp.h>
8 #include <rte_udp.h>
9 #include <rte_ip.h>
10 #ifdef __ARM64_NEON__
11 #include <arm_neon.h>
12 #endif
13
14 #include "base/hinic_compat.h"
15 #include "base/hinic_pmd_hwdev.h"
16 #include "base/hinic_pmd_hwif.h"
17 #include "base/hinic_pmd_wq.h"
18 #include "base/hinic_pmd_nicio.h"
19 #include "base/hinic_pmd_niccfg.h"
20 #include "hinic_pmd_ethdev.h"
21 #include "hinic_pmd_tx.h"
22
/* packet header and tx offload info */
#define ETHER_LEN_NO_VLAN               14
#define ETHER_LEN_WITH_VLAN             18
#define HEADER_LEN_OFFSET               2
#define VXLANLEN                        8
#define MAX_PLD_OFFSET                  221
#define MAX_SINGLE_SGE_SIZE             65536
#define TSO_ENABLE                      1
/* MSS written into queue_info when the caller left the MSS field at zero */
#define TX_MSS_DEFAULT                  0x3E00
/* smallest MSS accepted in queue_info (0x50 == 80) */
#define TX_MSS_MIN                      0x50

#define HINIC_NONTSO_PKT_MAX_SGE                17      /* non-tso max sge 17 */
#define HINIC_NONTSO_SEG_NUM_INVALID(num)       \
                        ((num) > HINIC_NONTSO_PKT_MAX_SGE)

#define HINIC_TSO_PKT_MAX_SGE                   127     /* tso max sge 127 */
#define HINIC_TSO_SEG_NUM_INVALID(num)          ((num) > HINIC_TSO_PKT_MAX_SGE)

#define HINIC_TX_OUTER_CHECKSUM_FLAG_SET       1
#define HINIC_TX_OUTER_CHECKSUM_FLAG_NO_SET    0

/*
 * Length of a buffer descriptor section holding nr_descs descriptors, as
 * converted by SIZE_8BYTES().
 * sizeof(struct hinic_sq_bufdesc) == 16, shift 4.
 * The argument is parenthesized before the cast so that an expression
 * argument is converted as a whole rather than only its first operand.
 */
#define HINIC_BUF_DESC_SIZE(nr_descs)   (SIZE_8BYTES(((u32)(nr_descs)) << 4))

/* wrap a queue index with the work queue mask of sq */
#define MASKED_SQ_IDX(sq, idx)          ((idx) & (sq)->wq->mask)
48
/* SQ_CTRL */
/*
 * Field layout of the ctrl_fmt word of a SQ WQE control section
 * (assembled with SQ_CTRL_SET() in hinic_fill_sq_wqe_header()):
 *   BUFDESC_SECT_LEN  bits  7:0
 *   TASKSECT_LEN      bits 20:16
 *   DATA_FORMAT       bit  22
 *   LEN               bits 30:29
 *   OWNER             bit  31
 */
#define SQ_CTRL_BUFDESC_SECT_LEN_SHIFT          0
#define SQ_CTRL_TASKSECT_LEN_SHIFT              16
#define SQ_CTRL_DATA_FORMAT_SHIFT               22
#define SQ_CTRL_LEN_SHIFT                       29
#define SQ_CTRL_OWNER_SHIFT                     31

#define SQ_CTRL_BUFDESC_SECT_LEN_MASK           0xFFU
#define SQ_CTRL_TASKSECT_LEN_MASK               0x1FU
#define SQ_CTRL_DATA_FORMAT_MASK                0x1U
#define SQ_CTRL_LEN_MASK                        0x3U
#define SQ_CTRL_OWNER_MASK                      0x1U

/* mask val to the width of member and move it to member's bit position */
#define SQ_CTRL_SET(val, member)        \
        (((val) & SQ_CTRL_##member##_MASK) << SQ_CTRL_##member##_SHIFT)

/*
 * Field layout of the queue_info word of a SQ WQE control section:
 *   PLDOFF     bits  9:2
 *   UFO        bit  10
 *   TSO        bit  11
 *   TCPUDP_CS  bit  12
 *   MSS        bits 26:13
 *   SCTP       bit  27
 *   UC         bit  28
 *   PRI        bits 31:29
 */
#define SQ_CTRL_QUEUE_INFO_PLDOFF_SHIFT         2
#define SQ_CTRL_QUEUE_INFO_UFO_SHIFT            10
#define SQ_CTRL_QUEUE_INFO_TSO_SHIFT            11
#define SQ_CTRL_QUEUE_INFO_TCPUDP_CS_SHIFT      12
#define SQ_CTRL_QUEUE_INFO_MSS_SHIFT            13
#define SQ_CTRL_QUEUE_INFO_SCTP_SHIFT           27
#define SQ_CTRL_QUEUE_INFO_UC_SHIFT             28
#define SQ_CTRL_QUEUE_INFO_PRI_SHIFT            29

#define SQ_CTRL_QUEUE_INFO_PLDOFF_MASK          0xFFU
#define SQ_CTRL_QUEUE_INFO_UFO_MASK             0x1U
#define SQ_CTRL_QUEUE_INFO_TSO_MASK             0x1U
#define SQ_CTRL_QUEUE_INFO_TCPUDP_CS_MASK       0x1U
#define SQ_CTRL_QUEUE_INFO_MSS_MASK             0x3FFFU
#define SQ_CTRL_QUEUE_INFO_SCTP_MASK            0x1U
#define SQ_CTRL_QUEUE_INFO_UC_MASK              0x1U
#define SQ_CTRL_QUEUE_INFO_PRI_MASK             0x7U

/* build the queue_info bits for member from val (val is truncated to u32) */
#define SQ_CTRL_QUEUE_INFO_SET(val, member)     \
        (((u32)(val) & SQ_CTRL_QUEUE_INFO_##member##_MASK) <<   \
                        SQ_CTRL_QUEUE_INFO_##member##_SHIFT)

/* extract member from a queue_info word, right-aligned */
#define SQ_CTRL_QUEUE_INFO_GET(val, member)     \
        (((val) >> SQ_CTRL_QUEUE_INFO_##member##_SHIFT) &       \
                        SQ_CTRL_QUEUE_INFO_##member##_MASK)

/* return val with all bits of member cleared; val itself is not modified */
#define SQ_CTRL_QUEUE_INFO_CLEAR(val, member)   \
        ((val) & (~(SQ_CTRL_QUEUE_INFO_##member##_MASK << \
                        SQ_CTRL_QUEUE_INFO_##member##_SHIFT)))
94
/*
 * Field layout of the pkt_info0 word of a SQ WQE task section:
 *   L2HDR_LEN     bits  7:0
 *   L4OFFLOAD     bits  9:8
 *   INNER_L3TYPE  bits 11:10
 *   VLAN_OFFLOAD  bit  12
 *   PARSE_FLAG    bit  13
 *   UFO_AVD       bit  14
 *   TSO_UFO       bit  15
 *   VLAN_TAG      bits 31:16
 */
#define SQ_TASK_INFO0_L2HDR_LEN_SHIFT           0
#define SQ_TASK_INFO0_L4OFFLOAD_SHIFT           8
#define SQ_TASK_INFO0_INNER_L3TYPE_SHIFT        10
#define SQ_TASK_INFO0_VLAN_OFFLOAD_SHIFT        12
#define SQ_TASK_INFO0_PARSE_FLAG_SHIFT          13
#define SQ_TASK_INFO0_UFO_AVD_SHIFT             14
#define SQ_TASK_INFO0_TSO_UFO_SHIFT             15
#define SQ_TASK_INFO0_VLAN_TAG_SHIFT            16

#define SQ_TASK_INFO0_L2HDR_LEN_MASK            0xFFU
#define SQ_TASK_INFO0_L4OFFLOAD_MASK            0x3U
#define SQ_TASK_INFO0_INNER_L3TYPE_MASK         0x3U
#define SQ_TASK_INFO0_VLAN_OFFLOAD_MASK         0x1U
#define SQ_TASK_INFO0_PARSE_FLAG_MASK           0x1U
#define SQ_TASK_INFO0_UFO_AVD_MASK              0x1U
#define SQ_TASK_INFO0_TSO_UFO_MASK              0x1U
#define SQ_TASK_INFO0_VLAN_TAG_MASK             0xFFFFU

/* build the pkt_info0 bits for member from val (val is truncated to u32) */
#define SQ_TASK_INFO0_SET(val, member)                  \
        (((u32)(val) & SQ_TASK_INFO0_##member##_MASK) <<        \
                        SQ_TASK_INFO0_##member##_SHIFT)

/*
 * Field layout of the pkt_info1 word of a SQ WQE task section:
 *   MD_TYPE      bits 15:8
 *   INNER_L4LEN  bits 23:16
 *   INNER_L3LEN  bits 31:24
 */
#define SQ_TASK_INFO1_MD_TYPE_SHIFT             8
#define SQ_TASK_INFO1_INNER_L4LEN_SHIFT         16
#define SQ_TASK_INFO1_INNER_L3LEN_SHIFT         24

#define SQ_TASK_INFO1_MD_TYPE_MASK              0xFFU
#define SQ_TASK_INFO1_INNER_L4LEN_MASK          0xFFU
#define SQ_TASK_INFO1_INNER_L3LEN_MASK          0xFFU

/* build the pkt_info1 bits for member from val (no u32 cast is applied) */
#define SQ_TASK_INFO1_SET(val, member)                  \
        (((val) & SQ_TASK_INFO1_##member##_MASK) <<     \
                        SQ_TASK_INFO1_##member##_SHIFT)

/*
 * Field layout of the pkt_info2 word of a SQ WQE task section:
 *   TUNNEL_L4LEN   bits  7:0
 *   OUTER_L3LEN    bits 15:8
 *   TUNNEL_L4TYPE  bits 18:16
 *   OUTER_L3TYPE   bits 25:24
 */
#define SQ_TASK_INFO2_TUNNEL_L4LEN_SHIFT        0
#define SQ_TASK_INFO2_OUTER_L3LEN_SHIFT         8
#define SQ_TASK_INFO2_TUNNEL_L4TYPE_SHIFT       16
#define SQ_TASK_INFO2_OUTER_L3TYPE_SHIFT        24

#define SQ_TASK_INFO2_TUNNEL_L4LEN_MASK         0xFFU
#define SQ_TASK_INFO2_OUTER_L3LEN_MASK          0xFFU
#define SQ_TASK_INFO2_TUNNEL_L4TYPE_MASK        0x7U
#define SQ_TASK_INFO2_OUTER_L3TYPE_MASK         0x3U

/* build the pkt_info2 bits for member from val (no u32 cast is applied) */
#define SQ_TASK_INFO2_SET(val, member)                  \
        (((val) & SQ_TASK_INFO2_##member##_MASK) <<     \
                        SQ_TASK_INFO2_##member##_SHIFT)

/*
 * L2TYPE is bit 31 of its word.
 * NOTE(review): presumably the pkt_info4 word of the task section; the user
 * of this macro is not visible in this part of the file - confirm.
 */
#define SQ_TASK_INFO4_L2TYPE_SHIFT              31

#define SQ_TASK_INFO4_L2TYPE_MASK               0x1U

#define SQ_TASK_INFO4_SET(val, member)          \
        (((u32)(val) & SQ_TASK_INFO4_##member##_MASK) << \
                        SQ_TASK_INFO4_##member##_SHIFT)
150
/* SQ_DB */
/* byte offset added to sq->db_addr by SQ_DB_ADDR() */
#define SQ_DB_OFF                               0x00000800
/*
 * Field layout of the doorbell info word built with SQ_DB_INFO_SET():
 *   HI_PI  bits  7:0
 *   QID    bits 17:8
 *   CFLAG  bit  23
 *   COS    bits 26:24
 *   TYPE   bits 31:27
 */
#define SQ_DB_INFO_HI_PI_SHIFT                  0
#define SQ_DB_INFO_QID_SHIFT                    8
#define SQ_DB_INFO_CFLAG_SHIFT                  23
#define SQ_DB_INFO_COS_SHIFT                    24
#define SQ_DB_INFO_TYPE_SHIFT                   27

#define SQ_DB_INFO_HI_PI_MASK                   0xFFU
#define SQ_DB_INFO_QID_MASK                     0x3FFU
#define SQ_DB_INFO_CFLAG_MASK                   0x1U
#define SQ_DB_INFO_COS_MASK                     0x7U
#define SQ_DB_INFO_TYPE_MASK                    0x1FU
/* build the doorbell info bits for member from val (truncated to u32) */
#define SQ_DB_INFO_SET(val, member)             \
        (((u32)(val) & SQ_DB_INFO_##member##_MASK) <<   \
                        SQ_DB_INFO_##member##_SHIFT)

/* NOTE(review): presumably the value for the doorbell TYPE field - confirm */
#define SQ_DB                                   1
#define SQ_CFLAG_DP                             0       /* CFLAG_DATA_PATH */

/* split a producer index into its low 8 bits and the remaining high bits */
#define SQ_DB_PI_LOW_MASK                       0xFF
#define SQ_DB_PI_LOW(pi)                        ((pi) & SQ_DB_PI_LOW_MASK)
#define SQ_DB_PI_HI_SHIFT                       8
#define SQ_DB_PI_HIGH(pi)                       ((pi) >> SQ_DB_PI_HI_SHIFT)
/*
 * Doorbell address for producer index pi: a u64 pointer at
 * sq->db_addr + SQ_DB_OFF, advanced by SQ_DB_PI_LOW(pi) u64 elements
 * (i.e. 8 bytes per step, since the addition is done on a u64 pointer).
 */
#define SQ_DB_ADDR(sq, pi)              \
        ((u64 *)((u8 __iomem *)((sq)->db_addr) + SQ_DB_OFF) + SQ_DB_PI_LOW(pi))
177
/* txq wq operations */
#define HINIC_GET_SQ_WQE_MASK(txq)              ((txq)->wq->mask)

/* consumer index reported through cons_idx_addr (stored big endian), masked */
#define HINIC_GET_SQ_HW_CI(txq) \
        ((be16_to_cpu(*(txq)->cons_idx_addr)) & HINIC_GET_SQ_WQE_MASK(txq))

/* driver-side consumer index, masked to the queue size */
#define HINIC_GET_SQ_LOCAL_CI(txq)      \
        (((txq)->wq->cons_idx) & HINIC_GET_SQ_WQE_MASK(txq))

/*
 * Advance the local consumer index and give wqebb_cnt WQEBBs back to the
 * free count. wqebb_cnt is evaluated twice, so it must be free of side
 * effects.
 */
#define HINIC_UPDATE_SQ_LOCAL_CI(txq, wqebb_cnt)        \
        do {                                            \
                (txq)->wq->cons_idx += (wqebb_cnt);     \
                (txq)->wq->delta += (wqebb_cnt);        \
        } while (0)

/* one WQEBB is always held back from the free count */
#define HINIC_GET_SQ_FREE_WQEBBS(txq)   ((txq)->wq->delta - 1)

#define HINIC_IS_SQ_EMPTY(txq)  (((txq)->wq->delta) == ((txq)->q_depth))

#define BUF_DESC_SIZE_SHIFT             4

/* bytes of a WQE with num_sge buffer descriptors: ctrl + task + 16 per sge */
#define HINIC_SQ_WQE_SIZE(num_sge)              \
        (sizeof(struct hinic_sq_ctrl) + sizeof(struct hinic_sq_task) +  \
                        (unsigned int)((num_sge) << BUF_DESC_SIZE_SHIFT))

/*
 * Number of WQEBBs needed for a WQE with num_sge buffer descriptors: the WQE
 * size rounded up to HINIC_SQ_WQEBB_SIZE, divided by the WQEBB size.
 * The argument and the whole expansion are parenthesized so the macro can be
 * used safely with expression arguments and inside larger expressions.
 */
#define HINIC_SQ_WQEBB_CNT(num_sge)     \
        ((int)(ALIGN(HINIC_SQ_WQE_SIZE((u32)(num_sge)), \
                        HINIC_SQ_WQEBB_SIZE) >> HINIC_SQ_WQEBB_SHIFT))
206
207
208 static inline void hinic_sq_wqe_cpu_to_be32(void *data, int nr_wqebb)
209 {
210 #if defined(__X86_64_SSE__)
211         int i;
212         __m128i *wqe_line = (__m128i *)data;
213         __m128i shuf_mask = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10,
214                                         11, 4, 5, 6, 7, 0, 1, 2, 3);
215
216         for (i = 0; i < nr_wqebb; i++) {
217                 /* convert 64B wqebb using 4 SSE instructions */
218                 wqe_line[0] = _mm_shuffle_epi8(wqe_line[0], shuf_mask);
219                 wqe_line[1] = _mm_shuffle_epi8(wqe_line[1], shuf_mask);
220                 wqe_line[2] = _mm_shuffle_epi8(wqe_line[2], shuf_mask);
221                 wqe_line[3] = _mm_shuffle_epi8(wqe_line[3], shuf_mask);
222                 wqe_line += 4;
223         }
224 #elif defined(__ARM64_NEON__)
225         int i;
226         uint8x16_t *wqe_line = (uint8x16_t *)data;
227         const uint8x16_t shuf_mask = {3, 2, 1, 0, 7, 6, 5, 4, 11, 10,
228                                         9, 8, 15, 14, 13, 12};
229
230         for (i = 0; i < nr_wqebb; i++) {
231                 wqe_line[0] = vqtbl1q_u8(wqe_line[0], shuf_mask);
232                 wqe_line[1] = vqtbl1q_u8(wqe_line[1], shuf_mask);
233                 wqe_line[2] = vqtbl1q_u8(wqe_line[2], shuf_mask);
234                 wqe_line[3] = vqtbl1q_u8(wqe_line[3], shuf_mask);
235                 wqe_line += 4;
236         }
237 #else
238         hinic_cpu_to_be32(data, nr_wqebb * HINIC_SQ_WQEBB_SIZE);
239 #endif
240 }
241
242 static inline void hinic_sge_cpu_to_be32(void *data, int nr_sge)
243 {
244 #if defined(__X86_64_SSE__)
245         int i;
246         __m128i *sge_line = (__m128i *)data;
247         __m128i shuf_mask = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10,
248                                         11, 4, 5, 6, 7, 0, 1, 2, 3);
249
250         for (i = 0; i < nr_sge; i++) {
251                 /* convert 16B sge using 1 SSE instructions */
252                 *sge_line = _mm_shuffle_epi8(*sge_line, shuf_mask);
253                 sge_line++;
254         }
255 #elif defined(__ARM64_NEON__)
256         int i;
257         uint8x16_t *sge_line = (uint8x16_t *)data;
258         const uint8x16_t shuf_mask = {3, 2, 1, 0, 7, 6, 5, 4, 11, 10,
259                                         9, 8, 15, 14, 13, 12};
260
261         for (i = 0; i < nr_sge; i++) {
262                 *sge_line = vqtbl1q_u8(*sge_line, shuf_mask);
263                 sge_line++;
264         }
265 #else
266         hinic_cpu_to_be32(data, nr_sge * sizeof(struct hinic_sq_bufdesc));
267 #endif
268 }
269
270 void hinic_txq_get_stats(struct hinic_txq *txq, struct hinic_txq_stats *stats)
271 {
272         if (!txq || !stats) {
273                 PMD_DRV_LOG(ERR, "Txq or stats is NULL");
274                 return;
275         }
276
277         memcpy(stats, &txq->txq_stats, sizeof(txq->txq_stats));
278 }
279
280 void hinic_txq_stats_reset(struct hinic_txq *txq)
281 {
282         struct hinic_txq_stats *txq_stats;
283
284         if (txq == NULL)
285                 return;
286
287         txq_stats = &txq->txq_stats;
288         memset(txq_stats, 0, sizeof(*txq_stats));
289 }
290
291 static inline struct rte_mbuf *hinic_copy_tx_mbuf(struct hinic_nic_dev *nic_dev,
292                                                   struct rte_mbuf *mbuf,
293                                                   u16 sge_cnt)
294 {
295         struct rte_mbuf *dst_mbuf;
296         u32 offset = 0;
297         u16 i;
298
299         if (unlikely(!nic_dev->cpy_mpool))
300                 return NULL;
301
302         dst_mbuf = rte_pktmbuf_alloc(nic_dev->cpy_mpool);
303         if (unlikely(!dst_mbuf))
304                 return NULL;
305
306         dst_mbuf->data_off = 0;
307         for (i = 0; i < sge_cnt; i++) {
308                 rte_memcpy((char *)dst_mbuf->buf_addr + offset,
309                            (char *)mbuf->buf_addr + mbuf->data_off,
310                            mbuf->data_len);
311                 dst_mbuf->data_len += mbuf->data_len;
312                 offset += mbuf->data_len;
313                 mbuf = mbuf->next;
314         }
315
316         dst_mbuf->pkt_len = dst_mbuf->data_len;
317
318         return dst_mbuf;
319 }
320
321 static inline bool hinic_mbuf_dma_map_sge(struct hinic_txq *txq,
322                                           struct rte_mbuf *mbuf,
323                                           struct hinic_sq_bufdesc *sges,
324                                           struct hinic_wqe_info *sqe_info)
325 {
326         dma_addr_t dma_addr;
327         u16 i, around_sges;
328         u16 nb_segs = sqe_info->sge_cnt - sqe_info->cpy_mbuf_cnt;
329         u16 real_nb_segs = mbuf->nb_segs;
330         struct hinic_sq_bufdesc *sge_idx = sges;
331
332         if (unlikely(sqe_info->around)) {
333                 /* parts of wqe is in sq bottom while parts
334                  * of wqe is in sq head
335                  */
336                 i = 0;
337                 for (sge_idx = sges; (u64)sge_idx <= txq->sq_bot_sge_addr;
338                      sge_idx++) {
339                         if (unlikely(mbuf == NULL)) {
340                                 txq->txq_stats.mbuf_null++;
341                                 return false;
342                         }
343
344                         dma_addr = rte_mbuf_data_iova(mbuf);
345                         if (unlikely(mbuf->data_len == 0)) {
346                                 txq->txq_stats.sge_len0++;
347                                 return false;
348                         }
349                         hinic_set_sge((struct hinic_sge *)sge_idx, dma_addr,
350                                       mbuf->data_len);
351                         mbuf = mbuf->next;
352                         i++;
353                 }
354
355                 around_sges = nb_segs - i;
356                 sge_idx = (struct hinic_sq_bufdesc *)
357                                 ((void *)txq->sq_head_addr);
358                 for (; i < nb_segs; i++) {
359                         if (unlikely(mbuf == NULL)) {
360                                 txq->txq_stats.mbuf_null++;
361                                 return false;
362                         }
363
364                         dma_addr = rte_mbuf_data_iova(mbuf);
365                         if (unlikely(mbuf->data_len == 0)) {
366                                 txq->txq_stats.sge_len0++;
367                                 return false;
368                         }
369                         hinic_set_sge((struct hinic_sge *)sge_idx, dma_addr,
370                                       mbuf->data_len);
371                         mbuf = mbuf->next;
372                         sge_idx++;
373                 }
374
375                 /* covert sges at head to big endian */
376                 hinic_sge_cpu_to_be32((void *)txq->sq_head_addr, around_sges);
377         } else {
378                 /* wqe is in continuous space */
379                 for (i = 0; i < nb_segs; i++) {
380                         if (unlikely(mbuf == NULL)) {
381                                 txq->txq_stats.mbuf_null++;
382                                 return false;
383                         }
384
385                         dma_addr = rte_mbuf_data_iova(mbuf);
386                         if (unlikely(mbuf->data_len == 0)) {
387                                 txq->txq_stats.sge_len0++;
388                                 return false;
389                         }
390                         hinic_set_sge((struct hinic_sge *)sge_idx, dma_addr,
391                                       mbuf->data_len);
392                         mbuf = mbuf->next;
393                         sge_idx++;
394                 }
395         }
396
397         /* for now: support non-tso over 17 sge, copy the last 2 mbuf */
398         if (unlikely(sqe_info->cpy_mbuf_cnt != 0)) {
399                 /* copy invalid mbuf segs to a valid buffer, lost performance */
400                 txq->txq_stats.cpy_pkts += 1;
401                 mbuf = hinic_copy_tx_mbuf(txq->nic_dev, mbuf,
402                                           real_nb_segs - nb_segs);
403                 if (unlikely(!mbuf))
404                         return false;
405
406                 txq->tx_info[sqe_info->pi].cpy_mbuf = mbuf;
407
408                 /* deal with the last mbuf */
409                 dma_addr = rte_mbuf_data_iova(mbuf);
410                 if (unlikely(mbuf->data_len == 0)) {
411                         txq->txq_stats.sge_len0++;
412                         return false;
413                 }
414                 hinic_set_sge((struct hinic_sge *)sge_idx, dma_addr,
415                               mbuf->data_len);
416                 if (unlikely(sqe_info->around))
417                         hinic_sge_cpu_to_be32((void *)sge_idx, 1);
418         }
419
420         return true;
421 }
422
423 static inline void hinic_fill_sq_wqe_header(struct hinic_sq_ctrl *ctrl,
424                                             u32 queue_info, int nr_descs,
425                                             u8 owner)
426 {
427         u32 ctrl_size, task_size, bufdesc_size;
428
429         ctrl_size = SIZE_8BYTES(sizeof(struct hinic_sq_ctrl));
430         task_size = SIZE_8BYTES(sizeof(struct hinic_sq_task));
431         bufdesc_size = HINIC_BUF_DESC_SIZE(nr_descs);
432
433         ctrl->ctrl_fmt = SQ_CTRL_SET(bufdesc_size, BUFDESC_SECT_LEN) |
434                         SQ_CTRL_SET(task_size, TASKSECT_LEN)    |
435                         SQ_CTRL_SET(SQ_NORMAL_WQE, DATA_FORMAT) |
436                         SQ_CTRL_SET(ctrl_size, LEN)             |
437                         SQ_CTRL_SET(owner, OWNER);
438
439         ctrl->queue_info = queue_info;
440         ctrl->queue_info |= SQ_CTRL_QUEUE_INFO_SET(1U, UC);
441
442         if (!SQ_CTRL_QUEUE_INFO_GET(ctrl->queue_info, MSS)) {
443                 ctrl->queue_info |=
444                         SQ_CTRL_QUEUE_INFO_SET(TX_MSS_DEFAULT, MSS);
445         } else if (SQ_CTRL_QUEUE_INFO_GET(ctrl->queue_info, MSS) < TX_MSS_MIN) {
446                 /* mss should not be less than 80 */
447                 ctrl->queue_info =
448                                 SQ_CTRL_QUEUE_INFO_CLEAR(ctrl->queue_info, MSS);
449                 ctrl->queue_info |= SQ_CTRL_QUEUE_INFO_SET(TX_MSS_MIN, MSS);
450         }
451 }
452
453 static inline bool hinic_is_tso_sge_valid(struct rte_mbuf *mbuf,
454                                           struct hinic_tx_offload_info
455                                           *poff_info,
456                                           struct hinic_wqe_info *sqe_info)
457 {
458         u32 total_len, limit_len, checked_len, left_len, adjust_mss;
459         u32 i, first_mss_sges, left_sges;
460         struct rte_mbuf *mbuf_head, *mbuf_pre;
461
462         left_sges = mbuf->nb_segs;
463         mbuf_head = mbuf;
464
465         /* tso sge number validation */
466         if (unlikely(left_sges >= HINIC_NONTSO_PKT_MAX_SGE)) {
467                 checked_len = 0;
468                 adjust_mss = mbuf->tso_segsz >= TX_MSS_MIN ?
469                                 mbuf->tso_segsz : TX_MSS_MIN;
470                 limit_len = adjust_mss + poff_info->payload_offset;
471                 first_mss_sges = HINIC_NONTSO_PKT_MAX_SGE;
472
473                 /* each continues 17 mbufs segmust do one check */
474                 while (left_sges >= HINIC_NONTSO_PKT_MAX_SGE) {
475                         /* total len of first 16 mbufs must equal
476                          * or more than limit_len
477                          */
478                         total_len = 0;
479                         for (i = 0; i < first_mss_sges; i++) {
480                                 total_len += mbuf->data_len;
481                                 mbuf_pre = mbuf;
482                                 mbuf = mbuf->next;
483                                 if (total_len >= limit_len) {
484                                         limit_len = adjust_mss;
485                                         break;
486                                 }
487                         }
488
489                         checked_len += total_len;
490
491                         /* try to copy if not valid */
492                         if (unlikely(first_mss_sges == i)) {
493                                 left_sges -= first_mss_sges;
494                                 checked_len -= mbuf_pre->data_len;
495
496                                 left_len = mbuf_head->pkt_len - checked_len;
497                                 if (left_len > HINIC_COPY_MBUF_SIZE)
498                                         return false;
499
500                                 sqe_info->sge_cnt = mbuf_head->nb_segs -
501                                                         left_sges;
502                                 sqe_info->cpy_mbuf_cnt = 1;
503
504                                 return true;
505                         }
506                         first_mss_sges = (HINIC_NONTSO_PKT_MAX_SGE - 1);
507
508                         /* continue next 16 mbufs */
509                         left_sges -= (i + 1);
510                 } /* end of while */
511         }
512
513         sqe_info->sge_cnt = mbuf_head->nb_segs;
514         return true;
515 }
516
517 static inline void
518 hinic_set_l4_csum_info(struct hinic_sq_task *task,
519                 u32 *queue_info, struct hinic_tx_offload_info *poff_info)
520 {
521         u32 tcp_udp_cs, sctp = 0;
522         u16 l2hdr_len;
523
524         if (unlikely(poff_info->inner_l4_type == SCTP_OFFLOAD_ENABLE))
525                 sctp = 1;
526
527         tcp_udp_cs = poff_info->inner_l4_tcp_udp;
528
529         if (poff_info->tunnel_type == TUNNEL_UDP_CSUM ||
530             poff_info->tunnel_type == TUNNEL_UDP_NO_CSUM) {
531                 l2hdr_len =  poff_info->outer_l2_len;
532
533                 task->pkt_info2 |=
534                 SQ_TASK_INFO2_SET(poff_info->outer_l3_type, OUTER_L3TYPE) |
535                 SQ_TASK_INFO2_SET(poff_info->outer_l3_len, OUTER_L3LEN);
536                 task->pkt_info2 |=
537                 SQ_TASK_INFO2_SET(poff_info->tunnel_type, TUNNEL_L4TYPE) |
538                 SQ_TASK_INFO2_SET(poff_info->tunnel_length, TUNNEL_L4LEN);
539         } else {
540                 l2hdr_len = poff_info->inner_l2_len;
541         }
542
543         task->pkt_info0 |= SQ_TASK_INFO0_SET(l2hdr_len, L2HDR_LEN);
544         task->pkt_info1 |=
545                 SQ_TASK_INFO1_SET(poff_info->inner_l3_len, INNER_L3LEN);
546         task->pkt_info0 |=
547                 SQ_TASK_INFO0_SET(poff_info->inner_l3_type, INNER_L3TYPE);
548         task->pkt_info1 |=
549                 SQ_TASK_INFO1_SET(poff_info->inner_l4_len, INNER_L4LEN);
550         task->pkt_info0 |=
551                 SQ_TASK_INFO0_SET(poff_info->inner_l4_type, L4OFFLOAD);
552         *queue_info |=
553                 SQ_CTRL_QUEUE_INFO_SET(poff_info->payload_offset, PLDOFF) |
554                 SQ_CTRL_QUEUE_INFO_SET(tcp_udp_cs, TCPUDP_CS) |
555                 SQ_CTRL_QUEUE_INFO_SET(sctp, SCTP);
556 }
557
558 static inline void
559 hinic_set_tso_info(struct hinic_sq_task *task,
560                 u32 *queue_info, struct rte_mbuf *mbuf,
561                 struct hinic_tx_offload_info *poff_info)
562 {
563         hinic_set_l4_csum_info(task, queue_info, poff_info);
564
565         /* wqe for tso */
566         task->pkt_info0 |=
567                 SQ_TASK_INFO0_SET(poff_info->inner_l3_type, INNER_L3TYPE);
568         task->pkt_info0 |= SQ_TASK_INFO0_SET(TSO_ENABLE, TSO_UFO);
569         *queue_info |= SQ_CTRL_QUEUE_INFO_SET(TSO_ENABLE, TSO);
570         /* qsf was initialized in prepare_sq_wqe */
571         *queue_info = SQ_CTRL_QUEUE_INFO_CLEAR(*queue_info, MSS);
572         *queue_info |= SQ_CTRL_QUEUE_INFO_SET(mbuf->tso_segsz, MSS);
573 }
574
575 static inline void
576 hinic_set_vlan_tx_offload(struct hinic_sq_task *task,
577                         u32 *queue_info, u16 vlan_tag, u16 vlan_pri)
578 {
579         task->pkt_info0 |= SQ_TASK_INFO0_SET(vlan_tag, VLAN_TAG) |
580                                 SQ_TASK_INFO0_SET(1U, VLAN_OFFLOAD);
581
582         *queue_info |= SQ_CTRL_QUEUE_INFO_SET(vlan_pri, PRI);
583 }
584
585 static inline void
586 hinic_fill_tx_offload_info(struct rte_mbuf *mbuf,
587                 struct hinic_sq_task *task, u32 *queue_info,
588                 struct hinic_tx_offload_info *tx_off_info)
589 {
590         u16 vlan_tag;
591         uint64_t ol_flags = mbuf->ol_flags;
592
593         /* clear DW0~2 of task section for offload */
594         task->pkt_info0 = 0;
595         task->pkt_info1 = 0;
596         task->pkt_info2 = 0;
597
598         /* Base VLAN */
599         if (unlikely(ol_flags & PKT_TX_VLAN_PKT)) {
600                 vlan_tag = mbuf->vlan_tci;
601                 hinic_set_vlan_tx_offload(task, queue_info, vlan_tag,
602                                           vlan_tag >> VLAN_PRIO_SHIFT);
603         }
604
605         /* non checksum or tso */
606         if (unlikely(!(ol_flags & HINIC_TX_CKSUM_OFFLOAD_MASK)))
607                 return;
608
609         if ((ol_flags & PKT_TX_TCP_SEG))
610                 /* set tso info for task and qsf */
611                 hinic_set_tso_info(task, queue_info, mbuf, tx_off_info);
612         else /* just support l4 checksum offload */
613                 hinic_set_l4_csum_info(task, queue_info, tx_off_info);
614 }
615
616 static inline void hinic_xmit_mbuf_cleanup(struct hinic_txq *txq)
617 {
618         struct hinic_tx_info *tx_info;
619         struct rte_mbuf *mbuf, *m, *mbuf_free[HINIC_MAX_TX_FREE_BULK];
620         int i, nb_free = 0;
621         u16 hw_ci, sw_ci, sq_mask;
622         int wqebb_cnt = 0;
623
624         hw_ci = HINIC_GET_SQ_HW_CI(txq);
625         sw_ci = HINIC_GET_SQ_LOCAL_CI(txq);
626         sq_mask = HINIC_GET_SQ_WQE_MASK(txq);
627
628         for (i = 0; i < txq->tx_free_thresh; ++i) {
629                 tx_info = &txq->tx_info[sw_ci];
630                 if (hw_ci == sw_ci ||
631                         (((hw_ci - sw_ci) & sq_mask) < tx_info->wqebb_cnt))
632                         break;
633
634                 sw_ci = (sw_ci + tx_info->wqebb_cnt) & sq_mask;
635
636                 if (unlikely(tx_info->cpy_mbuf != NULL)) {
637                         rte_pktmbuf_free(tx_info->cpy_mbuf);
638                         tx_info->cpy_mbuf = NULL;
639                 }
640
641                 wqebb_cnt += tx_info->wqebb_cnt;
642                 mbuf = tx_info->mbuf;
643
644                 if (likely(mbuf->nb_segs == 1)) {
645                         m = rte_pktmbuf_prefree_seg(mbuf);
646                         tx_info->mbuf = NULL;
647
648                         if (unlikely(m == NULL))
649                                 continue;
650
651                         mbuf_free[nb_free++] = m;
652                         if (unlikely(m->pool != mbuf_free[0]->pool ||
653                                 nb_free >= HINIC_MAX_TX_FREE_BULK)) {
654                                 rte_mempool_put_bulk(mbuf_free[0]->pool,
655                                         (void **)mbuf_free, (nb_free - 1));
656                                 nb_free = 0;
657                                 mbuf_free[nb_free++] = m;
658                         }
659                 } else {
660                         rte_pktmbuf_free(mbuf);
661                         tx_info->mbuf = NULL;
662                 }
663         }
664
665         if (nb_free > 0)
666                 rte_mempool_put_bulk(mbuf_free[0]->pool, (void **)mbuf_free,
667                                      nb_free);
668
669         HINIC_UPDATE_SQ_LOCAL_CI(txq, wqebb_cnt);
670 }
671
672 static inline struct hinic_sq_wqe *
673 hinic_get_sq_wqe(struct hinic_txq *txq, int wqebb_cnt,
674                 struct hinic_wqe_info *wqe_info)
675 {
676         u32 cur_pi, end_pi;
677         u16 remain_wqebbs;
678         struct hinic_sq *sq = txq->sq;
679         struct hinic_wq *wq = txq->wq;
680
681         /* record current pi */
682         cur_pi = MASKED_WQE_IDX(wq, wq->prod_idx);
683         end_pi = cur_pi + wqebb_cnt;
684
685         /* update next pi and delta */
686         wq->prod_idx += wqebb_cnt;
687         wq->delta -= wqebb_cnt;
688
689         /* return current pi and owner */
690         wqe_info->pi = cur_pi;
691         wqe_info->owner = sq->owner;
692         wqe_info->around = 0;
693         wqe_info->seq_wqebbs = wqebb_cnt;
694
695         if (unlikely(end_pi >= txq->q_depth)) {
696                 /* update owner of next prod_idx */
697                 sq->owner = !sq->owner;
698
699                 /* turn around to head */
700                 if (unlikely(end_pi > txq->q_depth)) {
701                         wqe_info->around = 1;
702                         remain_wqebbs = txq->q_depth - cur_pi;
703                         wqe_info->seq_wqebbs = remain_wqebbs;
704                 }
705         }
706
707         return (struct hinic_sq_wqe *)WQ_WQE_ADDR(wq, cur_pi);
708 }
709
710 static inline uint16_t
711 hinic_ipv4_phdr_cksum(const struct rte_ipv4_hdr *ipv4_hdr, uint64_t ol_flags)
712 {
713         struct ipv4_psd_header {
714                 uint32_t src_addr; /* IP address of source host. */
715                 uint32_t dst_addr; /* IP address of destination host. */
716                 uint8_t  zero;     /* zero. */
717                 uint8_t  proto;    /* L4 protocol type. */
718                 uint16_t len;      /* L4 length. */
719         } psd_hdr;
720         uint8_t ihl;
721
722         psd_hdr.src_addr = ipv4_hdr->src_addr;
723         psd_hdr.dst_addr = ipv4_hdr->dst_addr;
724         psd_hdr.zero = 0;
725         psd_hdr.proto = ipv4_hdr->next_proto_id;
726         if (ol_flags & PKT_TX_TCP_SEG) {
727                 psd_hdr.len = 0;
728         } else {
729                 /* ipv4_hdr->version_ihl is uint8_t big endian, ihl locates
730                  * lower 4 bits and unit is 4 bytes
731                  */
732                 ihl = (ipv4_hdr->version_ihl & 0xF) << 2;
733                 psd_hdr.len =
734                 rte_cpu_to_be_16(rte_be_to_cpu_16(ipv4_hdr->total_length) -
735                                  ihl);
736         }
737         return rte_raw_cksum(&psd_hdr, sizeof(psd_hdr));
738 }
739
740 static inline uint16_t
741 hinic_ipv6_phdr_cksum(const struct rte_ipv6_hdr *ipv6_hdr, uint64_t ol_flags)
742 {
743         uint32_t sum;
744         struct {
745                 uint32_t len;   /* L4 length. */
746                 uint32_t proto; /* L4 protocol - top 3 bytes must be zero */
747         } psd_hdr;
748
749         psd_hdr.proto = (ipv6_hdr->proto << 24);
750         if (ol_flags & PKT_TX_TCP_SEG)
751                 psd_hdr.len = 0;
752         else
753                 psd_hdr.len = ipv6_hdr->payload_len;
754
755         sum = __rte_raw_cksum(ipv6_hdr->src_addr,
756                 sizeof(ipv6_hdr->src_addr) + sizeof(ipv6_hdr->dst_addr), 0);
757         sum = __rte_raw_cksum(&psd_hdr, sizeof(psd_hdr), sum);
758         return __rte_raw_cksum_reduce(sum);
759 }
760
761 static inline void
762 hinic_get_pld_offset(struct rte_mbuf *m, struct hinic_tx_offload_info *off_info,
763                      int outer_cs_flag)
764 {
765         uint64_t ol_flags = m->ol_flags;
766
767         if (outer_cs_flag == 1) {
768                 if ((ol_flags & PKT_TX_UDP_CKSUM) == PKT_TX_UDP_CKSUM) {
769                         off_info->payload_offset = m->outer_l2_len +
770                                 m->outer_l3_len + m->l2_len + m->l3_len;
771                 } else if ((ol_flags & PKT_TX_TCP_CKSUM) ||
772                                 (ol_flags & PKT_TX_TCP_SEG)) {
773                         off_info->payload_offset = m->outer_l2_len +
774                                         m->outer_l3_len + m->l2_len +
775                                         m->l3_len + m->l4_len;
776                 }
777         } else {
778                 if ((ol_flags & PKT_TX_UDP_CKSUM) == PKT_TX_UDP_CKSUM) {
779                         off_info->payload_offset = m->l2_len + m->l3_len;
780                 } else if ((ol_flags & PKT_TX_TCP_CKSUM) ||
781                         (ol_flags & PKT_TX_TCP_SEG)) {
782                         off_info->payload_offset = m->l2_len + m->l3_len +
783                                                    m->l4_len;
784                 }
785         }
786 }
787
788 static inline void
789 hinic_analyze_tx_info(struct rte_mbuf *mbuf,
790                       struct hinic_tx_offload_info *off_info)
791 {
792         struct rte_ether_hdr *eth_hdr;
793         struct rte_vlan_hdr *vlan_hdr;
794         struct rte_ipv4_hdr *ip4h;
795         u16 pkt_type;
796         u8 *hdr;
797
798         hdr = (u8 *)rte_pktmbuf_mtod(mbuf, u8*);
799         eth_hdr = (struct rte_ether_hdr *)hdr;
800         pkt_type = rte_be_to_cpu_16(eth_hdr->ether_type);
801
802         if (pkt_type == RTE_ETHER_TYPE_VLAN) {
803                 off_info->outer_l2_len = ETHER_LEN_WITH_VLAN;
804                 vlan_hdr = (struct rte_vlan_hdr *)(hdr + 1);
805                 pkt_type = rte_be_to_cpu_16(vlan_hdr->eth_proto);
806         } else {
807                 off_info->outer_l2_len = ETHER_LEN_NO_VLAN;
808         }
809
810         if (pkt_type == RTE_ETHER_TYPE_IPV4) {
811                 ip4h = (struct rte_ipv4_hdr *)(hdr + off_info->outer_l2_len);
812                 off_info->outer_l3_len = (ip4h->version_ihl & 0xf) <<
813                                         HEADER_LEN_OFFSET;
814         } else if (pkt_type == RTE_ETHER_TYPE_IPV6) {
815                 /* not support ipv6 extension header */
816                 off_info->outer_l3_len = sizeof(struct rte_ipv6_hdr);
817         }
818 }
819
820 static inline int
821 hinic_tx_offload_pkt_prepare(struct rte_mbuf *m,
822                                 struct hinic_tx_offload_info *off_info)
823 {
824         struct rte_ipv4_hdr *ipv4_hdr;
825         struct rte_ipv6_hdr *ipv6_hdr;
826         struct rte_tcp_hdr *tcp_hdr;
827         struct rte_udp_hdr *udp_hdr;
828         struct rte_ether_hdr *eth_hdr;
829         struct rte_vlan_hdr *vlan_hdr;
830         u16 eth_type = 0;
831         uint64_t inner_l3_offset;
832         uint64_t ol_flags = m->ol_flags;
833
834         /* Check if the packets set available offload flags */
835         if (!(ol_flags & HINIC_TX_CKSUM_OFFLOAD_MASK))
836                 return 0;
837
838         /* Support only vxlan offload */
839         if ((ol_flags & PKT_TX_TUNNEL_MASK) &&
840             !(ol_flags & PKT_TX_TUNNEL_VXLAN))
841                 return -ENOTSUP;
842
843 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
844         if (rte_validate_tx_offload(m) != 0)
845                 return -EINVAL;
846 #endif
847
848         if (ol_flags & PKT_TX_TUNNEL_VXLAN) {
849                 if ((ol_flags & PKT_TX_OUTER_IP_CKSUM) ||
850                     (ol_flags & PKT_TX_OUTER_IPV6) ||
851                     (ol_flags & PKT_TX_TCP_SEG)) {
852                         inner_l3_offset = m->l2_len + m->outer_l2_len +
853                                 m->outer_l3_len;
854                         off_info->outer_l2_len = m->outer_l2_len;
855                         off_info->outer_l3_len = m->outer_l3_len;
856                         /* just support vxlan tunneling pkt */
857                         off_info->inner_l2_len = m->l2_len - VXLANLEN -
858                                 sizeof(*udp_hdr);
859                         off_info->inner_l3_len = m->l3_len;
860                         off_info->inner_l4_len = m->l4_len;
861                         off_info->tunnel_length = m->l2_len;
862                         off_info->tunnel_type = TUNNEL_UDP_NO_CSUM;
863
864                         hinic_get_pld_offset(m, off_info,
865                                              HINIC_TX_OUTER_CHECKSUM_FLAG_SET);
866                 } else {
867                         inner_l3_offset = m->l2_len;
868                         hinic_analyze_tx_info(m, off_info);
869                         /* just support vxlan tunneling pkt */
870                         off_info->inner_l2_len = m->l2_len - VXLANLEN -
871                                 sizeof(*udp_hdr) - off_info->outer_l2_len -
872                                 off_info->outer_l3_len;
873                         off_info->inner_l3_len = m->l3_len;
874                         off_info->inner_l4_len = m->l4_len;
875                         off_info->tunnel_length = m->l2_len -
876                                 off_info->outer_l2_len - off_info->outer_l3_len;
877                         off_info->tunnel_type = TUNNEL_UDP_NO_CSUM;
878
879                         hinic_get_pld_offset(m, off_info,
880                                 HINIC_TX_OUTER_CHECKSUM_FLAG_NO_SET);
881                 }
882         } else {
883                 inner_l3_offset = m->l2_len;
884                 off_info->inner_l2_len = m->l2_len;
885                 off_info->inner_l3_len = m->l3_len;
886                 off_info->inner_l4_len = m->l4_len;
887                 off_info->tunnel_type = NOT_TUNNEL;
888
889                 hinic_get_pld_offset(m, off_info,
890                                      HINIC_TX_OUTER_CHECKSUM_FLAG_NO_SET);
891         }
892
893         /* invalid udp or tcp header */
894         if (unlikely(off_info->payload_offset > MAX_PLD_OFFSET))
895                 return -EINVAL;
896
897         /* Process outter udp pseudo-header checksum */
898         if ((ol_flags & PKT_TX_TUNNEL_VXLAN) && ((ol_flags & PKT_TX_TCP_SEG) ||
899                         (ol_flags & PKT_TX_OUTER_IP_CKSUM) ||
900                         (ol_flags & PKT_TX_OUTER_IPV6))) {
901
902                 /* inner_l4_tcp_udp csum should be setted to calculate outter
903                  * udp checksum when vxlan packets without inner l3 and l4
904                  */
905                 off_info->inner_l4_tcp_udp = 1;
906
907                 eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
908                 eth_type = rte_be_to_cpu_16(eth_hdr->ether_type);
909
910                 if (eth_type == RTE_ETHER_TYPE_VLAN) {
911                         vlan_hdr = (struct rte_vlan_hdr *)(eth_hdr + 1);
912                         eth_type = rte_be_to_cpu_16(vlan_hdr->eth_proto);
913                 }
914
915                 if (eth_type == RTE_ETHER_TYPE_IPV4) {
916                         ipv4_hdr =
917                         rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *,
918                                                 m->outer_l2_len);
919                         off_info->outer_l3_type = IPV4_PKT_WITH_CHKSUM_OFFLOAD;
920                         ipv4_hdr->hdr_checksum = 0;
921
922                         udp_hdr = (struct rte_udp_hdr *)((char *)ipv4_hdr +
923                                                         m->outer_l3_len);
924                         udp_hdr->dgram_cksum = 0;
925                 } else if (eth_type == RTE_ETHER_TYPE_IPV6) {
926                         off_info->outer_l3_type = IPV6_PKT;
927                         ipv6_hdr =
928                         rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *,
929                                                 m->outer_l2_len);
930
931                         udp_hdr =
932                         rte_pktmbuf_mtod_offset(m, struct rte_udp_hdr *,
933                                                 (m->outer_l2_len +
934                                                 m->outer_l3_len));
935                         udp_hdr->dgram_cksum = 0;
936                 }
937         } else if (ol_flags & PKT_TX_OUTER_IPV4) {
938                 off_info->tunnel_type = TUNNEL_UDP_NO_CSUM;
939                 off_info->inner_l4_tcp_udp = 1;
940                 off_info->outer_l3_type = IPV4_PKT_NO_CHKSUM_OFFLOAD;
941         }
942
943         if (ol_flags & PKT_TX_IPV4)
944                 off_info->inner_l3_type = (ol_flags & PKT_TX_IP_CKSUM) ?
945                                         IPV4_PKT_WITH_CHKSUM_OFFLOAD :
946                                         IPV4_PKT_NO_CHKSUM_OFFLOAD;
947         else if (ol_flags & PKT_TX_IPV6)
948                 off_info->inner_l3_type = IPV6_PKT;
949
950         /* Process the pseudo-header checksum */
951         if ((ol_flags & PKT_TX_L4_MASK) == PKT_TX_UDP_CKSUM) {
952                 if (ol_flags & PKT_TX_IPV4) {
953                         ipv4_hdr =
954                         rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *,
955                                                 inner_l3_offset);
956
957                         if (ol_flags & PKT_TX_IP_CKSUM)
958                                 ipv4_hdr->hdr_checksum = 0;
959
960                         udp_hdr = (struct rte_udp_hdr *)((char *)ipv4_hdr +
961                                                                 m->l3_len);
962                         udp_hdr->dgram_cksum =
963                                 hinic_ipv4_phdr_cksum(ipv4_hdr, ol_flags);
964                 } else {
965                         ipv6_hdr =
966                         rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *,
967                                                 inner_l3_offset);
968
969                         udp_hdr =
970                         rte_pktmbuf_mtod_offset(m, struct rte_udp_hdr *,
971                                                 (inner_l3_offset + m->l3_len));
972                         udp_hdr->dgram_cksum =
973                                 hinic_ipv6_phdr_cksum(ipv6_hdr, ol_flags);
974                 }
975
976                 off_info->inner_l4_type = UDP_OFFLOAD_ENABLE;
977                 off_info->inner_l4_tcp_udp = 1;
978         } else if (((ol_flags & PKT_TX_L4_MASK) == PKT_TX_TCP_CKSUM) ||
979                         (ol_flags & PKT_TX_TCP_SEG)) {
980                 if (ol_flags & PKT_TX_IPV4) {
981                         ipv4_hdr =
982                         rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *,
983                                                 inner_l3_offset);
984
985                         if (ol_flags & PKT_TX_IP_CKSUM)
986                                 ipv4_hdr->hdr_checksum = 0;
987
988                         /* non-TSO tcp */
989                         tcp_hdr = (struct rte_tcp_hdr *)((char *)ipv4_hdr +
990                                                                 m->l3_len);
991                         tcp_hdr->cksum =
992                                 hinic_ipv4_phdr_cksum(ipv4_hdr, ol_flags);
993                 } else {
994                         ipv6_hdr =
995                         rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *,
996                                                 inner_l3_offset);
997                         /* non-TSO tcp */
998                         tcp_hdr =
999                         rte_pktmbuf_mtod_offset(m, struct rte_tcp_hdr *,
1000                                                 (inner_l3_offset + m->l3_len));
1001                         tcp_hdr->cksum =
1002                                 hinic_ipv6_phdr_cksum(ipv6_hdr, ol_flags);
1003                 }
1004
1005                 off_info->inner_l4_type = TCP_OFFLOAD_ENABLE;
1006                 off_info->inner_l4_tcp_udp = 1;
1007         } else if ((ol_flags & PKT_TX_L4_MASK) == PKT_TX_SCTP_CKSUM) {
1008                 off_info->inner_l4_type = SCTP_OFFLOAD_ENABLE;
1009                 off_info->inner_l4_tcp_udp = 0;
1010                 off_info->inner_l4_len = sizeof(struct rte_sctp_hdr);
1011         }
1012
1013         return 0;
1014 }
1015
1016 static inline bool hinic_get_sge_txoff_info(struct rte_mbuf *mbuf_pkt,
1017                                             struct hinic_wqe_info *sqe_info,
1018                                             struct hinic_tx_offload_info
1019                                             *off_info)
1020 {
1021         u16  i, total_len, sge_cnt = mbuf_pkt->nb_segs;
1022         struct rte_mbuf *mbuf;
1023         int ret;
1024
1025         memset(off_info, 0, sizeof(*off_info));
1026
1027         ret = hinic_tx_offload_pkt_prepare(mbuf_pkt, off_info);
1028         if (unlikely(ret))
1029                 return false;
1030
1031         sqe_info->cpy_mbuf_cnt = 0;
1032
1033         /* non tso mbuf */
1034         if (likely(!(mbuf_pkt->ol_flags & PKT_TX_TCP_SEG))) {
1035                 if (unlikely(mbuf_pkt->pkt_len > MAX_SINGLE_SGE_SIZE)) {
1036                         /* non tso packet len must less than 64KB */
1037                         return false;
1038                 } else if (unlikely(HINIC_NONTSO_SEG_NUM_INVALID(sge_cnt))) {
1039                         /* non tso packet buffer number must less than 17
1040                          * the mbuf segs more than 17 must copy to one buffer
1041                          */
1042                         total_len = 0;
1043                         mbuf = mbuf_pkt;
1044                         for (i = 0; i < (HINIC_NONTSO_PKT_MAX_SGE - 1) ; i++) {
1045                                 total_len += mbuf->data_len;
1046                                 mbuf = mbuf->next;
1047                         }
1048
1049                         /* default support copy total 4k mbuf segs */
1050                         if ((u32)(total_len + (u16)HINIC_COPY_MBUF_SIZE) <
1051                                   mbuf_pkt->pkt_len)
1052                                 return false;
1053
1054                         sqe_info->sge_cnt = HINIC_NONTSO_PKT_MAX_SGE;
1055                         sqe_info->cpy_mbuf_cnt = 1;
1056                         return true;
1057                 }
1058
1059                 /* valid non tso mbuf */
1060                 sqe_info->sge_cnt = sge_cnt;
1061         } else {
1062                 /* tso mbuf */
1063                 if (unlikely(HINIC_TSO_SEG_NUM_INVALID(sge_cnt)))
1064                         /* too many mbuf segs */
1065                         return false;
1066
1067                 /* check tso mbuf segs are valid or not */
1068                 if (unlikely(!hinic_is_tso_sge_valid(mbuf_pkt,
1069                              off_info, sqe_info)))
1070                         return false;
1071         }
1072
1073         return true;
1074 }
1075
1076 static inline void hinic_sq_write_db(struct hinic_sq *sq, int cos)
1077 {
1078         u16 prod_idx;
1079         u32 hi_prod_idx;
1080         struct hinic_sq_db sq_db;
1081
1082         prod_idx = MASKED_SQ_IDX(sq, sq->wq->prod_idx);
1083         hi_prod_idx = SQ_DB_PI_HIGH(prod_idx);
1084
1085         sq_db.db_info = SQ_DB_INFO_SET(hi_prod_idx, HI_PI) |
1086                         SQ_DB_INFO_SET(SQ_DB, TYPE) |
1087                         SQ_DB_INFO_SET(SQ_CFLAG_DP, CFLAG) |
1088                         SQ_DB_INFO_SET(cos, COS) |
1089                         SQ_DB_INFO_SET(sq->q_id, QID);
1090
1091         /* Data should be written to HW in Big Endian Format */
1092         sq_db.db_info = cpu_to_be32(sq_db.db_info);
1093
1094         /* Write all before the doorbell */
1095         rte_wmb();
1096         writel(sq_db.db_info, SQ_DB_ADDR(sq, prod_idx));
1097 }
1098
1099 u16 hinic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, u16 nb_pkts)
1100 {
1101         int free_wqebb_cnt, wqe_wqebb_cnt;
1102         u32 queue_info, tx_bytes = 0;
1103         u16 nb_tx;
1104         struct hinic_wqe_info sqe_info;
1105         struct hinic_tx_offload_info off_info;
1106         struct rte_mbuf *mbuf_pkt;
1107         struct hinic_txq *txq = tx_queue;
1108         struct hinic_tx_info *tx_info;
1109         struct hinic_sq_wqe *sq_wqe;
1110         struct hinic_sq_task *task;
1111
1112         /* reclaim tx mbuf before xmit new packet */
1113         if (HINIC_GET_SQ_FREE_WQEBBS(txq) < txq->tx_free_thresh)
1114                 hinic_xmit_mbuf_cleanup(txq);
1115
1116         /* tx loop routine */
1117         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
1118                 mbuf_pkt = *tx_pkts++;
1119                 queue_info = 0;
1120
1121                 /* 1. parse sge and tx offlod info from mbuf */
1122                 if (unlikely(!hinic_get_sge_txoff_info(mbuf_pkt,
1123                                                        &sqe_info, &off_info))) {
1124                         txq->txq_stats.off_errs++;
1125                         break;
1126                 }
1127
1128                 /* 2. try to get enough wqebb */
1129                 wqe_wqebb_cnt = HINIC_SQ_WQEBB_CNT(sqe_info.sge_cnt);
1130                 free_wqebb_cnt = HINIC_GET_SQ_FREE_WQEBBS(txq);
1131                 if (unlikely(wqe_wqebb_cnt > free_wqebb_cnt)) {
1132                         /* reclaim again */
1133                         hinic_xmit_mbuf_cleanup(txq);
1134                         free_wqebb_cnt = HINIC_GET_SQ_FREE_WQEBBS(txq);
1135                         if (unlikely(wqe_wqebb_cnt > free_wqebb_cnt)) {
1136                                 txq->txq_stats.tx_busy += (nb_pkts - nb_tx);
1137                                 break;
1138                         }
1139                 }
1140
1141                 /* 3. get sq tail wqe address from wqe_page,
1142                  * sq have enough wqebb for this packet
1143                  */
1144                 sq_wqe = hinic_get_sq_wqe(txq, wqe_wqebb_cnt, &sqe_info);
1145
1146                 /* 4. fill sq wqe sge section */
1147                 if (unlikely(!hinic_mbuf_dma_map_sge(txq, mbuf_pkt,
1148                                                      sq_wqe->buf_descs,
1149                                                      &sqe_info))) {
1150                         hinic_return_sq_wqe(txq->nic_dev->hwdev, txq->q_id,
1151                                             wqe_wqebb_cnt, sqe_info.owner);
1152                         txq->txq_stats.off_errs++;
1153                         break;
1154                 }
1155
1156                 /* 5. fill sq wqe task section and queue info */
1157                 task = &sq_wqe->task;
1158
1159                 /* tx packet offload configure */
1160                 hinic_fill_tx_offload_info(mbuf_pkt, task, &queue_info,
1161                                            &off_info);
1162
1163                 /* 6. record tx info */
1164                 tx_info = &txq->tx_info[sqe_info.pi];
1165                 tx_info->mbuf = mbuf_pkt;
1166                 tx_info->wqebb_cnt = wqe_wqebb_cnt;
1167
1168                 /* 7. fill sq wqe header section */
1169                 hinic_fill_sq_wqe_header(&sq_wqe->ctrl, queue_info,
1170                                          sqe_info.sge_cnt, sqe_info.owner);
1171
1172                 /* 8.convert continue or bottom wqe byteorder to big endian */
1173                 hinic_sq_wqe_cpu_to_be32(sq_wqe, sqe_info.seq_wqebbs);
1174
1175                 tx_bytes += mbuf_pkt->pkt_len;
1176         }
1177
1178         /* 9. write sq doorbell in burst mode */
1179         if (nb_tx) {
1180                 hinic_sq_write_db(txq->sq, txq->cos);
1181
1182                 txq->txq_stats.packets += nb_tx;
1183                 txq->txq_stats.bytes += tx_bytes;
1184         }
1185         txq->txq_stats.burst_pkts = nb_tx;
1186
1187         return nb_tx;
1188 }
1189
1190 void hinic_free_all_tx_mbufs(struct hinic_txq *txq)
1191 {
1192         u16 ci;
1193         struct hinic_nic_dev *nic_dev = txq->nic_dev;
1194         struct hinic_tx_info *tx_info;
1195         int free_wqebbs = hinic_get_sq_free_wqebbs(nic_dev->hwdev,
1196                                                    txq->q_id) + 1;
1197
1198         while (free_wqebbs < txq->q_depth) {
1199                 ci = hinic_get_sq_local_ci(nic_dev->hwdev, txq->q_id);
1200
1201                 tx_info = &txq->tx_info[ci];
1202
1203                 if (unlikely(tx_info->cpy_mbuf != NULL)) {
1204                         rte_pktmbuf_free(tx_info->cpy_mbuf);
1205                         tx_info->cpy_mbuf = NULL;
1206                 }
1207
1208                 rte_pktmbuf_free(tx_info->mbuf);
1209                 hinic_update_sq_local_ci(nic_dev->hwdev, txq->q_id,
1210                                          tx_info->wqebb_cnt);
1211
1212                 free_wqebbs += tx_info->wqebb_cnt;
1213                 tx_info->mbuf = NULL;
1214         }
1215 }
1216
1217 void hinic_free_all_tx_resources(struct rte_eth_dev *eth_dev)
1218 {
1219         u16 q_id;
1220         struct hinic_nic_dev *nic_dev =
1221                                 HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(eth_dev);
1222
1223         for (q_id = 0; q_id < nic_dev->num_sq; q_id++) {
1224                 if (eth_dev->data->tx_queues != NULL)
1225                         eth_dev->data->tx_queues[q_id] = NULL;
1226
1227                 if (nic_dev->txqs[q_id] == NULL)
1228                         continue;
1229
1230                 /* stop tx queue free tx mbuf */
1231                 hinic_free_all_tx_mbufs(nic_dev->txqs[q_id]);
1232                 hinic_free_tx_resources(nic_dev->txqs[q_id]);
1233
1234                 /* free txq */
1235                 kfree(nic_dev->txqs[q_id]);
1236                 nic_dev->txqs[q_id] = NULL;
1237         }
1238 }
1239
1240 void hinic_free_all_tx_mbuf(struct rte_eth_dev *eth_dev)
1241 {
1242         u16 q_id;
1243         struct hinic_nic_dev *nic_dev =
1244                                 HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(eth_dev);
1245
1246         for (q_id = 0; q_id < nic_dev->num_sq; q_id++)
1247                 /* stop tx queue free tx mbuf */
1248                 hinic_free_all_tx_mbufs(nic_dev->txqs[q_id]);
1249 }
1250
1251 int hinic_setup_tx_resources(struct hinic_txq *txq)
1252 {
1253         u64 tx_info_sz;
1254
1255         tx_info_sz = txq->q_depth * sizeof(*txq->tx_info);
1256         txq->tx_info = rte_zmalloc_socket("tx_info", tx_info_sz,
1257                         RTE_CACHE_LINE_SIZE, txq->socket_id);
1258         if (!txq->tx_info)
1259                 return -ENOMEM;
1260
1261         return HINIC_OK;
1262 }
1263
1264 void hinic_free_tx_resources(struct hinic_txq *txq)
1265 {
1266         if (txq->tx_info == NULL)
1267                 return;
1268
1269         rte_free(txq->tx_info);
1270         txq->tx_info = NULL;
1271 }
1272
1273 int hinic_create_sq(struct hinic_hwdev *hwdev, u16 q_id,
1274                         u16 sq_depth, unsigned int socket_id)
1275 {
1276         int err;
1277         struct hinic_nic_io *nic_io = hwdev->nic_io;
1278         struct hinic_qp *qp = &nic_io->qps[q_id];
1279         struct hinic_sq *sq = &qp->sq;
1280         void __iomem *db_addr;
1281         volatile u32 *ci_addr;
1282
1283         sq->sq_depth = sq_depth;
1284         nic_io->sq_depth = sq_depth;
1285
1286         /* alloc wq */
1287         err = hinic_wq_allocate(nic_io->hwdev, &nic_io->sq_wq[q_id],
1288                                 HINIC_SQ_WQEBB_SHIFT, nic_io->sq_depth,
1289                                 socket_id);
1290         if (err) {
1291                 PMD_DRV_LOG(ERR, "Failed to allocate WQ for SQ");
1292                 return err;
1293         }
1294
1295         /* alloc sq doorbell space */
1296         err = hinic_alloc_db_addr(nic_io->hwdev, &db_addr);
1297         if (err) {
1298                 PMD_DRV_LOG(ERR, "Failed to init db addr");
1299                 goto alloc_db_err;
1300         }
1301
1302         /* clear hardware ci */
1303         ci_addr = (volatile u32 *)HINIC_CI_VADDR(nic_io->ci_vaddr_base, q_id);
1304         *ci_addr = 0;
1305
1306         sq->q_id = q_id;
1307         sq->wq = &nic_io->sq_wq[q_id];
1308         sq->owner = 1;
1309         sq->cons_idx_addr = (volatile u16 *)ci_addr;
1310         sq->db_addr = db_addr;
1311
1312         return HINIC_OK;
1313
1314 alloc_db_err:
1315         hinic_wq_free(nic_io->hwdev, &nic_io->sq_wq[q_id]);
1316
1317         return err;
1318 }
1319
1320 void hinic_destroy_sq(struct hinic_hwdev *hwdev, u16 q_id)
1321 {
1322         struct hinic_nic_io *nic_io;
1323         struct hinic_qp *qp;
1324
1325         nic_io = hwdev->nic_io;
1326         qp = &nic_io->qps[q_id];
1327
1328         if (qp->sq.wq == NULL)
1329                 return;
1330
1331         hinic_free_db_addr(nic_io->hwdev, qp->sq.db_addr);
1332         hinic_wq_free(nic_io->hwdev, qp->sq.wq);
1333         qp->sq.wq = NULL;
1334 }