net/hinic: refactor checksum functions
[dpdk.git] drivers/net/hinic/hinic_pmd_tx.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2017 Huawei Technologies Co., Ltd
3  */
4
5 #include <rte_mbuf.h>
6 #include <rte_tcp.h>
7 #include <rte_sctp.h>
8 #include <rte_udp.h>
9 #include <rte_ip.h>
10 #ifdef __ARM64_NEON__
11 #include <arm_neon.h>
12 #endif
13
14 #include "base/hinic_compat.h"
15 #include "base/hinic_pmd_hwdev.h"
16 #include "base/hinic_pmd_hwif.h"
17 #include "base/hinic_pmd_wq.h"
18 #include "base/hinic_pmd_nicio.h"
19 #include "base/hinic_pmd_niccfg.h"
20 #include "hinic_pmd_ethdev.h"
21 #include "hinic_pmd_tx.h"
22
23 /* packet header and tx offload info */
24 #define ETHER_LEN_NO_VLAN               14
25 #define ETHER_LEN_WITH_VLAN             18
26 #define HEADER_LEN_OFFSET               2
27 #define VXLANLEN                        8
28 #define MAX_PLD_OFFSET                  221
29 #define MAX_SINGLE_SGE_SIZE             65536
30 #define TSO_ENABLE                      1
31 #define TX_MSS_DEFAULT                  0x3E00
32 #define TX_MSS_MIN                      0x50
33
34 #define HINIC_NONTSO_PKT_MAX_SGE                17      /* non-tso max sge 17 */
35 #define HINIC_NONTSO_SEG_NUM_INVALID(num)       \
36                         ((num) > HINIC_NONTSO_PKT_MAX_SGE)
37
38 #define HINIC_TSO_PKT_MAX_SGE                   127     /* tso max sge 127 */
39 #define HINIC_TSO_SEG_NUM_INVALID(num)          ((num) > HINIC_TSO_PKT_MAX_SGE)
40
41 /* sizeof(struct hinic_sq_bufdesc) == 16, shift 4 */
42 #define HINIC_BUF_DESC_SIZE(nr_descs)   (SIZE_8BYTES(((u32)nr_descs) << 4))
43
44 #define MASKED_SQ_IDX(sq, idx)          ((idx) & (sq)->wq->mask)
45
46 /* SQ_CTRL */
47 #define SQ_CTRL_BUFDESC_SECT_LEN_SHIFT          0
48 #define SQ_CTRL_TASKSECT_LEN_SHIFT              16
49 #define SQ_CTRL_DATA_FORMAT_SHIFT               22
50 #define SQ_CTRL_LEN_SHIFT                       29
51 #define SQ_CTRL_OWNER_SHIFT                     31
52
53 #define SQ_CTRL_BUFDESC_SECT_LEN_MASK           0xFFU
54 #define SQ_CTRL_TASKSECT_LEN_MASK               0x1FU
55 #define SQ_CTRL_DATA_FORMAT_MASK                0x1U
56 #define SQ_CTRL_LEN_MASK                        0x3U
57 #define SQ_CTRL_OWNER_MASK                      0x1U
58
59 #define SQ_CTRL_SET(val, member)        \
60         (((val) & SQ_CTRL_##member##_MASK) << SQ_CTRL_##member##_SHIFT)
61
62 #define SQ_CTRL_QUEUE_INFO_PLDOFF_SHIFT         2
63 #define SQ_CTRL_QUEUE_INFO_UFO_SHIFT            10
64 #define SQ_CTRL_QUEUE_INFO_TSO_SHIFT            11
65 #define SQ_CTRL_QUEUE_INFO_TCPUDP_CS_SHIFT      12
66 #define SQ_CTRL_QUEUE_INFO_MSS_SHIFT            13
67 #define SQ_CTRL_QUEUE_INFO_SCTP_SHIFT           27
68 #define SQ_CTRL_QUEUE_INFO_UC_SHIFT             28
69 #define SQ_CTRL_QUEUE_INFO_PRI_SHIFT            29
70
71 #define SQ_CTRL_QUEUE_INFO_PLDOFF_MASK          0xFFU
72 #define SQ_CTRL_QUEUE_INFO_UFO_MASK             0x1U
73 #define SQ_CTRL_QUEUE_INFO_TSO_MASK             0x1U
74 #define SQ_CTRL_QUEUE_INFO_TCPUDP_CS_MASK       0x1U
75 #define SQ_CTRL_QUEUE_INFO_MSS_MASK             0x3FFFU
76 #define SQ_CTRL_QUEUE_INFO_SCTP_MASK            0x1U
77 #define SQ_CTRL_QUEUE_INFO_UC_MASK              0x1U
78 #define SQ_CTRL_QUEUE_INFO_PRI_MASK             0x7U
79
80 #define SQ_CTRL_QUEUE_INFO_SET(val, member)     \
81         (((u32)(val) & SQ_CTRL_QUEUE_INFO_##member##_MASK) <<   \
82                         SQ_CTRL_QUEUE_INFO_##member##_SHIFT)
83
84 #define SQ_CTRL_QUEUE_INFO_GET(val, member)     \
85         (((val) >> SQ_CTRL_QUEUE_INFO_##member##_SHIFT) &       \
86                         SQ_CTRL_QUEUE_INFO_##member##_MASK)
87
88 #define SQ_CTRL_QUEUE_INFO_CLEAR(val, member)   \
89         ((val) & (~(SQ_CTRL_QUEUE_INFO_##member##_MASK << \
90                         SQ_CTRL_QUEUE_INFO_##member##_SHIFT)))
91
92 #define SQ_TASK_INFO0_L2HDR_LEN_SHIFT           0
93 #define SQ_TASK_INFO0_L4OFFLOAD_SHIFT           8
94 #define SQ_TASK_INFO0_INNER_L3TYPE_SHIFT        10
95 #define SQ_TASK_INFO0_VLAN_OFFLOAD_SHIFT        12
96 #define SQ_TASK_INFO0_PARSE_FLAG_SHIFT          13
97 #define SQ_TASK_INFO0_UFO_AVD_SHIFT             14
98 #define SQ_TASK_INFO0_TSO_UFO_SHIFT             15
99 #define SQ_TASK_INFO0_VLAN_TAG_SHIFT            16
100
101 #define SQ_TASK_INFO0_L2HDR_LEN_MASK            0xFFU
102 #define SQ_TASK_INFO0_L4OFFLOAD_MASK            0x3U
103 #define SQ_TASK_INFO0_INNER_L3TYPE_MASK         0x3U
104 #define SQ_TASK_INFO0_VLAN_OFFLOAD_MASK         0x1U
105 #define SQ_TASK_INFO0_PARSE_FLAG_MASK           0x1U
106 #define SQ_TASK_INFO0_UFO_AVD_MASK              0x1U
107 #define SQ_TASK_INFO0_TSO_UFO_MASK              0x1U
108 #define SQ_TASK_INFO0_VLAN_TAG_MASK             0xFFFFU
109
110 #define SQ_TASK_INFO0_SET(val, member)                  \
111         (((u32)(val) & SQ_TASK_INFO0_##member##_MASK) <<        \
112                         SQ_TASK_INFO0_##member##_SHIFT)
113
114 #define SQ_TASK_INFO1_MD_TYPE_SHIFT             8
115 #define SQ_TASK_INFO1_INNER_L4LEN_SHIFT         16
116 #define SQ_TASK_INFO1_INNER_L3LEN_SHIFT         24
117
118 #define SQ_TASK_INFO1_MD_TYPE_MASK              0xFFU
119 #define SQ_TASK_INFO1_INNER_L4LEN_MASK          0xFFU
120 #define SQ_TASK_INFO1_INNER_L3LEN_MASK          0xFFU
121
122 #define SQ_TASK_INFO1_SET(val, member)                  \
123         (((val) & SQ_TASK_INFO1_##member##_MASK) <<     \
124                         SQ_TASK_INFO1_##member##_SHIFT)
125
126 #define SQ_TASK_INFO2_TUNNEL_L4LEN_SHIFT        0
127 #define SQ_TASK_INFO2_OUTER_L3LEN_SHIFT         8
128 #define SQ_TASK_INFO2_TUNNEL_L4TYPE_SHIFT       16
129 #define SQ_TASK_INFO2_OUTER_L3TYPE_SHIFT        24
130
131 #define SQ_TASK_INFO2_TUNNEL_L4LEN_MASK         0xFFU
132 #define SQ_TASK_INFO2_OUTER_L3LEN_MASK          0xFFU
133 #define SQ_TASK_INFO2_TUNNEL_L4TYPE_MASK        0x7U
134 #define SQ_TASK_INFO2_OUTER_L3TYPE_MASK         0x3U
135
136 #define SQ_TASK_INFO2_SET(val, member)                  \
137         (((val) & SQ_TASK_INFO2_##member##_MASK) <<     \
138                         SQ_TASK_INFO2_##member##_SHIFT)
139
140 #define SQ_TASK_INFO4_L2TYPE_SHIFT              31
141
142 #define SQ_TASK_INFO4_L2TYPE_MASK               0x1U
143
144 #define SQ_TASK_INFO4_SET(val, member)          \
145         (((u32)(val) & SQ_TASK_INFO4_##member##_MASK) << \
146                         SQ_TASK_INFO4_##member##_SHIFT)
147
148 /* SQ_DB */
149 #define SQ_DB_OFF                               0x00000800
150 #define SQ_DB_INFO_HI_PI_SHIFT                  0
151 #define SQ_DB_INFO_QID_SHIFT                    8
152 #define SQ_DB_INFO_CFLAG_SHIFT                  23
153 #define SQ_DB_INFO_COS_SHIFT                    24
154 #define SQ_DB_INFO_TYPE_SHIFT                   27
155
156 #define SQ_DB_INFO_HI_PI_MASK                   0xFFU
157 #define SQ_DB_INFO_QID_MASK                     0x3FFU
158 #define SQ_DB_INFO_CFLAG_MASK                   0x1U
159 #define SQ_DB_INFO_COS_MASK                     0x7U
160 #define SQ_DB_INFO_TYPE_MASK                    0x1FU
161 #define SQ_DB_INFO_SET(val, member)             \
162         (((u32)(val) & SQ_DB_INFO_##member##_MASK) <<   \
163                         SQ_DB_INFO_##member##_SHIFT)
164
165 #define SQ_DB                                   1
166 #define SQ_CFLAG_DP                             0       /* CFLAG_DATA_PATH */
167
168 #define SQ_DB_PI_LOW_MASK                       0xFF
169 #define SQ_DB_PI_LOW(pi)                        ((pi) & SQ_DB_PI_LOW_MASK)
170 #define SQ_DB_PI_HI_SHIFT                       8
171 #define SQ_DB_PI_HIGH(pi)                       ((pi) >> SQ_DB_PI_HI_SHIFT)
172 #define SQ_DB_ADDR(sq, pi)              \
173         ((u64 *)((u8 __iomem *)((sq)->db_addr) + SQ_DB_OFF) + SQ_DB_PI_LOW(pi))
174
175 /* txq wq operations */
176 #define HINIC_GET_SQ_WQE_MASK(txq)              ((txq)->wq->mask)
177
178 #define HINIC_GET_SQ_HW_CI(txq) \
179         ((be16_to_cpu(*(txq)->cons_idx_addr)) & HINIC_GET_SQ_WQE_MASK(txq))
180
181 #define HINIC_GET_SQ_LOCAL_CI(txq)      \
182         (((txq)->wq->cons_idx) & HINIC_GET_SQ_WQE_MASK(txq))
183
184 #define HINIC_UPDATE_SQ_LOCAL_CI(txq, wqebb_cnt)        \
185         do {                                            \
186                 (txq)->wq->cons_idx += wqebb_cnt;       \
187                 (txq)->wq->delta += wqebb_cnt;          \
188         } while (0)
189
190 #define HINIC_GET_SQ_FREE_WQEBBS(txq)   ((txq)->wq->delta - 1)
191
192 #define HINIC_IS_SQ_EMPTY(txq)  (((txq)->wq->delta) == ((txq)->q_depth))
193
194 #define BUF_DESC_SIZE_SHIFT             4
195
196 #define HINIC_SQ_WQE_SIZE(num_sge)              \
197         (sizeof(struct hinic_sq_ctrl) + sizeof(struct hinic_sq_task) +  \
198                         (unsigned int)((num_sge) << BUF_DESC_SIZE_SHIFT))
199
200 #define HINIC_SQ_WQEBB_CNT(num_sge)     \
201         (int)(ALIGN(HINIC_SQ_WQE_SIZE((u32)num_sge), \
202                         HINIC_SQ_WQEBB_SIZE) >> HINIC_SQ_WQEBB_SHIFT)
203
204
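/* Convert an SQ WQE from CPU byte order to big endian in place, one 64-byte
 * WQEBB at a time. SSE or NEON byte shuffles are used when available; the
 * generic hinic_cpu_to_be32() helper is the fallback.
 */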
205 static inline void hinic_sq_wqe_cpu_to_be32(void *data, int nr_wqebb)
206 {
207 #if defined(__X86_64_SSE__)
208         int i;
209         __m128i *wqe_line = (__m128i *)data;
210         __m128i shuf_mask = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10,
211                                         11, 4, 5, 6, 7, 0, 1, 2, 3);
212
213         for (i = 0; i < nr_wqebb; i++) {
214                 /* convert 64B wqebb using 4 SSE instructions */
215                 wqe_line[0] = _mm_shuffle_epi8(wqe_line[0], shuf_mask);
216                 wqe_line[1] = _mm_shuffle_epi8(wqe_line[1], shuf_mask);
217                 wqe_line[2] = _mm_shuffle_epi8(wqe_line[2], shuf_mask);
218                 wqe_line[3] = _mm_shuffle_epi8(wqe_line[3], shuf_mask);
219                 wqe_line += 4;
220         }
221 #elif defined(__ARM64_NEON__)
222         int i;
223         uint8x16_t *wqe_line = (uint8x16_t *)data;
224         const uint8x16_t shuf_mask = {3, 2, 1, 0, 7, 6, 5, 4, 11, 10,
225                                         9, 8, 15, 14, 13, 12};
226
227         for (i = 0; i < nr_wqebb; i++) {
228                 wqe_line[0] = vqtbl1q_u8(wqe_line[0], shuf_mask);
229                 wqe_line[1] = vqtbl1q_u8(wqe_line[1], shuf_mask);
230                 wqe_line[2] = vqtbl1q_u8(wqe_line[2], shuf_mask);
231                 wqe_line[3] = vqtbl1q_u8(wqe_line[3], shuf_mask);
232                 wqe_line += 4;
233         }
234 #else
235         hinic_cpu_to_be32(data, nr_wqebb * HINIC_SQ_WQEBB_SIZE);
236 #endif
237 }
238
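/* Convert SQ buffer descriptors (SGEs) to big endian in place, one 16-byte
 * descriptor at a time, using the same SSE/NEON byte shuffles as above.
 */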
239 static inline void hinic_sge_cpu_to_be32(void *data, int nr_sge)
240 {
241 #if defined(__X86_64_SSE__)
242         int i;
243         __m128i *sge_line = (__m128i *)data;
244         __m128i shuf_mask = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10,
245                                         11, 4, 5, 6, 7, 0, 1, 2, 3);
246
247         for (i = 0; i < nr_sge; i++) {
248                 /* convert each 16B sge using 1 SSE instruction */
249                 *sge_line = _mm_shuffle_epi8(*sge_line, shuf_mask);
250                 sge_line++;
251         }
252 #elif defined(__ARM64_NEON__)
253         int i;
254         uint8x16_t *sge_line = (uint8x16_t *)data;
255         const uint8x16_t shuf_mask = {3, 2, 1, 0, 7, 6, 5, 4, 11, 10,
256                                         9, 8, 15, 14, 13, 12};
257
258         for (i = 0; i < nr_sge; i++) {
259                 *sge_line = vqtbl1q_u8(*sge_line, shuf_mask);
260                 sge_line++;
261         }
262 #else
263         hinic_cpu_to_be32(data, nr_sge * sizeof(struct hinic_sq_bufdesc));
264 #endif
265 }
266
267 void hinic_txq_get_stats(struct hinic_txq *txq, struct hinic_txq_stats *stats)
268 {
269         if (!txq || !stats) {
270                 PMD_DRV_LOG(ERR, "Txq or stats is NULL");
271                 return;
272         }
273
274         memcpy(stats, &txq->txq_stats, sizeof(txq->txq_stats));
275 }
276
277 void hinic_txq_stats_reset(struct hinic_txq *txq)
278 {
279         struct hinic_txq_stats *txq_stats;
280
281         if (txq == NULL)
282                 return;
283
284         txq_stats = &txq->txq_stats;
285         memset(txq_stats, 0, sizeof(*txq_stats));
286 }
287
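/* Coalesce sge_cnt mbuf segments into a single mbuf allocated from the
 * driver's copy mempool. Returns NULL if the mempool is missing or the
 * allocation fails; the copied mbuf is freed later in the tx completion path.
 */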
288 static inline struct rte_mbuf *hinic_copy_tx_mbuf(struct hinic_nic_dev *nic_dev,
289                                                   struct rte_mbuf *mbuf,
290                                                   u16 sge_cnt)
291 {
292         struct rte_mbuf *dst_mbuf;
293         u32 offset = 0;
294         u16 i;
295
296         if (unlikely(!nic_dev->cpy_mpool))
297                 return NULL;
298
299         dst_mbuf = rte_pktmbuf_alloc(nic_dev->cpy_mpool);
300         if (unlikely(!dst_mbuf))
301                 return NULL;
302
303         dst_mbuf->data_off = 0;
304         for (i = 0; i < sge_cnt; i++) {
305                 rte_memcpy((char *)dst_mbuf->buf_addr + offset,
306                            (char *)mbuf->buf_addr + mbuf->data_off,
307                            mbuf->data_len);
308                 dst_mbuf->data_len += mbuf->data_len;
309                 offset += mbuf->data_len;
310                 mbuf = mbuf->next;
311         }
312
313         dst_mbuf->pkt_len = dst_mbuf->data_len;
314
315         return dst_mbuf;
316 }
317
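/* Fill the SQ buffer descriptors with the IOVA and length of each mbuf
 * segment. Handles WQEs that wrap around the end of the ring and, when
 * cpy_mbuf_cnt is set, copies the trailing segments into one bounce mbuf.
 * Returns false on a NULL segment or a zero-length segment.
 */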
318 static inline bool hinic_mbuf_dma_map_sge(struct hinic_txq *txq,
319                                           struct rte_mbuf *mbuf,
320                                           struct hinic_sq_bufdesc *sges,
321                                           struct hinic_wqe_info *sqe_info)
322 {
323         dma_addr_t dma_addr;
324         u16 i, around_sges;
325         u16 nb_segs = sqe_info->sge_cnt - sqe_info->cpy_mbuf_cnt;
326         u16 real_nb_segs = mbuf->nb_segs;
327         struct hinic_sq_bufdesc *sge_idx = sges;
328
329         if (unlikely(sqe_info->around)) {
330                 /* part of the wqe is at the sq bottom while the rest
331                  * of the wqe wraps around to the sq head
332                  */
333                 i = 0;
334                 for (sge_idx = sges; (u64)sge_idx <= txq->sq_bot_sge_addr;
335                      sge_idx++) {
336                         if (unlikely(mbuf == NULL)) {
337                                 txq->txq_stats.mbuf_null++;
338                                 return false;
339                         }
340
341                         dma_addr = rte_mbuf_data_iova(mbuf);
342                         if (unlikely(mbuf->data_len == 0)) {
343                                 txq->txq_stats.sge_len0++;
344                                 return false;
345                         }
346                         hinic_set_sge((struct hinic_sge *)sge_idx, dma_addr,
347                                       mbuf->data_len);
348                         mbuf = mbuf->next;
349                         i++;
350                 }
351
352                 around_sges = nb_segs - i;
353                 sge_idx = (struct hinic_sq_bufdesc *)
354                                 ((void *)txq->sq_head_addr);
355                 for (; i < nb_segs; i++) {
356                         if (unlikely(mbuf == NULL)) {
357                                 txq->txq_stats.mbuf_null++;
358                                 return false;
359                         }
360
361                         dma_addr = rte_mbuf_data_iova(mbuf);
362                         if (unlikely(mbuf->data_len == 0)) {
363                                 txq->txq_stats.sge_len0++;
364                                 return false;
365                         }
366                         hinic_set_sge((struct hinic_sge *)sge_idx, dma_addr,
367                                       mbuf->data_len);
368                         mbuf = mbuf->next;
369                         sge_idx++;
370                 }
371
372                 /* convert the sges at the head to big endian */
373                 hinic_sge_cpu_to_be32((void *)txq->sq_head_addr, around_sges);
374         } else {
375                 /* wqe is in continuous space */
376                 for (i = 0; i < nb_segs; i++) {
377                         if (unlikely(mbuf == NULL)) {
378                                 txq->txq_stats.mbuf_null++;
379                                 return false;
380                         }
381
382                         dma_addr = rte_mbuf_data_iova(mbuf);
383                         if (unlikely(mbuf->data_len == 0)) {
384                                 txq->txq_stats.sge_len0++;
385                                 return false;
386                         }
387                         hinic_set_sge((struct hinic_sge *)sge_idx, dma_addr,
388                                       mbuf->data_len);
389                         mbuf = mbuf->next;
390                         sge_idx++;
391                 }
392         }
393
394         /* for now: support non-tso packets over 17 sges by copying the trailing mbufs */
395         if (unlikely(sqe_info->cpy_mbuf_cnt != 0)) {
396                 /* copy the over-limit mbuf segs into one valid buffer, which costs performance */
397                 txq->txq_stats.cpy_pkts += 1;
398                 mbuf = hinic_copy_tx_mbuf(txq->nic_dev, mbuf,
399                                           real_nb_segs - nb_segs);
400                 if (unlikely(!mbuf))
401                         return false;
402
403                 txq->tx_info[sqe_info->pi].cpy_mbuf = mbuf;
404
405                 /* deal with the last mbuf */
406                 dma_addr = rte_mbuf_data_iova(mbuf);
407                 if (unlikely(mbuf->data_len == 0)) {
408                         txq->txq_stats.sge_len0++;
409                         return false;
410                 }
411                 hinic_set_sge((struct hinic_sge *)sge_idx, dma_addr,
412                               mbuf->data_len);
413                 if (unlikely(sqe_info->around))
414                         hinic_sge_cpu_to_be32((void *)sge_idx, 1);
415         }
416
417         return true;
418 }
419
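/* Build the WQE control section: section lengths, data format and owner bit,
 * plus the queue_info word with a sane MSS (default, or clamped to TX_MSS_MIN).
 */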
420 static inline void hinic_fill_sq_wqe_header(struct hinic_sq_ctrl *ctrl,
421                                             u32 queue_info, int nr_descs,
422                                             u8 owner)
423 {
424         u32 ctrl_size, task_size, bufdesc_size;
425
426         ctrl_size = SIZE_8BYTES(sizeof(struct hinic_sq_ctrl));
427         task_size = SIZE_8BYTES(sizeof(struct hinic_sq_task));
428         bufdesc_size = HINIC_BUF_DESC_SIZE(nr_descs);
429
430         ctrl->ctrl_fmt = SQ_CTRL_SET(bufdesc_size, BUFDESC_SECT_LEN) |
431                         SQ_CTRL_SET(task_size, TASKSECT_LEN)    |
432                         SQ_CTRL_SET(SQ_NORMAL_WQE, DATA_FORMAT) |
433                         SQ_CTRL_SET(ctrl_size, LEN)             |
434                         SQ_CTRL_SET(owner, OWNER);
435
436         ctrl->queue_info = queue_info;
437         ctrl->queue_info |= SQ_CTRL_QUEUE_INFO_SET(1U, UC);
438
439         if (!SQ_CTRL_QUEUE_INFO_GET(ctrl->queue_info, MSS)) {
440                 ctrl->queue_info |=
441                         SQ_CTRL_QUEUE_INFO_SET(TX_MSS_DEFAULT, MSS);
442         } else if (SQ_CTRL_QUEUE_INFO_GET(ctrl->queue_info, MSS) < TX_MSS_MIN) {
443                 /* mss should not be less than 80 */
444                 ctrl->queue_info =
445                                 SQ_CTRL_QUEUE_INFO_CLEAR(ctrl->queue_info, MSS);
446                 ctrl->queue_info |= SQ_CTRL_QUEUE_INFO_SET(TX_MSS_MIN, MSS);
447         }
448 }
449
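/* Validate the segment layout of a TSO packet. Each window of up to 17
 * contiguous segments must together carry at least one MSS of data; if not,
 * the remaining segments are scheduled to be copied into one buffer
 * (cpy_mbuf_cnt = 1). Returns false when the data left to copy would exceed
 * HINIC_COPY_MBUF_SIZE.
 */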
450 static inline bool hinic_is_tso_sge_valid(struct rte_mbuf *mbuf,
451                                           struct hinic_tx_offload_info
452                                           *poff_info,
453                                           struct hinic_wqe_info *sqe_info)
454 {
455         u32 total_len, limit_len, checked_len, left_len, adjust_mss;
456         u32 i, first_mss_sges, left_sges;
457         struct rte_mbuf *mbuf_head, *mbuf_pre;
458
459         left_sges = mbuf->nb_segs;
460         mbuf_head = mbuf;
461
462         /* tso sge number validation */
463         if (unlikely(left_sges >= HINIC_NONTSO_PKT_MAX_SGE)) {
464                 checked_len = 0;
465                 adjust_mss = mbuf->tso_segsz >= TX_MSS_MIN ?
466                                 mbuf->tso_segsz : TX_MSS_MIN;
467                 limit_len = adjust_mss + poff_info->payload_offset;
468                 first_mss_sges = HINIC_NONTSO_PKT_MAX_SGE;
469
470                 /* every run of 17 contiguous mbuf segs must be checked once */
471                 while (left_sges >= HINIC_NONTSO_PKT_MAX_SGE) {
472                         /* the total len of the first 16 mbufs must be
473                          * equal to or greater than limit_len
474                          */
475                         total_len = 0;
476                         for (i = 0; i < first_mss_sges; i++) {
477                                 total_len += mbuf->data_len;
478                                 mbuf_pre = mbuf;
479                                 mbuf = mbuf->next;
480                                 if (total_len >= limit_len) {
481                                         limit_len = adjust_mss;
482                                         break;
483                                 }
484                         }
485
486                         checked_len += total_len;
487
488                         /* try to copy if not valid */
489                         if (unlikely(first_mss_sges == i)) {
490                                 left_sges -= first_mss_sges;
491                                 checked_len -= mbuf_pre->data_len;
492
493                                 left_len = mbuf_head->pkt_len - checked_len;
494                                 if (left_len > HINIC_COPY_MBUF_SIZE)
495                                         return false;
496
497                                 sqe_info->sge_cnt = mbuf_head->nb_segs -
498                                                         left_sges;
499                                 sqe_info->cpy_mbuf_cnt = 1;
500
501                                 return true;
502                         }
503                         first_mss_sges = (HINIC_NONTSO_PKT_MAX_SGE - 1);
504
505                         /* continue with the next 16 mbufs */
506                         left_sges -= (i + 1);
507                 } /* end of while */
508         }
509
510         sqe_info->sge_cnt = mbuf_head->nb_segs;
511         return true;
512 }
513
514 static inline void
515 hinic_set_l4_csum_info(struct hinic_sq_task *task,
516                 u32 *queue_info, struct hinic_tx_offload_info *poff_info)
517 {
518         u32 tcp_udp_cs, sctp = 0;
519         u16 l2hdr_len;
520
521         if (unlikely(poff_info->inner_l4_type == SCTP_OFFLOAD_ENABLE))
522                 sctp = 1;
523
524         tcp_udp_cs = poff_info->inner_l4_tcp_udp;
525
526         if (poff_info->tunnel_type == TUNNEL_UDP_CSUM ||
527             poff_info->tunnel_type == TUNNEL_UDP_NO_CSUM) {
528                 l2hdr_len =  poff_info->outer_l2_len;
529
530                 task->pkt_info2 |=
531                 SQ_TASK_INFO2_SET(poff_info->outer_l3_type, OUTER_L3TYPE) |
532                 SQ_TASK_INFO2_SET(poff_info->outer_l3_len, OUTER_L3LEN);
533                 task->pkt_info2 |=
534                 SQ_TASK_INFO2_SET(poff_info->tunnel_type, TUNNEL_L4TYPE) |
535                 SQ_TASK_INFO2_SET(poff_info->tunnel_length, TUNNEL_L4LEN);
536         } else {
537                 l2hdr_len = poff_info->inner_l2_len;
538         }
539
540         task->pkt_info0 |= SQ_TASK_INFO0_SET(l2hdr_len, L2HDR_LEN);
541         task->pkt_info1 |=
542                 SQ_TASK_INFO1_SET(poff_info->inner_l3_len, INNER_L3LEN);
543         task->pkt_info0 |=
544                 SQ_TASK_INFO0_SET(poff_info->inner_l3_type, INNER_L3TYPE);
545         task->pkt_info1 |=
546                 SQ_TASK_INFO1_SET(poff_info->inner_l4_len, INNER_L4LEN);
547         task->pkt_info0 |=
548                 SQ_TASK_INFO0_SET(poff_info->inner_l4_type, L4OFFLOAD);
549         *queue_info |=
550                 SQ_CTRL_QUEUE_INFO_SET(poff_info->payload_offset, PLDOFF) |
551                 SQ_CTRL_QUEUE_INFO_SET(tcp_udp_cs, TCPUDP_CS) |
552                 SQ_CTRL_QUEUE_INFO_SET(sctp, SCTP);
553 }
554
555 static inline void
556 hinic_set_tso_info(struct hinic_sq_task *task,
557                 u32 *queue_info, struct rte_mbuf *mbuf,
558                 struct hinic_tx_offload_info *poff_info)
559 {
560         hinic_set_l4_csum_info(task, queue_info, poff_info);
561
562         /* wqe for tso */
563         task->pkt_info0 |=
564                 SQ_TASK_INFO0_SET(poff_info->inner_l3_type, INNER_L3TYPE);
565         task->pkt_info0 |= SQ_TASK_INFO0_SET(TSO_ENABLE, TSO_UFO);
566         *queue_info |= SQ_CTRL_QUEUE_INFO_SET(TSO_ENABLE, TSO);
567         /* qsf was initialized in prepare_sq_wqe */
568         *queue_info = SQ_CTRL_QUEUE_INFO_CLEAR(*queue_info, MSS);
569         *queue_info |= SQ_CTRL_QUEUE_INFO_SET(mbuf->tso_segsz, MSS);
570 }
571
572 static inline void
573 hinic_set_vlan_tx_offload(struct hinic_sq_task *task,
574                         u32 *queue_info, u16 vlan_tag, u16 vlan_pri)
575 {
576         task->pkt_info0 |= SQ_TASK_INFO0_SET(vlan_tag, VLAN_TAG) |
577                                 SQ_TASK_INFO0_SET(1U, VLAN_OFFLOAD);
578
579         *queue_info |= SQ_CTRL_QUEUE_INFO_SET(vlan_pri, PRI);
580 }
581
582 static inline void
583 hinic_fill_tx_offload_info(struct rte_mbuf *mbuf,
584                 struct hinic_sq_task *task, u32 *queue_info,
585                 struct hinic_tx_offload_info *tx_off_info)
586 {
587         u16 vlan_tag;
588         uint64_t ol_flags = mbuf->ol_flags;
589
590         /* clear DW0~2 of task section for offload */
591         task->pkt_info0 = 0;
592         task->pkt_info1 = 0;
593         task->pkt_info2 = 0;
594
595         /* Base VLAN */
596         if (unlikely(ol_flags & PKT_TX_VLAN_PKT)) {
597                 vlan_tag = mbuf->vlan_tci;
598                 hinic_set_vlan_tx_offload(task, queue_info, vlan_tag,
599                                           vlan_tag >> VLAN_PRIO_SHIFT);
600         }
601
602         /* neither checksum nor tso offload requested */
603         if (unlikely(!(ol_flags & HINIC_TX_CKSUM_OFFLOAD_MASK)))
604                 return;
605
606         if ((ol_flags & PKT_TX_TCP_SEG))
607                 /* set tso info for task and qsf */
608                 hinic_set_tso_info(task, queue_info, mbuf, tx_off_info);
609         else /* just support l4 checksum offload */
610                 hinic_set_l4_csum_info(task, queue_info, tx_off_info);
611 }
612
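/* Reclaim completed tx descriptors up to the hardware consumer index:
 * free any bounce mbufs, return single-segment mbufs to their mempool in
 * bulk, and advance the local consumer index.
 */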
613 static inline void hinic_xmit_mbuf_cleanup(struct hinic_txq *txq)
614 {
615         struct hinic_tx_info *tx_info;
616         struct rte_mbuf *mbuf, *m, *mbuf_free[HINIC_MAX_TX_FREE_BULK];
617         int i, nb_free = 0;
618         u16 hw_ci, sw_ci, sq_mask;
619         int wqebb_cnt = 0;
620
621         hw_ci = HINIC_GET_SQ_HW_CI(txq);
622         sw_ci = HINIC_GET_SQ_LOCAL_CI(txq);
623         sq_mask = HINIC_GET_SQ_WQE_MASK(txq);
624
625         for (i = 0; i < txq->tx_free_thresh; ++i) {
626                 tx_info = &txq->tx_info[sw_ci];
627                 if (hw_ci == sw_ci ||
628                         (((hw_ci - sw_ci) & sq_mask) < tx_info->wqebb_cnt))
629                         break;
630
631                 sw_ci = (sw_ci + tx_info->wqebb_cnt) & sq_mask;
632
633                 if (unlikely(tx_info->cpy_mbuf != NULL)) {
634                         rte_pktmbuf_free(tx_info->cpy_mbuf);
635                         tx_info->cpy_mbuf = NULL;
636                 }
637
638                 wqebb_cnt += tx_info->wqebb_cnt;
639                 mbuf = tx_info->mbuf;
640
641                 if (likely(mbuf->nb_segs == 1)) {
642                         m = rte_pktmbuf_prefree_seg(mbuf);
643                         tx_info->mbuf = NULL;
644
645                         if (unlikely(m == NULL))
646                                 continue;
647
648                         mbuf_free[nb_free++] = m;
649                         if (unlikely(m->pool != mbuf_free[0]->pool ||
650                                 nb_free >= HINIC_MAX_TX_FREE_BULK)) {
651                                 rte_mempool_put_bulk(mbuf_free[0]->pool,
652                                         (void **)mbuf_free, (nb_free - 1));
653                                 nb_free = 0;
654                                 mbuf_free[nb_free++] = m;
655                         }
656                 } else {
657                         rte_pktmbuf_free(mbuf);
658                         tx_info->mbuf = NULL;
659                 }
660         }
661
662         if (nb_free > 0)
663                 rte_mempool_put_bulk(mbuf_free[0]->pool, (void **)mbuf_free,
664                                      nb_free);
665
666         HINIC_UPDATE_SQ_LOCAL_CI(txq, wqebb_cnt);
667 }
668
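/* Reserve wqebb_cnt WQEBBs at the current producer index and return the WQE
 * address. Records pi, owner and wrap-around state in wqe_info; when the WQE
 * crosses the end of the ring, seq_wqebbs covers only the bottom part.
 */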
669 static inline struct hinic_sq_wqe *
670 hinic_get_sq_wqe(struct hinic_txq *txq, int wqebb_cnt,
671                  struct hinic_wqe_info *wqe_info)
672 {
673         u32 cur_pi, end_pi;
674         u16 remain_wqebbs;
675         struct hinic_sq *sq = txq->sq;
676         struct hinic_wq *wq = txq->wq;
677
678         /* record current pi */
679         cur_pi = MASKED_WQE_IDX(wq, wq->prod_idx);
680         end_pi = cur_pi + wqebb_cnt;
681
682         /* update next pi and delta */
683         wq->prod_idx += wqebb_cnt;
684         wq->delta -= wqebb_cnt;
685
686         /* return current pi and owner */
687         wqe_info->pi = cur_pi;
688         wqe_info->owner = sq->owner;
689         wqe_info->around = 0;
690         wqe_info->seq_wqebbs = wqebb_cnt;
691
692         if (unlikely(end_pi >= txq->q_depth)) {
693                 /* update owner of next prod_idx */
694                 sq->owner = !sq->owner;
695
696                 /* turn around to head */
697                 if (unlikely(end_pi > txq->q_depth)) {
698                         wqe_info->around = 1;
699                         remain_wqebbs = txq->q_depth - cur_pi;
700                         wqe_info->seq_wqebbs = remain_wqebbs;
701                 }
702         }
703
704         return (struct hinic_sq_wqe *)WQ_WQE_ADDR(wq, cur_pi);
705 }
706
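/* Local IPv4 pseudo-header checksum helper: for TSO the L4 length is left as
 * zero, otherwise it is derived from total_length minus the IP header length.
 */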
707 static inline uint16_t
708 hinic_ipv4_phdr_cksum(const struct rte_ipv4_hdr *ipv4_hdr, uint64_t ol_flags)
709 {
710         struct ipv4_psd_header {
711                 uint32_t src_addr; /* IP address of source host. */
712                 uint32_t dst_addr; /* IP address of destination host. */
713                 uint8_t  zero;     /* zero. */
714                 uint8_t  proto;    /* L4 protocol type. */
715                 uint16_t len;      /* L4 length. */
716         } psd_hdr;
717         uint8_t ihl;
718
719         psd_hdr.src_addr = ipv4_hdr->src_addr;
720         psd_hdr.dst_addr = ipv4_hdr->dst_addr;
721         psd_hdr.zero = 0;
722         psd_hdr.proto = ipv4_hdr->next_proto_id;
723         if (ol_flags & PKT_TX_TCP_SEG) {
724                 psd_hdr.len = 0;
725         } else {
726                 /* ipv4_hdr->version_ihl is an 8-bit field; the ihl sits in
727                  * its lower 4 bits and is counted in units of 4 bytes
728                  */
729                 ihl = (ipv4_hdr->version_ihl & 0xF) << 2;
730                 psd_hdr.len =
731                 rte_cpu_to_be_16(rte_be_to_cpu_16(ipv4_hdr->total_length) -
732                                  ihl);
733         }
734         return rte_raw_cksum(&psd_hdr, sizeof(psd_hdr));
735 }
736
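/* IPv6 pseudo-header checksum over the source/destination addresses, the L4
 * length (zero for TSO) and the next-header protocol.
 */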
737 static inline uint16_t
738 hinic_ipv6_phdr_cksum(const struct rte_ipv6_hdr *ipv6_hdr, uint64_t ol_flags)
739 {
740         uint32_t sum;
741         struct {
742                 uint32_t len;   /* L4 length. */
743                 uint32_t proto; /* L4 protocol - top 3 bytes must be zero */
744         } psd_hdr;
745
746         psd_hdr.proto = (ipv6_hdr->proto << 24);
747         if (ol_flags & PKT_TX_TCP_SEG)
748                 psd_hdr.len = 0;
749         else
750                 psd_hdr.len = ipv6_hdr->payload_len;
751
752         sum = __rte_raw_cksum(ipv6_hdr->src_addr,
753                 sizeof(ipv6_hdr->src_addr) + sizeof(ipv6_hdr->dst_addr), 0);
754         sum = __rte_raw_cksum(&psd_hdr, sizeof(psd_hdr), sum);
755         return __rte_raw_cksum_reduce(sum);
756 }
757
758 static inline void hinic_get_outer_cs_pld_offset(struct rte_mbuf *m,
759                                         struct hinic_tx_offload_info *off_info)
760 {
761         uint64_t ol_flags = m->ol_flags;
762
763         if ((ol_flags & PKT_TX_L4_MASK) == PKT_TX_UDP_CKSUM)
764                 off_info->payload_offset = m->outer_l2_len + m->outer_l3_len +
765                                            m->l2_len + m->l3_len;
766         else if ((ol_flags & PKT_TX_TCP_CKSUM) || (ol_flags & PKT_TX_TCP_SEG))
767                 off_info->payload_offset = m->outer_l2_len + m->outer_l3_len +
768                                            m->l2_len + m->l3_len + m->l4_len;
769 }
770
771 static inline void hinic_get_pld_offset(struct rte_mbuf *m,
772                                         struct hinic_tx_offload_info *off_info)
773 {
774         uint64_t ol_flags = m->ol_flags;
775
776         if ((ol_flags & PKT_TX_L4_MASK) == PKT_TX_UDP_CKSUM)
777                 off_info->payload_offset = m->l2_len + m->l3_len;
778         else if ((ol_flags & PKT_TX_TCP_CKSUM) || (ol_flags & PKT_TX_TCP_SEG))
779                 off_info->payload_offset = m->l2_len + m->l3_len +
780                                            m->l4_len;
781 }
782
783 static inline void hinic_analyze_tx_info(struct rte_mbuf *mbuf,
784                                          struct hinic_tx_offload_info *off_info)
785 {
786         struct rte_ether_hdr *eth_hdr;
787         struct rte_vlan_hdr *vlan_hdr;
788         struct rte_ipv4_hdr *ip4h;
789         u16 pkt_type;
790         u8 *hdr;
791
792         hdr = (u8 *)rte_pktmbuf_mtod(mbuf, u8*);
793         eth_hdr = (struct rte_ether_hdr *)hdr;
794         pkt_type = rte_be_to_cpu_16(eth_hdr->ether_type);
795
796         if (pkt_type == RTE_ETHER_TYPE_VLAN) {
797                 off_info->outer_l2_len = ETHER_LEN_WITH_VLAN;
798                 vlan_hdr = (struct rte_vlan_hdr *)(eth_hdr + 1);
799                 pkt_type = rte_be_to_cpu_16(vlan_hdr->eth_proto);
800         } else {
801                 off_info->outer_l2_len = ETHER_LEN_NO_VLAN;
802         }
803
804         if (pkt_type == RTE_ETHER_TYPE_IPV4) {
805                 ip4h = (struct rte_ipv4_hdr *)(hdr + off_info->outer_l2_len);
806                 off_info->outer_l3_len = (ip4h->version_ihl & 0xf) <<
807                                         HEADER_LEN_OFFSET;
808         } else if (pkt_type == RTE_ETHER_TYPE_IPV6) {
809                 /* ipv6 extension headers are not supported */
810                 off_info->outer_l3_len = sizeof(struct rte_ipv6_hdr);
811         }
812 }
813
814 static inline void hinic_analyze_outer_ip_vxlan(struct rte_mbuf *mbuf,
815                                         struct hinic_tx_offload_info *off_info)
816 {
817         struct rte_ether_hdr *eth_hdr;
818         struct rte_vlan_hdr *vlan_hdr;
819         struct rte_ipv4_hdr *ipv4_hdr;
820         struct rte_udp_hdr *udp_hdr;
821         u16 eth_type = 0;
822
823         eth_hdr = rte_pktmbuf_mtod(mbuf, struct rte_ether_hdr *);
824         eth_type = rte_be_to_cpu_16(eth_hdr->ether_type);
825
826         if (eth_type == RTE_ETHER_TYPE_VLAN) {
827                 vlan_hdr = (struct rte_vlan_hdr *)(eth_hdr + 1);
828                 eth_type = rte_be_to_cpu_16(vlan_hdr->eth_proto);
829         }
830
831         if (eth_type == RTE_ETHER_TYPE_IPV4) {
832                 ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf, struct rte_ipv4_hdr *,
833                                                    mbuf->outer_l2_len);
834                 off_info->outer_l3_type = IPV4_PKT_WITH_CHKSUM_OFFLOAD;
835                 ipv4_hdr->hdr_checksum = 0;
836
837                 udp_hdr = (struct rte_udp_hdr *)((char *)ipv4_hdr +
838                                                  mbuf->outer_l3_len);
839                 udp_hdr->dgram_cksum = 0;
840         } else if (eth_type == RTE_ETHER_TYPE_IPV6) {
841                 off_info->outer_l3_type = IPV6_PKT;
842
843                 udp_hdr = rte_pktmbuf_mtod_offset(mbuf, struct rte_udp_hdr *,
844                                                   (mbuf->outer_l2_len +
845                                                    mbuf->outer_l3_len));
846                 udp_hdr->dgram_cksum = 0;
847         }
848 }
849
850 static inline uint8_t hinic_analyze_l3_type(struct rte_mbuf *mbuf)
851 {
852         uint8_t l3_type;
853         uint64_t ol_flags = mbuf->ol_flags;
854
855         if (ol_flags & PKT_TX_IPV4)
856                 l3_type = (ol_flags & PKT_TX_IP_CKSUM) ?
857                           IPV4_PKT_WITH_CHKSUM_OFFLOAD :
858                           IPV4_PKT_NO_CHKSUM_OFFLOAD;
859         else if (ol_flags & PKT_TX_IPV6)
860                 l3_type = IPV6_PKT;
861         else
862                 l3_type = UNKNOWN_L3TYPE;
863
864         return l3_type;
865 }
866
867 static inline void hinic_calculate_tcp_checksum(struct rte_mbuf *mbuf,
868                                         struct hinic_tx_offload_info *off_info,
869                                         uint64_t inner_l3_offset)
870 {
871         struct rte_ipv4_hdr *ipv4_hdr;
872         struct rte_ipv6_hdr *ipv6_hdr;
873         struct rte_tcp_hdr *tcp_hdr;
874         uint64_t ol_flags = mbuf->ol_flags;
875
876         if (ol_flags & PKT_TX_IPV4) {
877                 ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf, struct rte_ipv4_hdr *,
878                                                    inner_l3_offset);
879
880                 if (ol_flags & PKT_TX_IP_CKSUM)
881                         ipv4_hdr->hdr_checksum = 0;
882
883                 tcp_hdr = (struct rte_tcp_hdr *)((char *)ipv4_hdr +
884                                                  mbuf->l3_len);
885                 tcp_hdr->cksum = hinic_ipv4_phdr_cksum(ipv4_hdr, ol_flags);
886         } else {
887                 ipv6_hdr = rte_pktmbuf_mtod_offset(mbuf, struct rte_ipv6_hdr *,
888                                                    inner_l3_offset);
889                 tcp_hdr = rte_pktmbuf_mtod_offset(mbuf, struct rte_tcp_hdr *,
890                                                   (inner_l3_offset +
891                                                    mbuf->l3_len));
892                 tcp_hdr->cksum = hinic_ipv6_phdr_cksum(ipv6_hdr, ol_flags);
893         }
894
895         off_info->inner_l4_type = TCP_OFFLOAD_ENABLE;
896         off_info->inner_l4_tcp_udp = 1;
897 }
898
899 static inline void hinic_calculate_udp_checksum(struct rte_mbuf *mbuf,
900                                         struct hinic_tx_offload_info *off_info,
901                                         uint64_t inner_l3_offset)
902 {
903         struct rte_ipv4_hdr *ipv4_hdr;
904         struct rte_ipv6_hdr *ipv6_hdr;
905         struct rte_udp_hdr *udp_hdr;
906         uint64_t ol_flags = mbuf->ol_flags;
907
908         if (ol_flags & PKT_TX_IPV4) {
909                 ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf, struct rte_ipv4_hdr *,
910                                                    inner_l3_offset);
911
912                 if (ol_flags & PKT_TX_IP_CKSUM)
913                         ipv4_hdr->hdr_checksum = 0;
914
915                 udp_hdr = (struct rte_udp_hdr *)((char *)ipv4_hdr +
916                                                  mbuf->l3_len);
917                 udp_hdr->dgram_cksum = hinic_ipv4_phdr_cksum(ipv4_hdr,
918                                                              ol_flags);
919         } else {
920                 ipv6_hdr = rte_pktmbuf_mtod_offset(mbuf, struct rte_ipv6_hdr *,
921                                                    inner_l3_offset);
922
923                 udp_hdr = rte_pktmbuf_mtod_offset(mbuf, struct rte_udp_hdr *,
924                                                   (inner_l3_offset +
925                                                    mbuf->l3_len));
926                 udp_hdr->dgram_cksum = hinic_ipv6_phdr_cksum(ipv6_hdr,
927                                                              ol_flags);
928         }
929
930         off_info->inner_l4_type = UDP_OFFLOAD_ENABLE;
931         off_info->inner_l4_tcp_udp = 1;
932 }
933
934 static inline void
935 hinic_calculate_sctp_checksum(struct hinic_tx_offload_info *off_info)
936 {
937         off_info->inner_l4_type = SCTP_OFFLOAD_ENABLE;
938         off_info->inner_l4_tcp_udp = 0;
939         off_info->inner_l4_len = sizeof(struct rte_sctp_hdr);
940 }
941
942 static inline void hinic_calculate_checksum(struct rte_mbuf *mbuf,
943                                         struct hinic_tx_offload_info *off_info,
944                                         uint64_t inner_l3_offset)
945 {
946         uint64_t ol_flags = mbuf->ol_flags;
947
948         switch (ol_flags & PKT_TX_L4_MASK) {
949         case PKT_TX_UDP_CKSUM:
950                 hinic_calculate_udp_checksum(mbuf, off_info, inner_l3_offset);
951                 break;
952
953         case PKT_TX_TCP_CKSUM:
954                 hinic_calculate_tcp_checksum(mbuf, off_info, inner_l3_offset);
955                 break;
956
957         case PKT_TX_SCTP_CKSUM:
958                 hinic_calculate_sctp_checksum(off_info);
959                 break;
960
961         default:
962                 if (ol_flags & PKT_TX_TCP_SEG)
963                         hinic_calculate_tcp_checksum(mbuf, off_info,
964                                                      inner_l3_offset);
965                 break;
966         }
967 }
968
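/* Parse checksum/TSO offload requests from the mbuf: only VXLAN tunnels are
 * supported, header lengths and the payload offset are derived from the
 * offload flags, and the L4 pseudo-header checksum is pre-computed for the
 * hardware. Returns 0 on success, -ENOTSUP or -EINVAL otherwise.
 */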
969 static inline int hinic_tx_offload_pkt_prepare(struct rte_mbuf *m,
970                                         struct hinic_tx_offload_info *off_info)
971 {
972         uint64_t inner_l3_offset;
973         uint64_t ol_flags = m->ol_flags;
974
975         /* Check if the packets set available offload flags */
976         if (!(ol_flags & HINIC_TX_CKSUM_OFFLOAD_MASK))
977                 return 0;
978
979         /* Support only vxlan offload */
980         if (unlikely((ol_flags & PKT_TX_TUNNEL_MASK) &&
981             !(ol_flags & PKT_TX_TUNNEL_VXLAN)))
982                 return -ENOTSUP;
983
984 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
985         if (rte_validate_tx_offload(m) != 0)
986                 return -EINVAL;
987 #endif
988
989         if (ol_flags & PKT_TX_TUNNEL_VXLAN) {
990                 off_info->tunnel_type = TUNNEL_UDP_NO_CSUM;
991
992                 /* inner_l4_tcp_udp must be set so the outer udp checksum
993                  * is calculated even for vxlan packets without inner l3/l4
994                  */
995                 off_info->inner_l4_tcp_udp = 1;
996
997                 if ((ol_flags & PKT_TX_OUTER_IP_CKSUM) ||
998                     (ol_flags & PKT_TX_OUTER_IPV6) ||
999                     (ol_flags & PKT_TX_TCP_SEG)) {
1000                         inner_l3_offset = m->l2_len + m->outer_l2_len +
1001                                           m->outer_l3_len;
1002                         off_info->outer_l2_len = m->outer_l2_len;
1003                         off_info->outer_l3_len = m->outer_l3_len;
1004                         /* only vxlan tunnel packets are supported */
1005                         off_info->inner_l2_len = m->l2_len - VXLANLEN -
1006                                                  sizeof(struct rte_udp_hdr);
1007                         off_info->tunnel_length = m->l2_len;
1008
1009                         hinic_analyze_outer_ip_vxlan(m, off_info);
1010
1011                         hinic_get_outer_cs_pld_offset(m, off_info);
1012                 } else {
1013                         inner_l3_offset = m->l2_len;
1014                         hinic_analyze_tx_info(m, off_info);
1015                         /* only vxlan tunnel packets are supported */
1016                         off_info->inner_l2_len = m->l2_len - VXLANLEN -
1017                                                  sizeof(struct rte_udp_hdr) -
1018                                                  off_info->outer_l2_len -
1019                                                  off_info->outer_l3_len;
1020                         off_info->tunnel_length = m->l2_len -
1021                                                   off_info->outer_l2_len -
1022                                                   off_info->outer_l3_len;
1023                         off_info->outer_l3_type = IPV4_PKT_NO_CHKSUM_OFFLOAD;
1024
1025                         hinic_get_pld_offset(m, off_info);
1026                 }
1027         } else {
1028                 inner_l3_offset = m->l2_len;
1029                 off_info->inner_l2_len = m->l2_len;
1030                 off_info->tunnel_type = NOT_TUNNEL;
1031
1032                 hinic_get_pld_offset(m, off_info);
1033         }
1034
1035         /* invalid udp or tcp header */
1036         if (unlikely(off_info->payload_offset > MAX_PLD_OFFSET))
1037                 return -EINVAL;
1038
1039         off_info->inner_l3_len = m->l3_len;
1040         off_info->inner_l4_len = m->l4_len;
1041         off_info->inner_l3_type = hinic_analyze_l3_type(m);
1042
1043         /* Process the pseudo-header checksum */
1044         hinic_calculate_checksum(m, off_info, inner_l3_offset);
1045
1046         return 0;
1047 }
1048
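/* Prepare offload info and decide the SGE count for one packet. Non-TSO
 * packets are limited to 64KB and 17 SGEs (extra segments are copied into
 * one buffer); TSO packets are validated by hinic_is_tso_sge_valid().
 * Returns false if the packet cannot be sent as-is.
 */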
1049 static inline bool hinic_get_sge_txoff_info(struct rte_mbuf *mbuf_pkt,
1050                                             struct hinic_wqe_info *sqe_info,
1051                                             struct hinic_tx_offload_info
1052                                             *off_info)
1053 {
1054         u16  i, total_len, sge_cnt = mbuf_pkt->nb_segs;
1055         struct rte_mbuf *mbuf;
1056         int ret;
1057
1058         memset(off_info, 0, sizeof(*off_info));
1059
1060         ret = hinic_tx_offload_pkt_prepare(mbuf_pkt, off_info);
1061         if (unlikely(ret))
1062                 return false;
1063
1064         sqe_info->cpy_mbuf_cnt = 0;
1065
1066         /* non tso mbuf */
1067         if (likely(!(mbuf_pkt->ol_flags & PKT_TX_TCP_SEG))) {
1068                 if (unlikely(mbuf_pkt->pkt_len > MAX_SINGLE_SGE_SIZE)) {
1069                         /* non-tso packet len must be less than 64KB */
1070                         return false;
1071                 } else if (unlikely(HINIC_NONTSO_SEG_NUM_INVALID(sge_cnt))) {
1072                         /* a non-tso packet must not use more than 17 buffers;
1073                          * any extra mbuf segs must be copied into one buffer
1074                          */
1075                         total_len = 0;
1076                         mbuf = mbuf_pkt;
1077                         for (i = 0; i < (HINIC_NONTSO_PKT_MAX_SGE - 1) ; i++) {
1078                                 total_len += mbuf->data_len;
1079                                 mbuf = mbuf->next;
1080                         }
1081
1082                         /* by default at most 4k bytes of mbuf segs can be copied */
1083                         if ((u32)(total_len + (u16)HINIC_COPY_MBUF_SIZE) <
1084                                   mbuf_pkt->pkt_len)
1085                                 return false;
1086
1087                         sqe_info->sge_cnt = HINIC_NONTSO_PKT_MAX_SGE;
1088                         sqe_info->cpy_mbuf_cnt = 1;
1089                         return true;
1090                 }
1091
1092                 /* valid non tso mbuf */
1093                 sqe_info->sge_cnt = sge_cnt;
1094         } else {
1095                 /* tso mbuf */
1096                 if (unlikely(HINIC_TSO_SEG_NUM_INVALID(sge_cnt)))
1097                         /* too many mbuf segs */
1098                         return false;
1099
1100                 /* check tso mbuf segs are valid or not */
1101                 if (unlikely(!hinic_is_tso_sge_valid(mbuf_pkt,
1102                              off_info, sqe_info)))
1103                         return false;
1104         }
1105
1106         return true;
1107 }
1108
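/* Ring the SQ doorbell: encode the high bits of the producer index, the queue
 * id and the class of service into the doorbell word and write it to the
 * doorbell address (big endian) after a write barrier.
 */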
1109 static inline void hinic_sq_write_db(struct hinic_sq *sq, int cos)
1110 {
1111         u16 prod_idx;
1112         u32 hi_prod_idx;
1113         struct hinic_sq_db sq_db;
1114
1115         prod_idx = MASKED_SQ_IDX(sq, sq->wq->prod_idx);
1116         hi_prod_idx = SQ_DB_PI_HIGH(prod_idx);
1117
1118         sq_db.db_info = SQ_DB_INFO_SET(hi_prod_idx, HI_PI) |
1119                         SQ_DB_INFO_SET(SQ_DB, TYPE) |
1120                         SQ_DB_INFO_SET(SQ_CFLAG_DP, CFLAG) |
1121                         SQ_DB_INFO_SET(cos, COS) |
1122                         SQ_DB_INFO_SET(sq->q_id, QID);
1123
1124         /* Data should be written to HW in Big Endian Format */
1125         sq_db.db_info = cpu_to_be32(sq_db.db_info);
1126
1127         /* Write all before the doorbell */
1128         rte_wmb();
1129         writel(sq_db.db_info, SQ_DB_ADDR(sq, prod_idx));
1130 }
1131
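/* Burst transmit entry point. For each mbuf: parse offload info, reserve
 * WQEBBs, fill the SGE, task and control sections of the WQE, then ring the
 * doorbell once for the whole burst.
 */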
1132 u16 hinic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, u16 nb_pkts)
1133 {
1134         int free_wqebb_cnt, wqe_wqebb_cnt;
1135         u32 queue_info, tx_bytes = 0;
1136         u16 nb_tx;
1137         struct hinic_wqe_info sqe_info;
1138         struct hinic_tx_offload_info off_info;
1139         struct rte_mbuf *mbuf_pkt;
1140         struct hinic_txq *txq = tx_queue;
1141         struct hinic_tx_info *tx_info;
1142         struct hinic_sq_wqe *sq_wqe;
1143         struct hinic_sq_task *task;
1144
1145         /* reclaim tx mbufs before transmitting new packets */
1146         if (HINIC_GET_SQ_FREE_WQEBBS(txq) < txq->tx_free_thresh)
1147                 hinic_xmit_mbuf_cleanup(txq);
1148
1149         /* tx loop routine */
1150         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
1151                 mbuf_pkt = *tx_pkts++;
1152                 queue_info = 0;
1153
1154                 /* 1. parse sge and tx offload info from mbuf */
1155                 if (unlikely(!hinic_get_sge_txoff_info(mbuf_pkt,
1156                                                        &sqe_info, &off_info))) {
1157                         txq->txq_stats.off_errs++;
1158                         break;
1159                 }
1160
1161                 /* 2. try to get enough wqebbs */
1162                 wqe_wqebb_cnt = HINIC_SQ_WQEBB_CNT(sqe_info.sge_cnt);
1163                 free_wqebb_cnt = HINIC_GET_SQ_FREE_WQEBBS(txq);
1164                 if (unlikely(wqe_wqebb_cnt > free_wqebb_cnt)) {
1165                         /* reclaim again */
1166                         hinic_xmit_mbuf_cleanup(txq);
1167                         free_wqebb_cnt = HINIC_GET_SQ_FREE_WQEBBS(txq);
1168                         if (unlikely(wqe_wqebb_cnt > free_wqebb_cnt)) {
1169                                 txq->txq_stats.tx_busy += (nb_pkts - nb_tx);
1170                                 break;
1171                         }
1172                 }
1173
1174                 /* 3. get the sq tail wqe address from the wqe page;
1175                  * the sq has enough wqebbs for this packet
1176                  */
1177                 sq_wqe = hinic_get_sq_wqe(txq, wqe_wqebb_cnt, &sqe_info);
1178
1179                 /* 4. fill sq wqe sge section */
1180                 if (unlikely(!hinic_mbuf_dma_map_sge(txq, mbuf_pkt,
1181                                                      sq_wqe->buf_descs,
1182                                                      &sqe_info))) {
1183                         hinic_return_sq_wqe(txq->nic_dev->hwdev, txq->q_id,
1184                                             wqe_wqebb_cnt, sqe_info.owner);
1185                         txq->txq_stats.off_errs++;
1186                         break;
1187                 }
1188
1189                 /* 5. fill sq wqe task section and queue info */
1190                 task = &sq_wqe->task;
1191
1192                 /* tx packet offload configure */
1193                 hinic_fill_tx_offload_info(mbuf_pkt, task, &queue_info,
1194                                            &off_info);
1195
1196                 /* 6. record tx info */
1197                 tx_info = &txq->tx_info[sqe_info.pi];
1198                 tx_info->mbuf = mbuf_pkt;
1199                 tx_info->wqebb_cnt = wqe_wqebb_cnt;
1200
1201                 /* 7. fill sq wqe header section */
1202                 hinic_fill_sq_wqe_header(&sq_wqe->ctrl, queue_info,
1203                                          sqe_info.sge_cnt, sqe_info.owner);
1204
1205                 /* 8. convert the contiguous or bottom part of the wqe to big endian */
1206                 hinic_sq_wqe_cpu_to_be32(sq_wqe, sqe_info.seq_wqebbs);
1207
1208                 tx_bytes += mbuf_pkt->pkt_len;
1209         }
1210
1211         /* 9. write sq doorbell in burst mode */
1212         if (nb_tx) {
1213                 hinic_sq_write_db(txq->sq, txq->cos);
1214
1215                 txq->txq_stats.packets += nb_tx;
1216                 txq->txq_stats.bytes += tx_bytes;
1217         }
1218         txq->txq_stats.burst_pkts = nb_tx;
1219
1220         return nb_tx;
1221 }
1222
1223 void hinic_free_all_tx_mbufs(struct hinic_txq *txq)
1224 {
1225         u16 ci;
1226         struct hinic_nic_dev *nic_dev = txq->nic_dev;
1227         struct hinic_tx_info *tx_info;
1228         int free_wqebbs = hinic_get_sq_free_wqebbs(nic_dev->hwdev,
1229                                                    txq->q_id) + 1;
1230
1231         while (free_wqebbs < txq->q_depth) {
1232                 ci = hinic_get_sq_local_ci(nic_dev->hwdev, txq->q_id);
1233
1234                 tx_info = &txq->tx_info[ci];
1235
1236                 if (unlikely(tx_info->cpy_mbuf != NULL)) {
1237                         rte_pktmbuf_free(tx_info->cpy_mbuf);
1238                         tx_info->cpy_mbuf = NULL;
1239                 }
1240
1241                 rte_pktmbuf_free(tx_info->mbuf);
1242                 hinic_update_sq_local_ci(nic_dev->hwdev, txq->q_id,
1243                                          tx_info->wqebb_cnt);
1244
1245                 free_wqebbs += tx_info->wqebb_cnt;
1246                 tx_info->mbuf = NULL;
1247         }
1248 }
1249
1250 void hinic_free_all_tx_resources(struct rte_eth_dev *eth_dev)
1251 {
1252         u16 q_id;
1253         struct hinic_nic_dev *nic_dev =
1254                                 HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(eth_dev);
1255
1256         for (q_id = 0; q_id < nic_dev->num_sq; q_id++) {
1257                 if (eth_dev->data->tx_queues != NULL)
1258                         eth_dev->data->tx_queues[q_id] = NULL;
1259
1260                 if (nic_dev->txqs[q_id] == NULL)
1261                         continue;
1262
1263                 /* stop the tx queue and free its tx mbufs */
1264                 hinic_free_all_tx_mbufs(nic_dev->txqs[q_id]);
1265                 hinic_free_tx_resources(nic_dev->txqs[q_id]);
1266
1267                 /* free txq */
1268                 kfree(nic_dev->txqs[q_id]);
1269                 nic_dev->txqs[q_id] = NULL;
1270         }
1271 }
1272
1273 void hinic_free_all_tx_mbuf(struct rte_eth_dev *eth_dev)
1274 {
1275         u16 q_id;
1276         struct hinic_nic_dev *nic_dev =
1277                                 HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(eth_dev);
1278
1279         for (q_id = 0; q_id < nic_dev->num_sq; q_id++)
1280                 /* stop the tx queue and free its tx mbufs */
1281                 hinic_free_all_tx_mbufs(nic_dev->txqs[q_id]);
1282 }
1283
1284 int hinic_setup_tx_resources(struct hinic_txq *txq)
1285 {
1286         u64 tx_info_sz;
1287
1288         tx_info_sz = txq->q_depth * sizeof(*txq->tx_info);
1289         txq->tx_info = rte_zmalloc_socket("tx_info", tx_info_sz,
1290                         RTE_CACHE_LINE_SIZE, txq->socket_id);
1291         if (!txq->tx_info)
1292                 return -ENOMEM;
1293
1294         return HINIC_OK;
1295 }
1296
1297 void hinic_free_tx_resources(struct hinic_txq *txq)
1298 {
1299         if (txq->tx_info == NULL)
1300                 return;
1301
1302         rte_free(txq->tx_info);
1303         txq->tx_info = NULL;
1304 }
1305
1306 int hinic_create_sq(struct hinic_hwdev *hwdev, u16 q_id,
1307                         u16 sq_depth, unsigned int socket_id)
1308 {
1309         int err;
1310         struct hinic_nic_io *nic_io = hwdev->nic_io;
1311         struct hinic_qp *qp = &nic_io->qps[q_id];
1312         struct hinic_sq *sq = &qp->sq;
1313         void __iomem *db_addr;
1314         volatile u32 *ci_addr;
1315
1316         sq->sq_depth = sq_depth;
1317         nic_io->sq_depth = sq_depth;
1318
1319         /* alloc wq */
1320         err = hinic_wq_allocate(nic_io->hwdev, &nic_io->sq_wq[q_id],
1321                                 HINIC_SQ_WQEBB_SHIFT, nic_io->sq_depth,
1322                                 socket_id);
1323         if (err) {
1324                 PMD_DRV_LOG(ERR, "Failed to allocate WQ for SQ");
1325                 return err;
1326         }
1327
1328         /* alloc sq doorbell space */
1329         err = hinic_alloc_db_addr(nic_io->hwdev, &db_addr);
1330         if (err) {
1331                 PMD_DRV_LOG(ERR, "Failed to init db addr");
1332                 goto alloc_db_err;
1333         }
1334
1335         /* clear hardware ci */
1336         ci_addr = (volatile u32 *)HINIC_CI_VADDR(nic_io->ci_vaddr_base, q_id);
1337         *ci_addr = 0;
1338
1339         sq->q_id = q_id;
1340         sq->wq = &nic_io->sq_wq[q_id];
1341         sq->owner = 1;
1342         sq->cons_idx_addr = (volatile u16 *)ci_addr;
1343         sq->db_addr = db_addr;
1344
1345         return HINIC_OK;
1346
1347 alloc_db_err:
1348         hinic_wq_free(nic_io->hwdev, &nic_io->sq_wq[q_id]);
1349
1350         return err;
1351 }
1352
1353 void hinic_destroy_sq(struct hinic_hwdev *hwdev, u16 q_id)
1354 {
1355         struct hinic_nic_io *nic_io;
1356         struct hinic_qp *qp;
1357
1358         nic_io = hwdev->nic_io;
1359         qp = &nic_io->qps[q_id];
1360
1361         if (qp->sq.wq == NULL)
1362                 return;
1363
1364         hinic_free_db_addr(nic_io->hwdev, qp->sq.db_addr);
1365         hinic_wq_free(nic_io->hwdev, qp->sq.wq);
1366         qp->sq.wq = NULL;
1367 }