net/i40e: fix Rx packet statistics
[dpdk.git] / drivers / net / hinic / hinic_pmd_tx.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2017 Huawei Technologies Co., Ltd
3  */
4
5 #include <rte_mbuf.h>
6 #include <rte_tcp.h>
7 #include <rte_sctp.h>
8 #include <rte_udp.h>
9 #include <rte_ip.h>
10 #ifdef RTE_ARCH_ARM64
11 #include <arm_neon.h>
12 #endif
13
14 #include "base/hinic_compat.h"
15 #include "base/hinic_pmd_hwdev.h"
16 #include "base/hinic_pmd_hwif.h"
17 #include "base/hinic_pmd_wq.h"
18 #include "base/hinic_pmd_nicio.h"
19 #include "base/hinic_pmd_niccfg.h"
20 #include "hinic_pmd_ethdev.h"
21 #include "hinic_pmd_tx.h"
22
/* packet header and tx offload info */
#define ETHER_LEN_NO_VLAN               14
#define ETHER_LEN_WITH_VLAN             18
#define VXLANLEN                        8
#define MAX_PLD_OFFSET                  221
#define MAX_SINGLE_SGE_SIZE             65536
#define TSO_ENABLE                      1
#define TX_MSS_DEFAULT                  0x3E00
#define TX_MSS_MIN                      0x50

/* a non-tso packet may carry at most 17 sges */
#define HINIC_NONTSO_PKT_MAX_SGE                17
#define HINIC_NONTSO_SEG_NUM_INVALID(num)       \
                        ((num) > HINIC_NONTSO_PKT_MAX_SGE)

/* a tso packet may carry at most 127 sges */
#define HINIC_TSO_PKT_MAX_SGE                   127
#define HINIC_TSO_SEG_NUM_INVALID(num)          ((num) > HINIC_TSO_PKT_MAX_SGE)

/*
 * sizeof(struct hinic_sq_bufdesc) == 16, shift 4.
 * The argument is parenthesized before the cast so that an expression
 * argument is converted to u32 as a whole, not just its first operand.
 */
#define HINIC_BUF_DESC_SIZE(nr_descs)   (SIZE_8BYTES(((u32)(nr_descs)) << 4))

#define MASKED_SQ_IDX(sq, idx)          ((idx) & (sq)->wq->mask)
44
/*
 * SQ_CTRL: bit layout of the wqe control section.
 * Each field is described by a <FIELD>_SHIFT / <FIELD>_MASK pair and is
 * composed with the matching *_SET(val, member) macro. All *_SET macros
 * convert val to u32 before masking so they yield the same type whatever
 * the width of the argument.
 */
#define SQ_CTRL_BUFDESC_SECT_LEN_SHIFT          0
#define SQ_CTRL_TASKSECT_LEN_SHIFT              16
#define SQ_CTRL_DATA_FORMAT_SHIFT               22
#define SQ_CTRL_LEN_SHIFT                       29
#define SQ_CTRL_OWNER_SHIFT                     31

#define SQ_CTRL_BUFDESC_SECT_LEN_MASK           0xFFU
#define SQ_CTRL_TASKSECT_LEN_MASK               0x1FU
#define SQ_CTRL_DATA_FORMAT_MASK                0x1U
#define SQ_CTRL_LEN_MASK                        0x3U
#define SQ_CTRL_OWNER_MASK                      0x1U

#define SQ_CTRL_SET(val, member)        \
        (((u32)(val) & SQ_CTRL_##member##_MASK) << SQ_CTRL_##member##_SHIFT)

/* queue_info word of the control section */
#define SQ_CTRL_QUEUE_INFO_PLDOFF_SHIFT         2
#define SQ_CTRL_QUEUE_INFO_UFO_SHIFT            10
#define SQ_CTRL_QUEUE_INFO_TSO_SHIFT            11
#define SQ_CTRL_QUEUE_INFO_TCPUDP_CS_SHIFT      12
#define SQ_CTRL_QUEUE_INFO_MSS_SHIFT            13
#define SQ_CTRL_QUEUE_INFO_SCTP_SHIFT           27
#define SQ_CTRL_QUEUE_INFO_UC_SHIFT             28
#define SQ_CTRL_QUEUE_INFO_PRI_SHIFT            29

#define SQ_CTRL_QUEUE_INFO_PLDOFF_MASK          0xFFU
#define SQ_CTRL_QUEUE_INFO_UFO_MASK             0x1U
#define SQ_CTRL_QUEUE_INFO_TSO_MASK             0x1U
#define SQ_CTRL_QUEUE_INFO_TCPUDP_CS_MASK       0x1U
#define SQ_CTRL_QUEUE_INFO_MSS_MASK             0x3FFFU
#define SQ_CTRL_QUEUE_INFO_SCTP_MASK            0x1U
#define SQ_CTRL_QUEUE_INFO_UC_MASK              0x1U
#define SQ_CTRL_QUEUE_INFO_PRI_MASK             0x7U

#define SQ_CTRL_QUEUE_INFO_SET(val, member)     \
        (((u32)(val) & SQ_CTRL_QUEUE_INFO_##member##_MASK) <<   \
                        SQ_CTRL_QUEUE_INFO_##member##_SHIFT)

/* extract one field from a queue_info word */
#define SQ_CTRL_QUEUE_INFO_GET(val, member)     \
        (((val) >> SQ_CTRL_QUEUE_INFO_##member##_SHIFT) &       \
                        SQ_CTRL_QUEUE_INFO_##member##_MASK)

/* queue_info word with one field zeroed */
#define SQ_CTRL_QUEUE_INFO_CLEAR(val, member)   \
        ((val) & (~(SQ_CTRL_QUEUE_INFO_##member##_MASK << \
                        SQ_CTRL_QUEUE_INFO_##member##_SHIFT)))

/* task section, pkt_info0 */
#define SQ_TASK_INFO0_L2HDR_LEN_SHIFT           0
#define SQ_TASK_INFO0_L4OFFLOAD_SHIFT           8
#define SQ_TASK_INFO0_INNER_L3TYPE_SHIFT        10
#define SQ_TASK_INFO0_VLAN_OFFLOAD_SHIFT        12
#define SQ_TASK_INFO0_PARSE_FLAG_SHIFT          13
#define SQ_TASK_INFO0_UFO_AVD_SHIFT             14
#define SQ_TASK_INFO0_TSO_UFO_SHIFT             15
#define SQ_TASK_INFO0_VLAN_TAG_SHIFT            16

#define SQ_TASK_INFO0_L2HDR_LEN_MASK            0xFFU
#define SQ_TASK_INFO0_L4OFFLOAD_MASK            0x3U
#define SQ_TASK_INFO0_INNER_L3TYPE_MASK         0x3U
#define SQ_TASK_INFO0_VLAN_OFFLOAD_MASK         0x1U
#define SQ_TASK_INFO0_PARSE_FLAG_MASK           0x1U
#define SQ_TASK_INFO0_UFO_AVD_MASK              0x1U
#define SQ_TASK_INFO0_TSO_UFO_MASK              0x1U
#define SQ_TASK_INFO0_VLAN_TAG_MASK             0xFFFFU

#define SQ_TASK_INFO0_SET(val, member)                  \
        (((u32)(val) & SQ_TASK_INFO0_##member##_MASK) <<        \
                        SQ_TASK_INFO0_##member##_SHIFT)

/* task section, pkt_info1 */
#define SQ_TASK_INFO1_MD_TYPE_SHIFT             8
#define SQ_TASK_INFO1_INNER_L4LEN_SHIFT         16
#define SQ_TASK_INFO1_INNER_L3LEN_SHIFT         24

#define SQ_TASK_INFO1_MD_TYPE_MASK              0xFFU
#define SQ_TASK_INFO1_INNER_L4LEN_MASK          0xFFU
#define SQ_TASK_INFO1_INNER_L3LEN_MASK          0xFFU

#define SQ_TASK_INFO1_SET(val, member)                  \
        (((u32)(val) & SQ_TASK_INFO1_##member##_MASK) <<        \
                        SQ_TASK_INFO1_##member##_SHIFT)

/* task section, pkt_info2 */
#define SQ_TASK_INFO2_TUNNEL_L4LEN_SHIFT        0
#define SQ_TASK_INFO2_OUTER_L3LEN_SHIFT         8
#define SQ_TASK_INFO2_TUNNEL_L4TYPE_SHIFT       16
#define SQ_TASK_INFO2_OUTER_L3TYPE_SHIFT        24

#define SQ_TASK_INFO2_TUNNEL_L4LEN_MASK         0xFFU
#define SQ_TASK_INFO2_OUTER_L3LEN_MASK          0xFFU
#define SQ_TASK_INFO2_TUNNEL_L4TYPE_MASK        0x7U
#define SQ_TASK_INFO2_OUTER_L3TYPE_MASK         0x3U

#define SQ_TASK_INFO2_SET(val, member)                  \
        (((u32)(val) & SQ_TASK_INFO2_##member##_MASK) <<        \
                        SQ_TASK_INFO2_##member##_SHIFT)

/* task section, pkt_info4 */
#define SQ_TASK_INFO4_L2TYPE_SHIFT              31

#define SQ_TASK_INFO4_L2TYPE_MASK               0x1U

#define SQ_TASK_INFO4_SET(val, member)          \
        (((u32)(val) & SQ_TASK_INFO4_##member##_MASK) << \
                        SQ_TASK_INFO4_##member##_SHIFT)
146
/* SQ_DB: send queue doorbell */
#define SQ_DB_OFF                               0x00000800

/* doorbell info word, one SHIFT/MASK pair per field */
#define SQ_DB_INFO_HI_PI_SHIFT                  0
#define SQ_DB_INFO_HI_PI_MASK                   0xFFU

#define SQ_DB_INFO_QID_SHIFT                    8
#define SQ_DB_INFO_QID_MASK                     0x3FFU

#define SQ_DB_INFO_CFLAG_SHIFT                  23
#define SQ_DB_INFO_CFLAG_MASK                   0x1U

#define SQ_DB_INFO_COS_SHIFT                    24
#define SQ_DB_INFO_COS_MASK                     0x7U

#define SQ_DB_INFO_TYPE_SHIFT                   27
#define SQ_DB_INFO_TYPE_MASK                    0x1FU

/* mask val to the width of member and move it into position */
#define SQ_DB_INFO_SET(val, member)             \
        (((u32)(val) & SQ_DB_INFO_##member##_MASK) <<   \
                        SQ_DB_INFO_##member##_SHIFT)

#define SQ_DB                                   1
#define SQ_CFLAG_DP                             0       /* CFLAG_DATA_PATH */

/* split of the producer index into its low byte and the remaining bits */
#define SQ_DB_PI_LOW_MASK                       0xFF
#define SQ_DB_PI_HI_SHIFT                       8
#define SQ_DB_PI_LOW(pi)                        ((pi) & SQ_DB_PI_LOW_MASK)
#define SQ_DB_PI_HIGH(pi)                       ((pi) >> SQ_DB_PI_HI_SHIFT)

/* doorbell slot: u64 entry indexed by the low pi byte, SQ_DB_OFF bytes in */
#define SQ_DB_ADDR(sq, pi)              \
        ((u64 *)((u8 __iomem *)((sq)->db_addr) + SQ_DB_OFF) + SQ_DB_PI_LOW(pi))
173
/* txq wq operations */
#define HINIC_GET_SQ_WQE_MASK(txq)              ((txq)->wq->mask)

/* consumer index published at cons_idx_addr (big endian), masked */
#define HINIC_GET_SQ_HW_CI(txq) \
        ((be16_to_cpu(*(txq)->cons_idx_addr)) & HINIC_GET_SQ_WQE_MASK(txq))

/* software consumer index, masked */
#define HINIC_GET_SQ_LOCAL_CI(txq)      \
        (((txq)->wq->cons_idx) & HINIC_GET_SQ_WQE_MASK(txq))

/* advance the software consumer index and give the wqebbs back to delta */
#define HINIC_UPDATE_SQ_LOCAL_CI(txq, wqebb_cnt)        \
        do {                                            \
                (txq)->wq->cons_idx += (wqebb_cnt);     \
                (txq)->wq->delta += (wqebb_cnt);        \
        } while (0)

#define HINIC_GET_SQ_FREE_WQEBBS(txq)   ((txq)->wq->delta - 1)

#define HINIC_IS_SQ_EMPTY(txq)  (((txq)->wq->delta) == ((txq)->q_depth))

#define BUF_DESC_SIZE_SHIFT             4

/* wqe bytes: ctrl section + task section + num_sge buffer descriptors */
#define HINIC_SQ_WQE_SIZE(num_sge)              \
        (sizeof(struct hinic_sq_ctrl) + sizeof(struct hinic_sq_task) +  \
                        (unsigned int)((num_sge) << BUF_DESC_SIZE_SHIFT))

/* number of wqebbs needed for a wqe carrying num_sge buffer descriptors */
#define HINIC_SQ_WQEBB_CNT(num_sge)     \
        ((int)(ALIGN(HINIC_SQ_WQE_SIZE((u32)(num_sge)), \
                        HINIC_SQ_WQEBB_SIZE) >> HINIC_SQ_WQEBB_SHIFT))
202
203
204 static inline void hinic_sq_wqe_cpu_to_be32(void *data, int nr_wqebb)
205 {
206 #if defined(RTE_ARCH_X86_64)
207         int i;
208         __m128i *wqe_line = (__m128i *)data;
209         __m128i shuf_mask = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10,
210                                         11, 4, 5, 6, 7, 0, 1, 2, 3);
211
212         for (i = 0; i < nr_wqebb; i++) {
213                 /* convert 64B wqebb using 4 SSE instructions */
214                 wqe_line[0] = _mm_shuffle_epi8(wqe_line[0], shuf_mask);
215                 wqe_line[1] = _mm_shuffle_epi8(wqe_line[1], shuf_mask);
216                 wqe_line[2] = _mm_shuffle_epi8(wqe_line[2], shuf_mask);
217                 wqe_line[3] = _mm_shuffle_epi8(wqe_line[3], shuf_mask);
218                 wqe_line += 4;
219         }
220 #elif defined(RTE_ARCH_ARM64)
221         int i;
222         uint8x16_t *wqe_line = (uint8x16_t *)data;
223         const uint8x16_t shuf_mask = {3, 2, 1, 0, 7, 6, 5, 4, 11, 10,
224                                         9, 8, 15, 14, 13, 12};
225
226         for (i = 0; i < nr_wqebb; i++) {
227                 wqe_line[0] = vqtbl1q_u8(wqe_line[0], shuf_mask);
228                 wqe_line[1] = vqtbl1q_u8(wqe_line[1], shuf_mask);
229                 wqe_line[2] = vqtbl1q_u8(wqe_line[2], shuf_mask);
230                 wqe_line[3] = vqtbl1q_u8(wqe_line[3], shuf_mask);
231                 wqe_line += 4;
232         }
233 #else
234         hinic_cpu_to_be32(data, nr_wqebb * HINIC_SQ_WQEBB_SIZE);
235 #endif
236 }
237
238 static inline void hinic_sge_cpu_to_be32(void *data, int nr_sge)
239 {
240 #if defined(RTE_ARCH_X86_64)
241         int i;
242         __m128i *sge_line = (__m128i *)data;
243         __m128i shuf_mask = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10,
244                                         11, 4, 5, 6, 7, 0, 1, 2, 3);
245
246         for (i = 0; i < nr_sge; i++) {
247                 /* convert 16B sge using 1 SSE instructions */
248                 *sge_line = _mm_shuffle_epi8(*sge_line, shuf_mask);
249                 sge_line++;
250         }
251 #elif defined(RTE_ARCH_ARM64)
252         int i;
253         uint8x16_t *sge_line = (uint8x16_t *)data;
254         const uint8x16_t shuf_mask = {3, 2, 1, 0, 7, 6, 5, 4, 11, 10,
255                                         9, 8, 15, 14, 13, 12};
256
257         for (i = 0; i < nr_sge; i++) {
258                 *sge_line = vqtbl1q_u8(*sge_line, shuf_mask);
259                 sge_line++;
260         }
261 #else
262         hinic_cpu_to_be32(data, nr_sge * sizeof(struct hinic_sq_bufdesc));
263 #endif
264 }
265
266 void hinic_txq_get_stats(struct hinic_txq *txq, struct hinic_txq_stats *stats)
267 {
268         if (!txq || !stats) {
269                 PMD_DRV_LOG(ERR, "Txq or stats is NULL");
270                 return;
271         }
272
273         memcpy(stats, &txq->txq_stats, sizeof(txq->txq_stats));
274 }
275
276 void hinic_txq_stats_reset(struct hinic_txq *txq)
277 {
278         struct hinic_txq_stats *txq_stats;
279
280         if (txq == NULL)
281                 return;
282
283         txq_stats = &txq->txq_stats;
284         memset(txq_stats, 0, sizeof(*txq_stats));
285 }
286
287 static inline struct rte_mbuf *hinic_copy_tx_mbuf(struct hinic_nic_dev *nic_dev,
288                                                   struct rte_mbuf *mbuf,
289                                                   u16 sge_cnt)
290 {
291         struct rte_mbuf *dst_mbuf;
292         u32 offset = 0;
293         u16 i;
294
295         if (unlikely(!nic_dev->cpy_mpool))
296                 return NULL;
297
298         dst_mbuf = rte_pktmbuf_alloc(nic_dev->cpy_mpool);
299         if (unlikely(!dst_mbuf))
300                 return NULL;
301
302         dst_mbuf->data_off = 0;
303         for (i = 0; i < sge_cnt; i++) {
304                 rte_memcpy((char *)dst_mbuf->buf_addr + offset,
305                            (char *)mbuf->buf_addr + mbuf->data_off,
306                            mbuf->data_len);
307                 dst_mbuf->data_len += mbuf->data_len;
308                 offset += mbuf->data_len;
309                 mbuf = mbuf->next;
310         }
311
312         dst_mbuf->pkt_len = dst_mbuf->data_len;
313
314         return dst_mbuf;
315 }
316
317 static inline bool hinic_mbuf_dma_map_sge(struct hinic_txq *txq,
318                                           struct rte_mbuf *mbuf,
319                                           struct hinic_sq_bufdesc *sges,
320                                           struct hinic_wqe_info *sqe_info)
321 {
322         dma_addr_t dma_addr;
323         u16 i, around_sges;
324         u16 nb_segs = sqe_info->sge_cnt - sqe_info->cpy_mbuf_cnt;
325         u16 real_nb_segs = mbuf->nb_segs;
326         struct hinic_sq_bufdesc *sge_idx = sges;
327
328         if (unlikely(sqe_info->around)) {
329                 /* parts of wqe is in sq bottom while parts
330                  * of wqe is in sq head
331                  */
332                 i = 0;
333                 for (sge_idx = sges; (u64)sge_idx <= txq->sq_bot_sge_addr;
334                      sge_idx++) {
335                         if (unlikely(mbuf == NULL)) {
336                                 txq->txq_stats.mbuf_null++;
337                                 return false;
338                         }
339
340                         dma_addr = rte_mbuf_data_iova(mbuf);
341                         if (unlikely(mbuf->data_len == 0)) {
342                                 txq->txq_stats.sge_len0++;
343                                 return false;
344                         }
345                         hinic_set_sge((struct hinic_sge *)sge_idx, dma_addr,
346                                       mbuf->data_len);
347                         mbuf = mbuf->next;
348                         i++;
349                 }
350
351                 around_sges = nb_segs - i;
352                 sge_idx = (struct hinic_sq_bufdesc *)
353                                 ((void *)txq->sq_head_addr);
354                 for (; i < nb_segs; i++) {
355                         if (unlikely(mbuf == NULL)) {
356                                 txq->txq_stats.mbuf_null++;
357                                 return false;
358                         }
359
360                         dma_addr = rte_mbuf_data_iova(mbuf);
361                         if (unlikely(mbuf->data_len == 0)) {
362                                 txq->txq_stats.sge_len0++;
363                                 return false;
364                         }
365                         hinic_set_sge((struct hinic_sge *)sge_idx, dma_addr,
366                                       mbuf->data_len);
367                         mbuf = mbuf->next;
368                         sge_idx++;
369                 }
370
371                 /* covert sges at head to big endian */
372                 hinic_sge_cpu_to_be32((void *)txq->sq_head_addr, around_sges);
373         } else {
374                 /* wqe is in continuous space */
375                 for (i = 0; i < nb_segs; i++) {
376                         if (unlikely(mbuf == NULL)) {
377                                 txq->txq_stats.mbuf_null++;
378                                 return false;
379                         }
380
381                         dma_addr = rte_mbuf_data_iova(mbuf);
382                         if (unlikely(mbuf->data_len == 0)) {
383                                 txq->txq_stats.sge_len0++;
384                                 return false;
385                         }
386                         hinic_set_sge((struct hinic_sge *)sge_idx, dma_addr,
387                                       mbuf->data_len);
388                         mbuf = mbuf->next;
389                         sge_idx++;
390                 }
391         }
392
393         /* for now: support non-tso over 17 sge, copy the last 2 mbuf */
394         if (unlikely(sqe_info->cpy_mbuf_cnt != 0)) {
395                 /* copy invalid mbuf segs to a valid buffer, lost performance */
396                 txq->txq_stats.cpy_pkts += 1;
397                 mbuf = hinic_copy_tx_mbuf(txq->nic_dev, mbuf,
398                                           real_nb_segs - nb_segs);
399                 if (unlikely(!mbuf))
400                         return false;
401
402                 txq->tx_info[sqe_info->pi].cpy_mbuf = mbuf;
403
404                 /* deal with the last mbuf */
405                 dma_addr = rte_mbuf_data_iova(mbuf);
406                 if (unlikely(mbuf->data_len == 0)) {
407                         txq->txq_stats.sge_len0++;
408                         return false;
409                 }
410                 hinic_set_sge((struct hinic_sge *)sge_idx, dma_addr,
411                               mbuf->data_len);
412                 if (unlikely(sqe_info->around))
413                         hinic_sge_cpu_to_be32((void *)sge_idx, 1);
414         }
415
416         return true;
417 }
418
419 static inline void hinic_fill_sq_wqe_header(struct hinic_sq_ctrl *ctrl,
420                                             u32 queue_info, int nr_descs,
421                                             u8 owner)
422 {
423         u32 ctrl_size, task_size, bufdesc_size;
424
425         ctrl_size = SIZE_8BYTES(sizeof(struct hinic_sq_ctrl));
426         task_size = SIZE_8BYTES(sizeof(struct hinic_sq_task));
427         bufdesc_size = HINIC_BUF_DESC_SIZE(nr_descs);
428
429         ctrl->ctrl_fmt = SQ_CTRL_SET(bufdesc_size, BUFDESC_SECT_LEN) |
430                         SQ_CTRL_SET(task_size, TASKSECT_LEN)    |
431                         SQ_CTRL_SET(SQ_NORMAL_WQE, DATA_FORMAT) |
432                         SQ_CTRL_SET(ctrl_size, LEN)             |
433                         SQ_CTRL_SET(owner, OWNER);
434
435         ctrl->queue_info = queue_info;
436         ctrl->queue_info |= SQ_CTRL_QUEUE_INFO_SET(1U, UC);
437
438         if (!SQ_CTRL_QUEUE_INFO_GET(ctrl->queue_info, MSS)) {
439                 ctrl->queue_info |=
440                         SQ_CTRL_QUEUE_INFO_SET(TX_MSS_DEFAULT, MSS);
441         } else if (SQ_CTRL_QUEUE_INFO_GET(ctrl->queue_info, MSS) < TX_MSS_MIN) {
442                 /* mss should not be less than 80 */
443                 ctrl->queue_info =
444                                 SQ_CTRL_QUEUE_INFO_CLEAR(ctrl->queue_info, MSS);
445                 ctrl->queue_info |= SQ_CTRL_QUEUE_INFO_SET(TX_MSS_MIN, MSS);
446         }
447 }
448
449 static inline bool hinic_is_tso_sge_valid(struct rte_mbuf *mbuf,
450                                           struct hinic_tx_offload_info
451                                           *poff_info,
452                                           struct hinic_wqe_info *sqe_info)
453 {
454         u32 total_len, limit_len, checked_len, left_len, adjust_mss;
455         u32 i, first_mss_sges, left_sges;
456         struct rte_mbuf *mbuf_head, *mbuf_pre;
457
458         left_sges = mbuf->nb_segs;
459         mbuf_head = mbuf;
460
461         /* tso sge number validation */
462         if (unlikely(left_sges >= HINIC_NONTSO_PKT_MAX_SGE)) {
463                 checked_len = 0;
464                 adjust_mss = mbuf->tso_segsz >= TX_MSS_MIN ?
465                                 mbuf->tso_segsz : TX_MSS_MIN;
466                 limit_len = adjust_mss + poff_info->payload_offset;
467                 first_mss_sges = HINIC_NONTSO_PKT_MAX_SGE;
468
469                 /* each continues 17 mbufs segmust do one check */
470                 while (left_sges >= HINIC_NONTSO_PKT_MAX_SGE) {
471                         /* total len of first 16 mbufs must equal
472                          * or more than limit_len
473                          */
474                         total_len = 0;
475                         for (i = 0; i < first_mss_sges; i++) {
476                                 total_len += mbuf->data_len;
477                                 mbuf_pre = mbuf;
478                                 mbuf = mbuf->next;
479                                 if (total_len >= limit_len) {
480                                         limit_len = adjust_mss;
481                                         break;
482                                 }
483                         }
484
485                         checked_len += total_len;
486
487                         /* try to copy if not valid */
488                         if (unlikely(first_mss_sges == i)) {
489                                 left_sges -= first_mss_sges;
490                                 checked_len -= mbuf_pre->data_len;
491
492                                 left_len = mbuf_head->pkt_len - checked_len;
493                                 if (left_len > HINIC_COPY_MBUF_SIZE)
494                                         return false;
495
496                                 sqe_info->sge_cnt = mbuf_head->nb_segs -
497                                                         left_sges;
498                                 sqe_info->cpy_mbuf_cnt = 1;
499
500                                 return true;
501                         }
502                         first_mss_sges = (HINIC_NONTSO_PKT_MAX_SGE - 1);
503
504                         /* continue next 16 mbufs */
505                         left_sges -= (i + 1);
506                 } /* end of while */
507         }
508
509         sqe_info->sge_cnt = mbuf_head->nb_segs;
510         return true;
511 }
512
513 static inline void
514 hinic_set_l4_csum_info(struct hinic_sq_task *task,
515                 u32 *queue_info, struct hinic_tx_offload_info *poff_info)
516 {
517         u32 tcp_udp_cs, sctp = 0;
518         u16 l2hdr_len;
519
520         if (unlikely(poff_info->inner_l4_type == SCTP_OFFLOAD_ENABLE))
521                 sctp = 1;
522
523         tcp_udp_cs = poff_info->inner_l4_tcp_udp;
524
525         if (poff_info->tunnel_type == TUNNEL_UDP_CSUM ||
526             poff_info->tunnel_type == TUNNEL_UDP_NO_CSUM) {
527                 l2hdr_len =  poff_info->outer_l2_len;
528
529                 task->pkt_info2 |=
530                 SQ_TASK_INFO2_SET(poff_info->outer_l3_type, OUTER_L3TYPE) |
531                 SQ_TASK_INFO2_SET(poff_info->outer_l3_len, OUTER_L3LEN);
532                 task->pkt_info2 |=
533                 SQ_TASK_INFO2_SET(poff_info->tunnel_type, TUNNEL_L4TYPE) |
534                 SQ_TASK_INFO2_SET(poff_info->tunnel_length, TUNNEL_L4LEN);
535         } else {
536                 l2hdr_len = poff_info->inner_l2_len;
537         }
538
539         task->pkt_info0 |= SQ_TASK_INFO0_SET(l2hdr_len, L2HDR_LEN);
540         task->pkt_info1 |=
541                 SQ_TASK_INFO1_SET(poff_info->inner_l3_len, INNER_L3LEN);
542         task->pkt_info0 |=
543                 SQ_TASK_INFO0_SET(poff_info->inner_l3_type, INNER_L3TYPE);
544         task->pkt_info1 |=
545                 SQ_TASK_INFO1_SET(poff_info->inner_l4_len, INNER_L4LEN);
546         task->pkt_info0 |=
547                 SQ_TASK_INFO0_SET(poff_info->inner_l4_type, L4OFFLOAD);
548         *queue_info |=
549                 SQ_CTRL_QUEUE_INFO_SET(poff_info->payload_offset, PLDOFF) |
550                 SQ_CTRL_QUEUE_INFO_SET(tcp_udp_cs, TCPUDP_CS) |
551                 SQ_CTRL_QUEUE_INFO_SET(sctp, SCTP);
552 }
553
554 static inline void
555 hinic_set_tso_info(struct hinic_sq_task *task,
556                 u32 *queue_info, struct rte_mbuf *mbuf,
557                 struct hinic_tx_offload_info *poff_info)
558 {
559         hinic_set_l4_csum_info(task, queue_info, poff_info);
560
561         /* wqe for tso */
562         task->pkt_info0 |=
563                 SQ_TASK_INFO0_SET(poff_info->inner_l3_type, INNER_L3TYPE);
564         task->pkt_info0 |= SQ_TASK_INFO0_SET(TSO_ENABLE, TSO_UFO);
565         *queue_info |= SQ_CTRL_QUEUE_INFO_SET(TSO_ENABLE, TSO);
566         /* qsf was initialized in prepare_sq_wqe */
567         *queue_info = SQ_CTRL_QUEUE_INFO_CLEAR(*queue_info, MSS);
568         *queue_info |= SQ_CTRL_QUEUE_INFO_SET(mbuf->tso_segsz, MSS);
569 }
570
571 static inline void
572 hinic_set_vlan_tx_offload(struct hinic_sq_task *task,
573                         u32 *queue_info, u16 vlan_tag, u16 vlan_pri)
574 {
575         task->pkt_info0 |= SQ_TASK_INFO0_SET(vlan_tag, VLAN_TAG) |
576                                 SQ_TASK_INFO0_SET(1U, VLAN_OFFLOAD);
577
578         *queue_info |= SQ_CTRL_QUEUE_INFO_SET(vlan_pri, PRI);
579 }
580
581 static inline void
582 hinic_fill_tx_offload_info(struct rte_mbuf *mbuf,
583                 struct hinic_sq_task *task, u32 *queue_info,
584                 struct hinic_tx_offload_info *tx_off_info)
585 {
586         u16 vlan_tag;
587         uint64_t ol_flags = mbuf->ol_flags;
588
589         /* clear DW0~2 of task section for offload */
590         task->pkt_info0 = 0;
591         task->pkt_info1 = 0;
592         task->pkt_info2 = 0;
593
594         /* Base VLAN */
595         if (unlikely(ol_flags & PKT_TX_VLAN_PKT)) {
596                 vlan_tag = mbuf->vlan_tci;
597                 hinic_set_vlan_tx_offload(task, queue_info, vlan_tag,
598                                           vlan_tag >> VLAN_PRIO_SHIFT);
599         }
600
601         /* non checksum or tso */
602         if (unlikely(!(ol_flags & HINIC_TX_CKSUM_OFFLOAD_MASK)))
603                 return;
604
605         if ((ol_flags & PKT_TX_TCP_SEG))
606                 /* set tso info for task and qsf */
607                 hinic_set_tso_info(task, queue_info, mbuf, tx_off_info);
608         else /* just support l4 checksum offload */
609                 hinic_set_l4_csum_info(task, queue_info, tx_off_info);
610 }
611
612 static inline void hinic_xmit_mbuf_cleanup(struct hinic_txq *txq)
613 {
614         struct hinic_tx_info *tx_info;
615         struct rte_mbuf *mbuf, *m, *mbuf_free[HINIC_MAX_TX_FREE_BULK];
616         int i, nb_free = 0;
617         u16 hw_ci, sw_ci, sq_mask;
618         int wqebb_cnt = 0;
619
620         hw_ci = HINIC_GET_SQ_HW_CI(txq);
621         sw_ci = HINIC_GET_SQ_LOCAL_CI(txq);
622         sq_mask = HINIC_GET_SQ_WQE_MASK(txq);
623
624         for (i = 0; i < txq->tx_free_thresh; ++i) {
625                 tx_info = &txq->tx_info[sw_ci];
626                 if (hw_ci == sw_ci ||
627                         (((hw_ci - sw_ci) & sq_mask) < tx_info->wqebb_cnt))
628                         break;
629
630                 sw_ci = (sw_ci + tx_info->wqebb_cnt) & sq_mask;
631
632                 if (unlikely(tx_info->cpy_mbuf != NULL)) {
633                         rte_pktmbuf_free(tx_info->cpy_mbuf);
634                         tx_info->cpy_mbuf = NULL;
635                 }
636
637                 wqebb_cnt += tx_info->wqebb_cnt;
638                 mbuf = tx_info->mbuf;
639
640                 if (likely(mbuf->nb_segs == 1)) {
641                         m = rte_pktmbuf_prefree_seg(mbuf);
642                         tx_info->mbuf = NULL;
643
644                         if (unlikely(m == NULL))
645                                 continue;
646
647                         mbuf_free[nb_free++] = m;
648                         if (unlikely(m->pool != mbuf_free[0]->pool ||
649                                 nb_free >= HINIC_MAX_TX_FREE_BULK)) {
650                                 rte_mempool_put_bulk(mbuf_free[0]->pool,
651                                         (void **)mbuf_free, (nb_free - 1));
652                                 nb_free = 0;
653                                 mbuf_free[nb_free++] = m;
654                         }
655                 } else {
656                         rte_pktmbuf_free(mbuf);
657                         tx_info->mbuf = NULL;
658                 }
659         }
660
661         if (nb_free > 0)
662                 rte_mempool_put_bulk(mbuf_free[0]->pool, (void **)mbuf_free,
663                                      nb_free);
664
665         HINIC_UPDATE_SQ_LOCAL_CI(txq, wqebb_cnt);
666 }
667
668 static inline struct hinic_sq_wqe *
669 hinic_get_sq_wqe(struct hinic_txq *txq, int wqebb_cnt,
670                  struct hinic_wqe_info *wqe_info)
671 {
672         u32 cur_pi, end_pi;
673         u16 remain_wqebbs;
674         struct hinic_sq *sq = txq->sq;
675         struct hinic_wq *wq = txq->wq;
676
677         /* record current pi */
678         cur_pi = MASKED_WQE_IDX(wq, wq->prod_idx);
679         end_pi = cur_pi + wqebb_cnt;
680
681         /* update next pi and delta */
682         wq->prod_idx += wqebb_cnt;
683         wq->delta -= wqebb_cnt;
684
685         /* return current pi and owner */
686         wqe_info->pi = cur_pi;
687         wqe_info->owner = sq->owner;
688         wqe_info->around = 0;
689         wqe_info->seq_wqebbs = wqebb_cnt;
690
691         if (unlikely(end_pi >= txq->q_depth)) {
692                 /* update owner of next prod_idx */
693                 sq->owner = !sq->owner;
694
695                 /* turn around to head */
696                 if (unlikely(end_pi > txq->q_depth)) {
697                         wqe_info->around = 1;
698                         remain_wqebbs = txq->q_depth - cur_pi;
699                         wqe_info->seq_wqebbs = remain_wqebbs;
700                 }
701         }
702
703         return (struct hinic_sq_wqe *)WQ_WQE_ADDR(wq, cur_pi);
704 }
705
706 static inline uint16_t
707 hinic_ipv4_phdr_cksum(const struct rte_ipv4_hdr *ipv4_hdr, uint64_t ol_flags)
708 {
709         struct ipv4_psd_header {
710                 uint32_t src_addr; /* IP address of source host. */
711                 uint32_t dst_addr; /* IP address of destination host. */
712                 uint8_t  zero;     /* zero. */
713                 uint8_t  proto;    /* L4 protocol type. */
714                 uint16_t len;      /* L4 length. */
715         } psd_hdr;
716
717         psd_hdr.src_addr = ipv4_hdr->src_addr;
718         psd_hdr.dst_addr = ipv4_hdr->dst_addr;
719         psd_hdr.zero = 0;
720         psd_hdr.proto = ipv4_hdr->next_proto_id;
721         if (ol_flags & PKT_TX_TCP_SEG) {
722                 psd_hdr.len = 0;
723         } else {
724                 psd_hdr.len =
725                 rte_cpu_to_be_16(rte_be_to_cpu_16(ipv4_hdr->total_length) -
726                                  rte_ipv4_hdr_len(ipv4_hdr));
727         }
728         return rte_raw_cksum(&psd_hdr, sizeof(psd_hdr));
729 }
730
731 static inline uint16_t
732 hinic_ipv6_phdr_cksum(const struct rte_ipv6_hdr *ipv6_hdr, uint64_t ol_flags)
733 {
734         uint32_t sum;
735         struct {
736                 uint32_t len;   /* L4 length. */
737                 uint32_t proto; /* L4 protocol - top 3 bytes must be zero */
738         } psd_hdr;
739
740         psd_hdr.proto = (ipv6_hdr->proto << 24);
741         if (ol_flags & PKT_TX_TCP_SEG)
742                 psd_hdr.len = 0;
743         else
744                 psd_hdr.len = ipv6_hdr->payload_len;
745
746         sum = __rte_raw_cksum(ipv6_hdr->src_addr,
747                 sizeof(ipv6_hdr->src_addr) + sizeof(ipv6_hdr->dst_addr), 0);
748         sum = __rte_raw_cksum(&psd_hdr, sizeof(psd_hdr), sum);
749         return __rte_raw_cksum_reduce(sum);
750 }
751
752 static inline void hinic_get_outer_cs_pld_offset(struct rte_mbuf *m,
753                                         struct hinic_tx_offload_info *off_info)
754 {
755         uint64_t ol_flags = m->ol_flags;
756
757         if ((ol_flags & PKT_TX_L4_MASK) == PKT_TX_UDP_CKSUM)
758                 off_info->payload_offset = m->outer_l2_len + m->outer_l3_len +
759                                            m->l2_len + m->l3_len;
760         else if ((ol_flags & PKT_TX_TCP_CKSUM) || (ol_flags & PKT_TX_TCP_SEG))
761                 off_info->payload_offset = m->outer_l2_len + m->outer_l3_len +
762                                            m->l2_len + m->l3_len + m->l4_len;
763 }
764
765 static inline void hinic_get_pld_offset(struct rte_mbuf *m,
766                                         struct hinic_tx_offload_info *off_info)
767 {
768         uint64_t ol_flags = m->ol_flags;
769
770         if (((ol_flags & PKT_TX_L4_MASK) == PKT_TX_UDP_CKSUM) ||
771             ((ol_flags & PKT_TX_L4_MASK) == PKT_TX_SCTP_CKSUM))
772                 off_info->payload_offset = m->l2_len + m->l3_len;
773         else if ((ol_flags & PKT_TX_TCP_CKSUM) || (ol_flags & PKT_TX_TCP_SEG))
774                 off_info->payload_offset = m->l2_len + m->l3_len +
775                                            m->l4_len;
776 }
777
778 static inline void hinic_analyze_tx_info(struct rte_mbuf *mbuf,
779                                          struct hinic_tx_offload_info *off_info)
780 {
781         struct rte_ether_hdr *eth_hdr;
782         struct rte_vlan_hdr *vlan_hdr;
783         struct rte_ipv4_hdr *ipv4_hdr;
784         u16 eth_type;
785
786         eth_hdr = rte_pktmbuf_mtod(mbuf, struct rte_ether_hdr *);
787         eth_type = rte_be_to_cpu_16(eth_hdr->ether_type);
788
789         if (eth_type == RTE_ETHER_TYPE_VLAN) {
790                 off_info->outer_l2_len = ETHER_LEN_WITH_VLAN;
791                 vlan_hdr = (struct rte_vlan_hdr *)(eth_hdr + 1);
792                 eth_type = rte_be_to_cpu_16(vlan_hdr->eth_proto);
793         } else {
794                 off_info->outer_l2_len = ETHER_LEN_NO_VLAN;
795         }
796
797         if (eth_type == RTE_ETHER_TYPE_IPV4) {
798                 ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf, struct rte_ipv4_hdr *,
799                                                    off_info->outer_l2_len);
800                 off_info->outer_l3_len = rte_ipv4_hdr_len(ipv4_hdr);
801         } else if (eth_type == RTE_ETHER_TYPE_IPV6) {
802                 /* not support ipv6 extension header */
803                 off_info->outer_l3_len = sizeof(struct rte_ipv6_hdr);
804         }
805 }
806
807 static inline void hinic_analyze_outer_ip_vxlan(struct rte_mbuf *mbuf,
808                                         struct hinic_tx_offload_info *off_info)
809 {
810         struct rte_ether_hdr *eth_hdr;
811         struct rte_vlan_hdr *vlan_hdr;
812         struct rte_ipv4_hdr *ipv4_hdr;
813         struct rte_udp_hdr *udp_hdr;
814         u16 eth_type = 0;
815
816         eth_hdr = rte_pktmbuf_mtod(mbuf, struct rte_ether_hdr *);
817         eth_type = rte_be_to_cpu_16(eth_hdr->ether_type);
818
819         if (eth_type == RTE_ETHER_TYPE_VLAN) {
820                 vlan_hdr = (struct rte_vlan_hdr *)(eth_hdr + 1);
821                 eth_type = rte_be_to_cpu_16(vlan_hdr->eth_proto);
822         }
823
824         if (eth_type == RTE_ETHER_TYPE_IPV4) {
825                 ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf, struct rte_ipv4_hdr *,
826                                                    mbuf->outer_l2_len);
827                 off_info->outer_l3_type = IPV4_PKT_WITH_CHKSUM_OFFLOAD;
828                 ipv4_hdr->hdr_checksum = 0;
829
830                 udp_hdr = (struct rte_udp_hdr *)((char *)ipv4_hdr +
831                                                  mbuf->outer_l3_len);
832                 udp_hdr->dgram_cksum = 0;
833         } else if (eth_type == RTE_ETHER_TYPE_IPV6) {
834                 off_info->outer_l3_type = IPV6_PKT;
835
836                 udp_hdr = rte_pktmbuf_mtod_offset(mbuf, struct rte_udp_hdr *,
837                                                   (mbuf->outer_l2_len +
838                                                    mbuf->outer_l3_len));
839                 udp_hdr->dgram_cksum = 0;
840         }
841 }
842
843 static inline uint8_t hinic_analyze_l3_type(struct rte_mbuf *mbuf)
844 {
845         uint8_t l3_type;
846         uint64_t ol_flags = mbuf->ol_flags;
847
848         if (ol_flags & PKT_TX_IPV4)
849                 l3_type = (ol_flags & PKT_TX_IP_CKSUM) ?
850                           IPV4_PKT_WITH_CHKSUM_OFFLOAD :
851                           IPV4_PKT_NO_CHKSUM_OFFLOAD;
852         else if (ol_flags & PKT_TX_IPV6)
853                 l3_type = IPV6_PKT;
854         else
855                 l3_type = UNKNOWN_L3TYPE;
856
857         return l3_type;
858 }
859
860 static inline void hinic_calculate_tcp_checksum(struct rte_mbuf *mbuf,
861                                         struct hinic_tx_offload_info *off_info,
862                                         uint64_t inner_l3_offset)
863 {
864         struct rte_ipv4_hdr *ipv4_hdr;
865         struct rte_ipv6_hdr *ipv6_hdr;
866         struct rte_tcp_hdr *tcp_hdr;
867         uint64_t ol_flags = mbuf->ol_flags;
868
869         if (ol_flags & PKT_TX_IPV4) {
870                 ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf, struct rte_ipv4_hdr *,
871                                                    inner_l3_offset);
872
873                 if (ol_flags & PKT_TX_IP_CKSUM)
874                         ipv4_hdr->hdr_checksum = 0;
875
876                 tcp_hdr = (struct rte_tcp_hdr *)((char *)ipv4_hdr +
877                                                  mbuf->l3_len);
878                 tcp_hdr->cksum = hinic_ipv4_phdr_cksum(ipv4_hdr, ol_flags);
879         } else {
880                 ipv6_hdr = rte_pktmbuf_mtod_offset(mbuf, struct rte_ipv6_hdr *,
881                                                    inner_l3_offset);
882                 tcp_hdr = rte_pktmbuf_mtod_offset(mbuf, struct rte_tcp_hdr *,
883                                                   (inner_l3_offset +
884                                                    mbuf->l3_len));
885                 tcp_hdr->cksum = hinic_ipv6_phdr_cksum(ipv6_hdr, ol_flags);
886         }
887
888         off_info->inner_l4_type = TCP_OFFLOAD_ENABLE;
889         off_info->inner_l4_tcp_udp = 1;
890 }
891
892 static inline void hinic_calculate_udp_checksum(struct rte_mbuf *mbuf,
893                                         struct hinic_tx_offload_info *off_info,
894                                         uint64_t inner_l3_offset)
895 {
896         struct rte_ipv4_hdr *ipv4_hdr;
897         struct rte_ipv6_hdr *ipv6_hdr;
898         struct rte_udp_hdr *udp_hdr;
899         uint64_t ol_flags = mbuf->ol_flags;
900
901         if (ol_flags & PKT_TX_IPV4) {
902                 ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf, struct rte_ipv4_hdr *,
903                                                    inner_l3_offset);
904
905                 if (ol_flags & PKT_TX_IP_CKSUM)
906                         ipv4_hdr->hdr_checksum = 0;
907
908                 udp_hdr = (struct rte_udp_hdr *)((char *)ipv4_hdr +
909                                                  mbuf->l3_len);
910                 udp_hdr->dgram_cksum = hinic_ipv4_phdr_cksum(ipv4_hdr,
911                                                              ol_flags);
912         } else {
913                 ipv6_hdr = rte_pktmbuf_mtod_offset(mbuf, struct rte_ipv6_hdr *,
914                                                    inner_l3_offset);
915
916                 udp_hdr = rte_pktmbuf_mtod_offset(mbuf, struct rte_udp_hdr *,
917                                                   (inner_l3_offset +
918                                                    mbuf->l3_len));
919                 udp_hdr->dgram_cksum = hinic_ipv6_phdr_cksum(ipv6_hdr,
920                                                              ol_flags);
921         }
922
923         off_info->inner_l4_type = UDP_OFFLOAD_ENABLE;
924         off_info->inner_l4_tcp_udp = 1;
925 }
926
927 static inline void
928 hinic_calculate_sctp_checksum(struct hinic_tx_offload_info *off_info)
929 {
930         off_info->inner_l4_type = SCTP_OFFLOAD_ENABLE;
931         off_info->inner_l4_tcp_udp = 0;
932         off_info->inner_l4_len = sizeof(struct rte_sctp_hdr);
933 }
934
935 static inline void hinic_calculate_checksum(struct rte_mbuf *mbuf,
936                                         struct hinic_tx_offload_info *off_info,
937                                         uint64_t inner_l3_offset)
938 {
939         uint64_t ol_flags = mbuf->ol_flags;
940
941         switch (ol_flags & PKT_TX_L4_MASK) {
942         case PKT_TX_UDP_CKSUM:
943                 hinic_calculate_udp_checksum(mbuf, off_info, inner_l3_offset);
944                 break;
945
946         case PKT_TX_TCP_CKSUM:
947                 hinic_calculate_tcp_checksum(mbuf, off_info, inner_l3_offset);
948                 break;
949
950         case PKT_TX_SCTP_CKSUM:
951                 hinic_calculate_sctp_checksum(off_info);
952                 break;
953
954         default:
955                 if (ol_flags & PKT_TX_TCP_SEG)
956                         hinic_calculate_tcp_checksum(mbuf, off_info,
957                                                      inner_l3_offset);
958                 break;
959         }
960 }
961
962 static inline int hinic_tx_offload_pkt_prepare(struct rte_mbuf *m,
963                                         struct hinic_tx_offload_info *off_info)
964 {
965         uint64_t inner_l3_offset;
966         uint64_t ol_flags = m->ol_flags;
967
968         /* Check if the packets set available offload flags */
969         if (!(ol_flags & HINIC_TX_CKSUM_OFFLOAD_MASK))
970                 return 0;
971
972         /* Support only vxlan offload */
973         if (unlikely((ol_flags & PKT_TX_TUNNEL_MASK) &&
974             !(ol_flags & PKT_TX_TUNNEL_VXLAN)))
975                 return -ENOTSUP;
976
977 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
978         if (rte_validate_tx_offload(m) != 0)
979                 return -EINVAL;
980 #endif
981
982         if (ol_flags & PKT_TX_TUNNEL_VXLAN) {
983                 off_info->tunnel_type = TUNNEL_UDP_NO_CSUM;
984
985                 /* inner_l4_tcp_udp csum should be set to calculate outer
986                  * udp checksum when vxlan packets without inner l3 and l4
987                  */
988                 off_info->inner_l4_tcp_udp = 1;
989
990                 if ((ol_flags & PKT_TX_OUTER_IP_CKSUM) ||
991                     (ol_flags & PKT_TX_OUTER_IPV6) ||
992                     (ol_flags & PKT_TX_TCP_SEG)) {
993                         inner_l3_offset = m->l2_len + m->outer_l2_len +
994                                           m->outer_l3_len;
995                         off_info->outer_l2_len = m->outer_l2_len;
996                         off_info->outer_l3_len = m->outer_l3_len;
997                         /* just support vxlan tunneling pkt */
998                         off_info->inner_l2_len = m->l2_len - VXLANLEN -
999                                                  sizeof(struct rte_udp_hdr);
1000                         off_info->tunnel_length = m->l2_len;
1001
1002                         hinic_analyze_outer_ip_vxlan(m, off_info);
1003
1004                         hinic_get_outer_cs_pld_offset(m, off_info);
1005                 } else {
1006                         inner_l3_offset = m->l2_len;
1007                         hinic_analyze_tx_info(m, off_info);
1008                         /* just support vxlan tunneling pkt */
1009                         off_info->inner_l2_len = m->l2_len - VXLANLEN -
1010                                                  sizeof(struct rte_udp_hdr) -
1011                                                  off_info->outer_l2_len -
1012                                                  off_info->outer_l3_len;
1013                         off_info->tunnel_length = m->l2_len -
1014                                                   off_info->outer_l2_len -
1015                                                   off_info->outer_l3_len;
1016                         off_info->outer_l3_type = IPV4_PKT_NO_CHKSUM_OFFLOAD;
1017
1018                         hinic_get_pld_offset(m, off_info);
1019                 }
1020         } else {
1021                 inner_l3_offset = m->l2_len;
1022                 off_info->inner_l2_len = m->l2_len;
1023                 off_info->tunnel_type = NOT_TUNNEL;
1024
1025                 hinic_get_pld_offset(m, off_info);
1026         }
1027
1028         /* invalid udp or tcp header */
1029         if (unlikely(off_info->payload_offset > MAX_PLD_OFFSET))
1030                 return -EINVAL;
1031
1032         off_info->inner_l3_len = m->l3_len;
1033         off_info->inner_l4_len = m->l4_len;
1034         off_info->inner_l3_type = hinic_analyze_l3_type(m);
1035
1036         /* Process the pseudo-header checksum */
1037         hinic_calculate_checksum(m, off_info, inner_l3_offset);
1038
1039         return 0;
1040 }
1041
1042 static inline bool hinic_get_sge_txoff_info(struct rte_mbuf *mbuf_pkt,
1043                                             struct hinic_wqe_info *sqe_info,
1044                                             struct hinic_tx_offload_info
1045                                             *off_info)
1046 {
1047         u16  i, total_len, sge_cnt = mbuf_pkt->nb_segs;
1048         struct rte_mbuf *mbuf;
1049         int ret;
1050
1051         memset(off_info, 0, sizeof(*off_info));
1052
1053         ret = hinic_tx_offload_pkt_prepare(mbuf_pkt, off_info);
1054         if (unlikely(ret))
1055                 return false;
1056
1057         sqe_info->cpy_mbuf_cnt = 0;
1058
1059         /* non tso mbuf */
1060         if (likely(!(mbuf_pkt->ol_flags & PKT_TX_TCP_SEG))) {
1061                 if (unlikely(mbuf_pkt->pkt_len > MAX_SINGLE_SGE_SIZE)) {
1062                         /* non tso packet len must less than 64KB */
1063                         return false;
1064                 } else if (unlikely(HINIC_NONTSO_SEG_NUM_INVALID(sge_cnt))) {
1065                         /* non tso packet buffer number must less than 17
1066                          * the mbuf segs more than 17 must copy to one buffer
1067                          */
1068                         total_len = 0;
1069                         mbuf = mbuf_pkt;
1070                         for (i = 0; i < (HINIC_NONTSO_PKT_MAX_SGE - 1) ; i++) {
1071                                 total_len += mbuf->data_len;
1072                                 mbuf = mbuf->next;
1073                         }
1074
1075                         /* default support copy total 4k mbuf segs */
1076                         if ((u32)(total_len + (u16)HINIC_COPY_MBUF_SIZE) <
1077                                   mbuf_pkt->pkt_len)
1078                                 return false;
1079
1080                         sqe_info->sge_cnt = HINIC_NONTSO_PKT_MAX_SGE;
1081                         sqe_info->cpy_mbuf_cnt = 1;
1082                         return true;
1083                 }
1084
1085                 /* valid non tso mbuf */
1086                 sqe_info->sge_cnt = sge_cnt;
1087         } else {
1088                 /* tso mbuf */
1089                 if (unlikely(HINIC_TSO_SEG_NUM_INVALID(sge_cnt)))
1090                         /* too many mbuf segs */
1091                         return false;
1092
1093                 /* check tso mbuf segs are valid or not */
1094                 if (unlikely(!hinic_is_tso_sge_valid(mbuf_pkt,
1095                              off_info, sqe_info)))
1096                         return false;
1097         }
1098
1099         return true;
1100 }
1101
1102 static inline void hinic_sq_write_db(struct hinic_sq *sq, int cos)
1103 {
1104         u16 prod_idx;
1105         u32 hi_prod_idx;
1106         struct hinic_sq_db sq_db;
1107
1108         prod_idx = MASKED_SQ_IDX(sq, sq->wq->prod_idx);
1109         hi_prod_idx = SQ_DB_PI_HIGH(prod_idx);
1110
1111         sq_db.db_info = SQ_DB_INFO_SET(hi_prod_idx, HI_PI) |
1112                         SQ_DB_INFO_SET(SQ_DB, TYPE) |
1113                         SQ_DB_INFO_SET(SQ_CFLAG_DP, CFLAG) |
1114                         SQ_DB_INFO_SET(cos, COS) |
1115                         SQ_DB_INFO_SET(sq->q_id, QID);
1116
1117         /* Data should be written to HW in Big Endian Format */
1118         sq_db.db_info = cpu_to_be32(sq_db.db_info);
1119
1120         /* Write all before the doorbell */
1121         rte_wmb();
1122         writel(sq_db.db_info, SQ_DB_ADDR(sq, prod_idx));
1123 }
1124
1125 u16 hinic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, u16 nb_pkts)
1126 {
1127         int free_wqebb_cnt, wqe_wqebb_cnt;
1128         u32 queue_info, tx_bytes = 0;
1129         u16 nb_tx;
1130         struct hinic_wqe_info sqe_info;
1131         struct hinic_tx_offload_info off_info;
1132         struct rte_mbuf *mbuf_pkt;
1133         struct hinic_txq *txq = tx_queue;
1134         struct hinic_tx_info *tx_info;
1135         struct hinic_sq_wqe *sq_wqe;
1136         struct hinic_sq_task *task;
1137
1138         /* reclaim tx mbuf before xmit new packet */
1139         if (HINIC_GET_SQ_FREE_WQEBBS(txq) < txq->tx_free_thresh)
1140                 hinic_xmit_mbuf_cleanup(txq);
1141
1142         /* tx loop routine */
1143         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
1144                 mbuf_pkt = *tx_pkts++;
1145                 queue_info = 0;
1146
1147                 /* 1. parse sge and tx offlod info from mbuf */
1148                 if (unlikely(!hinic_get_sge_txoff_info(mbuf_pkt,
1149                                                        &sqe_info, &off_info))) {
1150                         txq->txq_stats.off_errs++;
1151                         break;
1152                 }
1153
1154                 /* 2. try to get enough wqebb */
1155                 wqe_wqebb_cnt = HINIC_SQ_WQEBB_CNT(sqe_info.sge_cnt);
1156                 free_wqebb_cnt = HINIC_GET_SQ_FREE_WQEBBS(txq);
1157                 if (unlikely(wqe_wqebb_cnt > free_wqebb_cnt)) {
1158                         /* reclaim again */
1159                         hinic_xmit_mbuf_cleanup(txq);
1160                         free_wqebb_cnt = HINIC_GET_SQ_FREE_WQEBBS(txq);
1161                         if (unlikely(wqe_wqebb_cnt > free_wqebb_cnt)) {
1162                                 txq->txq_stats.tx_busy += (nb_pkts - nb_tx);
1163                                 break;
1164                         }
1165                 }
1166
1167                 /* 3. get sq tail wqe address from wqe_page,
1168                  * sq have enough wqebb for this packet
1169                  */
1170                 sq_wqe = hinic_get_sq_wqe(txq, wqe_wqebb_cnt, &sqe_info);
1171
1172                 /* 4. fill sq wqe sge section */
1173                 if (unlikely(!hinic_mbuf_dma_map_sge(txq, mbuf_pkt,
1174                                                      sq_wqe->buf_descs,
1175                                                      &sqe_info))) {
1176                         hinic_return_sq_wqe(txq->nic_dev->hwdev, txq->q_id,
1177                                             wqe_wqebb_cnt, sqe_info.owner);
1178                         txq->txq_stats.off_errs++;
1179                         break;
1180                 }
1181
1182                 /* 5. fill sq wqe task section and queue info */
1183                 task = &sq_wqe->task;
1184
1185                 /* tx packet offload configure */
1186                 hinic_fill_tx_offload_info(mbuf_pkt, task, &queue_info,
1187                                            &off_info);
1188
1189                 /* 6. record tx info */
1190                 tx_info = &txq->tx_info[sqe_info.pi];
1191                 tx_info->mbuf = mbuf_pkt;
1192                 tx_info->wqebb_cnt = wqe_wqebb_cnt;
1193
1194                 /* 7. fill sq wqe header section */
1195                 hinic_fill_sq_wqe_header(&sq_wqe->ctrl, queue_info,
1196                                          sqe_info.sge_cnt, sqe_info.owner);
1197
1198                 /* 8.convert continue or bottom wqe byteorder to big endian */
1199                 hinic_sq_wqe_cpu_to_be32(sq_wqe, sqe_info.seq_wqebbs);
1200
1201                 tx_bytes += mbuf_pkt->pkt_len;
1202         }
1203
1204         /* 9. write sq doorbell in burst mode */
1205         if (nb_tx) {
1206                 hinic_sq_write_db(txq->sq, txq->cos);
1207
1208                 txq->txq_stats.packets += nb_tx;
1209                 txq->txq_stats.bytes += tx_bytes;
1210         }
1211         txq->txq_stats.burst_pkts = nb_tx;
1212
1213         return nb_tx;
1214 }
1215
1216 void hinic_free_all_tx_mbufs(struct hinic_txq *txq)
1217 {
1218         u16 ci;
1219         struct hinic_nic_dev *nic_dev = txq->nic_dev;
1220         struct hinic_tx_info *tx_info;
1221         int free_wqebbs = hinic_get_sq_free_wqebbs(nic_dev->hwdev,
1222                                                    txq->q_id) + 1;
1223
1224         while (free_wqebbs < txq->q_depth) {
1225                 ci = hinic_get_sq_local_ci(nic_dev->hwdev, txq->q_id);
1226
1227                 tx_info = &txq->tx_info[ci];
1228
1229                 if (unlikely(tx_info->cpy_mbuf != NULL)) {
1230                         rte_pktmbuf_free(tx_info->cpy_mbuf);
1231                         tx_info->cpy_mbuf = NULL;
1232                 }
1233
1234                 rte_pktmbuf_free(tx_info->mbuf);
1235                 hinic_update_sq_local_ci(nic_dev->hwdev, txq->q_id,
1236                                          tx_info->wqebb_cnt);
1237
1238                 free_wqebbs += tx_info->wqebb_cnt;
1239                 tx_info->mbuf = NULL;
1240         }
1241 }
1242
1243 void hinic_free_all_tx_resources(struct rte_eth_dev *eth_dev)
1244 {
1245         u16 q_id;
1246         struct hinic_nic_dev *nic_dev =
1247                                 HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(eth_dev);
1248
1249         for (q_id = 0; q_id < nic_dev->num_sq; q_id++) {
1250                 if (eth_dev->data->tx_queues != NULL)
1251                         eth_dev->data->tx_queues[q_id] = NULL;
1252
1253                 if (nic_dev->txqs[q_id] == NULL)
1254                         continue;
1255
1256                 /* stop tx queue free tx mbuf */
1257                 hinic_free_all_tx_mbufs(nic_dev->txqs[q_id]);
1258                 hinic_free_tx_resources(nic_dev->txqs[q_id]);
1259
1260                 /* free txq */
1261                 kfree(nic_dev->txqs[q_id]);
1262                 nic_dev->txqs[q_id] = NULL;
1263         }
1264 }
1265
1266 void hinic_free_all_tx_mbuf(struct rte_eth_dev *eth_dev)
1267 {
1268         u16 q_id;
1269         struct hinic_nic_dev *nic_dev =
1270                                 HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(eth_dev);
1271
1272         for (q_id = 0; q_id < nic_dev->num_sq; q_id++)
1273                 /* stop tx queue free tx mbuf */
1274                 hinic_free_all_tx_mbufs(nic_dev->txqs[q_id]);
1275 }
1276
1277 int hinic_setup_tx_resources(struct hinic_txq *txq)
1278 {
1279         u64 tx_info_sz;
1280
1281         tx_info_sz = txq->q_depth * sizeof(*txq->tx_info);
1282         txq->tx_info = rte_zmalloc_socket("tx_info", tx_info_sz,
1283                         RTE_CACHE_LINE_SIZE, txq->socket_id);
1284         if (!txq->tx_info)
1285                 return -ENOMEM;
1286
1287         return HINIC_OK;
1288 }
1289
1290 void hinic_free_tx_resources(struct hinic_txq *txq)
1291 {
1292         if (txq->tx_info == NULL)
1293                 return;
1294
1295         rte_free(txq->tx_info);
1296         txq->tx_info = NULL;
1297 }
1298
1299 int hinic_create_sq(struct hinic_hwdev *hwdev, u16 q_id,
1300                         u16 sq_depth, unsigned int socket_id)
1301 {
1302         int err;
1303         struct hinic_nic_io *nic_io = hwdev->nic_io;
1304         struct hinic_qp *qp = &nic_io->qps[q_id];
1305         struct hinic_sq *sq = &qp->sq;
1306         void __iomem *db_addr;
1307         volatile u32 *ci_addr;
1308
1309         sq->sq_depth = sq_depth;
1310         nic_io->sq_depth = sq_depth;
1311
1312         /* alloc wq */
1313         err = hinic_wq_allocate(nic_io->hwdev, &nic_io->sq_wq[q_id],
1314                                 HINIC_SQ_WQEBB_SHIFT, nic_io->sq_depth,
1315                                 socket_id);
1316         if (err) {
1317                 PMD_DRV_LOG(ERR, "Failed to allocate WQ for SQ");
1318                 return err;
1319         }
1320
1321         /* alloc sq doorbell space */
1322         err = hinic_alloc_db_addr(nic_io->hwdev, &db_addr);
1323         if (err) {
1324                 PMD_DRV_LOG(ERR, "Failed to init db addr");
1325                 goto alloc_db_err;
1326         }
1327
1328         /* clear hardware ci */
1329         ci_addr = (volatile u32 *)HINIC_CI_VADDR(nic_io->ci_vaddr_base, q_id);
1330         *ci_addr = 0;
1331
1332         sq->q_id = q_id;
1333         sq->wq = &nic_io->sq_wq[q_id];
1334         sq->owner = 1;
1335         sq->cons_idx_addr = (volatile u16 *)ci_addr;
1336         sq->db_addr = db_addr;
1337
1338         return HINIC_OK;
1339
1340 alloc_db_err:
1341         hinic_wq_free(nic_io->hwdev, &nic_io->sq_wq[q_id]);
1342
1343         return err;
1344 }
1345
1346 void hinic_destroy_sq(struct hinic_hwdev *hwdev, u16 q_id)
1347 {
1348         struct hinic_nic_io *nic_io;
1349         struct hinic_qp *qp;
1350
1351         nic_io = hwdev->nic_io;
1352         qp = &nic_io->qps[q_id];
1353
1354         if (qp->sq.wq == NULL)
1355                 return;
1356
1357         hinic_free_db_addr(nic_io->hwdev, qp->sq.db_addr);
1358         hinic_wq_free(nic_io->hwdev, qp->sq.wq);
1359         qp->sq.wq = NULL;
1360 }