net/hinic: add Rx/Tx
[dpdk.git] / drivers / net / hinic / hinic_pmd_tx.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2017 Huawei Technologies Co., Ltd
3  */
4
5 #include <rte_mbuf.h>
6 #include <rte_tcp.h>
7 #include <rte_sctp.h>
8 #include <rte_udp.h>
9 #include <rte_ip.h>
10 #ifdef __ARM64_NEON__
11 #include <arm_neon.h>
12 #endif
13
14 #include "base/hinic_compat.h"
15 #include "base/hinic_pmd_hwdev.h"
16 #include "base/hinic_pmd_hwif.h"
17 #include "base/hinic_pmd_wq.h"
18 #include "base/hinic_pmd_nicio.h"
19 #include "hinic_pmd_ethdev.h"
20 #include "hinic_pmd_tx.h"
21
22 /* packet header and tx offload info */
23 #define VXLANLEN                        8
24 #define MAX_PLD_OFFSET                  221
25 #define MAX_SINGLE_SGE_SIZE             65536
26 #define TSO_ENABLE                      1
27 #define TX_MSS_DEFAULT                  0x3E00
28 #define TX_MSS_MIN                      0x50
29
30 #define HINIC_NONTSO_PKT_MAX_SGE                17      /* non-tso max sge 17 */
31 #define HINIC_NONTSO_SEG_NUM_INVALID(num)       \
32                         ((num) > HINIC_NONTSO_PKT_MAX_SGE)
33
34 #define HINIC_TSO_PKT_MAX_SGE                   127     /* tso max sge 127 */
35 #define HINIC_TSO_SEG_NUM_INVALID(num)          ((num) > HINIC_TSO_PKT_MAX_SGE)
36
37 /* sizeof(struct hinic_sq_bufdesc) == 16, shift 4 */
38 #define HINIC_BUF_DESC_SIZE(nr_descs)   (SIZE_8BYTES(((u32)nr_descs) << 4))
39
40 #define MASKED_SQ_IDX(sq, idx)          ((idx) & (sq)->wq->mask)
41
42 /* SQ_CTRL */
43 #define SQ_CTRL_BUFDESC_SECT_LEN_SHIFT          0
44 #define SQ_CTRL_TASKSECT_LEN_SHIFT              16
45 #define SQ_CTRL_DATA_FORMAT_SHIFT               22
46 #define SQ_CTRL_LEN_SHIFT                       29
47 #define SQ_CTRL_OWNER_SHIFT                     31
48
49 #define SQ_CTRL_BUFDESC_SECT_LEN_MASK           0xFFU
50 #define SQ_CTRL_TASKSECT_LEN_MASK               0x1FU
51 #define SQ_CTRL_DATA_FORMAT_MASK                0x1U
52 #define SQ_CTRL_LEN_MASK                        0x3U
53 #define SQ_CTRL_OWNER_MASK                      0x1U
54
55 #define SQ_CTRL_SET(val, member)        \
56         (((val) & SQ_CTRL_##member##_MASK) << SQ_CTRL_##member##_SHIFT)
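/*
 * The *_SET macros mask a value and shift it into its named bit field; the
 * fields are then OR-ed together into a single 32-bit word.  Illustrative
 * expansion: SQ_CTRL_SET(owner, OWNER) -> ((owner) & 0x1U) << 31.
 */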
57
58 #define SQ_CTRL_QUEUE_INFO_PLDOFF_SHIFT         2
59 #define SQ_CTRL_QUEUE_INFO_UFO_SHIFT            10
60 #define SQ_CTRL_QUEUE_INFO_TSO_SHIFT            11
61 #define SQ_CTRL_QUEUE_INFO_TCPUDP_CS_SHIFT      12
62 #define SQ_CTRL_QUEUE_INFO_MSS_SHIFT            13
63 #define SQ_CTRL_QUEUE_INFO_SCTP_SHIFT           27
64 #define SQ_CTRL_QUEUE_INFO_UC_SHIFT             28
65 #define SQ_CTRL_QUEUE_INFO_PRI_SHIFT            29
66
67 #define SQ_CTRL_QUEUE_INFO_PLDOFF_MASK          0xFFU
68 #define SQ_CTRL_QUEUE_INFO_UFO_MASK             0x1U
69 #define SQ_CTRL_QUEUE_INFO_TSO_MASK             0x1U
70 #define SQ_CTRL_QUEUE_INFO_TCPUDP_CS_MASK       0x1U
71 #define SQ_CTRL_QUEUE_INFO_MSS_MASK             0x3FFFU
72 #define SQ_CTRL_QUEUE_INFO_SCTP_MASK            0x1U
73 #define SQ_CTRL_QUEUE_INFO_UC_MASK              0x1U
74 #define SQ_CTRL_QUEUE_INFO_PRI_MASK             0x7U
75
76 #define SQ_CTRL_QUEUE_INFO_SET(val, member)     \
77         (((u32)(val) & SQ_CTRL_QUEUE_INFO_##member##_MASK) <<   \
78                         SQ_CTRL_QUEUE_INFO_##member##_SHIFT)
79
80 #define SQ_CTRL_QUEUE_INFO_GET(val, member)     \
81         (((val) >> SQ_CTRL_QUEUE_INFO_##member##_SHIFT) &       \
82                         SQ_CTRL_QUEUE_INFO_##member##_MASK)
83
84 #define SQ_CTRL_QUEUE_INFO_CLEAR(val, member)   \
85         ((val) & (~(SQ_CTRL_QUEUE_INFO_##member##_MASK << \
86                         SQ_CTRL_QUEUE_INFO_##member##_SHIFT)))
87
88 #define SQ_TASK_INFO0_L2HDR_LEN_SHIFT           0
89 #define SQ_TASK_INFO0_L4OFFLOAD_SHIFT           8
90 #define SQ_TASK_INFO0_INNER_L3TYPE_SHIFT        10
91 #define SQ_TASK_INFO0_VLAN_OFFLOAD_SHIFT        12
92 #define SQ_TASK_INFO0_PARSE_FLAG_SHIFT          13
93 #define SQ_TASK_INFO0_UFO_AVD_SHIFT             14
94 #define SQ_TASK_INFO0_TSO_UFO_SHIFT             15
95 #define SQ_TASK_INFO0_VLAN_TAG_SHIFT            16
96
97 #define SQ_TASK_INFO0_L2HDR_LEN_MASK            0xFFU
98 #define SQ_TASK_INFO0_L4OFFLOAD_MASK            0x3U
99 #define SQ_TASK_INFO0_INNER_L3TYPE_MASK         0x3U
100 #define SQ_TASK_INFO0_VLAN_OFFLOAD_MASK         0x1U
101 #define SQ_TASK_INFO0_PARSE_FLAG_MASK           0x1U
102 #define SQ_TASK_INFO0_UFO_AVD_MASK              0x1U
103 #define SQ_TASK_INFO0_TSO_UFO_MASK              0x1U
104 #define SQ_TASK_INFO0_VLAN_TAG_MASK             0xFFFFU
105
106 #define SQ_TASK_INFO0_SET(val, member)                  \
107         (((u32)(val) & SQ_TASK_INFO0_##member##_MASK) <<        \
108                         SQ_TASK_INFO0_##member##_SHIFT)
109
110 #define SQ_TASK_INFO1_MD_TYPE_SHIFT             8
111 #define SQ_TASK_INFO1_INNER_L4LEN_SHIFT         16
112 #define SQ_TASK_INFO1_INNER_L3LEN_SHIFT         24
113
114 #define SQ_TASK_INFO1_MD_TYPE_MASK              0xFFU
115 #define SQ_TASK_INFO1_INNER_L4LEN_MASK          0xFFU
116 #define SQ_TASK_INFO1_INNER_L3LEN_MASK          0xFFU
117
118 #define SQ_TASK_INFO1_SET(val, member)                  \
119         (((val) & SQ_TASK_INFO1_##member##_MASK) <<     \
120                         SQ_TASK_INFO1_##member##_SHIFT)
121
122 #define SQ_TASK_INFO2_TUNNEL_L4LEN_SHIFT        0
123 #define SQ_TASK_INFO2_OUTER_L3LEN_SHIFT         8
124 #define SQ_TASK_INFO2_TUNNEL_L4TYPE_SHIFT       16
125 #define SQ_TASK_INFO2_OUTER_L3TYPE_SHIFT        24
126
127 #define SQ_TASK_INFO2_TUNNEL_L4LEN_MASK         0xFFU
128 #define SQ_TASK_INFO2_OUTER_L3LEN_MASK          0xFFU
129 #define SQ_TASK_INFO2_TUNNEL_L4TYPE_MASK        0x7U
130 #define SQ_TASK_INFO2_OUTER_L3TYPE_MASK         0x3U
131
132 #define SQ_TASK_INFO2_SET(val, member)                  \
133         (((val) & SQ_TASK_INFO2_##member##_MASK) <<     \
134                         SQ_TASK_INFO2_##member##_SHIFT)
135
136 #define SQ_TASK_INFO4_L2TYPE_SHIFT              31
137
138 #define SQ_TASK_INFO4_L2TYPE_MASK               0x1U
139
140 #define SQ_TASK_INFO4_SET(val, member)          \
141         (((u32)(val) & SQ_TASK_INFO4_##member##_MASK) << \
142                         SQ_TASK_INFO4_##member##_SHIFT)
143
144 /* SQ_DB */
145 #define SQ_DB_OFF                               0x00000800
146 #define SQ_DB_INFO_HI_PI_SHIFT                  0
147 #define SQ_DB_INFO_QID_SHIFT                    8
148 #define SQ_DB_INFO_CFLAG_SHIFT                  23
149 #define SQ_DB_INFO_COS_SHIFT                    24
150 #define SQ_DB_INFO_TYPE_SHIFT                   27
151
152 #define SQ_DB_INFO_HI_PI_MASK                   0xFFU
153 #define SQ_DB_INFO_QID_MASK                     0x3FFU
154 #define SQ_DB_INFO_CFLAG_MASK                   0x1U
155 #define SQ_DB_INFO_COS_MASK                     0x7U
156 #define SQ_DB_INFO_TYPE_MASK                    0x1FU
157 #define SQ_DB_INFO_SET(val, member)             \
158         (((u32)(val) & SQ_DB_INFO_##member##_MASK) <<   \
159                         SQ_DB_INFO_##member##_SHIFT)
160
161 #define SQ_DB                                   1
162 #define SQ_CFLAG_DP                             0       /* CFLAG_DATA_PATH */
163
164 #define SQ_DB_PI_LOW_MASK                       0xFF
165 #define SQ_DB_PI_LOW(pi)                        ((pi) & SQ_DB_PI_LOW_MASK)
166 #define SQ_DB_PI_HI_SHIFT                       8
167 #define SQ_DB_PI_HIGH(pi)                       ((pi) >> SQ_DB_PI_HI_SHIFT)
168 #define SQ_DB_ADDR(sq, pi)              \
169         ((u64 *)((u8 __iomem *)((sq)->db_addr) + SQ_DB_OFF) + SQ_DB_PI_LOW(pi))
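/*
 * The producer index is split in two for the doorbell: the low 8 bits
 * (SQ_DB_PI_LOW) select a u64 slot located SQ_DB_OFF past the queue's
 * doorbell address, while the remaining high bits (SQ_DB_PI_HIGH) travel in
 * the HI_PI field of the doorbell word built in hinic_sq_write_db().
 */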
170
171 /* txq wq operations */
172 #define HINIC_GET_SQ_WQE_MASK(txq)              ((txq)->wq->mask)
173
174 #define HINIC_GET_SQ_HW_CI(txq) \
175         ((be16_to_cpu(*(txq)->cons_idx_addr)) & HINIC_GET_SQ_WQE_MASK(txq))
176
177 #define HINIC_GET_SQ_LOCAL_CI(txq)      \
178         (((txq)->wq->cons_idx) & HINIC_GET_SQ_WQE_MASK(txq))
179
180 #define HINIC_UPDATE_SQ_LOCAL_CI(txq, wqebb_cnt)        \
181         do {                                            \
182                 (txq)->wq->cons_idx += wqebb_cnt;       \
183                 (txq)->wq->delta += wqebb_cnt;          \
184         } while (0)
185
186 #define HINIC_GET_SQ_FREE_WQEBBS(txq)   ((txq)->wq->delta - 1)
187
188 #define HINIC_IS_SQ_EMPTY(txq)  (((txq)->wq->delta) == ((txq)->q_depth))
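/*
 * wq->delta counts unused WQEBBs plus one: the ring is empty when delta
 * equals q_depth, and one WQEBB is always held back (hence the "- 1"),
 * presumably so a completely full ring is never mistaken for an empty one.
 */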
189
190 #define BUF_DESC_SIZE_SHIFT             4
191
192 #define HINIC_SQ_WQE_SIZE(num_sge)              \
193         (sizeof(struct hinic_sq_ctrl) + sizeof(struct hinic_sq_task) +  \
194                         (unsigned int)((num_sge) << BUF_DESC_SIZE_SHIFT))
195
196 #define HINIC_SQ_WQEBB_CNT(num_sge)     \
197         (int)(ALIGN(HINIC_SQ_WQE_SIZE((u32)num_sge), \
198                         HINIC_SQ_WQEBB_SIZE) >> HINIC_SQ_WQEBB_SHIFT)
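/*
 * WQE sizing sketch (the exact ctrl/task struct sizes come from the base
 * headers): wqe_size = sizeof(ctrl) + sizeof(task) + 16B per SGE, rounded
 * up to whole WQEBBs of HINIC_SQ_WQEBB_SIZE bytes; the result is how many
 * ring entries the packet consumes.
 */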
199
200
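/*
 * Convert nr_wqebb 64-byte WQEBBs from CPU to big-endian byte order.  The
 * SSE/NEON shuffle masks reverse the bytes within each 32-bit word, which
 * matches the scalar hinic_cpu_to_be32() fallback but handles 16 bytes per
 * instruction.
 */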
201 static inline void hinic_sq_wqe_cpu_to_be32(void *data, int nr_wqebb)
202 {
203 #if defined(__X86_64_SSE__)
204         int i;
205         __m128i *wqe_line = (__m128i *)data;
206         __m128i shuf_mask = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10,
207                                         11, 4, 5, 6, 7, 0, 1, 2, 3);
208
209         for (i = 0; i < nr_wqebb; i++) {
210                 /* convert 64B wqebb using 4 SSE instructions */
211                 wqe_line[0] = _mm_shuffle_epi8(wqe_line[0], shuf_mask);
212                 wqe_line[1] = _mm_shuffle_epi8(wqe_line[1], shuf_mask);
213                 wqe_line[2] = _mm_shuffle_epi8(wqe_line[2], shuf_mask);
214                 wqe_line[3] = _mm_shuffle_epi8(wqe_line[3], shuf_mask);
215                 wqe_line += 4;
216         }
217 #elif defined(__ARM64_NEON__)
218         int i;
219         uint8x16_t *wqe_line = (uint8x16_t *)data;
220         const uint8x16_t shuf_mask = {3, 2, 1, 0, 7, 6, 5, 4, 11, 10,
221                                         9, 8, 15, 14, 13, 12};
222
223         for (i = 0; i < nr_wqebb; i++) {
224                 wqe_line[0] = vqtbl1q_u8(wqe_line[0], shuf_mask);
225                 wqe_line[1] = vqtbl1q_u8(wqe_line[1], shuf_mask);
226                 wqe_line[2] = vqtbl1q_u8(wqe_line[2], shuf_mask);
227                 wqe_line[3] = vqtbl1q_u8(wqe_line[3], shuf_mask);
228                 wqe_line += 4;
229         }
230 #else
231         hinic_cpu_to_be32(data, nr_wqebb * HINIC_SQ_WQEBB_SIZE);
232 #endif
233 }
234
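/*
 * Same byte-order conversion as above, but per 16-byte SGE/bufdesc instead
 * of per 64-byte WQEBB; used for SGEs written at the ring head after a wrap.
 */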
235 static inline void hinic_sge_cpu_to_be32(void *data, int nr_sge)
236 {
237 #if defined(__X86_64_SSE__)
238         int i;
239         __m128i *sge_line = (__m128i *)data;
240         __m128i shuf_mask = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10,
241                                         11, 4, 5, 6, 7, 0, 1, 2, 3);
242
243         for (i = 0; i < nr_sge; i++) {
244                 /* convert each 16B sge using 1 SSE instruction */
245                 *sge_line = _mm_shuffle_epi8(*sge_line, shuf_mask);
246                 sge_line++;
247         }
248 #elif defined(__ARM64_NEON__)
249         int i;
250         uint8x16_t *sge_line = (uint8x16_t *)data;
251         const uint8x16_t shuf_mask = {3, 2, 1, 0, 7, 6, 5, 4, 11, 10,
252                                         9, 8, 15, 14, 13, 12};
253
254         for (i = 0; i < nr_sge; i++) {
255                 *sge_line = vqtbl1q_u8(*sge_line, shuf_mask);
256                 sge_line++;
257         }
258 #else
259         hinic_cpu_to_be32(data, nr_sge * sizeof(struct hinic_sq_bufdesc));
260 #endif
261 }
262
263 void hinic_txq_get_stats(struct hinic_txq *txq, struct hinic_txq_stats *stats)
264 {
265         if (!txq || !stats) {
266                 PMD_DRV_LOG(ERR, "Txq or stats is NULL");
267                 return;
268         }
269
270         memcpy(stats, &txq->txq_stats, sizeof(txq->txq_stats));
271 }
272
273 void hinic_txq_stats_reset(struct hinic_txq *txq)
274 {
275         struct hinic_txq_stats *txq_stats;
276
277         if (txq == NULL)
278                 return;
279
280         txq_stats = &txq->txq_stats;
281         memset(txq_stats, 0, sizeof(*txq_stats));
282 }
283
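/*
 * Coalesce sge_cnt chained segments of mbuf into one mbuf allocated from
 * the device's copy mempool.  Callers validate beforehand that the combined
 * length fits a copy mbuf; NULL is returned on allocation failure.
 */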
284 static inline struct rte_mbuf *hinic_copy_tx_mbuf(struct hinic_nic_dev *nic_dev,
285                                                   struct rte_mbuf *mbuf,
286                                                   u16 sge_cnt)
287 {
288         struct rte_mbuf *dst_mbuf;
289         u32 offset = 0;
290         u16 i;
291
292         if (unlikely(!nic_dev->cpy_mpool))
293                 return NULL;
294
295         dst_mbuf = rte_pktmbuf_alloc(nic_dev->cpy_mpool);
296         if (unlikely(!dst_mbuf))
297                 return NULL;
298
299         dst_mbuf->data_off = 0;
300         for (i = 0; i < sge_cnt; i++) {
301                 rte_memcpy((char *)dst_mbuf->buf_addr + offset,
302                            (char *)mbuf->buf_addr + mbuf->data_off,
303                            mbuf->data_len);
304                 dst_mbuf->data_len += mbuf->data_len;
305                 offset += mbuf->data_len;
306                 mbuf = mbuf->next;
307         }
308
309         return dst_mbuf;
310 }
311
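/*
 * Fill the WQE buffer descriptors with the IOVA and length of each mbuf
 * segment.  When the WQE wraps (sqe_info->around), the SGEs written at the
 * ring head are converted to big endian here, because the bulk conversion
 * in the caller only covers the contiguous part at the ring bottom.
 * Trailing segments beyond the SGE limit are coalesced via
 * hinic_copy_tx_mbuf() and mapped as one final SGE.
 */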
312 static inline bool hinic_mbuf_dma_map_sge(struct hinic_txq *txq,
313                                           struct rte_mbuf *mbuf,
314                                           struct hinic_sq_bufdesc *sges,
315                                           struct hinic_wqe_info *sqe_info)
316 {
317         dma_addr_t dma_addr;
318         u16 i, around_sges;
319         u16 nb_segs = sqe_info->sge_cnt - sqe_info->cpy_mbuf_cnt;
320         u16 real_nb_segs = mbuf->nb_segs;
321         struct hinic_sq_bufdesc *sge_idx = sges;
322
323         if (unlikely(sqe_info->around)) {
324                 /* part of the wqe is at the sq bottom while the rest
325                  * wraps around to the sq head
326                  */
327                 i = 0;
328                 for (sge_idx = sges; (u64)sge_idx <= txq->sq_bot_sge_addr;
329                      sge_idx++) {
330                         dma_addr = rte_mbuf_data_iova(mbuf);
331                         hinic_set_sge((struct hinic_sge *)sge_idx, dma_addr,
332                                       mbuf->data_len);
333                         mbuf = mbuf->next;
334                         i++;
335                 }
336
337                 around_sges = nb_segs - i;
338                 sge_idx = (struct hinic_sq_bufdesc *)
339                                 ((void *)txq->sq_head_addr);
340                 for (; i < nb_segs; i++) {
341                         dma_addr = rte_mbuf_data_iova(mbuf);
342                         hinic_set_sge((struct hinic_sge *)sge_idx, dma_addr,
343                                       mbuf->data_len);
344                         mbuf = mbuf->next;
345                         sge_idx++;
346                 }
347
348                 /* convert sges at head to big endian */
349                 hinic_sge_cpu_to_be32((void *)txq->sq_head_addr, around_sges);
350         } else {
351                 /* wqe is in continuous space */
352                 for (i = 0; i < nb_segs; i++) {
353                         dma_addr = rte_mbuf_data_iova(mbuf);
354                         hinic_set_sge((struct hinic_sge *)sge_idx, dma_addr,
355                                       mbuf->data_len);
356                         mbuf = mbuf->next;
357                         sge_idx++;
358                 }
359         }
360
361         /* non-tso with more than 17 sges: copy trailing mbufs to one buffer */
362         if (unlikely(sqe_info->cpy_mbuf_cnt != 0)) {
363                 /* copy excess mbuf segs to one valid buffer, at a performance cost */
364                 txq->txq_stats.cpy_pkts += 1;
365                 mbuf = hinic_copy_tx_mbuf(txq->nic_dev, mbuf,
366                                           real_nb_segs - nb_segs);
367                 if (unlikely(!mbuf))
368                         return false;
369
370                 txq->tx_info[sqe_info->pi].cpy_mbuf = mbuf;
371
372                 /* deal with the last mbuf */
373                 dma_addr = rte_mbuf_data_iova(mbuf);
374                 hinic_set_sge((struct hinic_sge *)sge_idx, dma_addr,
375                               mbuf->data_len);
376                 if (unlikely(sqe_info->around))
377                         hinic_sge_cpu_to_be32((void *)sge_idx, 1);
378         }
379
380         return true;
381 }
382
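/*
 * Build the WQE control word (section lengths in 8-byte units, data format,
 * owner bit) and finalize queue_info: UC is always set, a zero MSS is
 * replaced with TX_MSS_DEFAULT, and an MSS below TX_MSS_MIN is raised to
 * TX_MSS_MIN.
 */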
383 static inline void hinic_fill_sq_wqe_header(struct hinic_sq_ctrl *ctrl,
384                                             u32 queue_info, int nr_descs,
385                                             u8 owner)
386 {
387         u32 ctrl_size, task_size, bufdesc_size;
388
389         ctrl_size = SIZE_8BYTES(sizeof(struct hinic_sq_ctrl));
390         task_size = SIZE_8BYTES(sizeof(struct hinic_sq_task));
391         bufdesc_size = HINIC_BUF_DESC_SIZE(nr_descs);
392
393         ctrl->ctrl_fmt = SQ_CTRL_SET(bufdesc_size, BUFDESC_SECT_LEN) |
394                         SQ_CTRL_SET(task_size, TASKSECT_LEN)    |
395                         SQ_CTRL_SET(SQ_NORMAL_WQE, DATA_FORMAT) |
396                         SQ_CTRL_SET(ctrl_size, LEN)             |
397                         SQ_CTRL_SET(owner, OWNER);
398
399         ctrl->queue_info = queue_info;
400         ctrl->queue_info |= SQ_CTRL_QUEUE_INFO_SET(1U, UC);
401
402         if (!SQ_CTRL_QUEUE_INFO_GET(ctrl->queue_info, MSS)) {
403                 ctrl->queue_info |=
404                         SQ_CTRL_QUEUE_INFO_SET(TX_MSS_DEFAULT, MSS);
405         } else if (SQ_CTRL_QUEUE_INFO_GET(ctrl->queue_info, MSS) < TX_MSS_MIN) {
406                 /* mss should not be less than 80 */
407                 ctrl->queue_info =
408                                 SQ_CTRL_QUEUE_INFO_CLEAR(ctrl->queue_info, MSS);
409                 ctrl->queue_info |= SQ_CTRL_QUEUE_INFO_SET(TX_MSS_MIN, MSS);
410         }
411 }
412
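/*
 * TSO SGE validation: walk the chain in windows of up to 17 segments and
 * check that each window carries at least one MSS of data (plus headers for
 * the first window), which appears to be a hardware constraint.  If a
 * window falls short, the remainder of the packet is marked to be copied
 * into a single buffer (cpy_mbuf_cnt = 1), provided it fits
 * HINIC_COPY_MBUF_SIZE.
 */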
413 static inline bool hinic_is_tso_sge_valid(struct rte_mbuf *mbuf,
414                                           struct hinic_tx_offload_info
415                                           *poff_info,
416                                           struct hinic_wqe_info *sqe_info)
417 {
418         u32 total_len, limit_len, checked_len, left_len;
419         u32 i, first_mss_sges, left_sges;
420         struct rte_mbuf *mbuf_head, *mbuf_pre;
421
422         left_sges = mbuf->nb_segs;
423         mbuf_head = mbuf;
424
425         /* tso sge number validation */
426         if (unlikely(left_sges >= HINIC_NONTSO_PKT_MAX_SGE)) {
427                 checked_len = 0;
428                 limit_len = mbuf->tso_segsz + poff_info->payload_offset;
429                 first_mss_sges = HINIC_NONTSO_PKT_MAX_SGE;
430
431                 /* every 17 contiguous mbuf segs must be checked once */
432                 while (left_sges >= HINIC_NONTSO_PKT_MAX_SGE) {
433                         /* total len of the first first_mss_sges mbufs
434                          * must be equal to or greater than limit_len
435                          */
436                         total_len = 0;
437                         for (i = 0; i < first_mss_sges; i++) {
438                                 total_len += mbuf->data_len;
439                                 mbuf_pre = mbuf;
440                                 mbuf = mbuf->next;
441                                 if (total_len >= limit_len) {
442                                         limit_len = mbuf_head->tso_segsz;
443                                         break;
444                                 }
445                         }
446
447                         checked_len += total_len;
448
449                         /* try to copy if not valid */
450                         if (unlikely(first_mss_sges == i)) {
451                                 left_sges -= first_mss_sges;
452                                 checked_len -= mbuf_pre->data_len;
453
454                                 left_len = mbuf_head->pkt_len - checked_len;
455                                 if (left_len > HINIC_COPY_MBUF_SIZE)
456                                         return false;
457
458                                 sqe_info->sge_cnt = mbuf_head->nb_segs -
459                                                         left_sges;
460                                 sqe_info->cpy_mbuf_cnt = 1;
461
462                                 return true;
463                         }
464                         first_mss_sges = (HINIC_NONTSO_PKT_MAX_SGE - 1);
465
466                         /* continue with the next 16 mbufs */
467                         left_sges -= (i + 1);
468                 } /* end of while */
469         }
470
471         sqe_info->sge_cnt = mbuf_head->nb_segs;
472         return true;
473 }
474
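/*
 * Program the task-section header lengths and the L4-checksum related
 * queue_info bits.  For VXLAN packets (TUNNEL_UDP_NO_CSUM) the outer L2/L3
 * and tunnel lengths are filled in as well; otherwise only the inner header
 * lengths are used.
 */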
475 static inline void
476 hinic_set_l4_csum_info(struct hinic_sq_task *task,
477                 u32 *queue_info, struct hinic_tx_offload_info *poff_info)
478 {
479         u32 tcp_udp_cs, sctp;
480         u16 l2hdr_len;
481
482         sctp = 0;
483         if (unlikely(poff_info->inner_l4_type == SCTP_OFFLOAD_ENABLE))
484                 sctp = 1;
485
486         tcp_udp_cs = poff_info->inner_l4_tcp_udp;
487
488         if (poff_info->tunnel_type == TUNNEL_UDP_NO_CSUM) {
489                 l2hdr_len =  poff_info->outer_l2_len;
490
491                 task->pkt_info2 |=
492                 SQ_TASK_INFO2_SET(poff_info->outer_l3_type, OUTER_L3TYPE) |
493                 SQ_TASK_INFO2_SET(poff_info->outer_l3_len, OUTER_L3LEN);
494                 task->pkt_info2 |=
495                 SQ_TASK_INFO2_SET(poff_info->tunnel_type, TUNNEL_L4TYPE) |
496                 SQ_TASK_INFO2_SET(poff_info->tunnel_length, TUNNEL_L4LEN);
497         } else {
498                 l2hdr_len = poff_info->inner_l2_len;
499         }
500
501         task->pkt_info0 |= SQ_TASK_INFO0_SET(l2hdr_len, L2HDR_LEN);
502         task->pkt_info1 |=
503                 SQ_TASK_INFO1_SET(poff_info->inner_l3_len, INNER_L3LEN);
504         task->pkt_info0 |=
505                 SQ_TASK_INFO0_SET(poff_info->inner_l3_type, INNER_L3TYPE);
506         task->pkt_info1 |=
507                 SQ_TASK_INFO1_SET(poff_info->inner_l4_len, INNER_L4LEN);
508         task->pkt_info0 |=
509                 SQ_TASK_INFO0_SET(poff_info->inner_l4_type, L4OFFLOAD);
510         *queue_info |=
511                 SQ_CTRL_QUEUE_INFO_SET(poff_info->payload_offset, PLDOFF) |
512                 SQ_CTRL_QUEUE_INFO_SET(tcp_udp_cs, TCPUDP_CS) |
513                 SQ_CTRL_QUEUE_INFO_SET(sctp, SCTP);
514 }
515
516 static inline void
517 hinic_set_tso_info(struct hinic_sq_task *task,
518                 u32 *queue_info, struct rte_mbuf *mbuf,
519                 struct hinic_tx_offload_info *poff_info)
520 {
521         hinic_set_l4_csum_info(task, queue_info, poff_info);
522
523         /* wqe for tso */
524         task->pkt_info0 |=
525                 SQ_TASK_INFO0_SET(poff_info->inner_l3_type, INNER_L3TYPE);
526         task->pkt_info0 |= SQ_TASK_INFO0_SET(TSO_ENABLE, TSO_UFO);
527         *queue_info |= SQ_CTRL_QUEUE_INFO_SET(TSO_ENABLE, TSO);
528         /* qsf (queue_info) was initialized by the caller */
529         *queue_info = SQ_CTRL_QUEUE_INFO_CLEAR(*queue_info, MSS);
530         *queue_info |= SQ_CTRL_QUEUE_INFO_SET(mbuf->tso_segsz, MSS);
531 }
532
533 static inline void
534 hinic_set_vlan_tx_offload(struct hinic_sq_task *task,
535                         u32 *queue_info, u16 vlan_tag, u16 vlan_pri)
536 {
537         task->pkt_info0 |= SQ_TASK_INFO0_SET(vlan_tag, VLAN_TAG) |
538                                 SQ_TASK_INFO0_SET(1U, VLAN_OFFLOAD);
539
540         *queue_info |= SQ_CTRL_QUEUE_INFO_SET(vlan_pri, PRI);
541 }
542
543 static inline void
544 hinic_fill_tx_offload_info(struct rte_mbuf *mbuf,
545                 struct hinic_sq_task *task, u32 *queue_info,
546                 struct hinic_tx_offload_info *tx_off_info)
547 {
548         u16 vlan_tag;
549         uint64_t ol_flags = mbuf->ol_flags;
550
551         /* clear DW0~2 of task section for offload */
552         task->pkt_info0 = 0;
553         task->pkt_info1 = 0;
554         task->pkt_info2 = 0;
555
556         /* Base VLAN */
557         if (unlikely(ol_flags & PKT_TX_VLAN_PKT)) {
558                 vlan_tag = mbuf->vlan_tci;
559                 hinic_set_vlan_tx_offload(task, queue_info, vlan_tag,
560                                           vlan_tag >> VLAN_PRIO_SHIFT);
561         }
562
563         /* neither checksum nor tso offload requested */
564         if (unlikely(!(ol_flags & HINIC_TX_CKSUM_OFFLOAD_MASK)))
565                 return;
566
567         if ((ol_flags & PKT_TX_TCP_SEG))
568                 /* set tso info for task and qsf */
569                 hinic_set_tso_info(task, queue_info, mbuf, tx_off_info);
570         else /* just support l4 checksum offload */
571                 hinic_set_l4_csum_info(task, queue_info, tx_off_info);
572 }
573
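/*
 * Reclaim completed WQEs: compare the hardware consumer index written back
 * by the NIC with the local one, free any bounce (cpy_mbuf) buffers, and
 * return single-segment mbufs to their mempool in bulk while multi-segment
 * chains go through rte_pktmbuf_free().  At most tx_free_thresh entries are
 * processed per call.
 */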
574 static inline void hinic_xmit_mbuf_cleanup(struct hinic_txq *txq)
575 {
576         struct hinic_tx_info *tx_info;
577         struct rte_mbuf *mbuf, *m, *mbuf_free[HINIC_MAX_TX_FREE_BULK];
578         int i, nb_free = 0;
579         u16 hw_ci, sw_ci, sq_mask;
580         int wqebb_cnt = 0;
581
582         hw_ci = HINIC_GET_SQ_HW_CI(txq);
583         sw_ci = HINIC_GET_SQ_LOCAL_CI(txq);
584         sq_mask = HINIC_GET_SQ_WQE_MASK(txq);
585
586         for (i = 0; i < txq->tx_free_thresh; ++i) {
587                 tx_info = &txq->tx_info[sw_ci];
588                 if (hw_ci == sw_ci ||
589                         (((hw_ci - sw_ci) & sq_mask) < tx_info->wqebb_cnt))
590                         break;
591
592                 sw_ci = (sw_ci + tx_info->wqebb_cnt) & sq_mask;
593
594                 if (unlikely(tx_info->cpy_mbuf != NULL)) {
595                         rte_pktmbuf_free(tx_info->cpy_mbuf);
596                         tx_info->cpy_mbuf = NULL;
597                 }
598
599                 wqebb_cnt += tx_info->wqebb_cnt;
600                 mbuf = tx_info->mbuf;
601
602                 if (likely(mbuf->nb_segs == 1)) {
603                         m = rte_pktmbuf_prefree_seg(mbuf);
604                         tx_info->mbuf = NULL;
605
606                         if (unlikely(m == NULL))
607                                 continue;
608
609                         mbuf_free[nb_free++] = m;
610                         if (unlikely(m->pool != mbuf_free[0]->pool ||
611                                 nb_free >= HINIC_MAX_TX_FREE_BULK)) {
612                                 rte_mempool_put_bulk(mbuf_free[0]->pool,
613                                         (void **)mbuf_free, (nb_free - 1));
614                                 nb_free = 0;
615                                 mbuf_free[nb_free++] = m;
616                         }
617                 } else {
618                         rte_pktmbuf_free(mbuf);
619                         tx_info->mbuf = NULL;
620                 }
621         }
622
623         if (nb_free > 0)
624                 rte_mempool_put_bulk(mbuf_free[0]->pool, (void **)mbuf_free,
625                                      nb_free);
626
627         HINIC_UPDATE_SQ_LOCAL_CI(txq, wqebb_cnt);
628 }
629
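/*
 * Reserve wqebb_cnt entries at the current producer index.  If the
 * reservation crosses the end of the ring, the owner bit is toggled for the
 * next lap and wqe_info->around/seq_wqebbs record how many WQEBBs lie in
 * the contiguous bottom part, so the caller can handle the wrapped part at
 * the ring head separately.
 */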
630 static inline struct hinic_sq_wqe *
631 hinic_get_sq_wqe(struct hinic_txq *txq, int wqebb_cnt,
632                 struct hinic_wqe_info *wqe_info)
633 {
634         u32 cur_pi, end_pi;
635         u16 remain_wqebbs;
636         struct hinic_sq *sq = txq->sq;
637         struct hinic_wq *wq = txq->wq;
638
639         /* record current pi */
640         cur_pi = MASKED_WQE_IDX(wq, wq->prod_idx);
641         end_pi = cur_pi + wqebb_cnt;
642
643         /* update next pi and delta */
644         wq->prod_idx += wqebb_cnt;
645         wq->delta -= wqebb_cnt;
646
647         /* return current pi and owner */
648         wqe_info->pi = cur_pi;
649         wqe_info->owner = sq->owner;
650         wqe_info->around = 0;
651         wqe_info->seq_wqebbs = wqebb_cnt;
652
653         if (unlikely(end_pi >= txq->q_depth)) {
654                 /* update owner of next prod_idx */
655                 sq->owner = !sq->owner;
656
657                 /* turn around to head */
658                 if (unlikely(end_pi > txq->q_depth)) {
659                         wqe_info->around = 1;
660                         remain_wqebbs = txq->q_depth - cur_pi;
661                         wqe_info->seq_wqebbs = remain_wqebbs;
662                 }
663         }
664
665         return (struct hinic_sq_wqe *)WQ_WQE_ADDR(wq, cur_pi);
666 }
667
668 static inline int
669 hinic_validate_tx_offload(const struct rte_mbuf *m)
670 {
671         uint64_t ol_flags = m->ol_flags;
672         uint64_t inner_l3_offset = m->l2_len;
673
674         /* only vxlan tunnel offload is supported */
675         if ((ol_flags & PKT_TX_TUNNEL_MASK) &&
676             !(ol_flags & PKT_TX_TUNNEL_VXLAN))
677                 return -ENOTSUP;
678
679         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
680                 inner_l3_offset += m->outer_l2_len + m->outer_l3_len;
681
682         /* Headers are fragmented */
683         if (rte_pktmbuf_data_len(m) < inner_l3_offset + m->l3_len + m->l4_len)
684                 return -ENOTSUP;
685
686         /* IP checksum can be counted only for IPv4 packet */
687         if ((ol_flags & PKT_TX_IP_CKSUM) && (ol_flags & PKT_TX_IPV6))
688                 return -EINVAL;
689
690         /* IP type not set when required */
691         if (ol_flags & (PKT_TX_L4_MASK | PKT_TX_TCP_SEG)) {
692                 if (!(ol_flags & (PKT_TX_IPV4 | PKT_TX_IPV6)))
693                         return -EINVAL;
694         }
695
696         /* Check requirements for TSO packet */
697         if (ol_flags & PKT_TX_TCP_SEG) {
698                 if (m->tso_segsz == 0 ||
699                         ((ol_flags & PKT_TX_IPV4) &&
700                         !(ol_flags & PKT_TX_IP_CKSUM)))
701                         return -EINVAL;
702         }
703
704         /* PKT_TX_OUTER_IP_CKSUM set for non outer IPv4 packet. */
705         if ((ol_flags & PKT_TX_OUTER_IP_CKSUM) &&
706                 !(ol_flags & PKT_TX_OUTER_IPV4))
707                 return -EINVAL;
708
709         return 0;
710 }
711
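/*
 * Pseudo-header checksum helpers, similar in spirit to rte_ipv4_phdr_cksum()
 * and rte_ipv6_phdr_cksum(): the L4 length is derived from the IP header,
 * and it is forced to 0 for TSO (the hardware presumably recomputes it per
 * segment).
 */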
712 static inline uint16_t
713 hinic_ipv4_phdr_cksum(const struct rte_ipv4_hdr *ipv4_hdr, uint64_t ol_flags)
714 {
715         struct ipv4_psd_header {
716                 uint32_t src_addr; /* IP address of source host. */
717                 uint32_t dst_addr; /* IP address of destination host. */
718                 uint8_t  zero;     /* zero. */
719                 uint8_t  proto;    /* L4 protocol type. */
720                 uint16_t len;      /* L4 length. */
721         } psd_hdr;
722         uint8_t ihl;
723
724         psd_hdr.src_addr = ipv4_hdr->src_addr;
725         psd_hdr.dst_addr = ipv4_hdr->dst_addr;
726         psd_hdr.zero = 0;
727         psd_hdr.proto = ipv4_hdr->next_proto_id;
728         if (ol_flags & PKT_TX_TCP_SEG) {
729                 psd_hdr.len = 0;
730         } else {
731                 /* ipv4_hdr->version_ihl is a single byte; the ihl field
732                  * occupies the lower 4 bits, in units of 4 bytes
733                  */
734                 ihl = (ipv4_hdr->version_ihl & 0xF) << 2;
735                 psd_hdr.len =
736                 rte_cpu_to_be_16(rte_be_to_cpu_16(ipv4_hdr->total_length) -
737                                  ihl);
738         }
739         return rte_raw_cksum(&psd_hdr, sizeof(psd_hdr));
740 }
741
742 static inline uint16_t
743 hinic_ipv6_phdr_cksum(const struct rte_ipv6_hdr *ipv6_hdr, uint64_t ol_flags)
744 {
745         uint32_t sum;
746         struct {
747                 uint32_t len;   /* L4 length. */
748                 uint32_t proto; /* L4 protocol - top 3 bytes must be zero */
749         } psd_hdr;
750
751         psd_hdr.proto = (ipv6_hdr->proto << 24);
752         if (ol_flags & PKT_TX_TCP_SEG)
753                 psd_hdr.len = 0;
754         else
755                 psd_hdr.len = ipv6_hdr->payload_len;
756
757         sum = __rte_raw_cksum(ipv6_hdr->src_addr,
758                 sizeof(ipv6_hdr->src_addr) + sizeof(ipv6_hdr->dst_addr), 0);
759         sum = __rte_raw_cksum(&psd_hdr, sizeof(psd_hdr), sum);
760         return __rte_raw_cksum_reduce(sum);
761 }
762
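/*
 * Translate mbuf offload flags into the driver's hinic_tx_offload_info:
 * validate the requested offloads, record inner/outer header lengths and
 * the payload offset, and pre-write the pseudo-header checksum into the
 * packet's TCP/UDP header so hardware only has to complete the L4 checksum.
 */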
763 static inline int
764 hinic_tx_offload_pkt_prepare(struct rte_mbuf *m,
765                                 struct hinic_tx_offload_info *off_info)
766 {
767         struct rte_ipv4_hdr *ipv4_hdr;
768         struct rte_ipv6_hdr *ipv6_hdr;
769         struct rte_tcp_hdr *tcp_hdr;
770         struct rte_udp_hdr *udp_hdr;
771         struct rte_ether_hdr *eth_hdr;
772         struct rte_vlan_hdr *vlan_hdr;
773         u16 eth_type = 0;
774         uint64_t inner_l3_offset = m->l2_len;
775         uint64_t ol_flags = m->ol_flags;
776
777         /* Does packet set any of available offloads */
778         if (!(ol_flags & HINIC_TX_CKSUM_OFFLOAD_MASK))
779                 return 0;
780
781         if (unlikely(hinic_validate_tx_offload(m)))
782                 return -EINVAL;
783
784         if ((ol_flags & PKT_TX_OUTER_IP_CKSUM) ||
785                         (ol_flags & PKT_TX_OUTER_IPV6) ||
786                         (ol_flags & PKT_TX_TUNNEL_VXLAN)) {
787                 inner_l3_offset += m->outer_l2_len + m->outer_l3_len;
788                 off_info->outer_l2_len = m->outer_l2_len;
789                 off_info->outer_l3_len = m->outer_l3_len;
790                 /* only vxlan tunnel packets are supported */
791                 off_info->inner_l2_len = m->l2_len - VXLANLEN -
792                                                 sizeof(struct rte_udp_hdr);
793                 off_info->inner_l3_len = m->l3_len;
794                 off_info->inner_l4_len = m->l4_len;
795                 off_info->tunnel_length = m->l2_len;
796                 off_info->payload_offset = m->outer_l2_len +
797                                 m->outer_l3_len + m->l2_len + m->l3_len;
798                 off_info->tunnel_type = TUNNEL_UDP_NO_CSUM;
799         } else {
800                 off_info->inner_l2_len = m->l2_len;
801                 off_info->inner_l3_len = m->l3_len;
802                 off_info->inner_l4_len = m->l4_len;
803                 off_info->tunnel_type = NOT_TUNNEL;
804                 off_info->payload_offset = m->l2_len + m->l3_len;
805         }
806
807         if (((ol_flags & PKT_TX_L4_MASK) != PKT_TX_SCTP_CKSUM) &&
808             ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_UDP_CKSUM))
809                 off_info->payload_offset += m->l4_len;
810
811         /* invalid udp or tcp header */
812         if (unlikely(off_info->payload_offset > MAX_PLD_OFFSET))
813                 return -EINVAL;
814
815         /* Process outer udp pseudo-header checksum */
816         if ((ol_flags & PKT_TX_TUNNEL_VXLAN) && ((ol_flags & PKT_TX_TCP_SEG) ||
817                         (ol_flags & PKT_TX_OUTER_IP_CKSUM) ||
818                         (ol_flags & PKT_TX_OUTER_IPV6))) {
819                 off_info->tunnel_type = TUNNEL_UDP_CSUM;
820
821                 /* inner_l4_tcp_udp must be set so the outer udp checksum
822                  * is calculated even for vxlan packets without inner l3/l4
823                  */
824                 off_info->inner_l4_tcp_udp = 1;
825
826                 eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
827                 eth_type = rte_be_to_cpu_16(eth_hdr->ether_type);
828
829                 if (eth_type == RTE_ETHER_TYPE_VLAN) {
830                         vlan_hdr = (struct rte_vlan_hdr *)(eth_hdr + 1);
831                         eth_type = rte_be_to_cpu_16(vlan_hdr->eth_proto);
832                 }
833
834                 if (eth_type == RTE_ETHER_TYPE_IPV4) {
835                         ipv4_hdr =
836                         rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *,
837                                                 m->outer_l2_len);
838                         off_info->outer_l3_type = IPV4_PKT_WITH_CHKSUM_OFFLOAD;
839                         ipv4_hdr->hdr_checksum = 0;
840
841                         udp_hdr = (struct rte_udp_hdr *)((char *)ipv4_hdr +
842                                                         m->outer_l3_len);
843                         udp_hdr->dgram_cksum =
844                                 hinic_ipv4_phdr_cksum(ipv4_hdr, ol_flags);
845                 } else if (eth_type == RTE_ETHER_TYPE_IPV6) {
846                         off_info->outer_l3_type = IPV6_PKT;
847                         ipv6_hdr =
848                         rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *,
849                                                 m->outer_l2_len);
850
851                         udp_hdr =
852                         rte_pktmbuf_mtod_offset(m, struct rte_udp_hdr *,
853                                                 (m->outer_l2_len +
854                                                 m->outer_l3_len));
855                         udp_hdr->dgram_cksum =
856                                 hinic_ipv6_phdr_cksum(ipv6_hdr, ol_flags);
857                 }
858         }
859
860         if (ol_flags & PKT_TX_IPV4)
861                 off_info->inner_l3_type = (ol_flags & PKT_TX_IP_CKSUM) ?
862                                         IPV4_PKT_WITH_CHKSUM_OFFLOAD :
863                                         IPV4_PKT_NO_CHKSUM_OFFLOAD;
864         else if (ol_flags & PKT_TX_IPV6)
865                 off_info->inner_l3_type = IPV6_PKT;
866
867         /* Process the pseudo-header checksum */
868         if ((ol_flags & PKT_TX_L4_MASK) == PKT_TX_UDP_CKSUM) {
869                 if (ol_flags & PKT_TX_IPV4) {
870                         ipv4_hdr =
871                         rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *,
872                                                 inner_l3_offset);
873
874                         if (ol_flags & PKT_TX_IP_CKSUM)
875                                 ipv4_hdr->hdr_checksum = 0;
876
877                         udp_hdr = (struct rte_udp_hdr *)((char *)ipv4_hdr +
878                                                                 m->l3_len);
879                         udp_hdr->dgram_cksum =
880                                 hinic_ipv4_phdr_cksum(ipv4_hdr, ol_flags);
881                 } else {
882                         ipv6_hdr =
883                         rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *,
884                                                 inner_l3_offset);
885
886                         udp_hdr =
887                         rte_pktmbuf_mtod_offset(m, struct rte_udp_hdr *,
888                                                 (inner_l3_offset + m->l3_len));
889                         udp_hdr->dgram_cksum =
890                                 hinic_ipv6_phdr_cksum(ipv6_hdr, ol_flags);
891                 }
892
893                 off_info->inner_l4_type = UDP_OFFLOAD_ENABLE;
894                 off_info->inner_l4_tcp_udp = 1;
895                 off_info->inner_l4_len = sizeof(struct rte_udp_hdr);
896         } else if (((ol_flags & PKT_TX_L4_MASK) == PKT_TX_TCP_CKSUM) ||
897                         (ol_flags & PKT_TX_TCP_SEG)) {
898                 if (ol_flags & PKT_TX_IPV4) {
899                         ipv4_hdr =
900                         rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *,
901                                                 inner_l3_offset);
902
903                         if (ol_flags & PKT_TX_IP_CKSUM)
904                                 ipv4_hdr->hdr_checksum = 0;
905
906                         /* non-TSO tcp */
907                         tcp_hdr = (struct rte_tcp_hdr *)((char *)ipv4_hdr +
908                                                                 m->l3_len);
909                         tcp_hdr->cksum =
910                                 hinic_ipv4_phdr_cksum(ipv4_hdr, ol_flags);
911                 } else {
912                         ipv6_hdr =
913                         rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *,
914                                                 inner_l3_offset);
915                         /* non-TSO tcp */
916                         tcp_hdr =
917                         rte_pktmbuf_mtod_offset(m, struct rte_tcp_hdr *,
918                                                 (inner_l3_offset + m->l3_len));
919                         tcp_hdr->cksum =
920                                 hinic_ipv6_phdr_cksum(ipv6_hdr, ol_flags);
921                 }
922
923                 off_info->inner_l4_type = TCP_OFFLOAD_ENABLE;
924                 off_info->inner_l4_tcp_udp = 1;
925         } else if ((ol_flags & PKT_TX_L4_MASK) == PKT_TX_SCTP_CKSUM) {
926                 off_info->inner_l4_type = SCTP_OFFLOAD_ENABLE;
927                 off_info->inner_l4_tcp_udp = 0;
928                 off_info->inner_l4_len = sizeof(struct rte_sctp_hdr);
929         }
930
931         return 0;
932 }
933
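/*
 * Per-packet admission check: prepare the offload info, then decide how
 * many SGEs the WQE needs.  Non-TSO packets are limited to 64KB and 17
 * SGEs (longer chains use the copy path); TSO packets are limited to 127
 * SGEs plus the per-MSS validation above.
 */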
934 static inline bool hinic_get_sge_txoff_info(struct rte_mbuf *mbuf_pkt,
935                                             struct hinic_wqe_info *sqe_info,
936                                             struct hinic_tx_offload_info
937                                             *off_info)
938 {
939         u16  i, total_len, sge_cnt = mbuf_pkt->nb_segs;
940         struct rte_mbuf *mbuf;
941         int ret;
942
943         memset(off_info, 0, sizeof(*off_info));
944
945         ret = hinic_tx_offload_pkt_prepare(mbuf_pkt, off_info);
946         if (unlikely(ret))
947                 return false;
948
949         sqe_info->cpy_mbuf_cnt = 0;
950
951         /* non tso mbuf */
952         if (likely(!(mbuf_pkt->ol_flags & PKT_TX_TCP_SEG))) {
953                 if (unlikely(mbuf_pkt->pkt_len > MAX_SINGLE_SGE_SIZE)) {
954                         /* non tso packet len must be less than 64KB */
955                         return false;
956                 } else if (unlikely(HINIC_NONTSO_SEG_NUM_INVALID(sge_cnt))) {
957                         /* a non tso packet must not use more than 17 buffers;
958                          * extra mbuf segs must be copied into one buffer
959                          */
960                         total_len = 0;
961                         mbuf = mbuf_pkt;
962                         for (i = 0; i < (HINIC_NONTSO_PKT_MAX_SGE - 1) ; i++) {
963                                 total_len += mbuf->data_len;
964                                 mbuf = mbuf->next;
965                         }
966
967                         /* by default the copy buffer holds at most 4KB of mbuf data */
968                         if ((u32)(total_len + (u16)HINIC_COPY_MBUF_SIZE) <
969                                   mbuf_pkt->pkt_len)
970                                 return false;
971
972                         sqe_info->sge_cnt = HINIC_NONTSO_PKT_MAX_SGE;
973                         sqe_info->cpy_mbuf_cnt = 1;
974                         return true;
975                 }
976
977                 /* valid non tso mbuf */
978                 sqe_info->sge_cnt = sge_cnt;
979         } else {
980                 /* tso mbuf */
981                 if (unlikely(HINIC_TSO_SEG_NUM_INVALID(sge_cnt)))
982                         /* too many mbuf segs */
983                         return false;
984
985                 /* check tso mbuf segs are valid or not */
986                 if (unlikely(!hinic_is_tso_sge_valid(mbuf_pkt,
987                              off_info, sqe_info)))
988                         return false;
989         }
990
991         return true;
992 }
993
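/*
 * Ring the SQ doorbell: pack the high producer-index bits, queue id, CoS
 * and doorbell type into one big-endian word and write it to the doorbell
 * slot selected by the low PI bits (see SQ_DB_ADDR above).
 */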
994 static inline void hinic_sq_write_db(struct hinic_sq *sq, int cos)
995 {
996         u16 prod_idx;
997         u32 hi_prod_idx;
998         struct hinic_sq_db sq_db;
999
1000         prod_idx = MASKED_SQ_IDX(sq, sq->wq->prod_idx);
1001         hi_prod_idx = SQ_DB_PI_HIGH(prod_idx);
1002
1003         sq_db.db_info = SQ_DB_INFO_SET(hi_prod_idx, HI_PI) |
1004                         SQ_DB_INFO_SET(SQ_DB, TYPE) |
1005                         SQ_DB_INFO_SET(SQ_CFLAG_DP, CFLAG) |
1006                         SQ_DB_INFO_SET(cos, COS) |
1007                         SQ_DB_INFO_SET(sq->q_id, QID);
1008
1009         /* Data should be written to HW in Big Endian Format */
1010         sq_db.db_info = cpu_to_be32(sq_db.db_info);
1011
1012         /* Write all before the doorbell */
1013         rte_wmb();
1014         writel(sq_db.db_info, SQ_DB_ADDR(sq, prod_idx));
1015 }
1016
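/*
 * Burst transmit entry point, normally reached through rte_eth_tx_burst().
 * It returns the number of packets actually queued; the application decides
 * whether to retry or free the remainder.  Illustrative caller sketch (port
 * and queue ids are the application's choice):
 *
 *   uint16_t sent = rte_eth_tx_burst(port_id, queue_id, pkts, nb_pkts);
 *   while (sent < nb_pkts)
 *           rte_pktmbuf_free(pkts[sent++]);
 */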
1017 u16 hinic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, u16 nb_pkts)
1018 {
1019         int free_wqebb_cnt, wqe_wqebb_cnt;
1020         u32 queue_info, tx_bytes = 0;
1021         u16 nb_tx;
1022         struct hinic_wqe_info sqe_info;
1023         struct hinic_tx_offload_info off_info;
1024         struct rte_mbuf *mbuf_pkt;
1025         struct hinic_txq *txq = tx_queue;
1026         struct hinic_tx_info *tx_info;
1027         struct hinic_sq_wqe *sq_wqe;
1028         struct hinic_sq_task *task;
1029
1030         /* reclaim tx mbuf before xmit new packet */
1031         if (HINIC_GET_SQ_FREE_WQEBBS(txq) < txq->tx_free_thresh)
1032                 hinic_xmit_mbuf_cleanup(txq);
1033
1034         /* tx loop routine */
1035         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
1036                 mbuf_pkt = *tx_pkts++;
1037                 queue_info = 0;
1038
1039                 /* 1. parse sge and tx offload info from mbuf */
1040                 if (unlikely(!hinic_get_sge_txoff_info(mbuf_pkt,
1041                                                        &sqe_info, &off_info))) {
1042                         txq->txq_stats.off_errs++;
1043                         break;
1044                 }
1045
1046                 /* 2. try to get enough wqebb */
1047                 wqe_wqebb_cnt = HINIC_SQ_WQEBB_CNT(sqe_info.sge_cnt);
1048                 free_wqebb_cnt = HINIC_GET_SQ_FREE_WQEBBS(txq);
1049                 if (unlikely(wqe_wqebb_cnt > free_wqebb_cnt)) {
1050                         /* reclaim again */
1051                         hinic_xmit_mbuf_cleanup(txq);
1052                         free_wqebb_cnt = HINIC_GET_SQ_FREE_WQEBBS(txq);
1053                         if (unlikely(wqe_wqebb_cnt > free_wqebb_cnt)) {
1054                                 txq->txq_stats.tx_busy += (nb_pkts - nb_tx);
1055                                 break;
1056                         }
1057                 }
1058
1059                 /* 3. get sq tail wqe address from wqe_page;
1060                  * the sq has enough wqebbs for this packet
1061                  */
1062                 sq_wqe = hinic_get_sq_wqe(txq, wqe_wqebb_cnt, &sqe_info);
1063
1064                 /* 4. fill sq wqe sge section */
1065                 if (unlikely(!hinic_mbuf_dma_map_sge(txq, mbuf_pkt,
1066                                                      sq_wqe->buf_descs,
1067                                                      &sqe_info))) {
1068                         hinic_return_sq_wqe(txq->nic_dev->hwdev, txq->q_id,
1069                                             wqe_wqebb_cnt, sqe_info.owner);
1070                         txq->txq_stats.off_errs++;
1071                         break;
1072                 }
1073
1074                 /* 5. fill sq wqe task section and queue info */
1075                 task = &sq_wqe->task;
1076
1077                 /* tx packet offload configure */
1078                 hinic_fill_tx_offload_info(mbuf_pkt, task, &queue_info,
1079                                            &off_info);
1080
1081                 /* 6. record tx info */
1082                 tx_info = &txq->tx_info[sqe_info.pi];
1083                 tx_info->mbuf = mbuf_pkt;
1084                 tx_info->wqebb_cnt = wqe_wqebb_cnt;
1085
1086                 /* 7. fill sq wqe header section */
1087                 hinic_fill_sq_wqe_header(&sq_wqe->ctrl, queue_info,
1088                                          sqe_info.sge_cnt, sqe_info.owner);
1089
1090                 /* 8. convert contiguous or bottom wqe byte order to big endian */
1091                 hinic_sq_wqe_cpu_to_be32(sq_wqe, sqe_info.seq_wqebbs);
1092
1093                 tx_bytes += mbuf_pkt->pkt_len;
1094         }
1095
1096         /* 9. write sq doorbell in burst mode */
1097         if (nb_tx) {
1098                 hinic_sq_write_db(txq->sq, txq->cos);
1099
1100                 txq->txq_stats.packets += nb_tx;
1101                 txq->txq_stats.bytes += tx_bytes;
1102         }
1103         txq->txq_stats.burst_pkts = nb_tx;
1104
1105         return nb_tx;
1106 }
1107
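/*
 * Drain a stopped queue: walk every in-flight entry by local CI, free both
 * the original mbufs and any bounce buffers, and advance the local CI until
 * the ring reports all usable WQEBBs free again.
 */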
1108 void hinic_free_all_tx_skbs(struct hinic_txq *txq)
1109 {
1110         u16 ci;
1111         struct hinic_nic_dev *nic_dev = txq->nic_dev;
1112         struct hinic_tx_info *tx_info;
1113         int free_wqebbs = hinic_get_sq_free_wqebbs(nic_dev->hwdev,
1114                                                    txq->q_id) + 1;
1115
1116         while (free_wqebbs < txq->q_depth) {
1117                 ci = hinic_get_sq_local_ci(nic_dev->hwdev, txq->q_id);
1118
1119                 tx_info = &txq->tx_info[ci];
1120
1121                 if (unlikely(tx_info->cpy_mbuf != NULL)) {
1122                         rte_pktmbuf_free(tx_info->cpy_mbuf);
1123                         tx_info->cpy_mbuf = NULL;
1124                 }
1125
1126                 rte_pktmbuf_free(tx_info->mbuf);
1127                 hinic_update_sq_local_ci(nic_dev->hwdev, txq->q_id,
1128                                          tx_info->wqebb_cnt);
1129
1130                 free_wqebbs += tx_info->wqebb_cnt;
1131                 tx_info->mbuf = NULL;
1132         }
1133 }
1134
1135 void hinic_free_all_tx_resources(struct rte_eth_dev *eth_dev)
1136 {
1137         u16 q_id;
1138         struct hinic_nic_dev *nic_dev =
1139                                 HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(eth_dev);
1140
1141         for (q_id = 0; q_id < nic_dev->num_sq; q_id++) {
1142                 eth_dev->data->tx_queues[q_id] = NULL;
1143
1144                 if (nic_dev->txqs[q_id] == NULL)
1145                         continue;
1146
1147                 /* tx queue has been stopped; free remaining tx mbufs */
1148                 hinic_free_all_tx_skbs(nic_dev->txqs[q_id]);
1149                 hinic_free_tx_resources(nic_dev->txqs[q_id]);
1150
1151                 /* free txq */
1152                 kfree(nic_dev->txqs[q_id]);
1153                 nic_dev->txqs[q_id] = NULL;
1154         }
1155 }
1156
1157 void hinic_free_all_tx_mbuf(struct rte_eth_dev *eth_dev)
1158 {
1159         u16 q_id;
1160         struct hinic_nic_dev *nic_dev =
1161                                 HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(eth_dev);
1162
1163         for (q_id = 0; q_id < nic_dev->num_sq; q_id++)
1164                 /* tx queue has been stopped; free remaining tx mbufs */
1165                 hinic_free_all_tx_skbs(nic_dev->txqs[q_id]);
1166 }
1167
1168 int hinic_setup_tx_resources(struct hinic_txq *txq)
1169 {
1170         u64 tx_info_sz;
1171
1172         tx_info_sz = txq->q_depth * sizeof(*txq->tx_info);
1173         txq->tx_info = kzalloc_aligned(tx_info_sz, GFP_KERNEL);
1174         if (!txq->tx_info)
1175                 return -ENOMEM;
1176
1177         return HINIC_OK;
1178 }
1179
1180 void hinic_free_tx_resources(struct hinic_txq *txq)
1181 {
1182         if (txq->tx_info == NULL)
1183                 return;
1184
1185         kfree(txq->tx_info);
1186         txq->tx_info = NULL;
1187 }
1188
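/*
 * Create the low-level send queue: allocate the work-queue ring and a
 * doorbell address, clear the hardware CI write-back location, and start
 * with owner = 1 (the owner bit marks which lap of the ring a WQEBB
 * belongs to; see hinic_get_sq_wqe()).
 */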
1189 int hinic_create_sq(struct hinic_hwdev *hwdev, u16 q_id, u16 sq_depth)
1190 {
1191         int err;
1192         struct hinic_nic_io *nic_io = hwdev->nic_io;
1193         struct hinic_qp *qp = &nic_io->qps[q_id];
1194         struct hinic_sq *sq = &qp->sq;
1195         void __iomem *db_addr;
1196         volatile u32 *ci_addr;
1197
1198         sq->sq_depth = sq_depth;
1199         nic_io->sq_depth = sq_depth;
1200
1201         /* alloc wq */
1202         err = hinic_wq_allocate(nic_io->hwdev, &nic_io->sq_wq[q_id],
1203                                 HINIC_SQ_WQEBB_SHIFT, nic_io->sq_depth);
1204         if (err) {
1205                 PMD_DRV_LOG(ERR, "Failed to allocate WQ for SQ");
1206                 return err;
1207         }
1208
1209         /* alloc sq doorbell space */
1210         err = hinic_alloc_db_addr(nic_io->hwdev, &db_addr);
1211         if (err) {
1212                 PMD_DRV_LOG(ERR, "Failed to init db addr");
1213                 goto alloc_db_err;
1214         }
1215
1216         /* clear hardware ci */
1217         ci_addr = (volatile u32 *)HINIC_CI_VADDR(nic_io->ci_vaddr_base, q_id);
1218         *ci_addr = 0;
1219
1220         sq->q_id = q_id;
1221         sq->wq = &nic_io->sq_wq[q_id];
1222         sq->owner = 1;
1223         sq->cons_idx_addr = (volatile u16 *)ci_addr;
1224         sq->db_addr = db_addr;
1225
1226         return HINIC_OK;
1227
1228 alloc_db_err:
1229         hinic_wq_free(nic_io->hwdev, &nic_io->sq_wq[q_id]);
1230
1231         return err;
1232 }
1233
1234 void hinic_destroy_sq(struct hinic_hwdev *hwdev, u16 q_id)
1235 {
1236         struct hinic_nic_io *nic_io;
1237         struct hinic_qp *qp;
1238
1239         nic_io = hwdev->nic_io;
1240         qp = &nic_io->qps[q_id];
1241
1242         if (qp->sq.wq == NULL)
1243                 return;
1244
1245         hinic_free_db_addr(nic_io->hwdev, qp->sq.db_addr);
1246         hinic_wq_free(nic_io->hwdev, qp->sq.wq);
1247         qp->sq.wq = NULL;
1248 }