net/hinic: add Rx/Tx
drivers/net/hinic/hinic_pmd_rx.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2017 Huawei Technologies Co., Ltd
 */

#include <rte_ether.h>
#include <rte_mbuf.h>
#ifdef __ARM64_NEON__
#include <arm_neon.h>
#endif

#include "base/hinic_compat.h"
#include "base/hinic_pmd_hwdev.h"
#include "base/hinic_pmd_wq.h"
#include "base/hinic_pmd_niccfg.h"
#include "base/hinic_pmd_nicio.h"
#include "hinic_pmd_ethdev.h"
#include "hinic_pmd_rx.h"

/* rxq wq operations */
#define HINIC_GET_RQ_WQE_MASK(rxq)      \
        ((rxq)->wq->mask)

#define HINIC_GET_RQ_LOCAL_CI(rxq)      \
        (((rxq)->wq->cons_idx) & HINIC_GET_RQ_WQE_MASK(rxq))

#define HINIC_GET_RQ_LOCAL_PI(rxq)      \
        (((rxq)->wq->prod_idx) & HINIC_GET_RQ_WQE_MASK(rxq))

#define HINIC_UPDATE_RQ_LOCAL_CI(rxq, wqebb_cnt)        \
        do {                                            \
                (rxq)->wq->cons_idx += (wqebb_cnt);     \
                (rxq)->wq->delta += (wqebb_cnt);        \
        } while (0)

#define HINIC_UPDATE_RQ_HW_PI(rxq, pi)  \
        (*((rxq)->pi_virt_addr) =       \
                cpu_to_be16((pi) & HINIC_GET_RQ_WQE_MASK(rxq)))

#define HINIC_GET_RQ_FREE_WQEBBS(rxq)   ((rxq)->wq->delta - 1)

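/*
 * Note: wq->delta counts WQEBBs returned to the driver plus one reserved
 * entry, so HINIC_GET_RQ_FREE_WQEBBS() subtracts 1; the reserved entry
 * keeps the producer index from fully catching up with the consumer index.
 */
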
/* rxq cqe done and status bit */
#define HINIC_GET_RX_DONE_BE(status)    \
        ((status) & 0x80U)

#define HINIC_RX_CSUM_OFFLOAD_EN        0xFFF

#define RQ_CQE_SGE_VLAN_SHIFT                   0
#define RQ_CQE_SGE_LEN_SHIFT                    16

#define RQ_CQE_SGE_VLAN_MASK                    0xFFFFU
#define RQ_CQE_SGE_LEN_MASK                     0xFFFFU

#define RQ_CQE_SGE_GET(val, member)             \
        (((val) >> RQ_CQE_SGE_##member##_SHIFT) & RQ_CQE_SGE_##member##_MASK)

#define HINIC_GET_RX_VLAN_TAG(vlan_len) \
                RQ_CQE_SGE_GET(vlan_len, VLAN)

#define HINIC_GET_RX_PKT_LEN(vlan_len)  \
                RQ_CQE_SGE_GET(vlan_len, LEN)

#define RQ_CQE_STATUS_CSUM_ERR_SHIFT            0
#define RQ_CQE_STATUS_NUM_LRO_SHIFT             16
#define RQ_CQE_STATUS_LRO_PUSH_SHIFT            25
#define RQ_CQE_STATUS_LRO_ENTER_SHIFT           26
#define RQ_CQE_STATUS_LRO_INTR_SHIFT            27

#define RQ_CQE_STATUS_BP_EN_SHIFT               30
#define RQ_CQE_STATUS_RXDONE_SHIFT              31
#define RQ_CQE_STATUS_FLUSH_SHIFT               28

#define RQ_CQE_STATUS_CSUM_ERR_MASK             0xFFFFU
#define RQ_CQE_STATUS_NUM_LRO_MASK              0xFFU
#define RQ_CQE_STATUS_LRO_PUSH_MASK             0x1U
#define RQ_CQE_STATUS_LRO_ENTER_MASK            0x1U
#define RQ_CQE_STATUS_LRO_INTR_MASK             0x1U
#define RQ_CQE_STATUS_BP_EN_MASK                0x1U
#define RQ_CQE_STATUS_RXDONE_MASK               0x1U
#define RQ_CQE_STATUS_FLUSH_MASK                0x1U

#define RQ_CQE_STATUS_GET(val, member)          \
                (((val) >> RQ_CQE_STATUS_##member##_SHIFT) & \
                                RQ_CQE_STATUS_##member##_MASK)

#define RQ_CQE_STATUS_CLEAR(val, member)        \
                ((val) & (~(RQ_CQE_STATUS_##member##_MASK << \
                                RQ_CQE_STATUS_##member##_SHIFT)))

#define HINIC_GET_RX_CSUM_ERR(status)   \
                RQ_CQE_STATUS_GET(status, CSUM_ERR)

#define HINIC_GET_RX_DONE(status)       \
                RQ_CQE_STATUS_GET(status, RXDONE)

#define HINIC_GET_RX_FLUSH(status)      \
                RQ_CQE_STATUS_GET(status, FLUSH)

#define HINIC_GET_RX_BP_EN(status)      \
                RQ_CQE_STATUS_GET(status, BP_EN)

#define HINIC_GET_RX_NUM_LRO(status)    \
                RQ_CQE_STATUS_GET(status, NUM_LRO)

/* RQ_CTRL */
#define RQ_CTRL_BUFDESC_SECT_LEN_SHIFT          0
#define RQ_CTRL_COMPLETE_FORMAT_SHIFT           15
#define RQ_CTRL_COMPLETE_LEN_SHIFT              27
#define RQ_CTRL_LEN_SHIFT                       29

#define RQ_CTRL_BUFDESC_SECT_LEN_MASK           0xFFU
#define RQ_CTRL_COMPLETE_FORMAT_MASK            0x1U
#define RQ_CTRL_COMPLETE_LEN_MASK               0x3U
#define RQ_CTRL_LEN_MASK                        0x3U

#define RQ_CTRL_SET(val, member)                \
        (((val) & RQ_CTRL_##member##_MASK) << RQ_CTRL_##member##_SHIFT)

#define RQ_CTRL_GET(val, member)                \
        (((val) >> RQ_CTRL_##member##_SHIFT) & RQ_CTRL_##member##_MASK)

#define RQ_CTRL_CLEAR(val, member)              \
        ((val) & (~(RQ_CTRL_##member##_MASK << RQ_CTRL_##member##_SHIFT)))

#define RQ_CQE_PKT_NUM_SHIFT                    1
#define RQ_CQE_PKT_FIRST_LEN_SHIFT              19
#define RQ_CQE_PKT_LAST_LEN_SHIFT               6
#define RQ_CQE_SUPER_CQE_EN_SHIFT               0

#define RQ_CQE_PKT_FIRST_LEN_MASK               0x1FFFU
#define RQ_CQE_PKT_LAST_LEN_MASK                0x1FFFU
#define RQ_CQE_PKT_NUM_MASK                     0x1FU
#define RQ_CQE_SUPER_CQE_EN_MASK                0x1U

#define RQ_CQE_PKT_NUM_GET(val, member)         \
        (((val) >> RQ_CQE_PKT_##member##_SHIFT) & RQ_CQE_PKT_##member##_MASK)

#define HINIC_GET_RQ_CQE_PKT_NUM(pkt_info) RQ_CQE_PKT_NUM_GET(pkt_info, NUM)

#define RQ_CQE_SUPER_CQE_EN_GET(val, member)    \
        (((val) >> RQ_CQE_##member##_SHIFT) & RQ_CQE_##member##_MASK)

#define HINIC_GET_SUPER_CQE_EN(pkt_info)        \
        RQ_CQE_SUPER_CQE_EN_GET(pkt_info, SUPER_CQE_EN)

#define RQ_CQE_OFFLOAD_TYPE_VLAN_EN_SHIFT               21
#define RQ_CQE_OFFLOAD_TYPE_VLAN_EN_MASK                0x1U

#define RQ_CQE_OFFLOAD_TYPE_PKT_TYPE_SHIFT              0
#define RQ_CQE_OFFLOAD_TYPE_PKT_TYPE_MASK               0xFFFU

#define RQ_CQE_OFFLOAD_TYPE_PKT_UMBCAST_SHIFT           19
#define RQ_CQE_OFFLOAD_TYPE_PKT_UMBCAST_MASK            0x3U

#define RQ_CQE_OFFLOAD_TYPE_RSS_TYPE_SHIFT              24
#define RQ_CQE_OFFLOAD_TYPE_RSS_TYPE_MASK               0xFFU

#define RQ_CQE_OFFLOAD_TYPE_GET(val, member)            (((val) >> \
                                RQ_CQE_OFFLOAD_TYPE_##member##_SHIFT) & \
                                RQ_CQE_OFFLOAD_TYPE_##member##_MASK)

#define HINIC_GET_RX_VLAN_OFFLOAD_EN(offload_type)      \
                RQ_CQE_OFFLOAD_TYPE_GET(offload_type, VLAN_EN)

#define HINIC_GET_RSS_TYPES(offload_type)       \
                RQ_CQE_OFFLOAD_TYPE_GET(offload_type, RSS_TYPE)

#define HINIC_GET_RX_PKT_TYPE(offload_type)     \
                RQ_CQE_OFFLOAD_TYPE_GET(offload_type, PKT_TYPE)

#define HINIC_GET_RX_PKT_UMBCAST(offload_type)  \
                RQ_CQE_OFFLOAD_TYPE_GET(offload_type, PKT_UMBCAST)

#define RQ_CQE_STATUS_CSUM_BYPASS_VAL                   0x80U
#define RQ_CQE_STATUS_CSUM_ERR_IP_MASK                  0x39U
#define RQ_CQE_STATUS_CSUM_ERR_L4_MASK                  0x46U
#define RQ_CQE_STATUS_CSUM_ERR_OTHER                    0x100U

#define HINIC_CSUM_ERR_BYPASSED(csum_err)        \
        ((csum_err) == RQ_CQE_STATUS_CSUM_BYPASS_VAL)

#define HINIC_CSUM_ERR_IP(csum_err)      \
        ((csum_err) & RQ_CQE_STATUS_CSUM_ERR_IP_MASK)

#define HINIC_CSUM_ERR_L4(csum_err)      \
        ((csum_err) & RQ_CQE_STATUS_CSUM_ERR_L4_MASK)

#define HINIC_CSUM_ERR_OTHER(csum_err)   \
        ((csum_err) == RQ_CQE_STATUS_CSUM_ERR_OTHER)

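/**
 * hinic_get_func_rx_buf_size - record the function-wide Rx buffer size
 * @nic_dev: pointer to the private ethernet device
 *
 * Scans every receive queue and stores the smallest configured buf_len in
 * nic_io->rq_buf_size, which applies to the whole function.
 */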
void hinic_get_func_rx_buf_size(struct hinic_nic_dev *nic_dev)
{
        struct hinic_rxq *rxq;
        u16 q_id;
        u16 buf_size = 0;

        for (q_id = 0; q_id < nic_dev->num_rq; q_id++) {
                rxq = nic_dev->rxqs[q_id];

                if (rxq == NULL)
                        continue;

                if (q_id == 0)
                        buf_size = rxq->buf_len;

                buf_size = buf_size > rxq->buf_len ? rxq->buf_len : buf_size;
        }

        nic_dev->hwdev->nic_io->rq_buf_size = buf_size;
}

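/**
 * hinic_create_rq - create hardware resources for one receive queue
 * @hwdev: the hardware interface of a nic device
 * @q_id: receive queue index
 * @rq_depth: number of WQEBBs in the queue
 * Return: 0 on Success, error code otherwise.
 *
 * Allocates the RQ work queue plus a DMA-coherent page through which the
 * driver publishes the producer index to the hardware.
 */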
int hinic_create_rq(struct hinic_hwdev *hwdev, u16 q_id, u16 rq_depth)
{
        int err;
        struct hinic_nic_io *nic_io = hwdev->nic_io;
        struct hinic_qp *qp = &nic_io->qps[q_id];
        struct hinic_rq *rq = &qp->rq;

        /* in case the hardware still generates interrupts,
         * do not use MSI-X entry 0
         */
        rq->msix_entry_idx = 1;
        rq->q_id = q_id;
        rq->rq_depth = rq_depth;
        nic_io->rq_depth = rq_depth;

        err = hinic_wq_allocate(hwdev, &nic_io->rq_wq[q_id],
                                HINIC_RQ_WQEBB_SHIFT, nic_io->rq_depth);
        if (err) {
                PMD_DRV_LOG(ERR, "Failed to allocate WQ for RQ");
                return err;
        }
        rq->wq = &nic_io->rq_wq[q_id];

        rq->pi_virt_addr =
                (volatile u16 *)dma_zalloc_coherent(hwdev, HINIC_PAGE_SIZE,
                                                    &rq->pi_dma_addr,
                                                    GFP_KERNEL);
        if (!rq->pi_virt_addr) {
                PMD_DRV_LOG(ERR, "Failed to allocate rq pi virt addr");
                err = -ENOMEM;
                goto rq_pi_alloc_err;
        }

        return HINIC_OK;

rq_pi_alloc_err:
        hinic_wq_free(hwdev, &nic_io->rq_wq[q_id]);

        return err;
}

void hinic_destroy_rq(struct hinic_hwdev *hwdev, u16 q_id)
{
        struct hinic_nic_io *nic_io = hwdev->nic_io;
        struct hinic_qp *qp = &nic_io->qps[q_id];
        struct hinic_rq *rq = &qp->rq;

        if (qp->rq.wq == NULL)
                return;

        dma_free_coherent_volatile(hwdev, HINIC_PAGE_SIZE,
                                   (volatile void *)rq->pi_virt_addr,
                                   rq->pi_dma_addr);
        hinic_wq_free(nic_io->hwdev, qp->rq.wq);
        qp->rq.wq = NULL;
}

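/**
 * hinic_prepare_rq_wqe - initialize one receive WQE
 *
 * Fills the control section lengths, points the completion section at this
 * entry's CQE and records the split 64-bit Rx buffer address.
 */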
static void
hinic_prepare_rq_wqe(void *wqe, __rte_unused u16 pi, dma_addr_t buf_addr,
                        dma_addr_t cqe_dma)
{
        struct hinic_rq_wqe *rq_wqe = wqe;
        struct hinic_rq_ctrl *ctrl = &rq_wqe->ctrl;
        struct hinic_rq_cqe_sect *cqe_sect = &rq_wqe->cqe_sect;
        struct hinic_rq_bufdesc *buf_desc = &rq_wqe->buf_desc;
        u32 rq_cqe_len = sizeof(struct hinic_rq_cqe);

        ctrl->ctrl_fmt =
                RQ_CTRL_SET(SIZE_8BYTES(sizeof(*ctrl)), LEN) |
                RQ_CTRL_SET(SIZE_8BYTES(sizeof(*cqe_sect)), COMPLETE_LEN) |
                RQ_CTRL_SET(SIZE_8BYTES(sizeof(*buf_desc)), BUFDESC_SECT_LEN) |
                RQ_CTRL_SET(RQ_COMPLETE_SGE, COMPLETE_FORMAT);

        hinic_set_sge(&cqe_sect->sge, cqe_dma, rq_cqe_len);

        buf_desc->addr_high = upper_32_bits(buf_addr);
        buf_desc->addr_low = lower_32_bits(buf_addr);
}

void hinic_rxq_get_stats(struct hinic_rxq *rxq, struct hinic_rxq_stats *stats)
{
        if (!rxq || !stats)
                return;

        memcpy(stats, &rxq->rxq_stats, sizeof(rxq->rxq_stats));
}

void hinic_rxq_stats_reset(struct hinic_rxq *rxq)
{
        struct hinic_rxq_stats *rxq_stats;

        if (rxq == NULL)
                return;

        rxq_stats = &rxq->rxq_stats;
        memset(rxq_stats, 0, sizeof(*rxq_stats));
}

static int hinic_rx_alloc_cqe(struct hinic_rxq *rxq)
{
        size_t cqe_mem_size;

        /* allocate contiguous cqe memory to reduce the number of
         * memory zones
         */
        cqe_mem_size = sizeof(struct hinic_rq_cqe) * rxq->q_depth;
        rxq->cqe_start_vaddr =
                dma_zalloc_coherent(rxq->nic_dev->hwdev,
                                    cqe_mem_size, &rxq->cqe_start_paddr,
                                    GFP_KERNEL);
        if (!rxq->cqe_start_vaddr) {
                PMD_DRV_LOG(ERR, "Allocate cqe dma memory failed");
                return -ENOMEM;
        }

        rxq->rx_cqe = (struct hinic_rq_cqe *)rxq->cqe_start_vaddr;

        return HINIC_OK;
}

static void hinic_rx_free_cqe(struct hinic_rxq *rxq)
{
        size_t cqe_mem_size;

        cqe_mem_size = sizeof(struct hinic_rq_cqe) * rxq->q_depth;
        dma_free_coherent(rxq->nic_dev->hwdev, cqe_mem_size,
                          rxq->cqe_start_vaddr, rxq->cqe_start_paddr);
        rxq->cqe_start_vaddr = NULL;
}

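/**
 * hinic_rx_fill_wqe - pre-build every WQE in the receive ring
 * @rxq: receive queue
 * Return: the number of WQEs that were built.
 *
 * Writes one big-endian WQE per ring entry, pointing each entry at its own
 * CQE; buffer addresses stay zero here and are filled in when mbufs are
 * posted.
 */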
static int hinic_rx_fill_wqe(struct hinic_rxq *rxq)
{
        struct hinic_nic_dev *nic_dev = rxq->nic_dev;
        struct hinic_rq_wqe *rq_wqe;
        dma_addr_t buf_dma_addr, cqe_dma_addr;
        u16 pi = 0;
        int i;

        buf_dma_addr = 0;
        cqe_dma_addr = rxq->cqe_start_paddr;
        for (i = 0; i < rxq->q_depth; i++) {
                rq_wqe = hinic_get_rq_wqe(nic_dev->hwdev, rxq->q_id, &pi);
                if (!rq_wqe) {
                        PMD_DRV_LOG(ERR, "Get rq wqe failed");
                        break;
                }

                hinic_prepare_rq_wqe(rq_wqe, pi, buf_dma_addr, cqe_dma_addr);
                cqe_dma_addr += sizeof(struct hinic_rq_cqe);

                hinic_cpu_to_be32(rq_wqe, sizeof(struct hinic_rq_wqe));
        }

        hinic_return_rq_wqe(nic_dev->hwdev, rxq->q_id, i);

        return i;
}

/* alloc cqe and prepare rqe */
int hinic_setup_rx_resources(struct hinic_rxq *rxq)
{
        u64 rx_info_sz;
        int err, pkts;

        rx_info_sz = rxq->q_depth * sizeof(*rxq->rx_info);
        rxq->rx_info = kzalloc_aligned(rx_info_sz, GFP_KERNEL);
        if (!rxq->rx_info)
                return -ENOMEM;

        err = hinic_rx_alloc_cqe(rxq);
        if (err) {
                PMD_DRV_LOG(ERR, "Allocate rx cqe failed");
                goto rx_cqe_err;
        }

        pkts = hinic_rx_fill_wqe(rxq);
        if (pkts != rxq->q_depth) {
                PMD_DRV_LOG(ERR, "Fill rx wqe failed");
                err = -ENOMEM;
                goto rx_fill_err;
        }

        return 0;

rx_fill_err:
        hinic_rx_free_cqe(rxq);

rx_cqe_err:
        kfree(rxq->rx_info);
        rxq->rx_info = NULL;

        return err;
}

void hinic_free_rx_resources(struct hinic_rxq *rxq)
{
        if (rxq->rx_info == NULL)
                return;

        hinic_rx_free_cqe(rxq);
        kfree(rxq->rx_info);
        rxq->rx_info = NULL;
}

void hinic_free_all_rx_resources(struct rte_eth_dev *eth_dev)
{
        u16 q_id;
        struct hinic_nic_dev *nic_dev =
                                HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(eth_dev);

        for (q_id = 0; q_id < nic_dev->num_rq; q_id++) {
                eth_dev->data->rx_queues[q_id] = NULL;

                if (nic_dev->rxqs[q_id] == NULL)
                        continue;

                hinic_free_all_rx_skbs(nic_dev->rxqs[q_id]);
                hinic_free_rx_resources(nic_dev->rxqs[q_id]);
                kfree(nic_dev->rxqs[q_id]);
                nic_dev->rxqs[q_id] = NULL;
        }
}

void hinic_free_all_rx_mbuf(struct rte_eth_dev *eth_dev)
{
        struct hinic_nic_dev *nic_dev =
                                HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(eth_dev);
        u16 q_id;

        for (q_id = 0; q_id < nic_dev->num_rq; q_id++)
                hinic_free_all_rx_skbs(nic_dev->rxqs[q_id]);
}

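/**
 * hinic_recv_jumbo_pkt - collect the remaining segments of a jumbo frame
 * @rxq: receive queue
 * @head_skb: first mbuf of the packet, already filled in by the caller
 * @remain_pkt_len: bytes still to be gathered from subsequent WQEBBs
 *
 * Consumes one ring entry per remaining segment, chaining each mbuf onto
 * head_skb and advancing the local consumer index as it goes.
 */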
static void hinic_recv_jumbo_pkt(struct hinic_rxq *rxq,
                                 struct rte_mbuf *head_skb,
                                 u32 remain_pkt_len)
{
        struct hinic_nic_dev *nic_dev = rxq->nic_dev;
        struct rte_mbuf *cur_mbuf, *rxm = NULL;
        struct hinic_rx_info *rx_info;
        u16 sw_ci, rx_buf_len = rxq->buf_len;
        u32 pkt_len;

        while (remain_pkt_len > 0) {
                sw_ci = hinic_get_rq_local_ci(nic_dev->hwdev, rxq->q_id);
                rx_info = &rxq->rx_info[sw_ci];

                hinic_update_rq_local_ci(nic_dev->hwdev, rxq->q_id, 1);

                pkt_len = remain_pkt_len > rx_buf_len ?
                        rx_buf_len : remain_pkt_len;
                remain_pkt_len -= pkt_len;

                cur_mbuf = rx_info->mbuf;
                cur_mbuf->data_len = (u16)pkt_len;
                cur_mbuf->next = NULL;

                head_skb->pkt_len += cur_mbuf->data_len;
                head_skb->nb_segs++;

                if (!rxm)
                        head_skb->next = cur_mbuf;
                else
                        rxm->next = cur_mbuf;

                rxm = cur_mbuf;
        }
}

static void hinic_rss_deinit(struct hinic_nic_dev *nic_dev)
{
        u8 prio_tc[HINIC_DCB_UP_MAX] = {0};

        (void)hinic_rss_cfg(nic_dev->hwdev, 0,
                            nic_dev->rss_tmpl_idx, 0, prio_tc);
}

static int hinic_rss_key_init(struct hinic_nic_dev *nic_dev,
                              struct rte_eth_rss_conf *rss_conf)
{
        u8 default_rss_key[HINIC_RSS_KEY_SIZE] = {
                         0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
                         0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
                         0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
                         0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
                         0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa};
        u8 hashkey[HINIC_RSS_KEY_SIZE] = {0};
        u8 tmpl_idx = nic_dev->rss_tmpl_idx;

        if (rss_conf->rss_key == NULL)
                memcpy(hashkey, default_rss_key, HINIC_RSS_KEY_SIZE);
        else
                /* never copy more than the template key size */
                memcpy(hashkey, rss_conf->rss_key,
                       (rss_conf->rss_key_len < HINIC_RSS_KEY_SIZE) ?
                       rss_conf->rss_key_len : HINIC_RSS_KEY_SIZE);

        return hinic_rss_set_template_tbl(nic_dev->hwdev, tmpl_idx, hashkey);
}

static void hinic_fill_rss_type(struct nic_rss_type *rss_type,
                                struct rte_eth_rss_conf *rss_conf)
{
        u64 rss_hf = rss_conf->rss_hf;

        rss_type->ipv4 = (rss_hf & (ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4)) ? 1 : 0;
        rss_type->tcp_ipv4 = (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP) ? 1 : 0;
        rss_type->ipv6 = (rss_hf & (ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6)) ? 1 : 0;
        rss_type->ipv6_ext = (rss_hf & ETH_RSS_IPV6_EX) ? 1 : 0;
        rss_type->tcp_ipv6 = (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP) ? 1 : 0;
        rss_type->tcp_ipv6_ext = (rss_hf & ETH_RSS_IPV6_TCP_EX) ? 1 : 0;
        rss_type->udp_ipv4 = (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP) ? 1 : 0;
        rss_type->udp_ipv6 = (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP) ? 1 : 0;
}

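/*
 * Stripe the active Rx queues round-robin across the RSS indirection table;
 * when no RSS queue is configured, every entry is set to the invalid queue
 * id 0xFF.
 */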
static void hinic_fillout_indir_tbl(struct hinic_nic_dev *nic_dev, u32 *indir)
{
        u8 rss_queue_count = nic_dev->num_rss;
        int i = 0, j;

        if (rss_queue_count == 0) {
                /* delete q_id from indir tbl */
                for (i = 0; i < HINIC_RSS_INDIR_SIZE; i++)
                        indir[i] = 0xFF;        /* invalid value in indir tbl */
        } else {
                while (i < HINIC_RSS_INDIR_SIZE)
                        for (j = 0; (j < rss_queue_count) &&
                             (i < HINIC_RSS_INDIR_SIZE); j++)
                                indir[i++] = nic_dev->rx_queue_list[j];
        }
}

static int hinic_rss_init(struct hinic_nic_dev *nic_dev,
                          __attribute__((unused)) u8 *rq2iq_map,
                          struct rte_eth_rss_conf *rss_conf)
{
        u32 indir_tbl[HINIC_RSS_INDIR_SIZE] = {0};
        struct nic_rss_type rss_type = {0};
        u8 prio_tc[HINIC_DCB_UP_MAX] = {0};
        u8 tmpl_idx = 0xFF, num_tc = 0;
        int err;

        tmpl_idx = nic_dev->rss_tmpl_idx;

        err = hinic_rss_key_init(nic_dev, rss_conf);
        if (err)
                return err;

        if (!nic_dev->rss_indir_flag) {
                hinic_fillout_indir_tbl(nic_dev, indir_tbl);
                err = hinic_rss_set_indir_tbl(nic_dev->hwdev, tmpl_idx,
                                              indir_tbl);
                if (err)
                        return err;
        }

        hinic_fill_rss_type(&rss_type, rss_conf);
        err = hinic_set_rss_type(nic_dev->hwdev, tmpl_idx, rss_type);
        if (err)
                return err;

        err = hinic_rss_set_hash_engine(nic_dev->hwdev, tmpl_idx,
                                        HINIC_RSS_HASH_ENGINE_TYPE_TOEP);
        if (err)
                return err;

        return hinic_rss_cfg(nic_dev->hwdev, 1, tmpl_idx, num_tc, prio_tc);
}

static void
hinic_add_rq_to_rx_queue_list(struct hinic_nic_dev *nic_dev, u16 queue_id)
{
        u8 rss_queue_count = nic_dev->num_rss;

        RTE_ASSERT(rss_queue_count <= (RTE_DIM(nic_dev->rx_queue_list) - 1));

        nic_dev->rx_queue_list[rss_queue_count] = queue_id;
        nic_dev->num_rss++;
}

/**
 * hinic_setup_num_qps - allocate an RSS template and build the Rx queue list
 * @nic_dev: pointer to the private ethernet device
 * Return: 0 on Success, error code otherwise.
 **/
static int hinic_setup_num_qps(struct hinic_nic_dev *nic_dev)
{
        int err, i;

        if (!(nic_dev->flags & ETH_MQ_RX_RSS_FLAG)) {
                nic_dev->num_rss = 0;
                if (nic_dev->num_rq > 1) {
                        /* get rss template id */
                        err = hinic_rss_template_alloc(nic_dev->hwdev,
                                                       &nic_dev->rss_tmpl_idx);
                        if (err) {
                                PMD_DRV_LOG(WARNING, "Alloc rss template failed");
                                return err;
                        }
                        nic_dev->flags |= ETH_MQ_RX_RSS_FLAG;
                        for (i = 0; i < nic_dev->num_rq; i++)
                                hinic_add_rq_to_rx_queue_list(nic_dev, i);
                }
        }

        return 0;
}

static void hinic_destroy_num_qps(struct hinic_nic_dev *nic_dev)
{
        if (nic_dev->flags & ETH_MQ_RX_RSS_FLAG) {
                if (hinic_rss_template_free(nic_dev->hwdev,
                                            nic_dev->rss_tmpl_idx))
                        PMD_DRV_LOG(WARNING, "Free rss template failed");

                nic_dev->flags &= ~ETH_MQ_RX_RSS_FLAG;
        }
}

static int hinic_config_mq_rx_rss(struct hinic_nic_dev *nic_dev, bool on)
{
        int ret = 0;

        if (on) {
                ret = hinic_setup_num_qps(nic_dev);
                if (ret)
                        PMD_DRV_LOG(ERR, "Setup num_qps failed");
        } else {
                hinic_destroy_num_qps(nic_dev);
        }

        return ret;
}

int hinic_config_mq_mode(struct rte_eth_dev *dev, bool on)
{
        struct hinic_nic_dev *nic_dev = HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(dev);
        struct rte_eth_conf *dev_conf = &dev->data->dev_conf;
        int ret = 0;

        switch (dev_conf->rxmode.mq_mode) {
        case ETH_MQ_RX_RSS:
                ret = hinic_config_mq_rx_rss(nic_dev, on);
                break;
        default:
                break;
        }

        return ret;
}

int hinic_rx_configure(struct rte_eth_dev *dev)
{
        struct hinic_nic_dev *nic_dev = HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(dev);
        struct rte_eth_rss_conf rss_conf =
                dev->data->dev_conf.rx_adv_conf.rss_conf;
        u32 csum_en = 0;
        int err;

        if (nic_dev->flags & ETH_MQ_RX_RSS_FLAG) {
                if (rss_conf.rss_hf == 0) {
                        rss_conf.rss_hf = HINIC_RSS_OFFLOAD_ALL;
                } else if ((rss_conf.rss_hf & HINIC_RSS_OFFLOAD_ALL) == 0) {
                        PMD_DRV_LOG(ERR, "Requested rss_hf is not supported");
                        goto rss_config_err;
                }

                err = hinic_rss_init(nic_dev, NULL, &rss_conf);
                if (err) {
                        PMD_DRV_LOG(ERR, "Init rss failed");
                        goto rss_config_err;
                }
        }

        /* enable both L3 and L4 rx checksum offload */
        if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_CHECKSUM)
                csum_en = HINIC_RX_CSUM_OFFLOAD_EN;

        err = hinic_set_rx_csum_offload(nic_dev->hwdev, csum_en);
        if (err)
                goto rx_csum_ofl_err;

        return 0;

rx_csum_ofl_err:
rss_config_err:
        hinic_destroy_num_qps(nic_dev);

        return HINIC_ERROR;
}

void hinic_rx_remove_configure(struct rte_eth_dev *dev)
{
        struct hinic_nic_dev *nic_dev = HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(dev);

        if (nic_dev->flags & ETH_MQ_RX_RSS_FLAG) {
                hinic_rss_deinit(nic_dev);
                hinic_destroy_num_qps(nic_dev);
        }
}

void hinic_free_all_rx_skbs(struct hinic_rxq *rxq)
{
        struct hinic_nic_dev *nic_dev = rxq->nic_dev;
        struct hinic_rx_info *rx_info;
        int free_wqebbs =
                hinic_get_rq_free_wqebbs(nic_dev->hwdev, rxq->q_id) + 1;
        volatile struct hinic_rq_cqe *rx_cqe;
        u16 ci;

        while (free_wqebbs++ < rxq->q_depth) {
                ci = hinic_get_rq_local_ci(nic_dev->hwdev, rxq->q_id);

                rx_cqe = &rxq->rx_cqe[ci];

                /* clear done bit */
                rx_cqe->status = 0;

                rx_info = &rxq->rx_info[ci];
                rte_pktmbuf_free(rx_info->mbuf);
                rx_info->mbuf = NULL;

                hinic_update_rq_local_ci(nic_dev->hwdev, rxq->q_id, 1);
        }
}

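/*
 * Convert the first 16 bytes of a CQE from big-endian to host byte order.
 * The SSE and NEON variants do the whole conversion with one byte shuffle;
 * the portable fallback swaps four 32-bit words.
 */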
static inline void hinic_rq_cqe_be_to_cpu32(void *dst_le32,
                                            volatile void *src_be32)
{
#if defined(__X86_64_SSE__)
        volatile __m128i *wqe_be = (volatile __m128i *)src_be32;
        __m128i *wqe_le = (__m128i *)dst_le32;
        __m128i shuf_mask = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10,
                                         11, 4, 5, 6, 7, 0, 1, 2, 3);

        /* l2nic only uses the first 128 bits */
        wqe_le[0] = _mm_shuffle_epi8(wqe_be[0], shuf_mask);
#elif defined(__ARM64_NEON__)
        volatile uint8x16_t *wqe_be = (volatile uint8x16_t *)src_be32;
        uint8x16_t *wqe_le = (uint8x16_t *)dst_le32;
        const uint8x16_t shuf_mask = {3, 2, 1, 0, 7, 6, 5, 4, 11, 10,
                                      9, 8, 15, 14, 13, 12};

        /* l2nic only uses the first 128 bits */
        wqe_le[0] = vqtbl1q_u8(wqe_be[0], shuf_mask);
#else
        u32 i;
        volatile u32 *wqe_be = (volatile u32 *)src_be32;
        u32 *wqe_le = (u32 *)dst_le32;

#define HINIC_L2NIC_RQ_CQE_USED         4 /* unit: 4 bytes */

        for (i = 0; i < HINIC_L2NIC_RQ_CQE_USED; i++) {
                *wqe_le = rte_be_to_cpu_32(*wqe_be);
                wqe_be++;
                wqe_le++;
        }
#endif
}

static inline uint64_t hinic_rx_rss_hash(uint32_t offload_type,
                                         uint32_t cqe_hash_val,
                                         uint32_t *rss_hash)
{
        uint32_t rss_type;

        rss_type = HINIC_GET_RSS_TYPES(offload_type);
        if (likely(rss_type != 0)) {
                *rss_hash = cqe_hash_val;
                return PKT_RX_RSS_HASH;
        }

        return 0;
}

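/*
 * Translate the CQE checksum-error bits into mbuf ol_flags. No error bits
 * is the fast path; the BYPASS bit means the hardware did not validate the
 * checksum at all, and the "other" error replaces the L4 status with
 * PKT_RX_L4_CKSUM_NONE. Any error other than BYPASS counts as an Rx error.
 */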
static inline uint64_t hinic_rx_csum(uint32_t status, struct hinic_rxq *rxq)
{
        uint32_t checksum_err;
        uint64_t flags;

        /* in most cases the checksum is OK */
        checksum_err = HINIC_GET_RX_CSUM_ERR(status);
        if (likely(checksum_err == 0))
                return (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD);

        /* if the BYPASS bit is set, all other status indications
         * are ignored
         */
        if (unlikely(HINIC_CSUM_ERR_BYPASSED(checksum_err)))
                return PKT_RX_IP_CKSUM_UNKNOWN;

        flags = 0;

        /* IP checksum error */
        if (HINIC_CSUM_ERR_IP(checksum_err))
                flags |= PKT_RX_IP_CKSUM_BAD;
        else
                flags |= PKT_RX_IP_CKSUM_GOOD;

        /* L4 checksum error */
        if (HINIC_CSUM_ERR_L4(checksum_err))
                flags |= PKT_RX_L4_CKSUM_BAD;
        else
                flags |= PKT_RX_L4_CKSUM_GOOD;

        if (unlikely(HINIC_CSUM_ERR_OTHER(checksum_err)))
                flags = PKT_RX_L4_CKSUM_NONE;

        rxq->rxq_stats.errors++;

        return flags;
}

static inline uint64_t hinic_rx_vlan(uint32_t offload_type, uint32_t vlan_len,
                                     uint16_t *vlan_tci)
{
        uint16_t vlan_tag;

        vlan_tag = HINIC_GET_RX_VLAN_TAG(vlan_len);
        if (!HINIC_GET_RX_VLAN_OFFLOAD_EN(offload_type) || vlan_tag == 0) {
                *vlan_tci = 0;
                return 0;
        }

        *vlan_tci = vlan_tag;

        return PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED;
}

static inline u32 hinic_rx_alloc_mbuf_bulk(struct hinic_rxq *rxq,
                                           struct rte_mbuf **mbufs,
                                           u32 exp_mbuf_cnt)
{
        int rc;
        u32 avail_cnt;

        rc = rte_pktmbuf_alloc_bulk(rxq->mb_pool, mbufs, exp_mbuf_cnt);
        if (likely(rc == HINIC_OK)) {
                avail_cnt = exp_mbuf_cnt;
        } else {
                avail_cnt = 0;
                rxq->rxq_stats.rx_nombuf += exp_mbuf_cnt;
        }

        return avail_cnt;
}

static struct rte_mbuf *hinic_rx_alloc_mbuf(struct hinic_rxq *rxq,
                                            dma_addr_t *dma_addr)
{
        struct rte_mbuf *mbuf;

        mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
        if (unlikely(!mbuf))
                return NULL;

        *dma_addr = rte_mbuf_data_iova_default(mbuf);

        return mbuf;
}

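/*
 * Refill the receive ring with fresh mbufs in bulk. The refill never wraps:
 * it stops at the end of the ring and completes on a later call. The
 * hardware PI is only updated after a write barrier, so the device never
 * sees a descriptor before its buffer address is visible.
 */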
static inline void hinic_rearm_rxq_mbuf(struct hinic_rxq *rxq)
{
        u16 pi;
        u32 i, free_wqebbs, rearm_wqebbs, exp_wqebbs;
        dma_addr_t dma_addr;
        struct hinic_rq_wqe *rq_wqe;
        struct rte_mbuf **rearm_mbufs;

        /* check free WQEBBs for rearm */
        free_wqebbs = HINIC_GET_RQ_FREE_WQEBBS(rxq);
        if (unlikely(free_wqebbs < rxq->rx_free_thresh))
                return;

        /* get rearm mbuf array */
        pi = HINIC_GET_RQ_LOCAL_PI(rxq);
        rearm_mbufs = (struct rte_mbuf **)(&rxq->rx_info[pi]);

        /* check rxq free wqebbs turn around */
        exp_wqebbs = rxq->q_depth - pi;
        if (free_wqebbs < exp_wqebbs)
                exp_wqebbs = free_wqebbs;

        /* alloc mbufs in bulk */
        rearm_wqebbs = hinic_rx_alloc_mbuf_bulk(rxq, rearm_mbufs, exp_wqebbs);
        if (unlikely(rearm_wqebbs == 0))
                return;

        /* rearm rx mbufs */
        rq_wqe = WQ_WQE_ADDR(rxq->wq, (u32)pi);
        for (i = 0; i < rearm_wqebbs; i++) {
                dma_addr = rte_mbuf_data_iova_default(rearm_mbufs[i]);
                rq_wqe->buf_desc.addr_high =
                                        cpu_to_be32(upper_32_bits(dma_addr));
                rq_wqe->buf_desc.addr_low =
                                        cpu_to_be32(lower_32_bits(dma_addr));
                rq_wqe++;
        }
        rxq->wq->prod_idx += rearm_wqebbs;
        rxq->wq->delta -= rearm_wqebbs;

        /* update rq hw_pi */
        rte_wmb();
        HINIC_UPDATE_RQ_HW_PI(rxq, pi + rearm_wqebbs);
}

void hinic_rx_alloc_pkts(struct hinic_rxq *rxq)
{
        struct hinic_nic_dev *nic_dev = rxq->nic_dev;
        struct hinic_rq_wqe *rq_wqe;
        struct hinic_rx_info *rx_info;
        struct rte_mbuf *mb;
        dma_addr_t dma_addr;
        u16 pi = 0;
        int i, free_wqebbs;

        free_wqebbs = HINIC_GET_RQ_FREE_WQEBBS(rxq);
        for (i = 0; i < free_wqebbs; i++) {
                mb = hinic_rx_alloc_mbuf(rxq, &dma_addr);
                if (unlikely(!mb)) {
                        rxq->rxq_stats.rx_nombuf++;
                        break;
                }

                rq_wqe = hinic_get_rq_wqe(nic_dev->hwdev, rxq->q_id, &pi);
                if (unlikely(!rq_wqe)) {
                        rte_pktmbuf_free(mb);
                        break;
                }

                /* fill buffer address only */
                rq_wqe->buf_desc.addr_high =
                                cpu_to_be32(upper_32_bits(dma_addr));
                rq_wqe->buf_desc.addr_low =
                                cpu_to_be32(lower_32_bits(dma_addr));

                rx_info = &rxq->rx_info[pi];
                rx_info->mbuf = mb;
        }

        if (likely(i > 0)) {
                rte_wmb();
                HINIC_UPDATE_RQ_HW_PI(rxq, pi + 1);
        }
}

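/**
 * hinic_recv_pkts - burst receive up to nb_pkts packets
 * @rx_queue: receive queue
 * @rx_pkts: array that receives the completed mbufs
 * @nb_pkts: maximum number of packets to receive
 * Return: the number of packets received.
 *
 * Polls CQEs from the local CI: once the done bit is seen, the CQE is
 * converted to host byte order, the posted mbuf is completed (chaining
 * extra segments for jumbo frames) and VLAN, checksum and RSS hash offload
 * flags are set. The local CI is then advanced for the burst and free
 * WQEBBs are rearmed with fresh mbufs.
 */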
u16 hinic_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, u16 nb_pkts)
{
        struct rte_mbuf *rxm;
        struct hinic_rxq *rxq = rx_queue;
        struct hinic_rx_info *rx_info;
        volatile struct hinic_rq_cqe *rx_cqe;
        u16 rx_buf_len, pkts = 0;
        u16 sw_ci, ci_mask, wqebb_cnt = 0;
        u32 pkt_len, status, vlan_len;
        u64 rx_bytes = 0;
        struct hinic_rq_cqe cqe;
        u32 offload_type, rss_hash;

        rx_buf_len = rxq->buf_len;

        /* 1. get polling start ci */
        ci_mask = HINIC_GET_RQ_WQE_MASK(rxq);
        sw_ci = HINIC_GET_RQ_LOCAL_CI(rxq);

        while (pkts < nb_pkts) {
                /* 2. current ci is done */
                rx_cqe = &rxq->rx_cqe[sw_ci];
                status = rx_cqe->status;
                if (!HINIC_GET_RX_DONE_BE(status))
                        break;

                /* read other cqe members after status */
                rte_rmb();

                /* convert cqe and get packet length */
                hinic_rq_cqe_be_to_cpu32(&cqe, (volatile void *)rx_cqe);
                vlan_len = cqe.vlan_len;

                rx_info = &rxq->rx_info[sw_ci];
                rxm = rx_info->mbuf;

                /* 3. next ci point and prefetch */
                sw_ci++;
                sw_ci &= ci_mask;

                /* prefetch next mbuf first 64B */
                rte_prefetch0(rxq->rx_info[sw_ci].mbuf);

                /* 4. jumbo frame process */
                pkt_len = HINIC_GET_RX_PKT_LEN(vlan_len);
                if (likely(pkt_len <= rx_buf_len)) {
                        rxm->data_len = pkt_len;
                        rxm->pkt_len = pkt_len;
                        wqebb_cnt++;
                } else {
                        rxm->data_len = rx_buf_len;
                        rxm->pkt_len = rx_buf_len;

                        /* if a jumbo frame uses multiple wqebbs, update ci;
                         * recv_jumbo_pkt will also update ci
                         */
                        HINIC_UPDATE_RQ_LOCAL_CI(rxq, wqebb_cnt + 1);
                        wqebb_cnt = 0;
                        hinic_recv_jumbo_pkt(rxq, rxm, pkt_len - rx_buf_len);
                        sw_ci = HINIC_GET_RQ_LOCAL_CI(rxq);
                }

                /* 5. vlan/checksum/rss/pkt_type/gro offload */
                rxm->data_off = RTE_PKTMBUF_HEADROOM;
                rxm->port = rxq->port_id;
                offload_type = cqe.offload_type;

                /* vlan offload */
                rxm->ol_flags |= hinic_rx_vlan(offload_type, vlan_len,
                                               &rxm->vlan_tci);

                /* checksum offload */
                rxm->ol_flags |= hinic_rx_csum(cqe.status, rxq);

                /* rss hash offload */
                rss_hash = cqe.rss_hash;
                rxm->ol_flags |= hinic_rx_rss_hash(offload_type, rss_hash,
                                                   &rxm->hash.rss);

                /* 6. clear done bit */
                rx_cqe->status = 0;

                rx_bytes += pkt_len;
                rx_pkts[pkts++] = rxm;
        }

        if (pkts) {
                /* 7. update ci */
                HINIC_UPDATE_RQ_LOCAL_CI(rxq, wqebb_cnt);

                /* do packet stats */
                rxq->rxq_stats.packets += pkts;
                rxq->rxq_stats.bytes += rx_bytes;
        }
        rxq->rxq_stats.burst_pkts = pkts;

        /* 8. rearm mbufs to rxq */
        hinic_rearm_rxq_mbuf(rxq);

        return pkts;
}