raw/ifpga: add HE-HSSI AFU driver
[dpdk.git] / drivers / raw / ifpga / afu_pmd_n3000.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2022 Intel Corporation
3  */
4
5 #include <errno.h>
6 #include <stdio.h>
7 #include <stdint.h>
8 #include <stdlib.h>
9 #include <inttypes.h>
10 #include <unistd.h>
11 #include <fcntl.h>
12 #include <poll.h>
13 #include <sys/eventfd.h>
14 #include <sys/ioctl.h>
15
16 #include <rte_eal.h>
17 #include <rte_malloc.h>
18 #include <rte_memcpy.h>
19 #include <rte_io.h>
20 #include <rte_vfio.h>
21 #include <rte_bus_pci.h>
22 #include <rte_bus_ifpga.h>
23 #include <rte_rawdev.h>
24
25 #include "afu_pmd_core.h"
26 #include "afu_pmd_n3000.h"
27
28 static int nlb_afu_config(struct afu_rawdev *dev)
29 {
30         struct n3000_afu_priv *priv = NULL;
31         struct rte_pmd_afu_nlb_cfg *cfg = NULL;
32         struct nlb_csr_cfg v;
33
34         if (!dev)
35                 return -EINVAL;
36
37         if (!dev->priv)
38                 return -ENOENT;
39
40         priv = (struct n3000_afu_priv *)dev->priv;
41         cfg = &priv->nlb_cfg;
42
43         v.csr = 0;
44
45         if (cfg->cont)
46                 v.cont = 1;
47
48         if (cfg->cache_policy == NLB_WRPUSH_I)
49                 v.wrpush_i = 1;
50         else
51                 v.wrthru_en = cfg->cache_policy;
52
53         if (cfg->cache_hint == NLB_RDLINE_MIXED)
54                 v.rdsel = 3;
55         else
56                 v.rdsel = cfg->cache_hint;
57
58         v.mode = cfg->mode;
59         v.chsel = cfg->read_vc;
60         v.wr_chsel = cfg->write_vc;
61         v.wrfence_chsel = cfg->wrfence_vc;
62         v.wrthru_en = cfg->cache_policy;
63         v.multicl_len = cfg->multi_cl - 1;
64
65         IFPGA_RAWDEV_PMD_DEBUG("cfg: 0x%08x", v.csr);
66         rte_write32(v.csr, priv->nlb_ctx.addr + CSR_CFG);
67
68         return 0;
69 }
70
71 static void nlb_afu_report(struct afu_rawdev *dev, uint32_t cl)
72 {
73         struct n3000_afu_priv *priv = NULL;
74         struct rte_pmd_afu_nlb_cfg *cfg = NULL;
75         struct nlb_dsm_status *stat = NULL;
76         uint64_t ticks = 0;
77         double num, rd_bw, wr_bw;
78
79         if (!dev || !dev->priv)
80                 return;
81
82         priv = (struct n3000_afu_priv *)dev->priv;
83
84         cfg = &priv->nlb_cfg;
85         stat = priv->nlb_ctx.status_ptr;
86
87         if (cfg->cont)
88                 ticks = stat->num_clocks - stat->start_overhead;
89         else
90                 ticks = stat->num_clocks -
91                         (stat->start_overhead + stat->end_overhead);
92
93         if (cfg->freq_mhz == 0)
94                 cfg->freq_mhz = 200;
95
96         num = (double)stat->num_reads;
97         rd_bw = (num * CLS_TO_SIZE(1) * MHZ(cfg->freq_mhz)) / ticks;
98         num = (double)stat->num_writes;
99         wr_bw = (num * CLS_TO_SIZE(1) * MHZ(cfg->freq_mhz)) / ticks;
100
101         printf("Cachelines  Read_Count Write_Count Clocks@%uMHz   "
102                 "Rd_Bandwidth   Wr_Bandwidth\n", cfg->freq_mhz);
103         printf("%10u  %10u %11u  %12"PRIu64"   %7.3f GB/s   %7.3f GB/s\n",
104                 cl, stat->num_reads, stat->num_writes, ticks,
105                 rd_bw / 1e9, wr_bw / 1e9);
106 }
107
108 static int nlb_afu_test(struct afu_rawdev *dev)
109 {
110         struct n3000_afu_priv *priv = NULL;
111         struct nlb_afu_ctx *ctx = NULL;
112         struct rte_pmd_afu_nlb_cfg *cfg = NULL;
113         struct nlb_csr_ctl ctl;
114         uint32_t *ptr = NULL;
115         uint32_t i, j, cl, val = 0;
116         uint64_t sval = 0;
117         int ret = 0;
118
119         if (!dev)
120                 return -EINVAL;
121
122         if (!dev->priv)
123                 return -ENOENT;
124
125         priv = (struct n3000_afu_priv *)dev->priv;
126         ctx = &priv->nlb_ctx;
127         cfg = &priv->nlb_cfg;
128
129         /* initialize registers */
130         IFPGA_RAWDEV_PMD_DEBUG("dsm_addr: 0x%"PRIx64, ctx->dsm_iova);
131         rte_write64(ctx->dsm_iova, ctx->addr + CSR_AFU_DSM_BASEL);
132
133         ctl.csr = 0;
134         rte_write32(ctl.csr, ctx->addr + CSR_CTL);
135         ctl.reset = 1;
136         rte_write32(ctl.csr, ctx->addr + CSR_CTL);
137
138         IFPGA_RAWDEV_PMD_DEBUG("src_addr: 0x%"PRIx64, ctx->src_iova);
139         rte_write64(SIZE_TO_CLS(ctx->src_iova), ctx->addr + CSR_SRC_ADDR);
140         IFPGA_RAWDEV_PMD_DEBUG("dst_addr: 0x%"PRIx64, ctx->dest_iova);
141         rte_write64(SIZE_TO_CLS(ctx->dest_iova), ctx->addr + CSR_DST_ADDR);
142
143         ret = nlb_afu_config(dev);
144         if (ret)
145                 return ret;
146
147         /* initialize src data */
148         ptr = (uint32_t *)ctx->src_ptr;
149         j = CLS_TO_SIZE(cfg->end) >> 2;
150         for (i = 0; i < j; i++)
151                 *ptr++ = i;
152
153         /* start test */
154         for (cl = cfg->begin; cl <= cfg->end; cl += cfg->multi_cl) {
155                 memset(ctx->dest_ptr, 0, CLS_TO_SIZE(cl));
156                 memset(ctx->dsm_ptr, 0, DSM_SIZE);
157
158                 ctl.csr = 0;
159                 rte_write32(ctl.csr, ctx->addr + CSR_CTL);
160                 ctl.reset = 1;
161                 rte_write32(ctl.csr, ctx->addr + CSR_CTL);
162
163                 rte_write32(cl, ctx->addr + CSR_NUM_LINES);
164
165                 rte_delay_us(10);
166
167                 ctl.start = 1;
168                 rte_write32(ctl.csr, ctx->addr + CSR_CTL);
169
170                 if (cfg->cont) {
171                         rte_delay_ms(cfg->timeout * 1000);
172                         ctl.force_completion = 1;
173                         rte_write32(ctl.csr, ctx->addr + CSR_CTL);
174                         ret = dsm_poll_timeout(&ctx->status_ptr->test_complete,
175                                 val, (val & 0x1) == 1, DSM_POLL_INTERVAL,
176                                 DSM_TIMEOUT);
177                         if (ret) {
178                                 printf("DSM poll timeout\n");
179                                 goto end;
180                         }
181                 } else {
182                         ret = dsm_poll_timeout(&ctx->status_ptr->test_complete,
183                                 val, (val & 0x1) == 1, DSM_POLL_INTERVAL,
184                                 DSM_TIMEOUT);
185                         if (ret) {
186                                 printf("DSM poll timeout\n");
187                                 goto end;
188                         }
189                         ctl.force_completion = 1;
190                         rte_write32(ctl.csr, ctx->addr + CSR_CTL);
191                 }
192
193                 nlb_afu_report(dev, cl);
194
195                 i = 0;
196                 while (i++ < 100) {
197                         sval = rte_read64(ctx->addr + CSR_STATUS1);
198                         if (sval == 0)
199                                 break;
200                         rte_delay_us(1000);
201                 }
202
203                 ptr = (uint32_t *)ctx->dest_ptr;
204                 j = CLS_TO_SIZE(cl) >> 2;
205                 for (i = 0; i < j; i++) {
206                         if (*ptr++ != i) {
207                                 IFPGA_RAWDEV_PMD_ERR("Data mismatch @ %u", i);
208                                 break;
209                         }
210                 }
211         }
212
213 end:
214         return ret;
215 }
216
217 static void dma_afu_buf_free(struct dma_afu_ctx *ctx)
218 {
219         int i = 0;
220
221         if (!ctx)
222                 return;
223
224         for (i = 0; i < NUM_DMA_BUF; i++) {
225                 rte_free(ctx->dma_buf[i]);
226                 ctx->dma_buf[i] = NULL;
227         }
228
229         rte_free(ctx->data_buf);
230         ctx->data_buf = NULL;
231
232         rte_free(ctx->ref_buf);
233         ctx->ref_buf = NULL;
234 }
235
236 static int dma_afu_buf_alloc(struct dma_afu_ctx *ctx,
237         struct rte_pmd_afu_dma_cfg *cfg)
238 {
239         size_t page_sz = sysconf(_SC_PAGE_SIZE);
240         int i, ret = 0;
241
242         if (!ctx || !cfg)
243                 return -EINVAL;
244
245         for (i = 0; i < NUM_DMA_BUF; i++) {
246                 ctx->dma_buf[i] = (uint64_t *)rte_zmalloc(NULL, cfg->size,
247                         TEST_MEM_ALIGN);
248                 if (!ctx->dma_buf[i]) {
249                         ret = -ENOMEM;
250                         goto free_dma_buf;
251                 }
252                 ctx->dma_iova[i] = rte_malloc_virt2iova(ctx->dma_buf[i]);
253                 if (ctx->dma_iova[i] == RTE_BAD_IOVA) {
254                         ret = -ENOMEM;
255                         goto free_dma_buf;
256                 }
257         }
258
259         ctx->data_buf = rte_malloc(NULL, cfg->length, page_sz);
260         if (!ctx->data_buf) {
261                 ret = -ENOMEM;
262                 goto free_dma_buf;
263         }
264
265         ctx->ref_buf = rte_malloc(NULL, cfg->length, page_sz);
266         if (!ctx->ref_buf) {
267                 ret = -ENOMEM;
268                 goto free_data_buf;
269         }
270
271         return 0;
272
273 free_data_buf:
274         rte_free(ctx->data_buf);
275         ctx->data_buf = NULL;
276 free_dma_buf:
277         for (i = 0; i < NUM_DMA_BUF; i++) {
278                 rte_free(ctx->dma_buf[i]);
279                 ctx->dma_buf[i] = NULL;
280         }
281         return ret;
282 }
283
284 static void dma_afu_buf_init(struct dma_afu_ctx *ctx, size_t size)
285 {
286         int *ptr = NULL;
287         size_t i = 0;
288         size_t dword_size = 0;
289
290         if (!ctx || !size)
291                 return;
292
293         ptr = (int *)ctx->ref_buf;
294
295         if (ctx->pattern) {
296                 memset(ptr, ctx->pattern, size);
297         } else {
298                 srand(99);
299                 dword_size = size >> 2;
300                 for (i = 0; i < dword_size; i++)
301                         *ptr++ = rand();
302         }
303         rte_memcpy(ctx->data_buf, ctx->ref_buf, size);
304 }
305
306 static int dma_afu_buf_verify(struct dma_afu_ctx *ctx, size_t size)
307 {
308         uint8_t *src = NULL;
309         uint8_t *dst = NULL;
310         size_t i = 0;
311         int n = 0;
312
313         if (!ctx || !size)
314                 return -EINVAL;
315
316         src = (uint8_t *)ctx->ref_buf;
317         dst = (uint8_t *)ctx->data_buf;
318
319         if (memcmp(src, dst, size)) {
320                 printf("Transfer is corrupted\n");
321                 if (ctx->verbose) {
322                         for (i = 0; i < size; i++) {
323                                 if (*src != *dst) {
324                                         if (++n >= ERR_CHECK_LIMIT)
325                                                 break;
326                                         printf("Mismatch at 0x%zx, "
327                                                 "Expected %02x  Actual %02x\n",
328                                                 i, *src, *dst);
329                                 }
330                                 src++;
331                                 dst++;
332                         }
333                         if (n < ERR_CHECK_LIMIT) {
334                                 printf("Found %d error bytes\n", n);
335                         } else {
336                                 printf("......\n");
337                                 printf("Found more than %d error bytes\n", n);
338                         }
339                 }
340                 return -1;
341         }
342
343         printf("Transfer is verified\n");
344         return 0;
345 }
346
/**
 * Copy a block from host memory to a device address using 64-bit
 * register writes.
 *
 * Nothing is written unless both dev_addr and bytes pass
 * IS_ALIGNED_QWORD().
 *
 * @param dev_addr
 *   Destination device address.
 * @param host_addr
 *   Source buffer in host memory.
 * @param bytes
 *   Number of bytes to copy.
 */
static void blk_write64(uint64_t *dev_addr, uint64_t *host_addr, uint64_t bytes)
{
	uint64_t nqwords = bytes / sizeof(uint64_t);
	uint64_t idx;

	if (!(IS_ALIGNED_QWORD((uint64_t)dev_addr) &&
		IS_ALIGNED_QWORD((uint64_t)bytes)))
		return;

	for (idx = 0; idx < nqwords; idx++)
		rte_write64(host_addr[idx], &dev_addr[idx]);
}
358
/**
 * Copy a block from a device address to host memory using 64-bit
 * register reads.
 *
 * Nothing is read unless both dev_addr and bytes pass
 * IS_ALIGNED_QWORD().
 *
 * @param dev_addr
 *   Source device address.
 * @param host_addr
 *   Destination buffer in host memory.
 * @param bytes
 *   Number of bytes to copy.
 */
static void blk_read64(uint64_t *dev_addr, uint64_t *host_addr, uint64_t bytes)
{
	uint64_t nqwords = bytes / sizeof(uint64_t);
	uint64_t idx;

	if (!(IS_ALIGNED_QWORD((uint64_t)dev_addr) &&
		IS_ALIGNED_QWORD((uint64_t)bytes)))
		return;

	for (idx = 0; idx < nqwords; idx++)
		host_addr[idx] = rte_read64(&dev_addr[idx]);
}
370
371 static void switch_ase_page(struct dma_afu_ctx *ctx, uint64_t addr)
372 {
373         uint64_t requested_page = addr & ~DMA_ASE_WINDOW_MASK;
374
375         if (!ctx)
376                 return;
377
378         if (requested_page != ctx->cur_ase_page) {
379                 rte_write64(requested_page, ctx->ase_ctrl_addr);
380                 ctx->cur_ase_page = requested_page;
381         }
382 }
383
384 static int ase_write_unaligned(struct dma_afu_ctx *ctx, uint64_t dev_addr,
385         uint64_t host_addr, uint32_t count)
386 {
387         uint64_t dev_aligned_addr = 0;
388         uint64_t shift = 0;
389         uint64_t val = 0;
390         uintptr_t addr = (uintptr_t)host_addr;  /* transfer to pointer size */
391
392         IFPGA_RAWDEV_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64" (0x%x)", host_addr,
393                 dev_addr, count);
394
395         if (!ctx || (count >= QWORD_BYTES))
396                 return -EINVAL;
397
398         if (!count)
399                 return 0;
400
401         switch_ase_page(ctx, dev_addr);
402
403         shift = dev_addr % QWORD_BYTES;
404         dev_aligned_addr = (dev_addr - shift) & DMA_ASE_WINDOW_MASK;
405         val = rte_read64(ctx->ase_data_addr + dev_aligned_addr);
406         rte_memcpy(((char *)(&val)) + shift, (void *)addr, count);
407
408         /* write back to device */
409         rte_write64(val, ctx->ase_data_addr + dev_aligned_addr);
410
411         return 0;
412 }
413
414 static int ase_write(struct dma_afu_ctx *ctx, uint64_t *dst_ptr,
415         uint64_t *src_ptr, uint64_t *count)
416 {
417         uint64_t src = *src_ptr;
418         uint64_t dst = *dst_ptr;
419         uint64_t align_bytes = *count;
420         uint64_t offset = 0;
421         uint64_t left_in_page = DMA_ASE_WINDOW;
422         uint64_t size_to_copy = 0;
423
424         IFPGA_RAWDEV_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64" (0x%"PRIx64")", src, dst,
425                 align_bytes);
426
427         if (!ctx || !IS_ALIGNED_DWORD(dst))
428                 return -EINVAL;
429
430         if (align_bytes < DWORD_BYTES)
431                 return 0;
432
433         if (!IS_ALIGNED_QWORD(dst)) {
434                 /* Write out a single DWORD to get QWORD aligned */
435                 switch_ase_page(ctx, dst);
436                 offset = dst & DMA_ASE_WINDOW_MASK;
437
438                 rte_write32(*(uint32_t *)(uintptr_t)src,
439                         ctx->ase_data_addr + offset);
440                 src += DWORD_BYTES;
441                 dst += DWORD_BYTES;
442                 align_bytes -= DWORD_BYTES;
443         }
444
445         if (!align_bytes)
446                 return 0;
447
448         /* Write out blocks of 64-bit values */
449         while (align_bytes >= QWORD_BYTES) {
450                 left_in_page -= dst & DMA_ASE_WINDOW_MASK;
451                 size_to_copy =
452                         MIN(left_in_page, (align_bytes & ~(QWORD_BYTES - 1)));
453                 if (size_to_copy < QWORD_BYTES)
454                         break;
455                 switch_ase_page(ctx, dst);
456                 offset = dst & DMA_ASE_WINDOW_MASK;
457                 blk_write64((uint64_t *)(ctx->ase_data_addr + offset),
458                         (uint64_t *)(uintptr_t)src, size_to_copy);
459                 src += size_to_copy;
460                 dst += size_to_copy;
461                 align_bytes -= size_to_copy;
462         }
463
464         if (align_bytes >= DWORD_BYTES) {
465                 /* Write out remaining DWORD */
466                 switch_ase_page(ctx, dst);
467                 offset = dst & DMA_ASE_WINDOW_MASK;
468                 rte_write32(*(uint32_t *)(uintptr_t)src,
469                         ctx->ase_data_addr + offset);
470                 src += DWORD_BYTES;
471                 dst += DWORD_BYTES;
472                 align_bytes -= DWORD_BYTES;
473         }
474
475         *src_ptr = src;
476         *dst_ptr = dst;
477         *count = align_bytes;
478
479         return 0;
480 }
481
482 static int ase_host_to_fpga(struct dma_afu_ctx *ctx, uint64_t *dst_ptr,
483         uint64_t *src_ptr, uint64_t count)
484 {
485         uint64_t dst = *dst_ptr;
486         uint64_t src = *src_ptr;
487         uint64_t count_left = count;
488         uint64_t unaligned_size = 0;
489         int ret = 0;
490
491         IFPGA_RAWDEV_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64" (0x%"PRIx64")", src, dst,
492                 count);
493
494         /* aligns address to 8 byte using dst masking method */
495         if (!IS_ALIGNED_DWORD(dst) && !IS_ALIGNED_QWORD(dst)) {
496                 unaligned_size = QWORD_BYTES - (dst % QWORD_BYTES);
497                 if (unaligned_size > count_left)
498                         unaligned_size = count_left;
499                 ret = ase_write_unaligned(ctx, dst, src, unaligned_size);
500                 if (ret)
501                         return ret;
502                 count_left -= unaligned_size;
503                 src += unaligned_size;
504                 dst += unaligned_size;
505         }
506
507         /* Handles 8/4 byte MMIO transfer */
508         ret = ase_write(ctx, &dst, &src, &count_left);
509         if (ret)
510                 return ret;
511
512         /* Left over unaligned bytes transferred using dst masking method */
513         unaligned_size = QWORD_BYTES - (dst % QWORD_BYTES);
514         if (unaligned_size > count_left)
515                 unaligned_size = count_left;
516
517         ret = ase_write_unaligned(ctx, dst, src, unaligned_size);
518         if (ret)
519                 return ret;
520
521         count_left -= unaligned_size;
522         *dst_ptr = dst + unaligned_size;
523         *src_ptr = src + unaligned_size;
524
525         return 0;
526 }
527
528 static int ase_read_unaligned(struct dma_afu_ctx *ctx, uint64_t dev_addr,
529         uint64_t host_addr, uint32_t count)
530 {
531         uint64_t dev_aligned_addr = 0;
532         uint64_t shift = 0;
533         uint64_t val = 0;
534         uintptr_t addr = (uintptr_t)host_addr;  /* transfer to pointer size */
535
536         IFPGA_RAWDEV_PMD_DEBUG("0x%"PRIx64" <-- 0x%"PRIx64" (0x%x)", host_addr,
537                 dev_addr, count);
538
539         if (!ctx || (count >= QWORD_BYTES))
540                 return -EINVAL;
541
542         if (!count)
543                 return 0;
544
545         switch_ase_page(ctx, dev_addr);
546
547         shift = dev_addr % QWORD_BYTES;
548         dev_aligned_addr = (dev_addr - shift) & DMA_ASE_WINDOW_MASK;
549         val = rte_read64(ctx->ase_data_addr + dev_aligned_addr);
550         rte_memcpy((void *)addr, ((char *)(&val)) + shift, count);
551
552         return 0;
553 }
554
555 static int ase_read(struct dma_afu_ctx *ctx, uint64_t *src_ptr,
556         uint64_t *dst_ptr, uint64_t *count)
557 {
558         uint64_t src = *src_ptr;
559         uint64_t dst = *dst_ptr;
560         uint64_t align_bytes = *count;
561         uint64_t offset = 0;
562         uint64_t left_in_page = DMA_ASE_WINDOW;
563         uint64_t size_to_copy = 0;
564
565         IFPGA_RAWDEV_PMD_DEBUG("0x%"PRIx64" <-- 0x%"PRIx64" (0x%"PRIx64")", dst, src,
566                 align_bytes);
567
568         if (!ctx || !IS_ALIGNED_DWORD(src))
569                 return -EINVAL;
570
571         if (align_bytes < DWORD_BYTES)
572                 return 0;
573
574         if (!IS_ALIGNED_QWORD(src)) {
575                 /* Read a single DWORD to get QWORD aligned */
576                 switch_ase_page(ctx, src);
577                 offset = src & DMA_ASE_WINDOW_MASK;
578                 *(uint32_t *)(uintptr_t)dst =
579                         rte_read32(ctx->ase_data_addr + offset);
580                 src += DWORD_BYTES;
581                 dst += DWORD_BYTES;
582                 align_bytes -= DWORD_BYTES;
583         }
584
585         if (!align_bytes)
586                 return 0;
587
588         /* Read blocks of 64-bit values */
589         while (align_bytes >= QWORD_BYTES) {
590                 left_in_page -= src & DMA_ASE_WINDOW_MASK;
591                 size_to_copy =
592                         MIN(left_in_page, (align_bytes & ~(QWORD_BYTES - 1)));
593                 if (size_to_copy < QWORD_BYTES)
594                         break;
595                 switch_ase_page(ctx, src);
596                 offset = src & DMA_ASE_WINDOW_MASK;
597                 blk_read64((uint64_t *)(ctx->ase_data_addr + offset),
598                         (uint64_t *)(uintptr_t)dst, size_to_copy);
599                 src += size_to_copy;
600                 dst += size_to_copy;
601                 align_bytes -= size_to_copy;
602         }
603
604         if (align_bytes >= DWORD_BYTES) {
605                 /* Read remaining DWORD */
606                 switch_ase_page(ctx, src);
607                 offset = src & DMA_ASE_WINDOW_MASK;
608                 *(uint32_t *)(uintptr_t)dst =
609                         rte_read32(ctx->ase_data_addr + offset);
610                 src += DWORD_BYTES;
611                 dst += DWORD_BYTES;
612                 align_bytes -= DWORD_BYTES;
613         }
614
615         *src_ptr = src;
616         *dst_ptr = dst;
617         *count = align_bytes;
618
619         return 0;
620 }
621
622 static int ase_fpga_to_host(struct dma_afu_ctx *ctx, uint64_t *src_ptr,
623         uint64_t *dst_ptr, uint64_t count)
624 {
625         uint64_t src = *src_ptr;
626         uint64_t dst = *dst_ptr;
627         uint64_t count_left = count;
628         uint64_t unaligned_size = 0;
629         int ret = 0;
630
631         IFPGA_RAWDEV_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64" (0x%"PRIx64")", src, dst,
632                 count);
633
634         /* Aligns address to 8 byte using src masking method */
635         if (!IS_ALIGNED_DWORD(src) && !IS_ALIGNED_QWORD(src)) {
636                 unaligned_size = QWORD_BYTES - (src % QWORD_BYTES);
637                 if (unaligned_size > count_left)
638                         unaligned_size = count_left;
639                 ret = ase_read_unaligned(ctx, src, dst, unaligned_size);
640                 if (ret)
641                         return ret;
642                 count_left -= unaligned_size;
643                 dst += unaligned_size;
644                 src += unaligned_size;
645         }
646
647         /* Handles 8/4 byte MMIO transfer */
648         ret = ase_read(ctx, &src, &dst, &count_left);
649         if (ret)
650                 return ret;
651
652         /* Left over unaligned bytes transferred using src masking method */
653         unaligned_size = QWORD_BYTES - (src % QWORD_BYTES);
654         if (unaligned_size > count_left)
655                 unaligned_size = count_left;
656
657         ret = ase_read_unaligned(ctx, src, dst, unaligned_size);
658         if (ret)
659                 return ret;
660
661         count_left -= unaligned_size;
662         *dst_ptr = dst + unaligned_size;
663         *src_ptr = src + unaligned_size;
664
665         return 0;
666 }
667
668 static void clear_interrupt(struct dma_afu_ctx *ctx)
669 {
670         /* clear interrupt by writing 1 to IRQ bit in status register */
671         msgdma_status status;
672
673         if (!ctx)
674                 return;
675
676         status.csr = 0;
677         status.irq = 1;
678         rte_write32(status.csr, CSR_STATUS(ctx->csr_addr));
679 }
680
681 static int poll_interrupt(struct dma_afu_ctx *ctx)
682 {
683         struct pollfd pfd = {0};
684         uint64_t count = 0;
685         ssize_t bytes_read = 0;
686         int poll_ret = 0;
687         int ret = 0;
688
689         if (!ctx || (ctx->event_fd < 0))
690                 return -EINVAL;
691
692         pfd.fd = ctx->event_fd;
693         pfd.events = POLLIN;
694         poll_ret = poll(&pfd, 1, DMA_TIMEOUT_MSEC);
695         if (poll_ret < 0) {
696                 IFPGA_RAWDEV_PMD_ERR("Error %s", strerror(errno));
697                 ret = -EFAULT;
698                 goto out;
699         } else if (poll_ret == 0) {
700                 IFPGA_RAWDEV_PMD_ERR("Timeout");
701                 ret = -ETIMEDOUT;
702         } else {
703                 bytes_read = read(pfd.fd, &count, sizeof(count));
704                 if (bytes_read > 0) {
705                         if (ctx->verbose)
706                                 IFPGA_RAWDEV_PMD_DEBUG("Successful, ret %d, cnt %"PRIu64,
707                                         poll_ret, count);
708                         ret = 0;
709                 } else {
710                         IFPGA_RAWDEV_PMD_ERR("Failed %s", bytes_read > 0 ?
711                                 strerror(errno) : "zero bytes read");
712                         ret = -EIO;
713                 }
714         }
715 out:
716         clear_interrupt(ctx);
717         return ret;
718 }
719
720 static void send_descriptor(struct dma_afu_ctx *ctx, msgdma_ext_desc *desc)
721 {
722         msgdma_status status;
723         uint64_t fpga_queue_full = 0;
724
725         if (!ctx)
726                 return;
727
728         if (ctx->verbose) {
729                 IFPGA_RAWDEV_PMD_DEBUG("descriptor.rd_address = 0x%x%08x",
730                         desc->rd_address_ext, desc->rd_address);
731                 IFPGA_RAWDEV_PMD_DEBUG("descriptor.wr_address = 0x%x%08x",
732                         desc->wr_address_ext, desc->wr_address);
733                 IFPGA_RAWDEV_PMD_DEBUG("descriptor.len = %u", desc->len);
734                 IFPGA_RAWDEV_PMD_DEBUG("descriptor.wr_burst_count = %u",
735                         desc->wr_burst_count);
736                 IFPGA_RAWDEV_PMD_DEBUG("descriptor.rd_burst_count = %u",
737                         desc->rd_burst_count);
738                 IFPGA_RAWDEV_PMD_DEBUG("descriptor.wr_stride %u", desc->wr_stride);
739                 IFPGA_RAWDEV_PMD_DEBUG("descriptor.rd_stride %u", desc->rd_stride);
740         }
741
742         do {
743                 status.csr = rte_read32(CSR_STATUS(ctx->csr_addr));
744                 if (fpga_queue_full++ > 100000000) {
745                         IFPGA_RAWDEV_PMD_DEBUG("DMA queue full retry");
746                         fpga_queue_full = 0;
747                 }
748         } while (status.desc_buf_full);
749
750         blk_write64((uint64_t *)ctx->desc_addr, (uint64_t *)desc,
751                 sizeof(*desc));
752 }
753
754 static int do_dma(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src,
755         int count, int is_last_desc, fpga_dma_type type, int intr_en)
756 {
757         msgdma_ext_desc *desc = NULL;
758         int alignment_offset = 0;
759         int segment_size = 0;
760
761         if (!ctx)
762                 return -EINVAL;
763
764         /* src, dst and count must be 64-byte aligned */
765         if (!IS_DMA_ALIGNED(src) || !IS_DMA_ALIGNED(dst) ||
766                 !IS_DMA_ALIGNED(count))
767                 return -EINVAL;
768         memset(ctx->desc_buf, 0, sizeof(msgdma_ext_desc));
769
770         /* these fields are fixed for all DMA transfers */
771         desc = ctx->desc_buf;
772         desc->seq_num = 0;
773         desc->wr_stride = 1;
774         desc->rd_stride = 1;
775         desc->control.go = 1;
776         if (intr_en)
777                 desc->control.transfer_irq_en = 1;
778         else
779                 desc->control.transfer_irq_en = 0;
780
781         if (!is_last_desc)
782                 desc->control.early_done_en = 1;
783         else
784                 desc->control.early_done_en = 0;
785
786         if (type == FPGA_TO_FPGA) {
787                 desc->rd_address = src & DMA_MASK_32_BIT;
788                 desc->wr_address = dst & DMA_MASK_32_BIT;
789                 desc->len = count;
790                 desc->wr_burst_count = 4;
791                 desc->rd_burst_count = 4;
792                 desc->rd_address_ext = (src >> 32) & DMA_MASK_32_BIT;
793                 desc->wr_address_ext = (dst >> 32) & DMA_MASK_32_BIT;
794                 send_descriptor(ctx, desc);
795         } else {
796                 /* check CCIP (host) address is aligned to 4CL (256B) */
797                 alignment_offset = (type == HOST_TO_FPGA)
798                         ? (src % CCIP_ALIGN_BYTES) : (dst % CCIP_ALIGN_BYTES);
799                 /* performing a short transfer to get aligned */
800                 if (alignment_offset != 0) {
801                         desc->rd_address = src & DMA_MASK_32_BIT;
802                         desc->wr_address = dst & DMA_MASK_32_BIT;
803                         desc->wr_burst_count = 1;
804                         desc->rd_burst_count = 1;
805                         desc->rd_address_ext = (src >> 32) & DMA_MASK_32_BIT;
806                         desc->wr_address_ext = (dst >> 32) & DMA_MASK_32_BIT;
807                         /* count isn't large enough to hit next 4CL boundary */
808                         if ((CCIP_ALIGN_BYTES - alignment_offset) >= count) {
809                                 segment_size = count;
810                                 count = 0;
811                         } else {
812                                 segment_size = CCIP_ALIGN_BYTES
813                                         - alignment_offset;
814                                 src += segment_size;
815                                 dst += segment_size;
816                                 count -= segment_size;
817                                 desc->control.transfer_irq_en = 0;
818                         }
819                         /* post short transfer to align to a 4CL (256 byte) */
820                         desc->len = segment_size;
821                         send_descriptor(ctx, desc);
822                 }
823                 /* at this point we are 4CL (256 byte) aligned */
824                 if (count >= CCIP_ALIGN_BYTES) {
825                         desc->rd_address = src & DMA_MASK_32_BIT;
826                         desc->wr_address = dst & DMA_MASK_32_BIT;
827                         desc->wr_burst_count = 4;
828                         desc->rd_burst_count = 4;
829                         desc->rd_address_ext = (src >> 32) & DMA_MASK_32_BIT;
830                         desc->wr_address_ext = (dst >> 32) & DMA_MASK_32_BIT;
831                         /* buffer ends on 4CL boundary */
832                         if ((count % CCIP_ALIGN_BYTES) == 0) {
833                                 segment_size = count;
834                                 count = 0;
835                         } else {
836                                 segment_size = count
837                                         - (count % CCIP_ALIGN_BYTES);
838                                 src += segment_size;
839                                 dst += segment_size;
840                                 count -= segment_size;
841                                 desc->control.transfer_irq_en = 0;
842                         }
843                         desc->len = segment_size;
844                         send_descriptor(ctx, desc);
845                 }
846                 /* post short transfer to handle the remainder */
847                 if (count > 0) {
848                         desc->rd_address = src & DMA_MASK_32_BIT;
849                         desc->wr_address = dst & DMA_MASK_32_BIT;
850                         desc->len = count;
851                         desc->wr_burst_count = 1;
852                         desc->rd_burst_count = 1;
853                         desc->rd_address_ext = (src >> 32) & DMA_MASK_32_BIT;
854                         desc->wr_address_ext = (dst >> 32) & DMA_MASK_32_BIT;
855                         if (intr_en)
856                                 desc->control.transfer_irq_en = 1;
857                         send_descriptor(ctx, desc);
858                 }
859         }
860
861         return 0;
862 }
863
864 static int issue_magic(struct dma_afu_ctx *ctx)
865 {
866         *(ctx->magic_buf) = 0ULL;
867         return do_dma(ctx, DMA_WF_HOST_ADDR(ctx->magic_iova),
868                 DMA_WF_MAGIC_ROM, 64, 1, FPGA_TO_HOST, 1);
869 }
870
871 static void wait_magic(struct dma_afu_ctx *ctx)
872 {
873         int magic_timeout = 0;
874
875         if (!ctx)
876                 return;
877
878         poll_interrupt(ctx);
879         while (*(ctx->magic_buf) != DMA_WF_MAGIC) {
880                 if (magic_timeout++ > 1000) {
881                         IFPGA_RAWDEV_PMD_ERR("DMA magic operation timeout");
882                         magic_timeout = 0;
883                         break;
884                 }
885         }
886         *(ctx->magic_buf) = 0ULL;
887 }
888
889 static int dma_tx_buf(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src,
890         uint64_t chunk, int is_last_chunk, int *intr_issued)
891 {
892         int intr_en = 0;
893         int ret = 0;
894
895         if (!ctx || !intr_issued)
896                 return -EINVAL;
897
898         src += chunk * ctx->dma_buf_size;
899         dst += chunk * ctx->dma_buf_size;
900
901         if (((chunk % HALF_DMA_BUF) == (HALF_DMA_BUF - 1)) || is_last_chunk) {
902                 if (*intr_issued) {
903                         ret = poll_interrupt(ctx);
904                         if (ret)
905                                 return ret;
906                 }
907                 intr_en = 1;
908         }
909
910         chunk %= NUM_DMA_BUF;
911         rte_memcpy(ctx->dma_buf[chunk], (void *)(uintptr_t)src,
912                 ctx->dma_buf_size);
913         ret = do_dma(ctx, dst, DMA_HOST_ADDR(ctx->dma_iova[chunk]),
914                         ctx->dma_buf_size, 0, HOST_TO_FPGA, intr_en);
915         if (intr_en)
916                 *intr_issued = 1;
917
918         return ret;
919 }
920
921 static int dma_host_to_fpga(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src,
922         size_t count)
923 {
924         uint64_t i = 0;
925         uint64_t count_left = count;
926         uint64_t aligned_addr = 0;
927         uint64_t align_bytes = 0;
928         uint64_t dma_chunks = 0;
929         uint64_t dma_tx_bytes = 0;
930         uint64_t offset = 0;
931         int issued_intr = 0;
932         int ret = 0;
933
934         IFPGA_RAWDEV_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64" (%zu)", src, dst,
935                 count);
936
937         if (!ctx)
938                 return -EINVAL;
939
940         if (!IS_DMA_ALIGNED(dst)) {
941                 if (count_left < DMA_ALIGN_BYTES)
942                         return ase_host_to_fpga(ctx, &dst, &src, count_left);
943
944                 aligned_addr = ((dst / DMA_ALIGN_BYTES) + 1)
945                         * DMA_ALIGN_BYTES;
946                 align_bytes = aligned_addr - dst;
947                 ret = ase_host_to_fpga(ctx, &dst, &src, align_bytes);
948                 if (ret)
949                         return ret;
950                 count_left = count_left - align_bytes;
951         }
952
953         if (count_left) {
954                 dma_chunks = count_left / ctx->dma_buf_size;
955                 offset = dma_chunks * ctx->dma_buf_size;
956                 count_left -= offset;
957                 IFPGA_RAWDEV_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64
958                         " (%"PRIu64"...0x%"PRIx64")",
959                         src, dst, dma_chunks, count_left);
960                 for (i = 0; i < dma_chunks; i++) {
961                         ret = dma_tx_buf(ctx, dst, src, i,
962                                 i == (dma_chunks - 1), &issued_intr);
963                         if (ret)
964                                 return ret;
965                 }
966
967                 if (issued_intr) {
968                         ret = poll_interrupt(ctx);
969                         if (ret)
970                                 return ret;
971                 }
972
973                 if (count_left) {
974                         i = count_left / DMA_ALIGN_BYTES;
975                         if (i > 0) {
976                                 dma_tx_bytes = i * DMA_ALIGN_BYTES;
977                                 IFPGA_RAWDEV_PMD_DEBUG("left over 0x%"PRIx64" to DMA",
978                                         dma_tx_bytes);
979                                 rte_memcpy(ctx->dma_buf[0],
980                                         (void *)(uintptr_t)(src + offset),
981                                         dma_tx_bytes);
982                                 ret = do_dma(ctx, dst + offset,
983                                         DMA_HOST_ADDR(ctx->dma_iova[0]),
984                                         dma_tx_bytes, 1, HOST_TO_FPGA, 1);
985                                 if (ret)
986                                         return ret;
987                                 ret = poll_interrupt(ctx);
988                                 if (ret)
989                                         return ret;
990                         }
991
992                         count_left -= dma_tx_bytes;
993                         if (count_left) {
994                                 IFPGA_RAWDEV_PMD_DEBUG("left over 0x%"PRIx64" to ASE",
995                                         count_left);
996                                 dst += offset + dma_tx_bytes;
997                                 src += offset + dma_tx_bytes;
998                                 ret = ase_host_to_fpga(ctx, &dst, &src,
999                                         count_left);
1000                         }
1001                 }
1002         }
1003
1004         return ret;
1005 }
1006
1007 static int dma_rx_buf(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src,
1008         uint64_t chunk, int is_last_chunk, uint64_t *rx_count, int *wf_issued)
1009 {
1010         uint64_t i = chunk % NUM_DMA_BUF;
1011         uint64_t n = *rx_count;
1012         uint64_t num_pending = 0;
1013         int ret = 0;
1014
1015         if (!ctx || !wf_issued)
1016                 return -EINVAL;
1017
1018         ret = do_dma(ctx, DMA_HOST_ADDR(ctx->dma_iova[i]),
1019                 src + chunk * ctx->dma_buf_size,
1020                 ctx->dma_buf_size, 1, FPGA_TO_HOST, 0);
1021         if (ret)
1022                 return ret;
1023
1024         num_pending = chunk - n + 1;
1025         if (num_pending == HALF_DMA_BUF) {
1026                 ret = issue_magic(ctx);
1027                 if (ret) {
1028                         IFPGA_RAWDEV_PMD_DEBUG("Magic issue failed");
1029                         return ret;
1030                 }
1031                 *wf_issued = 1;
1032         }
1033
1034         if ((num_pending > (NUM_DMA_BUF - 1)) || is_last_chunk) {
1035                 if (*wf_issued) {
1036                         wait_magic(ctx);
1037                         for (i = 0; i < HALF_DMA_BUF; i++) {
1038                                 rte_memcpy((void *)(uintptr_t)(dst +
1039                                                 n * ctx->dma_buf_size),
1040                                         ctx->dma_buf[n % NUM_DMA_BUF],
1041                                         ctx->dma_buf_size);
1042                                 n++;
1043                         }
1044                         *wf_issued = 0;
1045                         *rx_count = n;
1046                 }
1047                 ret = issue_magic(ctx);
1048                 if (ret) {
1049                         IFPGA_RAWDEV_PMD_DEBUG("Magic issue failed");
1050                         return ret;
1051                 }
1052                 *wf_issued = 1;
1053         }
1054
1055         return ret;
1056 }
1057
1058 static int dma_fpga_to_host(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src,
1059         size_t count)
1060 {
1061         uint64_t i = 0;
1062         uint64_t count_left = count;
1063         uint64_t aligned_addr = 0;
1064         uint64_t align_bytes = 0;
1065         uint64_t dma_chunks = 0;
1066         uint64_t pending_buf = 0;
1067         uint64_t dma_rx_bytes = 0;
1068         uint64_t offset = 0;
1069         int wf_issued = 0;
1070         int ret = 0;
1071
1072         IFPGA_RAWDEV_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64" (%zu)", src, dst,
1073                 count);
1074
1075         if (!ctx)
1076                 return -EINVAL;
1077
1078         if (!IS_DMA_ALIGNED(src)) {
1079                 if (count_left < DMA_ALIGN_BYTES)
1080                         return ase_fpga_to_host(ctx, &src, &dst, count_left);
1081
1082                 aligned_addr = ((src / DMA_ALIGN_BYTES) + 1)
1083                          * DMA_ALIGN_BYTES;
1084                 align_bytes = aligned_addr - src;
1085                 ret = ase_fpga_to_host(ctx, &src, &dst, align_bytes);
1086                 if (ret)
1087                         return ret;
1088                 count_left = count_left - align_bytes;
1089         }
1090
1091         if (count_left) {
1092                 dma_chunks = count_left / ctx->dma_buf_size;
1093                 offset = dma_chunks * ctx->dma_buf_size;
1094                 count_left -= offset;
1095                 IFPGA_RAWDEV_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64
1096                         " (%"PRIu64"...0x%"PRIx64")",
1097                         src, dst, dma_chunks, count_left);
1098                 for (i = 0; i < dma_chunks; i++) {
1099                         ret = dma_rx_buf(ctx, dst, src, i,
1100                                 i == (dma_chunks - 1),
1101                                 &pending_buf, &wf_issued);
1102                         if (ret)
1103                                 return ret;
1104                 }
1105
1106                 if (wf_issued)
1107                         wait_magic(ctx);
1108
1109                 /* clear out final dma memcpy operations */
1110                 while (pending_buf < dma_chunks) {
1111                         /* constant size transfer; no length check required */
1112                         rte_memcpy((void *)(uintptr_t)(dst +
1113                                         pending_buf * ctx->dma_buf_size),
1114                                 ctx->dma_buf[pending_buf % NUM_DMA_BUF],
1115                                 ctx->dma_buf_size);
1116                         pending_buf++;
1117                 }
1118
1119                 if (count_left > 0) {
1120                         i = count_left / DMA_ALIGN_BYTES;
1121                         if (i > 0) {
1122                                 dma_rx_bytes = i * DMA_ALIGN_BYTES;
1123                                 IFPGA_RAWDEV_PMD_DEBUG("left over 0x%"PRIx64" to DMA",
1124                                         dma_rx_bytes);
1125                                 ret = do_dma(ctx,
1126                                         DMA_HOST_ADDR(ctx->dma_iova[0]),
1127                                         src + offset,
1128                                         dma_rx_bytes, 1, FPGA_TO_HOST, 0);
1129                                 if (ret)
1130                                         return ret;
1131                                 ret = issue_magic(ctx);
1132                                 if (ret)
1133                                         return ret;
1134                                 wait_magic(ctx);
1135                                 rte_memcpy((void *)(uintptr_t)(dst + offset),
1136                                         ctx->dma_buf[0], dma_rx_bytes);
1137                         }
1138
1139                         count_left -= dma_rx_bytes;
1140                         if (count_left) {
1141                                 IFPGA_RAWDEV_PMD_DEBUG("left over 0x%"PRIx64" to ASE",
1142                                         count_left);
1143                                 dst += offset + dma_rx_bytes;
1144                                 src += offset + dma_rx_bytes;
1145                                 ret = ase_fpga_to_host(ctx, &src, &dst,
1146                                                         count_left);
1147                         }
1148                 }
1149         }
1150
1151         return ret;
1152 }
1153
1154 static int dma_fpga_to_fpga(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src,
1155         size_t count)
1156 {
1157         uint64_t i = 0;
1158         uint64_t count_left = count;
1159         uint64_t dma_chunks = 0;
1160         uint64_t offset = 0;
1161         uint32_t tx_chunks = 0;
1162         uint64_t *tmp_buf = NULL;
1163         int ret = 0;
1164
1165         IFPGA_RAWDEV_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64" (%zu)", src, dst,
1166                 count);
1167
1168         if (!ctx)
1169                 return -EINVAL;
1170
1171         if (IS_DMA_ALIGNED(dst) && IS_DMA_ALIGNED(src)
1172             && IS_DMA_ALIGNED(count_left)) {
1173                 dma_chunks = count_left / ctx->dma_buf_size;
1174                 offset = dma_chunks * ctx->dma_buf_size;
1175                 count_left -= offset;
1176                 IFPGA_RAWDEV_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64
1177                         " (%"PRIu64"...0x%"PRIx64")",
1178                         src, dst, dma_chunks, count_left);
1179                 for (i = 0; i < dma_chunks; i++) {
1180                         ret = do_dma(ctx, dst + i * ctx->dma_buf_size,
1181                                 src + i * ctx->dma_buf_size,
1182                                 ctx->dma_buf_size, 0, FPGA_TO_FPGA, 0);
1183                         if (ret)
1184                                 return ret;
1185                         if ((((i + 1) % NUM_DMA_BUF) == 0) ||
1186                                 (i == (dma_chunks - 1))) {
1187                                 ret = issue_magic(ctx);
1188                                 if (ret)
1189                                         return ret;
1190                                 wait_magic(ctx);
1191                         }
1192                 }
1193
1194                 if (count_left > 0) {
1195                         IFPGA_RAWDEV_PMD_DEBUG("left over 0x%"PRIx64" to DMA", count_left);
1196                         ret = do_dma(ctx, dst + offset, src + offset,
1197                                 count_left, 1, FPGA_TO_FPGA, 0);
1198                         if (ret)
1199                                 return ret;
1200                         ret = issue_magic(ctx);
1201                         if (ret)
1202                                 return ret;
1203                         wait_magic(ctx);
1204                 }
1205         } else {
1206                 if ((src < dst) && (src + count_left > dst)) {
1207                         IFPGA_RAWDEV_PMD_ERR("Overlapping: 0x%"PRIx64
1208                                 " -> 0x%"PRIx64" (0x%"PRIx64")",
1209                                 src, dst, count_left);
1210                         return -EINVAL;
1211                 }
1212                 tx_chunks = count_left / ctx->dma_buf_size;
1213                 offset = tx_chunks * ctx->dma_buf_size;
1214                 count_left -= offset;
1215                 IFPGA_RAWDEV_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64
1216                         " (%u...0x%"PRIx64")",
1217                         src, dst, tx_chunks, count_left);
1218                 tmp_buf = (uint64_t *)rte_malloc(NULL, ctx->dma_buf_size,
1219                         DMA_ALIGN_BYTES);
1220                 for (i = 0; i < tx_chunks; i++) {
1221                         ret = dma_fpga_to_host(ctx, (uint64_t)tmp_buf,
1222                                 src + i * ctx->dma_buf_size,
1223                                 ctx->dma_buf_size);
1224                         if (ret)
1225                                 goto free_buf;
1226                         ret = dma_host_to_fpga(ctx,
1227                                 dst + i * ctx->dma_buf_size,
1228                                 (uint64_t)tmp_buf, ctx->dma_buf_size);
1229                         if (ret)
1230                                 goto free_buf;
1231                 }
1232
1233                 if (count_left > 0) {
1234                         ret = dma_fpga_to_host(ctx, (uint64_t)tmp_buf,
1235                                 src + offset, count_left);
1236                         if (ret)
1237                                 goto free_buf;
1238                         ret = dma_host_to_fpga(ctx, dst + offset,
1239                                 (uint64_t)tmp_buf, count_left);
1240                         if (ret)
1241                                 goto free_buf;
1242                 }
1243 free_buf:
1244                 rte_free(tmp_buf);
1245         }
1246
1247         return ret;
1248 }
1249
1250 static int dma_transfer_sync(struct dma_afu_ctx *ctx, uint64_t dst,
1251         uint64_t src, size_t count, fpga_dma_type type)
1252 {
1253         int ret = 0;
1254
1255         if (!ctx)
1256                 return -EINVAL;
1257
1258         if (type == HOST_TO_FPGA)
1259                 ret = dma_host_to_fpga(ctx, dst, src, count);
1260         else if (type == FPGA_TO_HOST)
1261                 ret = dma_fpga_to_host(ctx, dst, src, count);
1262         else if (type == FPGA_TO_FPGA)
1263                 ret = dma_fpga_to_fpga(ctx, dst, src, count);
1264         else
1265                 return -EINVAL;
1266
1267         return ret;
1268 }
1269
/*
 * Return the time elapsed from start to end, in seconds.
 *
 * The difference is formed in signed 64-bit arithmetic so that it does not
 * overflow where long is 32 bits wide, and so that an end time earlier than
 * the start time yields a negative result instead of wrapping to a huge
 * positive one.
 */
static double get_duration(struct timespec start, struct timespec end)
{
        int64_t sec = (int64_t)end.tv_sec - (int64_t)start.tv_sec;
        int64_t nsec = (int64_t)end.tv_nsec - (int64_t)start.tv_nsec;
        int64_t total_ns = sec * INT64_C(1000000000) + nsec;

        return (double)total_ns / 1000000000.0;
}
1276
1277 #define SWEEP_ITERS 1
1278 static int sweep_test(struct dma_afu_ctx *ctx, uint32_t length,
1279         uint64_t ddr_offset, uint64_t buf_offset, uint64_t size_decrement)
1280 {
1281         struct timespec start, end;
1282         uint64_t test_size = 0;
1283         uint64_t *dma_buf_ptr = NULL;
1284         double throughput, total_time = 0.0;
1285         int i = 0;
1286         int ret = 0;
1287
1288         if (!ctx || !ctx->data_buf || !ctx->ref_buf) {
1289                 IFPGA_RAWDEV_PMD_ERR("Buffer for DMA test is not allocated");
1290                 return -EINVAL;
1291         }
1292
1293         if (length < (buf_offset + size_decrement)) {
1294                 IFPGA_RAWDEV_PMD_ERR("Test length does not match unaligned parameter");
1295                 return -EINVAL;
1296         }
1297         test_size = length - (buf_offset + size_decrement);
1298         if ((ddr_offset + test_size) > ctx->mem_size) {
1299                 IFPGA_RAWDEV_PMD_ERR("Test is out of DDR memory space");
1300                 return -EINVAL;
1301         }
1302
1303         dma_buf_ptr = (uint64_t *)((uint8_t *)ctx->data_buf + buf_offset);
1304         printf("Sweep Host %p to FPGA 0x%"PRIx64
1305                 " with 0x%"PRIx64" bytes ...\n",
1306                 (void *)dma_buf_ptr, ddr_offset, test_size);
1307
1308         for (i = 0; i < SWEEP_ITERS; i++) {
1309                 clock_gettime(CLOCK_MONOTONIC, &start);
1310                 ret = dma_transfer_sync(ctx, ddr_offset, (uint64_t)dma_buf_ptr,
1311                         test_size, HOST_TO_FPGA);
1312                 clock_gettime(CLOCK_MONOTONIC, &end);
1313                 if (ret) {
1314                         IFPGA_RAWDEV_PMD_ERR("Failed");
1315                         return ret;
1316                 }
1317                 total_time += get_duration(start, end);
1318         }
1319         throughput = (test_size * SWEEP_ITERS) / (total_time * 1000000);
1320         printf("Measured bandwidth = %lf MB/s\n", throughput);
1321
1322         printf("Sweep FPGA 0x%"PRIx64" to Host %p with 0x%"PRIx64" bytes ...\n",
1323                 ddr_offset, (void *)dma_buf_ptr, test_size);
1324
1325         total_time = 0.0;
1326         memset((char *)dma_buf_ptr, 0, test_size);
1327         for (i = 0; i < SWEEP_ITERS; i++) {
1328                 clock_gettime(CLOCK_MONOTONIC, &start);
1329                 ret = dma_transfer_sync(ctx, (uint64_t)dma_buf_ptr, ddr_offset,
1330                         test_size, FPGA_TO_HOST);
1331                 clock_gettime(CLOCK_MONOTONIC, &end);
1332                 if (ret) {
1333                         IFPGA_RAWDEV_PMD_ERR("Failed");
1334                         return ret;
1335                 }
1336                 total_time += get_duration(start, end);
1337         }
1338         throughput = (test_size * SWEEP_ITERS) / (total_time * 1000000);
1339         printf("Measured bandwidth = %lf MB/s\n", throughput);
1340
1341         printf("Verifying buffer ...\n");
1342         return dma_afu_buf_verify(ctx, test_size);
1343 }
1344
1345 static int dma_afu_test(struct afu_rawdev *dev)
1346 {
1347         struct n3000_afu_priv *priv = NULL;
1348         struct dma_afu_ctx *ctx = NULL;
1349         struct rte_pmd_afu_dma_cfg *cfg = NULL;
1350         msgdma_ctrl ctrl;
1351         uint64_t offset = 0;
1352         uint32_t i = 0;
1353         int ret = 0;
1354
1355         if (!dev)
1356                 return -EINVAL;
1357
1358         if (!dev->priv)
1359                 return -ENOENT;
1360
1361         priv = (struct n3000_afu_priv *)dev->priv;
1362         cfg = &priv->dma_cfg;
1363         if (cfg->index >= NUM_N3000_DMA)
1364                 return -EINVAL;
1365         ctx = &priv->dma_ctx[cfg->index];
1366
1367         ctx->pattern = (int)cfg->pattern;
1368         ctx->verbose = (int)cfg->verbose;
1369         ctx->dma_buf_size = cfg->size;
1370
1371         ret = dma_afu_buf_alloc(ctx, cfg);
1372         if (ret)
1373                 goto free;
1374
1375         printf("Initialize test buffer\n");
1376         dma_afu_buf_init(ctx, cfg->length);
1377
1378         /* enable interrupt */
1379         ctrl.csr = 0;
1380         ctrl.global_intr_en_mask = 1;
1381         rte_write32(ctrl.csr, CSR_CONTROL(ctx->csr_addr));
1382
1383         printf("Host %p to FPGA 0x%x with 0x%x bytes\n", ctx->data_buf,
1384                 cfg->offset, cfg->length);
1385         ret = dma_transfer_sync(ctx, cfg->offset, (uint64_t)ctx->data_buf,
1386                 cfg->length, HOST_TO_FPGA);
1387         if (ret) {
1388                 IFPGA_RAWDEV_PMD_ERR("Failed to transfer data from host to FPGA");
1389                 goto end;
1390         }
1391         memset(ctx->data_buf, 0, cfg->length);
1392
1393         printf("FPGA 0x%x to Host %p with 0x%x bytes\n", cfg->offset,
1394                 ctx->data_buf, cfg->length);
1395         ret = dma_transfer_sync(ctx, (uint64_t)ctx->data_buf, cfg->offset,
1396                 cfg->length, FPGA_TO_HOST);
1397         if (ret) {
1398                 IFPGA_RAWDEV_PMD_ERR("Failed to transfer data from FPGA to host");
1399                 goto end;
1400         }
1401         ret = dma_afu_buf_verify(ctx, cfg->length);
1402         if (ret)
1403                 goto end;
1404
1405         if ((cfg->offset + cfg->length * 2) <= ctx->mem_size)
1406                 offset = cfg->offset + cfg->length;
1407         else if (cfg->offset > cfg->length)
1408                 offset = 0;
1409         else
1410                 goto end;
1411
1412         printf("FPGA 0x%x to FPGA 0x%"PRIx64" with 0x%x bytes\n",
1413                 cfg->offset, offset, cfg->length);
1414         ret = dma_transfer_sync(ctx, offset, cfg->offset, cfg->length,
1415                 FPGA_TO_FPGA);
1416         if (ret) {
1417                 IFPGA_RAWDEV_PMD_ERR("Failed to transfer data from FPGA to FPGA");
1418                 goto end;
1419         }
1420
1421         printf("FPGA 0x%"PRIx64" to Host %p with 0x%x bytes\n", offset,
1422                 ctx->data_buf, cfg->length);
1423         ret = dma_transfer_sync(ctx, (uint64_t)ctx->data_buf, offset,
1424                 cfg->length, FPGA_TO_HOST);
1425         if (ret) {
1426                 IFPGA_RAWDEV_PMD_ERR("Failed to transfer data from FPGA to host");
1427                 goto end;
1428         }
1429         ret = dma_afu_buf_verify(ctx, cfg->length);
1430         if (ret)
1431                 goto end;
1432
1433         printf("Sweep with aligned address and size\n");
1434         ret = sweep_test(ctx, cfg->length, cfg->offset, 0, 0);
1435         if (ret)
1436                 goto end;
1437
1438         if (cfg->unaligned) {
1439                 printf("Sweep with unaligned address and size\n");
1440                 struct unaligned_set {
1441                         uint64_t addr_offset;
1442                         uint64_t size_dec;
1443                 } param[] = {{61, 5}, {3, 0}, {7, 3}, {0, 3}, {0, 61}, {0, 7}};
1444                 for (i = 0; i < ARRAY_SIZE(param); i++) {
1445                         ret = sweep_test(ctx, cfg->length, cfg->offset,
1446                                 param[i].addr_offset, param[i].size_dec);
1447                         if (ret)
1448                                 break;
1449                 }
1450         }
1451
1452 end:
1453         /* disable interrupt */
1454         ctrl.global_intr_en_mask = 0;
1455         rte_write32(ctrl.csr, CSR_CONTROL(ctx->csr_addr));
1456
1457 free:
1458         dma_afu_buf_free(ctx);
1459         return ret;
1460 }
1461
1462 static struct rte_pci_device *n3000_afu_get_pci_dev(struct afu_rawdev *dev)
1463 {
1464         struct rte_afu_device *afudev = NULL;
1465
1466         if (!dev || !dev->rawdev || !dev->rawdev->device)
1467                 return NULL;
1468
1469         afudev = RTE_DEV_TO_AFU(dev->rawdev->device);
1470         if (!afudev->rawdev || !afudev->rawdev->device)
1471                 return NULL;
1472
1473         return RTE_DEV_TO_PCI(afudev->rawdev->device);
1474 }
1475
#ifdef VFIO_PRESENT
/*
 * Bind count eventfds to consecutive MSI-X vectors, starting at vec_start,
 * through the VFIO_DEVICE_SET_IRQS ioctl on the device fd of the PCI device
 * behind dev.
 *
 * Returns -EINVAL for bad arguments (count must be 1..MAX_MSIX_VEC),
 * -ENODEV when the PCI device cannot be resolved, -ENOMEM when the request
 * cannot be allocated, otherwise the return value of ioctl().
 */
static int dma_afu_set_irqs(struct afu_rawdev *dev, uint32_t vec_start,
        uint32_t count, int *efds)
{
        struct rte_pci_device *pci_dev = NULL;
        struct vfio_irq_set *irq_set = NULL;
        int vfio_dev_fd = 0;
        size_t sz = 0;
        int ret = 0;

        if (!dev || !efds || (count == 0) || (count > MAX_MSIX_VEC))
                return -EINVAL;

        pci_dev = n3000_afu_get_pci_dev(dev);
        if (!pci_dev)
                return -ENODEV;
        vfio_dev_fd = rte_intr_dev_fd_get(pci_dev->intr_handle);

        /* header followed by one eventfd per vector */
        sz = sizeof(*irq_set) + sizeof(*efds) * count;
        irq_set = rte_zmalloc(NULL, sz, 0);
        if (!irq_set)
                return -ENOMEM;

        irq_set->argsz = (uint32_t)sz;
        irq_set->count = count;
        irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
                VFIO_IRQ_SET_ACTION_TRIGGER;
        irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
        irq_set->start = vec_start;

        rte_memcpy(&irq_set->data, efds, sizeof(*efds) * count);
        ret = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
        /* no trailing newline, matching the other log calls in this file */
        if (ret)
                IFPGA_RAWDEV_PMD_ERR("Error enabling MSI-X interrupts");

        rte_free(irq_set);
        return ret;
}
#endif
1515
1516 static void *n3000_afu_get_port_addr(struct afu_rawdev *dev)
1517 {
1518         struct rte_pci_device *pci_dev = NULL;
1519         uint8_t *addr = NULL;
1520         uint64_t val = 0;
1521         uint32_t bar = 0;
1522
1523         pci_dev = n3000_afu_get_pci_dev(dev);
1524         if (!pci_dev)
1525                 return NULL;
1526
1527         addr = (uint8_t *)pci_dev->mem_resource[0].addr;
1528         val = rte_read64(addr + PORT_ATTR_REG(dev->port));
1529         if (!PORT_IMPLEMENTED(val)) {
1530                 IFPGA_RAWDEV_PMD_INFO("FIU port %d is not implemented", dev->port);
1531                 return NULL;
1532         }
1533
1534         bar = PORT_BAR(val);
1535         if (bar >= PCI_MAX_RESOURCE) {
1536                 IFPGA_RAWDEV_PMD_ERR("BAR index %u is out of limit", bar);
1537                 return NULL;
1538         }
1539
1540         addr = (uint8_t *)pci_dev->mem_resource[bar].addr + PORT_OFFSET(val);
1541         return addr;
1542 }
1543
1544 static int n3000_afu_get_irq_capability(struct afu_rawdev *dev,
1545         uint32_t *vec_start, uint32_t *vec_count)
1546 {
1547         uint8_t *addr = NULL;
1548         uint64_t val = 0;
1549         uint64_t header = 0;
1550         uint64_t next_offset = 0;
1551
1552         addr = (uint8_t *)n3000_afu_get_port_addr(dev);
1553         if (!addr)
1554                 return -ENOENT;
1555
1556         do {
1557                 addr += next_offset;
1558                 header = rte_read64(addr);
1559                 if ((DFH_TYPE(header) == DFH_TYPE_PRIVATE) &&
1560                         (DFH_FEATURE_ID(header) == PORT_FEATURE_UINT_ID)) {
1561                         val = rte_read64(addr + PORT_UINT_CAP_REG);
1562                         if (vec_start)
1563                                 *vec_start = PORT_VEC_START(val);
1564                         if (vec_count)
1565                                 *vec_count = PORT_VEC_COUNT(val);
1566                         return 0;
1567                 }
1568                 next_offset = DFH_NEXT_OFFSET(header);
1569                 if (((next_offset & 0xffff) == 0xffff) || (next_offset == 0))
1570                         break;
1571         } while (!DFH_EOL(header));
1572
1573         return -ENOENT;
1574 }
1575
1576 static int nlb_afu_ctx_release(struct afu_rawdev *dev)
1577 {
1578         struct n3000_afu_priv *priv = NULL;
1579         struct nlb_afu_ctx *ctx = NULL;
1580
1581         if (!dev)
1582                 return -EINVAL;
1583
1584         priv = (struct n3000_afu_priv *)dev->priv;
1585         if (!priv)
1586                 return -ENOENT;
1587
1588         ctx = &priv->nlb_ctx;
1589
1590         rte_free(ctx->dsm_ptr);
1591         ctx->dsm_ptr = NULL;
1592         ctx->status_ptr = NULL;
1593
1594         rte_free(ctx->src_ptr);
1595         ctx->src_ptr = NULL;
1596
1597         rte_free(ctx->dest_ptr);
1598         ctx->dest_ptr = NULL;
1599
1600         return 0;
1601 }
1602
1603 static int nlb_afu_ctx_init(struct afu_rawdev *dev, uint8_t *addr)
1604 {
1605         struct n3000_afu_priv *priv = NULL;
1606         struct nlb_afu_ctx *ctx = NULL;
1607         int ret = 0;
1608
1609         if (!dev || !addr)
1610                 return -EINVAL;
1611
1612         priv = (struct n3000_afu_priv *)dev->priv;
1613         if (!priv)
1614                 return -ENOENT;
1615
1616         ctx = &priv->nlb_ctx;
1617         ctx->addr = addr;
1618
1619         ctx->dsm_ptr = (uint8_t *)rte_zmalloc(NULL, DSM_SIZE, TEST_MEM_ALIGN);
1620         if (!ctx->dsm_ptr)
1621                 return -ENOMEM;
1622
1623         ctx->dsm_iova = rte_malloc_virt2iova(ctx->dsm_ptr);
1624         if (ctx->dsm_iova == RTE_BAD_IOVA) {
1625                 ret = -ENOMEM;
1626                 goto release_dsm;
1627         }
1628
1629         ctx->src_ptr = (uint8_t *)rte_zmalloc(NULL, NLB_BUF_SIZE,
1630                 TEST_MEM_ALIGN);
1631         if (!ctx->src_ptr) {
1632                 ret = -ENOMEM;
1633                 goto release_dsm;
1634         }
1635         ctx->src_iova = rte_malloc_virt2iova(ctx->src_ptr);
1636         if (ctx->src_iova == RTE_BAD_IOVA) {
1637                 ret = -ENOMEM;
1638                 goto release_src;
1639         }
1640
1641         ctx->dest_ptr = (uint8_t *)rte_zmalloc(NULL, NLB_BUF_SIZE,
1642                 TEST_MEM_ALIGN);
1643         if (!ctx->dest_ptr) {
1644                 ret = -ENOMEM;
1645                 goto release_src;
1646         }
1647         ctx->dest_iova = rte_malloc_virt2iova(ctx->dest_ptr);
1648         if (ctx->dest_iova == RTE_BAD_IOVA) {
1649                 ret = -ENOMEM;
1650                 goto release_dest;
1651         }
1652
1653         ctx->status_ptr = (struct nlb_dsm_status *)(ctx->dsm_ptr + DSM_STATUS);
1654         return 0;
1655
1656 release_dest:
1657         rte_free(ctx->dest_ptr);
1658         ctx->dest_ptr = NULL;
1659 release_src:
1660         rte_free(ctx->src_ptr);
1661         ctx->src_ptr = NULL;
1662 release_dsm:
1663         rte_free(ctx->dsm_ptr);
1664         ctx->dsm_ptr = NULL;
1665         return ret;
1666 }
1667
1668 static int dma_afu_ctx_release(struct afu_rawdev *dev)
1669 {
1670         struct n3000_afu_priv *priv = NULL;
1671         struct dma_afu_ctx *ctx = NULL;
1672
1673         if (!dev)
1674                 return -EINVAL;
1675
1676         priv = (struct n3000_afu_priv *)dev->priv;
1677         if (!priv)
1678                 return -ENOENT;
1679
1680         ctx = &priv->dma_ctx[0];
1681
1682         rte_free(ctx->desc_buf);
1683         ctx->desc_buf = NULL;
1684
1685         rte_free(ctx->magic_buf);
1686         ctx->magic_buf = NULL;
1687
1688         close(ctx->event_fd);
1689         return 0;
1690 }
1691
1692 static int dma_afu_ctx_init(struct afu_rawdev *dev, int index, uint8_t *addr)
1693 {
1694         struct n3000_afu_priv *priv = NULL;
1695         struct dma_afu_ctx *ctx = NULL;
1696         uint64_t mem_sz[] = {0x100000000, 0x100000000, 0x40000000, 0x1000000};
1697         static int efds[1] = {0};
1698         uint32_t vec_start = 0;
1699         int ret = 0;
1700
1701         if (!dev || (index < 0) || (index >= NUM_N3000_DMA) || !addr)
1702                 return -EINVAL;
1703
1704         priv = (struct n3000_afu_priv *)dev->priv;
1705         if (!priv)
1706                 return -ENOENT;
1707
1708         ctx = &priv->dma_ctx[index];
1709         ctx->index = index;
1710         ctx->addr = addr;
1711         ctx->csr_addr = addr + DMA_CSR;
1712         ctx->desc_addr = addr + DMA_DESC;
1713         ctx->ase_ctrl_addr = addr + DMA_ASE_CTRL;
1714         ctx->ase_data_addr = addr + DMA_ASE_DATA;
1715         ctx->mem_size = mem_sz[ctx->index];
1716         ctx->cur_ase_page = INVALID_ASE_PAGE;
1717         if (ctx->index == 0) {
1718                 ret = n3000_afu_get_irq_capability(dev, &vec_start, NULL);
1719                 if (ret)
1720                         return ret;
1721
1722                 efds[0] = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
1723                 if (efds[0] < 0) {
1724                         IFPGA_RAWDEV_PMD_ERR("eventfd create failed");
1725                         return -EBADF;
1726                 }
1727 #ifdef VFIO_PRESENT
1728                 if (dma_afu_set_irqs(dev, vec_start, 1, efds))
1729                         IFPGA_RAWDEV_PMD_ERR("DMA interrupt setup failed");
1730 #endif
1731         }
1732         ctx->event_fd = efds[0];
1733
1734         ctx->desc_buf = (msgdma_ext_desc *)rte_zmalloc(NULL,
1735                 sizeof(msgdma_ext_desc), DMA_ALIGN_BYTES);
1736         if (!ctx->desc_buf) {
1737                 ret = -ENOMEM;
1738                 goto release;
1739         }
1740
1741         ctx->magic_buf = (uint64_t *)rte_zmalloc(NULL, MAGIC_BUF_SIZE,
1742                 TEST_MEM_ALIGN);
1743         if (!ctx->magic_buf) {
1744                 ret = -ENOMEM;
1745                 goto release;
1746         }
1747         ctx->magic_iova = rte_malloc_virt2iova(ctx->magic_buf);
1748         if (ctx->magic_iova == RTE_BAD_IOVA) {
1749                 ret = -ENOMEM;
1750                 goto release;
1751         }
1752
1753         return 0;
1754
1755 release:
1756         dma_afu_ctx_release(dev);
1757         return ret;
1758 }
1759
1760 static int n3000_afu_ctx_init(struct afu_rawdev *dev)
1761 {
1762         struct n3000_afu_priv *priv = NULL;
1763         uint8_t *addr = NULL;
1764         uint64_t header = 0;
1765         uint64_t uuid_hi = 0;
1766         uint64_t uuid_lo = 0;
1767         uint64_t next_offset = 0;
1768         int ret = 0;
1769
1770         if (!dev)
1771                 return -EINVAL;
1772
1773         priv = (struct n3000_afu_priv *)dev->priv;
1774         if (!priv)
1775                 return -ENOENT;
1776
1777         addr = (uint8_t *)dev->addr;
1778         do {
1779                 addr += next_offset;
1780                 header = rte_read64(addr);
1781                 uuid_lo = rte_read64(addr + DFH_UUID_L_OFFSET);
1782                 uuid_hi = rte_read64(addr + DFH_UUID_H_OFFSET);
1783
1784                 if ((DFH_TYPE(header) == DFH_TYPE_AFU) &&
1785                         (uuid_lo == N3000_NLB0_UUID_L) &&
1786                         (uuid_hi == N3000_NLB0_UUID_H)) {
1787                         IFPGA_RAWDEV_PMD_INFO("AFU NLB0 found @ %p", (void *)addr);
1788                         ret = nlb_afu_ctx_init(dev, addr);
1789                         if (ret)
1790                                 return ret;
1791                 } else if ((DFH_TYPE(header) == DFH_TYPE_BBB) &&
1792                         (uuid_lo == N3000_DMA_UUID_L) &&
1793                         (uuid_hi == N3000_DMA_UUID_H) &&
1794                         (priv->num_dma < NUM_N3000_DMA)) {
1795                         IFPGA_RAWDEV_PMD_INFO("AFU DMA%d found @ %p",
1796                                 priv->num_dma, (void *)addr);
1797                         ret = dma_afu_ctx_init(dev, priv->num_dma, addr);
1798                         if (ret)
1799                                 return ret;
1800                         priv->num_dma++;
1801                 } else {
1802                         IFPGA_RAWDEV_PMD_DEBUG("DFH: type %"PRIu64
1803                                 ", uuid %016"PRIx64"%016"PRIx64,
1804                                 DFH_TYPE(header), uuid_hi, uuid_lo);
1805                 }
1806
1807                 next_offset = DFH_NEXT_OFFSET(header);
1808                 if (((next_offset & 0xffff) == 0xffff) || (next_offset == 0))
1809                         break;
1810         } while (!DFH_EOL(header));
1811
1812         return 0;
1813 }
1814
1815 static int n3000_afu_init(struct afu_rawdev *dev)
1816 {
1817         if (!dev)
1818                 return -EINVAL;
1819
1820         if (!dev->priv) {
1821                 dev->priv = rte_zmalloc(NULL, sizeof(struct n3000_afu_priv), 0);
1822                 if (!dev->priv)
1823                         return -ENOMEM;
1824         }
1825
1826         return n3000_afu_ctx_init(dev);
1827 }
1828
1829 static int n3000_afu_config(struct afu_rawdev *dev, void *config,
1830         size_t config_size)
1831 {
1832         struct n3000_afu_priv *priv = NULL;
1833         struct rte_pmd_afu_n3000_cfg *cfg = NULL;
1834         int i = 0;
1835         uint64_t top = 0;
1836
1837         if (!dev || !config || !config_size)
1838                 return -EINVAL;
1839
1840         priv = (struct n3000_afu_priv *)dev->priv;
1841         if (!priv)
1842                 return -ENOENT;
1843
1844         if (config_size != sizeof(struct rte_pmd_afu_n3000_cfg))
1845                 return -EINVAL;
1846
1847         cfg = (struct rte_pmd_afu_n3000_cfg *)config;
1848         if (cfg->type == RTE_PMD_AFU_N3000_NLB) {
1849                 if (cfg->nlb_cfg.mode != NLB_MODE_LPBK)
1850                         return -EINVAL;
1851                 if ((cfg->nlb_cfg.read_vc > NLB_VC_RANDOM) ||
1852                         (cfg->nlb_cfg.write_vc > NLB_VC_RANDOM))
1853                         return -EINVAL;
1854                 if (cfg->nlb_cfg.wrfence_vc > NLB_VC_VH1)
1855                         return -EINVAL;
1856                 if (cfg->nlb_cfg.cache_hint > NLB_RDLINE_MIXED)
1857                         return -EINVAL;
1858                 if (cfg->nlb_cfg.cache_policy > NLB_WRPUSH_I)
1859                         return -EINVAL;
1860                 if ((cfg->nlb_cfg.multi_cl != 1) &&
1861                         (cfg->nlb_cfg.multi_cl != 2) &&
1862                         (cfg->nlb_cfg.multi_cl != 4))
1863                         return -EINVAL;
1864                 if ((cfg->nlb_cfg.begin < MIN_CACHE_LINES) ||
1865                         (cfg->nlb_cfg.begin > MAX_CACHE_LINES))
1866                         return -EINVAL;
1867                 if ((cfg->nlb_cfg.end < cfg->nlb_cfg.begin) ||
1868                         (cfg->nlb_cfg.end > MAX_CACHE_LINES))
1869                         return -EINVAL;
1870                 rte_memcpy(&priv->nlb_cfg, &cfg->nlb_cfg,
1871                         sizeof(struct rte_pmd_afu_nlb_cfg));
1872         } else if (cfg->type == RTE_PMD_AFU_N3000_DMA) {
1873                 if (cfg->dma_cfg.index >= NUM_N3000_DMA)
1874                         return -EINVAL;
1875                 i = cfg->dma_cfg.index;
1876                 if (cfg->dma_cfg.length > priv->dma_ctx[i].mem_size)
1877                         return -EINVAL;
1878                 if (cfg->dma_cfg.offset >= priv->dma_ctx[i].mem_size)
1879                         return -EINVAL;
1880                 top = cfg->dma_cfg.length + cfg->dma_cfg.offset;
1881                 if ((top == 0) || (top > priv->dma_ctx[i].mem_size))
1882                         return -EINVAL;
1883                 if (i == 3) {  /* QDR connected to DMA3 */
1884                         if (cfg->dma_cfg.length & 0x3f) {
1885                                 cfg->dma_cfg.length &= ~0x3f;
1886                                 IFPGA_RAWDEV_PMD_INFO("Round size to %x for QDR",
1887                                         cfg->dma_cfg.length);
1888                         }
1889                 }
1890                 rte_memcpy(&priv->dma_cfg, &cfg->dma_cfg,
1891                         sizeof(struct rte_pmd_afu_dma_cfg));
1892         } else {
1893                 IFPGA_RAWDEV_PMD_ERR("Invalid type of N3000 AFU");
1894                 return -EINVAL;
1895         }
1896
1897         priv->cfg_type = cfg->type;
1898         return 0;
1899 }
1900
1901 static int n3000_afu_test(struct afu_rawdev *dev)
1902 {
1903         struct n3000_afu_priv *priv = NULL;
1904         int ret = 0;
1905
1906         if (!dev)
1907                 return -EINVAL;
1908
1909         if (!dev->priv)
1910                 return -ENOENT;
1911
1912         priv = (struct n3000_afu_priv *)dev->priv;
1913
1914         if (priv->cfg_type == RTE_PMD_AFU_N3000_NLB) {
1915                 IFPGA_RAWDEV_PMD_INFO("Test NLB");
1916                 ret = nlb_afu_test(dev);
1917         } else if (priv->cfg_type == RTE_PMD_AFU_N3000_DMA) {
1918                 IFPGA_RAWDEV_PMD_INFO("Test DMA%u", priv->dma_cfg.index);
1919                 ret = dma_afu_test(dev);
1920         } else {
1921                 IFPGA_RAWDEV_PMD_ERR("Please configure AFU before test");
1922                 ret = -EINVAL;
1923         }
1924
1925         return ret;
1926 }
1927
1928 static int n3000_afu_close(struct afu_rawdev *dev)
1929 {
1930         if (!dev)
1931                 return -EINVAL;
1932
1933         nlb_afu_ctx_release(dev);
1934         dma_afu_ctx_release(dev);
1935
1936         rte_free(dev->priv);
1937         dev->priv = NULL;
1938
1939         return 0;
1940 }
1941
1942 static int n3000_afu_dump(struct afu_rawdev *dev, FILE *f)
1943 {
1944         struct n3000_afu_priv *priv = NULL;
1945
1946         if (!dev)
1947                 return -EINVAL;
1948
1949         priv = (struct n3000_afu_priv *)dev->priv;
1950         if (!priv)
1951                 return -ENOENT;
1952
1953         if (!f)
1954                 f = stdout;
1955
1956         if (priv->cfg_type == RTE_PMD_AFU_N3000_NLB) {
1957                 struct nlb_afu_ctx *ctx = &priv->nlb_ctx;
1958                 fprintf(f, "addr:\t\t%p\n", (void *)ctx->addr);
1959                 fprintf(f, "dsm_ptr:\t%p\n", (void *)ctx->dsm_ptr);
1960                 fprintf(f, "dsm_iova:\t0x%"PRIx64"\n", ctx->dsm_iova);
1961                 fprintf(f, "src_ptr:\t%p\n", (void *)ctx->src_ptr);
1962                 fprintf(f, "src_iova:\t0x%"PRIx64"\n", ctx->src_iova);
1963                 fprintf(f, "dest_ptr:\t%p\n", (void *)ctx->dest_ptr);
1964                 fprintf(f, "dest_iova:\t0x%"PRIx64"\n", ctx->dest_iova);
1965                 fprintf(f, "status_ptr:\t%p\n", (void *)ctx->status_ptr);
1966         } else if (priv->cfg_type == RTE_PMD_AFU_N3000_DMA) {
1967                 struct dma_afu_ctx *ctx = &priv->dma_ctx[priv->dma_cfg.index];
1968                 fprintf(f, "index:\t\t%d\n", ctx->index);
1969                 fprintf(f, "addr:\t\t%p\n", (void *)ctx->addr);
1970                 fprintf(f, "csr_addr:\t%p\n", (void *)ctx->csr_addr);
1971                 fprintf(f, "desc_addr:\t%p\n", (void *)ctx->desc_addr);
1972                 fprintf(f, "ase_ctrl_addr:\t%p\n", (void *)ctx->ase_ctrl_addr);
1973                 fprintf(f, "ase_data_addr:\t%p\n", (void *)ctx->ase_data_addr);
1974                 fprintf(f, "desc_buf:\t%p\n", (void *)ctx->desc_buf);
1975                 fprintf(f, "magic_buf:\t%p\n", (void *)ctx->magic_buf);
1976                 fprintf(f, "magic_iova:\t0x%"PRIx64"\n", ctx->magic_iova);
1977         } else {
1978                 return -EINVAL;
1979         }
1980
1981         return 0;
1982 }
1983
1984 static int n3000_afu_reset(struct afu_rawdev *dev)
1985 {
1986         uint8_t *addr = NULL;
1987         uint64_t val = 0;
1988
1989         addr = (uint8_t *)n3000_afu_get_port_addr(dev);
1990         if (!addr)
1991                 return -ENOENT;
1992
1993         val = rte_read64(addr + PORT_CTRL_REG);
1994         val |= PORT_SOFT_RESET;
1995         rte_write64(val, addr + PORT_CTRL_REG);
1996         rte_delay_us(100);
1997         val &= ~PORT_SOFT_RESET;
1998         rte_write64(val, addr + PORT_CTRL_REG);
1999
2000         return 0;
2001 }
2002
2003 static struct afu_ops n3000_afu_ops = {
2004         .init = n3000_afu_init,
2005         .config = n3000_afu_config,
2006         .start = NULL,
2007         .stop = NULL,
2008         .test = n3000_afu_test,
2009         .close = n3000_afu_close,
2010         .dump = n3000_afu_dump,
2011         .reset = n3000_afu_reset
2012 };
2013
2014 static struct afu_rawdev_drv n3000_afu_drv = {
2015         .uuid = { N3000_AFU_UUID_L, N3000_AFU_UUID_H },
2016         .ops = &n3000_afu_ops
2017 };
2018
2019 AFU_PMD_REGISTER(n3000_afu_drv);