1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2022 Intel Corporation
13 #include <sys/eventfd.h>
14 #include <sys/ioctl.h>
17 #include <rte_malloc.h>
18 #include <rte_memcpy.h>
21 #include <rte_bus_pci.h>
22 #include <rte_bus_ifpga.h>
23 #include <rte_rawdev.h>
25 #include "afu_pmd_core.h"
26 #include "afu_pmd_n3000.h"
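/*
 * Program the NLB (native loopback) configuration register CSR_CFG from
 * the user supplied rte_pmd_afu_nlb_cfg: cache policy and hint, the
 * read/write/write-fence virtual channels and the multi-cacheline length.
 */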
28 static int nlb_afu_config(struct afu_rawdev *dev)
30 struct n3000_afu_priv *priv = NULL;
31 struct rte_pmd_afu_nlb_cfg *cfg = NULL;
40 priv = (struct n3000_afu_priv *)dev->priv;
48 if (cfg->cache_policy == NLB_WRPUSH_I)
51 v.wrthru_en = cfg->cache_policy;
53 if (cfg->cache_hint == NLB_RDLINE_MIXED)
56 v.rdsel = cfg->cache_hint;
59 v.chsel = cfg->read_vc;
60 v.wr_chsel = cfg->write_vc;
61 v.wrfence_chsel = cfg->wrfence_vc;
62 v.wrthru_en = cfg->cache_policy;
63 v.multicl_len = cfg->multi_cl - 1;
65 IFPGA_RAWDEV_PMD_DEBUG("cfg: 0x%08x", v.csr);
66 rte_write32(v.csr, priv->nlb_ctx.addr + CSR_CFG);
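/*
 * Report the NLB test statistics collected in the device status memory
 * (DSM): cacheline count, read/write counts, elapsed clocks and the
 * read/write bandwidth derived from the configured clock frequency.
 */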
71 static void nlb_afu_report(struct afu_rawdev *dev, uint32_t cl)
73 struct n3000_afu_priv *priv = NULL;
74 struct rte_pmd_afu_nlb_cfg *cfg = NULL;
75 struct nlb_dsm_status *stat = NULL;
77 double num, rd_bw, wr_bw;
79 if (!dev || !dev->priv)
82 priv = (struct n3000_afu_priv *)dev->priv;
85 stat = priv->nlb_ctx.status_ptr;
88 ticks = stat->num_clocks - stat->start_overhead;
90 ticks = stat->num_clocks -
91 (stat->start_overhead + stat->end_overhead);
93 if (cfg->freq_mhz == 0)
96 num = (double)stat->num_reads;
97 rd_bw = (num * CLS_TO_SIZE(1) * MHZ(cfg->freq_mhz)) / ticks;
98 num = (double)stat->num_writes;
99 wr_bw = (num * CLS_TO_SIZE(1) * MHZ(cfg->freq_mhz)) / ticks;
101 printf("Cachelines Read_Count Write_Count Clocks@%uMHz "
102 "Rd_Bandwidth Wr_Bandwidth\n", cfg->freq_mhz);
103 printf("%10u %10u %11u %12"PRIu64" %7.3f GB/s %7.3f GB/s\n",
104 cl, stat->num_reads, stat->num_writes, ticks,
105 rd_bw / 1e9, wr_bw / 1e9);
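/*
 * Run the NLB loopback test: program the DSM, source and destination
 * addresses, sweep the configured cacheline range, poll the DSM for test
 * completion and verify the destination buffer against the source data.
 */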
108 static int nlb_afu_test(struct afu_rawdev *dev)
110 struct n3000_afu_priv *priv = NULL;
111 struct nlb_afu_ctx *ctx = NULL;
112 struct rte_pmd_afu_nlb_cfg *cfg = NULL;
113 struct nlb_csr_ctl ctl;
114 uint32_t *ptr = NULL;
115 uint32_t i, j, cl, val = 0;
125 priv = (struct n3000_afu_priv *)dev->priv;
126 ctx = &priv->nlb_ctx;
127 cfg = &priv->nlb_cfg;
129 /* initialize registers */
130 IFPGA_RAWDEV_PMD_DEBUG("dsm_addr: 0x%"PRIx64, ctx->dsm_iova);
131 rte_write64(ctx->dsm_iova, ctx->addr + CSR_AFU_DSM_BASEL);
134 rte_write32(ctl.csr, ctx->addr + CSR_CTL);
136 rte_write32(ctl.csr, ctx->addr + CSR_CTL);
138 IFPGA_RAWDEV_PMD_DEBUG("src_addr: 0x%"PRIx64, ctx->src_iova);
139 rte_write64(SIZE_TO_CLS(ctx->src_iova), ctx->addr + CSR_SRC_ADDR);
140 IFPGA_RAWDEV_PMD_DEBUG("dst_addr: 0x%"PRIx64, ctx->dest_iova);
141 rte_write64(SIZE_TO_CLS(ctx->dest_iova), ctx->addr + CSR_DST_ADDR);
143 ret = nlb_afu_config(dev);
147 /* initialize src data */
148 ptr = (uint32_t *)ctx->src_ptr;
149 j = CLS_TO_SIZE(cfg->end) >> 2;
150 for (i = 0; i < j; i++)
154 for (cl = cfg->begin; cl <= cfg->end; cl += cfg->multi_cl) {
155 memset(ctx->dest_ptr, 0, CLS_TO_SIZE(cl));
156 memset(ctx->dsm_ptr, 0, DSM_SIZE);
159 rte_write32(ctl.csr, ctx->addr + CSR_CTL);
161 rte_write32(ctl.csr, ctx->addr + CSR_CTL);
163 rte_write32(cl, ctx->addr + CSR_NUM_LINES);
168 rte_write32(ctl.csr, ctx->addr + CSR_CTL);
171 rte_delay_ms(cfg->timeout * 1000);
172 ctl.force_completion = 1;
173 rte_write32(ctl.csr, ctx->addr + CSR_CTL);
174 ret = dsm_poll_timeout(&ctx->status_ptr->test_complete,
175 val, (val & 0x1) == 1, DSM_POLL_INTERVAL,
178 printf("DSM poll timeout\n");
182 ret = dsm_poll_timeout(&ctx->status_ptr->test_complete,
183 val, (val & 0x1) == 1, DSM_POLL_INTERVAL,
186 printf("DSM poll timeout\n");
189 ctl.force_completion = 1;
190 rte_write32(ctl.csr, ctx->addr + CSR_CTL);
193 nlb_afu_report(dev, cl);
197 sval = rte_read64(ctx->addr + CSR_STATUS1);
203 ptr = (uint32_t *)ctx->dest_ptr;
204 j = CLS_TO_SIZE(cl) >> 2;
205 for (i = 0; i < j; i++) {
207 IFPGA_RAWDEV_PMD_ERR("Data mismatch @ %u", i);
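/*
 * DMA test buffer management: dma_buf[] are the bounce buffers used by
 * the DMA engine (their IOVAs are resolved with rte_malloc_virt2iova),
 * while data_buf and ref_buf hold the test payload and its reference
 * copy used for verification.
 */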
217 static void dma_afu_buf_free(struct dma_afu_ctx *ctx)
224 for (i = 0; i < NUM_DMA_BUF; i++) {
225 rte_free(ctx->dma_buf[i]);
226 ctx->dma_buf[i] = NULL;
229 rte_free(ctx->data_buf);
230 ctx->data_buf = NULL;
232 rte_free(ctx->ref_buf);
236 static int dma_afu_buf_alloc(struct dma_afu_ctx *ctx,
237 struct rte_pmd_afu_dma_cfg *cfg)
239 size_t page_sz = sysconf(_SC_PAGE_SIZE);
245 for (i = 0; i < NUM_DMA_BUF; i++) {
246 ctx->dma_buf[i] = (uint64_t *)rte_zmalloc(NULL, cfg->size,
248 if (!ctx->dma_buf[i]) {
252 ctx->dma_iova[i] = rte_malloc_virt2iova(ctx->dma_buf[i]);
253 if (ctx->dma_iova[i] == RTE_BAD_IOVA) {
259 ctx->data_buf = rte_malloc(NULL, cfg->length, page_sz);
260 if (!ctx->data_buf) {
265 ctx->ref_buf = rte_malloc(NULL, cfg->length, page_sz);
274 rte_free(ctx->data_buf);
275 ctx->data_buf = NULL;
277 for (i = 0; i < NUM_DMA_BUF; i++) {
278 rte_free(ctx->dma_buf[i]);
279 ctx->dma_buf[i] = NULL;
284 static void dma_afu_buf_init(struct dma_afu_ctx *ctx, size_t size)
288 size_t dword_size = 0;
293 ptr = (int *)ctx->ref_buf;
296 memset(ptr, ctx->pattern, size);
299 dword_size = size >> 2;
300 for (i = 0; i < dword_size; i++)
303 rte_memcpy(ctx->data_buf, ctx->ref_buf, size);
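/*
 * Compare data_buf against ref_buf after a transfer and report up to
 * ERR_CHECK_LIMIT mismatching bytes.
 */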
306 static int dma_afu_buf_verify(struct dma_afu_ctx *ctx, size_t size)
316 src = (uint8_t *)ctx->ref_buf;
317 dst = (uint8_t *)ctx->data_buf;
319 if (memcmp(src, dst, size)) {
320 printf("Transfer is corrupted\n");
322 for (i = 0; i < size; i++) {
324 if (++n >= ERR_CHECK_LIMIT)
326 printf("Mismatch at 0x%zx, "
327 "Expected %02x Actual %02x\n",
333 if (n < ERR_CHECK_LIMIT) {
334 printf("Found %d error bytes\n", n);
337 printf("Found more than %d error bytes\n", n);
343 printf("Transfer is verified\n");
347 static void blk_write64(uint64_t *dev_addr, uint64_t *host_addr, uint64_t bytes)
349 uint64_t qwords = bytes / sizeof(uint64_t);
351 if (!IS_ALIGNED_QWORD((uint64_t)dev_addr) ||
352 !IS_ALIGNED_QWORD((uint64_t)bytes))
355 for (; qwords > 0; qwords--, host_addr++, dev_addr++)
356 rte_write64(*host_addr, dev_addr);
359 static void blk_read64(uint64_t *dev_addr, uint64_t *host_addr, uint64_t bytes)
361 uint64_t qwords = bytes / sizeof(uint64_t);
363 if (!IS_ALIGNED_QWORD((uint64_t)dev_addr) ||
364 !IS_ALIGNED_QWORD((uint64_t)bytes))
367 for (; qwords > 0; qwords--, host_addr++, dev_addr++)
368 *host_addr = rte_read64(dev_addr);
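/*
 * The ASE (address span extender) exposes FPGA memory through a small
 * MMIO window. Select the page containing 'addr', skipping the control
 * write when that page is already mapped.
 */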
371 static void switch_ase_page(struct dma_afu_ctx *ctx, uint64_t addr)
373 uint64_t requested_page = addr & ~DMA_ASE_WINDOW_MASK;
378 if (requested_page != ctx->cur_ase_page) {
379 rte_write64(requested_page, ctx->ase_ctrl_addr);
380 ctx->cur_ase_page = requested_page;
384 static int ase_write_unaligned(struct dma_afu_ctx *ctx, uint64_t dev_addr,
385 uint64_t host_addr, uint32_t count)
387 uint64_t dev_aligned_addr = 0;
390 uintptr_t addr = (uintptr_t)host_addr; /* convert to pointer-sized type */
392 IFPGA_RAWDEV_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64" (0x%x)", host_addr,
395 if (!ctx || (count >= QWORD_BYTES))
401 switch_ase_page(ctx, dev_addr);
403 shift = dev_addr % QWORD_BYTES;
404 dev_aligned_addr = (dev_addr - shift) & DMA_ASE_WINDOW_MASK;
405 val = rte_read64(ctx->ase_data_addr + dev_aligned_addr);
406 rte_memcpy(((char *)(&val)) + shift, (void *)addr, count);
408 /* write back to device */
409 rte_write64(val, ctx->ase_data_addr + dev_aligned_addr);
414 static int ase_write(struct dma_afu_ctx *ctx, uint64_t *dst_ptr,
415 uint64_t *src_ptr, uint64_t *count)
417 uint64_t src = *src_ptr;
418 uint64_t dst = *dst_ptr;
419 uint64_t align_bytes = *count;
421 uint64_t left_in_page = DMA_ASE_WINDOW;
422 uint64_t size_to_copy = 0;
424 IFPGA_RAWDEV_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64" (0x%"PRIx64")", src, dst,
427 if (!ctx || !IS_ALIGNED_DWORD(dst))
430 if (align_bytes < DWORD_BYTES)
433 if (!IS_ALIGNED_QWORD(dst)) {
434 /* Write out a single DWORD to get QWORD aligned */
435 switch_ase_page(ctx, dst);
436 offset = dst & DMA_ASE_WINDOW_MASK;
438 rte_write32(*(uint32_t *)(uintptr_t)src,
439 ctx->ase_data_addr + offset);
442 align_bytes -= DWORD_BYTES;
448 /* Write out blocks of 64-bit values */
449 while (align_bytes >= QWORD_BYTES) {
450 left_in_page -= dst & DMA_ASE_WINDOW_MASK;
452 MIN(left_in_page, (align_bytes & ~(QWORD_BYTES - 1)));
453 if (size_to_copy < QWORD_BYTES)
455 switch_ase_page(ctx, dst);
456 offset = dst & DMA_ASE_WINDOW_MASK;
457 blk_write64((uint64_t *)(ctx->ase_data_addr + offset),
458 (uint64_t *)(uintptr_t)src, size_to_copy);
461 align_bytes -= size_to_copy;
464 if (align_bytes >= DWORD_BYTES) {
465 /* Write out remaining DWORD */
466 switch_ase_page(ctx, dst);
467 offset = dst & DMA_ASE_WINDOW_MASK;
468 rte_write32(*(uint32_t *)(uintptr_t)src,
469 ctx->ase_data_addr + offset);
472 align_bytes -= DWORD_BYTES;
477 *count = align_bytes;
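/*
 * MMIO (ASE) copy from host memory to FPGA: transfer a leading unaligned
 * fragment first, then the 4/8-byte aligned body via ase_write(), and
 * finally any trailing unaligned bytes.
 */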
482 static int ase_host_to_fpga(struct dma_afu_ctx *ctx, uint64_t *dst_ptr,
483 uint64_t *src_ptr, uint64_t count)
485 uint64_t dst = *dst_ptr;
486 uint64_t src = *src_ptr;
487 uint64_t count_left = count;
488 uint64_t unaligned_size = 0;
491 IFPGA_RAWDEV_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64" (0x%"PRIx64")", src, dst,
494 /* aligns the address to 8 bytes using the dst masking method */
495 if (!IS_ALIGNED_DWORD(dst) && !IS_ALIGNED_QWORD(dst)) {
496 unaligned_size = QWORD_BYTES - (dst % QWORD_BYTES);
497 if (unaligned_size > count_left)
498 unaligned_size = count_left;
499 ret = ase_write_unaligned(ctx, dst, src, unaligned_size);
502 count_left -= unaligned_size;
503 src += unaligned_size;
504 dst += unaligned_size;
507 /* Handles 8/4 byte MMIO transfer */
508 ret = ase_write(ctx, &dst, &src, &count_left);
512 /* Left-over unaligned bytes are transferred using the dst masking method */
513 unaligned_size = QWORD_BYTES - (dst % QWORD_BYTES);
514 if (unaligned_size > count_left)
515 unaligned_size = count_left;
517 ret = ase_write_unaligned(ctx, dst, src, unaligned_size);
521 count_left -= unaligned_size;
522 *dst_ptr = dst + unaligned_size;
523 *src_ptr = src + unaligned_size;
528 static int ase_read_unaligned(struct dma_afu_ctx *ctx, uint64_t dev_addr,
529 uint64_t host_addr, uint32_t count)
531 uint64_t dev_aligned_addr = 0;
534 uintptr_t addr = (uintptr_t)host_addr; /* convert to pointer-sized type */
536 IFPGA_RAWDEV_PMD_DEBUG("0x%"PRIx64" <-- 0x%"PRIx64" (0x%x)", host_addr,
539 if (!ctx || (count >= QWORD_BYTES))
545 switch_ase_page(ctx, dev_addr);
547 shift = dev_addr % QWORD_BYTES;
548 dev_aligned_addr = (dev_addr - shift) & DMA_ASE_WINDOW_MASK;
549 val = rte_read64(ctx->ase_data_addr + dev_aligned_addr);
550 rte_memcpy((void *)addr, ((char *)(&val)) + shift, count);
555 static int ase_read(struct dma_afu_ctx *ctx, uint64_t *src_ptr,
556 uint64_t *dst_ptr, uint64_t *count)
558 uint64_t src = *src_ptr;
559 uint64_t dst = *dst_ptr;
560 uint64_t align_bytes = *count;
562 uint64_t left_in_page = DMA_ASE_WINDOW;
563 uint64_t size_to_copy = 0;
565 IFPGA_RAWDEV_PMD_DEBUG("0x%"PRIx64" <-- 0x%"PRIx64" (0x%"PRIx64")", dst, src,
568 if (!ctx || !IS_ALIGNED_DWORD(src))
571 if (align_bytes < DWORD_BYTES)
574 if (!IS_ALIGNED_QWORD(src)) {
575 /* Read a single DWORD to get QWORD aligned */
576 switch_ase_page(ctx, src);
577 offset = src & DMA_ASE_WINDOW_MASK;
578 *(uint32_t *)(uintptr_t)dst =
579 rte_read32(ctx->ase_data_addr + offset);
582 align_bytes -= DWORD_BYTES;
588 /* Read blocks of 64-bit values */
589 while (align_bytes >= QWORD_BYTES) {
590 left_in_page -= src & DMA_ASE_WINDOW_MASK;
592 MIN(left_in_page, (align_bytes & ~(QWORD_BYTES - 1)));
593 if (size_to_copy < QWORD_BYTES)
595 switch_ase_page(ctx, src);
596 offset = src & DMA_ASE_WINDOW_MASK;
597 blk_read64((uint64_t *)(ctx->ase_data_addr + offset),
598 (uint64_t *)(uintptr_t)dst, size_to_copy);
601 align_bytes -= size_to_copy;
604 if (align_bytes >= DWORD_BYTES) {
605 /* Read remaining DWORD */
606 switch_ase_page(ctx, src);
607 offset = src & DMA_ASE_WINDOW_MASK;
608 *(uint32_t *)(uintptr_t)dst =
609 rte_read32(ctx->ase_data_addr + offset);
612 align_bytes -= DWORD_BYTES;
617 *count = align_bytes;
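/*
 * MMIO (ASE) copy from FPGA to host memory; mirrors ase_host_to_fpga()
 * but aligns on the source (FPGA) address instead.
 */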
622 static int ase_fpga_to_host(struct dma_afu_ctx *ctx, uint64_t *src_ptr,
623 uint64_t *dst_ptr, uint64_t count)
625 uint64_t src = *src_ptr;
626 uint64_t dst = *dst_ptr;
627 uint64_t count_left = count;
628 uint64_t unaligned_size = 0;
631 IFPGA_RAWDEV_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64" (0x%"PRIx64")", src, dst,
634 /* Aligns the address to 8 bytes using the src masking method */
635 if (!IS_ALIGNED_DWORD(src) && !IS_ALIGNED_QWORD(src)) {
636 unaligned_size = QWORD_BYTES - (src % QWORD_BYTES);
637 if (unaligned_size > count_left)
638 unaligned_size = count_left;
639 ret = ase_read_unaligned(ctx, src, dst, unaligned_size);
642 count_left -= unaligned_size;
643 dst += unaligned_size;
644 src += unaligned_size;
647 /* Handles 8/4 byte MMIO transfer */
648 ret = ase_read(ctx, &src, &dst, &count_left);
652 /* Left-over unaligned bytes are transferred using the src masking method */
653 unaligned_size = QWORD_BYTES - (src % QWORD_BYTES);
654 if (unaligned_size > count_left)
655 unaligned_size = count_left;
657 ret = ase_read_unaligned(ctx, src, dst, unaligned_size);
661 count_left -= unaligned_size;
662 *dst_ptr = dst + unaligned_size;
663 *src_ptr = src + unaligned_size;
668 static void clear_interrupt(struct dma_afu_ctx *ctx)
670 /* clear interrupt by writing 1 to IRQ bit in status register */
671 msgdma_status status;
678 rte_write32(status.csr, CSR_STATUS(ctx->csr_addr));
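/*
 * Wait up to DMA_TIMEOUT_MSEC for the MSI-X eventfd to signal completion
 * of a DMA transfer, then acknowledge the interrupt in the status
 * register.
 */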
681 static int poll_interrupt(struct dma_afu_ctx *ctx)
683 struct pollfd pfd = {0};
685 ssize_t bytes_read = 0;
689 if (!ctx || (ctx->event_fd < 0))
692 pfd.fd = ctx->event_fd;
694 poll_ret = poll(&pfd, 1, DMA_TIMEOUT_MSEC);
696 IFPGA_RAWDEV_PMD_ERR("Error %s", strerror(errno));
699 } else if (poll_ret == 0) {
700 IFPGA_RAWDEV_PMD_ERR("Timeout");
703 bytes_read = read(pfd.fd, &count, sizeof(count));
704 if (bytes_read > 0) {
706 IFPGA_RAWDEV_PMD_DEBUG("Successful, ret %d, cnt %"PRIu64,
710 IFPGA_RAWDEV_PMD_ERR("Failed %s", bytes_read < 0 ?
711 strerror(errno) : "zero bytes read");
716 clear_interrupt(ctx);
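/*
 * Push one extended descriptor to the mSGDMA engine: wait until the
 * descriptor buffer is no longer full, then write the descriptor to the
 * descriptor port with 64-bit MMIO writes.
 */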
720 static void send_descriptor(struct dma_afu_ctx *ctx, msgdma_ext_desc *desc)
722 msgdma_status status;
723 uint64_t fpga_queue_full = 0;
729 IFPGA_RAWDEV_PMD_DEBUG("descriptor.rd_address = 0x%x%08x",
730 desc->rd_address_ext, desc->rd_address);
731 IFPGA_RAWDEV_PMD_DEBUG("descriptor.wr_address = 0x%x%08x",
732 desc->wr_address_ext, desc->wr_address);
733 IFPGA_RAWDEV_PMD_DEBUG("descriptor.len = %u", desc->len);
734 IFPGA_RAWDEV_PMD_DEBUG("descriptor.wr_burst_count = %u",
735 desc->wr_burst_count);
736 IFPGA_RAWDEV_PMD_DEBUG("descriptor.rd_burst_count = %u",
737 desc->rd_burst_count);
738 IFPGA_RAWDEV_PMD_DEBUG("descriptor.wr_stride %u", desc->wr_stride);
739 IFPGA_RAWDEV_PMD_DEBUG("descriptor.rd_stride %u", desc->rd_stride);
743 status.csr = rte_read32(CSR_STATUS(ctx->csr_addr));
744 if (fpga_queue_full++ > 100000000) {
745 IFPGA_RAWDEV_PMD_DEBUG("DMA queue full retry");
748 } while (status.desc_buf_full);
750 blk_write64((uint64_t *)ctx->desc_addr, (uint64_t *)desc,
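/*
 * Build and submit mSGDMA descriptors for one transfer. Host (CCIP)
 * addresses are first aligned to 4CL (256 bytes) with a short descriptor,
 * the aligned bulk is sent with burst count 4, and any remainder goes out
 * with burst count 1; the completion interrupt is enabled only on the
 * last descriptor when requested.
 */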
754 static int do_dma(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src,
755 int count, int is_last_desc, fpga_dma_type type, int intr_en)
757 msgdma_ext_desc *desc = NULL;
758 int alignment_offset = 0;
759 int segment_size = 0;
764 /* src, dst and count must be 64-byte aligned */
765 if (!IS_DMA_ALIGNED(src) || !IS_DMA_ALIGNED(dst) ||
766 !IS_DMA_ALIGNED(count))
768 memset(ctx->desc_buf, 0, sizeof(msgdma_ext_desc));
770 /* these fields are fixed for all DMA transfers */
771 desc = ctx->desc_buf;
775 desc->control.go = 1;
777 desc->control.transfer_irq_en = 1;
779 desc->control.transfer_irq_en = 0;
782 desc->control.early_done_en = 1;
784 desc->control.early_done_en = 0;
786 if (type == FPGA_TO_FPGA) {
787 desc->rd_address = src & DMA_MASK_32_BIT;
788 desc->wr_address = dst & DMA_MASK_32_BIT;
790 desc->wr_burst_count = 4;
791 desc->rd_burst_count = 4;
792 desc->rd_address_ext = (src >> 32) & DMA_MASK_32_BIT;
793 desc->wr_address_ext = (dst >> 32) & DMA_MASK_32_BIT;
794 send_descriptor(ctx, desc);
796 /* check CCIP (host) address is aligned to 4CL (256B) */
797 alignment_offset = (type == HOST_TO_FPGA)
798 ? (src % CCIP_ALIGN_BYTES) : (dst % CCIP_ALIGN_BYTES);
799 /* perform a short transfer to get aligned */
800 if (alignment_offset != 0) {
801 desc->rd_address = src & DMA_MASK_32_BIT;
802 desc->wr_address = dst & DMA_MASK_32_BIT;
803 desc->wr_burst_count = 1;
804 desc->rd_burst_count = 1;
805 desc->rd_address_ext = (src >> 32) & DMA_MASK_32_BIT;
806 desc->wr_address_ext = (dst >> 32) & DMA_MASK_32_BIT;
807 /* count isn't large enough to hit next 4CL boundary */
808 if ((CCIP_ALIGN_BYTES - alignment_offset) >= count) {
809 segment_size = count;
812 segment_size = CCIP_ALIGN_BYTES
816 count -= segment_size;
817 desc->control.transfer_irq_en = 0;
819 /* post a short transfer to align to a 4CL (256 byte) boundary */
820 desc->len = segment_size;
821 send_descriptor(ctx, desc);
823 /* at this point we are 4CL (256 byte) aligned */
824 if (count >= CCIP_ALIGN_BYTES) {
825 desc->rd_address = src & DMA_MASK_32_BIT;
826 desc->wr_address = dst & DMA_MASK_32_BIT;
827 desc->wr_burst_count = 4;
828 desc->rd_burst_count = 4;
829 desc->rd_address_ext = (src >> 32) & DMA_MASK_32_BIT;
830 desc->wr_address_ext = (dst >> 32) & DMA_MASK_32_BIT;
831 /* buffer ends on 4CL boundary */
832 if ((count % CCIP_ALIGN_BYTES) == 0) {
833 segment_size = count;
837 - (count % CCIP_ALIGN_BYTES);
840 count -= segment_size;
841 desc->control.transfer_irq_en = 0;
843 desc->len = segment_size;
844 send_descriptor(ctx, desc);
846 /* post short transfer to handle the remainder */
848 desc->rd_address = src & DMA_MASK_32_BIT;
849 desc->wr_address = dst & DMA_MASK_32_BIT;
851 desc->wr_burst_count = 1;
852 desc->rd_burst_count = 1;
853 desc->rd_address_ext = (src >> 32) & DMA_MASK_32_BIT;
854 desc->wr_address_ext = (dst >> 32) & DMA_MASK_32_BIT;
856 desc->control.transfer_irq_en = 1;
857 send_descriptor(ctx, desc);
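/*
 * Write-fence helpers: issue_magic() DMAs a known pattern from the magic
 * ROM into the host magic buffer, and wait_magic() spins until
 * DMA_WF_MAGIC appears there, which is used to order previously issued
 * transfers.
 */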
864 static int issue_magic(struct dma_afu_ctx *ctx)
866 *(ctx->magic_buf) = 0ULL;
867 return do_dma(ctx, DMA_WF_HOST_ADDR(ctx->magic_iova),
868 DMA_WF_MAGIC_ROM, 64, 1, FPGA_TO_HOST, 1);
871 static void wait_magic(struct dma_afu_ctx *ctx)
873 int magic_timeout = 0;
879 while (*(ctx->magic_buf) != DMA_WF_MAGIC) {
880 if (magic_timeout++ > 1000) {
881 IFPGA_RAWDEV_PMD_ERR("DMA magic operation timeout");
886 *(ctx->magic_buf) = 0ULL;
889 static int dma_tx_buf(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src,
890 uint64_t chunk, int is_last_chunk, int *intr_issued)
895 if (!ctx || !intr_issued)
898 src += chunk * ctx->dma_buf_size;
899 dst += chunk * ctx->dma_buf_size;
901 if (((chunk % HALF_DMA_BUF) == (HALF_DMA_BUF - 1)) || is_last_chunk) {
903 ret = poll_interrupt(ctx);
910 chunk %= NUM_DMA_BUF;
911 rte_memcpy(ctx->dma_buf[chunk], (void *)(uintptr_t)src,
913 ret = do_dma(ctx, dst, DMA_HOST_ADDR(ctx->dma_iova[chunk]),
914 ctx->dma_buf_size, 0, HOST_TO_FPGA, intr_en);
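/*
 * Host to FPGA transfer: unaligned head and tail bytes go through the ASE
 * window, the DMA-aligned middle is streamed through the bounce buffers,
 * and the completion interrupt is polled every half ring of buffers.
 */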
921 static int dma_host_to_fpga(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src,
925 uint64_t count_left = count;
926 uint64_t aligned_addr = 0;
927 uint64_t align_bytes = 0;
928 uint64_t dma_chunks = 0;
929 uint64_t dma_tx_bytes = 0;
934 IFPGA_RAWDEV_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64" (%zu)", src, dst,
940 if (!IS_DMA_ALIGNED(dst)) {
941 if (count_left < DMA_ALIGN_BYTES)
942 return ase_host_to_fpga(ctx, &dst, &src, count_left);
944 aligned_addr = ((dst / DMA_ALIGN_BYTES) + 1)
946 align_bytes = aligned_addr - dst;
947 ret = ase_host_to_fpga(ctx, &dst, &src, align_bytes);
950 count_left = count_left - align_bytes;
954 dma_chunks = count_left / ctx->dma_buf_size;
955 offset = dma_chunks * ctx->dma_buf_size;
956 count_left -= offset;
957 IFPGA_RAWDEV_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64
958 " (%"PRIu64"...0x%"PRIx64")",
959 src, dst, dma_chunks, count_left);
960 for (i = 0; i < dma_chunks; i++) {
961 ret = dma_tx_buf(ctx, dst, src, i,
962 i == (dma_chunks - 1), &issued_intr);
968 ret = poll_interrupt(ctx);
974 i = count_left / DMA_ALIGN_BYTES;
976 dma_tx_bytes = i * DMA_ALIGN_BYTES;
977 IFPGA_RAWDEV_PMD_DEBUG("left over 0x%"PRIx64" to DMA",
979 rte_memcpy(ctx->dma_buf[0],
980 (void *)(uintptr_t)(src + offset),
982 ret = do_dma(ctx, dst + offset,
983 DMA_HOST_ADDR(ctx->dma_iova[0]),
984 dma_tx_bytes, 1, HOST_TO_FPGA, 1);
987 ret = poll_interrupt(ctx);
992 count_left -= dma_tx_bytes;
994 IFPGA_RAWDEV_PMD_DEBUG("left over 0x%"PRIx64" to ASE",
996 dst += offset + dma_tx_bytes;
997 src += offset + dma_tx_bytes;
998 ret = ase_host_to_fpga(ctx, &dst, &src,
1007 static int dma_rx_buf(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src,
1008 uint64_t chunk, int is_last_chunk, uint64_t *rx_count, int *wf_issued)
1010 uint64_t i = chunk % NUM_DMA_BUF;
1011 uint64_t n = *rx_count;
1012 uint64_t num_pending = 0;
1015 if (!ctx || !wf_issued)
1018 ret = do_dma(ctx, DMA_HOST_ADDR(ctx->dma_iova[i]),
1019 src + chunk * ctx->dma_buf_size,
1020 ctx->dma_buf_size, 1, FPGA_TO_HOST, 0);
1024 num_pending = chunk - n + 1;
1025 if (num_pending == HALF_DMA_BUF) {
1026 ret = issue_magic(ctx);
1028 IFPGA_RAWDEV_PMD_DEBUG("Magic issue failed");
1034 if ((num_pending > (NUM_DMA_BUF - 1)) || is_last_chunk) {
1037 for (i = 0; i < HALF_DMA_BUF; i++) {
1038 rte_memcpy((void *)(uintptr_t)(dst +
1039 n * ctx->dma_buf_size),
1040 ctx->dma_buf[n % NUM_DMA_BUF],
1047 ret = issue_magic(ctx);
1049 IFPGA_RAWDEV_PMD_DEBUG("Magic issue failed");
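/*
 * FPGA to host transfer: the aligned middle is read back through the
 * bounce buffers, paced by magic-number write fences before copying data
 * out of the ring, while unaligned head and tail bytes use the ASE
 * window.
 */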
1058 static int dma_fpga_to_host(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src,
1062 uint64_t count_left = count;
1063 uint64_t aligned_addr = 0;
1064 uint64_t align_bytes = 0;
1065 uint64_t dma_chunks = 0;
1066 uint64_t pending_buf = 0;
1067 uint64_t dma_rx_bytes = 0;
1068 uint64_t offset = 0;
1072 IFPGA_RAWDEV_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64" (%zu)", src, dst,
1078 if (!IS_DMA_ALIGNED(src)) {
1079 if (count_left < DMA_ALIGN_BYTES)
1080 return ase_fpga_to_host(ctx, &src, &dst, count_left);
1082 aligned_addr = ((src / DMA_ALIGN_BYTES) + 1)
1084 align_bytes = aligned_addr - src;
1085 ret = ase_fpga_to_host(ctx, &src, &dst, align_bytes);
1088 count_left = count_left - align_bytes;
1092 dma_chunks = count_left / ctx->dma_buf_size;
1093 offset = dma_chunks * ctx->dma_buf_size;
1094 count_left -= offset;
1095 IFPGA_RAWDEV_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64
1096 " (%"PRIu64"...0x%"PRIx64")",
1097 src, dst, dma_chunks, count_left);
1098 for (i = 0; i < dma_chunks; i++) {
1099 ret = dma_rx_buf(ctx, dst, src, i,
1100 i == (dma_chunks - 1),
1101 &pending_buf, &wf_issued);
1109 /* clear out final dma memcpy operations */
1110 while (pending_buf < dma_chunks) {
1111 /* constant size transfer; no length check required */
1112 rte_memcpy((void *)(uintptr_t)(dst +
1113 pending_buf * ctx->dma_buf_size),
1114 ctx->dma_buf[pending_buf % NUM_DMA_BUF],
1119 if (count_left > 0) {
1120 i = count_left / DMA_ALIGN_BYTES;
1122 dma_rx_bytes = i * DMA_ALIGN_BYTES;
1123 IFPGA_RAWDEV_PMD_DEBUG("left over 0x%"PRIx64" to DMA",
1126 DMA_HOST_ADDR(ctx->dma_iova[0]),
1128 dma_rx_bytes, 1, FPGA_TO_HOST, 0);
1131 ret = issue_magic(ctx);
1135 rte_memcpy((void *)(uintptr_t)(dst + offset),
1136 ctx->dma_buf[0], dma_rx_bytes);
1139 count_left -= dma_rx_bytes;
1141 IFPGA_RAWDEV_PMD_DEBUG("left over 0x%"PRIx64" to ASE",
1143 dst += offset + dma_rx_bytes;
1144 src += offset + dma_rx_bytes;
1145 ret = ase_fpga_to_host(ctx, &src, &dst,
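/*
 * FPGA to FPGA transfer: fully DMA-aligned copies are done directly by
 * the engine; otherwise the data is bounced chunk by chunk through a
 * temporary host buffer (overlapping ranges are rejected).
 */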
1154 static int dma_fpga_to_fpga(struct dma_afu_ctx *ctx, uint64_t dst, uint64_t src,
1158 uint64_t count_left = count;
1159 uint64_t dma_chunks = 0;
1160 uint64_t offset = 0;
1161 uint64_t tx_chunks = 0;
1162 uint64_t *tmp_buf = NULL;
1165 IFPGA_RAWDEV_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64" (%zu)", src, dst,
1171 if (IS_DMA_ALIGNED(dst) && IS_DMA_ALIGNED(src)
1172 && IS_DMA_ALIGNED(count_left)) {
1173 dma_chunks = count_left / ctx->dma_buf_size;
1174 offset = dma_chunks * ctx->dma_buf_size;
1175 count_left -= offset;
1176 IFPGA_RAWDEV_PMD_DEBUG("0x%"PRIx64" ---> 0x%"PRIx64
1177 " (%"PRIu64"...0x%"PRIx64")",
1178 src, dst, dma_chunks, count_left);
1179 for (i = 0; i < dma_chunks; i++) {
1180 ret = do_dma(ctx, dst + i * ctx->dma_buf_size,
1181 src + i * ctx->dma_buf_size,
1182 ctx->dma_buf_size, 0, FPGA_TO_FPGA, 0);
1185 if ((((i + 1) % NUM_DMA_BUF) == 0) ||
1186 (i == (dma_chunks - 1))) {
1187 ret = issue_magic(ctx);
1194 if (count_left > 0) {
1195 IFPGA_RAWDEV_PMD_DEBUG("left over 0x%"PRIx64" to DMA", count_left);
1196 ret = do_dma(ctx, dst + offset, src + offset,
1197 count_left, 1, FPGA_TO_FPGA, 0);
1200 ret = issue_magic(ctx);
1206 if ((src < dst) && (src + count_left > dst)) {
1207 IFPGA_RAWDEV_PMD_ERR("Overlapping: 0x%"PRIx64
1208 " -> 0x%"PRIx64" (0x%"PRIx64")",
1209 src, dst, count_left);
1212 tx_chunks = count_left / ctx->dma_buf_size;
1213 offset = tx_chunks * ctx->dma_buf_size;
1214 count_left -= offset;
1215 IFPGA_RAWDEV_PMD_DEBUG("0x%"PRIx64" --> 0x%"PRIx64
1216 " (%"PRIu64"...0x%"PRIx64")",
1217 src, dst, tx_chunks, count_left);
1218 tmp_buf = (uint64_t *)rte_malloc(NULL, ctx->dma_buf_size,
1220 for (i = 0; i < tx_chunks; i++) {
1221 ret = dma_fpga_to_host(ctx, (uint64_t)tmp_buf,
1222 src + i * ctx->dma_buf_size,
1226 ret = dma_host_to_fpga(ctx,
1227 dst + i * ctx->dma_buf_size,
1228 (uint64_t)tmp_buf, ctx->dma_buf_size);
1233 if (count_left > 0) {
1234 ret = dma_fpga_to_host(ctx, (uint64_t)tmp_buf,
1235 src + offset, count_left);
1238 ret = dma_host_to_fpga(ctx, dst + offset,
1239 (uint64_t)tmp_buf, count_left);
1250 static int dma_transfer_sync(struct dma_afu_ctx *ctx, uint64_t dst,
1251 uint64_t src, size_t count, fpga_dma_type type)
1258 if (type == HOST_TO_FPGA)
1259 ret = dma_host_to_fpga(ctx, dst, src, count);
1260 else if (type == FPGA_TO_HOST)
1261 ret = dma_fpga_to_host(ctx, dst, src, count);
1262 else if (type == FPGA_TO_FPGA)
1263 ret = dma_fpga_to_fpga(ctx, dst, src, count);
1270 static double get_duration(struct timespec start, struct timespec end)
1272 uint64_t diff = 1000000000L * (end.tv_sec - start.tv_sec)
1273 + end.tv_nsec - start.tv_nsec;
1274 return (double)diff / (double)1000000000L;
1277 #define SWEEP_ITERS 1
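/*
 * Bandwidth sweep: time SWEEP_ITERS host-to-FPGA and FPGA-to-host
 * transfers of the requested size (optionally offset and shrunk to
 * exercise unaligned cases) and verify the buffer afterwards.
 */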
1278 static int sweep_test(struct dma_afu_ctx *ctx, uint32_t length,
1279 uint64_t ddr_offset, uint64_t buf_offset, uint64_t size_decrement)
1281 struct timespec start, end;
1282 uint64_t test_size = 0;
1283 uint64_t *dma_buf_ptr = NULL;
1284 double throughput, total_time = 0.0;
1288 if (!ctx || !ctx->data_buf || !ctx->ref_buf) {
1289 IFPGA_RAWDEV_PMD_ERR("Buffer for DMA test is not allocated");
1293 if (length < (buf_offset + size_decrement)) {
1294 IFPGA_RAWDEV_PMD_ERR("Test length is too small for the unaligned parameters");
1297 test_size = length - (buf_offset + size_decrement);
1298 if ((ddr_offset + test_size) > ctx->mem_size) {
1299 IFPGA_RAWDEV_PMD_ERR("Test is out of DDR memory space");
1303 dma_buf_ptr = (uint64_t *)((uint8_t *)ctx->data_buf + buf_offset);
1304 printf("Sweep Host %p to FPGA 0x%"PRIx64
1305 " with 0x%"PRIx64" bytes ...\n",
1306 (void *)dma_buf_ptr, ddr_offset, test_size);
1308 for (i = 0; i < SWEEP_ITERS; i++) {
1309 clock_gettime(CLOCK_MONOTONIC, &start);
1310 ret = dma_transfer_sync(ctx, ddr_offset, (uint64_t)dma_buf_ptr,
1311 test_size, HOST_TO_FPGA);
1312 clock_gettime(CLOCK_MONOTONIC, &end);
1314 IFPGA_RAWDEV_PMD_ERR("Failed");
1317 total_time += get_duration(start, end);
1319 throughput = (test_size * SWEEP_ITERS) / (total_time * 1000000);
1320 printf("Measured bandwidth = %lf MB/s\n", throughput);
1322 printf("Sweep FPGA 0x%"PRIx64" to Host %p with 0x%"PRIx64" bytes ...\n",
1323 ddr_offset, (void *)dma_buf_ptr, test_size);
1326 memset((char *)dma_buf_ptr, 0, test_size);
1327 for (i = 0; i < SWEEP_ITERS; i++) {
1328 clock_gettime(CLOCK_MONOTONIC, &start);
1329 ret = dma_transfer_sync(ctx, (uint64_t)dma_buf_ptr, ddr_offset,
1330 test_size, FPGA_TO_HOST);
1331 clock_gettime(CLOCK_MONOTONIC, &end);
1333 IFPGA_RAWDEV_PMD_ERR("Failed");
1336 total_time += get_duration(start, end);
1338 throughput = (test_size * SWEEP_ITERS) / (total_time * 1000000);
1339 printf("Measured bandwidth = %lf MB/s\n", throughput);
1341 printf("Verifying buffer ...\n");
1342 return dma_afu_buf_verify(ctx, test_size);
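/*
 * Top-level DMA AFU test: allocate and initialize the test buffers,
 * enable the DMA interrupt, run host->FPGA, FPGA->host and FPGA->FPGA
 * transfers with verification, then sweep aligned and, if requested,
 * unaligned transfer sizes.
 */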
1345 static int dma_afu_test(struct afu_rawdev *dev)
1347 struct n3000_afu_priv *priv = NULL;
1348 struct dma_afu_ctx *ctx = NULL;
1349 struct rte_pmd_afu_dma_cfg *cfg = NULL;
1351 uint64_t offset = 0;
1361 priv = (struct n3000_afu_priv *)dev->priv;
1362 cfg = &priv->dma_cfg;
1363 if (cfg->index >= NUM_N3000_DMA)
1365 ctx = &priv->dma_ctx[cfg->index];
1367 ctx->pattern = (int)cfg->pattern;
1368 ctx->verbose = (int)cfg->verbose;
1369 ctx->dma_buf_size = cfg->size;
1371 ret = dma_afu_buf_alloc(ctx, cfg);
1375 printf("Initialize test buffer\n");
1376 dma_afu_buf_init(ctx, cfg->length);
1378 /* enable interrupt */
1380 ctrl.global_intr_en_mask = 1;
1381 rte_write32(ctrl.csr, CSR_CONTROL(ctx->csr_addr));
1383 printf("Host %p to FPGA 0x%x with 0x%x bytes\n", ctx->data_buf,
1384 cfg->offset, cfg->length);
1385 ret = dma_transfer_sync(ctx, cfg->offset, (uint64_t)ctx->data_buf,
1386 cfg->length, HOST_TO_FPGA);
1388 IFPGA_RAWDEV_PMD_ERR("Failed to transfer data from host to FPGA");
1391 memset(ctx->data_buf, 0, cfg->length);
1393 printf("FPGA 0x%x to Host %p with 0x%x bytes\n", cfg->offset,
1394 ctx->data_buf, cfg->length);
1395 ret = dma_transfer_sync(ctx, (uint64_t)ctx->data_buf, cfg->offset,
1396 cfg->length, FPGA_TO_HOST);
1398 IFPGA_RAWDEV_PMD_ERR("Failed to transfer data from FPGA to host");
1401 ret = dma_afu_buf_verify(ctx, cfg->length);
1405 if ((cfg->offset + cfg->length * 2) <= ctx->mem_size)
1406 offset = cfg->offset + cfg->length;
1407 else if (cfg->offset > cfg->length)
1412 printf("FPGA 0x%x to FPGA 0x%"PRIx64" with 0x%x bytes\n",
1413 cfg->offset, offset, cfg->length);
1414 ret = dma_transfer_sync(ctx, offset, cfg->offset, cfg->length,
1417 IFPGA_RAWDEV_PMD_ERR("Failed to transfer data from FPGA to FPGA");
1421 printf("FPGA 0x%"PRIx64" to Host %p with 0x%x bytes\n", offset,
1422 ctx->data_buf, cfg->length);
1423 ret = dma_transfer_sync(ctx, (uint64_t)ctx->data_buf, offset,
1424 cfg->length, FPGA_TO_HOST);
1426 IFPGA_RAWDEV_PMD_ERR("Failed to transfer data from FPGA to host");
1429 ret = dma_afu_buf_verify(ctx, cfg->length);
1433 printf("Sweep with aligned address and size\n");
1434 ret = sweep_test(ctx, cfg->length, cfg->offset, 0, 0);
1438 if (cfg->unaligned) {
1439 printf("Sweep with unaligned address and size\n");
1440 struct unaligned_set {
1441 uint64_t addr_offset;
1443 } param[] = {{61, 5}, {3, 0}, {7, 3}, {0, 3}, {0, 61}, {0, 7}};
1444 for (i = 0; i < ARRAY_SIZE(param); i++) {
1445 ret = sweep_test(ctx, cfg->length, cfg->offset,
1446 param[i].addr_offset, param[i].size_dec);
1453 /* disable interrupt */
1454 ctrl.global_intr_en_mask = 0;
1455 rte_write32(ctrl.csr, CSR_CONTROL(ctx->csr_addr));
1458 dma_afu_buf_free(ctx);
1462 static struct rte_pci_device *n3000_afu_get_pci_dev(struct afu_rawdev *dev)
1464 struct rte_afu_device *afudev = NULL;
1466 if (!dev || !dev->rawdev || !dev->rawdev->device)
1469 afudev = RTE_DEV_TO_AFU(dev->rawdev->device);
1470 if (!afudev->rawdev || !afudev->rawdev->device)
1473 return RTE_DEV_TO_PCI(afudev->rawdev->device);
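/*
 * Bind 'count' eventfds to the AFU's MSI-X vectors starting at vec_start
 * via the VFIO_DEVICE_SET_IRQS ioctl on the parent PCI device.
 */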
1477 static int dma_afu_set_irqs(struct afu_rawdev *dev, uint32_t vec_start,
1478 uint32_t count, int *efds)
1480 struct rte_pci_device *pci_dev = NULL;
1481 struct vfio_irq_set *irq_set = NULL;
1482 int vfio_dev_fd = 0;
1486 if (!dev || !efds || (count == 0) || (count > MAX_MSIX_VEC))
1489 pci_dev = n3000_afu_get_pci_dev(dev);
1492 vfio_dev_fd = rte_intr_dev_fd_get(pci_dev->intr_handle);
1494 sz = sizeof(*irq_set) + sizeof(*efds) * count;
1495 irq_set = rte_zmalloc(NULL, sz, 0);
1499 irq_set->argsz = (uint32_t)sz;
1500 irq_set->count = count;
1501 irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
1502 VFIO_IRQ_SET_ACTION_TRIGGER;
1503 irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
1504 irq_set->start = vec_start;
1506 rte_memcpy(&irq_set->data, efds, sizeof(*efds) * count);
1507 ret = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
1509 IFPGA_RAWDEV_PMD_ERR("Error enabling MSI-X interrupts");
1516 static void *n3000_afu_get_port_addr(struct afu_rawdev *dev)
1518 struct rte_pci_device *pci_dev = NULL;
1519 uint8_t *addr = NULL;
1523 pci_dev = n3000_afu_get_pci_dev(dev);
1527 addr = (uint8_t *)pci_dev->mem_resource[0].addr;
1528 val = rte_read64(addr + PORT_ATTR_REG(dev->port));
1529 if (!PORT_IMPLEMENTED(val)) {
1530 IFPGA_RAWDEV_PMD_INFO("FIU port %d is not implemented", dev->port);
1534 bar = PORT_BAR(val);
1535 if (bar >= PCI_MAX_RESOURCE) {
1536 IFPGA_RAWDEV_PMD_ERR("BAR index %u is out of limit", bar);
1540 addr = (uint8_t *)pci_dev->mem_resource[bar].addr + PORT_OFFSET(val);
1544 static int n3000_afu_get_irq_capability(struct afu_rawdev *dev,
1545 uint32_t *vec_start, uint32_t *vec_count)
1547 uint8_t *addr = NULL;
1549 uint64_t header = 0;
1550 uint64_t next_offset = 0;
1552 addr = (uint8_t *)n3000_afu_get_port_addr(dev);
1557 addr += next_offset;
1558 header = rte_read64(addr);
1559 if ((DFH_TYPE(header) == DFH_TYPE_PRIVATE) &&
1560 (DFH_FEATURE_ID(header) == PORT_FEATURE_UINT_ID)) {
1561 val = rte_read64(addr + PORT_UINT_CAP_REG);
1563 *vec_start = PORT_VEC_START(val);
1565 *vec_count = PORT_VEC_COUNT(val);
1568 next_offset = DFH_NEXT_OFFSET(header);
1569 if (((next_offset & 0xffff) == 0xffff) || (next_offset == 0))
1571 } while (!DFH_EOL(header));
1576 static int nlb_afu_ctx_release(struct afu_rawdev *dev)
1578 struct n3000_afu_priv *priv = NULL;
1579 struct nlb_afu_ctx *ctx = NULL;
1584 priv = (struct n3000_afu_priv *)dev->priv;
1588 ctx = &priv->nlb_ctx;
1590 rte_free(ctx->dsm_ptr);
1591 ctx->dsm_ptr = NULL;
1592 ctx->status_ptr = NULL;
1594 rte_free(ctx->src_ptr);
1595 ctx->src_ptr = NULL;
1597 rte_free(ctx->dest_ptr);
1598 ctx->dest_ptr = NULL;
1603 static int nlb_afu_ctx_init(struct afu_rawdev *dev, uint8_t *addr)
1605 struct n3000_afu_priv *priv = NULL;
1606 struct nlb_afu_ctx *ctx = NULL;
1612 priv = (struct n3000_afu_priv *)dev->priv;
1616 ctx = &priv->nlb_ctx;
1619 ctx->dsm_ptr = (uint8_t *)rte_zmalloc(NULL, DSM_SIZE, TEST_MEM_ALIGN);
1623 ctx->dsm_iova = rte_malloc_virt2iova(ctx->dsm_ptr);
1624 if (ctx->dsm_iova == RTE_BAD_IOVA) {
1629 ctx->src_ptr = (uint8_t *)rte_zmalloc(NULL, NLB_BUF_SIZE,
1631 if (!ctx->src_ptr) {
1635 ctx->src_iova = rte_malloc_virt2iova(ctx->src_ptr);
1636 if (ctx->src_iova == RTE_BAD_IOVA) {
1641 ctx->dest_ptr = (uint8_t *)rte_zmalloc(NULL, NLB_BUF_SIZE,
1643 if (!ctx->dest_ptr) {
1647 ctx->dest_iova = rte_malloc_virt2iova(ctx->dest_ptr);
1648 if (ctx->dest_iova == RTE_BAD_IOVA) {
1653 ctx->status_ptr = (struct nlb_dsm_status *)(ctx->dsm_ptr + DSM_STATUS);
1657 rte_free(ctx->dest_ptr);
1658 ctx->dest_ptr = NULL;
1660 rte_free(ctx->src_ptr);
1661 ctx->src_ptr = NULL;
1663 rte_free(ctx->dsm_ptr);
1664 ctx->dsm_ptr = NULL;
1668 static int dma_afu_ctx_release(struct afu_rawdev *dev)
1670 struct n3000_afu_priv *priv = NULL;
1671 struct dma_afu_ctx *ctx = NULL;
1676 priv = (struct n3000_afu_priv *)dev->priv;
1680 ctx = &priv->dma_ctx[0];
1682 rte_free(ctx->desc_buf);
1683 ctx->desc_buf = NULL;
1685 rte_free(ctx->magic_buf);
1686 ctx->magic_buf = NULL;
1688 close(ctx->event_fd);
1692 static int dma_afu_ctx_init(struct afu_rawdev *dev, int index, uint8_t *addr)
1694 struct n3000_afu_priv *priv = NULL;
1695 struct dma_afu_ctx *ctx = NULL;
1696 uint64_t mem_sz[] = {0x100000000, 0x100000000, 0x40000000, 0x1000000};
1697 static int efds[1] = {0};
1698 uint32_t vec_start = 0;
1701 if (!dev || (index < 0) || (index >= NUM_N3000_DMA) || !addr)
1704 priv = (struct n3000_afu_priv *)dev->priv;
1708 ctx = &priv->dma_ctx[index];
1711 ctx->csr_addr = addr + DMA_CSR;
1712 ctx->desc_addr = addr + DMA_DESC;
1713 ctx->ase_ctrl_addr = addr + DMA_ASE_CTRL;
1714 ctx->ase_data_addr = addr + DMA_ASE_DATA;
1715 ctx->mem_size = mem_sz[ctx->index];
1716 ctx->cur_ase_page = INVALID_ASE_PAGE;
1717 if (ctx->index == 0) {
1718 ret = n3000_afu_get_irq_capability(dev, &vec_start, NULL);
1722 efds[0] = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
1724 IFPGA_RAWDEV_PMD_ERR("eventfd create failed");
1728 if (dma_afu_set_irqs(dev, vec_start, 1, efds))
1729 IFPGA_RAWDEV_PMD_ERR("DMA interrupt setup failed");
1732 ctx->event_fd = efds[0];
1734 ctx->desc_buf = (msgdma_ext_desc *)rte_zmalloc(NULL,
1735 sizeof(msgdma_ext_desc), DMA_ALIGN_BYTES);
1736 if (!ctx->desc_buf) {
1741 ctx->magic_buf = (uint64_t *)rte_zmalloc(NULL, MAGIC_BUF_SIZE,
1743 if (!ctx->magic_buf) {
1747 ctx->magic_iova = rte_malloc_virt2iova(ctx->magic_buf);
1748 if (ctx->magic_iova == RTE_BAD_IOVA) {
1756 dma_afu_ctx_release(dev);
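/*
 * Walk the device feature header (DFH) list of the AFU region and create
 * a context for each recognized feature: the NLB0 AFU and up to
 * NUM_N3000_DMA DMA engines, matched by their UUIDs.
 */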
1760 static int n3000_afu_ctx_init(struct afu_rawdev *dev)
1762 struct n3000_afu_priv *priv = NULL;
1763 uint8_t *addr = NULL;
1764 uint64_t header = 0;
1765 uint64_t uuid_hi = 0;
1766 uint64_t uuid_lo = 0;
1767 uint64_t next_offset = 0;
1773 priv = (struct n3000_afu_priv *)dev->priv;
1777 addr = (uint8_t *)dev->addr;
1779 addr += next_offset;
1780 header = rte_read64(addr);
1781 uuid_lo = rte_read64(addr + DFH_UUID_L_OFFSET);
1782 uuid_hi = rte_read64(addr + DFH_UUID_H_OFFSET);
1784 if ((DFH_TYPE(header) == DFH_TYPE_AFU) &&
1785 (uuid_lo == N3000_NLB0_UUID_L) &&
1786 (uuid_hi == N3000_NLB0_UUID_H)) {
1787 IFPGA_RAWDEV_PMD_INFO("AFU NLB0 found @ %p", (void *)addr);
1788 ret = nlb_afu_ctx_init(dev, addr);
1791 } else if ((DFH_TYPE(header) == DFH_TYPE_BBB) &&
1792 (uuid_lo == N3000_DMA_UUID_L) &&
1793 (uuid_hi == N3000_DMA_UUID_H) &&
1794 (priv->num_dma < NUM_N3000_DMA)) {
1795 IFPGA_RAWDEV_PMD_INFO("AFU DMA%d found @ %p",
1796 priv->num_dma, (void *)addr);
1797 ret = dma_afu_ctx_init(dev, priv->num_dma, addr);
1802 IFPGA_RAWDEV_PMD_DEBUG("DFH: type %"PRIu64
1803 ", uuid %016"PRIx64"%016"PRIx64,
1804 DFH_TYPE(header), uuid_hi, uuid_lo);
1807 next_offset = DFH_NEXT_OFFSET(header);
1808 if (((next_offset & 0xffff) == 0xffff) || (next_offset == 0))
1810 } while (!DFH_EOL(header));
1815 static int n3000_afu_init(struct afu_rawdev *dev)
1821 dev->priv = rte_zmalloc(NULL, sizeof(struct n3000_afu_priv), 0);
1826 return n3000_afu_ctx_init(dev);
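/*
 * Validate and store the user configuration. NLB parameters are range
 * checked against the supported CSR values; DMA parameters are checked
 * against the memory size of the selected channel, and the length for the
 * QDR channel (DMA3) is rounded down to a 64-byte multiple.
 */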
1829 static int n3000_afu_config(struct afu_rawdev *dev, void *config,
1832 struct n3000_afu_priv *priv = NULL;
1833 struct rte_pmd_afu_n3000_cfg *cfg = NULL;
1837 if (!dev || !config || !config_size)
1840 priv = (struct n3000_afu_priv *)dev->priv;
1844 if (config_size != sizeof(struct rte_pmd_afu_n3000_cfg))
1847 cfg = (struct rte_pmd_afu_n3000_cfg *)config;
1848 if (cfg->type == RTE_PMD_AFU_N3000_NLB) {
1849 if (cfg->nlb_cfg.mode != NLB_MODE_LPBK)
1851 if ((cfg->nlb_cfg.read_vc > NLB_VC_RANDOM) ||
1852 (cfg->nlb_cfg.write_vc > NLB_VC_RANDOM))
1854 if (cfg->nlb_cfg.wrfence_vc > NLB_VC_VH1)
1856 if (cfg->nlb_cfg.cache_hint > NLB_RDLINE_MIXED)
1858 if (cfg->nlb_cfg.cache_policy > NLB_WRPUSH_I)
1860 if ((cfg->nlb_cfg.multi_cl != 1) &&
1861 (cfg->nlb_cfg.multi_cl != 2) &&
1862 (cfg->nlb_cfg.multi_cl != 4))
1864 if ((cfg->nlb_cfg.begin < MIN_CACHE_LINES) ||
1865 (cfg->nlb_cfg.begin > MAX_CACHE_LINES))
1867 if ((cfg->nlb_cfg.end < cfg->nlb_cfg.begin) ||
1868 (cfg->nlb_cfg.end > MAX_CACHE_LINES))
1870 rte_memcpy(&priv->nlb_cfg, &cfg->nlb_cfg,
1871 sizeof(struct rte_pmd_afu_nlb_cfg));
1872 } else if (cfg->type == RTE_PMD_AFU_N3000_DMA) {
1873 if (cfg->dma_cfg.index >= NUM_N3000_DMA)
1875 i = cfg->dma_cfg.index;
1876 if (cfg->dma_cfg.length > priv->dma_ctx[i].mem_size)
1878 if (cfg->dma_cfg.offset >= priv->dma_ctx[i].mem_size)
1880 top = cfg->dma_cfg.length + cfg->dma_cfg.offset;
1881 if ((top == 0) || (top > priv->dma_ctx[i].mem_size))
1883 if (i == 3) { /* QDR connected to DMA3 */
1884 if (cfg->dma_cfg.length & 0x3f) {
1885 cfg->dma_cfg.length &= ~0x3f;
1886 IFPGA_RAWDEV_PMD_INFO("Round size to %x for QDR",
1887 cfg->dma_cfg.length);
1890 rte_memcpy(&priv->dma_cfg, &cfg->dma_cfg,
1891 sizeof(struct rte_pmd_afu_dma_cfg));
1893 IFPGA_RAWDEV_PMD_ERR("Invalid type of N3000 AFU");
1897 priv->cfg_type = cfg->type;
1901 static int n3000_afu_test(struct afu_rawdev *dev)
1903 struct n3000_afu_priv *priv = NULL;
1912 priv = (struct n3000_afu_priv *)dev->priv;
1914 if (priv->cfg_type == RTE_PMD_AFU_N3000_NLB) {
1915 IFPGA_RAWDEV_PMD_INFO("Test NLB");
1916 ret = nlb_afu_test(dev);
1917 } else if (priv->cfg_type == RTE_PMD_AFU_N3000_DMA) {
1918 IFPGA_RAWDEV_PMD_INFO("Test DMA%u", priv->dma_cfg.index);
1919 ret = dma_afu_test(dev);
1921 IFPGA_RAWDEV_PMD_ERR("Please configure AFU before test");
1928 static int n3000_afu_close(struct afu_rawdev *dev)
1933 nlb_afu_ctx_release(dev);
1934 dma_afu_ctx_release(dev);
1936 rte_free(dev->priv);
1942 static int n3000_afu_dump(struct afu_rawdev *dev, FILE *f)
1944 struct n3000_afu_priv *priv = NULL;
1949 priv = (struct n3000_afu_priv *)dev->priv;
1956 if (priv->cfg_type == RTE_PMD_AFU_N3000_NLB) {
1957 struct nlb_afu_ctx *ctx = &priv->nlb_ctx;
1958 fprintf(f, "addr:\t\t%p\n", (void *)ctx->addr);
1959 fprintf(f, "dsm_ptr:\t%p\n", (void *)ctx->dsm_ptr);
1960 fprintf(f, "dsm_iova:\t0x%"PRIx64"\n", ctx->dsm_iova);
1961 fprintf(f, "src_ptr:\t%p\n", (void *)ctx->src_ptr);
1962 fprintf(f, "src_iova:\t0x%"PRIx64"\n", ctx->src_iova);
1963 fprintf(f, "dest_ptr:\t%p\n", (void *)ctx->dest_ptr);
1964 fprintf(f, "dest_iova:\t0x%"PRIx64"\n", ctx->dest_iova);
1965 fprintf(f, "status_ptr:\t%p\n", (void *)ctx->status_ptr);
1966 } else if (priv->cfg_type == RTE_PMD_AFU_N3000_DMA) {
1967 struct dma_afu_ctx *ctx = &priv->dma_ctx[priv->dma_cfg.index];
1968 fprintf(f, "index:\t\t%d\n", ctx->index);
1969 fprintf(f, "addr:\t\t%p\n", (void *)ctx->addr);
1970 fprintf(f, "csr_addr:\t%p\n", (void *)ctx->csr_addr);
1971 fprintf(f, "desc_addr:\t%p\n", (void *)ctx->desc_addr);
1972 fprintf(f, "ase_ctrl_addr:\t%p\n", (void *)ctx->ase_ctrl_addr);
1973 fprintf(f, "ase_data_addr:\t%p\n", (void *)ctx->ase_data_addr);
1974 fprintf(f, "desc_buf:\t%p\n", (void *)ctx->desc_buf);
1975 fprintf(f, "magic_buf:\t%p\n", (void *)ctx->magic_buf);
1976 fprintf(f, "magic_iova:\t0x%"PRIx64"\n", ctx->magic_iova);
1984 static int n3000_afu_reset(struct afu_rawdev *dev)
1986 uint8_t *addr = NULL;
1989 addr = (uint8_t *)n3000_afu_get_port_addr(dev);
1993 val = rte_read64(addr + PORT_CTRL_REG);
1994 val |= PORT_SOFT_RESET;
1995 rte_write64(val, addr + PORT_CTRL_REG);
1997 val &= ~PORT_SOFT_RESET;
1998 rte_write64(val, addr + PORT_CTRL_REG);
2003 static struct afu_ops n3000_afu_ops = {
2004 .init = n3000_afu_init,
2005 .config = n3000_afu_config,
2008 .test = n3000_afu_test,
2009 .close = n3000_afu_close,
2010 .dump = n3000_afu_dump,
2011 .reset = n3000_afu_reset
2014 static struct afu_rawdev_drv n3000_afu_drv = {
2015 .uuid = { N3000_AFU_UUID_L, N3000_AFU_UUID_H },
2016 .ops = &n3000_afu_ops
2019 AFU_PMD_REGISTER(n3000_afu_drv);