X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;ds=sidebyside;f=drivers%2Fbus%2Ffslmc%2Fqbman%2Fqbman_portal.c;h=0a2af7be4c2279771bad571a87bf64be080a7812;hb=7eec575a223daf436ee68a91642ffb7ed8cdbd49;hp=2d324f74abe2c05741c8a5133fc0fdfd864f9c90;hpb=496324d2a595bf87c5b89553c3d120a0f2e62dcd;p=dpdk.git diff --git a/drivers/bus/fslmc/qbman/qbman_portal.c b/drivers/bus/fslmc/qbman/qbman_portal.c index 2d324f74ab..0a2af7be4c 100644 --- a/drivers/bus/fslmc/qbman/qbman_portal.c +++ b/drivers/bus/fslmc/qbman/qbman_portal.c @@ -1,39 +1,17 @@ /* SPDX-License-Identifier: BSD-3-Clause * * Copyright (C) 2014-2016 Freescale Semiconductor, Inc. + * Copyright 2018-2020 NXP * */ +#include "qbman_sys.h" #include "qbman_portal.h" /* QBMan portal management command codes */ #define QBMAN_MC_ACQUIRE 0x30 #define QBMAN_WQCHAN_CONFIGURE 0x46 -/* CINH register offsets */ -#define QBMAN_CINH_SWP_EQCR_PI 0x800 -#define QBMAN_CINH_SWP_EQCR_CI 0x840 -#define QBMAN_CINH_SWP_EQAR 0x8c0 -#define QBMAN_CINH_SWP_DQPI 0xa00 -#define QBMAN_CINH_SWP_DCAP 0xac0 -#define QBMAN_CINH_SWP_SDQCR 0xb00 -#define QBMAN_CINH_SWP_RAR 0xcc0 -#define QBMAN_CINH_SWP_ISR 0xe00 -#define QBMAN_CINH_SWP_IER 0xe40 -#define QBMAN_CINH_SWP_ISDR 0xe80 -#define QBMAN_CINH_SWP_IIR 0xec0 -#define QBMAN_CINH_SWP_DQRR_ITR 0xa80 -#define QBMAN_CINH_SWP_ITPR 0xf40 - -/* CENA register offsets */ -#define QBMAN_CENA_SWP_EQCR(n) (0x000 + ((uint32_t)(n) << 6)) -#define QBMAN_CENA_SWP_DQRR(n) (0x200 + ((uint32_t)(n) << 6)) -#define QBMAN_CENA_SWP_RCR(n) (0x400 + ((uint32_t)(n) << 6)) -#define QBMAN_CENA_SWP_CR 0x600 -#define QBMAN_CENA_SWP_RR(vb) (0x700 + ((uint32_t)(vb) >> 1)) -#define QBMAN_CENA_SWP_VDQCR 0x780 -#define QBMAN_CENA_SWP_EQCR_CI 0x840 - /* Reverse mapping of QBMAN_CENA_SWP_DQRR() */ #define QBMAN_IDX_FROM_DQRR(p) (((unsigned long)p & 0x1ff) >> 6) @@ -83,6 +61,173 @@ enum qbman_sdqcr_fc { #define MAX_QBMAN_PORTALS 64 static struct qbman_swp *portal_idx_map[MAX_QBMAN_PORTALS]; +uint32_t qman_version; + +/* Internal Function declaration */ +static int +qbman_swp_enqueue_array_mode_direct(struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct qbman_fd *fd); +static int +qbman_swp_enqueue_array_mode_mem_back(struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct qbman_fd *fd); + +static int +qbman_swp_enqueue_ring_mode_direct(struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct qbman_fd *fd); +static int +qbman_swp_enqueue_ring_mode_cinh_read_direct(struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct qbman_fd *fd); +static int +qbman_swp_enqueue_ring_mode_cinh_direct(struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct qbman_fd *fd); +static int +qbman_swp_enqueue_ring_mode_mem_back(struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct qbman_fd *fd); + +static int +qbman_swp_enqueue_multiple_direct(struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct qbman_fd *fd, + uint32_t *flags, + int num_frames); +static int +qbman_swp_enqueue_multiple_cinh_read_direct(struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct qbman_fd *fd, + uint32_t *flags, + int num_frames); +static int +qbman_swp_enqueue_multiple_cinh_direct(struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct qbman_fd *fd, + uint32_t *flags, + int num_frames); +static int +qbman_swp_enqueue_multiple_mem_back(struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct qbman_fd *fd, + uint32_t *flags, + int num_frames); + +static int 
+qbman_swp_enqueue_multiple_fd_direct(struct qbman_swp *s, + const struct qbman_eq_desc *d, + struct qbman_fd **fd, + uint32_t *flags, + int num_frames); +static int +qbman_swp_enqueue_multiple_fd_cinh_read_direct(struct qbman_swp *s, + const struct qbman_eq_desc *d, + struct qbman_fd **fd, + uint32_t *flags, + int num_frames); +static int +qbman_swp_enqueue_multiple_fd_cinh_direct(struct qbman_swp *s, + const struct qbman_eq_desc *d, + struct qbman_fd **fd, + uint32_t *flags, + int num_frames); +static int +qbman_swp_enqueue_multiple_fd_mem_back(struct qbman_swp *s, + const struct qbman_eq_desc *d, + struct qbman_fd **fd, + uint32_t *flags, + int num_frames); + +static int +qbman_swp_enqueue_multiple_desc_direct(struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct qbman_fd *fd, + int num_frames); +static int +qbman_swp_enqueue_multiple_desc_cinh_read_direct(struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct qbman_fd *fd, + int num_frames); +static int +qbman_swp_enqueue_multiple_desc_cinh_direct(struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct qbman_fd *fd, + int num_frames); +static int +qbman_swp_enqueue_multiple_desc_mem_back(struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct qbman_fd *fd, + int num_frames); + +static int +qbman_swp_pull_direct(struct qbman_swp *s, struct qbman_pull_desc *d); +static int +qbman_swp_pull_cinh_direct(struct qbman_swp *s, struct qbman_pull_desc *d); +static int +qbman_swp_pull_mem_back(struct qbman_swp *s, struct qbman_pull_desc *d); + +const struct qbman_result *qbman_swp_dqrr_next_direct(struct qbman_swp *s); +const struct qbman_result *qbman_swp_dqrr_next_cinh_direct(struct qbman_swp *s); +const struct qbman_result *qbman_swp_dqrr_next_mem_back(struct qbman_swp *s); + +static int +qbman_swp_release_direct(struct qbman_swp *s, + const struct qbman_release_desc *d, + const uint64_t *buffers, unsigned int num_buffers); +static int +qbman_swp_release_cinh_direct(struct qbman_swp *s, + const struct qbman_release_desc *d, + const uint64_t *buffers, unsigned int num_buffers); +static int +qbman_swp_release_mem_back(struct qbman_swp *s, + const struct qbman_release_desc *d, + const uint64_t *buffers, unsigned int num_buffers); + +/* Function pointers */ +static int (*qbman_swp_enqueue_array_mode_ptr)(struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct qbman_fd *fd) + = qbman_swp_enqueue_array_mode_direct; + +static int (*qbman_swp_enqueue_ring_mode_ptr)(struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct qbman_fd *fd) + = qbman_swp_enqueue_ring_mode_direct; + +static int (*qbman_swp_enqueue_multiple_ptr)(struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct qbman_fd *fd, + uint32_t *flags, + int num_frames) + = qbman_swp_enqueue_multiple_direct; + +static int (*qbman_swp_enqueue_multiple_fd_ptr)(struct qbman_swp *s, + const struct qbman_eq_desc *d, + struct qbman_fd **fd, + uint32_t *flags, + int num_frames) + = qbman_swp_enqueue_multiple_fd_direct; + +static int (*qbman_swp_enqueue_multiple_desc_ptr)(struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct qbman_fd *fd, + int num_frames) + = qbman_swp_enqueue_multiple_desc_direct; + +static int (*qbman_swp_pull_ptr)(struct qbman_swp *s, + struct qbman_pull_desc *d) + = qbman_swp_pull_direct; + +const struct qbman_result *(*qbman_swp_dqrr_next_ptr)(struct qbman_swp *s) + = qbman_swp_dqrr_next_direct; + +static int (*qbman_swp_release_ptr)(struct qbman_swp *s, + const struct 
qbman_release_desc *d, + const uint64_t *buffers, unsigned int num_buffers) + = qbman_swp_release_direct; + /*********************************/ /* Portal constructor/destructor */ /*********************************/ @@ -104,26 +249,31 @@ struct qbman_swp *qbman_swp_init(const struct qbman_swp_desc *d) { int ret; uint32_t eqcr_pi; + uint32_t mask_size; struct qbman_swp *p = malloc(sizeof(*p)); if (!p) return NULL; + + memset(p, 0, sizeof(struct qbman_swp)); + p->desc = *d; #ifdef QBMAN_CHECKING p->mc.check = swp_mc_can_start; #endif p->mc.valid_bit = QB_VALID_BIT; - p->sdq = 0; p->sdq |= qbman_sdqcr_dct_prio_ics << QB_SDQCR_DCT_SHIFT; p->sdq |= qbman_sdqcr_fc_up_to_3 << QB_SDQCR_FC_SHIFT; p->sdq |= QMAN_SDQCR_TOKEN << QB_SDQCR_TOK_SHIFT; + if ((d->qman_version & QMAN_REV_MASK) >= QMAN_REV_5000 + && (d->cena_access_mode == qman_cena_fastest_access)) + p->mr.valid_bit = QB_VALID_BIT; atomic_set(&p->vdq.busy, 1); p->vdq.valid_bit = QB_VALID_BIT; - p->dqrr.next_idx = 0; p->dqrr.valid_bit = QB_VALID_BIT; qman_version = p->desc.qman_version; - if ((qman_version & 0xFFFF0000) < QMAN_REV_4100) { + if ((qman_version & QMAN_REV_MASK) < QMAN_REV_4100) { p->dqrr.dqrr_size = 4; p->dqrr.reset_bug = 1; } else { @@ -137,23 +287,96 @@ struct qbman_swp *qbman_swp_init(const struct qbman_swp_desc *d) pr_err("qbman_swp_sys_init() failed %d\n", ret); return NULL; } + + /* Verify that the DQRRPI is 0 - if it is not the portal isn't + * in default state which is an error + */ + if (qbman_cinh_read(&p->sys, QBMAN_CINH_SWP_DQPI) & 0xF) { + pr_err("qbman DQRR PI is not zero, portal is not clean\n"); + free(p); + return NULL; + } + /* SDQCR needs to be initialized to 0 when no channels are * being dequeued from or else the QMan HW will indicate an * error. The values that were calculated above will be * applied when dequeues from a specific channel are enabled. 
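	 * (The deferred programming happens in qbman_swp_push_set(),
	 * which writes s->sdq to QBMAN_CINH_SWP_SDQCR once at least
	 * one channel is enabled.)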
*/ qbman_cinh_write(&p->sys, QBMAN_CINH_SWP_SDQCR, 0); + + p->eqcr.pi_ring_size = 8; + if ((qman_version & QMAN_REV_MASK) >= QMAN_REV_5000 + && (d->cena_access_mode == qman_cena_fastest_access)) { + p->eqcr.pi_ring_size = 32; + qbman_swp_enqueue_array_mode_ptr = + qbman_swp_enqueue_array_mode_mem_back; + qbman_swp_enqueue_ring_mode_ptr = + qbman_swp_enqueue_ring_mode_mem_back; + qbman_swp_enqueue_multiple_ptr = + qbman_swp_enqueue_multiple_mem_back; + qbman_swp_enqueue_multiple_fd_ptr = + qbman_swp_enqueue_multiple_fd_mem_back; + qbman_swp_enqueue_multiple_desc_ptr = + qbman_swp_enqueue_multiple_desc_mem_back; + qbman_swp_pull_ptr = qbman_swp_pull_mem_back; + qbman_swp_dqrr_next_ptr = qbman_swp_dqrr_next_mem_back; + qbman_swp_release_ptr = qbman_swp_release_mem_back; + } + + if (dpaa2_svr_family == SVR_LS1080A) { + qbman_swp_enqueue_ring_mode_ptr = + qbman_swp_enqueue_ring_mode_cinh_read_direct; + qbman_swp_enqueue_multiple_ptr = + qbman_swp_enqueue_multiple_cinh_read_direct; + qbman_swp_enqueue_multiple_fd_ptr = + qbman_swp_enqueue_multiple_fd_cinh_read_direct; + qbman_swp_enqueue_multiple_desc_ptr = + qbman_swp_enqueue_multiple_desc_cinh_read_direct; + } + + for (mask_size = p->eqcr.pi_ring_size; mask_size > 0; mask_size >>= 1) + p->eqcr.pi_ci_mask = (p->eqcr.pi_ci_mask<<1) + 1; eqcr_pi = qbman_cinh_read(&p->sys, QBMAN_CINH_SWP_EQCR_PI); - p->eqcr.pi = eqcr_pi & 0xF; + p->eqcr.pi = eqcr_pi & p->eqcr.pi_ci_mask; p->eqcr.pi_vb = eqcr_pi & QB_VALID_BIT; - p->eqcr.ci = qbman_cinh_read(&p->sys, QBMAN_CINH_SWP_EQCR_CI) & 0xF; - p->eqcr.available = QBMAN_EQCR_SIZE - qm_cyc_diff(QBMAN_EQCR_SIZE, - p->eqcr.ci, p->eqcr.pi); + if ((p->desc.qman_version & QMAN_REV_MASK) >= QMAN_REV_5000 + && (d->cena_access_mode == qman_cena_fastest_access)) + p->eqcr.ci = qbman_cinh_read(&p->sys, QBMAN_CINH_SWP_EQCR_PI) + & p->eqcr.pi_ci_mask; + else + p->eqcr.ci = qbman_cinh_read(&p->sys, QBMAN_CINH_SWP_EQCR_CI) + & p->eqcr.pi_ci_mask; + p->eqcr.available = p->eqcr.pi_ring_size - + qm_cyc_diff(p->eqcr.pi_ring_size, + p->eqcr.ci & (p->eqcr.pi_ci_mask<<1), + p->eqcr.pi & (p->eqcr.pi_ci_mask<<1)); portal_idx_map[p->desc.idx] = p; return p; } +int qbman_swp_update(struct qbman_swp *p, int stash_off) +{ + const struct qbman_swp_desc *d = &p->desc; + struct qbman_swp_sys *s = &p->sys; + int ret; + + /* Nothing needs to be done for QBMAN rev > 5000 with fast access */ + if ((qman_version & QMAN_REV_MASK) >= QMAN_REV_5000 + && (d->cena_access_mode == qman_cena_fastest_access)) + return 0; + + ret = qbman_swp_sys_update(s, d, p->dqrr.dqrr_size, stash_off); + if (ret) { + pr_err("qbman_swp_sys_init() failed %d\n", ret); + return ret; + } + + p->stash_off = stash_off; + + return 0; +} + void qbman_swp_finish(struct qbman_swp *p) { #ifdef QBMAN_CHECKING @@ -230,7 +453,8 @@ int qbman_swp_interrupt_get_inhibit(struct qbman_swp *p) void qbman_swp_interrupt_set_inhibit(struct qbman_swp *p, int inhibit) { - qbman_cinh_write(&p->sys, QBMAN_CINH_SWP_IIR, inhibit ? 0xffffffff : 0); + qbman_cinh_write(&p->sys, QBMAN_CINH_SWP_IIR, + inhibit ? 
0xffffffff : 0); } /***********************/ @@ -247,7 +471,11 @@ void *qbman_swp_mc_start(struct qbman_swp *p) #ifdef QBMAN_CHECKING QBMAN_BUG_ON(p->mc.check != swp_mc_can_start); #endif - ret = qbman_cena_write_start(&p->sys, QBMAN_CENA_SWP_CR); + if ((p->desc.qman_version & QMAN_REV_MASK) >= QMAN_REV_5000 + && (p->desc.cena_access_mode == qman_cena_fastest_access)) + ret = qbman_cena_write_start(&p->sys, QBMAN_CENA_SWP_CR_MEM); + else + ret = qbman_cena_write_start(&p->sys, QBMAN_CENA_SWP_CR); #ifdef QBMAN_CHECKING if (!ret) p->mc.check = swp_mc_can_submit; @@ -267,8 +495,39 @@ void qbman_swp_mc_submit(struct qbman_swp *p, void *cmd, uint8_t cmd_verb) * caller wants to OR but has forgotten to do so. */ QBMAN_BUG_ON((*v & cmd_verb) != *v); + if ((p->desc.qman_version & QMAN_REV_MASK) >= QMAN_REV_5000 + && (p->desc.cena_access_mode == qman_cena_fastest_access)) { + *v = cmd_verb | p->mr.valid_bit; + qbman_cena_write_complete(&p->sys, QBMAN_CENA_SWP_CR_MEM, cmd); + dma_wmb(); + qbman_cinh_write(&p->sys, QBMAN_CINH_SWP_CR_RT, QMAN_RT_MODE); + } else { + dma_wmb(); + *v = cmd_verb | p->mc.valid_bit; + qbman_cena_write_complete(&p->sys, QBMAN_CENA_SWP_CR, cmd); + clean(cmd); + } +#ifdef QBMAN_CHECKING + p->mc.check = swp_mc_can_poll; +#endif +} + +void qbman_swp_mc_submit_cinh(struct qbman_swp *p, void *cmd, uint8_t cmd_verb) +{ + uint8_t *v = cmd; +#ifdef QBMAN_CHECKING + QBMAN_BUG_ON(!(p->mc.check != swp_mc_can_submit)); +#endif + /* TBD: "|=" is going to hurt performance. Need to move as many fields + * out of word zero, and for those that remain, the "OR" needs to occur + * at the caller side. This debug check helps to catch cases where the + * caller wants to OR but has forgotten to do so. + */ + QBMAN_BUG_ON((*v & cmd_verb) != *v); + dma_wmb(); *v = cmd_verb | p->mc.valid_bit; - qbman_cena_write_complete(&p->sys, QBMAN_CENA_SWP_CR, cmd); + qbman_cinh_write_complete(&p->sys, QBMAN_CENA_SWP_CR, cmd); + clean(cmd); #ifdef QBMAN_CHECKING p->mc.check = swp_mc_can_poll; #endif @@ -280,17 +539,56 @@ void *qbman_swp_mc_result(struct qbman_swp *p) #ifdef QBMAN_CHECKING QBMAN_BUG_ON(p->mc.check != swp_mc_can_poll); #endif - qbman_cena_invalidate_prefetch(&p->sys, - QBMAN_CENA_SWP_RR(p->mc.valid_bit)); - ret = qbman_cena_read(&p->sys, QBMAN_CENA_SWP_RR(p->mc.valid_bit)); - /* Remove the valid-bit - command completed if the rest is non-zero */ + if ((p->desc.qman_version & QMAN_REV_MASK) >= QMAN_REV_5000 + && (p->desc.cena_access_mode == qman_cena_fastest_access)) { + ret = qbman_cena_read(&p->sys, QBMAN_CENA_SWP_RR_MEM); + /* Command completed if the valid bit is toggled */ + if (p->mr.valid_bit != (ret[0] & QB_VALID_BIT)) + return NULL; + /* Remove the valid-bit - + * command completed iff the rest is non-zero + */ + verb = ret[0] & ~QB_VALID_BIT; + if (!verb) + return NULL; + p->mr.valid_bit ^= QB_VALID_BIT; + } else { + qbman_cena_invalidate_prefetch(&p->sys, + QBMAN_CENA_SWP_RR(p->mc.valid_bit)); + ret = qbman_cena_read(&p->sys, + QBMAN_CENA_SWP_RR(p->mc.valid_bit)); + /* Remove the valid-bit - + * command completed iff the rest is non-zero + */ + verb = ret[0] & ~QB_VALID_BIT; + if (!verb) + return NULL; + p->mc.valid_bit ^= QB_VALID_BIT; + } +#ifdef QBMAN_CHECKING + p->mc.check = swp_mc_can_start; +#endif + return ret; +} + +void *qbman_swp_mc_result_cinh(struct qbman_swp *p) +{ + uint32_t *ret, verb; +#ifdef QBMAN_CHECKING + QBMAN_BUG_ON(p->mc.check != swp_mc_can_poll); +#endif + ret = qbman_cinh_read_shadow(&p->sys, + QBMAN_CENA_SWP_RR(p->mc.valid_bit)); + /* Remove the valid-bit - + * 
command completed iff the rest is non-zero + */ verb = ret[0] & ~QB_VALID_BIT; if (!verb) return NULL; + p->mc.valid_bit ^= QB_VALID_BIT; #ifdef QBMAN_CHECKING p->mc.check = swp_mc_can_start; #endif - p->mc.valid_bit ^= QB_VALID_BIT; return ret; } @@ -418,13 +716,36 @@ void qbman_eq_desc_set_dca(struct qbman_eq_desc *d, int enable, } } -#define EQAR_IDX(eqar) ((eqar) & 0x7) +#define EQAR_IDX(eqar) ((eqar) & 0x1f) #define EQAR_VB(eqar) ((eqar) & 0x80) #define EQAR_SUCCESS(eqar) ((eqar) & 0x100) -static int qbman_swp_enqueue_array_mode(struct qbman_swp *s, - const struct qbman_eq_desc *d, - const struct qbman_fd *fd) +static inline void qbman_write_eqcr_am_rt_register(struct qbman_swp *p, + uint8_t idx) +{ + if (idx < 16) + qbman_cinh_write(&p->sys, QBMAN_CINH_SWP_EQCR_AM_RT + idx * 4, + QMAN_RT_MODE); + else + qbman_cinh_write(&p->sys, QBMAN_CINH_SWP_EQCR_AM_RT2 + + (idx - 16) * 4, + QMAN_RT_MODE); +} + +static void memcpy_byte_by_byte(void *to, const void *from, size_t n) +{ + const uint8_t *src = from; + volatile uint8_t *dest = to; + size_t i; + + for (i = 0; i < n; i++) + dest[i] = src[i]; +} + + +static int qbman_swp_enqueue_array_mode_direct(struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct qbman_fd *fd) { uint32_t *p; const uint32_t *cl = qb_cl(d); @@ -434,39 +755,69 @@ static int qbman_swp_enqueue_array_mode(struct qbman_swp *s, if (!EQAR_SUCCESS(eqar)) return -EBUSY; p = qbman_cena_write_start_wo_shadow(&s->sys, - QBMAN_CENA_SWP_EQCR(EQAR_IDX(eqar))); + QBMAN_CENA_SWP_EQCR(EQAR_IDX(eqar))); memcpy(&p[1], &cl[1], 28); memcpy(&p[8], fd, sizeof(*fd)); + /* Set the verb byte, have to substitute in the valid-bit */ - lwsync(); + dma_wmb(); p[0] = cl[0] | EQAR_VB(eqar); qbman_cena_write_complete_wo_shadow(&s->sys, - QBMAN_CENA_SWP_EQCR(EQAR_IDX(eqar))); + QBMAN_CENA_SWP_EQCR(EQAR_IDX(eqar))); return 0; } +static int qbman_swp_enqueue_array_mode_mem_back(struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct qbman_fd *fd) +{ + uint32_t *p; + const uint32_t *cl = qb_cl(d); + uint32_t eqar = qbman_cinh_read(&s->sys, QBMAN_CINH_SWP_EQAR); -static int qbman_swp_enqueue_ring_mode(struct qbman_swp *s, - const struct qbman_eq_desc *d, - const struct qbman_fd *fd) + pr_debug("EQAR=%08x\n", eqar); + if (!EQAR_SUCCESS(eqar)) + return -EBUSY; + p = qbman_cena_write_start_wo_shadow(&s->sys, + QBMAN_CENA_SWP_EQCR(EQAR_IDX(eqar))); + memcpy(&p[1], &cl[1], 28); + memcpy(&p[8], fd, sizeof(*fd)); + + /* Set the verb byte, have to substitute in the valid-bit */ + p[0] = cl[0] | EQAR_VB(eqar); + dma_wmb(); + qbman_write_eqcr_am_rt_register(s, EQAR_IDX(eqar)); + return 0; +} + +static inline int qbman_swp_enqueue_array_mode(struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct qbman_fd *fd) +{ + return qbman_swp_enqueue_array_mode_ptr(s, d, fd); +} + +static int qbman_swp_enqueue_ring_mode_direct(struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct qbman_fd *fd) { uint32_t *p; const uint32_t *cl = qb_cl(d); - uint32_t eqcr_ci; - uint8_t diff; + uint32_t eqcr_ci, full_mask, half_mask; + half_mask = (s->eqcr.pi_ci_mask>>1); + full_mask = s->eqcr.pi_ci_mask; if (!s->eqcr.available) { eqcr_ci = s->eqcr.ci; s->eqcr.ci = qbman_cena_read_reg(&s->sys, - QBMAN_CENA_SWP_EQCR_CI) & 0xF; - diff = qm_cyc_diff(QBMAN_EQCR_SIZE, - eqcr_ci, s->eqcr.ci); - s->eqcr.available += diff; - if (!diff) + QBMAN_CENA_SWP_EQCR_CI) & full_mask; + s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size, + eqcr_ci, s->eqcr.ci); + if (!s->eqcr.available) return -EBUSY; } p = 
qbman_cena_write_start_wo_shadow(&s->sys, - QBMAN_CENA_SWP_EQCR(s->eqcr.pi & 7)); + QBMAN_CENA_SWP_EQCR(s->eqcr.pi & half_mask)); memcpy(&p[1], &cl[1], 28); memcpy(&p[8], fd, sizeof(*fd)); lwsync(); @@ -474,16 +825,140 @@ static int qbman_swp_enqueue_ring_mode(struct qbman_swp *s, /* Set the verb byte, have to substitute in the valid-bit */ p[0] = cl[0] | s->eqcr.pi_vb; qbman_cena_write_complete_wo_shadow(&s->sys, - QBMAN_CENA_SWP_EQCR(s->eqcr.pi & 7)); + QBMAN_CENA_SWP_EQCR(s->eqcr.pi & half_mask)); + s->eqcr.pi++; + s->eqcr.pi &= full_mask; + s->eqcr.available--; + if (!(s->eqcr.pi & half_mask)) + s->eqcr.pi_vb ^= QB_VALID_BIT; + + return 0; +} + +static int qbman_swp_enqueue_ring_mode_cinh_read_direct( + struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct qbman_fd *fd) +{ + uint32_t *p; + const uint32_t *cl = qb_cl(d); + uint32_t eqcr_ci, full_mask, half_mask; + + half_mask = (s->eqcr.pi_ci_mask>>1); + full_mask = s->eqcr.pi_ci_mask; + if (!s->eqcr.available) { + eqcr_ci = s->eqcr.ci; + s->eqcr.ci = qbman_cinh_read(&s->sys, + QBMAN_CINH_SWP_EQCR_CI) & full_mask; + s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size, + eqcr_ci, s->eqcr.ci); + if (!s->eqcr.available) + return -EBUSY; + } + + p = qbman_cinh_write_start_wo_shadow(&s->sys, + QBMAN_CENA_SWP_EQCR(s->eqcr.pi & half_mask)); + memcpy(&p[1], &cl[1], 28); + memcpy(&p[8], fd, sizeof(*fd)); + lwsync(); + + /* Set the verb byte, have to substitute in the valid-bit */ + p[0] = cl[0] | s->eqcr.pi_vb; + s->eqcr.pi++; + s->eqcr.pi &= full_mask; + s->eqcr.available--; + if (!(s->eqcr.pi & half_mask)) + s->eqcr.pi_vb ^= QB_VALID_BIT; + + return 0; +} + +static int qbman_swp_enqueue_ring_mode_cinh_direct( + struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct qbman_fd *fd) +{ + uint32_t *p; + const uint32_t *cl = qb_cl(d); + uint32_t eqcr_ci, full_mask, half_mask; + + half_mask = (s->eqcr.pi_ci_mask>>1); + full_mask = s->eqcr.pi_ci_mask; + if (!s->eqcr.available) { + eqcr_ci = s->eqcr.ci; + s->eqcr.ci = qbman_cinh_read(&s->sys, + QBMAN_CINH_SWP_EQCR_CI) & full_mask; + s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size, + eqcr_ci, s->eqcr.ci); + if (!s->eqcr.available) + return -EBUSY; + } + + p = qbman_cinh_write_start_wo_shadow(&s->sys, + QBMAN_CENA_SWP_EQCR(s->eqcr.pi & half_mask)); + memcpy_byte_by_byte(&p[1], &cl[1], 28); + memcpy_byte_by_byte(&p[8], fd, sizeof(*fd)); + lwsync(); + + /* Set the verb byte, have to substitute in the valid-bit */ + p[0] = cl[0] | s->eqcr.pi_vb; s->eqcr.pi++; - s->eqcr.pi &= 0xF; + s->eqcr.pi &= full_mask; s->eqcr.available--; - if (!(s->eqcr.pi & 7)) + if (!(s->eqcr.pi & half_mask)) s->eqcr.pi_vb ^= QB_VALID_BIT; return 0; } +static int qbman_swp_enqueue_ring_mode_mem_back(struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct qbman_fd *fd) +{ + uint32_t *p; + const uint32_t *cl = qb_cl(d); + uint32_t eqcr_ci, full_mask, half_mask; + + half_mask = (s->eqcr.pi_ci_mask>>1); + full_mask = s->eqcr.pi_ci_mask; + if (!s->eqcr.available) { + eqcr_ci = s->eqcr.ci; + s->eqcr.ci = qbman_cena_read_reg(&s->sys, + QBMAN_CENA_SWP_EQCR_CI_MEMBACK) & full_mask; + s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size, + eqcr_ci, s->eqcr.ci); + if (!s->eqcr.available) + return -EBUSY; + } + + p = qbman_cena_write_start_wo_shadow(&s->sys, + QBMAN_CENA_SWP_EQCR(s->eqcr.pi & half_mask)); + memcpy(&p[1], &cl[1], 28); + memcpy(&p[8], fd, sizeof(*fd)); + + /* Set the verb byte, have to substitute in the valid-bit */ + p[0] = cl[0] | s->eqcr.pi_vb; + s->eqcr.pi++; + s->eqcr.pi &= 
full_mask; + s->eqcr.available--; + if (!(s->eqcr.pi & half_mask)) + s->eqcr.pi_vb ^= QB_VALID_BIT; + dma_wmb(); + qbman_cinh_write(&s->sys, QBMAN_CINH_SWP_EQCR_PI, + (QB_RT_BIT)|(s->eqcr.pi)|s->eqcr.pi_vb); + return 0; +} + +static int qbman_swp_enqueue_ring_mode(struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct qbman_fd *fd) +{ + if (!s->stash_off) + return qbman_swp_enqueue_ring_mode_ptr(s, d, fd); + else + return qbman_swp_enqueue_ring_mode_cinh_direct(s, d, fd); +} + int qbman_swp_enqueue(struct qbman_swp *s, const struct qbman_eq_desc *d, const struct qbman_fd *fd) { @@ -493,27 +968,27 @@ int qbman_swp_enqueue(struct qbman_swp *s, const struct qbman_eq_desc *d, return qbman_swp_enqueue_ring_mode(s, d, fd); } -int qbman_swp_enqueue_multiple(struct qbman_swp *s, - const struct qbman_eq_desc *d, - const struct qbman_fd *fd, - uint32_t *flags, - int num_frames) +static int qbman_swp_enqueue_multiple_direct(struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct qbman_fd *fd, + uint32_t *flags, + int num_frames) { - uint32_t *p; + uint32_t *p = NULL; const uint32_t *cl = qb_cl(d); - uint32_t eqcr_ci, eqcr_pi; - uint8_t diff; + uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask; int i, num_enqueued = 0; uint64_t addr_cena; + half_mask = (s->eqcr.pi_ci_mask>>1); + full_mask = s->eqcr.pi_ci_mask; if (!s->eqcr.available) { eqcr_ci = s->eqcr.ci; s->eqcr.ci = qbman_cena_read_reg(&s->sys, - QBMAN_CENA_SWP_EQCR_CI) & 0xF; - diff = qm_cyc_diff(QBMAN_EQCR_SIZE, - eqcr_ci, s->eqcr.ci); - s->eqcr.available += diff; - if (!diff) + QBMAN_CENA_SWP_EQCR_CI) & full_mask; + s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size, + eqcr_ci, s->eqcr.ci); + if (!s->eqcr.available) return 0; } @@ -524,11 +999,10 @@ int qbman_swp_enqueue_multiple(struct qbman_swp *s, /* Fill in the EQCR ring */ for (i = 0; i < num_enqueued; i++) { p = qbman_cena_write_start_wo_shadow(&s->sys, - QBMAN_CENA_SWP_EQCR(eqcr_pi & 7)); + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)); memcpy(&p[1], &cl[1], 28); memcpy(&p[8], &fd[i], sizeof(*fd)); eqcr_pi++; - eqcr_pi &= 0xF; } lwsync(); @@ -537,7 +1011,7 @@ int qbman_swp_enqueue_multiple(struct qbman_swp *s, eqcr_pi = s->eqcr.pi; for (i = 0; i < num_enqueued; i++) { p = qbman_cena_write_start_wo_shadow(&s->sys, - QBMAN_CENA_SWP_EQCR(eqcr_pi & 7)); + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)); p[0] = cl[0] | s->eqcr.pi_vb; if (flags && (flags[i] & QBMAN_ENQUEUE_FLAG_DCA)) { struct qbman_eq_desc *d = (struct qbman_eq_desc *)p; @@ -546,45 +1020,45 @@ int qbman_swp_enqueue_multiple(struct qbman_swp *s, ((flags[i]) & QBMAN_EQCR_DCA_IDXMASK); } eqcr_pi++; - eqcr_pi &= 0xF; - if (!(eqcr_pi & 7)) + if (!(eqcr_pi & half_mask)) s->eqcr.pi_vb ^= QB_VALID_BIT; } /* Flush all the cacheline without load/store in between */ eqcr_pi = s->eqcr.pi; - addr_cena = (uint64_t)s->sys.addr_cena; + addr_cena = (size_t)s->sys.addr_cena; for (i = 0; i < num_enqueued; i++) { - dcbf((uint64_t *)(addr_cena + - QBMAN_CENA_SWP_EQCR(eqcr_pi & 7))); + dcbf((uintptr_t)(addr_cena + + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask))); eqcr_pi++; - eqcr_pi &= 0xF; } - s->eqcr.pi = eqcr_pi; + s->eqcr.pi = eqcr_pi & full_mask; return num_enqueued; } -int qbman_swp_enqueue_multiple_desc(struct qbman_swp *s, - const struct qbman_eq_desc *d, - const struct qbman_fd *fd, - int num_frames) +static int qbman_swp_enqueue_multiple_cinh_read_direct( + struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct qbman_fd *fd, + uint32_t *flags, + int num_frames) { - uint32_t *p; - const uint32_t *cl; - uint32_t 
eqcr_ci, eqcr_pi; - uint8_t diff; + uint32_t *p = NULL; + const uint32_t *cl = qb_cl(d); + uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask; int i, num_enqueued = 0; uint64_t addr_cena; + half_mask = (s->eqcr.pi_ci_mask>>1); + full_mask = s->eqcr.pi_ci_mask; if (!s->eqcr.available) { eqcr_ci = s->eqcr.ci; - s->eqcr.ci = qbman_cena_read_reg(&s->sys, - QBMAN_CENA_SWP_EQCR_CI) & 0xF; - diff = qm_cyc_diff(QBMAN_EQCR_SIZE, - eqcr_ci, s->eqcr.ci); - s->eqcr.available += diff; - if (!diff) + s->eqcr.ci = qbman_cinh_read(&s->sys, + QBMAN_CINH_SWP_EQCR_CI) & full_mask; + s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size, + eqcr_ci, s->eqcr.ci); + if (!s->eqcr.available) return 0; } @@ -595,12 +1069,10 @@ int qbman_swp_enqueue_multiple_desc(struct qbman_swp *s, /* Fill in the EQCR ring */ for (i = 0; i < num_enqueued; i++) { p = qbman_cena_write_start_wo_shadow(&s->sys, - QBMAN_CENA_SWP_EQCR(eqcr_pi & 7)); - cl = qb_cl(&d[i]); + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)); memcpy(&p[1], &cl[1], 28); memcpy(&p[8], &fd[i], sizeof(*fd)); eqcr_pi++; - eqcr_pi &= 0xF; } lwsync(); @@ -609,76 +1081,737 @@ int qbman_swp_enqueue_multiple_desc(struct qbman_swp *s, eqcr_pi = s->eqcr.pi; for (i = 0; i < num_enqueued; i++) { p = qbman_cena_write_start_wo_shadow(&s->sys, - QBMAN_CENA_SWP_EQCR(eqcr_pi & 7)); - cl = qb_cl(&d[i]); + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)); p[0] = cl[0] | s->eqcr.pi_vb; + if (flags && (flags[i] & QBMAN_ENQUEUE_FLAG_DCA)) { + struct qbman_eq_desc *d = (struct qbman_eq_desc *)p; + + d->eq.dca = (1 << QB_ENQUEUE_CMD_DCA_EN_SHIFT) | + ((flags[i]) & QBMAN_EQCR_DCA_IDXMASK); + } eqcr_pi++; - eqcr_pi &= 0xF; - if (!(eqcr_pi & 7)) + if (!(eqcr_pi & half_mask)) s->eqcr.pi_vb ^= QB_VALID_BIT; } /* Flush all the cacheline without load/store in between */ eqcr_pi = s->eqcr.pi; - addr_cena = (uint64_t)s->sys.addr_cena; + addr_cena = (size_t)s->sys.addr_cena; for (i = 0; i < num_enqueued; i++) { - dcbf((uint64_t *)(addr_cena + - QBMAN_CENA_SWP_EQCR(eqcr_pi & 7))); + dcbf(addr_cena + + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)); eqcr_pi++; - eqcr_pi &= 0xF; } - s->eqcr.pi = eqcr_pi; + s->eqcr.pi = eqcr_pi & full_mask; return num_enqueued; } -/*************************/ -/* Static (push) dequeue */ -/*************************/ - -void qbman_swp_push_get(struct qbman_swp *s, uint8_t channel_idx, int *enabled) +static int qbman_swp_enqueue_multiple_cinh_direct( + struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct qbman_fd *fd, + uint32_t *flags, + int num_frames) { - uint16_t src = (s->sdq >> QB_SDQCR_SRC_SHIFT) & QB_SDQCR_SRC_MASK; - - QBMAN_BUG_ON(channel_idx > 15); - *enabled = src | (1 << channel_idx); -} + uint32_t *p = NULL; + const uint32_t *cl = qb_cl(d); + uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask; + int i, num_enqueued = 0; -void qbman_swp_push_set(struct qbman_swp *s, uint8_t channel_idx, int enable) -{ - uint16_t dqsrc; + half_mask = (s->eqcr.pi_ci_mask>>1); + full_mask = s->eqcr.pi_ci_mask; + if (!s->eqcr.available) { + eqcr_ci = s->eqcr.ci; + s->eqcr.ci = qbman_cinh_read(&s->sys, + QBMAN_CINH_SWP_EQCR_CI) & full_mask; + s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size, + eqcr_ci, s->eqcr.ci); + if (!s->eqcr.available) + return 0; + } - QBMAN_BUG_ON(channel_idx > 15); - if (enable) - s->sdq |= 1 << channel_idx; - else - s->sdq &= ~(1 << channel_idx); + eqcr_pi = s->eqcr.pi; + num_enqueued = (s->eqcr.available < num_frames) ? 
+ s->eqcr.available : num_frames; + s->eqcr.available -= num_enqueued; + /* Fill in the EQCR ring */ + for (i = 0; i < num_enqueued; i++) { + p = qbman_cinh_write_start_wo_shadow(&s->sys, + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)); + memcpy_byte_by_byte(&p[1], &cl[1], 28); + memcpy_byte_by_byte(&p[8], &fd[i], sizeof(*fd)); + eqcr_pi++; + } - /* Read make the complete src map. If no channels are enabled - * the SDQCR must be 0 or else QMan will assert errors - */ - dqsrc = (s->sdq >> QB_SDQCR_SRC_SHIFT) & QB_SDQCR_SRC_MASK; - if (dqsrc != 0) - qbman_cinh_write(&s->sys, QBMAN_CINH_SWP_SDQCR, s->sdq); - else - qbman_cinh_write(&s->sys, QBMAN_CINH_SWP_SDQCR, 0); -} + lwsync(); -/***************************/ -/* Volatile (pull) dequeue */ -/***************************/ + /* Set the verb byte, have to substitute in the valid-bit */ + eqcr_pi = s->eqcr.pi; + for (i = 0; i < num_enqueued; i++) { + p = qbman_cinh_write_start_wo_shadow(&s->sys, + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)); + p[0] = cl[0] | s->eqcr.pi_vb; + if (flags && (flags[i] & QBMAN_ENQUEUE_FLAG_DCA)) { + struct qbman_eq_desc *d = (struct qbman_eq_desc *)p; -/* These should be const, eventually */ -#define QB_VDQCR_VERB_DCT_SHIFT 0 -#define QB_VDQCR_VERB_DT_SHIFT 2 -#define QB_VDQCR_VERB_RLS_SHIFT 4 -#define QB_VDQCR_VERB_WAE_SHIFT 5 + d->eq.dca = (1 << QB_ENQUEUE_CMD_DCA_EN_SHIFT) | + ((flags[i]) & QBMAN_EQCR_DCA_IDXMASK); + } + eqcr_pi++; + if (!(eqcr_pi & half_mask)) + s->eqcr.pi_vb ^= QB_VALID_BIT; + } -enum qb_pull_dt_e { - qb_pull_dt_channel, - qb_pull_dt_workqueue, - qb_pull_dt_framequeue -}; + s->eqcr.pi = eqcr_pi & full_mask; + + return num_enqueued; +} + +static int qbman_swp_enqueue_multiple_mem_back(struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct qbman_fd *fd, + uint32_t *flags, + int num_frames) +{ + uint32_t *p = NULL; + const uint32_t *cl = qb_cl(d); + uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask; + int i, num_enqueued = 0; + + half_mask = (s->eqcr.pi_ci_mask>>1); + full_mask = s->eqcr.pi_ci_mask; + if (!s->eqcr.available) { + eqcr_ci = s->eqcr.ci; + s->eqcr.ci = qbman_cena_read_reg(&s->sys, + QBMAN_CENA_SWP_EQCR_CI_MEMBACK) & full_mask; + s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size, + eqcr_ci, s->eqcr.ci); + if (!s->eqcr.available) + return 0; + } + + eqcr_pi = s->eqcr.pi; + num_enqueued = (s->eqcr.available < num_frames) ? 
+ s->eqcr.available : num_frames; + s->eqcr.available -= num_enqueued; + /* Fill in the EQCR ring */ + for (i = 0; i < num_enqueued; i++) { + p = qbman_cena_write_start_wo_shadow(&s->sys, + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)); + memcpy(&p[1], &cl[1], 28); + memcpy(&p[8], &fd[i], sizeof(*fd)); + if (flags && (flags[i] & QBMAN_ENQUEUE_FLAG_DCA)) { + struct qbman_eq_desc *d = (struct qbman_eq_desc *)p; + + d->eq.dca = (1 << QB_ENQUEUE_CMD_DCA_EN_SHIFT) | + ((flags[i]) & QBMAN_EQCR_DCA_IDXMASK); + } + eqcr_pi++; + p[0] = cl[0] | s->eqcr.pi_vb; + + if (!(eqcr_pi & half_mask)) + s->eqcr.pi_vb ^= QB_VALID_BIT; + } + s->eqcr.pi = eqcr_pi & full_mask; + + dma_wmb(); + qbman_cinh_write(&s->sys, QBMAN_CINH_SWP_EQCR_PI, + (QB_RT_BIT)|(s->eqcr.pi)|s->eqcr.pi_vb); + return num_enqueued; +} + +int qbman_swp_enqueue_multiple(struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct qbman_fd *fd, + uint32_t *flags, + int num_frames) +{ + if (!s->stash_off) + return qbman_swp_enqueue_multiple_ptr(s, d, fd, flags, + num_frames); + else + return qbman_swp_enqueue_multiple_cinh_direct(s, d, fd, flags, + num_frames); +} + +static int qbman_swp_enqueue_multiple_fd_direct(struct qbman_swp *s, + const struct qbman_eq_desc *d, + struct qbman_fd **fd, + uint32_t *flags, + int num_frames) +{ + uint32_t *p = NULL; + const uint32_t *cl = qb_cl(d); + uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask; + int i, num_enqueued = 0; + uint64_t addr_cena; + + half_mask = (s->eqcr.pi_ci_mask>>1); + full_mask = s->eqcr.pi_ci_mask; + if (!s->eqcr.available) { + eqcr_ci = s->eqcr.ci; + s->eqcr.ci = qbman_cena_read_reg(&s->sys, + QBMAN_CENA_SWP_EQCR_CI) & full_mask; + s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size, + eqcr_ci, s->eqcr.ci); + if (!s->eqcr.available) + return 0; + } + + eqcr_pi = s->eqcr.pi; + num_enqueued = (s->eqcr.available < num_frames) ? 
+ s->eqcr.available : num_frames; + s->eqcr.available -= num_enqueued; + /* Fill in the EQCR ring */ + for (i = 0; i < num_enqueued; i++) { + p = qbman_cena_write_start_wo_shadow(&s->sys, + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)); + memcpy(&p[1], &cl[1], 28); + memcpy(&p[8], fd[i], sizeof(struct qbman_fd)); + eqcr_pi++; + } + + lwsync(); + + /* Set the verb byte, have to substitute in the valid-bit */ + eqcr_pi = s->eqcr.pi; + for (i = 0; i < num_enqueued; i++) { + p = qbman_cena_write_start_wo_shadow(&s->sys, + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)); + p[0] = cl[0] | s->eqcr.pi_vb; + if (flags && (flags[i] & QBMAN_ENQUEUE_FLAG_DCA)) { + struct qbman_eq_desc *d = (struct qbman_eq_desc *)p; + + d->eq.dca = (1 << QB_ENQUEUE_CMD_DCA_EN_SHIFT) | + ((flags[i]) & QBMAN_EQCR_DCA_IDXMASK); + } + eqcr_pi++; + if (!(eqcr_pi & half_mask)) + s->eqcr.pi_vb ^= QB_VALID_BIT; + } + + /* Flush all the cacheline without load/store in between */ + eqcr_pi = s->eqcr.pi; + addr_cena = (size_t)s->sys.addr_cena; + for (i = 0; i < num_enqueued; i++) { + dcbf(addr_cena + + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)); + eqcr_pi++; + } + s->eqcr.pi = eqcr_pi & full_mask; + + return num_enqueued; +} + +static int qbman_swp_enqueue_multiple_fd_cinh_read_direct( + struct qbman_swp *s, + const struct qbman_eq_desc *d, + struct qbman_fd **fd, + uint32_t *flags, + int num_frames) +{ + uint32_t *p = NULL; + const uint32_t *cl = qb_cl(d); + uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask; + int i, num_enqueued = 0; + uint64_t addr_cena; + + half_mask = (s->eqcr.pi_ci_mask>>1); + full_mask = s->eqcr.pi_ci_mask; + if (!s->eqcr.available) { + eqcr_ci = s->eqcr.ci; + s->eqcr.ci = qbman_cinh_read(&s->sys, + QBMAN_CINH_SWP_EQCR_CI) & full_mask; + s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size, + eqcr_ci, s->eqcr.ci); + if (!s->eqcr.available) + return 0; + } + + eqcr_pi = s->eqcr.pi; + num_enqueued = (s->eqcr.available < num_frames) ? 
+ s->eqcr.available : num_frames; + s->eqcr.available -= num_enqueued; + /* Fill in the EQCR ring */ + for (i = 0; i < num_enqueued; i++) { + p = qbman_cena_write_start_wo_shadow(&s->sys, + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)); + memcpy(&p[1], &cl[1], 28); + memcpy(&p[8], fd[i], sizeof(struct qbman_fd)); + eqcr_pi++; + } + + lwsync(); + + /* Set the verb byte, have to substitute in the valid-bit */ + eqcr_pi = s->eqcr.pi; + for (i = 0; i < num_enqueued; i++) { + p = qbman_cena_write_start_wo_shadow(&s->sys, + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)); + p[0] = cl[0] | s->eqcr.pi_vb; + if (flags && (flags[i] & QBMAN_ENQUEUE_FLAG_DCA)) { + struct qbman_eq_desc *d = (struct qbman_eq_desc *)p; + + d->eq.dca = (1 << QB_ENQUEUE_CMD_DCA_EN_SHIFT) | + ((flags[i]) & QBMAN_EQCR_DCA_IDXMASK); + } + eqcr_pi++; + if (!(eqcr_pi & half_mask)) + s->eqcr.pi_vb ^= QB_VALID_BIT; + } + + /* Flush all the cacheline without load/store in between */ + eqcr_pi = s->eqcr.pi; + addr_cena = (size_t)s->sys.addr_cena; + for (i = 0; i < num_enqueued; i++) { + dcbf(addr_cena + + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)); + eqcr_pi++; + } + s->eqcr.pi = eqcr_pi & full_mask; + + return num_enqueued; +} + +static int qbman_swp_enqueue_multiple_fd_cinh_direct( + struct qbman_swp *s, + const struct qbman_eq_desc *d, + struct qbman_fd **fd, + uint32_t *flags, + int num_frames) +{ + uint32_t *p = NULL; + const uint32_t *cl = qb_cl(d); + uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask; + int i, num_enqueued = 0; + + half_mask = (s->eqcr.pi_ci_mask>>1); + full_mask = s->eqcr.pi_ci_mask; + if (!s->eqcr.available) { + eqcr_ci = s->eqcr.ci; + s->eqcr.ci = qbman_cinh_read(&s->sys, + QBMAN_CINH_SWP_EQCR_CI) & full_mask; + s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size, + eqcr_ci, s->eqcr.ci); + if (!s->eqcr.available) + return 0; + } + + eqcr_pi = s->eqcr.pi; + num_enqueued = (s->eqcr.available < num_frames) ? 
+ s->eqcr.available : num_frames; + s->eqcr.available -= num_enqueued; + /* Fill in the EQCR ring */ + for (i = 0; i < num_enqueued; i++) { + p = qbman_cinh_write_start_wo_shadow(&s->sys, + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)); + memcpy_byte_by_byte(&p[1], &cl[1], 28); + memcpy_byte_by_byte(&p[8], fd[i], sizeof(struct qbman_fd)); + eqcr_pi++; + } + + lwsync(); + + /* Set the verb byte, have to substitute in the valid-bit */ + eqcr_pi = s->eqcr.pi; + for (i = 0; i < num_enqueued; i++) { + p = qbman_cinh_write_start_wo_shadow(&s->sys, + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)); + p[0] = cl[0] | s->eqcr.pi_vb; + if (flags && (flags[i] & QBMAN_ENQUEUE_FLAG_DCA)) { + struct qbman_eq_desc *d = (struct qbman_eq_desc *)p; + + d->eq.dca = (1 << QB_ENQUEUE_CMD_DCA_EN_SHIFT) | + ((flags[i]) & QBMAN_EQCR_DCA_IDXMASK); + } + eqcr_pi++; + if (!(eqcr_pi & half_mask)) + s->eqcr.pi_vb ^= QB_VALID_BIT; + } + + s->eqcr.pi = eqcr_pi & full_mask; + + return num_enqueued; +} + +static int qbman_swp_enqueue_multiple_fd_mem_back(struct qbman_swp *s, + const struct qbman_eq_desc *d, + struct qbman_fd **fd, + uint32_t *flags, + int num_frames) +{ + uint32_t *p = NULL; + const uint32_t *cl = qb_cl(d); + uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask; + int i, num_enqueued = 0; + + half_mask = (s->eqcr.pi_ci_mask>>1); + full_mask = s->eqcr.pi_ci_mask; + if (!s->eqcr.available) { + eqcr_ci = s->eqcr.ci; + s->eqcr.ci = qbman_cena_read_reg(&s->sys, + QBMAN_CENA_SWP_EQCR_CI_MEMBACK) & full_mask; + s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size, + eqcr_ci, s->eqcr.ci); + if (!s->eqcr.available) + return 0; + } + + eqcr_pi = s->eqcr.pi; + num_enqueued = (s->eqcr.available < num_frames) ? + s->eqcr.available : num_frames; + s->eqcr.available -= num_enqueued; + /* Fill in the EQCR ring */ + for (i = 0; i < num_enqueued; i++) { + p = qbman_cena_write_start_wo_shadow(&s->sys, + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)); + memcpy(&p[1], &cl[1], 28); + memcpy(&p[8], fd[i], sizeof(struct qbman_fd)); + eqcr_pi++; + } + + /* Set the verb byte, have to substitute in the valid-bit */ + eqcr_pi = s->eqcr.pi; + for (i = 0; i < num_enqueued; i++) { + p = qbman_cena_write_start_wo_shadow(&s->sys, + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)); + p[0] = cl[0] | s->eqcr.pi_vb; + if (flags && (flags[i] & QBMAN_ENQUEUE_FLAG_DCA)) { + struct qbman_eq_desc *d = (struct qbman_eq_desc *)p; + + d->eq.dca = (1 << QB_ENQUEUE_CMD_DCA_EN_SHIFT) | + ((flags[i]) & QBMAN_EQCR_DCA_IDXMASK); + } + eqcr_pi++; + if (!(eqcr_pi & half_mask)) + s->eqcr.pi_vb ^= QB_VALID_BIT; + } + s->eqcr.pi = eqcr_pi & full_mask; + + dma_wmb(); + qbman_cinh_write(&s->sys, QBMAN_CINH_SWP_EQCR_PI, + (QB_RT_BIT)|(s->eqcr.pi)|s->eqcr.pi_vb); + return num_enqueued; +} + +int qbman_swp_enqueue_multiple_fd(struct qbman_swp *s, + const struct qbman_eq_desc *d, + struct qbman_fd **fd, + uint32_t *flags, + int num_frames) +{ + if (!s->stash_off) + return qbman_swp_enqueue_multiple_fd_ptr(s, d, fd, flags, + num_frames); + else + return qbman_swp_enqueue_multiple_fd_cinh_direct(s, d, fd, + flags, num_frames); +} + +static int qbman_swp_enqueue_multiple_desc_direct(struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct qbman_fd *fd, + int num_frames) +{ + uint32_t *p; + const uint32_t *cl; + uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask; + int i, num_enqueued = 0; + uint64_t addr_cena; + + half_mask = (s->eqcr.pi_ci_mask>>1); + full_mask = s->eqcr.pi_ci_mask; + if (!s->eqcr.available) { + eqcr_ci = s->eqcr.ci; + s->eqcr.ci = qbman_cena_read_reg(&s->sys, + 
QBMAN_CENA_SWP_EQCR_CI) & full_mask; + s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size, + eqcr_ci, s->eqcr.ci); + if (!s->eqcr.available) + return 0; + } + + eqcr_pi = s->eqcr.pi; + num_enqueued = (s->eqcr.available < num_frames) ? + s->eqcr.available : num_frames; + s->eqcr.available -= num_enqueued; + /* Fill in the EQCR ring */ + for (i = 0; i < num_enqueued; i++) { + p = qbman_cena_write_start_wo_shadow(&s->sys, + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)); + cl = qb_cl(&d[i]); + memcpy(&p[1], &cl[1], 28); + memcpy(&p[8], &fd[i], sizeof(*fd)); + eqcr_pi++; + } + + lwsync(); + + /* Set the verb byte, have to substitute in the valid-bit */ + eqcr_pi = s->eqcr.pi; + for (i = 0; i < num_enqueued; i++) { + p = qbman_cena_write_start_wo_shadow(&s->sys, + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)); + cl = qb_cl(&d[i]); + p[0] = cl[0] | s->eqcr.pi_vb; + eqcr_pi++; + if (!(eqcr_pi & half_mask)) + s->eqcr.pi_vb ^= QB_VALID_BIT; + } + + /* Flush all the cacheline without load/store in between */ + eqcr_pi = s->eqcr.pi; + addr_cena = (size_t)s->sys.addr_cena; + for (i = 0; i < num_enqueued; i++) { + dcbf((uintptr_t)(addr_cena + + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask))); + eqcr_pi++; + } + s->eqcr.pi = eqcr_pi & full_mask; + + return num_enqueued; +} + +static int qbman_swp_enqueue_multiple_desc_cinh_read_direct( + struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct qbman_fd *fd, + int num_frames) +{ + uint32_t *p; + const uint32_t *cl; + uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask; + int i, num_enqueued = 0; + uint64_t addr_cena; + + half_mask = (s->eqcr.pi_ci_mask>>1); + full_mask = s->eqcr.pi_ci_mask; + if (!s->eqcr.available) { + eqcr_ci = s->eqcr.ci; + s->eqcr.ci = qbman_cinh_read(&s->sys, + QBMAN_CINH_SWP_EQCR_CI) & full_mask; + s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size, + eqcr_ci, s->eqcr.ci); + if (!s->eqcr.available) + return 0; + } + + eqcr_pi = s->eqcr.pi; + num_enqueued = (s->eqcr.available < num_frames) ? 
+ s->eqcr.available : num_frames; + s->eqcr.available -= num_enqueued; + /* Fill in the EQCR ring */ + for (i = 0; i < num_enqueued; i++) { + p = qbman_cena_write_start_wo_shadow(&s->sys, + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)); + cl = qb_cl(&d[i]); + memcpy(&p[1], &cl[1], 28); + memcpy(&p[8], &fd[i], sizeof(*fd)); + eqcr_pi++; + } + + lwsync(); + + /* Set the verb byte, have to substitute in the valid-bit */ + eqcr_pi = s->eqcr.pi; + for (i = 0; i < num_enqueued; i++) { + p = qbman_cena_write_start_wo_shadow(&s->sys, + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)); + cl = qb_cl(&d[i]); + p[0] = cl[0] | s->eqcr.pi_vb; + eqcr_pi++; + if (!(eqcr_pi & half_mask)) + s->eqcr.pi_vb ^= QB_VALID_BIT; + } + + /* Flush all the cacheline without load/store in between */ + eqcr_pi = s->eqcr.pi; + addr_cena = (size_t)s->sys.addr_cena; + for (i = 0; i < num_enqueued; i++) { + dcbf(addr_cena + + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)); + eqcr_pi++; + } + s->eqcr.pi = eqcr_pi & full_mask; + + return num_enqueued; +} + +static int qbman_swp_enqueue_multiple_desc_cinh_direct( + struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct qbman_fd *fd, + int num_frames) +{ + uint32_t *p; + const uint32_t *cl; + uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask; + int i, num_enqueued = 0; + + half_mask = (s->eqcr.pi_ci_mask>>1); + full_mask = s->eqcr.pi_ci_mask; + if (!s->eqcr.available) { + eqcr_ci = s->eqcr.ci; + s->eqcr.ci = qbman_cinh_read(&s->sys, + QBMAN_CINH_SWP_EQCR_CI) & full_mask; + s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size, + eqcr_ci, s->eqcr.ci); + if (!s->eqcr.available) + return 0; + } + + eqcr_pi = s->eqcr.pi; + num_enqueued = (s->eqcr.available < num_frames) ? + s->eqcr.available : num_frames; + s->eqcr.available -= num_enqueued; + /* Fill in the EQCR ring */ + for (i = 0; i < num_enqueued; i++) { + p = qbman_cinh_write_start_wo_shadow(&s->sys, + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)); + cl = qb_cl(&d[i]); + memcpy_byte_by_byte(&p[1], &cl[1], 28); + memcpy_byte_by_byte(&p[8], &fd[i], sizeof(*fd)); + eqcr_pi++; + } + + lwsync(); + + /* Set the verb byte, have to substitute in the valid-bit */ + eqcr_pi = s->eqcr.pi; + for (i = 0; i < num_enqueued; i++) { + p = qbman_cinh_write_start_wo_shadow(&s->sys, + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)); + cl = qb_cl(&d[i]); + p[0] = cl[0] | s->eqcr.pi_vb; + eqcr_pi++; + if (!(eqcr_pi & half_mask)) + s->eqcr.pi_vb ^= QB_VALID_BIT; + } + + s->eqcr.pi = eqcr_pi & full_mask; + + return num_enqueued; +} + +static int qbman_swp_enqueue_multiple_desc_mem_back(struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct qbman_fd *fd, + int num_frames) +{ + uint32_t *p; + const uint32_t *cl; + uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask; + int i, num_enqueued = 0; + + half_mask = (s->eqcr.pi_ci_mask>>1); + full_mask = s->eqcr.pi_ci_mask; + if (!s->eqcr.available) { + eqcr_ci = s->eqcr.ci; + s->eqcr.ci = qbman_cena_read_reg(&s->sys, + QBMAN_CENA_SWP_EQCR_CI_MEMBACK) & full_mask; + s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size, + eqcr_ci, s->eqcr.ci); + if (!s->eqcr.available) + return 0; + } + + eqcr_pi = s->eqcr.pi; + num_enqueued = (s->eqcr.available < num_frames) ? 
+ s->eqcr.available : num_frames; + s->eqcr.available -= num_enqueued; + /* Fill in the EQCR ring */ + for (i = 0; i < num_enqueued; i++) { + p = qbman_cena_write_start_wo_shadow(&s->sys, + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)); + cl = qb_cl(&d[i]); + memcpy(&p[1], &cl[1], 28); + memcpy(&p[8], &fd[i], sizeof(*fd)); + eqcr_pi++; + } + + /* Set the verb byte, have to substitute in the valid-bit */ + eqcr_pi = s->eqcr.pi; + for (i = 0; i < num_enqueued; i++) { + p = qbman_cena_write_start_wo_shadow(&s->sys, + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)); + cl = qb_cl(&d[i]); + p[0] = cl[0] | s->eqcr.pi_vb; + eqcr_pi++; + if (!(eqcr_pi & half_mask)) + s->eqcr.pi_vb ^= QB_VALID_BIT; + } + + s->eqcr.pi = eqcr_pi & full_mask; + + dma_wmb(); + qbman_cinh_write(&s->sys, QBMAN_CINH_SWP_EQCR_PI, + (QB_RT_BIT)|(s->eqcr.pi)|s->eqcr.pi_vb); + + return num_enqueued; +} +int qbman_swp_enqueue_multiple_desc(struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct qbman_fd *fd, + int num_frames) +{ + if (!s->stash_off) + return qbman_swp_enqueue_multiple_desc_ptr(s, d, fd, + num_frames); + else + return qbman_swp_enqueue_multiple_desc_cinh_direct(s, d, fd, + num_frames); + +} + +/*************************/ +/* Static (push) dequeue */ +/*************************/ + +void qbman_swp_push_get(struct qbman_swp *s, uint8_t channel_idx, int *enabled) +{ + uint16_t src = (s->sdq >> QB_SDQCR_SRC_SHIFT) & QB_SDQCR_SRC_MASK; + + QBMAN_BUG_ON(channel_idx > 15); + *enabled = src | (1 << channel_idx); +} + +void qbman_swp_push_set(struct qbman_swp *s, uint8_t channel_idx, int enable) +{ + uint16_t dqsrc; + + QBMAN_BUG_ON(channel_idx > 15); + if (enable) + s->sdq |= 1 << channel_idx; + else + s->sdq &= ~(1 << channel_idx); + + /* Read make the complete src map. If no channels are enabled + * the SDQCR must be 0 or else QMan will assert errors + */ + dqsrc = (s->sdq >> QB_SDQCR_SRC_SHIFT) & QB_SDQCR_SRC_MASK; + if (dqsrc != 0) + qbman_cinh_write(&s->sys, QBMAN_CINH_SWP_SDQCR, s->sdq); + else + qbman_cinh_write(&s->sys, QBMAN_CINH_SWP_SDQCR, 0); +} + +/***************************/ +/* Volatile (pull) dequeue */ +/***************************/ + +/* These should be const, eventually */ +#define QB_VDQCR_VERB_DCT_SHIFT 0 +#define QB_VDQCR_VERB_DT_SHIFT 2 +#define QB_VDQCR_VERB_RLS_SHIFT 4 +#define QB_VDQCR_VERB_WAE_SHIFT 5 +#define QB_VDQCR_VERB_RAD_SHIFT 6 + +enum qb_pull_dt_e { + qb_pull_dt_channel, + qb_pull_dt_workqueue, + qb_pull_dt_framequeue +}; void qbman_pull_desc_clear(struct qbman_pull_desc *d) { @@ -690,7 +1823,7 @@ void qbman_pull_desc_set_storage(struct qbman_pull_desc *d, dma_addr_t storage_phys, int stash) { - d->pull.rsp_addr_virt = (uint64_t)storage; + d->pull.rsp_addr_virt = (size_t)storage; if (!storage) { d->pull.verb &= ~(1 << QB_VDQCR_VERB_RLS_SHIFT); @@ -705,7 +1838,8 @@ void qbman_pull_desc_set_storage(struct qbman_pull_desc *d, d->pull.rsp_addr = storage_phys; } -void qbman_pull_desc_set_numframes(struct qbman_pull_desc *d, uint8_t numframes) +void qbman_pull_desc_set_numframes(struct qbman_pull_desc *d, + uint8_t numframes) { d->pull.numf = numframes - 1; } @@ -730,15 +1864,77 @@ void qbman_pull_desc_set_wq(struct qbman_pull_desc *d, uint32_t wqid, d->pull.dq_src = wqid; } -void qbman_pull_desc_set_channel(struct qbman_pull_desc *d, uint32_t chid, - enum qbman_pull_type_e dct) -{ - d->pull.verb |= dct << QB_VDQCR_VERB_DCT_SHIFT; - d->pull.verb |= qb_pull_dt_channel << QB_VDQCR_VERB_DT_SHIFT; - d->pull.dq_src = chid; +void qbman_pull_desc_set_channel(struct qbman_pull_desc *d, uint32_t 
chid, + enum qbman_pull_type_e dct) +{ + d->pull.verb |= dct << QB_VDQCR_VERB_DCT_SHIFT; + d->pull.verb |= qb_pull_dt_channel << QB_VDQCR_VERB_DT_SHIFT; + d->pull.dq_src = chid; +} + +void qbman_pull_desc_set_rad(struct qbman_pull_desc *d, int rad) +{ + if (d->pull.verb & (1 << QB_VDQCR_VERB_RLS_SHIFT)) { + if (rad) + d->pull.verb |= 1 << QB_VDQCR_VERB_RAD_SHIFT; + else + d->pull.verb &= ~(1 << QB_VDQCR_VERB_RAD_SHIFT); + } else { + printf("The RAD feature is not valid when RLS = 0\n"); + } +} + +static int qbman_swp_pull_direct(struct qbman_swp *s, + struct qbman_pull_desc *d) +{ + uint32_t *p; + uint32_t *cl = qb_cl(d); + + if (!atomic_dec_and_test(&s->vdq.busy)) { + atomic_inc(&s->vdq.busy); + return -EBUSY; + } + + d->pull.tok = s->sys.idx + 1; + s->vdq.storage = (void *)(size_t)d->pull.rsp_addr_virt; + p = qbman_cena_write_start_wo_shadow(&s->sys, QBMAN_CENA_SWP_VDQCR); + memcpy(&p[1], &cl[1], 12); + + /* Set the verb byte, have to substitute in the valid-bit */ + lwsync(); + p[0] = cl[0] | s->vdq.valid_bit; + s->vdq.valid_bit ^= QB_VALID_BIT; + qbman_cena_write_complete_wo_shadow(&s->sys, QBMAN_CENA_SWP_VDQCR); + + return 0; +} + +static int qbman_swp_pull_cinh_direct(struct qbman_swp *s, + struct qbman_pull_desc *d) +{ + uint32_t *p; + uint32_t *cl = qb_cl(d); + + if (!atomic_dec_and_test(&s->vdq.busy)) { + atomic_inc(&s->vdq.busy); + return -EBUSY; + } + + d->pull.tok = s->sys.idx + 1; + s->vdq.storage = (void *)(size_t)d->pull.rsp_addr_virt; + p = qbman_cinh_write_start_wo_shadow(&s->sys, QBMAN_CENA_SWP_VDQCR); + memcpy_byte_by_byte(&p[1], &cl[1], 12); + + /* Set the verb byte, have to substitute in the valid-bit */ + lwsync(); + p[0] = cl[0] | s->vdq.valid_bit; + s->vdq.valid_bit ^= QB_VALID_BIT; + + return 0; } -int qbman_swp_pull(struct qbman_swp *s, struct qbman_pull_desc *d) +static int qbman_swp_pull_mem_back(struct qbman_swp *s, + struct qbman_pull_desc *d) { uint32_t *p; uint32_t *cl = qb_cl(d); @@ -749,19 +1945,27 @@ int qbman_swp_pull(struct qbman_swp *s, struct qbman_pull_desc *d) } d->pull.tok = s->sys.idx + 1; - s->vdq.storage = (void *)d->pull.rsp_addr_virt; - p = qbman_cena_write_start_wo_shadow(&s->sys, QBMAN_CENA_SWP_VDQCR); + s->vdq.storage = (void *)(size_t)d->pull.rsp_addr_virt; + p = qbman_cena_write_start_wo_shadow(&s->sys, QBMAN_CENA_SWP_VDQCR_MEM); memcpy(&p[1], &cl[1], 12); /* Set the verb byte, have to substitute in the valid-bit */ - lwsync(); p[0] = cl[0] | s->vdq.valid_bit; s->vdq.valid_bit ^= QB_VALID_BIT; - qbman_cena_write_complete_wo_shadow(&s->sys, QBMAN_CENA_SWP_VDQCR); + dma_wmb(); + qbman_cinh_write(&s->sys, QBMAN_CINH_SWP_VDQCR_RT, QMAN_RT_MODE); return 0; } +int qbman_swp_pull(struct qbman_swp *s, struct qbman_pull_desc *d) +{ + if (!s->stash_off) + return qbman_swp_pull_ptr(s, d); + else + return qbman_swp_pull_cinh_direct(s, d); +} + /****************/ /* Polling DQRR */ /****************/ @@ -779,11 +1983,30 @@ int qbman_swp_pull(struct qbman_swp *s, struct qbman_pull_desc *d) #define QBMAN_RESULT_BPSCN 0x29 #define QBMAN_RESULT_CSCN_WQ 0x2a +#include + +void qbman_swp_prefetch_dqrr_next(struct qbman_swp *s) +{ + const struct qbman_result *p; + + p = qbman_cena_read_wo_shadow(&s->sys, + QBMAN_CENA_SWP_DQRR(s->dqrr.next_idx)); + rte_prefetch0(p); +} + /* NULL return if there are no unconsumed DQRR entries. Returns a DQRR entry * only once, so repeated calls can return a sequence of DQRR entries, without * requiring they be consumed immediately or in any particular order. 
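 * The call dispatches through qbman_swp_dqrr_next_ptr unless the
 * portal was switched to cache-inhibited access by qbman_swp_update()
 * (s->stash_off set), in which case the _cinh_direct variant is used.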
*/ const struct qbman_result *qbman_swp_dqrr_next(struct qbman_swp *s) +{ + if (!s->stash_off) + return qbman_swp_dqrr_next_ptr(s); + else + return qbman_swp_dqrr_next_cinh_direct(s); +} + +const struct qbman_result *qbman_swp_dqrr_next_direct(struct qbman_swp *s) { uint32_t verb; uint32_t response_verb; @@ -793,7 +2016,7 @@ const struct qbman_result *qbman_swp_dqrr_next(struct qbman_swp *s) /* Before using valid-bit to detect if something is there, we have to * handle the case of the DQRR reset bug... */ - if (unlikely(s->dqrr.reset_bug)) { + if (s->dqrr.reset_bug) { /* We pick up new entries by cache-inhibited producer index, * which means that a non-coherent mapping would require us to * invalidate and read *only* once that PI has indicated that @@ -825,7 +2048,83 @@ const struct qbman_result *qbman_swp_dqrr_next(struct qbman_swp *s) QBMAN_CENA_SWP_DQRR(s->dqrr.next_idx)); } p = qbman_cena_read_wo_shadow(&s->sys, - QBMAN_CENA_SWP_DQRR(s->dqrr.next_idx)); + QBMAN_CENA_SWP_DQRR(s->dqrr.next_idx)); + + verb = p->dq.verb; + + /* If the valid-bit isn't of the expected polarity, nothing there. Note, + * in the DQRR reset bug workaround, we shouldn't need to skip these + * check, because we've already determined that a new entry is available + * and we've invalidated the cacheline before reading it, so the + * valid-bit behaviour is repaired and should tell us what we already + * knew from reading PI. + */ + if ((verb & QB_VALID_BIT) != s->dqrr.valid_bit) + return NULL; + + /* There's something there. Move "next_idx" attention to the next ring + * entry (and prefetch it) before returning what we found. + */ + s->dqrr.next_idx++; + if (s->dqrr.next_idx == s->dqrr.dqrr_size) { + s->dqrr.next_idx = 0; + s->dqrr.valid_bit ^= QB_VALID_BIT; + } + /* If this is the final response to a volatile dequeue command + * indicate that the vdq is no longer busy + */ + flags = p->dq.stat; + response_verb = verb & QBMAN_RESPONSE_VERB_MASK; + if ((response_verb == QBMAN_RESULT_DQ) && + (flags & QBMAN_DQ_STAT_VOLATILE) && + (flags & QBMAN_DQ_STAT_EXPIRED)) + atomic_inc(&s->vdq.busy); + + return p; +} + +const struct qbman_result *qbman_swp_dqrr_next_cinh_direct(struct qbman_swp *s) +{ + uint32_t verb; + uint32_t response_verb; + uint32_t flags; + const struct qbman_result *p; + + /* Before using valid-bit to detect if something is there, we have to + * handle the case of the DQRR reset bug... + */ + if (s->dqrr.reset_bug) { + /* We pick up new entries by cache-inhibited producer index, + * which means that a non-coherent mapping would require us to + * invalidate and read *only* once that PI has indicated that + * there's an entry here. The first trip around the DQRR ring + * will be much less efficient than all subsequent trips around + * it... + */ + uint8_t pi = qbman_cinh_read(&s->sys, QBMAN_CINH_SWP_DQPI) & + QMAN_DQRR_PI_MASK; + + /* there are new entries if pi != next_idx */ + if (pi == s->dqrr.next_idx) + return NULL; + + /* if next_idx is/was the last ring index, and 'pi' is + * different, we can disable the workaround as all the ring + * entries have now been DMA'd to so valid-bit checking is + * repaired. Note: this logic needs to be based on next_idx + * (which increments one at a time), rather than on pi (which + * can burst and wrap-around between our snapshots of it). 
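+	 * (This is the same workaround, with the same reasoning, as in
+	 * qbman_swp_dqrr_next_direct() above.)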
+ */ + QBMAN_BUG_ON((s->dqrr.dqrr_size - 1) < 0); + if (s->dqrr.next_idx == (s->dqrr.dqrr_size - 1u)) { + pr_debug("DEBUG: next_idx=%d, pi=%d, clear reset bug\n", + s->dqrr.next_idx, pi); + s->dqrr.reset_bug = 0; + } + } + p = qbman_cinh_read_wo_shadow(&s->sys, + QBMAN_CENA_SWP_DQRR(s->dqrr.next_idx)); + verb = p->dq.verb; /* If the valid-bit isn't of the expected polarity, nothing there. Note, @@ -859,11 +2158,54 @@ const struct qbman_result *qbman_swp_dqrr_next(struct qbman_swp *s) return p; } +const struct qbman_result *qbman_swp_dqrr_next_mem_back(struct qbman_swp *s) +{ + uint32_t verb; + uint32_t response_verb; + uint32_t flags; + const struct qbman_result *p; + + p = qbman_cena_read_wo_shadow(&s->sys, + QBMAN_CENA_SWP_DQRR_MEM(s->dqrr.next_idx)); + + verb = p->dq.verb; + + /* If the valid-bit isn't of the expected polarity, nothing there. Note, + * in the DQRR reset bug workaround, we shouldn't need to skip these + * check, because we've already determined that a new entry is available + * and we've invalidated the cacheline before reading it, so the + * valid-bit behaviour is repaired and should tell us what we already + * knew from reading PI. + */ + if ((verb & QB_VALID_BIT) != s->dqrr.valid_bit) + return NULL; + + /* There's something there. Move "next_idx" attention to the next ring + * entry (and prefetch it) before returning what we found. + */ + s->dqrr.next_idx++; + if (s->dqrr.next_idx == s->dqrr.dqrr_size) { + s->dqrr.next_idx = 0; + s->dqrr.valid_bit ^= QB_VALID_BIT; + } + /* If this is the final response to a volatile dequeue command + * indicate that the vdq is no longer busy + */ + flags = p->dq.stat; + response_verb = verb & QBMAN_RESPONSE_VERB_MASK; + if ((response_verb == QBMAN_RESULT_DQ) + && (flags & QBMAN_DQ_STAT_VOLATILE) + && (flags & QBMAN_DQ_STAT_EXPIRED)) + atomic_inc(&s->vdq.busy); + return p; +} + /* Consume DQRR entries previously returned from qbman_swp_dqrr_next(). */ void qbman_swp_dqrr_consume(struct qbman_swp *s, const struct qbman_result *dq) { - qbman_cinh_write(&s->sys, QBMAN_CINH_SWP_DCAP, QBMAN_IDX_FROM_DQRR(dq)); + qbman_cinh_write(&s->sys, + QBMAN_CINH_SWP_DCAP, QBMAN_IDX_FROM_DQRR(dq)); } /* Consume DQRR entries previously returned from qbman_swp_dqrr_next(). */ @@ -876,6 +2218,7 @@ void qbman_swp_dqrr_idx_consume(struct qbman_swp *s, /*********************************/ /* Polling user-provided storage */ /*********************************/ + int qbman_result_has_new_result(struct qbman_swp *s, struct qbman_result *dq) { @@ -890,11 +2233,11 @@ int qbman_result_has_new_result(struct qbman_swp *s, ((struct qbman_result *)dq)->dq.tok = 0; /* - * VDQCR "no longer busy" hook - not quite the same as DQRR, because the - * fact "VDQCR" shows busy doesn't mean that we hold the result that - * makes it available. Eg. we may be looking at our 10th dequeue result, - * having released VDQCR after the 1st result and it is now busy due to - * some other command! + * VDQCR "no longer busy" hook - not quite the same as DQRR, because + * the fact "VDQCR" shows busy doesn't mean that we hold the result + * that makes it available. Eg. we may be looking at our 10th dequeue + * result, having released VDQCR after the 1st result and it is now + * busy due to some other command! 
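+	 * (For example, the storage of an early pull may be polled here
+	 * while a later pull currently owns VDQCR.)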
@@ -890,11 +2233,11 @@ int qbman_result_has_new_result(struct qbman_swp *s,
 	((struct qbman_result *)dq)->dq.tok = 0;

 	/*
-	 * VDQCR "no longer busy" hook - not quite the same as DQRR, because the
-	 * fact "VDQCR" shows busy doesn't mean that we hold the result that
-	 * makes it available. Eg. we may be looking at our 10th dequeue result,
-	 * having released VDQCR after the 1st result and it is now busy due to
-	 * some other command!
+	 * VDQCR "no longer busy" hook - not quite the same as DQRR, because
+	 * the fact "VDQCR" shows busy doesn't mean that we hold the result
+	 * that makes it available. Eg. we may be looking at our 10th dequeue
+	 * result, having released VDQCR after the 1st result and it is now
+	 * busy due to some other command!
 	 */
 	if (s->vdq.storage == dq) {
 		s->vdq.storage = NULL;
@@ -928,11 +2271,11 @@ int qbman_check_command_complete(struct qbman_result *dq)
 	s = portal_idx_map[dq->dq.tok - 1];

 	/*
-	 * VDQCR "no longer busy" hook - not quite the same as DQRR, because the
-	 * fact "VDQCR" shows busy doesn't mean that we hold the result that
-	 * makes it available. Eg. we may be looking at our 10th dequeue result,
-	 * having released VDQCR after the 1st result and it is now busy due to
-	 * some other command!
+	 * VDQCR "no longer busy" hook - not quite the same as DQRR, because
+	 * the fact "VDQCR" shows busy doesn't mean that we hold the result
+	 * that makes it available. Eg. we may be looking at our 10th dequeue
+	 * result, having released VDQCR after the 1st result and it is now
+	 * busy due to some other command!
 	 */
 	if (s->vdq.storage == dq) {
 		s->vdq.storage = NULL;
@@ -1105,6 +2448,32 @@ uint64_t qbman_result_cgcu_icnt(const struct qbman_result *scn)
 	return qbman_result_SCN_ctx(scn);
 }

+/********************/
+/* Parsing EQ RESP  */
+/********************/
+struct qbman_fd *qbman_result_eqresp_fd(struct qbman_result *eqresp)
+{
+	return (struct qbman_fd *)&eqresp->eq_resp.fd[0];
+}
+
+void qbman_result_eqresp_set_rspid(struct qbman_result *eqresp, uint8_t val)
+{
+	eqresp->eq_resp.rspid = val;
+}
+
+uint8_t qbman_result_eqresp_rspid(struct qbman_result *eqresp)
+{
+	return eqresp->eq_resp.rspid;
+}
+
+uint8_t qbman_result_eqresp_rc(struct qbman_result *eqresp)
+{
+	if (eqresp->eq_resp.rc == 0xE)
+		return 0;
+	else
+		return -1;
+}
+
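The EQ RESP accessors above support polling for enqueue responses: the caller stamps an rspid into the response storage and qbman_result_eqresp_rc() maps the hardware code 0xE to 0 (success). Note that the -1 branch travels through a uint8_t return type and so reads back as 255; callers should test for nonzero rather than for -1. A hedged usage sketch, assuming the portal header that declares struct qbman_result and these accessors; demo_poll_eq_resp() and expected_id are hypothetical.

#include <errno.h>
#include <stdint.h>

static int demo_poll_eq_resp(struct qbman_result *resp, uint8_t expected_id)
{
	/* treat the slot as unwritten until it carries the id we stamped */
	if (qbman_result_eqresp_rspid(resp) != expected_id)
		return -EAGAIN;

	/* rc is 0 only when the hardware reported code 0xE (success) */
	return qbman_result_eqresp_rc(resp) ? -EIO : 0;
}
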
 /******************/
 /* Buffer release */
 /******************/
@@ -1134,8 +2503,10 @@ void qbman_release_desc_set_rcdi(struct qbman_release_desc *d, int enable)
 #define RAR_VB(rar)      ((rar) & 0x80)
 #define RAR_SUCCESS(rar) ((rar) & 0x100)

-int qbman_swp_release(struct qbman_swp *s, const struct qbman_release_desc *d,
-		      const uint64_t *buffers, unsigned int num_buffers)
+static int qbman_swp_release_direct(struct qbman_swp *s,
+				    const struct qbman_release_desc *d,
+				    const uint64_t *buffers,
+				    unsigned int num_buffers)
 {
 	uint32_t *p;
 	const uint32_t *cl = qb_cl(d);
@@ -1149,22 +2520,98 @@ int qbman_swp_release(struct qbman_swp *s, const struct qbman_release_desc *d,
 	/* Start the release command */
 	p = qbman_cena_write_start_wo_shadow(&s->sys,
-			QBMAN_CENA_SWP_RCR(RAR_IDX(rar)));
+		QBMAN_CENA_SWP_RCR(RAR_IDX(rar)));

 	/* Copy the caller's buffer pointers to the command */
 	u64_to_le32_copy(&p[2], buffers, num_buffers);

-	/* Set the verb byte, have to substitute in the valid-bit and the number
-	 * of buffers.
+	/* Set the verb byte, have to substitute in the valid-bit and the
+	 * number of buffers.
 	 */
 	lwsync();
 	p[0] = cl[0] | RAR_VB(rar) | num_buffers;
 	qbman_cena_write_complete_wo_shadow(&s->sys,
-			QBMAN_CENA_SWP_RCR(RAR_IDX(rar)));
+		QBMAN_CENA_SWP_RCR(RAR_IDX(rar)));
+
+	return 0;
+}
+
+static int qbman_swp_release_cinh_direct(struct qbman_swp *s,
+			const struct qbman_release_desc *d,
+			const uint64_t *buffers,
+			unsigned int num_buffers)
+{
+	uint32_t *p;
+	const uint32_t *cl = qb_cl(d);
+	uint32_t rar = qbman_cinh_read(&s->sys, QBMAN_CINH_SWP_RAR);
+
+	pr_debug("RAR=%08x\n", rar);
+	if (!RAR_SUCCESS(rar))
+		return -EBUSY;
+
+	QBMAN_BUG_ON(!num_buffers || (num_buffers > 7));
+
+	/* Start the release command */
+	p = qbman_cinh_write_start_wo_shadow(&s->sys,
+			QBMAN_CENA_SWP_RCR(RAR_IDX(rar)));
+
+	/* Copy the caller's buffer pointers to the command */
+	memcpy_byte_by_byte(&p[2], buffers, num_buffers * sizeof(uint64_t));
+
+	/* Set the verb byte, have to substitute in the valid-bit and the
+	 * number of buffers.
+	 */
+	lwsync();
+	p[0] = cl[0] | RAR_VB(rar) | num_buffers;
+
+	return 0;
+}
+
+static int qbman_swp_release_mem_back(struct qbman_swp *s,
+			const struct qbman_release_desc *d,
+			const uint64_t *buffers,
+			unsigned int num_buffers)
+{
+	uint32_t *p;
+	const uint32_t *cl = qb_cl(d);
+	uint32_t rar = qbman_cinh_read(&s->sys, QBMAN_CINH_SWP_RAR);
+
+	pr_debug("RAR=%08x\n", rar);
+	if (!RAR_SUCCESS(rar))
+		return -EBUSY;
+
+	QBMAN_BUG_ON(!num_buffers || (num_buffers > 7));
+
+	/* Start the release command */
+	p = qbman_cena_write_start_wo_shadow(&s->sys,
+		QBMAN_CENA_SWP_RCR_MEM(RAR_IDX(rar)));
+
+	/* Copy the caller's buffer pointers to the command */
+	u64_to_le32_copy(&p[2], buffers, num_buffers);
+
+	/* Set the verb byte, have to substitute in the valid-bit and the
+	 * number of buffers.
+	 */
+	p[0] = cl[0] | RAR_VB(rar) | num_buffers;
+	lwsync();
+	qbman_cinh_write(&s->sys, QBMAN_CINH_SWP_RCR_AM_RT +
+		RAR_IDX(rar) * 4, QMAN_RT_MODE);

 	return 0;
 }

+int qbman_swp_release(struct qbman_swp *s,
+			const struct qbman_release_desc *d,
+			const uint64_t *buffers,
+			unsigned int num_buffers)
+{
+	if (!s->stash_off)
+		return qbman_swp_release_ptr(s, d, buffers, num_buffers);
+	else
+		return qbman_swp_release_cinh_direct(s, d, buffers,
+						num_buffers);
+}
+
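In all three release paths above, RAR allocation can fail transiently (RAR_SUCCESS() clear), which surfaces from qbman_swp_release() as -EBUSY; callers normally spin and retry. A hedged usage sketch, assuming the portal header; demo_release_bufs() and DEMO_MAX_SPIN are hypothetical.

#include <errno.h>
#include <stdint.h>

#define DEMO_MAX_SPIN 1000	/* hypothetical retry budget */

static int demo_release_bufs(struct qbman_swp *s,
			     const struct qbman_release_desc *d,
			     const uint64_t *bufs, unsigned int n)
{
	unsigned int i;

	for (i = 0; i < DEMO_MAX_SPIN; i++) {
		/* n must be between 1 and 7 buffer addresses */
		int ret = qbman_swp_release(s, d, bufs, n);

		if (ret != -EBUSY)	/* 0 on success, other errors fatal */
			return ret;
	}
	return -EBUSY;	/* release ring stayed full */
}
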
 /*******************/
 /* Buffer acquires */
 /*******************/
@@ -1185,8 +2632,8 @@ struct qbman_acquire_rslt {
 	uint64_t buf[7];
 };

-int qbman_swp_acquire(struct qbman_swp *s, uint16_t bpid, uint64_t *buffers,
-		      unsigned int num_buffers)
+static int qbman_swp_acquire_direct(struct qbman_swp *s, uint16_t bpid,
+				uint64_t *buffers, unsigned int num_buffers)
 {
 	struct qbman_acquire_desc *p;
 	struct qbman_acquire_rslt *r;
@@ -1206,7 +2653,52 @@ int qbman_swp_acquire(struct qbman_swp *s, uint16_t bpid, uint64_t *buffers,

 	/* Complete the management command */
 	r = qbman_swp_mc_complete(s, p, QBMAN_MC_ACQUIRE);
-	if (unlikely(!r)) {
+	if (!r) {
+		pr_err("qbman: acquire from BPID %d failed, no response\n",
+		       bpid);
+		return -EIO;
+	}
+
+	/* Decode the outcome */
+	QBMAN_BUG_ON((r->verb & QBMAN_RESPONSE_VERB_MASK) != QBMAN_MC_ACQUIRE);
+
+	/* Determine success or failure */
+	if (r->rslt != QBMAN_MC_RSLT_OK) {
+		pr_err("Acquire buffers from BPID 0x%x failed, code=0x%02x\n",
+		       bpid, r->rslt);
+		return -EIO;
+	}
+
+	QBMAN_BUG_ON(r->num > num_buffers);
+
+	/* Copy the acquired buffers to the caller's array */
+	u64_from_le32_copy(buffers, &r->buf[0], r->num);
+
+	return (int)r->num;
+}
+
+static int qbman_swp_acquire_cinh_direct(struct qbman_swp *s, uint16_t bpid,
+			uint64_t *buffers, unsigned int num_buffers)
+{
+	struct qbman_acquire_desc *p;
+	struct qbman_acquire_rslt *r;
+
+	if (!num_buffers || (num_buffers > 7))
+		return -EINVAL;
+
+	/* Start the management command */
+	p = qbman_swp_mc_start(s);
+
+	if (!p)
+		return -EBUSY;
+
+	/* Encode the caller-provided attributes */
+	p->bpid = bpid;
+	p->num = num_buffers;
+
+	/* Complete the management command */
+	r = qbman_swp_mc_complete_cinh(s, p, QBMAN_MC_ACQUIRE);
+	if (!r) {
 		pr_err("qbman: acquire from BPID %d failed, no response\n",
 		       bpid);
 		return -EIO;
@@ -1216,7 +2708,7 @@ int qbman_swp_acquire(struct qbman_swp *s, uint16_t bpid, uint64_t *buffers,
 	QBMAN_BUG_ON((r->verb & QBMAN_RESPONSE_VERB_MASK) != QBMAN_MC_ACQUIRE);

 	/* Determine success or failure */
-	if (unlikely(r->rslt != QBMAN_MC_RSLT_OK)) {
+	if (r->rslt != QBMAN_MC_RSLT_OK) {
 		pr_err("Acquire buffers from BPID 0x%x failed, code=0x%02x\n",
 		       bpid, r->rslt);
 		return -EIO;
@@ -1230,6 +2722,16 @@ int qbman_swp_acquire(struct qbman_swp *s, uint16_t bpid, uint64_t *buffers,
 	return (int)r->num;
 }

+int qbman_swp_acquire(struct qbman_swp *s, uint16_t bpid, uint64_t *buffers,
+		      unsigned int num_buffers)
+{
+	if (!s->stash_off)
+		return qbman_swp_acquire_direct(s, bpid, buffers, num_buffers);
+	else
+		return qbman_swp_acquire_cinh_direct(s, bpid, buffers,
+					num_buffers);
+}
+
 /*****************/
 /* FQ management */
 /*****************/
@@ -1263,7 +2765,7 @@ static int qbman_swp_alt_fq_state(struct qbman_swp *s, uint32_t fqid,

 	/* Complete the management command */
 	r = qbman_swp_mc_complete(s, p, alt_fq_verb);
-	if (unlikely(!r)) {
+	if (!r) {
 		pr_err("qbman: mgmt cmd failed, no response (verb=0x%x)\n",
 		       alt_fq_verb);
 		return -EIO;
@@ -1273,7 +2775,7 @@ static int qbman_swp_alt_fq_state(struct qbman_swp *s, uint32_t fqid,
 	QBMAN_BUG_ON((r->verb & QBMAN_RESPONSE_VERB_MASK) != alt_fq_verb);

 	/* Determine success or failure */
-	if (unlikely(r->rslt != QBMAN_MC_RSLT_OK)) {
+	if (r->rslt != QBMAN_MC_RSLT_OK) {
 		pr_err("ALT FQID %d failed: verb = 0x%08x, code = 0x%02x\n",
 		       fqid, alt_fq_verb, r->rslt);
 		return -EIO;
@@ -1354,7 +2856,7 @@ static int qbman_swp_CDAN_set(struct qbman_swp *s, uint16_t channelid,

 	/* Complete the management command */
 	r = qbman_swp_mc_complete(s, p, QBMAN_WQCHAN_CONFIGURE);
-	if (unlikely(!r)) {
+	if (!r) {
 		pr_err("qbman: wqchan config failed, no response\n");
 		return -EIO;
 	}
@@ -1364,7 +2866,7 @@ static int qbman_swp_CDAN_set(struct qbman_swp *s, uint16_t channelid,
 					!= QBMAN_WQCHAN_CONFIGURE);

 	/* Determine success or failure */
-	if (unlikely(r->rslt != QBMAN_MC_RSLT_OK)) {
+	if (r->rslt != QBMAN_MC_RSLT_OK) {
 		pr_err("CDAN cQID %d failed: code = 0x%02x\n",
 		       channelid, r->rslt);
 		return -EIO;
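
A closing usage note on the acquire wrapper added above: qbman_swp_acquire() moves at most 7 buffers per ACQUIRE management command and returns the number actually acquired, which may be fewer than requested, or a negative errno. A hedged sketch of refilling a larger array in chunks, assuming the portal header; demo_refill() is hypothetical.

#include <errno.h>
#include <stdint.h>

static int demo_refill(struct qbman_swp *s, uint16_t bpid,
		       uint64_t *bufs, unsigned int want)
{
	unsigned int filled = 0;

	while (filled < want) {
		/* the ACQUIRE management command moves at most 7 buffers */
		unsigned int chunk = (want - filled > 7) ? 7 : want - filled;
		int got = qbman_swp_acquire(s, bpid, bufs + filled, chunk);

		if (got < 0)
			return got;	/* negative errno from the portal */
		if (got == 0)
			break;		/* pool drained: partial refill */
		filled += got;
	}
	return (int)filled;
}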