net/bnxt: remove unused macro
[dpdk.git] / drivers / bus / fslmc / qbman / qbman_portal.c
index 2f572a0..0a2af7b 100644 (file)
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (C) 2014-2016 Freescale Semiconductor, Inc.
- * Copyright 2018 NXP
+ * Copyright 2018-2020 NXP
  *
  */
 
@@ -61,6 +61,8 @@ enum qbman_sdqcr_fc {
 #define MAX_QBMAN_PORTALS  64
 static struct qbman_swp *portal_idx_map[MAX_QBMAN_PORTALS];
 
+/*
+ * QBMan revision word. In this file it is masked with QMAN_REV_MASK and
+ * compared against QMAN_REV_5000 when choosing the portal access routines.
+ * NOTE(review): the place where it is assigned is not visible in this diff.
+ */
+uint32_t qman_version;
+
 /* Internal Function declaration */
 static int
 qbman_swp_enqueue_array_mode_direct(struct qbman_swp *s,
@@ -76,6 +78,14 @@ qbman_swp_enqueue_ring_mode_direct(struct qbman_swp *s,
                const struct qbman_eq_desc *d,
                const struct qbman_fd *fd);
 static int
+qbman_swp_enqueue_ring_mode_cinh_read_direct(struct qbman_swp *s,
+               const struct qbman_eq_desc *d,
+               const struct qbman_fd *fd);
+static int
+qbman_swp_enqueue_ring_mode_cinh_direct(struct qbman_swp *s,
+               const struct qbman_eq_desc *d,
+               const struct qbman_fd *fd);
+static int
 qbman_swp_enqueue_ring_mode_mem_back(struct qbman_swp *s,
                const struct qbman_eq_desc *d,
                const struct qbman_fd *fd);
@@ -87,18 +97,65 @@ qbman_swp_enqueue_multiple_direct(struct qbman_swp *s,
                uint32_t *flags,
                int num_frames);
 static int
+qbman_swp_enqueue_multiple_cinh_read_direct(struct qbman_swp *s,
+               const struct qbman_eq_desc *d,
+               const struct qbman_fd *fd,
+               uint32_t *flags,
+               int num_frames);
+static int
+qbman_swp_enqueue_multiple_cinh_direct(struct qbman_swp *s,
+               const struct qbman_eq_desc *d,
+               const struct qbman_fd *fd,
+               uint32_t *flags,
+               int num_frames);
+static int
 qbman_swp_enqueue_multiple_mem_back(struct qbman_swp *s,
                const struct qbman_eq_desc *d,
                const struct qbman_fd *fd,
                uint32_t *flags,
                int num_frames);
 
+/*
+ * Enqueue-multiple variants that take an array of frame-descriptor pointers
+ * (struct qbman_fd **) rather than a contiguous descriptor array.
+ * qbman_swp_init() installs the direct, cinh_read_direct or mem_back variant
+ * in qbman_swp_enqueue_multiple_fd_ptr; the cinh_direct variant is not
+ * assigned to that pointer in the code visible here.
+ */
+static int
+qbman_swp_enqueue_multiple_fd_direct(struct qbman_swp *s,
+               const struct qbman_eq_desc *d,
+               struct qbman_fd **fd,
+               uint32_t *flags,
+               int num_frames);
+static int
+qbman_swp_enqueue_multiple_fd_cinh_read_direct(struct qbman_swp *s,
+               const struct qbman_eq_desc *d,
+               struct qbman_fd **fd,
+               uint32_t *flags,
+               int num_frames);
+static int
+qbman_swp_enqueue_multiple_fd_cinh_direct(struct qbman_swp *s,
+               const struct qbman_eq_desc *d,
+               struct qbman_fd **fd,
+               uint32_t *flags,
+               int num_frames);
+static int
+qbman_swp_enqueue_multiple_fd_mem_back(struct qbman_swp *s,
+               const struct qbman_eq_desc *d,
+               struct qbman_fd **fd,
+               uint32_t *flags,
+               int num_frames);
+
 static int
 qbman_swp_enqueue_multiple_desc_direct(struct qbman_swp *s,
                const struct qbman_eq_desc *d,
                const struct qbman_fd *fd,
                int num_frames);
 static int
+qbman_swp_enqueue_multiple_desc_cinh_read_direct(struct qbman_swp *s,
+               const struct qbman_eq_desc *d,
+               const struct qbman_fd *fd,
+               int num_frames);
+static int
+qbman_swp_enqueue_multiple_desc_cinh_direct(struct qbman_swp *s,
+               const struct qbman_eq_desc *d,
+               const struct qbman_fd *fd,
+               int num_frames);
+static int
 qbman_swp_enqueue_multiple_desc_mem_back(struct qbman_swp *s,
                const struct qbman_eq_desc *d,
                const struct qbman_fd *fd,
@@ -107,9 +164,12 @@ qbman_swp_enqueue_multiple_desc_mem_back(struct qbman_swp *s,
 static int
 qbman_swp_pull_direct(struct qbman_swp *s, struct qbman_pull_desc *d);
 static int
+qbman_swp_pull_cinh_direct(struct qbman_swp *s, struct qbman_pull_desc *d);
+static int
 qbman_swp_pull_mem_back(struct qbman_swp *s, struct qbman_pull_desc *d);
 
 const struct qbman_result *qbman_swp_dqrr_next_direct(struct qbman_swp *s);
+const struct qbman_result *qbman_swp_dqrr_next_cinh_direct(struct qbman_swp *s);
 const struct qbman_result *qbman_swp_dqrr_next_mem_back(struct qbman_swp *s);
 
 static int
@@ -117,6 +177,10 @@ qbman_swp_release_direct(struct qbman_swp *s,
                const struct qbman_release_desc *d,
                const uint64_t *buffers, unsigned int num_buffers);
 static int
+qbman_swp_release_cinh_direct(struct qbman_swp *s,
+               const struct qbman_release_desc *d,
+               const uint64_t *buffers, unsigned int num_buffers);
+static int
 qbman_swp_release_mem_back(struct qbman_swp *s,
                const struct qbman_release_desc *d,
                const uint64_t *buffers, unsigned int num_buffers);
@@ -139,6 +203,13 @@ static int (*qbman_swp_enqueue_multiple_ptr)(struct qbman_swp *s,
                int num_frames)
        = qbman_swp_enqueue_multiple_direct;
 
+/*
+ * Dispatch pointer for the pointer-array enqueue. It starts out at the
+ * direct variant and is re-pointed by qbman_swp_init() (to the mem_back or
+ * cinh_read_direct variant) depending on the checks made there.
+ */
+static int (*qbman_swp_enqueue_multiple_fd_ptr)(struct qbman_swp *s,
+               const struct qbman_eq_desc *d,
+               struct qbman_fd **fd,
+               uint32_t *flags,
+               int num_frames)
+       = qbman_swp_enqueue_multiple_fd_direct;
+
 static int (*qbman_swp_enqueue_multiple_desc_ptr)(struct qbman_swp *s,
                const struct qbman_eq_desc *d,
                const struct qbman_fd *fd,
@@ -194,7 +265,8 @@ struct qbman_swp *qbman_swp_init(const struct qbman_swp_desc *d)
        p->sdq |= qbman_sdqcr_dct_prio_ics << QB_SDQCR_DCT_SHIFT;
        p->sdq |= qbman_sdqcr_fc_up_to_3 << QB_SDQCR_FC_SHIFT;
        p->sdq |= QMAN_SDQCR_TOKEN << QB_SDQCR_TOK_SHIFT;
-       if ((d->qman_version & QMAN_REV_MASK) >= QMAN_REV_5000)
+       if ((d->qman_version & QMAN_REV_MASK) >= QMAN_REV_5000
+                       && (d->cena_access_mode == qman_cena_fastest_access))
                p->mr.valid_bit = QB_VALID_BIT;
 
        atomic_set(&p->vdq.busy, 1);
@@ -233,41 +305,78 @@ struct qbman_swp *qbman_swp_init(const struct qbman_swp_desc *d)
        qbman_cinh_write(&p->sys, QBMAN_CINH_SWP_SDQCR, 0);
 
        p->eqcr.pi_ring_size = 8;
-       if ((qman_version & 0xFFFF0000) >= QMAN_REV_5000) {
+       if ((qman_version & QMAN_REV_MASK) >= QMAN_REV_5000
+                       && (d->cena_access_mode == qman_cena_fastest_access)) {
                p->eqcr.pi_ring_size = 32;
                qbman_swp_enqueue_array_mode_ptr =
-                               qbman_swp_enqueue_array_mode_mem_back;
+                       qbman_swp_enqueue_array_mode_mem_back;
                qbman_swp_enqueue_ring_mode_ptr =
-                               qbman_swp_enqueue_ring_mode_mem_back;
+                       qbman_swp_enqueue_ring_mode_mem_back;
                qbman_swp_enqueue_multiple_ptr =
-                               qbman_swp_enqueue_multiple_mem_back;
+                       qbman_swp_enqueue_multiple_mem_back;
+               qbman_swp_enqueue_multiple_fd_ptr =
+                       qbman_swp_enqueue_multiple_fd_mem_back;
                qbman_swp_enqueue_multiple_desc_ptr =
-                               qbman_swp_enqueue_multiple_desc_mem_back;
+                       qbman_swp_enqueue_multiple_desc_mem_back;
                qbman_swp_pull_ptr = qbman_swp_pull_mem_back;
                qbman_swp_dqrr_next_ptr = qbman_swp_dqrr_next_mem_back;
                qbman_swp_release_ptr = qbman_swp_release_mem_back;
        }
 
+       if (dpaa2_svr_family == SVR_LS1080A) {
+               qbman_swp_enqueue_ring_mode_ptr =
+                       qbman_swp_enqueue_ring_mode_cinh_read_direct;
+               qbman_swp_enqueue_multiple_ptr =
+                       qbman_swp_enqueue_multiple_cinh_read_direct;
+               qbman_swp_enqueue_multiple_fd_ptr =
+                       qbman_swp_enqueue_multiple_fd_cinh_read_direct;
+               qbman_swp_enqueue_multiple_desc_ptr =
+                       qbman_swp_enqueue_multiple_desc_cinh_read_direct;
+       }
+
        for (mask_size = p->eqcr.pi_ring_size; mask_size > 0; mask_size >>= 1)
-               p->eqcr.pi_mask = (p->eqcr.pi_mask<<1) + 1;
+               p->eqcr.pi_ci_mask = (p->eqcr.pi_ci_mask<<1) + 1;
        eqcr_pi = qbman_cinh_read(&p->sys, QBMAN_CINH_SWP_EQCR_PI);
-       p->eqcr.pi = eqcr_pi & p->eqcr.pi_mask;
+       p->eqcr.pi = eqcr_pi & p->eqcr.pi_ci_mask;
        p->eqcr.pi_vb = eqcr_pi & QB_VALID_BIT;
-       if ((p->desc.qman_version & QMAN_REV_MASK) < QMAN_REV_5000)
-               p->eqcr.ci = qbman_cinh_read(&p->sys,
-                               QBMAN_CINH_SWP_EQCR_CI) & p->eqcr.pi_mask;
+       if ((p->desc.qman_version & QMAN_REV_MASK) >= QMAN_REV_5000
+                       && (d->cena_access_mode == qman_cena_fastest_access))
+               p->eqcr.ci = qbman_cinh_read(&p->sys, QBMAN_CINH_SWP_EQCR_PI)
+                                            & p->eqcr.pi_ci_mask;
        else
-               p->eqcr.ci = qbman_cinh_read(&p->sys,
-                               QBMAN_CINH_SWP_EQCR_PI) & p->eqcr.pi_mask;
+               p->eqcr.ci = qbman_cinh_read(&p->sys, QBMAN_CINH_SWP_EQCR_CI)
+                                            & p->eqcr.pi_ci_mask;
        p->eqcr.available = p->eqcr.pi_ring_size -
                                qm_cyc_diff(p->eqcr.pi_ring_size,
-                               p->eqcr.ci & (p->eqcr.pi_mask<<1),
-                               p->eqcr.pi & (p->eqcr.pi_mask<<1));
+                               p->eqcr.ci & (p->eqcr.pi_ci_mask<<1),
+                               p->eqcr.pi & (p->eqcr.pi_ci_mask<<1));
 
        portal_idx_map[p->desc.idx] = p;
        return p;
 }
 
+/*
+ * Apply a new stash_off setting to a portal via qbman_swp_sys_update().
+ *
+ * p         - software portal to update
+ * stash_off - value forwarded to qbman_swp_sys_update() and, on success,
+ *             stored in p->stash_off (the enqueue wrappers in this file test
+ *             that field to choose the cinh enqueue routines)
+ *
+ * Returns 0 on success or when no update is required, otherwise the non-zero
+ * value returned by qbman_swp_sys_update(); p->stash_off is left untouched
+ * on failure.
+ */
+int qbman_swp_update(struct qbman_swp *p, int stash_off)
+{
+       const struct qbman_swp_desc *d = &p->desc;
+       struct qbman_swp_sys *s = &p->sys;
+       int ret;
+
+       /* Nothing needs to be done for QBMAN rev >= 5000 with fast access */
+       if ((qman_version & QMAN_REV_MASK) >= QMAN_REV_5000
+                       && (d->cena_access_mode == qman_cena_fastest_access))
+               return 0;
+
+       ret = qbman_swp_sys_update(s, d, p->dqrr.dqrr_size, stash_off);
+       if (ret) {
+               /* Report the function that actually failed */
+               pr_err("qbman_swp_sys_update() failed %d\n", ret);
+               return ret;
+       }
+
+       p->stash_off = stash_off;
+
+       return 0;
+}
+
 void qbman_swp_finish(struct qbman_swp *p)
 {
 #ifdef QBMAN_CHECKING
@@ -362,10 +471,11 @@ void *qbman_swp_mc_start(struct qbman_swp *p)
 #ifdef QBMAN_CHECKING
        QBMAN_BUG_ON(p->mc.check != swp_mc_can_start);
 #endif
-       if ((p->desc.qman_version & QMAN_REV_MASK) < QMAN_REV_5000)
-               ret = qbman_cena_write_start(&p->sys, QBMAN_CENA_SWP_CR);
-       else
+       if ((p->desc.qman_version & QMAN_REV_MASK) >= QMAN_REV_5000
+                   && (p->desc.cena_access_mode == qman_cena_fastest_access))
                ret = qbman_cena_write_start(&p->sys, QBMAN_CENA_SWP_CR_MEM);
+       else
+               ret = qbman_cena_write_start(&p->sys, QBMAN_CENA_SWP_CR);
 #ifdef QBMAN_CHECKING
        if (!ret)
                p->mc.check = swp_mc_can_submit;
@@ -385,52 +495,75 @@ void qbman_swp_mc_submit(struct qbman_swp *p, void *cmd, uint8_t cmd_verb)
         * caller wants to OR but has forgotten to do so.
         */
        QBMAN_BUG_ON((*v & cmd_verb) != *v);
-       if ((p->desc.qman_version & QMAN_REV_MASK) < QMAN_REV_5000) {
-               dma_wmb();
-               *v = cmd_verb | p->mc.valid_bit;
-               qbman_cena_write_complete(&p->sys, QBMAN_CENA_SWP_CR, cmd);
-               clean(cmd);
-       } else {
+       if ((p->desc.qman_version & QMAN_REV_MASK) >= QMAN_REV_5000
+                   && (p->desc.cena_access_mode == qman_cena_fastest_access)) {
                *v = cmd_verb | p->mr.valid_bit;
                qbman_cena_write_complete(&p->sys, QBMAN_CENA_SWP_CR_MEM, cmd);
                dma_wmb();
                qbman_cinh_write(&p->sys, QBMAN_CINH_SWP_CR_RT, QMAN_RT_MODE);
+       } else {
+               dma_wmb();
+               *v = cmd_verb | p->mc.valid_bit;
+               qbman_cena_write_complete(&p->sys, QBMAN_CENA_SWP_CR, cmd);
+               clean(cmd);
        }
 #ifdef QBMAN_CHECKING
        p->mc.check = swp_mc_can_poll;
 #endif
 }
 
+/*
+ * Submit a management command using qbman_cinh_write_complete().
+ *
+ * p        - software portal
+ * cmd      - command buffer; its first byte is overwritten with the verb
+ * cmd_verb - verb value, OR-ed here with the portal's current mc.valid_bit
+ */
+void qbman_swp_mc_submit_cinh(struct qbman_swp *p, void *cmd, uint8_t cmd_verb)
+{
+       uint8_t *v = cmd;
+#ifdef QBMAN_CHECKING
+       /* NOTE(review): "!(a != b)" is "a == b", so this fires when the state
+        * IS swp_mc_can_submit. qbman_swp_mc_start() only sets that state
+        * when its return value is NULL, so the two checks may be cancelling
+        * each other out - confirm the intended state machine before
+        * touching either one.
+        */
+       QBMAN_BUG_ON(!(p->mc.check != swp_mc_can_submit));
+#endif
+       /* TBD: "|=" is going to hurt performance. Need to move as many fields
+        * out of word zero, and for those that remain, the "OR" needs to occur
+        * at the caller side. This debug check helps to catch cases where the
+        * caller wants to OR but has forgotten to do so.
+        */
+       QBMAN_BUG_ON((*v & cmd_verb) != *v);
+       /* The barrier is issued before the verb store; presumably so that the
+        * rest of the command is visible before the verb/valid-bit byte.
+        */
+       dma_wmb();
+       *v = cmd_verb | p->mc.valid_bit;
+       qbman_cinh_write_complete(&p->sys, QBMAN_CENA_SWP_CR, cmd);
+       clean(cmd);
+#ifdef QBMAN_CHECKING
+       p->mc.check = swp_mc_can_poll;
+#endif
+}
+
 void *qbman_swp_mc_result(struct qbman_swp *p)
 {
        uint32_t *ret, verb;
 #ifdef QBMAN_CHECKING
        QBMAN_BUG_ON(p->mc.check != swp_mc_can_poll);
 #endif
-       if ((p->desc.qman_version & QMAN_REV_MASK) < QMAN_REV_5000) {
-               qbman_cena_invalidate_prefetch(&p->sys,
-                               QBMAN_CENA_SWP_RR(p->mc.valid_bit));
-               ret = qbman_cena_read(&p->sys,
-                               QBMAN_CENA_SWP_RR(p->mc.valid_bit));
+       if ((p->desc.qman_version & QMAN_REV_MASK) >= QMAN_REV_5000
+               && (p->desc.cena_access_mode == qman_cena_fastest_access)) {
+               ret = qbman_cena_read(&p->sys, QBMAN_CENA_SWP_RR_MEM);
+               /* Command completed if the valid bit is toggled */
+               if (p->mr.valid_bit != (ret[0] & QB_VALID_BIT))
+                       return NULL;
                /* Remove the valid-bit -
                 * command completed iff the rest is non-zero
                 */
                verb = ret[0] & ~QB_VALID_BIT;
                if (!verb)
                        return NULL;
-               p->mc.valid_bit ^= QB_VALID_BIT;
+               p->mr.valid_bit ^= QB_VALID_BIT;
        } else {
-               ret = qbman_cena_read(&p->sys, QBMAN_CENA_SWP_RR_MEM);
-               /* Command completed if the valid bit is toggled */
-               if (p->mr.valid_bit != (ret[0] & QB_VALID_BIT))
-                       return NULL;
+               qbman_cena_invalidate_prefetch(&p->sys,
+                       QBMAN_CENA_SWP_RR(p->mc.valid_bit));
+               ret = qbman_cena_read(&p->sys,
+                                     QBMAN_CENA_SWP_RR(p->mc.valid_bit));
                /* Remove the valid-bit -
                 * command completed iff the rest is non-zero
                 */
                verb = ret[0] & ~QB_VALID_BIT;
                if (!verb)
                        return NULL;
-               p->mr.valid_bit ^= QB_VALID_BIT;
+               p->mc.valid_bit ^= QB_VALID_BIT;
        }
 #ifdef QBMAN_CHECKING
        p->mc.check = swp_mc_can_start;
@@ -438,6 +571,27 @@ void *qbman_swp_mc_result(struct qbman_swp *p)
        return ret;
 }
 
+/*
+ * Poll for the response to a management command, reading the response
+ * register selected by the current mc.valid_bit via qbman_cinh_read_shadow().
+ *
+ * Returns NULL while word 0 of the response, ignoring QB_VALID_BIT, is still
+ * zero. Otherwise toggles p->mc.valid_bit and returns the response pointer.
+ */
+void *qbman_swp_mc_result_cinh(struct qbman_swp *p)
+{
+       uint32_t *rsp;
+
+#ifdef QBMAN_CHECKING
+       QBMAN_BUG_ON(p->mc.check != swp_mc_can_poll);
+#endif
+       rsp = qbman_cinh_read_shadow(&p->sys,
+                             QBMAN_CENA_SWP_RR(p->mc.valid_bit));
+
+       /* With the valid-bit masked off, a zero verb means "not done yet" */
+       if ((rsp[0] & ~QB_VALID_BIT) == 0)
+               return NULL;
+
+       p->mc.valid_bit ^= QB_VALID_BIT;
+#ifdef QBMAN_CHECKING
+       p->mc.check = swp_mc_can_start;
+#endif
+       return rsp;
+}
+
 /***********/
 /* Enqueue */
 /***********/
@@ -578,6 +732,16 @@ static inline void qbman_write_eqcr_am_rt_register(struct qbman_swp *p,
                                     QMAN_RT_MODE);
 }
 
+/*
+ * Copy n bytes from "from" to "to" one byte at a time, in ascending address
+ * order. The destination is accessed through a volatile pointer so each byte
+ * is stored individually.
+ */
+static void memcpy_byte_by_byte(void *to, const void *from, size_t n)
+{
+       volatile uint8_t *out = to;
+       const uint8_t *in = from;
+
+       while (n--)
+               *out++ = *in++;
+}
+
 
 static int qbman_swp_enqueue_array_mode_direct(struct qbman_swp *s,
                                               const struct qbman_eq_desc *d,
@@ -640,8 +804,8 @@ static int qbman_swp_enqueue_ring_mode_direct(struct qbman_swp *s,
        const uint32_t *cl = qb_cl(d);
        uint32_t eqcr_ci, full_mask, half_mask;
 
-       half_mask = (s->eqcr.pi_mask>>1);
-       full_mask = s->eqcr.pi_mask;
+       half_mask = (s->eqcr.pi_ci_mask>>1);
+       full_mask = s->eqcr.pi_ci_mask;
        if (!s->eqcr.available) {
                eqcr_ci = s->eqcr.ci;
                s->eqcr.ci = qbman_cena_read_reg(&s->sys,
@@ -671,6 +835,82 @@ static int qbman_swp_enqueue_ring_mode_direct(struct qbman_swp *s,
        return 0;
 }
 
+/*
+ * Ring-mode enqueue of a single frame. When no free slot is cached, the
+ * consumer index is refreshed with qbman_cinh_read() of
+ * QBMAN_CINH_SWP_EQCR_CI.
+ *
+ * s  - software portal
+ * d  - enqueue descriptor; word 0 supplies the verb, the next 28 bytes are
+ *      copied into the ring entry
+ * fd - frame descriptor, copied to word 8 of the ring entry
+ *
+ * Returns 0 on success, -EBUSY if the ring still has no free entry.
+ *
+ * NOTE(review): the entry pointer comes from
+ * qbman_cinh_write_start_wo_shadow() and no dcbf() is issued here, whereas
+ * the multi-frame cinh_read_direct routine uses the cena accessor and
+ * flushes - confirm this difference is intended.
+ */
+static int qbman_swp_enqueue_ring_mode_cinh_read_direct(
+               struct qbman_swp *s,
+               const struct qbman_eq_desc *d,
+               const struct qbman_fd *fd)
+{
+       uint32_t *p;
+       const uint32_t *cl = qb_cl(d);
+       uint32_t eqcr_ci, full_mask, half_mask;
+
+       /* full_mask keeps one more bit than half_mask; half_mask is what
+        * selects the ring slot below.
+        */
+       half_mask = (s->eqcr.pi_ci_mask>>1);
+       full_mask = s->eqcr.pi_ci_mask;
+       if (!s->eqcr.available) {
+               /* Re-read the consumer index and credit "available" with the
+                * distance qm_cyc_diff() reports between old and new values.
+                */
+               eqcr_ci = s->eqcr.ci;
+               s->eqcr.ci = qbman_cinh_read(&s->sys,
+                               QBMAN_CINH_SWP_EQCR_CI) & full_mask;
+               s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
+                               eqcr_ci, s->eqcr.ci);
+               if (!s->eqcr.available)
+                       return -EBUSY;
+       }
+
+       p = qbman_cinh_write_start_wo_shadow(&s->sys,
+                       QBMAN_CENA_SWP_EQCR(s->eqcr.pi & half_mask));
+       /* Word 0 (verb) is skipped here and written last, after lwsync() */
+       memcpy(&p[1], &cl[1], 28);
+       memcpy(&p[8], fd, sizeof(*fd));
+       lwsync();
+
+       /* Set the verb byte, have to substitute in the valid-bit */
+       p[0] = cl[0] | s->eqcr.pi_vb;
+       s->eqcr.pi++;
+       s->eqcr.pi &= full_mask;
+       s->eqcr.available--;
+       /* Flip the valid-bit whenever the slot index wraps back to zero */
+       if (!(s->eqcr.pi & half_mask))
+               s->eqcr.pi_vb ^= QB_VALID_BIT;
+
+       return 0;
+}
+
+/*
+ * Ring-mode enqueue of a single frame; this is the routine
+ * qbman_swp_enqueue_ring_mode() calls when s->stash_off is set. The consumer
+ * index is read with qbman_cinh_read() and the ring entry is filled with
+ * memcpy_byte_by_byte().
+ *
+ * s  - software portal
+ * d  - enqueue descriptor; word 0 supplies the verb, the next 28 bytes are
+ *      copied into the ring entry
+ * fd - frame descriptor, copied to word 8 of the ring entry
+ *
+ * Returns 0 on success, -EBUSY if the ring still has no free entry.
+ */
+static int qbman_swp_enqueue_ring_mode_cinh_direct(
+               struct qbman_swp *s,
+               const struct qbman_eq_desc *d,
+               const struct qbman_fd *fd)
+{
+       uint32_t *p;
+       const uint32_t *cl = qb_cl(d);
+       uint32_t eqcr_ci, full_mask, half_mask;
+
+       /* full_mask keeps one more bit than half_mask; half_mask is what
+        * selects the ring slot below.
+        */
+       half_mask = (s->eqcr.pi_ci_mask>>1);
+       full_mask = s->eqcr.pi_ci_mask;
+       if (!s->eqcr.available) {
+               /* Re-read the consumer index and credit "available" with the
+                * distance qm_cyc_diff() reports between old and new values.
+                */
+               eqcr_ci = s->eqcr.ci;
+               s->eqcr.ci = qbman_cinh_read(&s->sys,
+                               QBMAN_CINH_SWP_EQCR_CI) & full_mask;
+               s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
+                               eqcr_ci, s->eqcr.ci);
+               if (!s->eqcr.available)
+                       return -EBUSY;
+       }
+
+       p = qbman_cinh_write_start_wo_shadow(&s->sys,
+                       QBMAN_CENA_SWP_EQCR(s->eqcr.pi & half_mask));
+       /* Word 0 (verb) is skipped here and written last, after lwsync() */
+       memcpy_byte_by_byte(&p[1], &cl[1], 28);
+       memcpy_byte_by_byte(&p[8], fd, sizeof(*fd));
+       lwsync();
+
+       /* Set the verb byte, have to substitute in the valid-bit */
+       p[0] = cl[0] | s->eqcr.pi_vb;
+       s->eqcr.pi++;
+       s->eqcr.pi &= full_mask;
+       s->eqcr.available--;
+       /* Flip the valid-bit whenever the slot index wraps back to zero */
+       if (!(s->eqcr.pi & half_mask))
+               s->eqcr.pi_vb ^= QB_VALID_BIT;
+
+       return 0;
+}
+
 static int qbman_swp_enqueue_ring_mode_mem_back(struct qbman_swp *s,
                                                const struct qbman_eq_desc *d,
                                                const struct qbman_fd *fd)
@@ -679,8 +919,8 @@ static int qbman_swp_enqueue_ring_mode_mem_back(struct qbman_swp *s,
        const uint32_t *cl = qb_cl(d);
        uint32_t eqcr_ci, full_mask, half_mask;
 
-       half_mask = (s->eqcr.pi_mask>>1);
-       full_mask = s->eqcr.pi_mask;
+       half_mask = (s->eqcr.pi_ci_mask>>1);
+       full_mask = s->eqcr.pi_ci_mask;
        if (!s->eqcr.available) {
                eqcr_ci = s->eqcr.ci;
                s->eqcr.ci = qbman_cena_read_reg(&s->sys,
@@ -713,7 +953,10 @@ static int qbman_swp_enqueue_ring_mode(struct qbman_swp *s,
                                       const struct qbman_eq_desc *d,
                                       const struct qbman_fd *fd)
 {
-       return qbman_swp_enqueue_ring_mode_ptr(s, d, fd);
+       if (!s->stash_off)
+               return qbman_swp_enqueue_ring_mode_ptr(s, d, fd);
+       else
+               return qbman_swp_enqueue_ring_mode_cinh_direct(s, d, fd);
 }
 
 int qbman_swp_enqueue(struct qbman_swp *s, const struct qbman_eq_desc *d,
@@ -737,8 +980,8 @@ static int qbman_swp_enqueue_multiple_direct(struct qbman_swp *s,
        int i, num_enqueued = 0;
        uint64_t addr_cena;
 
-       half_mask = (s->eqcr.pi_mask>>1);
-       full_mask = s->eqcr.pi_mask;
+       half_mask = (s->eqcr.pi_ci_mask>>1);
+       full_mask = s->eqcr.pi_ci_mask;
        if (!s->eqcr.available) {
                eqcr_ci = s->eqcr.ci;
                s->eqcr.ci = qbman_cena_read_reg(&s->sys,
@@ -794,23 +1037,560 @@ static int qbman_swp_enqueue_multiple_direct(struct qbman_swp *s,
        return num_enqueued;
 }
 
-static int qbman_swp_enqueue_multiple_mem_back(struct qbman_swp *s,
-                                              const struct qbman_eq_desc *d,
-                                              const struct qbman_fd *fd,
-                                              uint32_t *flags,
-                                              int num_frames)
+/*
+ * Enqueue up to num_frames frames from the contiguous array fd[], all using
+ * descriptor d. The consumer index is refreshed with qbman_cinh_read(); ring
+ * entries are obtained from qbman_cena_write_start_wo_shadow() and flushed
+ * with dcbf() at the end.
+ *
+ * s          - software portal
+ * d          - enqueue descriptor shared by every frame
+ * fd         - array of at least num_frames frame descriptors
+ * flags      - optional (may be NULL); when flags[i] has
+ *              QBMAN_ENQUEUE_FLAG_DCA set, the DCA field of entry i is
+ *              filled from flags[i]
+ * num_frames - number of frames the caller wants to enqueue
+ *
+ * Returns the number of frames placed in the ring, 0 if it has no free
+ * entry.
+ */
+static int qbman_swp_enqueue_multiple_cinh_read_direct(
+               struct qbman_swp *s,
+               const struct qbman_eq_desc *d,
+               const struct qbman_fd *fd,
+               uint32_t *flags,
+               int num_frames)
+{
+       uint32_t *p = NULL;
+       const uint32_t *cl = qb_cl(d);
+       uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask;
+       int i, num_enqueued = 0;
+       uint64_t addr_cena;
+
+       half_mask = (s->eqcr.pi_ci_mask>>1);
+       full_mask = s->eqcr.pi_ci_mask;
+       if (!s->eqcr.available) {
+               /* Re-read the consumer index and credit "available" with the
+                * distance qm_cyc_diff() reports between old and new values.
+                */
+               eqcr_ci = s->eqcr.ci;
+               s->eqcr.ci = qbman_cinh_read(&s->sys,
+                               QBMAN_CINH_SWP_EQCR_CI) & full_mask;
+               s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
+                               eqcr_ci, s->eqcr.ci);
+               if (!s->eqcr.available)
+                       return 0;
+       }
+
+       eqcr_pi = s->eqcr.pi;
+       num_enqueued = (s->eqcr.available < num_frames) ?
+                       s->eqcr.available : num_frames;
+       s->eqcr.available -= num_enqueued;
+       /* Fill in the EQCR ring */
+       for (i = 0; i < num_enqueued; i++) {
+               p = qbman_cena_write_start_wo_shadow(&s->sys,
+                               QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+               memcpy(&p[1], &cl[1], 28);
+               memcpy(&p[8], &fd[i], sizeof(*fd));
+               eqcr_pi++;
+       }
+
+       lwsync();
+
+       /* Set the verb byte, have to substitute in the valid-bit */
+       eqcr_pi = s->eqcr.pi;
+       for (i = 0; i < num_enqueued; i++) {
+               p = qbman_cena_write_start_wo_shadow(&s->sys,
+                               QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+               p[0] = cl[0] | s->eqcr.pi_vb;
+               if (flags && (flags[i] & QBMAN_ENQUEUE_FLAG_DCA)) {
+                       /* Distinct name: must not shadow parameter "d" */
+                       struct qbman_eq_desc *entry = (struct qbman_eq_desc *)p;
+
+                       entry->eq.dca = (1 << QB_ENQUEUE_CMD_DCA_EN_SHIFT) |
+                               ((flags[i]) & QBMAN_EQCR_DCA_IDXMASK);
+               }
+               eqcr_pi++;
+               /* Flip the valid-bit whenever the slot index wraps to zero */
+               if (!(eqcr_pi & half_mask))
+                       s->eqcr.pi_vb ^= QB_VALID_BIT;
+       }
+
+       /* Flush all the cacheline without load/store in between */
+       eqcr_pi = s->eqcr.pi;
+       addr_cena = (size_t)s->sys.addr_cena;
+       for (i = 0; i < num_enqueued; i++) {
+               dcbf(addr_cena +
+                       QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+               eqcr_pi++;
+       }
+       s->eqcr.pi = eqcr_pi & full_mask;
+
+       return num_enqueued;
+}
+
+/*
+ * Enqueue up to num_frames frames from the contiguous array fd[], all using
+ * descriptor d. This is the routine qbman_swp_enqueue_multiple() calls when
+ * s->stash_off is set. The consumer index is read with qbman_cinh_read(),
+ * ring entries come from qbman_cinh_write_start_wo_shadow() and are filled
+ * with memcpy_byte_by_byte(); no cache flush is issued.
+ *
+ * s          - software portal
+ * d          - enqueue descriptor shared by every frame
+ * fd         - array of at least num_frames frame descriptors
+ * flags      - optional (may be NULL); when flags[i] has
+ *              QBMAN_ENQUEUE_FLAG_DCA set, the DCA field of entry i is
+ *              filled from flags[i]
+ * num_frames - number of frames the caller wants to enqueue
+ *
+ * Returns the number of frames placed in the ring, 0 if it has no free
+ * entry.
+ */
+static int qbman_swp_enqueue_multiple_cinh_direct(
+               struct qbman_swp *s,
+               const struct qbman_eq_desc *d,
+               const struct qbman_fd *fd,
+               uint32_t *flags,
+               int num_frames)
+{
+       uint32_t *p = NULL;
+       const uint32_t *cl = qb_cl(d);
+       uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask;
+       int i, num_enqueued = 0;
+
+       half_mask = (s->eqcr.pi_ci_mask>>1);
+       full_mask = s->eqcr.pi_ci_mask;
+       if (!s->eqcr.available) {
+               /* Re-read the consumer index and credit "available" with the
+                * distance qm_cyc_diff() reports between old and new values.
+                */
+               eqcr_ci = s->eqcr.ci;
+               s->eqcr.ci = qbman_cinh_read(&s->sys,
+                               QBMAN_CINH_SWP_EQCR_CI) & full_mask;
+               s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
+                               eqcr_ci, s->eqcr.ci);
+               if (!s->eqcr.available)
+                       return 0;
+       }
+
+       eqcr_pi = s->eqcr.pi;
+       num_enqueued = (s->eqcr.available < num_frames) ?
+                       s->eqcr.available : num_frames;
+       s->eqcr.available -= num_enqueued;
+       /* Fill in the EQCR ring */
+       for (i = 0; i < num_enqueued; i++) {
+               p = qbman_cinh_write_start_wo_shadow(&s->sys,
+                               QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+               memcpy_byte_by_byte(&p[1], &cl[1], 28);
+               memcpy_byte_by_byte(&p[8], &fd[i], sizeof(*fd));
+               eqcr_pi++;
+       }
+
+       lwsync();
+
+       /* Set the verb byte, have to substitute in the valid-bit */
+       eqcr_pi = s->eqcr.pi;
+       for (i = 0; i < num_enqueued; i++) {
+               p = qbman_cinh_write_start_wo_shadow(&s->sys,
+                               QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+               p[0] = cl[0] | s->eqcr.pi_vb;
+               /* NOTE(review): DCA is stored after the verb/valid-bit word,
+                * while the mem_back routine stores DCA first. If stores
+                * through this mapping reach hardware immediately, the entry
+                * could be consumed before DCA is set - confirm.
+                */
+               if (flags && (flags[i] & QBMAN_ENQUEUE_FLAG_DCA)) {
+                       /* Distinct name: must not shadow parameter "d" */
+                       struct qbman_eq_desc *entry = (struct qbman_eq_desc *)p;
+
+                       entry->eq.dca = (1 << QB_ENQUEUE_CMD_DCA_EN_SHIFT) |
+                               ((flags[i]) & QBMAN_EQCR_DCA_IDXMASK);
+               }
+               eqcr_pi++;
+               /* Flip the valid-bit whenever the slot index wraps to zero */
+               if (!(eqcr_pi & half_mask))
+                       s->eqcr.pi_vb ^= QB_VALID_BIT;
+       }
+
+       s->eqcr.pi = eqcr_pi & full_mask;
+
+       return num_enqueued;
+}
+
+/*
+ * Enqueue up to num_frames frames from the contiguous array fd[], all using
+ * descriptor d. This is the routine qbman_swp_init() installs in
+ * qbman_swp_enqueue_multiple_ptr when it selects the mem_back set. The
+ * consumer index is read from QBMAN_CENA_SWP_EQCR_CI_MEMBACK. Each entry is
+ * completed (payload, DCA, verb) in a single pass, after which the new
+ * producer index is written to QBMAN_CINH_SWP_EQCR_PI.
+ *
+ * s          - software portal
+ * d          - enqueue descriptor shared by every frame
+ * fd         - array of at least num_frames frame descriptors
+ * flags      - optional (may be NULL); when flags[i] has
+ *              QBMAN_ENQUEUE_FLAG_DCA set, the DCA field of entry i is
+ *              filled from flags[i]
+ * num_frames - number of frames the caller wants to enqueue
+ *
+ * Returns the number of frames placed in the ring, 0 if it has no free
+ * entry.
+ */
+static int qbman_swp_enqueue_multiple_mem_back(struct qbman_swp *s,
+                                              const struct qbman_eq_desc *d,
+                                              const struct qbman_fd *fd,
+                                              uint32_t *flags,
+                                              int num_frames)
+{
+       uint32_t *p = NULL;
+       const uint32_t *cl = qb_cl(d);
+       uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask;
+       int i, num_enqueued = 0;
+
+       half_mask = (s->eqcr.pi_ci_mask>>1);
+       full_mask = s->eqcr.pi_ci_mask;
+       if (!s->eqcr.available) {
+               /* Re-read the consumer index and credit "available" with the
+                * distance qm_cyc_diff() reports between old and new values.
+                */
+               eqcr_ci = s->eqcr.ci;
+               s->eqcr.ci = qbman_cena_read_reg(&s->sys,
+                               QBMAN_CENA_SWP_EQCR_CI_MEMBACK) & full_mask;
+               s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
+                                       eqcr_ci, s->eqcr.ci);
+               if (!s->eqcr.available)
+                       return 0;
+       }
+
+       eqcr_pi = s->eqcr.pi;
+       num_enqueued = (s->eqcr.available < num_frames) ?
+                       s->eqcr.available : num_frames;
+       s->eqcr.available -= num_enqueued;
+       /* Fill in the EQCR ring */
+       for (i = 0; i < num_enqueued; i++) {
+               p = qbman_cena_write_start_wo_shadow(&s->sys,
+                               QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+               memcpy(&p[1], &cl[1], 28);
+               memcpy(&p[8], &fd[i], sizeof(*fd));
+               if (flags && (flags[i] & QBMAN_ENQUEUE_FLAG_DCA)) {
+                       /* Distinct name: must not shadow parameter "d" */
+                       struct qbman_eq_desc *entry = (struct qbman_eq_desc *)p;
+
+                       entry->eq.dca = (1 << QB_ENQUEUE_CMD_DCA_EN_SHIFT) |
+                               ((flags[i]) & QBMAN_EQCR_DCA_IDXMASK);
+               }
+               eqcr_pi++;
+               /* Verb word goes in last, carrying the current valid-bit */
+               p[0] = cl[0] | s->eqcr.pi_vb;
+
+               /* Flip the valid-bit whenever the slot index wraps to zero */
+               if (!(eqcr_pi & half_mask))
+                       s->eqcr.pi_vb ^= QB_VALID_BIT;
+       }
+       s->eqcr.pi = eqcr_pi & full_mask;
+
+       /* Barrier, then publish the new producer index with QB_RT_BIT set */
+       dma_wmb();
+       qbman_cinh_write(&s->sys, QBMAN_CINH_SWP_EQCR_PI,
+                               (QB_RT_BIT)|(s->eqcr.pi)|s->eqcr.pi_vb);
+       return num_enqueued;
+}
+
+/*
+ * Public entry point for enqueuing several frames that share one descriptor.
+ * A portal with stash_off set always goes through the cinh_direct routine;
+ * any other portal uses whichever routine qbman_swp_enqueue_multiple_ptr
+ * currently points at. The return value is passed through unchanged.
+ */
+int qbman_swp_enqueue_multiple(struct qbman_swp *s,
+                              const struct qbman_eq_desc *d,
+                              const struct qbman_fd *fd,
+                              uint32_t *flags,
+                              int num_frames)
+{
+       if (s->stash_off)
+               return qbman_swp_enqueue_multiple_cinh_direct(s, d, fd, flags,
+                                                             num_frames);
+
+       return qbman_swp_enqueue_multiple_ptr(s, d, fd, flags, num_frames);
+}
+
+/*
+ * Enqueue up to num_frames frames given as an array of frame-descriptor
+ * pointers, all using descriptor d. This is the initial value of
+ * qbman_swp_enqueue_multiple_fd_ptr. The consumer index is read from
+ * QBMAN_CENA_SWP_EQCR_CI; ring entries are obtained from
+ * qbman_cena_write_start_wo_shadow() and flushed with dcbf() at the end.
+ *
+ * s          - software portal
+ * d          - enqueue descriptor shared by every frame
+ * fd         - array of at least num_frames pointers to frame descriptors
+ * flags      - optional (may be NULL); when flags[i] has
+ *              QBMAN_ENQUEUE_FLAG_DCA set, the DCA field of entry i is
+ *              filled from flags[i]
+ * num_frames - number of frames the caller wants to enqueue
+ *
+ * Returns the number of frames placed in the ring, 0 if it has no free
+ * entry.
+ */
+static int qbman_swp_enqueue_multiple_fd_direct(struct qbman_swp *s,
+                                               const struct qbman_eq_desc *d,
+                                               struct qbman_fd **fd,
+                                               uint32_t *flags,
+                                               int num_frames)
+{
+       uint32_t *p = NULL;
+       const uint32_t *cl = qb_cl(d);
+       uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask;
+       int i, num_enqueued = 0;
+       uint64_t addr_cena;
+
+       half_mask = (s->eqcr.pi_ci_mask>>1);
+       full_mask = s->eqcr.pi_ci_mask;
+       if (!s->eqcr.available) {
+               /* Re-read the consumer index and credit "available" with the
+                * distance qm_cyc_diff() reports between old and new values.
+                */
+               eqcr_ci = s->eqcr.ci;
+               s->eqcr.ci = qbman_cena_read_reg(&s->sys,
+                               QBMAN_CENA_SWP_EQCR_CI) & full_mask;
+               s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
+                               eqcr_ci, s->eqcr.ci);
+               if (!s->eqcr.available)
+                       return 0;
+       }
+
+       eqcr_pi = s->eqcr.pi;
+       num_enqueued = (s->eqcr.available < num_frames) ?
+                       s->eqcr.available : num_frames;
+       s->eqcr.available -= num_enqueued;
+       /* Fill in the EQCR ring */
+       for (i = 0; i < num_enqueued; i++) {
+               p = qbman_cena_write_start_wo_shadow(&s->sys,
+                               QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+               memcpy(&p[1], &cl[1], 28);
+               memcpy(&p[8], fd[i], sizeof(struct qbman_fd));
+               eqcr_pi++;
+       }
+
+       lwsync();
+
+       /* Set the verb byte, have to substitute in the valid-bit */
+       eqcr_pi = s->eqcr.pi;
+       for (i = 0; i < num_enqueued; i++) {
+               p = qbman_cena_write_start_wo_shadow(&s->sys,
+                               QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+               p[0] = cl[0] | s->eqcr.pi_vb;
+               if (flags && (flags[i] & QBMAN_ENQUEUE_FLAG_DCA)) {
+                       /* Distinct name: must not shadow parameter "d" */
+                       struct qbman_eq_desc *entry = (struct qbman_eq_desc *)p;
+
+                       entry->eq.dca = (1 << QB_ENQUEUE_CMD_DCA_EN_SHIFT) |
+                               ((flags[i]) & QBMAN_EQCR_DCA_IDXMASK);
+               }
+               eqcr_pi++;
+               /* Flip the valid-bit whenever the slot index wraps to zero */
+               if (!(eqcr_pi & half_mask))
+                       s->eqcr.pi_vb ^= QB_VALID_BIT;
+       }
+
+       /* Flush all the cacheline without load/store in between */
+       eqcr_pi = s->eqcr.pi;
+       addr_cena = (size_t)s->sys.addr_cena;
+       for (i = 0; i < num_enqueued; i++) {
+               dcbf(addr_cena +
+                       QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+               eqcr_pi++;
+       }
+       s->eqcr.pi = eqcr_pi & full_mask;
+
+       return num_enqueued;
+}
+
+/*
+ * Enqueue up to num_frames frames, passed as an array of frame-descriptor
+ * pointers, using the single enqueue descriptor d for every frame.
+ *
+ * The cached free-entry count (s->eqcr.available) is refreshed from
+ * QBMAN_CINH_SWP_EQCR_CI through qbman_cinh_read() only once it has dropped
+ * to zero. Ring entries are written through
+ * qbman_cena_write_start_wo_shadow() and every touched entry is flushed with
+ * dcbf() before returning.
+ *
+ * flags may be NULL. When flags[i] has QBMAN_ENQUEUE_FLAG_DCA set, the dca
+ * field of entry i is set to the DCA enable bit plus the bits of flags[i]
+ * selected by QBMAN_EQCR_DCA_IDXMASK.
+ *
+ * Returns the number of frames placed in the ring; 0 when no entry is free.
+ */
+static int qbman_swp_enqueue_multiple_fd_cinh_read_direct(
+               struct qbman_swp *s,
+               const struct qbman_eq_desc *d,
+               struct qbman_fd **fd,
+               uint32_t *flags,
+               int num_frames)
+{
+       uint32_t *p = NULL;
+       const uint32_t *cl = qb_cl(d);
+       uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask;
+       int i, num_enqueued = 0;
+       uint64_t addr_cena;
+
+       half_mask = (s->eqcr.pi_ci_mask>>1);
+       full_mask = s->eqcr.pi_ci_mask;
+       if (!s->eqcr.available) {
+               /* Out of cached credit: re-read the consumer index */
+               eqcr_ci = s->eqcr.ci;
+               s->eqcr.ci = qbman_cinh_read(&s->sys,
+                               QBMAN_CINH_SWP_EQCR_CI) & full_mask;
+               s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
+                               eqcr_ci, s->eqcr.ci);
+               if (!s->eqcr.available)
+                       return 0;
+       }
+
+       eqcr_pi = s->eqcr.pi;
+       num_enqueued = (s->eqcr.available < num_frames) ?
+                       s->eqcr.available : num_frames;
+       s->eqcr.available -= num_enqueued;
+       /* Fill in the EQCR ring (everything except the verb word) */
+       for (i = 0; i < num_enqueued; i++) {
+               p = qbman_cena_write_start_wo_shadow(&s->sys,
+                               QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+               memcpy(&p[1], &cl[1], 28);
+               memcpy(&p[8], fd[i], sizeof(struct qbman_fd));
+               eqcr_pi++;
+       }
+
+       /* Barrier between the payload writes and the verb writes below */
+       lwsync();
+
+       /* Set the verb byte, have to substitute in the valid-bit */
+       eqcr_pi = s->eqcr.pi;
+       for (i = 0; i < num_enqueued; i++) {
+               p = qbman_cena_write_start_wo_shadow(&s->sys,
+                               QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+               p[0] = cl[0] | s->eqcr.pi_vb;
+               if (flags && (flags[i] & QBMAN_ENQUEUE_FLAG_DCA)) {
+                       /* Named so that it does not shadow parameter d */
+                       struct qbman_eq_desc *ent = (struct qbman_eq_desc *)p;
+
+                       ent->eq.dca = (1 << QB_ENQUEUE_CMD_DCA_EN_SHIFT) |
+                               ((flags[i]) & QBMAN_EQCR_DCA_IDXMASK);
+               }
+               eqcr_pi++;
+               /* The valid-bit flips each time the index wraps to 0 */
+               if (!(eqcr_pi & half_mask))
+                       s->eqcr.pi_vb ^= QB_VALID_BIT;
+       }
+
+       /* Flush all the cacheline without load/store in between */
+       eqcr_pi = s->eqcr.pi;
+       addr_cena = (size_t)s->sys.addr_cena;
+       for (i = 0; i < num_enqueued; i++) {
+               /* Same cast as qbman_swp_enqueue_multiple_desc_direct */
+               dcbf((uintptr_t)(addr_cena +
+                       QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)));
+               eqcr_pi++;
+       }
+       s->eqcr.pi = eqcr_pi & full_mask;
+
+       return num_enqueued;
+}
+
+/*
+ * Enqueue up to num_frames frames, passed as an array of frame-descriptor
+ * pointers, using the single enqueue descriptor d for every frame.
+ *
+ * This variant uses the cinh accessors throughout: the consumer index is
+ * read with qbman_cinh_read(), and ring entries are obtained with
+ * qbman_cinh_write_start_wo_shadow() and filled with memcpy_byte_by_byte().
+ * No dcbf() flush is issued.
+ *
+ * flags may be NULL. When flags[i] has QBMAN_ENQUEUE_FLAG_DCA set, the dca
+ * field of entry i is set to the DCA enable bit plus the bits of flags[i]
+ * selected by QBMAN_EQCR_DCA_IDXMASK.
+ *
+ * Returns the number of frames placed in the ring; 0 when no entry is free.
+ */
+static int qbman_swp_enqueue_multiple_fd_cinh_direct(
+               struct qbman_swp *s,
+               const struct qbman_eq_desc *d,
+               struct qbman_fd **fd,
+               uint32_t *flags,
+               int num_frames)
+{
+       uint32_t *p = NULL;
+       const uint32_t *cl = qb_cl(d);
+       uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask;
+       int i, num_enqueued = 0;
+
+       half_mask = (s->eqcr.pi_ci_mask>>1);
+       full_mask = s->eqcr.pi_ci_mask;
+       if (!s->eqcr.available) {
+               /* Out of cached credit: re-read the consumer index */
+               eqcr_ci = s->eqcr.ci;
+               s->eqcr.ci = qbman_cinh_read(&s->sys,
+                               QBMAN_CINH_SWP_EQCR_CI) & full_mask;
+               s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
+                               eqcr_ci, s->eqcr.ci);
+               if (!s->eqcr.available)
+                       return 0;
+       }
+
+       eqcr_pi = s->eqcr.pi;
+       num_enqueued = (s->eqcr.available < num_frames) ?
+                       s->eqcr.available : num_frames;
+       s->eqcr.available -= num_enqueued;
+       /* Fill in the EQCR ring (everything except the verb word) */
+       for (i = 0; i < num_enqueued; i++) {
+               p = qbman_cinh_write_start_wo_shadow(&s->sys,
+                               QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+               memcpy_byte_by_byte(&p[1], &cl[1], 28);
+               memcpy_byte_by_byte(&p[8], fd[i], sizeof(struct qbman_fd));
+               eqcr_pi++;
+       }
+
+       /* Barrier between the payload writes and the verb writes below */
+       lwsync();
+
+       /* Set the verb byte, have to substitute in the valid-bit */
+       eqcr_pi = s->eqcr.pi;
+       for (i = 0; i < num_enqueued; i++) {
+               p = qbman_cinh_write_start_wo_shadow(&s->sys,
+                               QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+               p[0] = cl[0] | s->eqcr.pi_vb;
+               if (flags && (flags[i] & QBMAN_ENQUEUE_FLAG_DCA)) {
+                       /* Named so that it does not shadow parameter d */
+                       struct qbman_eq_desc *ent = (struct qbman_eq_desc *)p;
+
+                       ent->eq.dca = (1 << QB_ENQUEUE_CMD_DCA_EN_SHIFT) |
+                               ((flags[i]) & QBMAN_EQCR_DCA_IDXMASK);
+               }
+               eqcr_pi++;
+               /* The valid-bit flips each time the index wraps to 0 */
+               if (!(eqcr_pi & half_mask))
+                       s->eqcr.pi_vb ^= QB_VALID_BIT;
+       }
+
+       s->eqcr.pi = eqcr_pi & full_mask;
+
+       return num_enqueued;
+}
+
+/*
+ * Enqueue up to num_frames frames, passed as an array of frame-descriptor
+ * pointers, using the single enqueue descriptor d for every frame.
+ *
+ * The consumer index is read with qbman_cena_read_reg() from
+ * QBMAN_CENA_SWP_EQCR_CI_MEMBACK. After the entries are written, dma_wmb()
+ * is issued and the new producer index, combined with QB_RT_BIT and the
+ * current valid-bit, is written to QBMAN_CINH_SWP_EQCR_PI.
+ *
+ * flags may be NULL. When flags[i] has QBMAN_ENQUEUE_FLAG_DCA set, the dca
+ * field of entry i is set to the DCA enable bit plus the bits of flags[i]
+ * selected by QBMAN_EQCR_DCA_IDXMASK.
+ *
+ * Returns the number of frames placed in the ring; 0 when no entry is free.
+ */
+static int qbman_swp_enqueue_multiple_fd_mem_back(struct qbman_swp *s,
+                                                 const struct qbman_eq_desc *d,
+                                                 struct qbman_fd **fd,
+                                                 uint32_t *flags,
+                                                 int num_frames)
+{
+       uint32_t *p = NULL;
+       const uint32_t *cl = qb_cl(d);
+       uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask;
+       int i, num_enqueued = 0;
+
+       half_mask = (s->eqcr.pi_ci_mask>>1);
+       full_mask = s->eqcr.pi_ci_mask;
+       if (!s->eqcr.available) {
+               /* Out of cached credit: re-read the consumer index */
+               eqcr_ci = s->eqcr.ci;
+               s->eqcr.ci = qbman_cena_read_reg(&s->sys,
+                               QBMAN_CENA_SWP_EQCR_CI_MEMBACK) & full_mask;
+               s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
+                                       eqcr_ci, s->eqcr.ci);
+               if (!s->eqcr.available)
+                       return 0;
+       }
+
+       eqcr_pi = s->eqcr.pi;
+       num_enqueued = (s->eqcr.available < num_frames) ?
+                       s->eqcr.available : num_frames;
+       s->eqcr.available -= num_enqueued;
+       /* Fill in the EQCR ring (everything except the verb word) */
+       for (i = 0; i < num_enqueued; i++) {
+               p = qbman_cena_write_start_wo_shadow(&s->sys,
+                               QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+               memcpy(&p[1], &cl[1], 28);
+               memcpy(&p[8], fd[i], sizeof(struct qbman_fd));
+               eqcr_pi++;
+       }
+
+       /* Set the verb byte, have to substitute in the valid-bit */
+       eqcr_pi = s->eqcr.pi;
+       for (i = 0; i < num_enqueued; i++) {
+               p = qbman_cena_write_start_wo_shadow(&s->sys,
+                               QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+               p[0] = cl[0] | s->eqcr.pi_vb;
+               if (flags && (flags[i] & QBMAN_ENQUEUE_FLAG_DCA)) {
+                       /* Named so that it does not shadow parameter d */
+                       struct qbman_eq_desc *ent = (struct qbman_eq_desc *)p;
+
+                       ent->eq.dca = (1 << QB_ENQUEUE_CMD_DCA_EN_SHIFT) |
+                               ((flags[i]) & QBMAN_EQCR_DCA_IDXMASK);
+               }
+               eqcr_pi++;
+               /* The valid-bit flips each time the index wraps to 0 */
+               if (!(eqcr_pi & half_mask))
+                       s->eqcr.pi_vb ^= QB_VALID_BIT;
+       }
+       s->eqcr.pi = eqcr_pi & full_mask;
+
+       /* Write barrier, then publish the new producer index */
+       dma_wmb();
+       qbman_cinh_write(&s->sys, QBMAN_CINH_SWP_EQCR_PI,
+                               (QB_RT_BIT)|(s->eqcr.pi)|s->eqcr.pi_vb);
+       return num_enqueued;
+}
+
+/*
+ * Public entry point for enqueuing an array of frame-descriptor pointers.
+ * A non-zero s->stash_off routes the call to the cinh_direct implementation;
+ * otherwise it goes through qbman_swp_enqueue_multiple_fd_ptr. The return
+ * value is that of the selected implementation.
+ */
+int qbman_swp_enqueue_multiple_fd(struct qbman_swp *s,
+                                        const struct qbman_eq_desc *d,
+                                        struct qbman_fd **fd,
+                                        uint32_t *flags,
+                                        int num_frames)
+{
+       if (s->stash_off)
+               return qbman_swp_enqueue_multiple_fd_cinh_direct(s, d, fd,
+                                       flags, num_frames);
+
+       return qbman_swp_enqueue_multiple_fd_ptr(s, d, fd, flags, num_frames);
+}
+
+/*
+ * Enqueue up to num_frames frames where frame i is described by its own
+ * enqueue descriptor d[i] and frame descriptor fd[i].
+ *
+ * The consumer index is re-read with qbman_cena_read_reg() only when the
+ * cached free-entry count is zero. Entries are filled first, then (after
+ * lwsync()) their verb words are written with the valid-bit, and finally
+ * each touched entry is flushed with dcbf().
+ *
+ * Returns the number of frames placed in the ring; 0 when no entry is free.
+ */
+static int qbman_swp_enqueue_multiple_desc_direct(struct qbman_swp *s,
+                                       const struct qbman_eq_desc *d,
+                                       const struct qbman_fd *fd,
+                                       int num_frames)
+{
+       uint32_t *slot;
+       const uint32_t *cmd;
+       uint32_t prev_ci, pi, slot_mask, ring_mask;
+       int frame, count = 0;
+       uint64_t cena_base;
+
+       slot_mask = (s->eqcr.pi_ci_mask>>1);
+       ring_mask = s->eqcr.pi_ci_mask;
+
+       /* Refresh the free-entry count only once it has run out */
+       if (!s->eqcr.available) {
+               prev_ci = s->eqcr.ci;
+               s->eqcr.ci = qbman_cena_read_reg(&s->sys,
+                               QBMAN_CENA_SWP_EQCR_CI) & ring_mask;
+               s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
+                                       prev_ci, s->eqcr.ci);
+               if (!s->eqcr.available)
+                       return 0;
+       }
+
+       /* Take as many entries as are both requested and free */
+       count = (s->eqcr.available < num_frames) ?
+                       s->eqcr.available : num_frames;
+       s->eqcr.available -= count;
+
+       /* Pass 1: copy descriptor and frame data, leaving the verb word */
+       pi = s->eqcr.pi;
+       for (frame = 0; frame < count; frame++, pi++) {
+               slot = qbman_cena_write_start_wo_shadow(&s->sys,
+                               QBMAN_CENA_SWP_EQCR(pi & slot_mask));
+               cmd = qb_cl(&d[frame]);
+               memcpy(&slot[1], &cmd[1], 28);
+               memcpy(&slot[8], &fd[frame], sizeof(*fd));
+       }
+
+       lwsync();
+
+       /* Pass 2: write each verb word with the current valid-bit */
+       pi = s->eqcr.pi;
+       for (frame = 0; frame < count; frame++) {
+               slot = qbman_cena_write_start_wo_shadow(&s->sys,
+                               QBMAN_CENA_SWP_EQCR(pi & slot_mask));
+               cmd = qb_cl(&d[frame]);
+               slot[0] = cmd[0] | s->eqcr.pi_vb;
+               pi++;
+               /* Index wrapped to entry 0: flip the valid-bit */
+               if ((pi & slot_mask) == 0)
+                       s->eqcr.pi_vb ^= QB_VALID_BIT;
+       }
+
+       /* Pass 3: flush every touched entry, no load/store in between */
+       cena_base = (size_t)s->sys.addr_cena;
+       pi = s->eqcr.pi;
+       for (frame = 0; frame < count; frame++, pi++) {
+               dcbf((uintptr_t)(cena_base +
+                       QBMAN_CENA_SWP_EQCR(pi & slot_mask)));
+       }
+       s->eqcr.pi = pi & ring_mask;
+
+       return count;
+}
+
+static int qbman_swp_enqueue_multiple_desc_cinh_read_direct(
+               struct qbman_swp *s,
+               const struct qbman_eq_desc *d,
+               const struct qbman_fd *fd,
+               int num_frames)
 {
-       uint32_t *p = NULL;
-       const uint32_t *cl = qb_cl(d);
+       uint32_t *p;
+       const uint32_t *cl;
        uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask;
        int i, num_enqueued = 0;
+       uint64_t addr_cena;
 
-       half_mask = (s->eqcr.pi_mask>>1);
-       full_mask = s->eqcr.pi_mask;
+       half_mask = (s->eqcr.pi_ci_mask>>1);
+       full_mask = s->eqcr.pi_ci_mask;
        if (!s->eqcr.available) {
                eqcr_ci = s->eqcr.ci;
-               s->eqcr.ci = qbman_cena_read_reg(&s->sys,
-                               QBMAN_CENA_SWP_EQCR_CI_MEMBACK) & full_mask;
+               s->eqcr.ci = qbman_cinh_read(&s->sys,
+                               QBMAN_CINH_SWP_EQCR_CI) & full_mask;
                s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
                                        eqcr_ci, s->eqcr.ci);
                if (!s->eqcr.available)
@@ -825,61 +1605,56 @@ static int qbman_swp_enqueue_multiple_mem_back(struct qbman_swp *s,
        for (i = 0; i < num_enqueued; i++) {
                p = qbman_cena_write_start_wo_shadow(&s->sys,
                                QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+               cl = qb_cl(&d[i]);
                memcpy(&p[1], &cl[1], 28);
                memcpy(&p[8], &fd[i], sizeof(*fd));
                eqcr_pi++;
        }
 
+       lwsync();
+
        /* Set the verb byte, have to substitute in the valid-bit */
        eqcr_pi = s->eqcr.pi;
        for (i = 0; i < num_enqueued; i++) {
                p = qbman_cena_write_start_wo_shadow(&s->sys,
                                QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+               cl = qb_cl(&d[i]);
                p[0] = cl[0] | s->eqcr.pi_vb;
-               if (flags && (flags[i] & QBMAN_ENQUEUE_FLAG_DCA)) {
-                       struct qbman_eq_desc *d = (struct qbman_eq_desc *)p;
-
-                       d->eq.dca = (1 << QB_ENQUEUE_CMD_DCA_EN_SHIFT) |
-                               ((flags[i]) & QBMAN_EQCR_DCA_IDXMASK);
-               }
                eqcr_pi++;
                if (!(eqcr_pi & half_mask))
                        s->eqcr.pi_vb ^= QB_VALID_BIT;
        }
+
+       /* Flush all the cacheline without load/store in between */
+       eqcr_pi = s->eqcr.pi;
+       addr_cena = (size_t)s->sys.addr_cena;
+       for (i = 0; i < num_enqueued; i++) {
+               dcbf(addr_cena +
+                       QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+               eqcr_pi++;
+       }
        s->eqcr.pi = eqcr_pi & full_mask;
 
-       dma_wmb();
-       qbman_cinh_write(&s->sys, QBMAN_CINH_SWP_EQCR_PI,
-                               (QB_RT_BIT)|(s->eqcr.pi)|s->eqcr.pi_vb);
        return num_enqueued;
 }
 
-inline int qbman_swp_enqueue_multiple(struct qbman_swp *s,
-                                     const struct qbman_eq_desc *d,
-                                     const struct qbman_fd *fd,
-                                     uint32_t *flags,
-                                     int num_frames)
-{
-       return qbman_swp_enqueue_multiple_ptr(s, d, fd, flags, num_frames);
-}
-
-static int qbman_swp_enqueue_multiple_desc_direct(struct qbman_swp *s,
-                                       const struct qbman_eq_desc *d,
-                                       const struct qbman_fd *fd,
-                                       int num_frames)
+static int qbman_swp_enqueue_multiple_desc_cinh_direct(
+               struct qbman_swp *s,
+               const struct qbman_eq_desc *d,
+               const struct qbman_fd *fd,
+               int num_frames)
 {
        uint32_t *p;
        const uint32_t *cl;
        uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask;
        int i, num_enqueued = 0;
-       uint64_t addr_cena;
 
-       half_mask = (s->eqcr.pi_mask>>1);
-       full_mask = s->eqcr.pi_mask;
+       half_mask = (s->eqcr.pi_ci_mask>>1);
+       full_mask = s->eqcr.pi_ci_mask;
        if (!s->eqcr.available) {
                eqcr_ci = s->eqcr.ci;
-               s->eqcr.ci = qbman_cena_read_reg(&s->sys,
-                               QBMAN_CENA_SWP_EQCR_CI) & full_mask;
+               s->eqcr.ci = qbman_cinh_read(&s->sys,
+                               QBMAN_CINH_SWP_EQCR_CI) & full_mask;
                s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
                                        eqcr_ci, s->eqcr.ci);
                if (!s->eqcr.available)
@@ -892,11 +1667,11 @@ static int qbman_swp_enqueue_multiple_desc_direct(struct qbman_swp *s,
        s->eqcr.available -= num_enqueued;
        /* Fill in the EQCR ring */
        for (i = 0; i < num_enqueued; i++) {
-               p = qbman_cena_write_start_wo_shadow(&s->sys,
+               p = qbman_cinh_write_start_wo_shadow(&s->sys,
                                QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
                cl = qb_cl(&d[i]);
-               memcpy(&p[1], &cl[1], 28);
-               memcpy(&p[8], &fd[i], sizeof(*fd));
+               memcpy_byte_by_byte(&p[1], &cl[1], 28);
+               memcpy_byte_by_byte(&p[8], &fd[i], sizeof(*fd));
                eqcr_pi++;
        }
 
@@ -905,7 +1680,7 @@ static int qbman_swp_enqueue_multiple_desc_direct(struct qbman_swp *s,
        /* Set the verb byte, have to substitute in the valid-bit */
        eqcr_pi = s->eqcr.pi;
        for (i = 0; i < num_enqueued; i++) {
-               p = qbman_cena_write_start_wo_shadow(&s->sys,
+               p = qbman_cinh_write_start_wo_shadow(&s->sys,
                                QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
                cl = qb_cl(&d[i]);
                p[0] = cl[0] | s->eqcr.pi_vb;
@@ -914,14 +1689,6 @@ static int qbman_swp_enqueue_multiple_desc_direct(struct qbman_swp *s,
                        s->eqcr.pi_vb ^= QB_VALID_BIT;
        }
 
-       /* Flush all the cacheline without load/store in between */
-       eqcr_pi = s->eqcr.pi;
-       addr_cena = (size_t)s->sys.addr_cena;
-       for (i = 0; i < num_enqueued; i++) {
-               dcbf((uintptr_t)(addr_cena +
-                       QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)));
-               eqcr_pi++;
-       }
        s->eqcr.pi = eqcr_pi & full_mask;
 
        return num_enqueued;
@@ -937,8 +1704,8 @@ static int qbman_swp_enqueue_multiple_desc_mem_back(struct qbman_swp *s,
        uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask;
        int i, num_enqueued = 0;
 
-       half_mask = (s->eqcr.pi_mask>>1);
-       full_mask = s->eqcr.pi_mask;
+       half_mask = (s->eqcr.pi_ci_mask>>1);
+       full_mask = s->eqcr.pi_ci_mask;
        if (!s->eqcr.available) {
                eqcr_ci = s->eqcr.ci;
                s->eqcr.ci = qbman_cena_read_reg(&s->sys,
@@ -983,12 +1750,18 @@ static int qbman_swp_enqueue_multiple_desc_mem_back(struct qbman_swp *s,
 
        return num_enqueued;
 }
-inline int qbman_swp_enqueue_multiple_desc(struct qbman_swp *s,
+int qbman_swp_enqueue_multiple_desc(struct qbman_swp *s,
                                            const struct qbman_eq_desc *d,
                                            const struct qbman_fd *fd,
                                            int num_frames)
 {
-       return qbman_swp_enqueue_multiple_desc_ptr(s, d, fd, num_frames);
+       /* Pick the implementation from s->stash_off: zero selects
+        * qbman_swp_enqueue_multiple_desc_ptr, non-zero selects the
+        * cinh_direct variant. The chosen function's result is returned
+        * unchanged.
+        */
+       if (!s->stash_off)
+               return qbman_swp_enqueue_multiple_desc_ptr(s, d, fd,
+                                       num_frames);
+       else
+               return qbman_swp_enqueue_multiple_desc_cinh_direct(s, d, fd,
+                                       num_frames);
+
 }
 
 /*************************/
@@ -1136,6 +1909,30 @@ static int qbman_swp_pull_direct(struct qbman_swp *s,
        return 0;
 }
 
+/*
+ * Issue the pull command described by d, writing it to the
+ * QBMAN_CENA_SWP_VDQCR location obtained from
+ * qbman_cinh_write_start_wo_shadow().
+ *
+ * s->vdq.busy is claimed with atomic_dec_and_test(); when that test fails
+ * the counter is restored and -EBUSY is returned. On success d->pull.tok is
+ * set to s->sys.idx + 1, s->vdq.storage is set from d->pull.rsp_addr_virt,
+ * the portal's vdq valid-bit is toggled and 0 is returned.
+ */
+static int qbman_swp_pull_cinh_direct(struct qbman_swp *s,
+                                struct qbman_pull_desc *d)
+{
+       uint32_t *vdqcr;
+       uint32_t *cmd = qb_cl(d);
+
+       if (!atomic_dec_and_test(&s->vdq.busy)) {
+               /* Undo the decrement before reporting busy */
+               atomic_inc(&s->vdq.busy);
+               return -EBUSY;
+       }
+
+       /* Record the token and response storage before copying the command */
+       d->pull.tok = s->sys.idx + 1;
+       s->vdq.storage = (void *)(size_t)d->pull.rsp_addr_virt;
+
+       vdqcr = qbman_cinh_write_start_wo_shadow(&s->sys,
+                                                QBMAN_CENA_SWP_VDQCR);
+       memcpy_byte_by_byte(&vdqcr[1], &cmd[1], 12);
+
+       /* Verb word goes last, after a barrier, carrying the valid-bit */
+       lwsync();
+       vdqcr[0] = cmd[0] | s->vdq.valid_bit;
+       s->vdq.valid_bit ^= QB_VALID_BIT;
+
+       return 0;
+}
+
 static int qbman_swp_pull_mem_back(struct qbman_swp *s,
                                   struct qbman_pull_desc *d)
 {
@@ -1161,9 +1958,12 @@ static int qbman_swp_pull_mem_back(struct qbman_swp *s,
        return 0;
 }
 
-inline int qbman_swp_pull(struct qbman_swp *s, struct qbman_pull_desc *d)
+int qbman_swp_pull(struct qbman_swp *s, struct qbman_pull_desc *d)
 {
-       return qbman_swp_pull_ptr(s, d);
+       /* Pick the implementation from s->stash_off: zero selects
+        * qbman_swp_pull_ptr, non-zero selects qbman_swp_pull_cinh_direct.
+        * The chosen function's result is returned unchanged.
+        */
+       if (!s->stash_off)
+               return qbman_swp_pull_ptr(s, d);
+       else
+               return qbman_swp_pull_cinh_direct(s, d);
 }
 
 /****************/
@@ -1198,9 +1998,12 @@ void qbman_swp_prefetch_dqrr_next(struct qbman_swp *s)
  * only once, so repeated calls can return a sequence of DQRR entries, without
  * requiring they be consumed immediately or in any particular order.
  */
-inline const struct qbman_result *qbman_swp_dqrr_next(struct qbman_swp *s)
+const struct qbman_result *qbman_swp_dqrr_next(struct qbman_swp *s)
 {
-       return qbman_swp_dqrr_next_ptr(s);
+       /* Pick the implementation from s->stash_off: zero selects
+        * qbman_swp_dqrr_next_ptr, non-zero selects
+        * qbman_swp_dqrr_next_cinh_direct. The chosen function's result is
+        * returned unchanged.
+        */
+       if (!s->stash_off)
+               return qbman_swp_dqrr_next_ptr(s);
+       else
+               return qbman_swp_dqrr_next_cinh_direct(s);
 }
 
 const struct qbman_result *qbman_swp_dqrr_next_direct(struct qbman_swp *s)
@@ -1280,6 +2083,81 @@ const struct qbman_result *qbman_swp_dqrr_next_direct(struct qbman_swp *s)
        return p;
 }
 
+/*
+ * Return the next DQRR entry, read with qbman_cinh_read_wo_shadow(), or NULL
+ * when the entry at s->dqrr.next_idx does not carry the expected valid-bit
+ * (or, while the reset-bug workaround is active, when the producer index
+ * read from QBMAN_CINH_SWP_DQPI equals next_idx).
+ *
+ * On success next_idx is advanced, wrapping to 0 and toggling the expected
+ * valid-bit at the end of the ring. s->vdq.busy is incremented when the
+ * entry is a QBMAN_RESULT_DQ response with both the VOLATILE and EXPIRED
+ * status flags set.
+ */
+const struct qbman_result *qbman_swp_dqrr_next_cinh_direct(struct qbman_swp *s)
+{
+       uint32_t verb;
+       uint32_t response_verb;
+       uint32_t flags;
+       const struct qbman_result *p;
+
+       /* Before using valid-bit to detect if something is there, we have to
+        * handle the case of the DQRR reset bug...
+        */
+       if (s->dqrr.reset_bug) {
+               /* We pick up new entries by cache-inhibited producer index,
+                * which means that a non-coherent mapping would require us to
+                * invalidate and read *only* once that PI has indicated that
+                * there's an entry here. The first trip around the DQRR ring
+                * will be much less efficient than all subsequent trips around
+                * it...
+                */
+               uint8_t pi = qbman_cinh_read(&s->sys, QBMAN_CINH_SWP_DQPI) &
+                            QMAN_DQRR_PI_MASK;
+
+               /* there are new entries if pi != next_idx */
+               if (pi == s->dqrr.next_idx)
+                       return NULL;
+
+               /* if next_idx is/was the last ring index, and 'pi' is
+                * different, we can disable the workaround as all the ring
+                * entries have now been DMA'd to so valid-bit checking is
+                * repaired. Note: this logic needs to be based on next_idx
+                * (which increments one at a time), rather than on pi (which
+                * can burst and wrap-around between our snapshots of it).
+                */
+               QBMAN_BUG_ON((s->dqrr.dqrr_size - 1) < 0);
+               if (s->dqrr.next_idx == (s->dqrr.dqrr_size - 1u)) {
+                       pr_debug("DEBUG: next_idx=%d, pi=%d, clear reset bug\n",
+                                s->dqrr.next_idx, pi);
+                       s->dqrr.reset_bug = 0;
+               }
+       }
+       /* Fetch the candidate entry at the current ring position */
+       p = qbman_cinh_read_wo_shadow(&s->sys,
+                       QBMAN_CENA_SWP_DQRR(s->dqrr.next_idx));
+
+       verb = p->dq.verb;
+
+       /* If the valid-bit isn't of the expected polarity, nothing there. Note,
+        * in the DQRR reset bug workaround, we shouldn't need to skip this
+        * check, because the PI read above has already shown that a new entry
+        * is available, so the valid-bit should tell us what we already knew
+        * from reading PI.
+        * NOTE(review): this variant issues no explicit cacheline invalidate
+        * before the read; the entry comes from qbman_cinh_read_wo_shadow().
+        */
+       if ((verb & QB_VALID_BIT) != s->dqrr.valid_bit)
+               return NULL;
+
+       /* There's something there. Move "next_idx" attention to the next ring
+        * entry before returning what we found.
+        * NOTE(review): no prefetch of the next entry is done in this variant.
+        */
+       s->dqrr.next_idx++;
+       if (s->dqrr.next_idx == s->dqrr.dqrr_size) {
+               s->dqrr.next_idx = 0;
+               s->dqrr.valid_bit ^= QB_VALID_BIT;
+       }
+       /* If this is the final response to a volatile dequeue command
+        * indicate that the vdq is no longer busy
+        */
+       flags = p->dq.stat;
+       response_verb = verb & QBMAN_RESPONSE_VERB_MASK;
+       if ((response_verb == QBMAN_RESULT_DQ) &&
+           (flags & QBMAN_DQ_STAT_VOLATILE) &&
+           (flags & QBMAN_DQ_STAT_EXPIRED))
+               atomic_inc(&s->vdq.busy);
+
+       return p;
+}
+
 const struct qbman_result *qbman_swp_dqrr_next_mem_back(struct qbman_swp *s)
 {
        uint32_t verb;
@@ -1570,6 +2448,32 @@ uint64_t qbman_result_cgcu_icnt(const struct qbman_result *scn)
        return qbman_result_SCN_ctx(scn);
 }
 
+/********************/
+/* Parsing EQ RESP  */
+/********************/
+/* Return the eq_resp.fd storage of an enqueue response as a frame descriptor. */
+struct qbman_fd *qbman_result_eqresp_fd(struct qbman_result *eqresp)
+{
+       struct qbman_fd *resp_fd = (struct qbman_fd *)&eqresp->eq_resp.fd[0];
+
+       return resp_fd;
+}
+
+/* Store val in the rspid field of the enqueue response. */
+void qbman_result_eqresp_set_rspid(struct qbman_result *eqresp, uint8_t val)
+{
+       (*eqresp).eq_resp.rspid = val;
+}
+
+/* Read back the rspid field of the enqueue response. */
+uint8_t qbman_result_eqresp_rspid(struct qbman_result *eqresp)
+{
+       uint8_t rspid = eqresp->eq_resp.rspid;
+
+       return rspid;
+}
+
+/*
+ * Map the rc field of an enqueue response to a status: 0 when rc is 0xE,
+ * otherwise -1. Because the return type is uint8_t, the -1 reaches the
+ * caller as 0xFF, so callers should test for non-zero rather than for a
+ * negative value.
+ */
+uint8_t qbman_result_eqresp_rc(struct qbman_result *eqresp)
+{
+       return (eqresp->eq_resp.rc == 0xE) ? 0 : -1;
+}
+
 /******************/
 /* Buffer release */
 /******************/
@@ -1632,6 +2536,37 @@ static int qbman_swp_release_direct(struct qbman_swp *s,
        return 0;
 }
 
+/*
+ * Release num_buffers buffer addresses using release descriptor d, writing
+ * the command through qbman_cinh_write_start_wo_shadow().
+ *
+ * The QBMAN_CINH_SWP_RAR register is read first; when RAR_SUCCESS() is false
+ * for it, -EBUSY is returned and nothing is written. Otherwise the buffers
+ * are copied into the RCR entry selected by RAR_IDX() and the verb word is
+ * written last. num_buffers is checked with QBMAN_BUG_ON to be in 1..7.
+ *
+ * Returns 0 once the command has been written.
+ */
+static int qbman_swp_release_cinh_direct(struct qbman_swp *s,
+                                   const struct qbman_release_desc *d,
+                                   const uint64_t *buffers,
+                                   unsigned int num_buffers)
+{
+       const uint32_t *cmd = qb_cl(d);
+       uint32_t rar = qbman_cinh_read(&s->sys, QBMAN_CINH_SWP_RAR);
+       uint32_t *rcr;
+
+       pr_debug("RAR=%08x\n", rar);
+       if (!RAR_SUCCESS(rar))
+               return -EBUSY;
+
+       QBMAN_BUG_ON(!num_buffers || (num_buffers > 7));
+
+       /* Locate the release-command entry named by the RAR value */
+       rcr = qbman_cinh_write_start_wo_shadow(&s->sys,
+                                    QBMAN_CENA_SWP_RCR(RAR_IDX(rar)));
+
+       /* Buffer pointers start at the third 32-bit word of the entry */
+       memcpy_byte_by_byte(&rcr[2], buffers, num_buffers * sizeof(uint64_t));
+
+       /* Verb word last, after a barrier: descriptor verb combined with
+        * the valid-bit from RAR and the buffer count.
+        */
+       lwsync();
+       rcr[0] = cmd[0] | RAR_VB(rar) | num_buffers;
+
+       return 0;
+}
+
 static int qbman_swp_release_mem_back(struct qbman_swp *s,
                                      const struct qbman_release_desc *d,
                                      const uint64_t *buffers,
@@ -1665,12 +2600,16 @@ static int qbman_swp_release_mem_back(struct qbman_swp *s,
        return 0;
 }
 
-inline int qbman_swp_release(struct qbman_swp *s,
+int qbman_swp_release(struct qbman_swp *s,
                              const struct qbman_release_desc *d,
                              const uint64_t *buffers,
                              unsigned int num_buffers)
 {
-       return qbman_swp_release_ptr(s, d, buffers, num_buffers);
+       /* Pick the implementation from s->stash_off: zero selects
+        * qbman_swp_release_ptr, non-zero selects
+        * qbman_swp_release_cinh_direct. The chosen function's result is
+        * returned unchanged.
+        */
+       if (!s->stash_off)
+               return qbman_swp_release_ptr(s, d, buffers, num_buffers);
+       else
+               return qbman_swp_release_cinh_direct(s, d, buffers,
+                                               num_buffers);
 }
 
 /*******************/
@@ -1693,8 +2632,8 @@ struct qbman_acquire_rslt {
        uint64_t buf[7];
 };
 
-int qbman_swp_acquire(struct qbman_swp *s, uint16_t bpid, uint64_t *buffers,
-                     unsigned int num_buffers)
+static int qbman_swp_acquire_direct(struct qbman_swp *s, uint16_t bpid,
+                               uint64_t *buffers, unsigned int num_buffers)
 {
        struct qbman_acquire_desc *p;
        struct qbman_acquire_rslt *r;
@@ -1738,6 +2677,61 @@ int qbman_swp_acquire(struct qbman_swp *s, uint16_t bpid, uint64_t *buffers,
        return (int)r->num;
 }
 
+/*
+ * Acquire up to num_buffers buffers from buffer pool bpid through a
+ * management command completed with qbman_swp_mc_complete_cinh().
+ *
+ * Returns the number of buffers copied into buffers[] on success, -EINVAL
+ * when num_buffers is 0 or greater than 7, -EBUSY when
+ * qbman_swp_mc_start() returns NULL, and -EIO when there is no response or
+ * the response result is not QBMAN_MC_RSLT_OK.
+ */
+static int qbman_swp_acquire_cinh_direct(struct qbman_swp *s, uint16_t bpid,
+                       uint64_t *buffers, unsigned int num_buffers)
+{
+       struct qbman_acquire_desc *cmd;
+       struct qbman_acquire_rslt *rsp;
+
+       if (num_buffers == 0 || num_buffers > 7)
+               return -EINVAL;
+
+       /* Start the management command */
+       cmd = qbman_swp_mc_start(s);
+       if (cmd == NULL)
+               return -EBUSY;
+
+       /* Fill in the pool id and the requested buffer count */
+       cmd->bpid = bpid;
+       cmd->num = num_buffers;
+
+       /* Submit the command and wait for its response */
+       rsp = qbman_swp_mc_complete_cinh(s, cmd, QBMAN_MC_ACQUIRE);
+       if (rsp == NULL) {
+               pr_err("qbman: acquire from BPID %d failed, no response\n",
+                      bpid);
+               return -EIO;
+       }
+
+       /* The response must answer the acquire command */
+       QBMAN_BUG_ON((r->verb & QBMAN_RESPONSE_VERB_MASK) != QBMAN_MC_ACQUIRE);
+
+       /* Anything other than OK is reported as an I/O error */
+       if (rsp->rslt != QBMAN_MC_RSLT_OK) {
+               pr_err("Acquire buffers from BPID 0x%x failed, code=0x%02x\n",
+                      bpid, rsp->rslt);
+               return -EIO;
+       }
+
+       QBMAN_BUG_ON(r->num > num_buffers);
+
+       /* Hand the acquired buffers back to the caller */
+       u64_from_le32_copy(buffers, &rsp->buf[0], rsp->num);
+
+       return (int)rsp->num;
+}
+
+/*
+ * Public entry point for acquiring buffers from pool bpid. A non-zero
+ * s->stash_off routes the call to qbman_swp_acquire_cinh_direct; otherwise
+ * qbman_swp_acquire_direct is used. The return value is that of the selected
+ * implementation.
+ */
+int qbman_swp_acquire(struct qbman_swp *s, uint16_t bpid, uint64_t *buffers,
+                     unsigned int num_buffers)
+{
+       if (s->stash_off)
+               return qbman_swp_acquire_cinh_direct(s, bpid, buffers,
+                                       num_buffers);
+
+       return qbman_swp_acquire_direct(s, bpid, buffers, num_buffers);
+}
+
 /*****************/
 /* FQ management */
 /*****************/