From: Nipun Gupta Date: Mon, 15 Jul 2019 08:44:41 +0000 (+0530) Subject: bus/fslmc: use CINH read on LS1088 platform X-Git-Url: http://git.droids-corp.org/?a=commitdiff_plain;h=63d5d0af4f0c70dcc29ca80db85727be23aef3ff;p=dpdk.git bus/fslmc: use CINH read on LS1088 platform LS1088 platform CENA operation are causing issues at high load. CINH (cache inhibited) mode is working fine with minor performance impact. This patch enables CINH mode selectively on LS1088 platform Signed-off-by: Nipun Gupta --- diff --git a/drivers/bus/fslmc/portal/dpaa2_hw_dpio.h b/drivers/bus/fslmc/portal/dpaa2_hw_dpio.h index 17e7e4fad7..c68495eafd 100644 --- a/drivers/bus/fslmc/portal/dpaa2_hw_dpio.h +++ b/drivers/bus/fslmc/portal/dpaa2_hw_dpio.h @@ -28,8 +28,6 @@ RTE_DECLARE_PER_LCORE(struct dpaa2_io_portal_t, _dpaa2_io); #define DPAA2_PER_LCORE_ETHRX_DPIO RTE_PER_LCORE(_dpaa2_io).ethrx_dpio_dev #define DPAA2_PER_LCORE_ETHRX_PORTAL DPAA2_PER_LCORE_ETHRX_DPIO->sw_portal -/* Variable to store DPAA2 platform type */ -extern uint32_t dpaa2_svr_family; /* Variable to store DPAA2 DQRR size */ extern uint8_t dpaa2_dqrr_size; /* Variable to store DPAA2 EQCR size */ diff --git a/drivers/bus/fslmc/portal/dpaa2_hw_pvt.h b/drivers/bus/fslmc/portal/dpaa2_hw_pvt.h index 0cbde8a9be..92fc762119 100644 --- a/drivers/bus/fslmc/portal/dpaa2_hw_pvt.h +++ b/drivers/bus/fslmc/portal/dpaa2_hw_pvt.h @@ -23,11 +23,6 @@ #define lower_32_bits(x) ((uint32_t)(x)) #define upper_32_bits(x) ((uint32_t)(((x) >> 16) >> 16)) -#define SVR_LS1080A 0x87030000 -#define SVR_LS2080A 0x87010000 -#define SVR_LS2088A 0x87090000 -#define SVR_LX2160A 0x87360000 - #ifndef VLAN_TAG_SIZE #define VLAN_TAG_SIZE 4 /** < Vlan Header Length */ #endif diff --git a/drivers/bus/fslmc/qbman/include/fsl_qbman_portal.h b/drivers/bus/fslmc/qbman/include/fsl_qbman_portal.h index c35dafedbc..88f0a99686 100644 --- a/drivers/bus/fslmc/qbman/include/fsl_qbman_portal.h +++ b/drivers/bus/fslmc/qbman/include/fsl_qbman_portal.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: BSD-3-Clause * * Copyright (C) 2014 Freescale Semiconductor, Inc. + * Copyright 2015-2019 NXP * */ #ifndef _FSL_QBMAN_PORTAL_H @@ -8,6 +9,14 @@ #include +#define SVR_LS1080A 0x87030000 +#define SVR_LS2080A 0x87010000 +#define SVR_LS2088A 0x87090000 +#define SVR_LX2160A 0x87360000 + +/* Variable to store DPAA2 platform type */ +extern uint32_t dpaa2_svr_family; + /** * DOC - QBMan portal APIs to implement the following functions: * - Initialize and destroy Software portal object. diff --git a/drivers/bus/fslmc/qbman/qbman_portal.c b/drivers/bus/fslmc/qbman/qbman_portal.c index 20da8b9219..e6066ce357 100644 --- a/drivers/bus/fslmc/qbman/qbman_portal.c +++ b/drivers/bus/fslmc/qbman/qbman_portal.c @@ -76,6 +76,10 @@ qbman_swp_enqueue_ring_mode_direct(struct qbman_swp *s, const struct qbman_eq_desc *d, const struct qbman_fd *fd); static int +qbman_swp_enqueue_ring_mode_cinh_direct(struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct qbman_fd *fd); +static int qbman_swp_enqueue_ring_mode_mem_back(struct qbman_swp *s, const struct qbman_eq_desc *d, const struct qbman_fd *fd); @@ -87,6 +91,12 @@ qbman_swp_enqueue_multiple_direct(struct qbman_swp *s, uint32_t *flags, int num_frames); static int +qbman_swp_enqueue_multiple_cinh_direct(struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct qbman_fd *fd, + uint32_t *flags, + int num_frames); +static int qbman_swp_enqueue_multiple_mem_back(struct qbman_swp *s, const struct qbman_eq_desc *d, const struct qbman_fd *fd, @@ -99,7 +109,12 @@ qbman_swp_enqueue_multiple_fd_direct(struct qbman_swp *s, struct qbman_fd **fd, uint32_t *flags, int num_frames); - +static int +qbman_swp_enqueue_multiple_fd_cinh_direct(struct qbman_swp *s, + const struct qbman_eq_desc *d, + struct qbman_fd **fd, + uint32_t *flags, + int num_frames); static int qbman_swp_enqueue_multiple_fd_mem_back(struct qbman_swp *s, const struct qbman_eq_desc *d, @@ -113,6 +128,11 @@ qbman_swp_enqueue_multiple_desc_direct(struct qbman_swp *s, const struct qbman_fd *fd, int num_frames); static int +qbman_swp_enqueue_multiple_desc_cinh_direct(struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct qbman_fd *fd, + int num_frames); +static int qbman_swp_enqueue_multiple_desc_mem_back(struct qbman_swp *s, const struct qbman_eq_desc *d, const struct qbman_fd *fd, @@ -273,6 +293,17 @@ struct qbman_swp *qbman_swp_init(const struct qbman_swp_desc *d) qbman_swp_release_ptr = qbman_swp_release_mem_back; } + if (dpaa2_svr_family == SVR_LS1080A) { + qbman_swp_enqueue_ring_mode_ptr = + qbman_swp_enqueue_ring_mode_cinh_direct; + qbman_swp_enqueue_multiple_ptr = + qbman_swp_enqueue_multiple_cinh_direct; + qbman_swp_enqueue_multiple_fd_ptr = + qbman_swp_enqueue_multiple_fd_cinh_direct; + qbman_swp_enqueue_multiple_desc_ptr = + qbman_swp_enqueue_multiple_desc_cinh_direct; + } + for (mask_size = p->eqcr.pi_ring_size; mask_size > 0; mask_size >>= 1) p->eqcr.pi_ci_mask = (p->eqcr.pi_ci_mask<<1) + 1; eqcr_pi = qbman_cinh_read(&p->sys, QBMAN_CINH_SWP_EQCR_PI); @@ -700,6 +731,46 @@ static int qbman_swp_enqueue_ring_mode_direct(struct qbman_swp *s, return 0; } +static int qbman_swp_enqueue_ring_mode_cinh_direct( + struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct qbman_fd *fd) +{ + uint32_t *p; + const uint32_t *cl = qb_cl(d); + uint32_t eqcr_ci, full_mask, half_mask; + + half_mask = (s->eqcr.pi_ci_mask>>1); + full_mask = s->eqcr.pi_ci_mask; + if (!s->eqcr.available) { + eqcr_ci = s->eqcr.ci; + s->eqcr.ci = qbman_cinh_read(&s->sys, + QBMAN_CINH_SWP_EQCR_CI) & full_mask; + s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size, + eqcr_ci, s->eqcr.ci); + if (!s->eqcr.available) + return -EBUSY; + } + + p = qbman_cena_write_start_wo_shadow(&s->sys, + QBMAN_CENA_SWP_EQCR(s->eqcr.pi & half_mask)); + memcpy(&p[1], &cl[1], 28); + memcpy(&p[8], fd, sizeof(*fd)); + lwsync(); + + /* Set the verb byte, have to substitute in the valid-bit */ + p[0] = cl[0] | s->eqcr.pi_vb; + qbman_cena_write_complete_wo_shadow(&s->sys, + QBMAN_CENA_SWP_EQCR(s->eqcr.pi & half_mask)); + s->eqcr.pi++; + s->eqcr.pi &= full_mask; + s->eqcr.available--; + if (!(s->eqcr.pi & half_mask)) + s->eqcr.pi_vb ^= QB_VALID_BIT; + + return 0; +} + static int qbman_swp_enqueue_ring_mode_mem_back(struct qbman_swp *s, const struct qbman_eq_desc *d, const struct qbman_fd *fd) @@ -823,6 +894,76 @@ static int qbman_swp_enqueue_multiple_direct(struct qbman_swp *s, return num_enqueued; } +static int qbman_swp_enqueue_multiple_cinh_direct( + struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct qbman_fd *fd, + uint32_t *flags, + int num_frames) +{ + uint32_t *p = NULL; + const uint32_t *cl = qb_cl(d); + uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask; + int i, num_enqueued = 0; + uint64_t addr_cena; + + half_mask = (s->eqcr.pi_ci_mask>>1); + full_mask = s->eqcr.pi_ci_mask; + if (!s->eqcr.available) { + eqcr_ci = s->eqcr.ci; + s->eqcr.ci = qbman_cinh_read(&s->sys, + QBMAN_CINH_SWP_EQCR_CI) & full_mask; + s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size, + eqcr_ci, s->eqcr.ci); + if (!s->eqcr.available) + return 0; + } + + eqcr_pi = s->eqcr.pi; + num_enqueued = (s->eqcr.available < num_frames) ? + s->eqcr.available : num_frames; + s->eqcr.available -= num_enqueued; + /* Fill in the EQCR ring */ + for (i = 0; i < num_enqueued; i++) { + p = qbman_cena_write_start_wo_shadow(&s->sys, + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)); + memcpy(&p[1], &cl[1], 28); + memcpy(&p[8], &fd[i], sizeof(*fd)); + eqcr_pi++; + } + + lwsync(); + + /* Set the verb byte, have to substitute in the valid-bit */ + eqcr_pi = s->eqcr.pi; + for (i = 0; i < num_enqueued; i++) { + p = qbman_cena_write_start_wo_shadow(&s->sys, + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)); + p[0] = cl[0] | s->eqcr.pi_vb; + if (flags && (flags[i] & QBMAN_ENQUEUE_FLAG_DCA)) { + struct qbman_eq_desc *d = (struct qbman_eq_desc *)p; + + d->eq.dca = (1 << QB_ENQUEUE_CMD_DCA_EN_SHIFT) | + ((flags[i]) & QBMAN_EQCR_DCA_IDXMASK); + } + eqcr_pi++; + if (!(eqcr_pi & half_mask)) + s->eqcr.pi_vb ^= QB_VALID_BIT; + } + + /* Flush all the cacheline without load/store in between */ + eqcr_pi = s->eqcr.pi; + addr_cena = (size_t)s->sys.addr_cena; + for (i = 0; i < num_enqueued; i++) { + dcbf(addr_cena + + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)); + eqcr_pi++; + } + s->eqcr.pi = eqcr_pi & full_mask; + + return num_enqueued; +} + static int qbman_swp_enqueue_multiple_mem_back(struct qbman_swp *s, const struct qbman_eq_desc *d, const struct qbman_fd *fd, @@ -954,6 +1095,76 @@ static int qbman_swp_enqueue_multiple_fd_direct(struct qbman_swp *s, return num_enqueued; } +static int qbman_swp_enqueue_multiple_fd_cinh_direct( + struct qbman_swp *s, + const struct qbman_eq_desc *d, + struct qbman_fd **fd, + uint32_t *flags, + int num_frames) +{ + uint32_t *p = NULL; + const uint32_t *cl = qb_cl(d); + uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask; + int i, num_enqueued = 0; + uint64_t addr_cena; + + half_mask = (s->eqcr.pi_ci_mask>>1); + full_mask = s->eqcr.pi_ci_mask; + if (!s->eqcr.available) { + eqcr_ci = s->eqcr.ci; + s->eqcr.ci = qbman_cinh_read(&s->sys, + QBMAN_CINH_SWP_EQCR_CI) & full_mask; + s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size, + eqcr_ci, s->eqcr.ci); + if (!s->eqcr.available) + return 0; + } + + eqcr_pi = s->eqcr.pi; + num_enqueued = (s->eqcr.available < num_frames) ? + s->eqcr.available : num_frames; + s->eqcr.available -= num_enqueued; + /* Fill in the EQCR ring */ + for (i = 0; i < num_enqueued; i++) { + p = qbman_cena_write_start_wo_shadow(&s->sys, + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)); + memcpy(&p[1], &cl[1], 28); + memcpy(&p[8], fd[i], sizeof(struct qbman_fd)); + eqcr_pi++; + } + + lwsync(); + + /* Set the verb byte, have to substitute in the valid-bit */ + eqcr_pi = s->eqcr.pi; + for (i = 0; i < num_enqueued; i++) { + p = qbman_cena_write_start_wo_shadow(&s->sys, + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)); + p[0] = cl[0] | s->eqcr.pi_vb; + if (flags && (flags[i] & QBMAN_ENQUEUE_FLAG_DCA)) { + struct qbman_eq_desc *d = (struct qbman_eq_desc *)p; + + d->eq.dca = (1 << QB_ENQUEUE_CMD_DCA_EN_SHIFT) | + ((flags[i]) & QBMAN_EQCR_DCA_IDXMASK); + } + eqcr_pi++; + if (!(eqcr_pi & half_mask)) + s->eqcr.pi_vb ^= QB_VALID_BIT; + } + + /* Flush all the cacheline without load/store in between */ + eqcr_pi = s->eqcr.pi; + addr_cena = (size_t)s->sys.addr_cena; + for (i = 0; i < num_enqueued; i++) { + dcbf(addr_cena + + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)); + eqcr_pi++; + } + s->eqcr.pi = eqcr_pi & full_mask; + + return num_enqueued; +} + static int qbman_swp_enqueue_multiple_fd_mem_back(struct qbman_swp *s, const struct qbman_eq_desc *d, struct qbman_fd **fd, @@ -1087,6 +1298,71 @@ static int qbman_swp_enqueue_multiple_desc_direct(struct qbman_swp *s, return num_enqueued; } +static int qbman_swp_enqueue_multiple_desc_cinh_direct( + struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct qbman_fd *fd, + int num_frames) +{ + uint32_t *p; + const uint32_t *cl; + uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask; + int i, num_enqueued = 0; + uint64_t addr_cena; + + half_mask = (s->eqcr.pi_ci_mask>>1); + full_mask = s->eqcr.pi_ci_mask; + if (!s->eqcr.available) { + eqcr_ci = s->eqcr.ci; + s->eqcr.ci = qbman_cinh_read(&s->sys, + QBMAN_CINH_SWP_EQCR_CI) & full_mask; + s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size, + eqcr_ci, s->eqcr.ci); + if (!s->eqcr.available) + return 0; + } + + eqcr_pi = s->eqcr.pi; + num_enqueued = (s->eqcr.available < num_frames) ? + s->eqcr.available : num_frames; + s->eqcr.available -= num_enqueued; + /* Fill in the EQCR ring */ + for (i = 0; i < num_enqueued; i++) { + p = qbman_cena_write_start_wo_shadow(&s->sys, + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)); + cl = qb_cl(&d[i]); + memcpy(&p[1], &cl[1], 28); + memcpy(&p[8], &fd[i], sizeof(*fd)); + eqcr_pi++; + } + + lwsync(); + + /* Set the verb byte, have to substitute in the valid-bit */ + eqcr_pi = s->eqcr.pi; + for (i = 0; i < num_enqueued; i++) { + p = qbman_cena_write_start_wo_shadow(&s->sys, + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)); + cl = qb_cl(&d[i]); + p[0] = cl[0] | s->eqcr.pi_vb; + eqcr_pi++; + if (!(eqcr_pi & half_mask)) + s->eqcr.pi_vb ^= QB_VALID_BIT; + } + + /* Flush all the cacheline without load/store in between */ + eqcr_pi = s->eqcr.pi; + addr_cena = (size_t)s->sys.addr_cena; + for (i = 0; i < num_enqueued; i++) { + dcbf(addr_cena + + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)); + eqcr_pi++; + } + s->eqcr.pi = eqcr_pi & full_mask; + + return num_enqueued; +} + static int qbman_swp_enqueue_multiple_desc_mem_back(struct qbman_swp *s, const struct qbman_eq_desc *d, const struct qbman_fd *fd, diff --git a/drivers/bus/fslmc/qbman/qbman_sys.h b/drivers/bus/fslmc/qbman/qbman_sys.h index 71f7a67822..e59fcfd542 100644 --- a/drivers/bus/fslmc/qbman/qbman_sys.h +++ b/drivers/bus/fslmc/qbman/qbman_sys.h @@ -381,6 +381,14 @@ static inline uint32_t qbman_set_swp_cfg(uint8_t max_fill, uint8_t wn, #define QMAN_REV_5000 0x05000000 #define QMAN_REV_MASK 0xffff0000 +#define SVR_LS1080A 0x87030000 +#define SVR_LS2080A 0x87010000 +#define SVR_LS2088A 0x87090000 +#define SVR_LX2160A 0x87360000 + +/* Variable to store DPAA2 platform type */ +extern uint32_t dpaa2_svr_family; + static inline int qbman_swp_sys_init(struct qbman_swp_sys *s, const struct qbman_swp_desc *d, uint8_t dqrr_size) @@ -388,16 +396,17 @@ static inline int qbman_swp_sys_init(struct qbman_swp_sys *s, uint32_t reg; int i; int cena_region_size = 4*1024; - - if ((d->qman_version & QMAN_REV_MASK) >= QMAN_REV_5000 - && (d->cena_access_mode == qman_cena_fastest_access)) - cena_region_size = 64*1024; + uint8_t est = 1; #ifdef RTE_ARCH_64 uint8_t wn = CENA_WRITE_ENABLE; #else uint8_t wn = CINH_WRITE_ENABLE; #endif + + if ((d->qman_version & QMAN_REV_MASK) >= QMAN_REV_5000 + && (d->cena_access_mode == qman_cena_fastest_access)) + cena_region_size = 64*1024; s->addr_cena = d->cena_bar; s->addr_cinh = d->cinh_bar; s->idx = (uint32_t)d->idx; @@ -428,6 +437,9 @@ static inline int qbman_swp_sys_init(struct qbman_swp_sys *s, dccivac(s->addr_cena + i); } + if (dpaa2_svr_family == SVR_LS1080A) + est = 0; + if (s->eqcr_mode == qman_eqcr_vb_array) { reg = qbman_set_swp_cfg(dqrr_size, wn, 0, 3, 2, 3, 1, 1, 1, 1, 1, 1); @@ -438,7 +450,7 @@ static inline int qbman_swp_sys_init(struct qbman_swp_sys *s, 1, 3, 2, 0, 1, 1, 1, 1, 1, 1); else reg = qbman_set_swp_cfg(dqrr_size, wn, - 1, 3, 2, 2, 1, 1, 1, 1, 1, 1); + est, 3, 2, 2, 1, 1, 1, 1, 1, 1); } if ((d->qman_version & QMAN_REV_MASK) >= QMAN_REV_5000