Add hardware-supported flow control checks before enqueueing to CPT.
Since both poll mode and event mode can be used at the same time, add
hardware flow control checks to make sure software does not over-submit
to the hardware queues. On cn9k, queue depth usage is not high, so the
FC check is omitted in poll mode.
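
A rough sketch (not part of the patch) of the check that the enqueue
paths below perform; the helper name is made up, while fc_addr,
fc_thresh and union cpt_fc_write_s come from this patch:

#include <stdint.h>

/* Sketch only: mirrors the inline FC checks added below. */
static inline int
cpt_fc_would_overflow(uint64_t *fc_addr, uint32_t fc_thresh)
{
	union cpt_fc_write_s fc;

	/* Hardware posts the current queue occupancy to *fc_addr once
	 * every 32 descriptors (per fc_hyst_bits); a relaxed load is
	 * sufficient since the value is only advisory.
	 */
	fc.u64[0] = __atomic_load_n(fc_addr, __ATOMIC_RELAXED);

	/* Back off (EAGAIN, or commit what is already queued) when the
	 * reported queue size crosses the software threshold.
	 */
	return fc.s.qsize > fc_thresh;
}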
To allow for more accurate updates, the flow control hardware setting
is configured to post an update once every 32 packets. With the crypto
adapter, multiple cores can enqueue to the same CPT LF at the same
time; to support this, the flow control threshold is adjusted when the
adapter is configured.
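
Illustrative only (not part of the patch): how the two threshold
adjustments below fit together. The function and its parameters are
hypothetical; the constants 32 and 2 are the hardware burst sizes used
in the patch:

#include <stdint.h>

#define CPT_LF_FC_MIN_THRESHOLD 32

static inline uint32_t
cpt_fc_thresh_sketch(uint32_t nb_desc, uint32_t nb_lcores, int is_cn10k)
{
	/* Base threshold set at lmtline init: leave one FC update
	 * interval (32 descriptors) of headroom below the queue depth.
	 */
	uint32_t fc_thresh = nb_desc - CPT_LF_FC_MIN_THRESHOLD;

	/* Crypto adapter configure: reserve one hardware burst per core
	 * so that all cores can enqueue to the same LF simultaneously.
	 * The caller must first check fc_thresh >= nb_lcores * burst
	 * (the patch returns -ENOSPC otherwise).
	 */
	uint32_t burst = is_cn10k ? 32 : 2;

	return fc_thresh - nb_lcores * burst;
}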
Signed-off-by: Anoob Joseph <anoobj@marvell.com>
Acked-by: Akhil Goyal <gakhil@marvell.com>
} w1;
};
+union cpt_fc_write_s {
+ struct {
+ uint32_t qsize;
+ uint32_t reserved_32_63;
+ uint64_t reserved_64_127;
+ } s;
+ uint64_t u64[2];
+};
+
#endif /* __CPT_HW_H__ */
#define CPT_IQ_GRP_SIZE(nb_desc) \
(CPT_IQ_NB_DESC_SIZE_DIV40(nb_desc) * CPT_IQ_GRP_LEN)
-#define CPT_LF_MAX_NB_DESC 128000
-#define CPT_LF_DEFAULT_NB_DESC 1024
+#define CPT_LF_MAX_NB_DESC 128000
+#define CPT_LF_DEFAULT_NB_DESC 1024
+#define CPT_LF_FC_MIN_THRESHOLD 32
static void
cpt_lf_misc_intr_enb_dis(struct roc_cpt_lf *lf, bool enb)
plt_write64(lf_q_size.u, lf->rbase + CPT_LF_Q_SIZE);
lf->fc_addr = (uint64_t *)addr;
- lf->fc_hyst_bits = plt_log2_u32(lf->nb_desc) / 2;
- lf->fc_thresh = lf->nb_desc - (lf->nb_desc % (1 << lf->fc_hyst_bits));
}
int
lf_ctl.s.ena = 1;
lf_ctl.s.fc_ena = 1;
lf_ctl.s.fc_up_crossing = 0;
- lf_ctl.s.fc_hyst_bits = lf->fc_hyst_bits;
+ lf_ctl.s.fc_hyst_bits = plt_log2_u32(CPT_LF_FC_MIN_THRESHOLD);
plt_write64(lf_ctl.u, lf->rbase + CPT_LF_CTL);
/* Enable command queue execution */
lmtline->fc_addr = lf->fc_addr;
lmtline->lmt_base = lf->lmt_base;
+ lmtline->fc_thresh = lf->nb_desc - CPT_LF_FC_MIN_THRESHOLD;
return 0;
}
uint64_t io_addr;
uint64_t *fc_addr;
uintptr_t lmt_base;
+ uint32_t fc_thresh;
};
struct roc_cpt_lf {
uint16_t msixoff;
uint16_t pf_func;
uint64_t *fc_addr;
- uint32_t fc_hyst_bits;
- uint64_t fc_thresh;
uint64_t io_addr;
uint8_t *iq_vaddr;
struct roc_nix *inl_outb_nix;
uint16_t zombie_thres;
};
-static inline int
-roc_cpt_is_iq_full(struct roc_cpt_lf *lf)
-{
- if (*lf->fc_addr < lf->fc_thresh)
- return 0;
-
- return 1;
-}
-
int __roc_api roc_cpt_rxc_time_cfg(struct roc_cpt *roc_cpt,
struct roc_cpt_rxc_time_cfg *cfg);
int __roc_api roc_cpt_dev_init(struct roc_cpt *roc_cpt);
struct cnxk_cpt_qp *qp = qptr;
struct pending_queue *pend_q;
struct cpt_inst_s *inst;
+ union cpt_fc_write_s fc;
+ uint64_t *fc_addr;
uint16_t lmt_id;
uint64_t head;
int ret, i;
lmt_base = qp->lmtline.lmt_base;
io_addr = qp->lmtline.io_addr;
+ fc_addr = qp->lmtline.fc_addr;
+
+ const uint32_t fc_thresh = qp->lmtline.fc_thresh;
ROC_LMT_BASE_ID_GET(lmt_base, lmt_id);
inst = (struct cpt_inst_s *)lmt_base;
again:
+ fc.u64[0] = __atomic_load_n(fc_addr, __ATOMIC_RELAXED);
+ if (unlikely(fc.s.qsize > fc_thresh)) {
+ i = 0;
+ goto pend_q_commit;
+ }
+
for (i = 0; i < RTE_MIN(PKTS_PER_LOOP, nb_ops); i++) {
infl_req = &pend_q->req_queue[head];
infl_req->op_flags = 0;
struct cpt_inflight_req *infl_req;
uint64_t lmt_base, lmt_arg, w2;
struct cpt_inst_s *inst;
+ union cpt_fc_write_s fc;
struct cnxk_cpt_qp *qp;
+ uint64_t *fc_addr;
uint16_t lmt_id;
int ret;
infl_req->op_flags = 0;
lmt_base = qp->lmtline.lmt_base;
+ fc_addr = qp->lmtline.fc_addr;
+
+ const uint32_t fc_thresh = qp->lmtline.fc_thresh;
+
ROC_LMT_BASE_ID_GET(lmt_base, lmt_id);
inst = (struct cpt_inst_s *)lmt_base;
inst->w2.u64 = w2;
inst->w3.u64 = CNXK_CPT_INST_W3(1, infl_req);
- if (roc_cpt_is_iq_full(&qp->lf)) {
+ fc.u64[0] = __atomic_load_n(fc_addr, __ATOMIC_RELAXED);
+ if (unlikely(fc.s.qsize > fc_thresh)) {
rte_mempool_put(qp->ca.req_mp, infl_req);
rte_errno = EAGAIN;
return 0;
cn9k_cpt_crypto_adapter_enqueue(uintptr_t base, struct rte_crypto_op *op)
{
struct cpt_inflight_req *infl_req;
+ union cpt_fc_write_s fc;
struct cnxk_cpt_qp *qp;
struct cpt_inst_s inst;
+ uint64_t *fc_addr;
int ret;
ret = cn9k_ca_meta_info_extract(op, &qp, &inst);
inst.res_addr = (uint64_t)&infl_req->res;
inst.w3.u64 = CNXK_CPT_INST_W3(1, infl_req);
- if (roc_cpt_is_iq_full(&qp->lf)) {
+ fc_addr = qp->lmtline.fc_addr;
+
+ const uint32_t fc_thresh = qp->lmtline.fc_thresh;
+
+ fc.u64[0] = __atomic_load_n(fc_addr, __ATOMIC_RELAXED);
+ if (unlikely(fc.s.qsize > fc_thresh)) {
rte_mempool_put(qp->ca.req_mp, infl_req);
rte_errno = EAGAIN;
return 0;
char name[RTE_MEMPOOL_NAMESIZE];
uint32_t cache_size, nb_req;
unsigned int req_size;
+ uint32_t nb_desc_min;
+
+ /*
+ * Update CPT FC threshold. Decrement by hardware burst size to allow
+ * simultaneous enqueue from all available cores.
+ */
+ if (roc_model_is_cn10k())
+ nb_desc_min = rte_lcore_count() * 32;
+ else
+ nb_desc_min = rte_lcore_count() * 2;
+
+ if (qp->lmtline.fc_thresh < nb_desc_min) {
+ plt_err("CPT queue depth not sufficient to allow enqueueing from %d cores",
+ rte_lcore_count());
+ return -ENOSPC;
+ }
+
+ qp->lmtline.fc_thresh -= nb_desc_min;
snprintf(name, RTE_MEMPOOL_NAMESIZE, "cnxk_ca_req_%u:%u",
cdev->data->dev_id, qp->lf.lf_id);
static int
crypto_adapter_qp_free(struct cnxk_cpt_qp *qp)
{
+ int ret;
+
rte_mempool_free(qp->ca.req_mp);
qp->ca.enabled = false;
+ ret = roc_cpt_lmtline_init(qp->lf.roc_cpt, &qp->lmtline, qp->lf.lf_id);
+ if (ret < 0) {
+ plt_err("Could not reset lmtline for queue pair %d",
+ qp->lf.lf_id);
+ return ret;
+ }
+
return 0;
}