From 727758576714ea4de4df01bca53978f321ae323a Mon Sep 17 00:00:00 2001
From: Amaranath Somalapuram
Date: Fri, 22 Nov 2019 12:17:17 +0530
Subject: [PATCH] crypto/ccp: fix scheduling of burst

The CCP driver was scheduling only one CCP in a single burst (enqueue),
so the effective throughput was limited to the performance of a single
CCP. Scheduling multiple CCPs within one burst increases performance.
This change divides the enqueued packets equally among the available
CCPs.

Fixes: e0d88a394e ("crypto/ccp: support run-time CPU based auth")
Cc: stable@dpdk.org

Signed-off-by: Amaranath Somalapuram
---
 drivers/crypto/ccp/ccp_crypto.c      | 22 +++++++----
 drivers/crypto/ccp/ccp_crypto.h      |  7 +++-
 drivers/crypto/ccp/ccp_dev.h         |  2 +-
 drivers/crypto/ccp/ccp_pmd_private.h |  2 +
 drivers/crypto/ccp/rte_ccp_pmd.c     | 57 +++++++++++++++++++---------
 5 files changed, 63 insertions(+), 27 deletions(-)

diff --git a/drivers/crypto/ccp/ccp_crypto.c b/drivers/crypto/ccp/ccp_crypto.c
index 1837c85432..4256734d16 100644
--- a/drivers/crypto/ccp/ccp_crypto.c
+++ b/drivers/crypto/ccp/ccp_crypto.c
@@ -2680,7 +2680,9 @@ process_ops_to_enqueue(struct ccp_qp *qp,
 		struct rte_crypto_op **op,
 		struct ccp_queue *cmd_q,
 		uint16_t nb_ops,
-		int slots_req)
+		uint16_t total_nb_ops,
+		int slots_req,
+		uint16_t b_idx)
 {
 	int i, result = 0;
 	struct ccp_batch_info *b_info;
@@ -2701,6 +2703,7 @@ process_ops_to_enqueue(struct ccp_qp *qp,
 
 	/* populate batch info necessary for dequeue */
 	b_info->op_idx = 0;
+	b_info->b_idx = 0;
 	b_info->lsb_buf_idx = 0;
 	b_info->desccnt = 0;
 	b_info->cmd_q = cmd_q;
@@ -2710,7 +2713,7 @@ process_ops_to_enqueue(struct ccp_qp *qp,
 	b_info->head_offset = (uint32_t)(cmd_q->qbase_phys_addr + cmd_q->qidx *
 					 Q_DESC_SIZE);
 
-	for (i = 0; i < nb_ops; i++) {
+	for (i = b_idx; i < (nb_ops+b_idx); i++) {
 		session = (struct ccp_session *)get_sym_session_private_data(
 						 op[i]->sym->session,
 						 ccp_cryptodev_driver_id);
@@ -2762,6 +2765,8 @@ process_ops_to_enqueue(struct ccp_qp *qp,
 	}
 
 	b_info->opcnt = i;
+	b_info->b_idx = b_idx;
+	b_info->total_nb_ops = total_nb_ops;
 	b_info->tail_offset = (uint32_t)(cmd_q->qbase_phys_addr + cmd_q->qidx *
 					 Q_DESC_SIZE);
 
@@ -2776,7 +2781,7 @@ process_ops_to_enqueue(struct ccp_qp *qp,
 
 	rte_ring_enqueue(qp->processed_pkts, (void *)b_info);
 	EVP_MD_CTX_destroy(auth_ctx);
-	return i;
+	return i-b_idx;
 }
 
 static inline void ccp_auth_dq_prepare(struct rte_crypto_op *op)
@@ -2861,8 +2866,8 @@ ccp_prepare_ops(struct ccp_qp *qp,
 	}
 
 	min_ops = RTE_MIN(nb_ops, b_info->opcnt);
-	for (i = 0; i < min_ops; i++) {
-		op_d[i] = b_info->op[b_info->op_idx++];
+	for (i = b_info->b_idx; i < min_ops; i++) {
+		op_d[i] = b_info->op[b_info->b_idx + b_info->op_idx++];
 		session = (struct ccp_session *)get_sym_session_private_data(
 						op_d[i]->sym->session,
 						ccp_cryptodev_driver_id);
@@ -2903,7 +2908,8 @@ ccp_prepare_ops(struct ccp_qp *qp,
 int
 process_ops_to_dequeue(struct ccp_qp *qp,
 		       struct rte_crypto_op **op,
-		       uint16_t nb_ops)
+		       uint16_t nb_ops,
+		       uint16_t *total_nb_ops)
 {
 	struct ccp_batch_info *b_info;
 	uint32_t cur_head_offset;
@@ -2918,6 +2924,7 @@ process_ops_to_dequeue(struct ccp_qp *qp,
 
 	if (b_info->auth_ctr == b_info->opcnt)
 		goto success;
+	*total_nb_ops = b_info->total_nb_ops;
 	cur_head_offset = CCP_READ_REG(b_info->cmd_q->reg_base,
 				       CMD_Q_HEAD_LO_BASE);
 
@@ -2927,7 +2934,7 @@ process_ops_to_dequeue(struct ccp_qp *qp,
 			qp->b_info = b_info;
 			return 0;
 		}
-	} else {
+	} else if (b_info->tail_offset != b_info->head_offset) {
 		if ((cur_head_offset >= b_info->head_offset) ||
 		    (cur_head_offset < b_info->tail_offset)) {
 			qp->b_info = b_info;
@@ -2937,6 +2944,7 @@ process_ops_to_dequeue(struct ccp_qp *qp,
 
 success:
+	*total_nb_ops = b_info->total_nb_ops;
 	nb_ops = ccp_prepare_ops(qp, op, b_info, nb_ops);
 	rte_atomic64_add(&b_info->cmd_q->free_slots, b_info->desccnt);
 	b_info->desccnt = 0;
diff --git a/drivers/crypto/ccp/ccp_crypto.h b/drivers/crypto/ccp/ccp_crypto.h
index 882b398aca..8e6d03efc8 100644
--- a/drivers/crypto/ccp/ccp_crypto.h
+++ b/drivers/crypto/ccp/ccp_crypto.h
@@ -353,7 +353,9 @@ int process_ops_to_enqueue(struct ccp_qp *qp,
 		struct rte_crypto_op **op,
 		struct ccp_queue *cmd_q,
 		uint16_t nb_ops,
-		int slots_req);
+		uint16_t total_nb_ops,
+		int slots_req,
+		uint16_t b_idx);
 
 /**
  * process crypto ops to be dequeued
@@ -365,7 +367,8 @@ int process_ops_to_enqueue(struct ccp_qp *qp,
  */
 int process_ops_to_dequeue(struct ccp_qp *qp,
 			   struct rte_crypto_op **op,
-			   uint16_t nb_ops);
+			   uint16_t nb_ops,
+			   uint16_t *total_nb_ops);
 
 
 /**
diff --git a/drivers/crypto/ccp/ccp_dev.h b/drivers/crypto/ccp/ccp_dev.h
index de3e4bcc63..f4ad9eafd5 100644
--- a/drivers/crypto/ccp/ccp_dev.h
+++ b/drivers/crypto/ccp/ccp_dev.h
@@ -59,7 +59,7 @@
 #define CMD_Q_RUN 0x1
 #define CMD_Q_SIZE 0x1F
 #define CMD_Q_SHIFT 3
-#define COMMANDS_PER_QUEUE 2048
+#define COMMANDS_PER_QUEUE 8192
 #define QUEUE_SIZE_VAL ((ffs(COMMANDS_PER_QUEUE) - 2) & \
 			CMD_Q_SIZE)
diff --git a/drivers/crypto/ccp/ccp_pmd_private.h b/drivers/crypto/ccp/ccp_pmd_private.h
index 781050c31b..1c4118ee3c 100644
--- a/drivers/crypto/ccp/ccp_pmd_private.h
+++ b/drivers/crypto/ccp/ccp_pmd_private.h
@@ -50,8 +50,10 @@ struct ccp_batch_info {
 	struct rte_crypto_op *op[CCP_MAX_BURST];
 	/**< optable populated at enque time from app*/
 	int op_idx;
+	uint16_t b_idx;
 	struct ccp_queue *cmd_q;
 	uint16_t opcnt;
+	uint16_t total_nb_ops;
 	/**< no. of crypto ops in batch*/
 	int desccnt;
 	/**< no. of ccp queue descriptors*/
diff --git a/drivers/crypto/ccp/rte_ccp_pmd.c b/drivers/crypto/ccp/rte_ccp_pmd.c
index 4810d799cc..38cb1fe3da 100644
--- a/drivers/crypto/ccp/rte_ccp_pmd.c
+++ b/drivers/crypto/ccp/rte_ccp_pmd.c
@@ -22,6 +22,7 @@
  */
 static unsigned int ccp_pmd_init_done;
 uint8_t ccp_cryptodev_driver_id;
+uint8_t cryptodev_cnt;
 
 struct ccp_pmd_init_params {
 	struct rte_cryptodev_pmd_init_params def_p;
@@ -201,30 +202,46 @@ ccp_pmd_enqueue_burst(void *queue_pair, struct rte_crypto_op **ops,
 	struct ccp_queue *cmd_q;
 	struct rte_cryptodev *dev = qp->dev;
 	uint16_t i, enq_cnt = 0, slots_req = 0;
+	uint16_t tmp_ops = nb_ops, b_idx, cur_ops = 0;
 
 	if (nb_ops == 0)
 		return 0;
 
 	if (unlikely(rte_ring_full(qp->processed_pkts) != 0))
 		return 0;
+	if (tmp_ops >= cryptodev_cnt)
+		cur_ops = nb_ops / cryptodev_cnt + (nb_ops)%cryptodev_cnt;
+	else
+		cur_ops = tmp_ops;
+	while (tmp_ops) {
+		b_idx = nb_ops - tmp_ops;
+		slots_req = 0;
+		if (cur_ops <= tmp_ops) {
+			tmp_ops -= cur_ops;
+		} else {
+			cur_ops = tmp_ops;
+			tmp_ops = 0;
+		}
+		for (i = 0; i < cur_ops; i++) {
+			sess = get_ccp_session(qp, ops[i + b_idx]);
+			if (unlikely(sess == NULL) && (i == 0)) {
+				qp->qp_stats.enqueue_err_count++;
+				return 0;
+			} else if (sess == NULL) {
+				cur_ops = i;
+				break;
+			}
+			slots_req += ccp_compute_slot_count(sess);
+		}
 
-	for (i = 0; i < nb_ops; i++) {
-		sess = get_ccp_session(qp, ops[i]);
-		if (unlikely(sess == NULL) && (i == 0)) {
-			qp->qp_stats.enqueue_err_count++;
+		cmd_q = ccp_allot_queue(dev, slots_req);
+		if (unlikely(cmd_q == NULL))
 			return 0;
-		} else if (sess == NULL) {
-			nb_ops = i;
-			break;
-		}
-		slots_req += ccp_compute_slot_count(sess);
+		enq_cnt += process_ops_to_enqueue(qp, ops, cmd_q, cur_ops,
+				nb_ops, slots_req, b_idx);
+		i++;
 	}
 
-	cmd_q = ccp_allot_queue(dev, slots_req);
-	if (unlikely(cmd_q == NULL))
-		return 0;
-
-	enq_cnt = process_ops_to_enqueue(qp, ops, cmd_q, nb_ops, slots_req);
 	qp->qp_stats.enqueued_count += enq_cnt;
 	return enq_cnt;
 }
@@ -234,9 +251,16 @@ ccp_pmd_dequeue_burst(void *queue_pair, struct rte_crypto_op **ops,
 		uint16_t nb_ops)
 {
 	struct ccp_qp *qp = queue_pair;
-	uint16_t nb_dequeued = 0, i;
+	uint16_t nb_dequeued = 0, i, total_nb_ops;
+
+	nb_dequeued = process_ops_to_dequeue(qp, ops, nb_ops, &total_nb_ops);
 
-	nb_dequeued = process_ops_to_dequeue(qp, ops, nb_ops);
+	if (total_nb_ops) {
+		while (nb_dequeued != total_nb_ops) {
+			nb_dequeued = process_ops_to_dequeue(qp,
+					ops, nb_ops, &total_nb_ops);
+		}
+	}
 
 	/* Free session if a session-less crypto op */
 	for (i = 0; i < nb_dequeued; i++)
@@ -296,7 +320,6 @@ cryptodev_ccp_create(const char *name,
 {
 	struct rte_cryptodev *dev;
 	struct ccp_private *internals;
-	uint8_t cryptodev_cnt = 0;
 
 	if (init_params->def_p.name[0] == '\0')
 		strlcpy(init_params->def_p.name, name,
-- 
2.20.1
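
Note on the splitting arithmetic in ccp_pmd_enqueue_burst() above: each
per-CCP chunk is sized as nb_ops / cryptodev_cnt + nb_ops % cryptodev_cnt,
and b_idx is the running offset into ops[]. The standalone sketch below
reproduces only that bookkeeping so it can be traced in isolation. It is
illustrative only: it builds without DPDK, and the helper name split_burst
is hypothetical, not part of the driver.

#include <stdint.h>
#include <stdio.h>

/* Illustrative sketch of the chunking used by ccp_pmd_enqueue_burst():
 * every chunk is nb_ops / cryptodev_cnt + nb_ops % cryptodev_cnt ops,
 * and the last chunk takes whatever is left over.
 * Assumes cryptodev_cnt >= 1; split_burst() is a hypothetical helper,
 * not driver code. */
static void
split_burst(uint16_t nb_ops, uint8_t cryptodev_cnt)
{
	uint16_t tmp_ops = nb_ops, b_idx, cur_ops;

	if (tmp_ops >= cryptodev_cnt)
		cur_ops = nb_ops / cryptodev_cnt + nb_ops % cryptodev_cnt;
	else
		cur_ops = tmp_ops;

	while (tmp_ops) {
		b_idx = nb_ops - tmp_ops;	/* offset of this chunk in ops[] */
		if (cur_ops <= tmp_ops) {
			tmp_ops -= cur_ops;
		} else {
			cur_ops = tmp_ops;	/* final, short chunk */
			tmp_ops = 0;
		}
		/* In the driver, ops[b_idx] .. ops[b_idx + cur_ops - 1] would
		 * be queued on one CCP via process_ops_to_enqueue(). */
		printf("chunk: b_idx=%u cur_ops=%u\n",
		       (unsigned)b_idx, (unsigned)cur_ops);
	}
}

int
main(void)
{
	split_burst(32, 4);	/* 4 CCPs, 32 ops -> chunks of 8, 8, 8, 8 */
	split_burst(34, 4);	/* 34 ops -> chunks of 10, 10, 10, 4 */
	return 0;
}

Because every chunk (not just the first) carries the remainder, a burst
that is not an exact multiple of the CCP count ends with one short chunk,
as the 34-op example shows.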