From: David Coyle
Date: Tue, 21 Jul 2020 14:47:18 +0000 (+0100)
Subject: crypto/qat: fix DOCSIS performance
X-Git-Url: http://git.droids-corp.org/?a=commitdiff_plain;h=012affe1383850e1ebf29fb8287cb2ff06a7f007;p=dpdk.git

crypto/qat: fix DOCSIS performance

DOCSIS protocol performance in the downlink direction can be improved
significantly in the QAT SYM PMD, especially for larger packets, by
pre-processing all CRC generations in a batch before building and
enqueuing any requests to the HW. This patch adds this optimization.

Fixes: 6f0ef237404b ("crypto/qat: support DOCSIS protocol")

Signed-off-by: David Coyle
Acked-by: Fiona Trahe
---

diff --git a/drivers/common/qat/qat_qp.c b/drivers/common/qat/qat_qp.c
index aacd4ab212..9538739339 100644
--- a/drivers/common/qat/qat_qp.c
+++ b/drivers/common/qat/qat_qp.c
@@ -627,6 +627,10 @@ qat_enqueue_op_burst(void *qp, void **ops, uint16_t nb_ops)
 		}
 	}
 
+#ifdef BUILD_QAT_SYM
+	if (tmp_qp->service_type == QAT_SERVICE_SYMMETRIC)
+		qat_sym_preprocess_requests(ops, nb_ops_possible);
+#endif
 
 	while (nb_ops_sent != nb_ops_possible) {
 		if (tmp_qp->service_type == QAT_SERVICE_SYMMETRIC) {
diff --git a/drivers/crypto/qat/qat_sym.c b/drivers/crypto/qat/qat_sym.c
index e6bf115232..e3f98a76ba 100644
--- a/drivers/crypto/qat/qat_sym.c
+++ b/drivers/crypto/qat/qat_sym.c
@@ -9,9 +9,6 @@
 #include
 #include
 #include
-#ifdef RTE_LIBRTE_SECURITY
-#include <rte_net_crc.h>
-#endif
 
 #include "qat_sym.h"
 
@@ -102,29 +99,6 @@ qat_bpicipher_preprocess(struct qat_sym_session *ctx,
 	return sym_op->cipher.data.length - last_block_len;
 }
 
-#ifdef RTE_LIBRTE_SECURITY
-static inline void
-qat_crc_generate(struct qat_sym_session *ctx,
-			struct rte_crypto_op *op)
-{
-	struct rte_crypto_sym_op *sym_op = op->sym;
-	uint32_t *crc, crc_data_len;
-	uint8_t *crc_data;
-
-	if (ctx->qat_dir == ICP_QAT_HW_CIPHER_ENCRYPT &&
-			sym_op->auth.data.length != 0) {
-
-		crc_data_len = sym_op->auth.data.length;
-		crc_data = rte_pktmbuf_mtod_offset(sym_op->m_src, uint8_t *,
-				sym_op->auth.data.offset);
-		crc = (uint32_t *)(crc_data + crc_data_len);
-
-		*crc = rte_net_crc_calc(crc_data, crc_data_len,
-				RTE_NET_CRC32_ETH);
-	}
-}
-#endif
-
 static inline void
 set_cipher_iv(uint16_t iv_length, uint16_t iv_offset,
 		struct icp_qat_fw_la_cipher_req_params *cipher_param,
@@ -187,7 +161,6 @@ qat_sym_build_request(void *in_op, uint8_t *out_msg,
 	uint64_t auth_data_end = 0;
 	uint8_t do_sgl = 0;
 	uint8_t in_place = 1;
-	uint8_t is_docsis_sec = 0;
 	int alignment_adjustment = 0;
 	struct rte_crypto_op *op = (struct rte_crypto_op *)in_op;
 	struct qat_sym_op_cookie *cookie =
@@ -211,13 +184,23 @@ qat_sym_build_request(void *in_op, uint8_t *out_msg,
 	} else {
 		ctx = (struct qat_sym_session *)get_sec_session_private_data(
 				op->sym->sec_session);
-		if (ctx && ctx->bpi_ctx == NULL) {
-			QAT_DP_LOG(ERR, "QAT PMD only supports security"
-					" operation requests for DOCSIS, op"
-					" (%p) is not for DOCSIS.", op);
-			return -EINVAL;
+		if (likely(ctx)) {
+			if (unlikely(ctx->bpi_ctx == NULL)) {
+				QAT_DP_LOG(ERR, "QAT PMD only supports security"
+						" operation requests for"
+						" DOCSIS, op (%p) is not for"
+						" DOCSIS.", op);
+				return -EINVAL;
+			} else if (unlikely(((op->sym->m_dst != NULL) &&
+					(op->sym->m_dst != op->sym->m_src)) ||
+					op->sym->m_src->nb_segs > 1)) {
+				QAT_DP_LOG(ERR, "OOP and/or multi-segment"
+						" buffers not supported for"
+						" DOCSIS security.");
+				op->status = RTE_CRYPTO_OP_STATUS_INVALID_ARGS;
+				return -EINVAL;
+			}
 		}
-		is_docsis_sec = 1;
 #endif
 	}
 
@@ -281,31 +264,7 @@ qat_sym_build_request(void *in_op, uint8_t *out_msg,
 			cipher_ofs = op->sym->cipher.data.offset >> 3;
 
 		} else if (ctx->bpi_ctx) {
-			/* DOCSIS processing */
-#ifdef RTE_LIBRTE_SECURITY
-			if (is_docsis_sec) {
-				/* Check for OOP or multi-segment buffers */
-				if (unlikely(((op->sym->m_dst != NULL) &&
-						(op->sym->m_dst !=
-						op->sym->m_src)) ||
-						op->sym->m_src->nb_segs > 1)) {
-					QAT_DP_LOG(ERR,
-						"OOP and/or multi-segment "
-						"buffers are not supported for "
-						"DOCSIS security");
-					op->status =
-					RTE_CRYPTO_OP_STATUS_INVALID_ARGS;
-					return -EINVAL;
-				}
-
-				/* Calculate CRC */
-				qat_crc_generate(ctx, op);
-			}
-#else
-			RTE_SET_USED(is_docsis_sec);
-#endif
-
-			/* Only send complete blocks to device.
+			/* DOCSIS - only send complete blocks to device.
 			 * Process any partial block using CFB mode.
 			 * Even if 0 complete blocks, still send this to device
 			 * to get into rx queue for post-process and dequeuing
diff --git a/drivers/crypto/qat/qat_sym.h b/drivers/crypto/qat/qat_sym.h
index 7934dd478d..1a9748849e 100644
--- a/drivers/crypto/qat/qat_sym.h
+++ b/drivers/crypto/qat/qat_sym.h
@@ -158,6 +158,57 @@ qat_crc_verify(struct qat_sym_session *ctx, struct rte_crypto_op *op)
 			op->status = RTE_CRYPTO_OP_STATUS_AUTH_FAILED;
 	}
 }
+
+static inline void
+qat_crc_generate(struct qat_sym_session *ctx,
+			struct rte_crypto_op *op)
+{
+	struct rte_crypto_sym_op *sym_op = op->sym;
+	uint32_t *crc, crc_data_len;
+	uint8_t *crc_data;
+
+	if (ctx->qat_dir == ICP_QAT_HW_CIPHER_ENCRYPT &&
+			sym_op->auth.data.length != 0 &&
+			sym_op->m_src->nb_segs == 1) {
+
+		crc_data_len = sym_op->auth.data.length;
+		crc_data = rte_pktmbuf_mtod_offset(sym_op->m_src, uint8_t *,
+				sym_op->auth.data.offset);
+		crc = (uint32_t *)(crc_data + crc_data_len);
+		*crc = rte_net_crc_calc(crc_data, crc_data_len,
+				RTE_NET_CRC32_ETH);
+	}
+}
+
+static inline void
+qat_sym_preprocess_requests(void **ops, uint16_t nb_ops)
+{
+	struct rte_crypto_op *op;
+	struct qat_sym_session *ctx;
+	uint16_t i;
+
+	for (i = 0; i < nb_ops; i++) {
+		op = (struct rte_crypto_op *)ops[i];
+
+		if (op->sess_type == RTE_CRYPTO_OP_SECURITY_SESSION) {
+			ctx = (struct qat_sym_session *)
+				get_sec_session_private_data(
+					op->sym->sec_session);
+
+			if (ctx == NULL || ctx->bpi_ctx == NULL)
+				continue;
+
+			qat_crc_generate(ctx, op);
+		}
+	}
+}
+#else
+
+static inline void
+qat_sym_preprocess_requests(void **ops __rte_unused,
+				uint16_t nb_ops __rte_unused)
+{
+}
 #endif
 
 static inline void
@@ -215,6 +266,12 @@ qat_sym_process_response(void **op, uint8_t *resp)
 }
 #else
 
+static inline void
+qat_sym_preprocess_requests(void **ops __rte_unused,
+				uint16_t nb_ops __rte_unused)
+{
+}
+
 static inline void
 qat_sym_process_response(void **op __rte_unused, uint8_t *resp __rte_unused)
 {