From 4c30c94a00ab3f2aae917cf14751d8057c1f99d9 Mon Sep 17 00:00:00 2001 From: Anoob Joseph Date: Mon, 20 Jun 2022 12:48:06 +0530 Subject: [PATCH] crypto/cnxk: improve fast path Remove SA & packet accesses in dequeue path by adjusting the headers in the enqueue path for outbound packets. For inbound packets, add an extra esn_en flag in the SA to minimize cache line accesses in the datapath. Also, use seq_lo for IPID. IPID just needs to be unique. Instead of incrementing per packet, use ESN low bits. Signed-off-by: Anoob Joseph Acked-by: Akhil Goyal --- drivers/crypto/cnxk/cn9k_cryptodev_ops.c | 69 ++++++++++++++---------- drivers/crypto/cnxk/cn9k_ipsec.c | 11 ++-- drivers/crypto/cnxk/cn9k_ipsec.h | 7 ++- drivers/crypto/cnxk/cn9k_ipsec_la_ops.h | 55 +++++++++++-------- drivers/crypto/cnxk/cnxk_cryptodev_ops.h | 12 ++--- 5 files changed, 87 insertions(+), 67 deletions(-) diff --git a/drivers/crypto/cnxk/cn9k_cryptodev_ops.c b/drivers/crypto/cnxk/cn9k_cryptodev_ops.c index 7720730120..8aab9c9f60 100644 --- a/drivers/crypto/cnxk/cn9k_cryptodev_ops.c +++ b/drivers/crypto/cnxk/cn9k_cryptodev_ops.c @@ -43,10 +43,12 @@ cn9k_cpt_sec_inst_fill(struct rte_crypto_op *op, struct cpt_inst_s *inst) { struct rte_crypto_sym_op *sym_op = op->sym; - struct roc_ie_on_common_sa *common_sa; struct cn9k_sec_session *priv; - struct roc_ie_on_sa_ctl *ctl; struct cn9k_ipsec_sa *sa; + int ret; + + priv = get_sec_session_private_data(op->sym->sec_session); + sa = &priv->sa; if (unlikely(sym_op->m_dst && sym_op->m_dst != sym_op->m_src)) { plt_dp_err("Out of place is not supported"); @@ -58,21 +60,17 @@ cn9k_cpt_sec_inst_fill(struct rte_crypto_op *op, return -ENOTSUP; } - priv = get_sec_session_private_data(op->sym->sec_session); - sa = &priv->sa; - if (sa->dir == RTE_SECURITY_IPSEC_SA_DIR_EGRESS) - return process_outb_sa(op, sa, inst); - - infl_req->op_flags |= CPT_OP_FLAGS_IPSEC_DIR_INBOUND; - - common_sa = &sa->in_sa.common_sa; - ctl = &common_sa->ctl; - - if (ctl->esn_en) - infl_req->op_flags |= 
CPT_OP_FLAGS_IPSEC_INB_ESN; + ret = process_outb_sa(op, sa, inst); + else { + infl_req->op_flags |= CPT_OP_FLAGS_IPSEC_DIR_INBOUND; + process_inb_sa(op, sa, inst); + if (unlikely(sa->esn_en)) + infl_req->op_flags |= CPT_OP_FLAGS_IPSEC_INB_ESN; + ret = 0; + } - return process_inb_sa(op, sa, inst); + return ret; } static inline struct cnxk_se_sess * @@ -234,19 +232,29 @@ cn9k_cpt_enqueue_burst(void *qptr, struct rte_crypto_op **ops, uint16_t nb_ops) }; pend_q = &qp->pend_q; - - const uint64_t lmt_base = qp->lf.lmt_base; - const uint64_t io_addr = qp->lf.io_addr; - const uint64_t pq_mask = pend_q->pq_mask; + rte_prefetch2(pend_q); /* Clear w0, w2, w3 of both inst */ +#if defined(RTE_ARCH_ARM64) + uint64x2_t zero = vdupq_n_u64(0); + + vst1q_u64(&inst[0].w0.u64, zero); + vst1q_u64(&inst[1].w0.u64, zero); + vst1q_u64(&inst[0].w2.u64, zero); + vst1q_u64(&inst[1].w2.u64, zero); +#else inst[0].w0.u64 = 0; inst[0].w2.u64 = 0; inst[0].w3.u64 = 0; inst[1].w0.u64 = 0; inst[1].w2.u64 = 0; inst[1].w3.u64 = 0; +#endif + + const uint64_t lmt_base = qp->lf.lmt_base; + const uint64_t io_addr = qp->lf.io_addr; + const uint64_t pq_mask = pend_q->pq_mask; head = pend_q->head; nb_allowed = pending_queue_free_cnt(head, pend_q->tail, pq_mask); @@ -506,21 +514,26 @@ cn9k_cpt_sec_post_process(struct rte_crypto_op *cop, uint16_t m_len = 0; char *data; - priv = get_sec_session_private_data(cop->sym->sec_session); - sa = &priv->sa; - if (infl_req->op_flags & CPT_OP_FLAGS_IPSEC_DIR_INBOUND) { - struct roc_ie_on_common_sa *common_sa = &sa->in_sa.common_sa; + struct roc_ie_on_common_sa *common_sa; data = rte_pktmbuf_mtod(m, char *); - if (infl_req->op_flags == CPT_OP_FLAGS_IPSEC_INB_ESN) { - struct roc_ie_on_inb_hdr *inb_hdr = - (struct roc_ie_on_inb_hdr *)data; - uint64_t seq = rte_be_to_cpu_64(inb_hdr->seq); + if (unlikely(infl_req->op_flags & CPT_OP_FLAGS_IPSEC_INB_ESN)) { + struct roc_ie_on_inb_hdr *inb_hdr; + uint64_t seq; + + priv = get_sec_session_private_data( + sym_op->sec_session); + sa 
= &priv->sa; + common_sa = &sa->in_sa.common_sa; + + inb_hdr = (struct roc_ie_on_inb_hdr *)data; + seq = rte_be_to_cpu_64(inb_hdr->seq); if (seq > common_sa->seq_t.u64) common_sa->seq_t.u64 = seq; } + ip = (struct rte_ipv4_hdr *)(data + ROC_IE_ON_INB_RPTR_HDR); if (((ip->version_ihl & 0xf0) >> RTE_IPV4_IHL_MULTIPLIER) == @@ -537,8 +550,6 @@ cn9k_cpt_sec_post_process(struct rte_crypto_op *cop, m->data_len = m_len; m->pkt_len = m_len; m->data_off += ROC_IE_ON_INB_RPTR_HDR; - } else { - rte_pktmbuf_adj(m, sa->custom_hdr_len); } } diff --git a/drivers/crypto/cnxk/cn9k_ipsec.c b/drivers/crypto/cnxk/cn9k_ipsec.c index 85f3f26c32..49a775eb7f 100644 --- a/drivers/crypto/cnxk/cn9k_ipsec.c +++ b/drivers/crypto/cnxk/cn9k_ipsec.c @@ -40,13 +40,8 @@ cn9k_ipsec_outb_sa_create(struct cnxk_cpt_qp *qp, /* Initialize lookaside IPsec private data */ sa->dir = RTE_SECURITY_IPSEC_SA_DIR_EGRESS; - /* Start ip id from 1 */ - sa->ip_id = 1; - sa->seq_lo = 1; - sa->seq_hi = 0; - if (ipsec->esn.value) - sa->esn = ipsec->esn.value; + sa->esn = ipsec->esn.value; ret = cnxk_ipsec_outb_rlens_get(&sa->rlens, ipsec, crypto_xform); if (ret) @@ -166,10 +161,12 @@ cn9k_ipsec_inb_sa_create(struct cnxk_cpt_qp *qp, } ret = cnxk_on_ipsec_inb_sa_create(ipsec, crypto_xform, &sa->in_sa); - if (ret < 0) return ret; + if (sa->in_sa.common_sa.ctl.esn_en) + sa->esn_en = 1; + ctx_len = ret; opcode = ROC_IE_ON_MAJOR_OP_WRITE_IPSEC_INBOUND; egrp = roc_cpt->eng_grp[CPT_ENG_TYPE_IE]; diff --git a/drivers/crypto/cnxk/cn9k_ipsec.h b/drivers/crypto/cnxk/cn9k_ipsec.h index 499dbc2782..bed5976096 100644 --- a/drivers/crypto/cnxk/cn9k_ipsec.h +++ b/drivers/crypto/cnxk/cn9k_ipsec.h @@ -28,8 +28,6 @@ struct cn9k_ipsec_sa { uint8_t custom_hdr_len; /** Response length calculation data */ struct cnxk_ipsec_outb_rlens rlens; - /** Outbound IP-ID */ - uint16_t ip_id; /** ESN */ union { uint64_t esn; @@ -42,6 +40,11 @@ struct cn9k_ipsec_sa { struct cnxk_on_ipsec_ar ar; /** Anti replay window size */ uint32_t replay_win_sz; + /* 
+ * ESN enable flag. Copy of in_sa ctl.esn_en to have single cache line + * access in the non-esn fastpath. + */ + uint8_t esn_en; /** Queue pair */ struct cnxk_cpt_qp *qp; }; diff --git a/drivers/crypto/cnxk/cn9k_ipsec_la_ops.h b/drivers/crypto/cnxk/cn9k_ipsec_la_ops.h index bbb4404a89..65dbb629b1 100644 --- a/drivers/crypto/cnxk/cn9k_ipsec_la_ops.h +++ b/drivers/crypto/cnxk/cn9k_ipsec_la_ops.h @@ -77,29 +77,36 @@ process_outb_sa(struct rte_crypto_op *cop, struct cn9k_ipsec_sa *sa, const unsigned int hdr_len = sa->custom_hdr_len; struct rte_crypto_sym_op *sym_op = cop->sym; struct rte_mbuf *m_src = sym_op->m_src; + uint32_t dlen, rlen, pkt_len, seq_lo; + uint16_t data_off = m_src->data_off; struct roc_ie_on_outb_hdr *hdr; - uint32_t dlen, rlen; int32_t extend_tail; + uint64_t esn; - dlen = rte_pktmbuf_pkt_len(m_src) + hdr_len; - rlen = ipsec_po_out_rlen_get(sa, dlen - hdr_len); + pkt_len = rte_pktmbuf_pkt_len(m_src); + dlen = pkt_len + hdr_len; + rlen = ipsec_po_out_rlen_get(sa, pkt_len); extend_tail = rlen - dlen; if (unlikely(extend_tail > rte_pktmbuf_tailroom(m_src))) { - plt_dp_err("Not enough tail room (required: %d, available: %d", + plt_dp_err("Not enough tail room (required: %d, available: %d)", extend_tail, rte_pktmbuf_tailroom(m_src)); return -ENOMEM; } - m_src->data_len += extend_tail; - m_src->pkt_len += extend_tail; - - hdr = (struct roc_ie_on_outb_hdr *)rte_pktmbuf_prepend(m_src, hdr_len); - if (unlikely(hdr == NULL)) { - plt_dp_err("Not enough head room"); + if (unlikely(hdr_len > data_off)) { + plt_dp_err("Not enough head room (required: %d, available: %d)", + hdr_len, rte_pktmbuf_headroom(m_src)); return -ENOMEM; } + pkt_len += extend_tail; + + m_src->data_len = pkt_len; + m_src->pkt_len = pkt_len; + + hdr = PLT_PTR_ADD(m_src->buf_addr, data_off - hdr_len); + #ifdef LA_IPSEC_DEBUG if (sa->inst.w4 & ROC_IE_ON_PER_PKT_IV) { memcpy(&hdr->iv[0], @@ -109,23 +116,28 @@ process_outb_sa(struct rte_crypto_op *cop, struct cn9k_ipsec_sa *sa, } #endif - 
hdr->seq = rte_cpu_to_be_32(sa->seq_lo); - hdr->ip_id = rte_cpu_to_be_32(sa->ip_id); - hdr->esn = rte_cpu_to_be_32(sa->seq_hi); + esn = ++sa->esn; + + /* Set ESN seq hi */ + hdr->esn = rte_cpu_to_be_32(esn >> 32); - sa->ip_id++; - sa->esn++; + /* Set ESN seq lo */ + seq_lo = rte_cpu_to_be_32(esn & (BIT_ULL(32) - 1)); + hdr->seq = seq_lo; + + /* Set IPID same as seq_lo */ + hdr->ip_id = seq_lo; /* Prepare CPT instruction */ inst->w4.u64 = sa->inst.w4 | dlen; - inst->dptr = rte_pktmbuf_iova(m_src); - inst->rptr = inst->dptr; + inst->dptr = PLT_U64_CAST(hdr); + inst->rptr = PLT_U64_CAST(hdr); inst->w7.u64 = sa->inst.w7; return 0; } -static __rte_always_inline int +static __rte_always_inline void process_inb_sa(struct rte_crypto_op *cop, struct cn9k_ipsec_sa *sa, struct cpt_inst_s *inst) { @@ -149,16 +161,13 @@ process_inb_sa(struct rte_crypto_op *cop, struct cn9k_ipsec_sa *sa, inst->dptr = rte_pktmbuf_iova(m_src); inst->rptr = inst->dptr; inst->w7.u64 = sa->inst.w7; - return 0; + return; } } /* Prepare CPT instruction */ inst->w4.u64 = sa->inst.w4 | rte_pktmbuf_pkt_len(m_src); - inst->dptr = rte_pktmbuf_iova(m_src); - inst->rptr = inst->dptr; + inst->dptr = inst->rptr = rte_pktmbuf_iova(m_src); inst->w7.u64 = sa->inst.w7; - - return 0; } #endif /* __CN9K_IPSEC_LA_OPS_H__ */ diff --git a/drivers/crypto/cnxk/cnxk_cryptodev_ops.h b/drivers/crypto/cnxk/cnxk_cryptodev_ops.h index ec99e6d660..0b41d47de9 100644 --- a/drivers/crypto/cnxk/cnxk_cryptodev_ops.h +++ b/drivers/crypto/cnxk/cnxk_cryptodev_ops.h @@ -70,16 +70,16 @@ struct cnxk_cpt_qp { /**< Crypto LF */ struct pending_queue pend_q; /**< Pending queue */ - struct rte_mempool *sess_mp; - /**< Session mempool */ - struct rte_mempool *sess_mp_priv; - /**< Session private data mempool */ - struct cpt_qp_meta_info meta_info; - /**< Metabuf info required to support operations on the queue pair */ struct roc_cpt_lmtline lmtline; /**< Lmtline information */ + struct cpt_qp_meta_info meta_info; + /**< Metabuf info required to 
support operations on the queue pair */ struct crypto_adpter_info ca; /**< Crypto adapter related info */ + struct rte_mempool *sess_mp; + /**< Session mempool */ + struct rte_mempool *sess_mp_priv; + /**< Session private data mempool */ }; int cnxk_cpt_dev_config(struct rte_cryptodev *dev, -- 2.39.5