crypto/cnxk: enable allocated queues only
[dpdk.git] / drivers / crypto / cnxk / cn9k_cryptodev_ops.c
index 08f08c8..449208d 100644 (file)
@@ -3,11 +3,15 @@
  */
 
 #include <rte_cryptodev.h>
-#include <rte_cryptodev_pmd.h>
+#include <cryptodev_pmd.h>
 #include <rte_event_crypto_adapter.h>
+#include <rte_ip.h>
+#include <rte_vect.h>
 
 #include "cn9k_cryptodev.h"
 #include "cn9k_cryptodev_ops.h"
+#include "cn9k_ipsec.h"
+#include "cn9k_ipsec_la_ops.h"
 #include "cnxk_ae.h"
 #include "cnxk_cryptodev.h"
 #include "cnxk_cryptodev_ops.h"
@@ -20,7 +24,7 @@ cn9k_cpt_sym_inst_fill(struct cnxk_cpt_qp *qp, struct rte_crypto_op *op,
                       struct cpt_inst_s *inst)
 {
        uint64_t cpt_op;
-       int ret = -1;
+       int ret;
 
        cpt_op = sess->cpt_op;
 
@@ -33,6 +37,36 @@ cn9k_cpt_sym_inst_fill(struct cnxk_cpt_qp *qp, struct rte_crypto_op *op,
        return ret;
 }
 
+/*
+ * Build the CPT instruction for an op that carries a security session.
+ *
+ * Ops with a destination mbuf distinct from the source, or with a
+ * non-contiguous source mbuf, are rejected with -ENOTSUP. Otherwise the op
+ * is handed to process_outb_sa() when the SA direction is egress, or to
+ * process_inb_sa() after the in-flight request has been flagged as inbound.
+ *
+ * Returns -ENOTSUP for the unsupported layouts above, else whatever the
+ * selected process_*_sa() helper returns.
+ */
+static __rte_always_inline int __rte_hot
+cn9k_cpt_sec_inst_fill(struct rte_crypto_op *op,
+                      struct cpt_inflight_req *infl_req,
+                      struct cpt_inst_s *inst)
+{
+       struct rte_crypto_sym_op *sym_op = op->sym;
+       struct cn9k_sec_session *priv;
+       struct cn9k_ipsec_sa *sa;
+
+       /* m_dst may be NULL or equal to m_src; any other value is out-of-place */
+       if (unlikely(sym_op->m_dst && sym_op->m_dst != sym_op->m_src)) {
+               plt_dp_err("Out of place is not supported");
+               return -ENOTSUP;
+       }
+
+       if (unlikely(!rte_pktmbuf_is_contiguous(sym_op->m_src))) {
+               plt_dp_err("Scatter Gather mode is not supported");
+               return -ENOTSUP;
+       }
+
+       /* The SA is embedded in the security session's private data */
+       priv = get_sec_session_private_data(op->sym->sec_session);
+       sa = &priv->sa;
+
+       if (sa->dir == RTE_SECURITY_IPSEC_SA_DIR_EGRESS)
+               return process_outb_sa(op, sa, inst);
+
+       /* cn9k_cpt_sec_post_process() tests this flag after completion */
+       infl_req->op_flags |= CPT_OP_FLAGS_IPSEC_DIR_INBOUND;
+
+       return process_inb_sa(op, sa, inst);
+}
+
 static inline struct cnxk_se_sess *
 cn9k_cpt_sym_temp_sess_create(struct cnxk_cpt_qp *qp, struct rte_crypto_op *op)
 {
@@ -64,9 +98,8 @@ sess_put:
 }
 
 static inline int
-cn9k_cpt_prepare_instruction(struct cnxk_cpt_qp *qp, struct rte_crypto_op *op,
-                            struct cpt_inflight_req *infl_req,
-                            struct cpt_inst_s *inst)
+cn9k_cpt_inst_prep(struct cnxk_cpt_qp *qp, struct rte_crypto_op *op,
+                  struct cpt_inflight_req *infl_req, struct cpt_inst_s *inst)
 {
        int ret;
 
@@ -80,7 +113,10 @@ cn9k_cpt_prepare_instruction(struct cnxk_cpt_qp *qp, struct rte_crypto_op *op,
                                sym_op->session, cn9k_cryptodev_driver_id);
                        ret = cn9k_cpt_sym_inst_fill(qp, op, sess, infl_req,
                                                     inst);
-               } else {
+                       inst->w7.u64 = sess->cpt_inst_w7;
+               } else if (op->sess_type == RTE_CRYPTO_OP_SECURITY_SESSION)
+                       ret = cn9k_cpt_sec_inst_fill(op, infl_req, inst);
+               else {
                        sess = cn9k_cpt_sym_temp_sess_create(qp, op);
                        if (unlikely(sess == NULL)) {
                                plt_dp_err("Could not create temp session");
@@ -94,8 +130,8 @@ cn9k_cpt_prepare_instruction(struct cnxk_cpt_qp *qp, struct rte_crypto_op *op,
                                                  op->sym->session);
                                rte_mempool_put(qp->sess_mp, op->sym->session);
                        }
+                       inst->w7.u64 = sess->cpt_inst_w7;
                }
-               inst->w7.u64 = sess->cpt_inst_w7;
        } else if (op->type == RTE_CRYPTO_OP_TYPE_ASYMMETRIC) {
                struct rte_crypto_asym_op *asym_op;
                struct cnxk_ae_sess *sess;
@@ -118,8 +154,8 @@ cn9k_cpt_prepare_instruction(struct cnxk_cpt_qp *qp, struct rte_crypto_op *op,
 }
 
 static inline void
-cn9k_cpt_submit_instruction(struct cpt_inst_s *inst, uint64_t lmtline,
-                           uint64_t io_addr)
+cn9k_cpt_inst_submit(struct cpt_inst_s *inst, uint64_t lmtline,
+                    uint64_t io_addr)
 {
        uint64_t lmt_status;
 
@@ -138,47 +174,136 @@ cn9k_cpt_submit_instruction(struct cpt_inst_s *inst, uint64_t lmtline,
        } while (lmt_status == 0);
 }
 
+/*
+ * Submit two CPT instructions with a single LMTST.
+ *
+ * inst    - first of two consecutive struct cpt_inst_s entries; the caller
+ *           in this file passes &inst[0] of a two-element array.
+ * lmtline - address the instructions are copied to.
+ * io_addr - address passed to roc_lmt_submit_ldeor().
+ *
+ * The copy and the submit are repeated for as long as
+ * roc_lmt_submit_ldeor() returns 0.
+ * NOTE(review): presumably a zero status means the store was not accepted
+ * and has to be replayed - confirm against roc_lmt_submit_ldeor().
+ */
+static __plt_always_inline void
+cn9k_cpt_inst_submit_dual(struct cpt_inst_s *inst, uint64_t lmtline,
+                         uint64_t io_addr)
+{
+       uint64_t lmt_status;
+
+       do {
+               /* Copy 2 CPT inst_s to LMTLINE */
+#if defined(RTE_ARCH_ARM64)
+               uint64_t *s = (uint64_t *)inst;
+               uint64_t *d = (uint64_t *)lmtline;
+
+               /* Eight 128-bit load/store pairs move 16 64-bit words */
+               vst1q_u64(&d[0], vld1q_u64(&s[0]));
+               vst1q_u64(&d[2], vld1q_u64(&s[2]));
+               vst1q_u64(&d[4], vld1q_u64(&s[4]));
+               vst1q_u64(&d[6], vld1q_u64(&s[6]));
+               vst1q_u64(&d[8], vld1q_u64(&s[8]));
+               vst1q_u64(&d[10], vld1q_u64(&s[10]));
+               vst1q_u64(&d[12], vld1q_u64(&s[12]));
+               vst1q_u64(&d[14], vld1q_u64(&s[14]));
+#else
+               /* NOTE(review): 8 is assumed to be the segment count covering
+                * both instructions - confirm against roc_lmt_mov_seg().
+                */
+               roc_lmt_mov_seg((void *)lmtline, inst, 8);
+#endif
+
+               /*
+                * Make sure compiler does not reorder memcpy and ldeor.
+                * LMTST transactions are always flushed from the write
+                * buffer immediately, a DMB is not required to push out
+                * LMTSTs.
+                */
+               rte_io_wmb();
+               lmt_status = roc_lmt_submit_ldeor(io_addr);
+       } while (lmt_status == 0);
+}
+
 static uint16_t
 cn9k_cpt_enqueue_burst(void *qptr, struct rte_crypto_op **ops, uint16_t nb_ops)
 {
-       struct cpt_inflight_req *infl_req;
+       struct cpt_inflight_req *infl_req_1, *infl_req_2;
+       struct cpt_inst_s inst[2] __rte_cache_aligned;
+       struct rte_crypto_op *op_1, *op_2;
        uint16_t nb_allowed, count = 0;
        struct cnxk_cpt_qp *qp = qptr;
        struct pending_queue *pend_q;
-       struct rte_crypto_op *op;
-       struct cpt_inst_s inst;
+       uint64_t head;
        int ret;
 
        pend_q = &qp->pend_q;
 
-       inst.w0.u64 = 0;
-       inst.w2.u64 = 0;
-       inst.w3.u64 = 0;
+       const uint64_t lmt_base = qp->lf.lmt_base;
+       const uint64_t io_addr = qp->lf.io_addr;
+       const uint64_t pq_mask = pend_q->pq_mask;
+
+       /* Clear w0, w2, w3 of both inst */
 
-       nb_allowed = qp->lf.nb_desc - pend_q->pending_count;
+       inst[0].w0.u64 = 0;
+       inst[0].w2.u64 = 0;
+       inst[0].w3.u64 = 0;
+       inst[1].w0.u64 = 0;
+       inst[1].w2.u64 = 0;
+       inst[1].w3.u64 = 0;
+
+       head = pend_q->head;
+       nb_allowed = pending_queue_free_cnt(head, pend_q->tail, pq_mask);
        nb_ops = RTE_MIN(nb_ops, nb_allowed);
 
-       for (count = 0; count < nb_ops; count++) {
-               op = ops[count];
-               infl_req = &pend_q->req_queue[pend_q->enq_tail];
-               infl_req->op_flags = 0;
+       if (unlikely(nb_ops & 1)) {
+               op_1 = ops[0];
+               infl_req_1 = &pend_q->req_queue[head];
+               infl_req_1->op_flags = 0;
+
+               ret = cn9k_cpt_inst_prep(qp, op_1, infl_req_1, &inst[0]);
+               if (unlikely(ret)) {
+                       plt_dp_err("Could not process op: %p", op_1);
+                       return 0;
+               }
+
+               infl_req_1->cop = op_1;
+               infl_req_1->res.cn9k.compcode = CPT_COMP_NOT_DONE;
+               inst[0].res_addr = (uint64_t)&infl_req_1->res;
+
+               cn9k_cpt_inst_submit(&inst[0], lmt_base, io_addr);
+               pending_queue_advance(&head, pq_mask);
+               count++;
+       }
+
+       while (count < nb_ops) {
+               op_1 = ops[count];
+               op_2 = ops[count + 1];
+
+               infl_req_1 = &pend_q->req_queue[head];
+               pending_queue_advance(&head, pq_mask);
+               infl_req_2 = &pend_q->req_queue[head];
+               pending_queue_advance(&head, pq_mask);
+
+               infl_req_1->cop = op_1;
+               infl_req_2->cop = op_2;
+               infl_req_1->op_flags = 0;
+               infl_req_2->op_flags = 0;
+
+               infl_req_1->res.cn9k.compcode = CPT_COMP_NOT_DONE;
+               inst[0].res_addr = (uint64_t)&infl_req_1->res;
+
+               infl_req_2->res.cn9k.compcode = CPT_COMP_NOT_DONE;
+               inst[1].res_addr = (uint64_t)&infl_req_2->res;
+
+               ret = cn9k_cpt_inst_prep(qp, op_1, infl_req_1, &inst[0]);
+               if (unlikely(ret)) {
+                       plt_dp_err("Could not process op: %p", op_1);
+                       pending_queue_retreat(&head, pq_mask, 2);
+                       break;
+               }
 
-               ret = cn9k_cpt_prepare_instruction(qp, op, infl_req, &inst);
+               ret = cn9k_cpt_inst_prep(qp, op_2, infl_req_2, &inst[1]);
                if (unlikely(ret)) {
-                       plt_dp_err("Could not process op: %p", op);
+                       plt_dp_err("Could not process op: %p", op_2);
+                       pending_queue_retreat(&head, pq_mask, 1);
+                       cn9k_cpt_inst_submit(&inst[0], lmt_base, io_addr);
+                       count++;
                        break;
                }
 
-               infl_req->cop = op;
-               infl_req->res.cn9k.compcode = CPT_COMP_NOT_DONE;
-               inst.res_addr = (uint64_t)&infl_req->res;
+               cn9k_cpt_inst_submit_dual(&inst[0], lmt_base, io_addr);
 
-               cn9k_cpt_submit_instruction(&inst, qp->lmtline.lmt_base,
-                                           qp->lmtline.io_addr);
-               MOD_INC(pend_q->enq_tail, qp->lf.nb_desc);
+               count += 2;
        }
 
-       pend_q->pending_count += count;
+       rte_atomic_thread_fence(__ATOMIC_RELEASE);
+
+       pend_q->head = head;
        pend_q->time_out = rte_get_timer_cycles() +
                           DEFAULT_COMMAND_TIMEOUT * rte_get_timer_hz();
 
@@ -219,7 +344,7 @@ cn9k_cpt_crypto_adapter_enqueue(uintptr_t tag_op, struct rte_crypto_op *op)
        }
        infl_req->op_flags = 0;
 
-       ret = cn9k_cpt_prepare_instruction(qp, op, infl_req, &inst);
+       ret = cn9k_cpt_inst_prep(qp, op, infl_req, &inst);
        if (unlikely(ret)) {
                plt_dp_err("Could not process op: %p", op);
                rte_mempool_put(qp->ca.req_mp, infl_req);
@@ -245,12 +370,44 @@ cn9k_cpt_crypto_adapter_enqueue(uintptr_t tag_op, struct rte_crypto_op *op)
        if (!rsp_info->sched_type)
                roc_sso_hws_head_wait(tag_op);
 
-       cn9k_cpt_submit_instruction(&inst, qp->lmtline.lmt_base,
-                                   qp->lmtline.io_addr);
+       cn9k_cpt_inst_submit(&inst, qp->lmtline.lmt_base, qp->lmtline.io_addr);
 
        return 1;
 }
 
+/*
+ * Fix up the source mbuf of a completed security-session op.
+ *
+ * Nothing is done unless the in-flight request carries
+ * CPT_OP_FLAGS_IPSEC_DIR_INBOUND. For inbound ops the IP header found
+ * ROC_IE_ON_INB_RPTR_HDR bytes into the mbuf data is read, the packet length
+ * is taken from it (IPv4 total_length, or IPv6 payload_len plus the fixed
+ * IPv6 header size), data_len and pkt_len are set to that length and
+ * data_off is advanced by ROC_IE_ON_INB_RPTR_HDR.
+ */
+static inline void
+cn9k_cpt_sec_post_process(struct rte_crypto_op *cop,
+                         struct cpt_inflight_req *infl_req)
+{
+       struct rte_crypto_sym_op *sym_op = cop->sym;
+       struct rte_mbuf *m = sym_op->m_src;
+       struct rte_ipv6_hdr *ip6;
+       struct rte_ipv4_hdr *ip;
+       uint16_t m_len = 0;
+       char *data;
+
+       if (infl_req->op_flags & CPT_OP_FLAGS_IPSEC_DIR_INBOUND) {
+               data = rte_pktmbuf_mtod(m, char *);
+
+               ip = (struct rte_ipv4_hdr *)(data + ROC_IE_ON_INB_RPTR_HDR);
+
+               /*
+                * The upper nibble of the first header byte is the IP version.
+                * NOTE(review): the shift count is RTE_IPV4_IHL_MULTIPLIER,
+                * which only works if that constant equals 4 - confirm, or
+                * use a dedicated version-shift constant.
+                */
+               if (((ip->version_ihl & 0xf0) >> RTE_IPV4_IHL_MULTIPLIER) ==
+                   IPVERSION) {
+                       m_len = rte_be_to_cpu_16(ip->total_length);
+               } else {
+                       PLT_ASSERT(((ip->version_ihl & 0xf0) >>
+                                   RTE_IPV4_IHL_MULTIPLIER) == 6);
+                       ip6 = (struct rte_ipv6_hdr *)ip;
+                       /*
+                        * NOTE(review): the sum is stored in a uint16_t, so a
+                        * payload_len close to 65535 would wrap - confirm the
+                        * hardware never produces such packets.
+                        */
+                       m_len = rte_be_to_cpu_16(ip6->payload_len) +
+                               sizeof(struct rte_ipv6_hdr);
+               }
+
+               m->data_len = m_len;
+               m->pkt_len = m_len;
+               /* Skip the leading ROC_IE_ON_INB_RPTR_HDR bytes */
+               m->data_off += ROC_IE_ON_INB_RPTR_HDR;
+       }
+}
+
 static inline void
 cn9k_cpt_dequeue_post_process(struct cnxk_cpt_qp *qp, struct rte_crypto_op *cop,
                              struct cpt_inflight_req *infl_req)
@@ -273,6 +430,11 @@ cn9k_cpt_dequeue_post_process(struct cnxk_cpt_qp *qp, struct rte_crypto_op *cop,
 
                cop->status = RTE_CRYPTO_OP_STATUS_SUCCESS;
                if (cop->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) {
+                       if (cop->sess_type == RTE_CRYPTO_OP_SECURITY_SESSION) {
+                               cn9k_cpt_sec_post_process(cop, infl_req);
+                               return;
+                       }
+
                        /* Verify authentication data if required */
                        if (unlikely(infl_req->op_flags &
                                     CPT_OP_FLAGS_AUTH_VERIFY)) {
@@ -347,22 +509,27 @@ cn9k_cpt_crypto_adapter_dequeue(uintptr_t get_work1)
 static uint16_t
 cn9k_cpt_dequeue_burst(void *qptr, struct rte_crypto_op **ops, uint16_t nb_ops)
 {
+       struct cpt_inflight_req *infl_req;
        struct cnxk_cpt_qp *qp = qptr;
        struct pending_queue *pend_q;
-       struct cpt_inflight_req *infl_req;
        struct cpt_cn9k_res_s *res;
+       uint64_t infl_cnt, pq_tail;
        struct rte_crypto_op *cop;
-       uint32_t pq_deq_head;
        int i;
 
        pend_q = &qp->pend_q;
 
-       nb_ops = RTE_MIN(nb_ops, pend_q->pending_count);
+       const uint64_t pq_mask = pend_q->pq_mask;
+
+       pq_tail = pend_q->tail;
+       infl_cnt = pending_queue_infl_cnt(pend_q->head, pq_tail, pq_mask);
+       nb_ops = RTE_MIN(nb_ops, infl_cnt);
 
-       pq_deq_head = pend_q->deq_head;
+       /* Ensure infl_cnt isn't read before data lands */
+       rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
 
        for (i = 0; i < nb_ops; i++) {
-               infl_req = &pend_q->req_queue[pq_deq_head];
+               infl_req = &pend_q->req_queue[pq_tail];
 
                res = (struct cpt_cn9k_res_s *)&infl_req->res;
 
@@ -377,7 +544,7 @@ cn9k_cpt_dequeue_burst(void *qptr, struct rte_crypto_op **ops, uint16_t nb_ops)
                        break;
                }
 
-               MOD_INC(pq_deq_head, qp->lf.nb_desc);
+               pending_queue_advance(&pq_tail, pq_mask);
 
                cop = infl_req->cop;
 
@@ -389,8 +556,7 @@ cn9k_cpt_dequeue_burst(void *qptr, struct rte_crypto_op **ops, uint16_t nb_ops)
                        rte_mempool_put(qp->meta_info.pool, infl_req->mdata);
        }
 
-       pend_q->pending_count -= i;
-       pend_q->deq_head = pq_deq_head;
+       pend_q->tail = pq_tail;
 
        return i;
 }