crypto/qat: enable Rx head writes coalescing
author    Anatoly Burakov <anatoly.burakov@intel.com>
          Tue, 12 Sep 2017 09:31:17 +0000 (10:31 +0100)
committer Pablo de Lara <pablo.de.lara.guarch@intel.com>
          Thu, 12 Oct 2017 14:12:20 +0000 (15:12 +0100)
Don't write the CSR head until enough RX descriptors have been
processed. Also delay marking them as free until the CSR head
is actually written.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
Acked-by: Fiona Trahe <fiona.trahe@intel.com>
doc/guides/rel_notes/release_17_11.rst
drivers/crypto/qat/qat_crypto.c
drivers/crypto/qat/qat_crypto.h
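
For context: the change applies a common driver optimization. Rather than
updating the hardware head pointer (an MMIO write) after every response, the
driver counts processed responses and flushes the head register only once a
threshold is crossed. A minimal standalone sketch of that pattern follows;
the names (write_head_csr, account_and_maybe_flush) and the printf stand-in
for the MMIO write are hypothetical, not the driver's actual API:

    #include <stdint.h>
    #include <stdio.h>

    #define HEAD_WRITE_THRESH 32U   /* flush the head CSR every 32 responses */

    struct ring {
            uint32_t head;           /* software head, advanced per response */
            uint32_t csr_head;       /* head value last written to hardware */
            uint16_t nb_processed;   /* responses since the last CSR write */
    };

    /* stand-in for the MMIO write; a real driver pokes a device register */
    static void
    write_head_csr(uint32_t head)
    {
            printf("head CSR <- %u\n", head);
    }

    /* called once per dequeue burst, after 'done' responses were consumed */
    static void
    account_and_maybe_flush(struct ring *r, uint16_t done, uint32_t msg_size,
                            uint32_t ring_bytes)
    {
            r->head = (r->head + (uint32_t)done * msg_size) % ring_bytes;
            r->nb_processed += done;
            if (r->nb_processed > HEAD_WRITE_THRESH) {
                    write_head_csr(r->head);
                    r->csr_head = r->head;  /* remember what hardware has seen */
                    r->nb_processed = 0;
            }
    }

    int
    main(void)
    {
            struct ring r = { 0, 0, 0 };

            /* 33 responses of 64 bytes on a 4096-byte ring -> one CSR write */
            account_and_maybe_flush(&r, 33, 64, 4096);
            return 0;
    }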

index 5bbc3f2..ab224a9 100644
@@ -86,6 +86,7 @@ New Features
   Performance enhancements:
 
   * Removed atomics from the internal queue pair structure.
+  * Coalesced writes to HEAD CSR on response processing.
 
 * **Added IOMMU support to libvhost-user**
 
index bb199ae..1656e0f 100644
@@ -980,6 +980,33 @@ kick_tail:
        return nb_ops_sent;
 }
 
+static inline void
+rxq_free_desc(struct qat_qp *qp, struct qat_queue *q)
+{
+       uint32_t old_head, new_head;
+       uint32_t max_head;
+
+       old_head = q->csr_head;
+       new_head = q->head;
+       max_head = qp->nb_descriptors * q->msg_size;
+
+       /* mark processed descriptors as free, handling ring wrap-around */
+       void *cur_desc = (uint8_t *)q->base_addr + old_head;
+
+       if (new_head < old_head) {
+               /* head wrapped: clear to end of ring, then from ring base */
+               memset(cur_desc, ADF_RING_EMPTY_SIG, max_head - old_head);
+               memset(q->base_addr, ADF_RING_EMPTY_SIG, new_head);
+       } else {
+               memset(cur_desc, ADF_RING_EMPTY_SIG, new_head - old_head);
+       }
+       q->nb_processed_responses = 0;
+       q->csr_head = new_head;
+
+       /* write current head to CSR */
+       WRITE_CSR_RING_HEAD(qp->mmap_bar_addr, q->hw_bundle_number,
+                           q->hw_queue_number, new_head);
+}
+
 uint16_t
 qat_pmd_dequeue_op_burst(void *qp, struct rte_crypto_op **ops,
                uint16_t nb_ops)
@@ -989,10 +1016,12 @@ qat_pmd_dequeue_op_burst(void *qp, struct rte_crypto_op **ops,
        uint32_t msg_counter = 0;
        struct rte_crypto_op *rx_op;
        struct icp_qat_fw_comn_resp *resp_msg;
+       uint32_t head;
 
        queue = &(tmp_qp->rx_q);
+       head = queue->head;
        resp_msg = (struct icp_qat_fw_comn_resp *)
-                       ((uint8_t *)queue->base_addr + queue->head);
+                       ((uint8_t *)queue->base_addr + head);
 
        while (*(uint32_t *)resp_msg != ADF_RING_EMPTY_SIG &&
                        msg_counter != nb_ops) {
@@ -1019,23 +1048,21 @@ qat_pmd_dequeue_op_burst(void *qp, struct rte_crypto_op **ops,
                        rx_op->status = RTE_CRYPTO_OP_STATUS_SUCCESS;
                }
 
-               *(uint32_t *)resp_msg = ADF_RING_EMPTY_SIG;
-               queue->head = adf_modulo(queue->head +
-                               queue->msg_size,
-                               ADF_RING_SIZE_MODULO(queue->queue_size));
+               head = adf_modulo(head + queue->msg_size, queue->modulo);
                resp_msg = (struct icp_qat_fw_comn_resp *)
-                                       ((uint8_t *)queue->base_addr +
-                                                       queue->head);
+                               ((uint8_t *)queue->base_addr + head);
                *ops = rx_op;
                ops++;
                msg_counter++;
        }
        if (msg_counter > 0) {
-               WRITE_CSR_RING_HEAD(tmp_qp->mmap_bar_addr,
-                                       queue->hw_bundle_number,
-                                       queue->hw_queue_number, queue->head);
-               tmp_qp->inflights16 -= msg_counter;
+               queue->head = head;
                tmp_qp->stats.dequeued_count += msg_counter;
+               queue->nb_processed_responses += msg_counter;
+               tmp_qp->inflights16 -= msg_counter;
+
+               if (queue->nb_processed_responses > QAT_CSR_HEAD_WRITE_THRESH)
+                       rxq_free_desc(tmp_qp, queue);
        }
        return msg_counter;
 }
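
The wrap-around handling in rxq_free_desc() above is easiest to see with
concrete numbers. Below is a standalone sketch of the same two-memset()
scheme; the ring geometry is made up for illustration, and the one-byte
0x7F signature is a stand-in for the driver's ADF_RING_EMPTY_SIG:

    #include <stdint.h>
    #include <string.h>

    #define RING_EMPTY_SIG 0x7F  /* stand-in for ADF_RING_EMPTY_SIG */

    /* mark [old_head, new_head) as empty, wrapping at ring_bytes */
    static void
    clear_consumed(uint8_t *base, uint32_t old_head, uint32_t new_head,
                   uint32_t ring_bytes)
    {
            if (new_head < old_head) {
                    /* consumed region wraps: clear ring tail, then ring head */
                    memset(base + old_head, RING_EMPTY_SIG,
                           ring_bytes - old_head);
                    memset(base, RING_EMPTY_SIG, new_head);
            } else {
                    memset(base + old_head, RING_EMPTY_SIG,
                           new_head - old_head);
            }
    }

    int
    main(void)
    {
            uint8_t ring[512];      /* 8 descriptors x 64-byte messages */

            /* old_head = 448, new_head = 128: clears the last 64 bytes of
             * the ring, then 128 bytes from its start */
            clear_consumed(ring, 448, 128, sizeof(ring));
            return 0;
    }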
index 7773b57..d78957c 100644
@@ -50,6 +50,9 @@
        (((num) + (align) - 1) & ~((align) - 1))
 #define QAT_64_BTYE_ALIGN_MASK (~0x3f)
 
+/* number of responses to accumulate before writing the head CSR */
+#define QAT_CSR_HEAD_WRITE_THRESH 32U
+
 struct qat_session;
 
 enum qat_device_gen {
@@ -73,6 +76,9 @@ struct qat_queue {
        uint8_t         hw_bundle_number;
        uint8_t         hw_queue_number;
        /* HW queue aka ring offset on bundle */
+       uint32_t        csr_head;               /* last written head value */
+       uint16_t        nb_processed_responses;
+       /* number of responses processed since last CSR head write */
 };
 
 struct qat_qp {
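
Taken together, the two new fields turn queue->head into a purely software
cursor: it may run ahead of csr_head by up to QAT_CSR_HEAD_WRITE_THRESH
responses plus whatever remains of the burst that crossed the threshold,
and the descriptors in between stay unmarked until rxq_free_desc() clears
them and flushes the CSR. Presumably the 32-entry threshold trades fewer
expensive MMIO writes against that slightly delayed descriptor reuse.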