crypto/qat: enable Tx tail writes coalescing
author     Anatoly Burakov <anatoly.burakov@intel.com>
Tue, 12 Sep 2017 09:31:18 +0000 (10:31 +0100)
committer  Pablo de Lara <pablo.de.lara.guarch@intel.com>
Thu, 12 Oct 2017 14:12:46 +0000 (15:12 +0100)
Don't write the tail CSR until we have processed enough TX descriptors.

To avoid crypto operations sitting in the TX ring indefinitely,
the "force write" threshold is used:
 - on TX, tail write coalescing is skipped (the tail CSR is written
   immediately) whenever the number of inflights is below the force
   write threshold
 - on RX, if the number of inflights has dropped to or below the force
   write threshold and there are enqueued crypto ops not yet submitted
   to processing, the tail CSR is written (see the sketch below).

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
Acked-by: Fiona Trahe <fiona.trahe@intel.com>
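
For readers skimming the diff, here is a minimal, self-contained sketch of
the coalescing decision described above. The toy_* names, the simplified
struct and the printf() standing in for the real WRITE_CSR_RING_TAIL()
register write are hypothetical, not driver code; only the two thresholds
and the enqueue/dequeue conditions mirror the patch.

#include <stdint.h>
#include <stdio.h>

#define TAIL_WRITE_THRESH        32U  /* requests to accumulate before a tail write */
#define TAIL_FORCE_WRITE_THRESH 256U  /* inflights below which coalescing is skipped */

struct toy_qp {
	uint16_t inflights;            /* requests submitted but not yet dequeued */
	uint32_t tail;                 /* ring tail as seen by software */
	uint32_t csr_tail;             /* last tail value written to the CSR */
	uint16_t nb_pending_requests;  /* requests enqueued since the last tail write */
};

/* stand-in for txq_write_tail(): make pending requests visible to hardware */
static void toy_write_tail(struct toy_qp *qp)
{
	printf("tail CSR <- %u\n", qp->tail);
	qp->csr_tail = qp->tail;
	qp->nb_pending_requests = 0;
}

/* enqueue side: write the tail immediately while the ring is nearly idle,
 * otherwise only once enough requests have accumulated */
static void toy_enqueue(struct toy_qp *qp, uint16_t nb_ops)
{
	qp->tail += nb_ops;            /* ring wrap-around ignored in this sketch */
	qp->inflights += nb_ops;
	qp->nb_pending_requests += nb_ops;
	if (qp->inflights < TAIL_FORCE_WRITE_THRESH ||
			qp->nb_pending_requests > TAIL_WRITE_THRESH)
		toy_write_tail(qp);
}

/* dequeue side: once inflights drop to/below the force threshold, flush
 * any tail update that was held back by coalescing */
static void toy_dequeue(struct toy_qp *qp, uint16_t nb_ops)
{
	qp->inflights -= nb_ops;
	if (qp->inflights <= TAIL_FORCE_WRITE_THRESH &&
			qp->tail != qp->csr_tail)
		toy_write_tail(qp);
}

int main(void)
{
	struct toy_qp qp = { 0, 0, 0, 0 };

	toy_enqueue(&qp, 16);  /* ring nearly idle: tail written right away */
	qp.inflights = 300;    /* pretend the ring is busy */
	toy_enqueue(&qp, 16);  /* busy ring, few pending requests: write coalesced */
	toy_dequeue(&qp, 60);  /* inflights drop to the force threshold: tail flushed */
	return 0;
}

The force-write path is what bounds latency: without it, up to
QAT_CSR_TAIL_WRITE_THRESH requests could sit in the TX ring with no tail
write making them visible to the device, so the inflight count is checked
on both enqueue and dequeue.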
doc/guides/rel_notes/release_17_11.rst
drivers/crypto/qat/qat_crypto.c
drivers/crypto/qat/qat_crypto.h

index ab224a9..24f6e71 100644 (file)
@@ -87,6 +87,7 @@ New Features
 
   * Removed atomics from the internal queue pair structure.
   * Coalesce writes to HEAD CSR on response processing.
+  * Coalesce writes to TAIL CSR on request processing.
 
 * **Added IOMMU support to libvhost-user**
 
index 1656e0f..a2b202f 100644 (file)
@@ -921,6 +921,14 @@ qat_bpicipher_postprocess(struct qat_session *ctx,
        return sym_op->cipher.data.length - last_block_len;
 }
 
+static inline void
+txq_write_tail(struct qat_qp *qp, struct qat_queue *q) {
+       WRITE_CSR_RING_TAIL(qp->mmap_bar_addr, q->hw_bundle_number,
+                       q->hw_queue_number, q->tail);
+       q->nb_pending_requests = 0;
+       q->csr_tail = q->tail;
+}
+
 uint16_t
 qat_pmd_enqueue_op_burst(void *qp, struct rte_crypto_op **ops,
                uint16_t nb_ops)
@@ -973,10 +981,13 @@ qat_pmd_enqueue_op_burst(void *qp, struct rte_crypto_op **ops,
                cur_op++;
        }
 kick_tail:
-       WRITE_CSR_RING_TAIL(tmp_qp->mmap_bar_addr, queue->hw_bundle_number,
-                       queue->hw_queue_number, tail);
        queue->tail = tail;
        tmp_qp->stats.enqueued_count += nb_ops_sent;
+       queue->nb_pending_requests += nb_ops_sent;
+       if (tmp_qp->inflights16 < QAT_CSR_TAIL_FORCE_WRITE_THRESH ||
+                       queue->nb_pending_requests > QAT_CSR_TAIL_WRITE_THRESH) {
+               txq_write_tail(tmp_qp, queue);
+       }
        return nb_ops_sent;
 }
 
@@ -1011,17 +1022,18 @@ uint16_t
 qat_pmd_dequeue_op_burst(void *qp, struct rte_crypto_op **ops,
                uint16_t nb_ops)
 {
-       struct qat_queue *queue;
+       struct qat_queue *rx_queue, *tx_queue;
        struct qat_qp *tmp_qp = (struct qat_qp *)qp;
        uint32_t msg_counter = 0;
        struct rte_crypto_op *rx_op;
        struct icp_qat_fw_comn_resp *resp_msg;
        uint32_t head;
 
-       queue = &(tmp_qp->rx_q);
-       head = queue->head;
+       rx_queue = &(tmp_qp->rx_q);
+       tx_queue = &(tmp_qp->tx_q);
+       head = rx_queue->head;
        resp_msg = (struct icp_qat_fw_comn_resp *)
-                       ((uint8_t *)queue->base_addr + head);
+                       ((uint8_t *)rx_queue->base_addr + head);
 
        while (*(uint32_t *)resp_msg != ADF_RING_EMPTY_SIG &&
                        msg_counter != nb_ops) {
@@ -1048,21 +1060,26 @@ qat_pmd_dequeue_op_burst(void *qp, struct rte_crypto_op **ops,
                        rx_op->status = RTE_CRYPTO_OP_STATUS_SUCCESS;
                }
 
-               head = adf_modulo(head + queue->msg_size, queue->modulo);
+               head = adf_modulo(head + rx_queue->msg_size, rx_queue->modulo);
                resp_msg = (struct icp_qat_fw_comn_resp *)
-                               ((uint8_t *)queue->base_addr + head);
+                               ((uint8_t *)rx_queue->base_addr + head);
                *ops = rx_op;
                ops++;
                msg_counter++;
        }
        if (msg_counter > 0) {
-               queue->head = head;
+               rx_queue->head = head;
                tmp_qp->stats.dequeued_count += msg_counter;
-               queue->nb_processed_responses += msg_counter;
+               rx_queue->nb_processed_responses += msg_counter;
                tmp_qp->inflights16 -= msg_counter;
 
-               if (queue->nb_processed_responses > QAT_CSR_HEAD_WRITE_THRESH)
-                       rxq_free_desc(tmp_qp, queue);
+               if (rx_queue->nb_processed_responses > QAT_CSR_HEAD_WRITE_THRESH)
+                       rxq_free_desc(tmp_qp, rx_queue);
+       }
+       /* also check if tail needs to be advanced */
+       if (tmp_qp->inflights16 <= QAT_CSR_TAIL_FORCE_WRITE_THRESH &&
+                       tx_queue->tail != tx_queue->csr_tail) {
+               txq_write_tail(tmp_qp, tx_queue);
        }
        return msg_counter;
 }
index d78957c..0ebb083 100644 (file)
 
 #define QAT_CSR_HEAD_WRITE_THRESH 32U
 /* number of requests to accumulate before writing head CSR */
+#define QAT_CSR_TAIL_WRITE_THRESH 32U
+/* number of requests to accumulate before writing tail CSR */
+#define QAT_CSR_TAIL_FORCE_WRITE_THRESH 256U
+/* number of inflights below which no tail write coalescing should occur */
 
 struct qat_session;
 
@@ -77,8 +81,11 @@ struct qat_queue {
        uint8_t         hw_queue_number;
        /* HW queue aka ring offset on bundle */
        uint32_t        csr_head;               /* last written head value */
+       uint32_t        csr_tail;               /* last written tail value */
        uint16_t        nb_processed_responses;
        /* number of responses processed since last CSR head write */
+       uint16_t        nb_pending_requests;
+       /* number of requests pending since last CSR tail write */
 };
 
 struct qat_qp {