common/qat: support dual threads for enqueue/dequeue

author Fiona Trahe <fiona.trahe@intel.com>

Wed, 15 Jan 2020 14:32:10 +0000 (15:32 +0100)

committer Akhil Goyal <akhil.goyal@nxp.com>

Wed, 15 Jan 2020 14:20:58 +0000 (15:20 +0100)
author Fiona Trahe <fiona.trahe@intel.com>
Wed, 15 Jan 2020 14:32:10 +0000 (15:32 +0100)
committer Akhil Goyal <akhil.goyal@nxp.com>
Wed, 15 Jan 2020 14:20:58 +0000 (15:20 +0100)
diff --git a/doc/guides/compressdevs/qat_comp.rst b/doc/guides/compressdevs/qat_comp.rst

index 6421f767c4bc0672afa28b5592d1c200e34f6615..757611a30c5c324e1fb830f9362dd36eb0d2cf38 100644 (file)
--- a/doc/guides/compressdevs/qat_comp.rst
+++ b/doc/guides/compressdevs/qat_comp.rst
@@ -37,7 +37,10 @@ Limitations
  -----------
  
  * Compressdev level 0, no compression, is not supported.
-* Queue pairs are not thread-safe (that is, within a single queue pair, RX and TX from different lcores is not supported).
+* Queue-pairs are thread-safe on Intel CPUs but individual queues are not (that is, within
+  a single queue-pair, all enqueues to the TX queue must be done from one thread and all
+  dequeues from the RX queue must be done from one thread, but enqueues and dequeues may
+  be done in different threads).
  * No BSD support as BSD QAT kernel driver not available.
  * When using Deflate dynamic huffman encoding for compression, the input size (op.src.length)
    must be < CONFIG_RTE_PMD_QAT_COMP_IM_BUFFER_SIZE from the config file,
diff --git a/doc/guides/cryptodevs/qat.rst b/doc/guides/cryptodevs/qat.rst

index 6197875fe324650251c3f7a77cd1812a095b0dc1..3a4a1892eace4429a0f534956ab7c390de20bdda 100644 (file)
--- a/doc/guides/cryptodevs/qat.rst
+++ b/doc/guides/cryptodevs/qat.rst
@@ -81,7 +81,10 @@ Limitations
  * No BSD support as BSD QAT kernel driver not available.
  * ZUC EEA3/EIA3 is not supported by dh895xcc devices
  * Maximum additional authenticated data (AAD) for GCM is 240 bytes long and must be passed to the device in a buffer rounded up to the nearest block-size multiple (x16) and padded with zeros.
-* Queue pairs are not thread-safe (that is, within a single queue pair, RX and TX from different lcores is not supported).
+* Queue-pairs are thread-safe on Intel CPUs but individual queues are not (that is, within
+  a single queue-pair, all enqueues to the TX queue must be done from one thread and all
+  dequeues from the RX queue must be done from one thread, but enqueues and dequeues may
+  be done in different threads).
  * A GCM limitation exists, but only in the case where there are multiple
    generations of QAT devices on a single platform.
    To optimise performance, the GCM crypto session should be initialised for the
@@ -133,7 +136,10 @@ Limitations
  ~~~~~~~~~~~
  
  * Big integers longer than 4096 bits are not supported.
-* Queue pairs are not thread-safe (that is, within a single queue pair, RX and TX from different lcores is not supported).
+* Queue-pairs are thread-safe on Intel CPUs but individual queues are not (that is, within
+  a single queue-pair, all enqueues to the TX queue must be done from one thread and all
+  dequeues from the RX queue must be done from one thread, but enqueues and dequeues may
+  be done in different threads).
  * RSA-2560, RSA-3584 are not supported
  
  .. _building_qat:
diff --git a/drivers/common/qat/qat_qp.c b/drivers/common/qat/qat_qp.c

index 8e4c74a0226b26a0c10a9b560f74c7398d24a1e3..30cdc618d9cbbd9115902d0b511d2cfcd35872af 100644 (file)
--- a/drivers/common/qat/qat_qp.c
+++ b/drivers/common/qat/qat_qp.c
@@ -230,7 +230,7 @@ int qat_qp_setup(struct qat_pci_device *qat_dev,
         }
  
         qp->mmap_bar_addr = pci_dev->mem_resource[0].addr;
-       qp->inflights16 = 0;
+       qp->enqueued = qp->dequeued = 0;
  
         if (qat_queue_create(qat_dev, &(qp->tx_q), qat_qp_conf,
                                         ADF_RING_DIR_TX) != 0) {
@@ -321,7 +321,7 @@ int qat_qp_release(struct qat_qp **qp_addr)
                                 qp->qat_dev->qat_dev_id);
  
         /* Don't free memory if there are still responses to be processed */
-       if (qp->inflights16 == 0) {
+       if ((qp->enqueued - qp->dequeued) == 0) {
                 qat_queue_delete(&(qp->tx_q));
                 qat_queue_delete(&(qp->rx_q));
         } else {
@@ -579,7 +579,6 @@ qat_enqueue_op_burst(void *qp, void **ops, uint16_t nb_ops)
         uint16_t nb_ops_possible = nb_ops;
         register uint8_t *base_addr;
         register uint32_t tail;
-       int overflow;
  
         if (unlikely(nb_ops == 0))
                 return 0;
@@ -590,13 +589,25 @@ qat_enqueue_op_burst(void *qp, void **ops, uint16_t nb_ops)
         tail = queue->tail;
  
         /* Find how many can actually fit on the ring */
-       tmp_qp->inflights16 += nb_ops;
-       overflow = tmp_qp->inflights16 - tmp_qp->max_inflights;
-       if (overflow > 0) {
-               tmp_qp->inflights16 -= overflow;
-               nb_ops_possible = nb_ops - overflow;
-               if (nb_ops_possible == 0)
-                       return 0;
+       {
+               /* dequeued is only ever written by one thread, but that may
+                * not be this thread. As it's 4-byte aligned it will be read
+                * atomically here by any Intel CPU.
+                * enqueued can wrap around before dequeued does, but can never
+                * lap it, because the counters (uint32_t) are wider than
+                * max_inflights (uint16_t). In reality inflights never even
+                * gets as big as max uint16_t, as it's <= ADF_MAX_DESC.
+                * After a wrap, the subtraction still yields the correct
+                * positive value because all three vars are unsigned.
+                */
+               uint32_t inflights =
+                       tmp_qp->enqueued - tmp_qp->dequeued;
+
+               if ((inflights + nb_ops) > tmp_qp->max_inflights) {
+                       nb_ops_possible = tmp_qp->max_inflights - inflights;
+                       if (nb_ops_possible == 0)
+                               return 0;
+               }
         }
  
         while (nb_ops_sent != nb_ops_possible) {
@@ -605,11 +616,7 @@ qat_enqueue_op_burst(void *qp, void **ops, uint16_t nb_ops)
                                 tmp_qp->qat_dev_gen);
                 if (ret != 0) {
                         tmp_qp->stats.enqueue_err_count++;
-                       /*
-                        * This message cannot be enqueued,
-                        * decrease number of ops that wasn't sent
-                        */
-                       tmp_qp->inflights16 -= nb_ops_possible - nb_ops_sent;
+                       /* This message cannot be enqueued */
                         if (nb_ops_sent == 0)
                                 return 0;
                         goto kick_tail;
@@ -621,6 +628,7 @@ qat_enqueue_op_burst(void *qp, void **ops, uint16_t nb_ops)
         }
  kick_tail:
         queue->tail = tail;
+       tmp_qp->enqueued += nb_ops_sent;
         tmp_qp->stats.enqueued_count += nb_ops_sent;
         txq_write_tail(tmp_qp, queue);
         return nb_ops_sent;
@@ -664,9 +672,9 @@ qat_dequeue_op_burst(void *qp, void **ops, uint16_t nb_ops)
         }
         if (resp_counter > 0) {
                 rx_queue->head = head;
+               tmp_qp->dequeued += resp_counter;
                 tmp_qp->stats.dequeued_count += resp_counter;
                 rx_queue->nb_processed_responses += resp_counter;
-               tmp_qp->inflights16 -= resp_counter;
  
                 if (rx_queue->nb_processed_responses >
                                                 QAT_CSR_HEAD_WRITE_THRESH)
diff --git a/drivers/common/qat/qat_qp.h b/drivers/common/qat/qat_qp.h

index 5066f06f07d74c956499abca5aefe69f292f4e4d..8b9ab79ff870341c1dca69299b0fa968416a853a 100644 (file)
--- a/drivers/common/qat/qat_qp.h
+++ b/drivers/common/qat/qat_qp.h
@@ -63,7 +63,6 @@ struct qat_queue {
  
  struct qat_qp {
         void                    *mmap_bar_addr;
-       uint16_t                inflights16;
         struct qat_queue        tx_q;
         struct qat_queue        rx_q;
         struct qat_common_stats stats;
@@ -75,6 +74,8 @@ struct qat_qp {
         enum qat_service_type service_type;
         struct qat_pci_device *qat_dev;
         /**< qat device this qp is on */
+       uint32_t enqueued;
+       uint32_t dequeued __rte_aligned(4);
         uint16_t max_inflights;
  } __rte_cache_aligned;
author	Fiona Trahe <fiona.trahe@intel.com>
	Wed, 15 Jan 2020 14:32:10 +0000 (15:32 +0100)
committer	Akhil Goyal <akhil.goyal@nxp.com>
	Wed, 15 Jan 2020 14:20:58 +0000 (15:20 +0100)
doc/guides/compressdevs/qat_comp.rst		patch \| blob \| history
doc/guides/cryptodevs/qat.rst		patch \| blob \| history
drivers/common/qat/qat_qp.c		patch \| blob \| history
drivers/common/qat/qat_qp.h		patch \| blob \| history