-----------
* Compressdev level 0, no compression, is not supported.
-* Queue pairs are not thread-safe (that is, within a single queue pair, RX and TX from different lcores is not supported).
+* Queue-pairs are thread-safe on Intel CPUs but Queues are not (that is, within a single
+ queue-pair all enqueues to the TX queue must be done from one thread and all dequeues
+ from the RX queue must be done from one thread, but enqueues and dequeues may be done
+ in different threads.)
* No BSD support as BSD QAT kernel driver not available.
* When using Deflate dynamic huffman encoding for compression, the input size (op.src.length)
must be < CONFIG_RTE_PMD_QAT_COMP_IM_BUFFER_SIZE from the config file,
* No BSD support as BSD QAT kernel driver not available.
* ZUC EEA3/EIA3 is not supported by dh895xcc devices
* Maximum additional authenticated data (AAD) for GCM is 240 bytes long and must be passed to the device in a buffer rounded up to the nearest block-size multiple (x16) and padded with zeros.
-* Queue pairs are not thread-safe (that is, within a single queue pair, RX and TX from different lcores is not supported).
+* Queue-pairs are thread-safe on Intel CPUs but Queues are not (that is, within a single
+ queue-pair all enqueues to the TX queue must be done from one thread and all dequeues
+ from the RX queue must be done from one thread, but enqueues and dequeues may be done
+ in different threads.)
* A GCM limitation exists, but only in the case where there are multiple
generations of QAT devices on a single platform.
To optimise performance, the GCM crypto session should be initialised for the
~~~~~~~~~~~
* Big integers longer than 4096 bits are not supported.
-* Queue pairs are not thread-safe (that is, within a single queue pair, RX and TX from different lcores is not supported).
+* Queue-pairs are thread-safe on Intel CPUs but Queues are not (that is, within a single
+ queue-pair all enqueues to the TX queue must be done from one thread and all dequeues
+ from the RX queue must be done from one thread, but enqueues and dequeues may be done
+ in different threads.)
* RSA-2560, RSA-3584 are not supported
.. _building_qat:
}
qp->mmap_bar_addr = pci_dev->mem_resource[0].addr;
- qp->inflights16 = 0;
+ qp->enqueued = qp->dequeued = 0;
if (qat_queue_create(qat_dev, &(qp->tx_q), qat_qp_conf,
ADF_RING_DIR_TX) != 0) {
qp->qat_dev->qat_dev_id);
/* Don't free memory if there are still responses to be processed */
- if (qp->inflights16 == 0) {
+ if ((qp->enqueued - qp->dequeued) == 0) {
qat_queue_delete(&(qp->tx_q));
qat_queue_delete(&(qp->rx_q));
} else {
uint16_t nb_ops_possible = nb_ops;
register uint8_t *base_addr;
register uint32_t tail;
- int overflow;
if (unlikely(nb_ops == 0))
return 0;
tail = queue->tail;
/* Find how many can actually fit on the ring */
- tmp_qp->inflights16 += nb_ops;
- overflow = tmp_qp->inflights16 - tmp_qp->max_inflights;
- if (overflow > 0) {
- tmp_qp->inflights16 -= overflow;
- nb_ops_possible = nb_ops - overflow;
- if (nb_ops_possible == 0)
- return 0;
+ {
+ /* dequeued can only be written by one thread, but it may not
+ * be this thread. As it's 4-byte aligned it will be read
+ * atomically here by any Intel CPU.
+ * enqueued can wrap before dequeued, but cannot
+ * lap it as var size of enq/deq (uint32_t) > var size of
+ * max_inflights (uint16_t). In reality inflights is never
+ * even as big as max uint16_t, as it's <= ADF_MAX_DESC.
+ * On wrapping, the calculation still returns the correct
+ * positive value as all three vars are unsigned.
+ */
+ uint32_t inflights =
+ tmp_qp->enqueued - tmp_qp->dequeued;
+
+ if ((inflights + nb_ops) > tmp_qp->max_inflights) {
+ nb_ops_possible = tmp_qp->max_inflights - inflights;
+ if (nb_ops_possible == 0)
+ return 0;
+ }
}
while (nb_ops_sent != nb_ops_possible) {
tmp_qp->qat_dev_gen);
if (ret != 0) {
tmp_qp->stats.enqueue_err_count++;
- /*
- * This message cannot be enqueued,
- * decrease number of ops that wasn't sent
- */
- tmp_qp->inflights16 -= nb_ops_possible - nb_ops_sent;
+ /* This message cannot be enqueued */
if (nb_ops_sent == 0)
return 0;
goto kick_tail;
}
kick_tail:
queue->tail = tail;
+ tmp_qp->enqueued += nb_ops_sent;
tmp_qp->stats.enqueued_count += nb_ops_sent;
txq_write_tail(tmp_qp, queue);
return nb_ops_sent;
}
if (resp_counter > 0) {
rx_queue->head = head;
+ tmp_qp->dequeued += resp_counter;
tmp_qp->stats.dequeued_count += resp_counter;
rx_queue->nb_processed_responses += resp_counter;
- tmp_qp->inflights16 -= resp_counter;
if (rx_queue->nb_processed_responses >
QAT_CSR_HEAD_WRITE_THRESH)