X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fcommon%2Fqat%2Fqat_qp.c;h=026ea5ee01c4066efd650a09aebdc3925d614b00;hb=90a2ec4ae81f2ef52f7c14bfc9307e75a4127fa4;hp=8e4c74a0226b26a0c10a9b560f74c7398d24a1e3;hpb=8f185e7c3e651eb845bd82f7ab0bbb862cc0a2e2;p=dpdk.git

diff --git a/drivers/common/qat/qat_qp.c b/drivers/common/qat/qat_qp.c
index 8e4c74a022..026ea5ee01 100644
--- a/drivers/common/qat/qat_qp.c
+++ b/drivers/common/qat/qat_qp.c
@@ -3,6 +3,7 @@
  */
 
 #include <rte_common.h>
+#include <rte_cycles.h>
 #include <rte_dev.h>
 #include <rte_malloc.h>
 #include <rte_memzone.h>
@@ -18,7 +19,9 @@
 #include "qat_asym.h"
 #include "qat_comp.h"
 #include "adf_transport_access_macros.h"
+#include "adf_transport_access_macros_gen4vf.h"
 
+#define QAT_CQ_MAX_DEQ_RETRIES 10
 
 #define ADF_MAX_DESC				4096
 #define ADF_MIN_DESC				128
@@ -136,21 +139,34 @@ static int qat_queue_create(struct qat_pci_device *qat_dev,
 	struct qat_queue *queue, struct qat_qp_config *, uint8_t dir);
 static int adf_verify_queue_size(uint32_t msg_size, uint32_t msg_num,
 	uint32_t *queue_size_for_csr);
-static void adf_configure_queues(struct qat_qp *queue);
-static void adf_queue_arb_enable(struct qat_queue *txq, void *base_addr,
-	rte_spinlock_t *lock);
-static void adf_queue_arb_disable(struct qat_queue *txq, void *base_addr,
-	rte_spinlock_t *lock);
-
-
-int qat_qps_per_service(const struct qat_qp_hw_data *qp_hw_data,
+static void adf_configure_queues(struct qat_qp *queue,
+	enum qat_device_gen qat_dev_gen);
+static void adf_queue_arb_enable(enum qat_device_gen qat_dev_gen,
+	struct qat_queue *txq, void *base_addr, rte_spinlock_t *lock);
+static void adf_queue_arb_disable(enum qat_device_gen qat_dev_gen,
+	struct qat_queue *txq, void *base_addr, rte_spinlock_t *lock);
+
+int qat_qps_per_service(struct qat_pci_device *qat_dev,
 		enum qat_service_type service)
 {
-	int i, count;
+	int i = 0, count = 0, max_ops_per_srv = 0;
+
+	if (qat_dev->qat_dev_gen == QAT_GEN4) {
+		max_ops_per_srv = QAT_GEN4_BUNDLE_NUM;
+		for (i = 0, count = 0; i < max_ops_per_srv; i++)
+			if (qat_dev->qp_gen4_data[i][0].service_type == service)
+				count++;
+	} else {
+		const struct qat_qp_hw_data *sym_hw_qps =
+				qat_gen_config[qat_dev->qat_dev_gen]
+				.qp_hw_data[service];
+
+		max_ops_per_srv = ADF_MAX_QPS_ON_ANY_SERVICE;
+		for (i = 0, count = 0; i < max_ops_per_srv; i++)
+			if (sym_hw_qps[i].service_type == service)
+				count++;
+	}
 
-	for (i = 0, count = 0; i < ADF_MAX_QPS_ON_ANY_SERVICE; i++)
-		if (qp_hw_data[i].service_type == service)
-			count++;
 	return count;
 }
 
@@ -188,11 +204,12 @@ int qat_qp_setup(struct qat_pci_device *qat_dev,
 		struct qat_qp **qp_addr,
 		uint16_t queue_pair_id,
 		struct qat_qp_config *qat_qp_conf)
-
 {
 	struct qat_qp *qp;
-	struct rte_pci_device *pci_dev = qat_dev->pci_dev;
+	struct rte_pci_device *pci_dev =
+			qat_pci_devs[qat_dev->qat_dev_id].pci_dev;
 	char op_cookie_pool_name[RTE_RING_NAMESIZE];
+	enum qat_device_gen qat_dev_gen = qat_dev->qat_dev_gen;
 	uint32_t i;
 
 	QAT_LOG(DEBUG, "Setup qp %u on qat pci device %d gen %d",
@@ -230,7 +247,7 @@ int qat_qp_setup(struct qat_pci_device *qat_dev,
 	}
 
 	qp->mmap_bar_addr = pci_dev->mem_resource[0].addr;
-	qp->inflights16 = 0;
+	qp->enqueued = qp->dequeued = 0;
 
 	if (qat_queue_create(qat_dev, &(qp->tx_q), qat_qp_conf,
 					ADF_RING_DIR_TX) != 0) {
@@ -256,8 +273,8 @@ int qat_qp_setup(struct qat_pci_device *qat_dev,
 		goto create_err;
 	}
 
-	adf_configure_queues(qp);
-	adf_queue_arb_enable(&qp->tx_q, qp->mmap_bar_addr,
+	adf_configure_queues(qp, qat_dev_gen);
+	adf_queue_arb_enable(qat_dev_gen, &qp->tx_q, qp->mmap_bar_addr,
 					&qat_dev->arb_csr_lock);
 
 	snprintf(op_cookie_pool_name, RTE_RING_NAMESIZE,
@@ -272,7 +289,7 @@ int qat_qp_setup(struct qat_pci_device *qat_dev,
 				qp->nb_descriptors,
 				qat_qp_conf->cookie_size, 64, 0,
 				NULL, NULL, NULL, NULL,
-				qat_dev->pci_dev->device.numa_node,
+				pci_dev->device.numa_node,
 				0);
 	if (!qp->op_cookie_pool) {
 		QAT_LOG(ERR, "QAT PMD Cannot create"
@@ -289,7 +306,6 @@ int qat_qp_setup(struct qat_pci_device *qat_dev,
 	}
 
 	qp->qat_dev_gen = qat_dev->qat_dev_gen;
-	qp->build_request = qat_qp_conf->build_request;
 	qp->service_type = qat_qp_conf->hw->service_type;
 	qp->qat_dev = qat_dev;
 
@@ -307,7 +323,8 @@ create_err:
 	return -EFAULT;
 }
 
-int qat_qp_release(struct qat_qp **qp_addr)
+
+int qat_qp_release(enum qat_device_gen qat_dev_gen, struct qat_qp **qp_addr)
 {
 	struct qat_qp *qp = *qp_addr;
 	uint32_t i;
@@ -321,15 +338,15 @@ int qat_qp_release(struct qat_qp **qp_addr)
 				qp->qat_dev->qat_dev_id);
 
 	/* Don't free memory if there are still responses to be processed */
-	if (qp->inflights16 == 0) {
+	if ((qp->enqueued - qp->dequeued) == 0) {
 		qat_queue_delete(&(qp->tx_q));
 		qat_queue_delete(&(qp->rx_q));
 	} else {
 		return -EAGAIN;
 	}
 
-	adf_queue_arb_disable(&(qp->tx_q), qp->mmap_bar_addr,
-					&qp->qat_dev->arb_csr_lock);
+	adf_queue_arb_disable(qat_dev_gen, &(qp->tx_q), qp->mmap_bar_addr,
+				&qp->qat_dev->arb_csr_lock);
 
 	for (i = 0; i < qp->nb_descriptors; i++)
 		rte_mempool_put(qp->op_cookie_pool, qp->op_cookies[i]);
@@ -377,7 +394,9 @@ qat_queue_create(struct qat_pci_device *qat_dev, struct qat_queue *queue,
 	uint64_t queue_base;
 	void *io_addr;
 	const struct rte_memzone *qp_mz;
-	struct rte_pci_device *pci_dev = qat_dev->pci_dev;
+	struct rte_pci_device *pci_dev =
+			qat_pci_devs[qat_dev->qat_dev_id].pci_dev;
+	enum qat_device_gen qat_dev_gen = qat_dev->qat_dev_gen;
 	int ret = 0;
 	uint16_t desc_size = (dir == ADF_RING_DIR_TX ?
 			qp_conf->hw->tx_msg_size : qp_conf->hw->rx_msg_size);
@@ -401,7 +420,7 @@ qat_queue_create(struct qat_pci_device *qat_dev, struct qat_queue *queue,
 		qp_conf->service_str, "qp_mem",
 		queue->hw_bundle_number, queue->hw_queue_number);
 	qp_mz = queue_dma_zone_reserve(queue->memz_name, queue_size_bytes,
-			qat_dev->pci_dev->device.numa_node);
+			pci_dev->device.numa_node);
 	if (qp_mz == NULL) {
 		QAT_LOG(ERR, "Failed to allocate ring memzone");
 		return -ENOMEM;
@@ -430,18 +449,26 @@ qat_queue_create(struct qat_pci_device *qat_dev, struct qat_queue *queue,
 	queue->tail = 0;
 	queue->msg_size = desc_size;
 
+	/* For fast calculation of cookie index, relies on msg_size being 2^n */
+	queue->trailz = __builtin_ctz(desc_size);
+
 	/*
 	 * Write an unused pattern to the queue memory.
 	 */
 	memset(queue->base_addr, 0x7F, queue_size_bytes);
-
-	queue_base = BUILD_RING_BASE_ADDR(queue->base_phys_addr,
-					queue->queue_size);
-
 	io_addr = pci_dev->mem_resource[0].addr;
 
-	WRITE_CSR_RING_BASE(io_addr, queue->hw_bundle_number,
+	if (qat_dev_gen == QAT_GEN4) {
+		queue_base = BUILD_RING_BASE_ADDR_GEN4(queue->base_phys_addr,
+					queue->queue_size);
+		WRITE_CSR_RING_BASE_GEN4VF(io_addr, queue->hw_bundle_number,
+			queue->hw_queue_number, queue_base);
+	} else {
+		queue_base = BUILD_RING_BASE_ADDR(queue->base_phys_addr,
+				queue->queue_size);
+		WRITE_CSR_RING_BASE(io_addr, queue->hw_bundle_number,
 			queue->hw_queue_number, queue_base);
+	}
 
 	QAT_LOG(DEBUG, "RING: Name:%s, size in CSR: %u, in bytes %u,"
 		" nb msgs %u, msg_size %u, modulo mask %u",
@@ -457,6 +484,84 @@ queue_create_err:
 	return ret;
 }
 
+int
+qat_select_valid_queue(struct qat_pci_device *qat_dev, int qp_id,
+			enum qat_service_type service_type)
+{
+	if (qat_dev->qat_dev_gen == QAT_GEN4) {
+		int i = 0, valid_qps = 0;
+
+		for (; i < QAT_GEN4_BUNDLE_NUM; i++) {
+			if (qat_dev->qp_gen4_data[i][0].service_type ==
+				service_type) {
+				if (valid_qps == qp_id)
+					return i;
+				++valid_qps;
+			}
+		}
+	}
+	return -1;
+}
+
+int
+qat_read_qp_config(struct qat_pci_device *qat_dev)
+{
+	int i = 0;
+	enum qat_device_gen qat_dev_gen = qat_dev->qat_dev_gen;
+
+	if (qat_dev_gen == QAT_GEN4) {
+		uint16_t svc = 0;
+
+		if (qat_query_svc(qat_dev, (uint8_t *)&svc))
+			return -(EFAULT);
+		for (; i < QAT_GEN4_BUNDLE_NUM; i++) {
+			struct qat_qp_hw_data *hw_data =
+				&qat_dev->qp_gen4_data[i][0];
+			uint8_t svc1 = (svc >> (3 * i)) & 0x7;
+			enum qat_service_type service_type = QAT_SERVICE_INVALID;
+
+			if (svc1 == QAT_SVC_SYM) {
+				service_type = QAT_SERVICE_SYMMETRIC;
+				QAT_LOG(DEBUG,
+					"Discovered SYMMETRIC service on bundle %d",
+					i);
+			} else if (svc1 == QAT_SVC_COMPRESSION) {
+				service_type = QAT_SERVICE_COMPRESSION;
+				QAT_LOG(DEBUG,
+					"Discovered COPRESSION service on bundle %d",
+					i);
+			} else if (svc1 == QAT_SVC_ASYM) {
+				service_type = QAT_SERVICE_ASYMMETRIC;
+				QAT_LOG(DEBUG,
+					"Discovered ASYMMETRIC service on bundle %d",
+					i);
+			} else {
+				QAT_LOG(ERR,
+					"Unrecognized service on bundle %d",
+					i);
+				return -(EFAULT);
+			}
+
+			memset(hw_data, 0, sizeof(*hw_data));
+			hw_data->service_type = service_type;
+			if (service_type == QAT_SERVICE_ASYMMETRIC) {
+				hw_data->tx_msg_size = 64;
+				hw_data->rx_msg_size = 32;
+			} else if (service_type == QAT_SERVICE_SYMMETRIC ||
+					service_type ==
+						QAT_SERVICE_COMPRESSION) {
+				hw_data->tx_msg_size = 128;
+				hw_data->rx_msg_size = 32;
+			}
+			hw_data->tx_ring_num = 0;
+			hw_data->rx_ring_num = 1;
+			hw_data->hw_bundle_num = i;
+		}
+		return 0;
+	}
+	return -(EINVAL);
+}
+
 static int qat_qp_check_queue_alignment(uint64_t phys_addr,
 					uint32_t queue_size_bytes)
 {
@@ -480,54 +585,81 @@ static int adf_verify_queue_size(uint32_t msg_size, uint32_t msg_num,
 	return -EINVAL;
 }
 
-static void adf_queue_arb_enable(struct qat_queue *txq, void *base_addr,
-					rte_spinlock_t *lock)
+static void
+adf_queue_arb_enable(enum qat_device_gen qat_dev_gen, struct qat_queue *txq,
+			void *base_addr, rte_spinlock_t *lock)
 {
-	uint32_t arb_csr_offset =  ADF_ARB_RINGSRVARBEN_OFFSET +
-					(ADF_ARB_REG_SLOT *
-							txq->hw_bundle_number);
-	uint32_t value;
+	uint32_t arb_csr_offset = 0, value;
 
 	rte_spinlock_lock(lock);
-	value = ADF_CSR_RD(base_addr, arb_csr_offset);
+	if (qat_dev_gen == QAT_GEN4) {
+		arb_csr_offset = ADF_ARB_RINGSRVARBEN_OFFSET +
+				(ADF_RING_BUNDLE_SIZE_GEN4 *
+				txq->hw_bundle_number);
+		value = ADF_CSR_RD(base_addr + ADF_RING_CSR_ADDR_OFFSET_GEN4VF,
+				arb_csr_offset);
+	} else {
+		arb_csr_offset = ADF_ARB_RINGSRVARBEN_OFFSET +
+				(ADF_ARB_REG_SLOT *
+				txq->hw_bundle_number);
+		value = ADF_CSR_RD(base_addr,
+				arb_csr_offset);
+	}
 	value |= (0x01 << txq->hw_queue_number);
 	ADF_CSR_WR(base_addr, arb_csr_offset, value);
 	rte_spinlock_unlock(lock);
 }
 
-static void adf_queue_arb_disable(struct qat_queue *txq, void *base_addr,
-					rte_spinlock_t *lock)
+static void adf_queue_arb_disable(enum qat_device_gen qat_dev_gen,
+		struct qat_queue *txq, void *base_addr, rte_spinlock_t *lock)
 {
-	uint32_t arb_csr_offset =  ADF_ARB_RINGSRVARBEN_OFFSET +
-					(ADF_ARB_REG_SLOT *
-							txq->hw_bundle_number);
-	uint32_t value;
+	uint32_t arb_csr_offset = 0, value;
 
 	rte_spinlock_lock(lock);
-	value = ADF_CSR_RD(base_addr, arb_csr_offset);
+	if (qat_dev_gen == QAT_GEN4) {
+		arb_csr_offset = ADF_ARB_RINGSRVARBEN_OFFSET +
+				(ADF_RING_BUNDLE_SIZE_GEN4 *
+				txq->hw_bundle_number);
+		value = ADF_CSR_RD(base_addr + ADF_RING_CSR_ADDR_OFFSET_GEN4VF,
+				arb_csr_offset);
+	} else {
+		arb_csr_offset = ADF_ARB_RINGSRVARBEN_OFFSET +
+				(ADF_ARB_REG_SLOT *
+				txq->hw_bundle_number);
+		value = ADF_CSR_RD(base_addr,
+				arb_csr_offset);
+	}
 	value &= ~(0x01 << txq->hw_queue_number);
 	ADF_CSR_WR(base_addr, arb_csr_offset, value);
 	rte_spinlock_unlock(lock);
 }
 
-static void adf_configure_queues(struct qat_qp *qp)
+static void adf_configure_queues(struct qat_qp *qp,
+		enum qat_device_gen qat_dev_gen)
 {
-	uint32_t queue_config;
-	struct qat_queue *queue = &qp->tx_q;
-
-	queue_config = BUILD_RING_CONFIG(queue->queue_size);
-
-	WRITE_CSR_RING_CONFIG(qp->mmap_bar_addr, queue->hw_bundle_number,
-			queue->hw_queue_number, queue_config);
-
-	queue = &qp->rx_q;
-	queue_config =
-			BUILD_RESP_RING_CONFIG(queue->queue_size,
-					ADF_RING_NEAR_WATERMARK_512,
-					ADF_RING_NEAR_WATERMARK_0);
-
-	WRITE_CSR_RING_CONFIG(qp->mmap_bar_addr, queue->hw_bundle_number,
-			queue->hw_queue_number, queue_config);
+	uint32_t q_tx_config, q_resp_config;
+	struct qat_queue *q_tx = &qp->tx_q, *q_rx = &qp->rx_q;
+
+	q_tx_config = BUILD_RING_CONFIG(q_tx->queue_size);
+	q_resp_config = BUILD_RESP_RING_CONFIG(q_rx->queue_size,
+			ADF_RING_NEAR_WATERMARK_512,
+			ADF_RING_NEAR_WATERMARK_0);
+
+	if (qat_dev_gen == QAT_GEN4) {
+		WRITE_CSR_RING_CONFIG_GEN4VF(qp->mmap_bar_addr,
+			q_tx->hw_bundle_number,	q_tx->hw_queue_number,
+			q_tx_config);
+		WRITE_CSR_RING_CONFIG_GEN4VF(qp->mmap_bar_addr,
+			q_rx->hw_bundle_number,	q_rx->hw_queue_number,
+			q_resp_config);
+	} else {
+		WRITE_CSR_RING_CONFIG(qp->mmap_bar_addr,
+			q_tx->hw_bundle_number,	q_tx->hw_queue_number,
+			q_tx_config);
+		WRITE_CSR_RING_CONFIG(qp->mmap_bar_addr,
+			q_rx->hw_bundle_number,	q_rx->hw_queue_number,
+			q_resp_config);
+	}
 }
 
 static inline uint32_t adf_modulo(uint32_t data, uint32_t modulo_mask)
@@ -536,14 +668,21 @@ static inline uint32_t adf_modulo(uint32_t data, uint32_t modulo_mask)
 }
 
 static inline void
-txq_write_tail(struct qat_qp *qp, struct qat_queue *q) {
-	WRITE_CSR_RING_TAIL(qp->mmap_bar_addr, q->hw_bundle_number,
+txq_write_tail(enum qat_device_gen qat_dev_gen,
+		struct qat_qp *qp, struct qat_queue *q) {
+
+	if (qat_dev_gen == QAT_GEN4) {
+		WRITE_CSR_RING_TAIL_GEN4VF(qp->mmap_bar_addr,
+			q->hw_bundle_number, q->hw_queue_number, q->tail);
+	} else {
+		WRITE_CSR_RING_TAIL(qp->mmap_bar_addr, q->hw_bundle_number,
 			q->hw_queue_number, q->tail);
-	q->csr_tail = q->tail;
+	}
 }
 
 static inline
-void rxq_free_desc(struct qat_qp *qp, struct qat_queue *q)
+void rxq_free_desc(enum qat_device_gen qat_dev_gen, struct qat_qp *qp,
+				struct qat_queue *q)
 {
 	uint32_t old_head, new_head;
 	uint32_t max_head;
@@ -565,8 +704,14 @@ void rxq_free_desc(struct qat_qp *qp, struct qat_queue *q)
 	q->csr_head = new_head;
 
 	/* write current head to CSR */
-	WRITE_CSR_RING_HEAD(qp->mmap_bar_addr, q->hw_bundle_number,
-			    q->hw_queue_number, new_head);
+	if (qat_dev_gen == QAT_GEN4) {
+		WRITE_CSR_RING_HEAD_GEN4VF(qp->mmap_bar_addr,
+			q->hw_bundle_number, q->hw_queue_number, new_head);
+	} else {
+		WRITE_CSR_RING_HEAD(qp->mmap_bar_addr, q->hw_bundle_number,
+				q->hw_queue_number, new_head);
+	}
+
 }
 
 uint16_t
@@ -575,11 +720,10 @@ qat_enqueue_op_burst(void *qp, void **ops, uint16_t nb_ops)
 	register struct qat_queue *queue;
 	struct qat_qp *tmp_qp = (struct qat_qp *)qp;
 	register uint32_t nb_ops_sent = 0;
-	register int ret;
+	register int ret = -1;
 	uint16_t nb_ops_possible = nb_ops;
 	register uint8_t *base_addr;
 	register uint32_t tail;
-	int overflow;
 
 	if (unlikely(nb_ops == 0))
 		return 0;
@@ -590,26 +734,63 @@ qat_enqueue_op_burst(void *qp, void **ops, uint16_t nb_ops)
 	tail = queue->tail;
 
 	/* Find how many can actually fit on the ring */
-	tmp_qp->inflights16 += nb_ops;
-	overflow = tmp_qp->inflights16 - tmp_qp->max_inflights;
-	if (overflow > 0) {
-		tmp_qp->inflights16 -= overflow;
-		nb_ops_possible = nb_ops - overflow;
-		if (nb_ops_possible == 0)
+	{
+		/* dequeued can only be written by one thread, but it may not
+		 * be this thread. As it's 4-byte aligned it will be read
+		 * atomically here by any Intel CPU.
+		 * enqueued can wrap before dequeued, but cannot
+		 * lap it as var size of enq/deq (uint32_t) > var size of
+		 * max_inflights (uint16_t). In reality inflights is never
+		 * even as big as max uint16_t, as it's <= ADF_MAX_DESC.
+		 * On wrapping, the calculation still returns the correct
+		 * positive value as all three vars are unsigned.
+		 */
+		uint32_t inflights =
+			tmp_qp->enqueued - tmp_qp->dequeued;
+
+		if ((inflights + nb_ops) > tmp_qp->max_inflights) {
+			nb_ops_possible = tmp_qp->max_inflights - inflights;
+			if (nb_ops_possible == 0)
+				return 0;
+		}
+		/* QAT has plenty of work queued already, so don't waste cycles
+		 * enqueueing, wait til the application has gathered a bigger
+		 * burst or some completed ops have been dequeued
+		 */
+		if (tmp_qp->min_enq_burst_threshold && inflights >
+				QAT_QP_MIN_INFL_THRESHOLD && nb_ops_possible <
+				tmp_qp->min_enq_burst_threshold) {
+			tmp_qp->stats.threshold_hit_count++;
 			return 0;
+		}
 	}
 
+#ifdef BUILD_QAT_SYM
+	if (tmp_qp->service_type == QAT_SERVICE_SYMMETRIC)
+		qat_sym_preprocess_requests(ops, nb_ops_possible);
+#endif
+
 	while (nb_ops_sent != nb_ops_possible) {
-		ret = tmp_qp->build_request(*ops, base_addr + tail,
-				tmp_qp->op_cookies[tail / queue->msg_size],
+		if (tmp_qp->service_type == QAT_SERVICE_SYMMETRIC) {
+#ifdef BUILD_QAT_SYM
+			ret = qat_sym_build_request(*ops, base_addr + tail,
+				tmp_qp->op_cookies[tail >> queue->trailz],
+				tmp_qp->qat_dev_gen);
+#endif
+		} else if (tmp_qp->service_type == QAT_SERVICE_COMPRESSION) {
+			ret = qat_comp_build_request(*ops, base_addr + tail,
+				tmp_qp->op_cookies[tail >> queue->trailz],
 				tmp_qp->qat_dev_gen);
+		} else if (tmp_qp->service_type == QAT_SERVICE_ASYMMETRIC) {
+#ifdef BUILD_QAT_ASYM
+			ret = qat_asym_build_request(*ops, base_addr + tail,
+				tmp_qp->op_cookies[tail >> queue->trailz],
+				tmp_qp->qat_dev_gen);
+#endif
+		}
 		if (ret != 0) {
 			tmp_qp->stats.enqueue_err_count++;
-			/*
-			 * This message cannot be enqueued,
-			 * decrease number of ops that wasn't sent
-			 */
-			tmp_qp->inflights16 -= nb_ops_possible - nb_ops_sent;
+			/* This message cannot be enqueued */
 			if (nb_ops_sent == 0)
 				return 0;
 			goto kick_tail;
@@ -621,8 +802,182 @@ qat_enqueue_op_burst(void *qp, void **ops, uint16_t nb_ops)
 	}
 kick_tail:
 	queue->tail = tail;
+	tmp_qp->enqueued += nb_ops_sent;
+	tmp_qp->stats.enqueued_count += nb_ops_sent;
+	txq_write_tail(tmp_qp->qat_dev_gen, tmp_qp, queue);
+	return nb_ops_sent;
+}
+
+/* Use this for compression only - but keep consistent with above common
+ * function as much as possible.
+ */
+uint16_t
+qat_enqueue_comp_op_burst(void *qp, void **ops, uint16_t nb_ops)
+{
+	register struct qat_queue *queue;
+	struct qat_qp *tmp_qp = (struct qat_qp *)qp;
+	register uint32_t nb_ops_sent = 0;
+	register int nb_desc_to_build;
+	uint16_t nb_ops_possible = nb_ops;
+	register uint8_t *base_addr;
+	register uint32_t tail;
+
+	int descriptors_built, total_descriptors_built = 0;
+	int nb_remaining_descriptors;
+	int overflow = 0;
+
+	if (unlikely(nb_ops == 0))
+		return 0;
+
+	/* read params used a lot in main loop into registers */
+	queue = &(tmp_qp->tx_q);
+	base_addr = (uint8_t *)queue->base_addr;
+	tail = queue->tail;
+
+	/* Find how many can actually fit on the ring */
+	{
+		/* dequeued can only be written by one thread, but it may not
+		 * be this thread. As it's 4-byte aligned it will be read
+		 * atomically here by any Intel CPU.
+		 * enqueued can wrap before dequeued, but cannot
+		 * lap it as var size of enq/deq (uint32_t) > var size of
+		 * max_inflights (uint16_t). In reality inflights is never
+		 * even as big as max uint16_t, as it's <= ADF_MAX_DESC.
+		 * On wrapping, the calculation still returns the correct
+		 * positive value as all three vars are unsigned.
+		 */
+		uint32_t inflights =
+			tmp_qp->enqueued - tmp_qp->dequeued;
+
+		/* Find how many can actually fit on the ring */
+		overflow = (inflights + nb_ops) - tmp_qp->max_inflights;
+		if (overflow > 0) {
+			nb_ops_possible = nb_ops - overflow;
+			if (nb_ops_possible == 0)
+				return 0;
+		}
+
+		/* QAT has plenty of work queued already, so don't waste cycles
+		 * enqueueing, wait til the application has gathered a bigger
+		 * burst or some completed ops have been dequeued
+		 */
+		if (tmp_qp->min_enq_burst_threshold && inflights >
+				QAT_QP_MIN_INFL_THRESHOLD && nb_ops_possible <
+				tmp_qp->min_enq_burst_threshold) {
+			tmp_qp->stats.threshold_hit_count++;
+			return 0;
+		}
+	}
+
+	/* At this point nb_ops_possible is assuming a 1:1 mapping
+	 * between ops and descriptors.
+	 * Fewer may be sent if some ops have to be split.
+	 * nb_ops_possible is <= burst size.
+	 * Find out how many spaces are actually available on the qp in case
+	 * more are needed.
+	 */
+	nb_remaining_descriptors = nb_ops_possible
+			 + ((overflow >= 0) ? 0 : overflow * (-1));
+	QAT_DP_LOG(DEBUG, "Nb ops requested %d, nb descriptors remaining %d",
+			nb_ops, nb_remaining_descriptors);
+
+	while (nb_ops_sent != nb_ops_possible &&
+				nb_remaining_descriptors > 0) {
+		struct qat_comp_op_cookie *cookie =
+				tmp_qp->op_cookies[tail >> queue->trailz];
+
+		descriptors_built = 0;
+
+		QAT_DP_LOG(DEBUG, "--- data length: %u",
+			   ((struct rte_comp_op *)*ops)->src.length);
+
+		nb_desc_to_build = qat_comp_build_request(*ops,
+				base_addr + tail, cookie, tmp_qp->qat_dev_gen);
+		QAT_DP_LOG(DEBUG, "%d descriptors built, %d remaining, "
+			"%d ops sent, %d descriptors needed",
+			total_descriptors_built, nb_remaining_descriptors,
+			nb_ops_sent, nb_desc_to_build);
+
+		if (unlikely(nb_desc_to_build < 0)) {
+			/* this message cannot be enqueued */
+			tmp_qp->stats.enqueue_err_count++;
+			if (nb_ops_sent == 0)
+				return 0;
+			goto kick_tail;
+		} else if (unlikely(nb_desc_to_build > 1)) {
+			/* this op is too big and must be split - get more
+			 * descriptors and retry
+			 */
+
+			QAT_DP_LOG(DEBUG, "Build %d descriptors for this op",
+					nb_desc_to_build);
+
+			nb_remaining_descriptors -= nb_desc_to_build;
+			if (nb_remaining_descriptors >= 0) {
+				/* There are enough remaining descriptors
+				 * so retry
+				 */
+				int ret2 = qat_comp_build_multiple_requests(
+						*ops, tmp_qp, tail,
+						nb_desc_to_build);
+
+				if (unlikely(ret2 < 1)) {
+					QAT_DP_LOG(DEBUG,
+							"Failed to build (%d) descriptors, status %d",
+							nb_desc_to_build, ret2);
+
+					qat_comp_free_split_op_memzones(cookie,
+							nb_desc_to_build - 1);
+
+					tmp_qp->stats.enqueue_err_count++;
+
+					/* This message cannot be enqueued */
+					if (nb_ops_sent == 0)
+						return 0;
+					goto kick_tail;
+				} else {
+					descriptors_built = ret2;
+					total_descriptors_built +=
+							descriptors_built;
+					nb_remaining_descriptors -=
+							descriptors_built;
+					QAT_DP_LOG(DEBUG,
+							"Multiple descriptors (%d) built ok",
+							descriptors_built);
+				}
+			} else {
+				QAT_DP_LOG(ERR, "For the current op, number of requested descriptors (%d) "
+						"exceeds number of available descriptors (%d)",
+						nb_desc_to_build,
+						nb_remaining_descriptors +
+							nb_desc_to_build);
+
+				qat_comp_free_split_op_memzones(cookie,
+						nb_desc_to_build - 1);
+
+				/* Not enough extra descriptors */
+				if (nb_ops_sent == 0)
+					return 0;
+				goto kick_tail;
+			}
+		} else {
+			descriptors_built = 1;
+			total_descriptors_built++;
+			nb_remaining_descriptors--;
+			QAT_DP_LOG(DEBUG, "Single descriptor built ok");
+		}
+
+		tail = adf_modulo(tail + (queue->msg_size * descriptors_built),
+				  queue->modulo_mask);
+		ops++;
+		nb_ops_sent++;
+	}
+
+kick_tail:
+	queue->tail = tail;
+	tmp_qp->enqueued += total_descriptors_built;
 	tmp_qp->stats.enqueued_count += nb_ops_sent;
-	txq_write_tail(tmp_qp, queue);
+	txq_write_tail(tmp_qp->qat_dev_gen, tmp_qp, queue);
 	return nb_ops_sent;
 }
 
@@ -632,48 +987,159 @@ qat_dequeue_op_burst(void *qp, void **ops, uint16_t nb_ops)
 	struct qat_queue *rx_queue;
 	struct qat_qp *tmp_qp = (struct qat_qp *)qp;
 	uint32_t head;
-	uint32_t resp_counter = 0;
+	uint32_t op_resp_counter = 0, fw_resp_counter = 0;
 	uint8_t *resp_msg;
+	int nb_fw_responses;
 
 	rx_queue = &(tmp_qp->rx_q);
 	head = rx_queue->head;
 	resp_msg = (uint8_t *)rx_queue->base_addr + rx_queue->head;
 
 	while (*(uint32_t *)resp_msg != ADF_RING_EMPTY_SIG &&
-			resp_counter != nb_ops) {
+			op_resp_counter != nb_ops) {
+
+		nb_fw_responses = 1;
 
 		if (tmp_qp->service_type == QAT_SERVICE_SYMMETRIC)
-			qat_sym_process_response(ops, resp_msg);
+			qat_sym_process_response(ops, resp_msg,
+				tmp_qp->op_cookies[head >> rx_queue->trailz]);
 		else if (tmp_qp->service_type == QAT_SERVICE_COMPRESSION)
-			qat_comp_process_response(ops, resp_msg,
-				tmp_qp->op_cookies[head / rx_queue->msg_size],
+			nb_fw_responses = qat_comp_process_response(
+				ops, resp_msg,
+				tmp_qp->op_cookies[head >> rx_queue->trailz],
 				&tmp_qp->stats.dequeue_err_count);
-		else if (tmp_qp->service_type == QAT_SERVICE_ASYMMETRIC) {
 #ifdef BUILD_QAT_ASYM
+		else if (tmp_qp->service_type == QAT_SERVICE_ASYMMETRIC)
 			qat_asym_process_response(ops, resp_msg,
-				tmp_qp->op_cookies[head / rx_queue->msg_size]);
+				tmp_qp->op_cookies[head >> rx_queue->trailz]);
 #endif
-		}
 
 		head = adf_modulo(head + rx_queue->msg_size,
 				  rx_queue->modulo_mask);
 
 		resp_msg = (uint8_t *)rx_queue->base_addr + head;
-		ops++;
-		resp_counter++;
+
+		if (nb_fw_responses) {
+			/* only move on to next op if one was ready to return
+			 * to API
+			 */
+			ops++;
+			op_resp_counter++;
+		}
+
+		 /* A compression op may be broken up into multiple fw requests.
+		  * Only count fw responses as complete once ALL the responses
+		  * associated with an op have been processed, as the cookie
+		  * data from the first response must be available until
+		  * finished with all firmware responses.
+		  */
+		fw_resp_counter += nb_fw_responses;
+
+		rx_queue->nb_processed_responses++;
 	}
-	if (resp_counter > 0) {
-		rx_queue->head = head;
-		tmp_qp->stats.dequeued_count += resp_counter;
-		rx_queue->nb_processed_responses += resp_counter;
-		tmp_qp->inflights16 -= resp_counter;
 
-		if (rx_queue->nb_processed_responses >
-						QAT_CSR_HEAD_WRITE_THRESH)
-			rxq_free_desc(tmp_qp, rx_queue);
+	tmp_qp->dequeued += fw_resp_counter;
+	tmp_qp->stats.dequeued_count += op_resp_counter;
+
+	rx_queue->head = head;
+	if (rx_queue->nb_processed_responses > QAT_CSR_HEAD_WRITE_THRESH)
+		rxq_free_desc(tmp_qp->qat_dev_gen, tmp_qp, rx_queue);
+
+	QAT_DP_LOG(DEBUG, "Dequeue burst return: %u, QAT responses: %u",
+			op_resp_counter, fw_resp_counter);
+
+	return op_resp_counter;
+}
+
+/* This is almost same as dequeue_op_burst, without the atomic, without stats
+ * and without the op. Dequeues one response.
+ */
+static uint8_t
+qat_cq_dequeue_response(struct qat_qp *qp, void *out_data)
+{
+	uint8_t result = 0;
+	uint8_t retries = 0;
+	struct qat_queue *queue = &(qp->rx_q);
+	struct icp_qat_fw_comn_resp *resp_msg = (struct icp_qat_fw_comn_resp *)
+			((uint8_t *)queue->base_addr + queue->head);
+
+	while (retries++ < QAT_CQ_MAX_DEQ_RETRIES &&
+			*(uint32_t *)resp_msg == ADF_RING_EMPTY_SIG) {
+		/* loop waiting for response until we reach the timeout */
+		rte_delay_ms(20);
 	}
 
-	return resp_counter;
+	if (*(uint32_t *)resp_msg != ADF_RING_EMPTY_SIG) {
+		/* response received */
+		result = 1;
+
+		/* check status flag */
+		if (ICP_QAT_FW_COMN_RESP_CRYPTO_STAT_GET(
+				resp_msg->comn_hdr.comn_status) ==
+				ICP_QAT_FW_COMN_STATUS_FLAG_OK) {
+			/* success */
+			memcpy(out_data, resp_msg, queue->msg_size);
+		} else {
+			memset(out_data, 0, queue->msg_size);
+		}
+
+		queue->head = adf_modulo(queue->head + queue->msg_size,
+				queue->modulo_mask);
+		rxq_free_desc(qp->qat_dev_gen, qp, queue);
+	}
+
+	return result;
+}
+
+/* Sends a NULL message and extracts QAT fw version from the response.
+ * Used to determine detailed capabilities based on the fw version number.
+ * This assumes that there are no inflight messages, i.e. assumes there's space
+ * on the qp, one message is sent and only one response collected.
+ * Returns fw version number or 0 for unknown version or a negative error code.
+ */
+int
+qat_cq_get_fw_version(struct qat_qp *qp)
+{
+	struct qat_queue *queue = &(qp->tx_q);
+	uint8_t *base_addr = (uint8_t *)queue->base_addr;
+	struct icp_qat_fw_comn_req null_msg;
+	struct icp_qat_fw_comn_resp response;
+
+	/* prepare the NULL request */
+	memset(&null_msg, 0, sizeof(null_msg));
+	null_msg.comn_hdr.hdr_flags =
+		ICP_QAT_FW_COMN_HDR_FLAGS_BUILD(ICP_QAT_FW_COMN_REQ_FLAG_SET);
+	null_msg.comn_hdr.service_type = ICP_QAT_FW_COMN_REQ_NULL;
+	null_msg.comn_hdr.service_cmd_id = ICP_QAT_FW_NULL_REQ_SERV_ID;
+
+#if RTE_LOG_DP_LEVEL >= RTE_LOG_DEBUG
+	QAT_DP_HEXDUMP_LOG(DEBUG, "NULL request", &null_msg, sizeof(null_msg));
+#endif
+
+	/* send the NULL request */
+	memcpy(base_addr + queue->tail, &null_msg, sizeof(null_msg));
+	queue->tail = adf_modulo(queue->tail + queue->msg_size,
+			queue->modulo_mask);
+	txq_write_tail(qp->qat_dev_gen, qp, queue);
+
+	/* receive a response */
+	if (qat_cq_dequeue_response(qp, &response)) {
+
+#if RTE_LOG_DP_LEVEL >= RTE_LOG_DEBUG
+		QAT_DP_HEXDUMP_LOG(DEBUG, "NULL response:", &response,
+				sizeof(response));
+#endif
+		/* if LW0 bit 24 is set - then the fw version was returned */
+		if (QAT_FIELD_GET(response.comn_hdr.hdr_flags,
+				ICP_QAT_FW_COMN_NULL_VERSION_FLAG_BITPOS,
+				ICP_QAT_FW_COMN_NULL_VERSION_FLAG_MASK))
+			return response.resrvd[0]; /* return LW4 */
+		else
+			return 0; /* not set - we don't know fw version */
+	}
+
+	QAT_LOG(ERR, "No response received");
+	return -EINVAL;
 }
 
 __rte_weak int