*/
#include <rte_common.h>
+#include <rte_cycles.h>
#include <rte_dev.h>
#include <rte_malloc.h>
#include <rte_memzone.h>
#include "qat_comp.h"
#include "adf_transport_access_macros.h"
+#define QAT_CQ_MAX_DEQ_RETRIES 10
#define ADF_MAX_DESC 4096
#define ADF_MIN_DESC 128
{
struct qat_qp *qp;
- struct rte_pci_device *pci_dev = qat_dev->pci_dev;
+ struct rte_pci_device *pci_dev =
+ qat_pci_devs[qat_dev->qat_dev_id].pci_dev;
char op_cookie_pool_name[RTE_RING_NAMESIZE];
uint32_t i;
qp->nb_descriptors,
qat_qp_conf->cookie_size, 64, 0,
NULL, NULL, NULL, NULL,
- qat_dev->pci_dev->device.numa_node,
+ pci_dev->device.numa_node,
0);
if (!qp->op_cookie_pool) {
QAT_LOG(ERR, "QAT PMD Cannot create"
}
qp->qat_dev_gen = qat_dev->qat_dev_gen;
- qp->build_request = qat_qp_conf->build_request;
qp->service_type = qat_qp_conf->hw->service_type;
qp->qat_dev = qat_dev;
uint64_t queue_base;
void *io_addr;
const struct rte_memzone *qp_mz;
- struct rte_pci_device *pci_dev = qat_dev->pci_dev;
+ struct rte_pci_device *pci_dev =
+ qat_pci_devs[qat_dev->qat_dev_id].pci_dev;
int ret = 0;
uint16_t desc_size = (dir == ADF_RING_DIR_TX ?
qp_conf->hw->tx_msg_size : qp_conf->hw->rx_msg_size);
qp_conf->service_str, "qp_mem",
queue->hw_bundle_number, queue->hw_queue_number);
qp_mz = queue_dma_zone_reserve(queue->memz_name, queue_size_bytes,
- qat_dev->pci_dev->device.numa_node);
+ pci_dev->device.numa_node);
if (qp_mz == NULL) {
QAT_LOG(ERR, "Failed to allocate ring memzone");
return -ENOMEM;
queue->tail = 0;
queue->msg_size = desc_size;
+ /* For fast calculation of cookie index, relies on msg_size being 2^n */
+ queue->trailz = __builtin_ctz(desc_size);
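+ /* e.g. msg_size of 128 gives trailz 7, so tail >> 7 == tail / 128 */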
+
/*
* Write an unused pattern to the queue memory.
*/
register struct qat_queue *queue;
struct qat_qp *tmp_qp = (struct qat_qp *)qp;
register uint32_t nb_ops_sent = 0;
- register int ret;
+ register int ret = -1;
uint16_t nb_ops_possible = nb_ops;
register uint8_t *base_addr;
register uint32_t tail;
}
}
+#ifdef BUILD_QAT_SYM
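+ /* pre-process the whole burst of sym ops before building requests */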
+ if (tmp_qp->service_type == QAT_SERVICE_SYMMETRIC)
+ qat_sym_preprocess_requests(ops, nb_ops_possible);
+#endif
while (nb_ops_sent != nb_ops_possible) {
- ret = tmp_qp->build_request(*ops, base_addr + tail,
- tmp_qp->op_cookies[tail / queue->msg_size],
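+ /* dispatch on the qp's service type to build the request at the tail */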
+ if (tmp_qp->service_type == QAT_SERVICE_SYMMETRIC) {
+#ifdef BUILD_QAT_SYM
+ ret = qat_sym_build_request(*ops, base_addr + tail,
+ tmp_qp->op_cookies[tail >> queue->trailz],
+ tmp_qp->qat_dev_gen);
+#endif
+ } else if (tmp_qp->service_type == QAT_SERVICE_COMPRESSION) {
+ ret = qat_comp_build_request(*ops, base_addr + tail,
+ tmp_qp->op_cookies[tail >> queue->trailz],
tmp_qp->qat_dev_gen);
+ } else if (tmp_qp->service_type == QAT_SERVICE_ASYMMETRIC) {
+#ifdef BUILD_QAT_ASYM
+ ret = qat_asym_build_request(*ops, base_addr + tail,
+ tmp_qp->op_cookies[tail >> queue->trailz],
+ tmp_qp->qat_dev_gen);
+#endif
+ }
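+ /* ret stays -1 if support for the service type was compiled out */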
if (ret != 0) {
tmp_qp->stats.enqueue_err_count++;
/* This message cannot be enqueued */
return nb_ops_sent;
}
+/* Use this for compression only - but keep it consistent with the common
+ * function above as much as possible.
+ */
+uint16_t
+qat_enqueue_comp_op_burst(void *qp, void **ops, uint16_t nb_ops)
+{
+ register struct qat_queue *queue;
+ struct qat_qp *tmp_qp = (struct qat_qp *)qp;
+ register uint32_t nb_ops_sent = 0;
+ register int nb_desc_to_build;
+ uint16_t nb_ops_possible = nb_ops;
+ register uint8_t *base_addr;
+ register uint32_t tail;
+
+ int descriptors_built, total_descriptors_built = 0;
+ int nb_remaining_descriptors;
+ int overflow = 0;
+
+ if (unlikely(nb_ops == 0))
+ return 0;
+
+ /* read params used a lot in main loop into registers */
+ queue = &(tmp_qp->tx_q);
+ base_addr = (uint8_t *)queue->base_addr;
+ tail = queue->tail;
+
+ /* Find how many can actually fit on the ring */
+ {
+ /* dequeued can only be written by one thread, but it may not
+ * be this thread. As it's 4-byte aligned it will be read
+ * atomically here by any Intel CPU.
+ * enqueued can wrap before dequeued, but cannot lap it, as the
+ * var size of enq/deq (uint32_t) exceeds the var size of
+ * max_inflights (uint16_t). In reality inflights never even
+ * approaches max uint16_t, as it's <= ADF_MAX_DESC.
+ * On wrapping, the calculation still returns the correct
+ * positive value as all three vars are unsigned.
+ */
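+ /* e.g. after enqueued wraps: enqueued = 5, dequeued = 0xFFFFFFFE,
+ * so inflights = 5 - 0xFFFFFFFE = 7 (mod 2^32).
+ */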
+ uint32_t inflights =
+ tmp_qp->enqueued - tmp_qp->dequeued;
+
+ /* Trim the burst if it would overflow max_inflights */
+ overflow = (inflights + nb_ops) - tmp_qp->max_inflights;
+ if (overflow > 0) {
+ nb_ops_possible = nb_ops - overflow;
+ if (nb_ops_possible == 0)
+ return 0;
+ }
+
+ /* QAT has plenty of work queued already, so don't waste cycles
+ * enqueueing; wait until the application has gathered a bigger
+ * burst or some completed ops have been dequeued.
+ */
+ if (tmp_qp->min_enq_burst_threshold && inflights >
+ QAT_QP_MIN_INFL_THRESHOLD && nb_ops_possible <
+ tmp_qp->min_enq_burst_threshold) {
+ tmp_qp->stats.threshold_hit_count++;
+ return 0;
+ }
+ }
+
+ /* At this point nb_ops_possible assumes a 1:1 mapping
+ * between ops and descriptors.
+ * Fewer may be sent if some ops have to be split.
+ * nb_ops_possible is <= burst size.
+ * Find out how many spaces are actually available on the qp in case
+ * more are needed.
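+ * e.g. nb_ops_possible = 16 with overflow = -20 leaves
+ * 16 + 20 = 36 descriptor slots available.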
+ */
+ nb_remaining_descriptors = nb_ops_possible
+ + ((overflow >= 0) ? 0 : overflow * (-1));
+ QAT_DP_LOG(DEBUG, "Nb ops requested %d, nb descriptors remaining %d",
+ nb_ops, nb_remaining_descriptors);
+
+ while (nb_ops_sent != nb_ops_possible &&
+ nb_remaining_descriptors > 0) {
+ struct qat_comp_op_cookie *cookie =
+ tmp_qp->op_cookies[tail >> queue->trailz];
+
+ descriptors_built = 0;
+
+ QAT_DP_LOG(DEBUG, "--- data length: %u",
+ ((struct rte_comp_op *)*ops)->src.length);
+
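+ /* returns <0 on error, 1 normally, or >1 (the number of
+ * descriptors needed) if the op must be split
+ */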
+ nb_desc_to_build = qat_comp_build_request(*ops,
+ base_addr + tail, cookie, tmp_qp->qat_dev_gen);
+ QAT_DP_LOG(DEBUG, "%d descriptors built, %d remaining, "
+ "%d ops sent, %d descriptors needed",
+ total_descriptors_built, nb_remaining_descriptors,
+ nb_ops_sent, nb_desc_to_build);
+
+ if (unlikely(nb_desc_to_build < 0)) {
+ /* this message cannot be enqueued */
+ tmp_qp->stats.enqueue_err_count++;
+ if (nb_ops_sent == 0)
+ return 0;
+ goto kick_tail;
+ } else if (unlikely(nb_desc_to_build > 1)) {
+ /* this op is too big and must be split - get more
+ * descriptors and retry
+ */
+
+ QAT_DP_LOG(DEBUG, "Build %d descriptors for this op",
+ nb_desc_to_build);
+
+ nb_remaining_descriptors -= nb_desc_to_build;
+ if (nb_remaining_descriptors >= 0) {
+ /* There are enough remaining descriptors,
+ * so build the split requests
+ */
+ int ret2 = qat_comp_build_multiple_requests(
+ *ops, tmp_qp, tail,
+ nb_desc_to_build);
+
+ if (unlikely(ret2 < 1)) {
+ QAT_DP_LOG(DEBUG,
+ "Failed to build (%d) descriptors, status %d",
+ nb_desc_to_build, ret2);
+
+ qat_comp_free_split_op_memzones(cookie,
+ nb_desc_to_build - 1);
+
+ tmp_qp->stats.enqueue_err_count++;
+
+ /* This message cannot be enqueued */
+ if (nb_ops_sent == 0)
+ return 0;
+ goto kick_tail;
+ } else {
+ descriptors_built = ret2;
+ total_descriptors_built +=
+ descriptors_built;
+ nb_remaining_descriptors -=
+ descriptors_built;
+ QAT_DP_LOG(DEBUG,
+ "Multiple descriptors (%d) built ok",
+ descriptors_built);
+ }
+ } else {
+ QAT_DP_LOG(ERR, "For the current op, number of requested descriptors (%d) "
+ "exceeds number of available descriptors (%d)",
+ nb_desc_to_build,
+ nb_remaining_descriptors +
+ nb_desc_to_build);
+
+ qat_comp_free_split_op_memzones(cookie,
+ nb_desc_to_build - 1);
+
+ /* Not enough extra descriptors */
+ if (nb_ops_sent == 0)
+ return 0;
+ goto kick_tail;
+ }
+ } else {
+ descriptors_built = 1;
+ total_descriptors_built++;
+ nb_remaining_descriptors--;
+ QAT_DP_LOG(DEBUG, "Single descriptor built ok");
+ }
+
+ tail = adf_modulo(tail + (queue->msg_size * descriptors_built),
+ queue->modulo_mask);
+ ops++;
+ nb_ops_sent++;
+ }
+
+kick_tail:
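+ /* write the new tail once for all descriptors built in this burst */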
+ queue->tail = tail;
+ tmp_qp->enqueued += total_descriptors_built;
+ tmp_qp->stats.enqueued_count += nb_ops_sent;
+ txq_write_tail(tmp_qp, queue);
+ return nb_ops_sent;
+}
+
uint16_t
qat_dequeue_op_burst(void *qp, void **ops, uint16_t nb_ops)
{
struct qat_queue *rx_queue;
struct qat_qp *tmp_qp = (struct qat_qp *)qp;
uint32_t head;
- uint32_t resp_counter = 0;
+ uint32_t op_resp_counter = 0, fw_resp_counter = 0;
uint8_t *resp_msg;
+ int nb_fw_responses;
rx_queue = &(tmp_qp->rx_q);
head = rx_queue->head;
resp_msg = (uint8_t *)rx_queue->base_addr + rx_queue->head;
while (*(uint32_t *)resp_msg != ADF_RING_EMPTY_SIG &&
- resp_counter != nb_ops) {
+ op_resp_counter != nb_ops) {
+
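+ /* assume one fw response per op unless compression says otherwise */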
+ nb_fw_responses = 1;
if (tmp_qp->service_type == QAT_SERVICE_SYMMETRIC)
- qat_sym_process_response(ops, resp_msg);
+ qat_sym_process_response(ops, resp_msg,
+ tmp_qp->op_cookies[head >> rx_queue->trailz]);
else if (tmp_qp->service_type == QAT_SERVICE_COMPRESSION)
- qat_comp_process_response(ops, resp_msg,
- tmp_qp->op_cookies[head / rx_queue->msg_size],
+ nb_fw_responses = qat_comp_process_response(
+ ops, resp_msg,
+ tmp_qp->op_cookies[head >> rx_queue->trailz],
&tmp_qp->stats.dequeue_err_count);
- else if (tmp_qp->service_type == QAT_SERVICE_ASYMMETRIC) {
#ifdef BUILD_QAT_ASYM
+ else if (tmp_qp->service_type == QAT_SERVICE_ASYMMETRIC)
qat_asym_process_response(ops, resp_msg,
- tmp_qp->op_cookies[head / rx_queue->msg_size]);
+ tmp_qp->op_cookies[head >> rx_queue->trailz]);
#endif
- }
head = adf_modulo(head + rx_queue->msg_size,
rx_queue->modulo_mask);
resp_msg = (uint8_t *)rx_queue->base_addr + head;
- ops++;
- resp_counter++;
+
+ if (nb_fw_responses) {
+ /* only move on to the next op if one was ready to return
+ * to the API
+ */
+ ops++;
+ op_resp_counter++;
+ }
+
+ /* A compression op may be broken up into multiple fw requests.
+ * Only count an op's fw responses as complete once ALL of them
+ * have been processed, as the cookie data from the first
+ * response must remain available until the last firmware
+ * response for that op has been handled.
+ */
+ fw_resp_counter += nb_fw_responses;
+
+ rx_queue->nb_processed_responses++;
+ }
+
+ tmp_qp->dequeued += fw_resp_counter;
+ tmp_qp->stats.dequeued_count += op_resp_counter;
+
+ rx_queue->head = head;
+ if (rx_queue->nb_processed_responses > QAT_CSR_HEAD_WRITE_THRESH)
+ rxq_free_desc(tmp_qp, rx_queue);
+
+ QAT_DP_LOG(DEBUG, "Dequeue burst return: %u, QAT responses: %u",
+ op_resp_counter, fw_resp_counter);
+
+ return op_resp_counter;
+}
+
+/* This is almost the same as dequeue_op_burst, but without the atomics,
+ * without the stats and without the op. Dequeues one response.
+ */
+static uint8_t
+qat_cq_dequeue_response(struct qat_qp *qp, void *out_data)
+{
+ uint8_t result = 0;
+ uint8_t retries = 0;
+ struct qat_queue *queue = &(qp->rx_q);
+ struct icp_qat_fw_comn_resp *resp_msg = (struct icp_qat_fw_comn_resp *)
+ ((uint8_t *)queue->base_addr + queue->head);
+
+ while (retries++ < QAT_CQ_MAX_DEQ_RETRIES &&
+ *(uint32_t *)resp_msg == ADF_RING_EMPTY_SIG) {
+ /* loop waiting for a response until we reach the retry limit */
+ rte_delay_ms(20);
}
- if (resp_counter > 0) {
- rx_queue->head = head;
- tmp_qp->dequeued += resp_counter;
- tmp_qp->stats.dequeued_count += resp_counter;
- rx_queue->nb_processed_responses += resp_counter;
-
- if (rx_queue->nb_processed_responses >
- QAT_CSR_HEAD_WRITE_THRESH)
- rxq_free_desc(tmp_qp, rx_queue);
+
+ if (*(uint32_t *)resp_msg != ADF_RING_EMPTY_SIG) {
+ /* response received */
+ result = 1;
+
+ /* check status flag */
+ if (ICP_QAT_FW_COMN_RESP_CRYPTO_STAT_GET(
+ resp_msg->comn_hdr.comn_status) ==
+ ICP_QAT_FW_COMN_STATUS_FLAG_OK) {
+ /* success */
+ memcpy(out_data, resp_msg, queue->msg_size);
+ } else {
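+ /* fw reported an error - return zeroed output */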
+ memset(out_data, 0, queue->msg_size);
+ }
+
+ queue->head = adf_modulo(queue->head + queue->msg_size,
+ queue->modulo_mask);
+ rxq_free_desc(qp, queue);
}
- return resp_counter;
+ return result;
+}
+
+/* Sends a NULL message and extracts the QAT fw version from the response.
+ * Used to determine detailed capabilities based on the fw version number.
+ * This assumes that there are no inflight messages, i.e. that there is space
+ * on the qp; one message is sent and only one response is collected.
+ * Returns the fw version number, 0 if the version is unknown, or a negative
+ * error code.
+ */
+int
+qat_cq_get_fw_version(struct qat_qp *qp)
+{
+ struct qat_queue *queue = &(qp->tx_q);
+ uint8_t *base_addr = (uint8_t *)queue->base_addr;
+ struct icp_qat_fw_comn_req null_msg;
+ struct icp_qat_fw_comn_resp response;
+
+ /* prepare the NULL request */
+ memset(&null_msg, 0, sizeof(null_msg));
+ null_msg.comn_hdr.hdr_flags =
+ ICP_QAT_FW_COMN_HDR_FLAGS_BUILD(ICP_QAT_FW_COMN_REQ_FLAG_SET);
+ null_msg.comn_hdr.service_type = ICP_QAT_FW_COMN_REQ_NULL;
+ null_msg.comn_hdr.service_cmd_id = ICP_QAT_FW_NULL_REQ_SERV_ID;
+
+#if RTE_LOG_DP_LEVEL >= RTE_LOG_DEBUG
+ QAT_DP_HEXDUMP_LOG(DEBUG, "NULL request", &null_msg, sizeof(null_msg));
+#endif
+
+ /* send the NULL request */
+ memcpy(base_addr + queue->tail, &null_msg, sizeof(null_msg));
+ queue->tail = adf_modulo(queue->tail + queue->msg_size,
+ queue->modulo_mask);
+ txq_write_tail(qp, queue);
+
+ /* receive a response */
+ if (qat_cq_dequeue_response(qp, &response)) {
+
+#if RTE_LOG_DP_LEVEL >= RTE_LOG_DEBUG
+ QAT_DP_HEXDUMP_LOG(DEBUG, "NULL response:", &response,
+ sizeof(response));
+#endif
+ /* if LW0 bit 24 is set, the fw version was returned */
+ if (QAT_FIELD_GET(response.comn_hdr.hdr_flags,
+ ICP_QAT_FW_COMN_NULL_VERSION_FLAG_BITPOS,
+ ICP_QAT_FW_COMN_NULL_VERSION_FLAG_MASK))
+ return response.resrvd[0]; /* return LW4 */
+ else
+ return 0; /* not set - we don't know fw version */
+ }
+
+ QAT_LOG(ERR, "No response received");
+ return -EINVAL;
}
__rte_weak int