X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fcommon%2Fqat%2Fqat_qp.c;h=026ea5ee01c4066efd650a09aebdc3925d614b00;hb=90a2ec4ae81f2ef52f7c14bfc9307e75a4127fa4;hp=8e4c74a0226b26a0c10a9b560f74c7398d24a1e3;hpb=8f185e7c3e651eb845bd82f7ab0bbb862cc0a2e2;p=dpdk.git diff --git a/drivers/common/qat/qat_qp.c b/drivers/common/qat/qat_qp.c index 8e4c74a022..026ea5ee01 100644 --- a/drivers/common/qat/qat_qp.c +++ b/drivers/common/qat/qat_qp.c @@ -3,6 +3,7 @@ */ #include +#include #include #include #include @@ -18,7 +19,9 @@ #include "qat_asym.h" #include "qat_comp.h" #include "adf_transport_access_macros.h" +#include "adf_transport_access_macros_gen4vf.h" +#define QAT_CQ_MAX_DEQ_RETRIES 10 #define ADF_MAX_DESC 4096 #define ADF_MIN_DESC 128 @@ -136,21 +139,34 @@ static int qat_queue_create(struct qat_pci_device *qat_dev, struct qat_queue *queue, struct qat_qp_config *, uint8_t dir); static int adf_verify_queue_size(uint32_t msg_size, uint32_t msg_num, uint32_t *queue_size_for_csr); -static void adf_configure_queues(struct qat_qp *queue); -static void adf_queue_arb_enable(struct qat_queue *txq, void *base_addr, - rte_spinlock_t *lock); -static void adf_queue_arb_disable(struct qat_queue *txq, void *base_addr, - rte_spinlock_t *lock); - - -int qat_qps_per_service(const struct qat_qp_hw_data *qp_hw_data, +static void adf_configure_queues(struct qat_qp *queue, + enum qat_device_gen qat_dev_gen); +static void adf_queue_arb_enable(enum qat_device_gen qat_dev_gen, + struct qat_queue *txq, void *base_addr, rte_spinlock_t *lock); +static void adf_queue_arb_disable(enum qat_device_gen qat_dev_gen, + struct qat_queue *txq, void *base_addr, rte_spinlock_t *lock); + +int qat_qps_per_service(struct qat_pci_device *qat_dev, enum qat_service_type service) { - int i, count; + int i = 0, count = 0, max_ops_per_srv = 0; + + if (qat_dev->qat_dev_gen == QAT_GEN4) { + max_ops_per_srv = QAT_GEN4_BUNDLE_NUM; + for (i = 0, count = 0; i < max_ops_per_srv; i++) + if (qat_dev->qp_gen4_data[i][0].service_type == service) + count++; + } else { + const struct qat_qp_hw_data *sym_hw_qps = + qat_gen_config[qat_dev->qat_dev_gen] + .qp_hw_data[service]; + + max_ops_per_srv = ADF_MAX_QPS_ON_ANY_SERVICE; + for (i = 0, count = 0; i < max_ops_per_srv; i++) + if (sym_hw_qps[i].service_type == service) + count++; + } - for (i = 0, count = 0; i < ADF_MAX_QPS_ON_ANY_SERVICE; i++) - if (qp_hw_data[i].service_type == service) - count++; return count; } @@ -188,11 +204,12 @@ int qat_qp_setup(struct qat_pci_device *qat_dev, struct qat_qp **qp_addr, uint16_t queue_pair_id, struct qat_qp_config *qat_qp_conf) - { struct qat_qp *qp; - struct rte_pci_device *pci_dev = qat_dev->pci_dev; + struct rte_pci_device *pci_dev = + qat_pci_devs[qat_dev->qat_dev_id].pci_dev; char op_cookie_pool_name[RTE_RING_NAMESIZE]; + enum qat_device_gen qat_dev_gen = qat_dev->qat_dev_gen; uint32_t i; QAT_LOG(DEBUG, "Setup qp %u on qat pci device %d gen %d", @@ -230,7 +247,7 @@ int qat_qp_setup(struct qat_pci_device *qat_dev, } qp->mmap_bar_addr = pci_dev->mem_resource[0].addr; - qp->inflights16 = 0; + qp->enqueued = qp->dequeued = 0; if (qat_queue_create(qat_dev, &(qp->tx_q), qat_qp_conf, ADF_RING_DIR_TX) != 0) { @@ -256,8 +273,8 @@ int qat_qp_setup(struct qat_pci_device *qat_dev, goto create_err; } - adf_configure_queues(qp); - adf_queue_arb_enable(&qp->tx_q, qp->mmap_bar_addr, + adf_configure_queues(qp, qat_dev_gen); + adf_queue_arb_enable(qat_dev_gen, &qp->tx_q, qp->mmap_bar_addr, &qat_dev->arb_csr_lock); snprintf(op_cookie_pool_name, RTE_RING_NAMESIZE, @@ -272,7 +289,7 @@ int qat_qp_setup(struct qat_pci_device *qat_dev, qp->nb_descriptors, qat_qp_conf->cookie_size, 64, 0, NULL, NULL, NULL, NULL, - qat_dev->pci_dev->device.numa_node, + pci_dev->device.numa_node, 0); if (!qp->op_cookie_pool) { QAT_LOG(ERR, "QAT PMD Cannot create" @@ -289,7 +306,6 @@ int qat_qp_setup(struct qat_pci_device *qat_dev, } qp->qat_dev_gen = qat_dev->qat_dev_gen; - qp->build_request = qat_qp_conf->build_request; qp->service_type = qat_qp_conf->hw->service_type; qp->qat_dev = qat_dev; @@ -307,7 +323,8 @@ create_err: return -EFAULT; } -int qat_qp_release(struct qat_qp **qp_addr) + +int qat_qp_release(enum qat_device_gen qat_dev_gen, struct qat_qp **qp_addr) { struct qat_qp *qp = *qp_addr; uint32_t i; @@ -321,15 +338,15 @@ int qat_qp_release(struct qat_qp **qp_addr) qp->qat_dev->qat_dev_id); /* Don't free memory if there are still responses to be processed */ - if (qp->inflights16 == 0) { + if ((qp->enqueued - qp->dequeued) == 0) { qat_queue_delete(&(qp->tx_q)); qat_queue_delete(&(qp->rx_q)); } else { return -EAGAIN; } - adf_queue_arb_disable(&(qp->tx_q), qp->mmap_bar_addr, - &qp->qat_dev->arb_csr_lock); + adf_queue_arb_disable(qat_dev_gen, &(qp->tx_q), qp->mmap_bar_addr, + &qp->qat_dev->arb_csr_lock); for (i = 0; i < qp->nb_descriptors; i++) rte_mempool_put(qp->op_cookie_pool, qp->op_cookies[i]); @@ -377,7 +394,9 @@ qat_queue_create(struct qat_pci_device *qat_dev, struct qat_queue *queue, uint64_t queue_base; void *io_addr; const struct rte_memzone *qp_mz; - struct rte_pci_device *pci_dev = qat_dev->pci_dev; + struct rte_pci_device *pci_dev = + qat_pci_devs[qat_dev->qat_dev_id].pci_dev; + enum qat_device_gen qat_dev_gen = qat_dev->qat_dev_gen; int ret = 0; uint16_t desc_size = (dir == ADF_RING_DIR_TX ? qp_conf->hw->tx_msg_size : qp_conf->hw->rx_msg_size); @@ -401,7 +420,7 @@ qat_queue_create(struct qat_pci_device *qat_dev, struct qat_queue *queue, qp_conf->service_str, "qp_mem", queue->hw_bundle_number, queue->hw_queue_number); qp_mz = queue_dma_zone_reserve(queue->memz_name, queue_size_bytes, - qat_dev->pci_dev->device.numa_node); + pci_dev->device.numa_node); if (qp_mz == NULL) { QAT_LOG(ERR, "Failed to allocate ring memzone"); return -ENOMEM; @@ -430,18 +449,26 @@ qat_queue_create(struct qat_pci_device *qat_dev, struct qat_queue *queue, queue->tail = 0; queue->msg_size = desc_size; + /* For fast calculation of cookie index, relies on msg_size being 2^n */ + queue->trailz = __builtin_ctz(desc_size); + /* * Write an unused pattern to the queue memory. */ memset(queue->base_addr, 0x7F, queue_size_bytes); - - queue_base = BUILD_RING_BASE_ADDR(queue->base_phys_addr, - queue->queue_size); - io_addr = pci_dev->mem_resource[0].addr; - WRITE_CSR_RING_BASE(io_addr, queue->hw_bundle_number, + if (qat_dev_gen == QAT_GEN4) { + queue_base = BUILD_RING_BASE_ADDR_GEN4(queue->base_phys_addr, + queue->queue_size); + WRITE_CSR_RING_BASE_GEN4VF(io_addr, queue->hw_bundle_number, + queue->hw_queue_number, queue_base); + } else { + queue_base = BUILD_RING_BASE_ADDR(queue->base_phys_addr, + queue->queue_size); + WRITE_CSR_RING_BASE(io_addr, queue->hw_bundle_number, queue->hw_queue_number, queue_base); + } QAT_LOG(DEBUG, "RING: Name:%s, size in CSR: %u, in bytes %u," " nb msgs %u, msg_size %u, modulo mask %u", @@ -457,6 +484,84 @@ queue_create_err: return ret; } +int +qat_select_valid_queue(struct qat_pci_device *qat_dev, int qp_id, + enum qat_service_type service_type) +{ + if (qat_dev->qat_dev_gen == QAT_GEN4) { + int i = 0, valid_qps = 0; + + for (; i < QAT_GEN4_BUNDLE_NUM; i++) { + if (qat_dev->qp_gen4_data[i][0].service_type == + service_type) { + if (valid_qps == qp_id) + return i; + ++valid_qps; + } + } + } + return -1; +} + +int +qat_read_qp_config(struct qat_pci_device *qat_dev) +{ + int i = 0; + enum qat_device_gen qat_dev_gen = qat_dev->qat_dev_gen; + + if (qat_dev_gen == QAT_GEN4) { + uint16_t svc = 0; + + if (qat_query_svc(qat_dev, (uint8_t *)&svc)) + return -(EFAULT); + for (; i < QAT_GEN4_BUNDLE_NUM; i++) { + struct qat_qp_hw_data *hw_data = + &qat_dev->qp_gen4_data[i][0]; + uint8_t svc1 = (svc >> (3 * i)) & 0x7; + enum qat_service_type service_type = QAT_SERVICE_INVALID; + + if (svc1 == QAT_SVC_SYM) { + service_type = QAT_SERVICE_SYMMETRIC; + QAT_LOG(DEBUG, + "Discovered SYMMETRIC service on bundle %d", + i); + } else if (svc1 == QAT_SVC_COMPRESSION) { + service_type = QAT_SERVICE_COMPRESSION; + QAT_LOG(DEBUG, + "Discovered COPRESSION service on bundle %d", + i); + } else if (svc1 == QAT_SVC_ASYM) { + service_type = QAT_SERVICE_ASYMMETRIC; + QAT_LOG(DEBUG, + "Discovered ASYMMETRIC service on bundle %d", + i); + } else { + QAT_LOG(ERR, + "Unrecognized service on bundle %d", + i); + return -(EFAULT); + } + + memset(hw_data, 0, sizeof(*hw_data)); + hw_data->service_type = service_type; + if (service_type == QAT_SERVICE_ASYMMETRIC) { + hw_data->tx_msg_size = 64; + hw_data->rx_msg_size = 32; + } else if (service_type == QAT_SERVICE_SYMMETRIC || + service_type == + QAT_SERVICE_COMPRESSION) { + hw_data->tx_msg_size = 128; + hw_data->rx_msg_size = 32; + } + hw_data->tx_ring_num = 0; + hw_data->rx_ring_num = 1; + hw_data->hw_bundle_num = i; + } + return 0; + } + return -(EINVAL); +} + static int qat_qp_check_queue_alignment(uint64_t phys_addr, uint32_t queue_size_bytes) { @@ -480,54 +585,81 @@ static int adf_verify_queue_size(uint32_t msg_size, uint32_t msg_num, return -EINVAL; } -static void adf_queue_arb_enable(struct qat_queue *txq, void *base_addr, - rte_spinlock_t *lock) +static void +adf_queue_arb_enable(enum qat_device_gen qat_dev_gen, struct qat_queue *txq, + void *base_addr, rte_spinlock_t *lock) { - uint32_t arb_csr_offset = ADF_ARB_RINGSRVARBEN_OFFSET + - (ADF_ARB_REG_SLOT * - txq->hw_bundle_number); - uint32_t value; + uint32_t arb_csr_offset = 0, value; rte_spinlock_lock(lock); - value = ADF_CSR_RD(base_addr, arb_csr_offset); + if (qat_dev_gen == QAT_GEN4) { + arb_csr_offset = ADF_ARB_RINGSRVARBEN_OFFSET + + (ADF_RING_BUNDLE_SIZE_GEN4 * + txq->hw_bundle_number); + value = ADF_CSR_RD(base_addr + ADF_RING_CSR_ADDR_OFFSET_GEN4VF, + arb_csr_offset); + } else { + arb_csr_offset = ADF_ARB_RINGSRVARBEN_OFFSET + + (ADF_ARB_REG_SLOT * + txq->hw_bundle_number); + value = ADF_CSR_RD(base_addr, + arb_csr_offset); + } value |= (0x01 << txq->hw_queue_number); ADF_CSR_WR(base_addr, arb_csr_offset, value); rte_spinlock_unlock(lock); } -static void adf_queue_arb_disable(struct qat_queue *txq, void *base_addr, - rte_spinlock_t *lock) +static void adf_queue_arb_disable(enum qat_device_gen qat_dev_gen, + struct qat_queue *txq, void *base_addr, rte_spinlock_t *lock) { - uint32_t arb_csr_offset = ADF_ARB_RINGSRVARBEN_OFFSET + - (ADF_ARB_REG_SLOT * - txq->hw_bundle_number); - uint32_t value; + uint32_t arb_csr_offset = 0, value; rte_spinlock_lock(lock); - value = ADF_CSR_RD(base_addr, arb_csr_offset); + if (qat_dev_gen == QAT_GEN4) { + arb_csr_offset = ADF_ARB_RINGSRVARBEN_OFFSET + + (ADF_RING_BUNDLE_SIZE_GEN4 * + txq->hw_bundle_number); + value = ADF_CSR_RD(base_addr + ADF_RING_CSR_ADDR_OFFSET_GEN4VF, + arb_csr_offset); + } else { + arb_csr_offset = ADF_ARB_RINGSRVARBEN_OFFSET + + (ADF_ARB_REG_SLOT * + txq->hw_bundle_number); + value = ADF_CSR_RD(base_addr, + arb_csr_offset); + } value &= ~(0x01 << txq->hw_queue_number); ADF_CSR_WR(base_addr, arb_csr_offset, value); rte_spinlock_unlock(lock); } -static void adf_configure_queues(struct qat_qp *qp) +static void adf_configure_queues(struct qat_qp *qp, + enum qat_device_gen qat_dev_gen) { - uint32_t queue_config; - struct qat_queue *queue = &qp->tx_q; - - queue_config = BUILD_RING_CONFIG(queue->queue_size); - - WRITE_CSR_RING_CONFIG(qp->mmap_bar_addr, queue->hw_bundle_number, - queue->hw_queue_number, queue_config); - - queue = &qp->rx_q; - queue_config = - BUILD_RESP_RING_CONFIG(queue->queue_size, - ADF_RING_NEAR_WATERMARK_512, - ADF_RING_NEAR_WATERMARK_0); - - WRITE_CSR_RING_CONFIG(qp->mmap_bar_addr, queue->hw_bundle_number, - queue->hw_queue_number, queue_config); + uint32_t q_tx_config, q_resp_config; + struct qat_queue *q_tx = &qp->tx_q, *q_rx = &qp->rx_q; + + q_tx_config = BUILD_RING_CONFIG(q_tx->queue_size); + q_resp_config = BUILD_RESP_RING_CONFIG(q_rx->queue_size, + ADF_RING_NEAR_WATERMARK_512, + ADF_RING_NEAR_WATERMARK_0); + + if (qat_dev_gen == QAT_GEN4) { + WRITE_CSR_RING_CONFIG_GEN4VF(qp->mmap_bar_addr, + q_tx->hw_bundle_number, q_tx->hw_queue_number, + q_tx_config); + WRITE_CSR_RING_CONFIG_GEN4VF(qp->mmap_bar_addr, + q_rx->hw_bundle_number, q_rx->hw_queue_number, + q_resp_config); + } else { + WRITE_CSR_RING_CONFIG(qp->mmap_bar_addr, + q_tx->hw_bundle_number, q_tx->hw_queue_number, + q_tx_config); + WRITE_CSR_RING_CONFIG(qp->mmap_bar_addr, + q_rx->hw_bundle_number, q_rx->hw_queue_number, + q_resp_config); + } } static inline uint32_t adf_modulo(uint32_t data, uint32_t modulo_mask) @@ -536,14 +668,21 @@ static inline uint32_t adf_modulo(uint32_t data, uint32_t modulo_mask) } static inline void -txq_write_tail(struct qat_qp *qp, struct qat_queue *q) { - WRITE_CSR_RING_TAIL(qp->mmap_bar_addr, q->hw_bundle_number, +txq_write_tail(enum qat_device_gen qat_dev_gen, + struct qat_qp *qp, struct qat_queue *q) { + + if (qat_dev_gen == QAT_GEN4) { + WRITE_CSR_RING_TAIL_GEN4VF(qp->mmap_bar_addr, + q->hw_bundle_number, q->hw_queue_number, q->tail); + } else { + WRITE_CSR_RING_TAIL(qp->mmap_bar_addr, q->hw_bundle_number, q->hw_queue_number, q->tail); - q->csr_tail = q->tail; + } } static inline -void rxq_free_desc(struct qat_qp *qp, struct qat_queue *q) +void rxq_free_desc(enum qat_device_gen qat_dev_gen, struct qat_qp *qp, + struct qat_queue *q) { uint32_t old_head, new_head; uint32_t max_head; @@ -565,8 +704,14 @@ void rxq_free_desc(struct qat_qp *qp, struct qat_queue *q) q->csr_head = new_head; /* write current head to CSR */ - WRITE_CSR_RING_HEAD(qp->mmap_bar_addr, q->hw_bundle_number, - q->hw_queue_number, new_head); + if (qat_dev_gen == QAT_GEN4) { + WRITE_CSR_RING_HEAD_GEN4VF(qp->mmap_bar_addr, + q->hw_bundle_number, q->hw_queue_number, new_head); + } else { + WRITE_CSR_RING_HEAD(qp->mmap_bar_addr, q->hw_bundle_number, + q->hw_queue_number, new_head); + } + } uint16_t @@ -575,11 +720,10 @@ qat_enqueue_op_burst(void *qp, void **ops, uint16_t nb_ops) register struct qat_queue *queue; struct qat_qp *tmp_qp = (struct qat_qp *)qp; register uint32_t nb_ops_sent = 0; - register int ret; + register int ret = -1; uint16_t nb_ops_possible = nb_ops; register uint8_t *base_addr; register uint32_t tail; - int overflow; if (unlikely(nb_ops == 0)) return 0; @@ -590,26 +734,63 @@ qat_enqueue_op_burst(void *qp, void **ops, uint16_t nb_ops) tail = queue->tail; /* Find how many can actually fit on the ring */ - tmp_qp->inflights16 += nb_ops; - overflow = tmp_qp->inflights16 - tmp_qp->max_inflights; - if (overflow > 0) { - tmp_qp->inflights16 -= overflow; - nb_ops_possible = nb_ops - overflow; - if (nb_ops_possible == 0) + { + /* dequeued can only be written by one thread, but it may not + * be this thread. As it's 4-byte aligned it will be read + * atomically here by any Intel CPU. + * enqueued can wrap before dequeued, but cannot + * lap it as var size of enq/deq (uint32_t) > var size of + * max_inflights (uint16_t). In reality inflights is never + * even as big as max uint16_t, as it's <= ADF_MAX_DESC. + * On wrapping, the calculation still returns the correct + * positive value as all three vars are unsigned. + */ + uint32_t inflights = + tmp_qp->enqueued - tmp_qp->dequeued; + + if ((inflights + nb_ops) > tmp_qp->max_inflights) { + nb_ops_possible = tmp_qp->max_inflights - inflights; + if (nb_ops_possible == 0) + return 0; + } + /* QAT has plenty of work queued already, so don't waste cycles + * enqueueing, wait til the application has gathered a bigger + * burst or some completed ops have been dequeued + */ + if (tmp_qp->min_enq_burst_threshold && inflights > + QAT_QP_MIN_INFL_THRESHOLD && nb_ops_possible < + tmp_qp->min_enq_burst_threshold) { + tmp_qp->stats.threshold_hit_count++; return 0; + } } +#ifdef BUILD_QAT_SYM + if (tmp_qp->service_type == QAT_SERVICE_SYMMETRIC) + qat_sym_preprocess_requests(ops, nb_ops_possible); +#endif + while (nb_ops_sent != nb_ops_possible) { - ret = tmp_qp->build_request(*ops, base_addr + tail, - tmp_qp->op_cookies[tail / queue->msg_size], + if (tmp_qp->service_type == QAT_SERVICE_SYMMETRIC) { +#ifdef BUILD_QAT_SYM + ret = qat_sym_build_request(*ops, base_addr + tail, + tmp_qp->op_cookies[tail >> queue->trailz], + tmp_qp->qat_dev_gen); +#endif + } else if (tmp_qp->service_type == QAT_SERVICE_COMPRESSION) { + ret = qat_comp_build_request(*ops, base_addr + tail, + tmp_qp->op_cookies[tail >> queue->trailz], tmp_qp->qat_dev_gen); + } else if (tmp_qp->service_type == QAT_SERVICE_ASYMMETRIC) { +#ifdef BUILD_QAT_ASYM + ret = qat_asym_build_request(*ops, base_addr + tail, + tmp_qp->op_cookies[tail >> queue->trailz], + tmp_qp->qat_dev_gen); +#endif + } if (ret != 0) { tmp_qp->stats.enqueue_err_count++; - /* - * This message cannot be enqueued, - * decrease number of ops that wasn't sent - */ - tmp_qp->inflights16 -= nb_ops_possible - nb_ops_sent; + /* This message cannot be enqueued */ if (nb_ops_sent == 0) return 0; goto kick_tail; @@ -621,8 +802,182 @@ qat_enqueue_op_burst(void *qp, void **ops, uint16_t nb_ops) } kick_tail: queue->tail = tail; + tmp_qp->enqueued += nb_ops_sent; + tmp_qp->stats.enqueued_count += nb_ops_sent; + txq_write_tail(tmp_qp->qat_dev_gen, tmp_qp, queue); + return nb_ops_sent; +} + +/* Use this for compression only - but keep consistent with above common + * function as much as possible. + */ +uint16_t +qat_enqueue_comp_op_burst(void *qp, void **ops, uint16_t nb_ops) +{ + register struct qat_queue *queue; + struct qat_qp *tmp_qp = (struct qat_qp *)qp; + register uint32_t nb_ops_sent = 0; + register int nb_desc_to_build; + uint16_t nb_ops_possible = nb_ops; + register uint8_t *base_addr; + register uint32_t tail; + + int descriptors_built, total_descriptors_built = 0; + int nb_remaining_descriptors; + int overflow = 0; + + if (unlikely(nb_ops == 0)) + return 0; + + /* read params used a lot in main loop into registers */ + queue = &(tmp_qp->tx_q); + base_addr = (uint8_t *)queue->base_addr; + tail = queue->tail; + + /* Find how many can actually fit on the ring */ + { + /* dequeued can only be written by one thread, but it may not + * be this thread. As it's 4-byte aligned it will be read + * atomically here by any Intel CPU. + * enqueued can wrap before dequeued, but cannot + * lap it as var size of enq/deq (uint32_t) > var size of + * max_inflights (uint16_t). In reality inflights is never + * even as big as max uint16_t, as it's <= ADF_MAX_DESC. + * On wrapping, the calculation still returns the correct + * positive value as all three vars are unsigned. + */ + uint32_t inflights = + tmp_qp->enqueued - tmp_qp->dequeued; + + /* Find how many can actually fit on the ring */ + overflow = (inflights + nb_ops) - tmp_qp->max_inflights; + if (overflow > 0) { + nb_ops_possible = nb_ops - overflow; + if (nb_ops_possible == 0) + return 0; + } + + /* QAT has plenty of work queued already, so don't waste cycles + * enqueueing, wait til the application has gathered a bigger + * burst or some completed ops have been dequeued + */ + if (tmp_qp->min_enq_burst_threshold && inflights > + QAT_QP_MIN_INFL_THRESHOLD && nb_ops_possible < + tmp_qp->min_enq_burst_threshold) { + tmp_qp->stats.threshold_hit_count++; + return 0; + } + } + + /* At this point nb_ops_possible is assuming a 1:1 mapping + * between ops and descriptors. + * Fewer may be sent if some ops have to be split. + * nb_ops_possible is <= burst size. + * Find out how many spaces are actually available on the qp in case + * more are needed. + */ + nb_remaining_descriptors = nb_ops_possible + + ((overflow >= 0) ? 0 : overflow * (-1)); + QAT_DP_LOG(DEBUG, "Nb ops requested %d, nb descriptors remaining %d", + nb_ops, nb_remaining_descriptors); + + while (nb_ops_sent != nb_ops_possible && + nb_remaining_descriptors > 0) { + struct qat_comp_op_cookie *cookie = + tmp_qp->op_cookies[tail >> queue->trailz]; + + descriptors_built = 0; + + QAT_DP_LOG(DEBUG, "--- data length: %u", + ((struct rte_comp_op *)*ops)->src.length); + + nb_desc_to_build = qat_comp_build_request(*ops, + base_addr + tail, cookie, tmp_qp->qat_dev_gen); + QAT_DP_LOG(DEBUG, "%d descriptors built, %d remaining, " + "%d ops sent, %d descriptors needed", + total_descriptors_built, nb_remaining_descriptors, + nb_ops_sent, nb_desc_to_build); + + if (unlikely(nb_desc_to_build < 0)) { + /* this message cannot be enqueued */ + tmp_qp->stats.enqueue_err_count++; + if (nb_ops_sent == 0) + return 0; + goto kick_tail; + } else if (unlikely(nb_desc_to_build > 1)) { + /* this op is too big and must be split - get more + * descriptors and retry + */ + + QAT_DP_LOG(DEBUG, "Build %d descriptors for this op", + nb_desc_to_build); + + nb_remaining_descriptors -= nb_desc_to_build; + if (nb_remaining_descriptors >= 0) { + /* There are enough remaining descriptors + * so retry + */ + int ret2 = qat_comp_build_multiple_requests( + *ops, tmp_qp, tail, + nb_desc_to_build); + + if (unlikely(ret2 < 1)) { + QAT_DP_LOG(DEBUG, + "Failed to build (%d) descriptors, status %d", + nb_desc_to_build, ret2); + + qat_comp_free_split_op_memzones(cookie, + nb_desc_to_build - 1); + + tmp_qp->stats.enqueue_err_count++; + + /* This message cannot be enqueued */ + if (nb_ops_sent == 0) + return 0; + goto kick_tail; + } else { + descriptors_built = ret2; + total_descriptors_built += + descriptors_built; + nb_remaining_descriptors -= + descriptors_built; + QAT_DP_LOG(DEBUG, + "Multiple descriptors (%d) built ok", + descriptors_built); + } + } else { + QAT_DP_LOG(ERR, "For the current op, number of requested descriptors (%d) " + "exceeds number of available descriptors (%d)", + nb_desc_to_build, + nb_remaining_descriptors + + nb_desc_to_build); + + qat_comp_free_split_op_memzones(cookie, + nb_desc_to_build - 1); + + /* Not enough extra descriptors */ + if (nb_ops_sent == 0) + return 0; + goto kick_tail; + } + } else { + descriptors_built = 1; + total_descriptors_built++; + nb_remaining_descriptors--; + QAT_DP_LOG(DEBUG, "Single descriptor built ok"); + } + + tail = adf_modulo(tail + (queue->msg_size * descriptors_built), + queue->modulo_mask); + ops++; + nb_ops_sent++; + } + +kick_tail: + queue->tail = tail; + tmp_qp->enqueued += total_descriptors_built; tmp_qp->stats.enqueued_count += nb_ops_sent; - txq_write_tail(tmp_qp, queue); + txq_write_tail(tmp_qp->qat_dev_gen, tmp_qp, queue); return nb_ops_sent; } @@ -632,48 +987,159 @@ qat_dequeue_op_burst(void *qp, void **ops, uint16_t nb_ops) struct qat_queue *rx_queue; struct qat_qp *tmp_qp = (struct qat_qp *)qp; uint32_t head; - uint32_t resp_counter = 0; + uint32_t op_resp_counter = 0, fw_resp_counter = 0; uint8_t *resp_msg; + int nb_fw_responses; rx_queue = &(tmp_qp->rx_q); head = rx_queue->head; resp_msg = (uint8_t *)rx_queue->base_addr + rx_queue->head; while (*(uint32_t *)resp_msg != ADF_RING_EMPTY_SIG && - resp_counter != nb_ops) { + op_resp_counter != nb_ops) { + + nb_fw_responses = 1; if (tmp_qp->service_type == QAT_SERVICE_SYMMETRIC) - qat_sym_process_response(ops, resp_msg); + qat_sym_process_response(ops, resp_msg, + tmp_qp->op_cookies[head >> rx_queue->trailz]); else if (tmp_qp->service_type == QAT_SERVICE_COMPRESSION) - qat_comp_process_response(ops, resp_msg, - tmp_qp->op_cookies[head / rx_queue->msg_size], + nb_fw_responses = qat_comp_process_response( + ops, resp_msg, + tmp_qp->op_cookies[head >> rx_queue->trailz], &tmp_qp->stats.dequeue_err_count); - else if (tmp_qp->service_type == QAT_SERVICE_ASYMMETRIC) { #ifdef BUILD_QAT_ASYM + else if (tmp_qp->service_type == QAT_SERVICE_ASYMMETRIC) qat_asym_process_response(ops, resp_msg, - tmp_qp->op_cookies[head / rx_queue->msg_size]); + tmp_qp->op_cookies[head >> rx_queue->trailz]); #endif - } head = adf_modulo(head + rx_queue->msg_size, rx_queue->modulo_mask); resp_msg = (uint8_t *)rx_queue->base_addr + head; - ops++; - resp_counter++; + + if (nb_fw_responses) { + /* only move on to next op if one was ready to return + * to API + */ + ops++; + op_resp_counter++; + } + + /* A compression op may be broken up into multiple fw requests. + * Only count fw responses as complete once ALL the responses + * associated with an op have been processed, as the cookie + * data from the first response must be available until + * finished with all firmware responses. + */ + fw_resp_counter += nb_fw_responses; + + rx_queue->nb_processed_responses++; } - if (resp_counter > 0) { - rx_queue->head = head; - tmp_qp->stats.dequeued_count += resp_counter; - rx_queue->nb_processed_responses += resp_counter; - tmp_qp->inflights16 -= resp_counter; - if (rx_queue->nb_processed_responses > - QAT_CSR_HEAD_WRITE_THRESH) - rxq_free_desc(tmp_qp, rx_queue); + tmp_qp->dequeued += fw_resp_counter; + tmp_qp->stats.dequeued_count += op_resp_counter; + + rx_queue->head = head; + if (rx_queue->nb_processed_responses > QAT_CSR_HEAD_WRITE_THRESH) + rxq_free_desc(tmp_qp->qat_dev_gen, tmp_qp, rx_queue); + + QAT_DP_LOG(DEBUG, "Dequeue burst return: %u, QAT responses: %u", + op_resp_counter, fw_resp_counter); + + return op_resp_counter; +} + +/* This is almost same as dequeue_op_burst, without the atomic, without stats + * and without the op. Dequeues one response. + */ +static uint8_t +qat_cq_dequeue_response(struct qat_qp *qp, void *out_data) +{ + uint8_t result = 0; + uint8_t retries = 0; + struct qat_queue *queue = &(qp->rx_q); + struct icp_qat_fw_comn_resp *resp_msg = (struct icp_qat_fw_comn_resp *) + ((uint8_t *)queue->base_addr + queue->head); + + while (retries++ < QAT_CQ_MAX_DEQ_RETRIES && + *(uint32_t *)resp_msg == ADF_RING_EMPTY_SIG) { + /* loop waiting for response until we reach the timeout */ + rte_delay_ms(20); } - return resp_counter; + if (*(uint32_t *)resp_msg != ADF_RING_EMPTY_SIG) { + /* response received */ + result = 1; + + /* check status flag */ + if (ICP_QAT_FW_COMN_RESP_CRYPTO_STAT_GET( + resp_msg->comn_hdr.comn_status) == + ICP_QAT_FW_COMN_STATUS_FLAG_OK) { + /* success */ + memcpy(out_data, resp_msg, queue->msg_size); + } else { + memset(out_data, 0, queue->msg_size); + } + + queue->head = adf_modulo(queue->head + queue->msg_size, + queue->modulo_mask); + rxq_free_desc(qp->qat_dev_gen, qp, queue); + } + + return result; +} + +/* Sends a NULL message and extracts QAT fw version from the response. + * Used to determine detailed capabilities based on the fw version number. + * This assumes that there are no inflight messages, i.e. assumes there's space + * on the qp, one message is sent and only one response collected. + * Returns fw version number or 0 for unknown version or a negative error code. + */ +int +qat_cq_get_fw_version(struct qat_qp *qp) +{ + struct qat_queue *queue = &(qp->tx_q); + uint8_t *base_addr = (uint8_t *)queue->base_addr; + struct icp_qat_fw_comn_req null_msg; + struct icp_qat_fw_comn_resp response; + + /* prepare the NULL request */ + memset(&null_msg, 0, sizeof(null_msg)); + null_msg.comn_hdr.hdr_flags = + ICP_QAT_FW_COMN_HDR_FLAGS_BUILD(ICP_QAT_FW_COMN_REQ_FLAG_SET); + null_msg.comn_hdr.service_type = ICP_QAT_FW_COMN_REQ_NULL; + null_msg.comn_hdr.service_cmd_id = ICP_QAT_FW_NULL_REQ_SERV_ID; + +#if RTE_LOG_DP_LEVEL >= RTE_LOG_DEBUG + QAT_DP_HEXDUMP_LOG(DEBUG, "NULL request", &null_msg, sizeof(null_msg)); +#endif + + /* send the NULL request */ + memcpy(base_addr + queue->tail, &null_msg, sizeof(null_msg)); + queue->tail = adf_modulo(queue->tail + queue->msg_size, + queue->modulo_mask); + txq_write_tail(qp->qat_dev_gen, qp, queue); + + /* receive a response */ + if (qat_cq_dequeue_response(qp, &response)) { + +#if RTE_LOG_DP_LEVEL >= RTE_LOG_DEBUG + QAT_DP_HEXDUMP_LOG(DEBUG, "NULL response:", &response, + sizeof(response)); +#endif + /* if LW0 bit 24 is set - then the fw version was returned */ + if (QAT_FIELD_GET(response.comn_hdr.hdr_flags, + ICP_QAT_FW_COMN_NULL_VERSION_FLAG_BITPOS, + ICP_QAT_FW_COMN_NULL_VERSION_FLAG_MASK)) + return response.resrvd[0]; /* return LW4 */ + else + return 0; /* not set - we don't know fw version */ + } + + QAT_LOG(ERR, "No response received"); + return -EINVAL; } __rte_weak int