* Copyright 2021 Intel Corporation
*/
+#include <x86intrin.h>
+
#include <rte_malloc.h>
#include <rte_common.h>
#include <rte_log.h>
+#include <rte_prefetch.h>
#include "idxd_internal.h"
#define IDXD_PMD_NAME_STR "dmadev_idxd"
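+
+/* convert a descriptor-ring index into the IOVA of that slot in the descriptor ring */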
+static __rte_always_inline rte_iova_t
+__desc_idx_to_iova(struct idxd_dmadev *idxd, uint16_t n)
+{
+ return idxd->desc_iova + (n * sizeof(struct idxd_hw_desc));
+}
+
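+/* issue a MOVDIR64B instruction (emitted here as raw opcode bytes so no special compiler
+ * support is needed) to push one 64-byte descriptor to the device portal in a single store
+ */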
+static __rte_always_inline void
+__idxd_movdir64b(volatile void *dst, const struct idxd_hw_desc *src)
+{
+ asm volatile (".byte 0x66, 0x0f, 0x38, 0xf8, 0x02"
+ :
+ : "a" (dst), "d" (src)
+ : "memory");
+}
+
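+/* flush the current batch to hardware: a single job is submitted as a plain descriptor,
+ * multiple jobs via a batch descriptor, then advance to the next batch slot
+ */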
+static __rte_always_inline void
+__submit(struct idxd_dmadev *idxd)
+{
+ rte_prefetch1(&idxd->batch_comp_ring[idxd->batch_idx_read]);
+
+ if (idxd->batch_size == 0)
+ return;
+
+ /* write completion to batch comp ring */
+ rte_iova_t comp_addr = idxd->batch_iova +
+ (idxd->batch_idx_write * sizeof(struct idxd_completion));
+
+ if (idxd->batch_size == 1) {
+ /* submit batch directly */
+ struct idxd_hw_desc desc =
+ idxd->desc_ring[idxd->batch_start & idxd->desc_ring_mask];
+ desc.completion = comp_addr;
+ desc.op_flags |= IDXD_FLAG_REQUEST_COMPLETION;
+ _mm_sfence(); /* fence before writing desc to device */
+ __idxd_movdir64b(idxd->portal, &desc);
+ } else {
+ const struct idxd_hw_desc batch_desc = {
+ .op_flags = (idxd_op_batch << IDXD_CMD_OP_SHIFT) |
+ IDXD_FLAG_COMPLETION_ADDR_VALID |
+ IDXD_FLAG_REQUEST_COMPLETION,
+ .desc_addr = __desc_idx_to_iova(idxd,
+ idxd->batch_start & idxd->desc_ring_mask),
+ .completion = comp_addr,
+ .size = idxd->batch_size,
+ };
+ _mm_sfence(); /* fence before writing desc to device */
+ __idxd_movdir64b(idxd->portal, &batch_desc);
+ }
+
+ if (++idxd->batch_idx_write > idxd->max_batches)
+ idxd->batch_idx_write = 0;
+
+ idxd->stats.submitted += idxd->batch_size;
+
+ idxd->batch_start += idxd->batch_size;
+ idxd->batch_size = 0;
+ idxd->batch_idx_ring[idxd->batch_idx_write] = idxd->batch_start;
+ _mm256_store_si256((void *)&idxd->batch_comp_ring[idxd->batch_idx_write],
+ _mm256_setzero_si256());
+}
+
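+/* common enqueue path: check for batch-ring and descriptor-ring space, write the descriptor
+ * using two 32-byte stores, and optionally submit; returns the job id, or -ENOSPC on failure
+ */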
+static __rte_always_inline int
+__idxd_write_desc(struct idxd_dmadev *idxd,
+ const uint32_t op_flags,
+ const rte_iova_t src,
+ const rte_iova_t dst,
+ const uint32_t size,
+ const uint32_t flags)
+{
+ uint16_t mask = idxd->desc_ring_mask;
+ uint16_t job_id = idxd->batch_start + idxd->batch_size;
+ /* we never wrap batches, so we only mask the start and allow start+size to overflow */
+ uint16_t write_idx = (idxd->batch_start & mask) + idxd->batch_size;
+
+ /* first check batch ring space then desc ring space */
+ if ((idxd->batch_idx_read == 0 && idxd->batch_idx_write == idxd->max_batches) ||
+ idxd->batch_idx_write + 1 == idxd->batch_idx_read)
+ return -ENOSPC;
+ if (((write_idx + 1) & mask) == (idxd->ids_returned & mask))
+ return -ENOSPC;
+
+ /* write desc. Note: descriptors don't wrap, but the completion address does */
+ const uint64_t op_flags64 = (uint64_t)(op_flags | IDXD_FLAG_COMPLETION_ADDR_VALID) << 32;
+ const uint64_t comp_addr = __desc_idx_to_iova(idxd, write_idx & mask);
+ _mm256_store_si256((void *)&idxd->desc_ring[write_idx],
+ _mm256_set_epi64x(dst, src, comp_addr, op_flags64));
+ _mm256_store_si256((void *)&idxd->desc_ring[write_idx].size,
+ _mm256_set_epi64x(0, 0, 0, size));
+
+ idxd->batch_size++;
+
+ rte_prefetch0_write(&idxd->desc_ring[write_idx + 1]);
+
+ if (flags & RTE_DMA_OP_FLAG_SUBMIT)
+ __submit(idxd);
+
+ return job_id;
+}
+
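+/* the enqueue callbacks below are exposed through the dmadev fast-path API; for illustration
+ * only, a typical application-level sequence using the generic dmadev calls would be:
+ *   rte_dma_copy(dev_id, vchan, src_iova, dst_iova, len, RTE_DMA_OP_FLAG_SUBMIT);
+ *   rte_dma_completed(dev_id, vchan, burst_size, &last_idx, &has_error);
+ */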
+int
+idxd_enqueue_copy(void *dev_private, uint16_t qid __rte_unused, rte_iova_t src,
+ rte_iova_t dst, unsigned int length, uint64_t flags)
+{
+ /* we can take advantage of the fact that the fence flag in dmadev and DSA is the same,
+ * but check it at compile time to be sure.
+ */
+ RTE_BUILD_BUG_ON(RTE_DMA_OP_FLAG_FENCE != IDXD_FLAG_FENCE);
+ uint32_t memmove = (idxd_op_memmove << IDXD_CMD_OP_SHIFT) |
+ IDXD_FLAG_CACHE_CONTROL | (flags & IDXD_FLAG_FENCE);
+ return __idxd_write_desc(dev_private, memmove, src, dst, length,
+ flags);
+}
+
+int
+idxd_enqueue_fill(void *dev_private, uint16_t qid __rte_unused, uint64_t pattern,
+ rte_iova_t dst, unsigned int length, uint64_t flags)
+{
+ uint32_t fill = (idxd_op_fill << IDXD_CMD_OP_SHIFT) |
+ IDXD_FLAG_CACHE_CONTROL | (flags & IDXD_FLAG_FENCE);
+ return __idxd_write_desc(dev_private, fill, pattern, dst, length,
+ flags);
+}
+
+int
+idxd_submit(void *dev_private, uint16_t qid __rte_unused)
+{
+ __submit(dev_private);
+ return 0;
+}
+
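+/* translate a DSA hardware completion status into the corresponding rte_dma status code */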
+static enum rte_dma_status_code
+get_comp_status(struct idxd_completion *c)
+{
+ uint8_t st = c->status;
+ switch (st) {
+ /* successful descriptors are not written back normally */
+ case IDXD_COMP_STATUS_INCOMPLETE:
+ case IDXD_COMP_STATUS_SUCCESS:
+ return RTE_DMA_STATUS_SUCCESSFUL;
+ case IDXD_COMP_STATUS_INVALID_OPCODE:
+ return RTE_DMA_STATUS_INVALID_OPCODE;
+ case IDXD_COMP_STATUS_INVALID_SIZE:
+ return RTE_DMA_STATUS_INVALID_LENGTH;
+ case IDXD_COMP_STATUS_SKIPPED:
+ return RTE_DMA_STATUS_NOT_ATTEMPTED;
+ default:
+ return RTE_DMA_STATUS_ERROR_UNKNOWN;
+ }
+}
+
+int
+idxd_vchan_status(const struct rte_dma_dev *dev, uint16_t vchan __rte_unused,
+ enum rte_dma_vchan_status *status)
+{
+ struct idxd_dmadev *idxd = dev->fp_obj->dev_private;
+ uint16_t last_batch_write = idxd->batch_idx_write == 0 ? idxd->max_batches :
+ idxd->batch_idx_write - 1;
+ uint8_t bstatus = (idxd->batch_comp_ring[last_batch_write].status != 0);
+
+ /* An IDXD device will always be either active or idle.
+ * RTE_DMA_VCHAN_HALTED_ERROR is therefore not supported by IDXD.
+ */
+ *status = bstatus ? RTE_DMA_VCHAN_IDLE : RTE_DMA_VCHAN_ACTIVE;
+
+ return 0;
+}
+
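+/* fast path for gathering completions: hand back any job ids already accounted for, then,
+ * if the next batch completed successfully, return its job ids; returns -1 if the batch
+ * completed with an error and needs per-descriptor handling
+ */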
+static __rte_always_inline int
+batch_ok(struct idxd_dmadev *idxd, uint16_t max_ops, enum rte_dma_status_code *status)
+{
+ uint16_t ret;
+ uint8_t bstatus;
+
+ if (max_ops == 0)
+ return 0;
+
+ /* first check if there are any unreturned handles from last time */
+ if (idxd->ids_avail != idxd->ids_returned) {
+ ret = RTE_MIN((uint16_t)(idxd->ids_avail - idxd->ids_returned), max_ops);
+ idxd->ids_returned += ret;
+ if (status)
+ memset(status, RTE_DMA_STATUS_SUCCESSFUL, ret * sizeof(*status));
+ return ret;
+ }
+
+ if (idxd->batch_idx_read == idxd->batch_idx_write)
+ return 0;
+
+ bstatus = idxd->batch_comp_ring[idxd->batch_idx_read].status;
+ /* now check if next batch is complete and successful */
+ if (bstatus == IDXD_COMP_STATUS_SUCCESS) {
+ /* since the batch idx ring stores the start of each batch, pre-increment to look up
+ * the start of the next batch.
+ */
+ if (++idxd->batch_idx_read > idxd->max_batches)
+ idxd->batch_idx_read = 0;
+ idxd->ids_avail = idxd->batch_idx_ring[idxd->batch_idx_read];
+
+ ret = RTE_MIN((uint16_t)(idxd->ids_avail - idxd->ids_returned), max_ops);
+ idxd->ids_returned += ret;
+ if (status)
+ memset(status, RTE_DMA_STATUS_SUCCESSFUL, ret * sizeof(*status));
+ return ret;
+ }
+ /* check if batch is incomplete */
+ else if (bstatus == IDXD_COMP_STATUS_INCOMPLETE)
+ return 0;
+
+ return -1; /* error case */
+}
+
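+/* return the count of successfully completed jobs from the next batch, stopping at, and
+ * flagging, the first error encountered
+ */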
+static inline uint16_t
+batch_completed(struct idxd_dmadev *idxd, uint16_t max_ops, bool *has_error)
+{
+ uint16_t i;
+ uint16_t b_start, b_end, next_batch;
+
+ int ret = batch_ok(idxd, max_ops, NULL);
+ if (ret >= 0)
+ return ret;
+
+ /* ERROR case, not successful, not incomplete */
+ /* Get the batch size, and special-case a batch of size 1.
+ * Once we identify the actual failing job, return the jobs before it, then update
+ * the batch ring indexes to make it look like the first job of the batch has failed.
+ * Subsequent calls here will always return zero packets, and the error must be cleared by
+ * calling the completed_status() function.
+ */
+ next_batch = (idxd->batch_idx_read + 1);
+ if (next_batch > idxd->max_batches)
+ next_batch = 0;
+ b_start = idxd->batch_idx_ring[idxd->batch_idx_read];
+ b_end = idxd->batch_idx_ring[next_batch];
+
+ if (b_end - b_start == 1) { /* not a batch */
+ *has_error = true;
+ return 0;
+ }
+
+ for (i = b_start; i < b_end; i++) {
+ struct idxd_completion *c = (void *)&idxd->desc_ring[i & idxd->desc_ring_mask];
+ if (c->status > IDXD_COMP_STATUS_SUCCESS) /* ignore incomplete(0) and success(1) */
+ break;
+ }
+ ret = RTE_MIN((uint16_t)(i - idxd->ids_returned), max_ops);
+ if (ret < max_ops)
+ *has_error = true; /* we got up to the point of error */
+ idxd->ids_avail = idxd->ids_returned += ret;
+
+ /* to ensure we can call twice and just return 0, set start of batch to where we finished */
+ idxd->batch_comp_ring[idxd->batch_idx_read].completed_size -= ret;
+ idxd->batch_idx_ring[idxd->batch_idx_read] += ret;
+ if (idxd->batch_idx_ring[next_batch] - idxd->batch_idx_ring[idxd->batch_idx_read] == 1) {
+ /* copy over the descriptor status to the batch ring as if no batch */
+ uint16_t d_idx = idxd->batch_idx_ring[idxd->batch_idx_read] & idxd->desc_ring_mask;
+ struct idxd_completion *desc_comp = (void *)&idxd->desc_ring[d_idx];
+ idxd->batch_comp_ring[idxd->batch_idx_read].status = desc_comp->status;
+ }
+
+ return ret;
+}
+
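+/* as batch_completed(), but fill in a per-job status array, including failed and
+ * not-attempted jobs, so processing can continue past an errored batch
+ */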
+static uint16_t
+batch_completed_status(struct idxd_dmadev *idxd, uint16_t max_ops, enum rte_dma_status_code *status)
+{
+ uint16_t next_batch;
+
+ int ret = batch_ok(idxd, max_ops, status);
+ if (ret >= 0)
+ return ret;
+
+ /* ERROR case, not successful, not incomplete */
+ /* Get the batch size, and special-case a batch of size 1. */
+ next_batch = (idxd->batch_idx_read + 1);
+ if (next_batch > idxd->max_batches)
+ next_batch = 0;
+ const uint16_t b_start = idxd->batch_idx_ring[idxd->batch_idx_read];
+ const uint16_t b_end = idxd->batch_idx_ring[next_batch];
+ const uint16_t b_len = b_end - b_start;
+ if (b_len == 1) { /* not a batch */
+ *status = get_comp_status(&idxd->batch_comp_ring[idxd->batch_idx_read]);
+ if (*status != RTE_DMA_STATUS_SUCCESSFUL)
+ idxd->stats.errors++;
+ idxd->ids_avail++;
+ idxd->ids_returned++;
+ idxd->batch_idx_read = next_batch;
+ return 1;
+ }
+
+ /* not a single-element batch, need to process more.
+ * Scenarios:
+ * 1. max_ops >= batch_size - can fit everything, simple case
+ * - loop through completed ops and then add on any not-attempted ones
+ * 2. max_ops < batch_size - can't fit everything, more complex case
+ * - loop through completed/incomplete and stop when hit max_ops
+ * - adjust the batch descriptor to update where we stopped, with appropriate bcount
+ * - if bcount is to be exactly 1, update the batch descriptor as it will be treated as
+ * non-batch next time.
+ */
+ const uint16_t bcount = idxd->batch_comp_ring[idxd->batch_idx_read].completed_size;
+ for (ret = 0; ret < b_len && ret < max_ops; ret++) {
+ struct idxd_completion *c = (void *)
+ &idxd->desc_ring[(b_start + ret) & idxd->desc_ring_mask];
+ status[ret] = (ret < bcount) ? get_comp_status(c) : RTE_DMA_STATUS_NOT_ATTEMPTED;
+ if (status[ret] != RTE_DMA_STATUS_SUCCESSFUL)
+ idxd->stats.errors++;
+ }
+ idxd->ids_avail = idxd->ids_returned += ret;
+
+ /* everything fit */
+ if (ret == b_len) {
+ idxd->batch_idx_read = next_batch;
+ return ret;
+ }
+
+ /* set up for next time, update existing batch descriptor & start idx at batch_idx_read */
+ idxd->batch_idx_ring[idxd->batch_idx_read] += ret;
+ if (ret > bcount) {
+ /* we have only incomplete ones - set batch completed size to 0 */
+ struct idxd_completion *comp = &idxd->batch_comp_ring[idxd->batch_idx_read];
+ comp->completed_size = 0;
+ /* if there is only one descriptor left, it was skipped, so set the status accordingly */
+ if (b_len - ret == 1)
+ comp->status = IDXD_COMP_STATUS_SKIPPED;
+ } else {
+ struct idxd_completion *comp = &idxd->batch_comp_ring[idxd->batch_idx_read];
+ comp->completed_size -= ret;
+ /* if there is only one descriptor left, copy status info straight to desc */
+ if (comp->completed_size == 1) {
+ struct idxd_completion *c = (void *)
+ &idxd->desc_ring[(b_start + ret) & idxd->desc_ring_mask];
+ comp->status = c->status;
+ /* individual descs can be ok without writeback, but not batches */
+ if (comp->status == IDXD_COMP_STATUS_INCOMPLETE)
+ comp->status = IDXD_COMP_STATUS_SUCCESS;
+ } else if (bcount == b_len) {
+ /* check if we still have an error, and clear flag if not */
+ uint16_t i;
+ for (i = b_start + ret; i < b_end; i++) {
+ struct idxd_completion *c = (void *)
+ &idxd->desc_ring[i & idxd->desc_ring_mask];
+ if (c->status > IDXD_COMP_STATUS_SUCCESS)
+ break;
+ }
+ if (i == b_end) /* no errors */
+ comp->status = IDXD_COMP_STATUS_SUCCESS;
+ }
+ }
+
+ return ret;
+}
+
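+/* dmadev "completed" callback: drain consecutive completed batches until we run out of
+ * completions, reach max_ops, or hit an error
+ */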
+uint16_t
+idxd_completed(void *dev_private, uint16_t qid __rte_unused, uint16_t max_ops,
+ uint16_t *last_idx, bool *has_error)
+{
+ struct idxd_dmadev *idxd = dev_private;
+ uint16_t batch, ret = 0;
+
+ do {
+ batch = batch_completed(idxd, max_ops - ret, has_error);
+ ret += batch;
+ } while (batch > 0 && *has_error == false);
+
+ idxd->stats.completed += ret;
+ *last_idx = idxd->ids_returned - 1;
+ return ret;
+}
+
+uint16_t
+idxd_completed_status(void *dev_private, uint16_t qid __rte_unused, uint16_t max_ops,
+ uint16_t *last_idx, enum rte_dma_status_code *status)
+{
+ struct idxd_dmadev *idxd = dev_private;
+ uint16_t batch, ret = 0;
+
+ do {
+ batch = batch_completed_status(idxd, max_ops - ret, &status[ret]);
+ ret += batch;
+ } while (batch > 0);
+
+ idxd->stats.completed += ret;
+ *last_idx = idxd->ids_returned - 1;
+ return ret;
+}
+
int
idxd_dump(const struct rte_dma_dev *dev, FILE *f)
{
return 0;
}
+int
+idxd_stats_get(const struct rte_dma_dev *dev, uint16_t vchan __rte_unused,
+ struct rte_dma_stats *stats, uint32_t stats_sz)
+{
+ struct idxd_dmadev *idxd = dev->fp_obj->dev_private;
+ if (stats_sz < sizeof(*stats))
+ return -EINVAL;
+ *stats = idxd->stats;
+ return 0;
+}
+
+int
+idxd_stats_reset(struct rte_dma_dev *dev, uint16_t vchan __rte_unused)
+{
+ struct idxd_dmadev *idxd = dev->fp_obj->dev_private;
+ idxd->stats = (struct rte_dma_stats){0};
+ return 0;
+}
+
int
idxd_info_get(const struct rte_dma_dev *dev, struct rte_dma_info *info, uint32_t size)
{
return 0;
}
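+
+/* report remaining enqueue capacity: zero if no batch slot is free, otherwise the number of
+ * free descriptor-ring entries capped at the maximum supported batch size
+ */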
+uint16_t
+idxd_burst_capacity(const void *dev_private, uint16_t vchan __rte_unused)
+{
+ const struct idxd_dmadev *idxd = dev_private;
+ uint16_t write_idx = idxd->batch_start + idxd->batch_size;
+ uint16_t used_space;
+
+ /* Check for space in the batch ring */
+ if ((idxd->batch_idx_read == 0 && idxd->batch_idx_write == idxd->max_batches) ||
+ idxd->batch_idx_write + 1 == idxd->batch_idx_read)
+ return 0;
+
+ /* For descriptors, check for wrap-around on write but not read */
+ if (idxd->ids_returned > write_idx)
+ write_idx += idxd->desc_ring_mask + 1;
+ used_space = write_idx - idxd->ids_returned;
+
+ return RTE_MIN((idxd->desc_ring_mask - used_space), idxd->max_batch_size);
+}
+
int
idxd_configure(struct rte_dma_dev *dev __rte_unused, const struct rte_dma_conf *dev_conf,
uint32_t conf_sz)
dmadev->dev_ops = ops;
dmadev->device = dev;
+ dmadev->fp_obj->copy = idxd_enqueue_copy;
+ dmadev->fp_obj->fill = idxd_enqueue_fill;
+ dmadev->fp_obj->submit = idxd_submit;
+ dmadev->fp_obj->completed = idxd_completed;
+ dmadev->fp_obj->completed_status = idxd_completed_status;
+ dmadev->fp_obj->burst_capacity = idxd_burst_capacity;
+
idxd = dmadev->data->dev_private;
*idxd = *base_idxd; /* copy over the main fields already passed in */
idxd->dmadev = dmadev;