/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2021 Intel Corporation
 */

#include <x86intrin.h>

#include <rte_malloc.h>
#include <rte_common.h>
#include <rte_log.h>
#include <rte_prefetch.h>

#include "idxd_internal.h"

#define IDXD_PMD_NAME_STR "dmadev_idxd"
static __rte_always_inline rte_iova_t
__desc_idx_to_iova(struct idxd_dmadev *idxd, uint16_t n)
{
    return idxd->desc_iova + (n * sizeof(struct idxd_hw_desc));
}

static __rte_always_inline void
__idxd_movdir64b(volatile void *dst, const struct idxd_hw_desc *src)
{
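    /*
     * MOVDIR64B: copy the 64-byte descriptor at src into the device portal at
     * dst as a single atomic 64-byte write. Emitted as raw opcode bytes so the
     * file also builds with assemblers that do not know the mnemonic; rax ("a")
     * carries the portal address and rdx ("d") the descriptor address.
     */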
    asm volatile (".byte 0x66, 0x0f, 0x38, 0xf8, 0x02"
            :
            : "a" (dst), "d" (src)
            : "memory");
}

static __rte_always_inline void
__submit(struct idxd_dmadev *idxd)
{
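    /*
     * Close off the batch currently being built: point the descriptor (or the
     * batch descriptor) at a completion record in the batch completion ring,
     * write it to the device portal, then advance the batch write index and
     * start a fresh, empty batch.
     */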
    rte_prefetch1(&idxd->batch_comp_ring[idxd->batch_idx_read]);

    if (idxd->batch_size == 0)
        return;
    /* write completion to batch comp ring */
    rte_iova_t comp_addr = idxd->batch_iova +
            (idxd->batch_idx_write * sizeof(struct idxd_completion));

    if (idxd->batch_size == 1) {
        /* submit batch directly */
        struct idxd_hw_desc desc =
                idxd->desc_ring[idxd->batch_start & idxd->desc_ring_mask];
        desc.completion = comp_addr;
        desc.op_flags |= IDXD_FLAG_REQUEST_COMPLETION;
        _mm_sfence(); /* fence before writing desc to device */
        __idxd_movdir64b(idxd->portal, &desc);
    } else {
        const struct idxd_hw_desc batch_desc = {
                .op_flags = (idxd_op_batch << IDXD_CMD_OP_SHIFT) |
                        IDXD_FLAG_COMPLETION_ADDR_VALID |
                        IDXD_FLAG_REQUEST_COMPLETION,
                .desc_addr = __desc_idx_to_iova(idxd,
                        idxd->batch_start & idxd->desc_ring_mask),
                .completion = comp_addr,
                .size = idxd->batch_size,
        };
        _mm_sfence(); /* fence before writing desc to device */
        __idxd_movdir64b(idxd->portal, &batch_desc);
    }
    if (++idxd->batch_idx_write > idxd->max_batches)
        idxd->batch_idx_write = 0;
    idxd->stats.submitted += idxd->batch_size;

    idxd->batch_start += idxd->batch_size;
    idxd->batch_size = 0;
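    /*
     * Open the next batch: record its start index in the batch index ring and
     * zero the completion record that will later be polled for it.
     */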
    idxd->batch_idx_ring[idxd->batch_idx_write] = idxd->batch_start;
    _mm256_store_si256((void *)&idxd->batch_comp_ring[idxd->batch_idx_write],
            _mm256_setzero_si256());
}

static __rte_always_inline int
__idxd_write_desc(struct idxd_dmadev *idxd,
        const uint32_t op_flags,
        const rte_iova_t src,
        const rte_iova_t dst,
        const uint32_t size,
        const uint32_t flags)
{
    uint16_t mask = idxd->desc_ring_mask;
    uint16_t job_id = idxd->batch_start + idxd->batch_size;
    /* we never wrap batches, so we only mask the start and allow start+size to overflow */
    uint16_t write_idx = (idxd->batch_start & mask) + idxd->batch_size;
    /* first check batch ring space then desc ring space */
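    /*
     * The batch index ring holds max_batches + 1 slots with one slot always
     * kept empty, so it is full when the write index sits one behind the read
     * index (modulo ring size). The descriptor ring is full when the next
     * write position would catch up with the oldest not-yet-returned job.
     */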
    if ((idxd->batch_idx_read == 0 && idxd->batch_idx_write == idxd->max_batches) ||
            idxd->batch_idx_write + 1 == idxd->batch_idx_read)
        return -ENOSPC;
    if (((write_idx + 1) & mask) == (idxd->ids_returned & mask))
        return -ENOSPC;
    /* write desc. Note: descriptors don't wrap, but the completion address does */
    const uint64_t op_flags64 = (uint64_t)(op_flags | IDXD_FLAG_COMPLETION_ADDR_VALID) << 32;
    const uint64_t comp_addr = __desc_idx_to_iova(idxd, write_idx & mask);
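    /*
     * The 64-byte hardware descriptor is written with two 32-byte AVX stores:
     * the first covers the op_flags/completion/src/dst quadwords, the second
     * covers the size field with the remaining bytes zeroed.
     */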
    _mm256_store_si256((void *)&idxd->desc_ring[write_idx],
            _mm256_set_epi64x(dst, src, comp_addr, op_flags64));
    _mm256_store_si256((void *)&idxd->desc_ring[write_idx].size,
            _mm256_set_epi64x(0, 0, 0, size));

    idxd->batch_size++;

    rte_prefetch0_write(&idxd->desc_ring[write_idx + 1]);

    if (flags & RTE_DMA_OP_FLAG_SUBMIT)
        __submit(idxd);

    return job_id;
}

int
idxd_enqueue_copy(void *dev_private, uint16_t qid __rte_unused, rte_iova_t src,
        rte_iova_t dst, unsigned int length, uint64_t flags)
{
    /* we can take advantage of the fact that the fence flag in dmadev and DSA are the same,
     * but check it at compile time to be sure.
     */
    RTE_BUILD_BUG_ON(RTE_DMA_OP_FLAG_FENCE != IDXD_FLAG_FENCE);
    uint32_t memmove = (idxd_op_memmove << IDXD_CMD_OP_SHIFT) |
            IDXD_FLAG_CACHE_CONTROL | (flags & IDXD_FLAG_FENCE);
    return __idxd_write_desc(dev_private, memmove, src, dst, length,
            flags);
}

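/*
 * Illustrative sketch only (not part of the driver): applications reach the
 * fast-path ops above through the generic dmadev API; dev_id, src_iova,
 * dst_iova and len below are placeholders.
 *
 *     int id = rte_dma_copy(dev_id, 0, src_iova, dst_iova, len,
 *             RTE_DMA_OP_FLAG_SUBMIT);        // -> idxd_enqueue_copy()
 *     uint16_t last_idx;
 *     bool error = false;
 *     uint16_t done = rte_dma_completed(dev_id, 0, 1, &last_idx, &error);
 */
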
int
idxd_enqueue_fill(void *dev_private, uint16_t qid __rte_unused, uint64_t pattern,
        rte_iova_t dst, unsigned int length, uint64_t flags)
{
    uint32_t fill = (idxd_op_fill << IDXD_CMD_OP_SHIFT) |
            IDXD_FLAG_CACHE_CONTROL | (flags & IDXD_FLAG_FENCE);
    return __idxd_write_desc(dev_private, fill, pattern, dst, length,
            flags);
}

int
idxd_submit(void *dev_private, uint16_t qid __rte_unused)
{
    __submit(dev_private);
    return 0;
}

static enum rte_dma_status_code
get_comp_status(struct idxd_completion *c)
{
    uint8_t st = c->status;
    switch (st) {
    /* successful descriptors are not written back normally */
    case IDXD_COMP_STATUS_INCOMPLETE:
    case IDXD_COMP_STATUS_SUCCESS:
        return RTE_DMA_STATUS_SUCCESSFUL;
    case IDXD_COMP_STATUS_INVALID_OPCODE:
        return RTE_DMA_STATUS_INVALID_OPCODE;
    case IDXD_COMP_STATUS_INVALID_SIZE:
        return RTE_DMA_STATUS_INVALID_LENGTH;
    case IDXD_COMP_STATUS_SKIPPED:
        return RTE_DMA_STATUS_NOT_ATTEMPTED;
    default:
        return RTE_DMA_STATUS_ERROR_UNKNOWN;
    }
}

int
idxd_vchan_status(const struct rte_dma_dev *dev, uint16_t vchan __rte_unused,
        enum rte_dma_vchan_status *status)
{
    struct idxd_dmadev *idxd = dev->fp_obj->dev_private;
    uint16_t last_batch_write = idxd->batch_idx_write == 0 ? idxd->max_batches :
            idxd->batch_idx_write - 1;
    uint8_t bstatus = (idxd->batch_comp_ring[last_batch_write].status != 0);

    /* An IDXD device will always be either active or idle.
     * RTE_DMA_VCHAN_HALTED_ERROR is therefore not supported by IDXD.
     */
    *status = bstatus ? RTE_DMA_VCHAN_IDLE : RTE_DMA_VCHAN_ACTIVE;

    return 0;
}

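/*
 * Return the number of jobs in the batch at the head of the ring that can be
 * reported as successfully completed (possibly 0 if that batch is still in
 * flight), or -1 if the batch finished with an error. When a status array is
 * supplied it is filled with RTE_DMA_STATUS_SUCCESSFUL for the returned jobs.
 */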
static __rte_always_inline int
batch_ok(struct idxd_dmadev *idxd, uint16_t max_ops, enum rte_dma_status_code *status)
{
    uint16_t ret;
    uint8_t bstatus;

    if (max_ops == 0)
        return 0;

    /* first check if there are any unreturned handles from last time */
    if (idxd->ids_avail != idxd->ids_returned) {
        ret = RTE_MIN((uint16_t)(idxd->ids_avail - idxd->ids_returned), max_ops);
        idxd->ids_returned += ret;
        if (status != NULL)
            memset(status, RTE_DMA_STATUS_SUCCESSFUL, ret * sizeof(*status));
        return ret;
    }
    if (idxd->batch_idx_read == idxd->batch_idx_write)
        return 0;

    bstatus = idxd->batch_comp_ring[idxd->batch_idx_read].status;
    /* now check if next batch is complete and successful */
    if (bstatus == IDXD_COMP_STATUS_SUCCESS) {
        /* since the batch idx ring stores the start of each batch, pre-increment to lookup
         * start of next batch.
         */
        if (++idxd->batch_idx_read > idxd->max_batches)
            idxd->batch_idx_read = 0;
        idxd->ids_avail = idxd->batch_idx_ring[idxd->batch_idx_read];

        ret = RTE_MIN((uint16_t)(idxd->ids_avail - idxd->ids_returned), max_ops);
        idxd->ids_returned += ret;
        if (status != NULL)
            memset(status, RTE_DMA_STATUS_SUCCESSFUL, ret * sizeof(*status));
        return ret;
    }
    /* check if batch is incomplete */
    else if (bstatus == IDXD_COMP_STATUS_INCOMPLETE)
        return 0;

    return -1; /* error case */
}

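/*
 * Completion gathering for the rte_dma_completed() path. On a failed batch,
 * return the jobs that finished before the failure, flag the error, and
 * rewrite the batch bookkeeping so the failing job is left at the head of the
 * batch; further progress then requires rte_dma_completed_status() to consume
 * and report it.
 */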
static inline uint16_t
batch_completed(struct idxd_dmadev *idxd, uint16_t max_ops, bool *has_error)
{
    uint16_t i;
    uint16_t b_start, b_end, next_batch;

    int ret = batch_ok(idxd, max_ops, NULL);
    if (ret >= 0)
        return ret;

    /* ERROR case, not successful, not incomplete */
    /* Get the batch size, and special case size 1.
     * once we identify the actual failure job, return other jobs, then update
     * the batch ring indexes to make it look like the first job of the batch has failed.
     * Subsequent calls here will always return zero packets, and the error must be cleared by
     * calling the completed_status() function.
     */
    next_batch = (idxd->batch_idx_read + 1);
    if (next_batch > idxd->max_batches)
        next_batch = 0;
    b_start = idxd->batch_idx_ring[idxd->batch_idx_read];
    b_end = idxd->batch_idx_ring[next_batch];

    if (b_end - b_start == 1) { /* not a batch */
        *has_error = true;
        return 0;
    }
    for (i = b_start; i < b_end; i++) {
        struct idxd_completion *c = (void *)&idxd->desc_ring[i & idxd->desc_ring_mask];
        if (c->status > IDXD_COMP_STATUS_SUCCESS) /* ignore incomplete(0) and success(1) */
            break;
    }
    ret = RTE_MIN((uint16_t)(i - idxd->ids_returned), max_ops);
    if (ret < max_ops)
        *has_error = true; /* we got up to the point of error */
    idxd->ids_avail = idxd->ids_returned += ret;
    /* to ensure we can call twice and just return 0, set start of batch to where we finished */
    idxd->batch_comp_ring[idxd->batch_idx_read].completed_size -= ret;
    idxd->batch_idx_ring[idxd->batch_idx_read] += ret;
    if (idxd->batch_idx_ring[next_batch] - idxd->batch_idx_ring[idxd->batch_idx_read] == 1) {
        /* copy over the descriptor status to the batch ring as if no batch */
        uint16_t d_idx = idxd->batch_idx_ring[idxd->batch_idx_read] & idxd->desc_ring_mask;
        struct idxd_completion *desc_comp = (void *)&idxd->desc_ring[d_idx];
        idxd->batch_comp_ring[idxd->batch_idx_read].status = desc_comp->status;
    }

    return ret;
}

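/*
 * Completion gathering for the rte_dma_completed_status() path: report a
 * per-job status code for every job, including failed and not-attempted ones,
 * and, unlike batch_completed(), consume errored batches (possibly across
 * several calls when max_ops is smaller than the batch).
 */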
static uint16_t
batch_completed_status(struct idxd_dmadev *idxd, uint16_t max_ops, enum rte_dma_status_code *status)
{
    uint16_t next_batch;

    int ret = batch_ok(idxd, max_ops, status);
    if (ret >= 0)
        return ret;

    /* ERROR case, not successful, not incomplete */
    /* Get the batch size, and special case size 1. */
    next_batch = (idxd->batch_idx_read + 1);
    if (next_batch > idxd->max_batches)
        next_batch = 0;
    const uint16_t b_start = idxd->batch_idx_ring[idxd->batch_idx_read];
    const uint16_t b_end = idxd->batch_idx_ring[next_batch];
    const uint16_t b_len = b_end - b_start;
    if (b_len == 1) { /* not a batch */
        *status = get_comp_status(&idxd->batch_comp_ring[idxd->batch_idx_read]);
        if (*status != RTE_DMA_STATUS_SUCCESSFUL)
            idxd->stats.errors++;
        idxd->ids_avail++;
        idxd->ids_returned++;
        idxd->batch_idx_read = next_batch;
        return 1;
    }
    /* not a single-element batch, need to process more.
     * Two scenarios to handle:
     * 1. max_ops >= batch_size - can fit everything, simple case
     *   - loop through completed ops and then add on any not-attempted ones
     * 2. max_ops < batch_size - can't fit everything, more complex case
     *   - loop through completed/incomplete and stop when hit max_ops
     *   - adjust the batch descriptor to update where we stopped, with appropriate bcount
     *   - if bcount is to be exactly 1, update the batch descriptor as it will be treated as
     *     non-batch next time.
     */
    const uint16_t bcount = idxd->batch_comp_ring[idxd->batch_idx_read].completed_size;
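    /*
     * completed_size is the number of descriptors the hardware processed
     * before the batch stopped; descriptors beyond it were never attempted.
     */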
    for (ret = 0; ret < b_len && ret < max_ops; ret++) {
        struct idxd_completion *c = (void *)
                &idxd->desc_ring[(b_start + ret) & idxd->desc_ring_mask];
        status[ret] = (ret < bcount) ? get_comp_status(c) : RTE_DMA_STATUS_NOT_ATTEMPTED;
        if (status[ret] != RTE_DMA_STATUS_SUCCESSFUL)
            idxd->stats.errors++;
    }
    idxd->ids_avail = idxd->ids_returned += ret;

    /* everything fit within max_ops, so the whole batch is consumed */
    if (ret == b_len) {
        idxd->batch_idx_read = next_batch;
        return ret;
    }
    /* set up for next time, update existing batch descriptor & start idx at batch_idx_read */
    idxd->batch_idx_ring[idxd->batch_idx_read] += ret;
    if (ret > bcount) {
        /* we have only incomplete ones - set batch completed size to 0 */
        struct idxd_completion *comp = &idxd->batch_comp_ring[idxd->batch_idx_read];
        comp->completed_size = 0;
        /* if there is only one descriptor left, job skipped so set flag appropriately */
        if (b_len - ret == 1)
            comp->status = IDXD_COMP_STATUS_SKIPPED;
    } else {
        struct idxd_completion *comp = &idxd->batch_comp_ring[idxd->batch_idx_read];
        comp->completed_size -= ret;
        /* if there is only one descriptor left, copy status info straight to desc */
        if (comp->completed_size == 1) {
            struct idxd_completion *c = (void *)
                    &idxd->desc_ring[(b_start + ret) & idxd->desc_ring_mask];
            comp->status = c->status;
            /* individual descs can be ok without writeback, but not batches */
            if (comp->status == IDXD_COMP_STATUS_INCOMPLETE)
                comp->status = IDXD_COMP_STATUS_SUCCESS;
        } else if (bcount == b_len) {
            /* check if we still have an error, and clear flag if not */
            uint16_t i;
            for (i = b_start + ret; i < b_end; i++) {
                struct idxd_completion *c = (void *)
                        &idxd->desc_ring[i & idxd->desc_ring_mask];
                if (c->status > IDXD_COMP_STATUS_SUCCESS)
                    break;
            }
            if (i == b_end) /* no errors */
                comp->status = IDXD_COMP_STATUS_SUCCESS;
        }
    }

    return ret;
}

uint16_t
idxd_completed(void *dev_private, uint16_t qid __rte_unused, uint16_t max_ops,
        uint16_t *last_idx, bool *has_error)
{
    struct idxd_dmadev *idxd = dev_private;
    uint16_t batch, ret = 0;
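    /*
     * Keep harvesting completed batches until we drain them, hit an errored
     * batch, or have returned max_ops jobs.
     */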
    do {
        batch = batch_completed(idxd, max_ops - ret, has_error);
        ret += batch;
    } while (batch > 0 && *has_error == false);

    idxd->stats.completed += ret;
    *last_idx = idxd->ids_returned - 1;
    return ret;
}

uint16_t
idxd_completed_status(void *dev_private, uint16_t qid __rte_unused, uint16_t max_ops,
        uint16_t *last_idx, enum rte_dma_status_code *status)
{
    struct idxd_dmadev *idxd = dev_private;
    uint16_t batch, ret = 0;

    do {
        batch = batch_completed_status(idxd, max_ops - ret, &status[ret]);
        ret += batch;
    } while (batch > 0);

    idxd->stats.completed += ret;
    *last_idx = idxd->ids_returned - 1;
    return ret;
}

int
idxd_dump(const struct rte_dma_dev *dev, FILE *f)
{
    struct idxd_dmadev *idxd = dev->fp_obj->dev_private;
    unsigned int i;

    fprintf(f, "== IDXD Private Data ==\n");
    fprintf(f, " Portal: %p\n", idxd->portal);
    fprintf(f, " Config: { ring_size: %u }\n",
            idxd->qcfg.nb_desc);
    fprintf(f, " Batch ring (sz = %u, max_batches = %u):\n\t",
            idxd->max_batches + 1, idxd->max_batches);
    for (i = 0; i <= idxd->max_batches; i++) {
        fprintf(f, " %u ", idxd->batch_idx_ring[i]);
        if (i == idxd->batch_idx_read && i == idxd->batch_idx_write)
            fprintf(f, "[rd ptr, wr ptr] ");
        else if (i == idxd->batch_idx_read)
            fprintf(f, "[rd ptr] ");
        else if (i == idxd->batch_idx_write)
            fprintf(f, "[wr ptr] ");
        if (i == idxd->max_batches)
            fprintf(f, "\n");
    }

    fprintf(f, " Curr batch: start = %u, size = %u\n", idxd->batch_start, idxd->batch_size);
    fprintf(f, " IDS: avail = %u, returned: %u\n", idxd->ids_avail, idxd->ids_returned);
    return 0;
}

int
idxd_stats_get(const struct rte_dma_dev *dev, uint16_t vchan __rte_unused,
        struct rte_dma_stats *stats, uint32_t stats_sz)
{
    struct idxd_dmadev *idxd = dev->fp_obj->dev_private;
    if (stats_sz < sizeof(*stats))
        return -EINVAL;
    *stats = idxd->stats;
    return 0;
}

int
idxd_stats_reset(struct rte_dma_dev *dev, uint16_t vchan __rte_unused)
{
    struct idxd_dmadev *idxd = dev->fp_obj->dev_private;
    idxd->stats = (struct rte_dma_stats){0};
    return 0;
}

int
idxd_info_get(const struct rte_dma_dev *dev, struct rte_dma_info *info, uint32_t size)
{
    struct idxd_dmadev *idxd = dev->fp_obj->dev_private;

    if (size < sizeof(*info))
        return -EINVAL;

    *info = (struct rte_dma_info) {
            .dev_capa = RTE_DMA_CAPA_MEM_TO_MEM | RTE_DMA_CAPA_HANDLES_ERRORS |
                    RTE_DMA_CAPA_OPS_COPY | RTE_DMA_CAPA_OPS_FILL,
            .max_vchans = 1,
    };
    if (idxd->sva_support)
        info->dev_capa |= RTE_DMA_CAPA_SVA;
    return 0;
}

uint16_t
idxd_burst_capacity(const void *dev_private, uint16_t vchan __rte_unused)
{
    const struct idxd_dmadev *idxd = dev_private;
    uint16_t write_idx = idxd->batch_start + idxd->batch_size;
    uint16_t used_space;

    /* Check for space in the batch ring */
    if ((idxd->batch_idx_read == 0 && idxd->batch_idx_write == idxd->max_batches) ||
            idxd->batch_idx_write + 1 == idxd->batch_idx_read)
        return 0;
    /* For descriptors, check for wrap-around on write but not read */
    if (idxd->ids_returned > write_idx)
        write_idx += idxd->desc_ring_mask + 1;
    used_space = write_idx - idxd->ids_returned;
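    /*
     * Capacity is bounded both by the free space left in the descriptor ring
     * and by the largest batch the hardware accepts in one go.
     */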
    return RTE_MIN((idxd->desc_ring_mask - used_space), idxd->max_batch_size);
}

int
idxd_configure(struct rte_dma_dev *dev __rte_unused, const struct rte_dma_conf *dev_conf,
        uint32_t conf_sz)
{
    if (sizeof(struct rte_dma_conf) != conf_sz)
        return -EINVAL;

    if (dev_conf->nb_vchans != 1)
        return -EINVAL;
    return 0;
}

int
idxd_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan __rte_unused,
        const struct rte_dma_vchan_conf *qconf, uint32_t qconf_sz)
{
    struct idxd_dmadev *idxd = dev->fp_obj->dev_private;
    uint16_t max_desc = qconf->nb_desc;

    if (sizeof(struct rte_dma_vchan_conf) != qconf_sz)
        return -EINVAL;

    idxd->qcfg = *qconf;
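    /*
     * Round the ring size up to a power of two so descriptor indexes can be
     * wrapped with a simple mask rather than a modulo.
     */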
    if (!rte_is_power_of_2(max_desc))
        max_desc = rte_align32pow2(max_desc);
    IDXD_PMD_DEBUG("DMA dev %u using %u descriptors", dev->data->dev_id, max_desc);
    idxd->desc_ring_mask = max_desc - 1;
    idxd->qcfg.nb_desc = max_desc;
    /* in case we are reconfiguring a device, free any existing memory */
    rte_free(idxd->desc_ring);

    /* allocate the descriptor ring at 2x size as batches can't wrap */
    idxd->desc_ring = rte_zmalloc(NULL, sizeof(*idxd->desc_ring) * max_desc * 2, 0);
    if (idxd->desc_ring == NULL)
        return -ENOMEM;
    idxd->desc_iova = rte_mem_virt2iova(idxd->desc_ring);
    idxd->batch_idx_read = 0;
    idxd->batch_idx_write = 0;
    idxd->batch_start = 0;
    idxd->batch_size = 0;
    idxd->ids_returned = 0;
    idxd->ids_avail = 0;

    memset(idxd->batch_comp_ring, 0, sizeof(*idxd->batch_comp_ring) *
            (idxd->max_batches + 1));
    return 0;
}

int
idxd_dmadev_create(const char *name, struct rte_device *dev,
        const struct idxd_dmadev *base_idxd,
        const struct rte_dma_dev_ops *ops)
{
    struct idxd_dmadev *idxd = NULL;
    struct rte_dma_dev *dmadev = NULL;
    int ret = 0;

    RTE_BUILD_BUG_ON(sizeof(struct idxd_hw_desc) != 64);
    RTE_BUILD_BUG_ON(offsetof(struct idxd_hw_desc, size) != 32);
    RTE_BUILD_BUG_ON(sizeof(struct idxd_completion) != 32);

    if (name == NULL) {
        IDXD_PMD_ERR("Invalid name of the device!");
        ret = -EINVAL;
        goto cleanup;
    }
    /* Allocate device structure */
    dmadev = rte_dma_pmd_allocate(name, dev->numa_node, sizeof(struct idxd_dmadev));
    if (dmadev == NULL) {
        IDXD_PMD_ERR("Unable to allocate dma device");
        ret = -ENOMEM;
        goto cleanup;
    }
    dmadev->dev_ops = ops;
    dmadev->device = dev;
    dmadev->fp_obj->copy = idxd_enqueue_copy;
    dmadev->fp_obj->fill = idxd_enqueue_fill;
    dmadev->fp_obj->submit = idxd_submit;
    dmadev->fp_obj->completed = idxd_completed;
    dmadev->fp_obj->completed_status = idxd_completed_status;
    dmadev->fp_obj->burst_capacity = idxd_burst_capacity;

    idxd = dmadev->data->dev_private;
    *idxd = *base_idxd; /* copy over the main fields already passed in */
    idxd->dmadev = dmadev;
    /* allocate batch index ring and completion ring.
     * The +1 is because we can never fully use
     * the ring, otherwise read == write means both full and empty.
     */
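    /*
     * Both rings come from one allocation: the 32-byte aligned completion
     * records are laid out first and the uint16_t batch index ring is carved
     * out of the same block directly after them.
     */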
    idxd->batch_comp_ring = rte_zmalloc_socket(NULL, (sizeof(idxd->batch_idx_ring[0]) +
            sizeof(idxd->batch_comp_ring[0])) * (idxd->max_batches + 1),
            sizeof(idxd->batch_comp_ring[0]), dev->numa_node);
    if (idxd->batch_comp_ring == NULL) {
        IDXD_PMD_ERR("Unable to reserve memory for batch data\n");
        ret = -ENOMEM;
        goto cleanup;
    }
    idxd->batch_idx_ring = (void *)&idxd->batch_comp_ring[idxd->max_batches + 1];
    idxd->batch_iova = rte_mem_virt2iova(idxd->batch_comp_ring);

    dmadev->fp_obj->dev_private = idxd;

    idxd->dmadev->state = RTE_DMA_DEV_READY;

    return 0;

cleanup:
    if (dmadev)
        rte_dma_pmd_release(name);

    return ret;
}

int idxd_pmd_logtype;

RTE_LOG_REGISTER_DEFAULT(idxd_pmd_logtype, WARNING);