/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2018 Advanced Micro Devices, Inc. All rights reserved.
 */

#include <dirent.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/queue.h>
#include <sys/types.h>
#include <sys/file.h>
#include <unistd.h>

#include <rte_hexdump.h>
#include <rte_memzone.h>
#include <rte_malloc.h>
#include <rte_memory.h>
#include <rte_spinlock.h>
#include <rte_string_fns.h>

#include "ccp_dev.h"
#include "ccp_pci.h"
#include "ccp_pmd_private.h"

struct ccp_list ccp_list = TAILQ_HEAD_INITIALIZER(ccp_list);
static int ccp_dev_id;

int
ccp_dev_start(struct rte_cryptodev *dev)
{
	struct ccp_private *priv = dev->data->dev_private;

	priv->last_dev = TAILQ_FIRST(&ccp_list);
	return 0;
}

struct ccp_queue *
ccp_allot_queue(struct rte_cryptodev *cdev, int slot_req)
{
	int i, ret = 0;
	struct ccp_device *dev;
	struct ccp_private *priv = cdev->data->dev_private;

	dev = TAILQ_NEXT(priv->last_dev, next);
	if (unlikely(dev == NULL))
		dev = TAILQ_FIRST(&ccp_list);
	priv->last_dev = dev;
	if (dev->qidx >= dev->cmd_q_count)
		dev->qidx = 0;
	ret = rte_atomic64_read(&dev->cmd_q[dev->qidx].free_slots);
	if (ret >= slot_req)
		return &dev->cmd_q[dev->qidx];
	for (i = 0; i < dev->cmd_q_count; i++) {
		dev->qidx++;
		if (dev->qidx >= dev->cmd_q_count)
			dev->qidx = 0;
		ret = rte_atomic64_read(&dev->cmd_q[dev->qidx].free_slots);
		if (ret >= slot_req)
			return &dev->cmd_q[dev->qidx];
	}
	return NULL;
}

int
ccp_read_hwrng(uint32_t *value)
{
	struct ccp_device *dev;

	TAILQ_FOREACH(dev, &ccp_list, next) {
		void *vaddr = (void *)(dev->pci.mem_resource[2].addr);

		while (dev->hwrng_retries++ < CCP_MAX_TRNG_RETRIES) {
			*value = CCP_READ_REG(vaddr, TRNG_OUT_REG);
			if (*value) {
				dev->hwrng_retries = 0;
				return 0;
			}
		}
		dev->hwrng_retries = 0;
	}
	return -1;
}

static const struct rte_memzone *
ccp_queue_dma_zone_reserve(const char *queue_name,
			   uint32_t queue_size,
			   int socket_id)
{
	const struct rte_memzone *mz;

	mz = rte_memzone_lookup(queue_name);
	if (mz != NULL) {
		if (((size_t)queue_size <= mz->len) &&
		    ((socket_id == SOCKET_ID_ANY) ||
		     (socket_id == mz->socket_id))) {
			CCP_LOG_INFO("re-use memzone already "
				     "allocated for %s", queue_name);
			return mz;
		}
		CCP_LOG_ERR("Incompatible memzone already "
			    "allocated %s, size %u, socket %d. "
			    "Requested size %u, socket %u",
			    queue_name, (uint32_t)mz->len,
			    mz->socket_id, queue_size, socket_id);
		return NULL;
	}

	CCP_LOG_INFO("Allocate memzone for %s, size %u on socket %u",
		     queue_name, queue_size, socket_id);

	return rte_memzone_reserve_aligned(queue_name, queue_size,
			socket_id, RTE_MEMZONE_IOVA_CONTIG, queue_size);
}

/* bitmap support apis */
static inline void
ccp_set_bit(unsigned long *bitmap, int n)
{
	__sync_fetch_and_or(&bitmap[WORD_OFFSET(n)], (1UL << BIT_OFFSET(n)));
}

static inline void
ccp_clear_bit(unsigned long *bitmap, int n)
{
	__sync_fetch_and_and(&bitmap[WORD_OFFSET(n)], ~(1UL << BIT_OFFSET(n)));
}

static inline uint32_t
ccp_get_bit(unsigned long *bitmap, int n)
{
	return ((bitmap[WORD_OFFSET(n)] & (1UL << BIT_OFFSET(n))) != 0);
}

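/* Worked example for the helpers above (illustrative; it assumes the usual
 * definitions WORD_OFFSET(n) == n / BITS_PER_WORD and
 * BIT_OFFSET(n) == n % BITS_PER_WORD from ccp_dev.h): with 64-bit longs,
 * ccp_set_bit(bitmap, 70) atomically ORs bit 6 of bitmap[1], since
 * 70 / 64 == 1 and 70 % 64 == 6. The __sync_* builtins make set/clear
 * atomic; ccp_get_bit is a plain, non-atomic read.
 */
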
static inline uint32_t
ccp_ffz(unsigned long word)
{
	unsigned long first_zero;

	first_zero = __builtin_ffsl(~word);
	return first_zero ? (first_zero - 1) :
		BITS_PER_WORD;
}

static inline uint32_t
ccp_find_first_zero_bit(unsigned long *addr, uint32_t limit)
{
	uint32_t i;
	uint32_t nwords = 0;

	nwords = (limit - 1) / BITS_PER_WORD + 1;
	for (i = 0; i < nwords; i++) {
		if (addr[i] == 0UL)
			return i * BITS_PER_WORD;
		if (addr[i] < ~(0UL))
			break;
	}
	return (i == nwords) ? limit : i * BITS_PER_WORD + ccp_ffz(addr[i]);
}

static void
ccp_bitmap_set(unsigned long *map, unsigned int start, int len)
{
	unsigned long *p = map + WORD_OFFSET(start);
	const unsigned int size = start + len;
	int bits_to_set = BITS_PER_WORD - (start % BITS_PER_WORD);
	unsigned long mask_to_set = CCP_BITMAP_FIRST_WORD_MASK(start);

	while (len - bits_to_set >= 0) {
		*p |= mask_to_set;
		len -= bits_to_set;
		bits_to_set = BITS_PER_WORD;
		mask_to_set = ~0UL;
		p++;
	}
	if (len) {
		mask_to_set &= CCP_BITMAP_LAST_WORD_MASK(size);
		*p |= mask_to_set;
	}
}

static void
ccp_bitmap_clear(unsigned long *map, unsigned int start, int len)
{
	unsigned long *p = map + WORD_OFFSET(start);
	const unsigned int size = start + len;
	int bits_to_clear = BITS_PER_WORD - (start % BITS_PER_WORD);
	unsigned long mask_to_clear = CCP_BITMAP_FIRST_WORD_MASK(start);

	while (len - bits_to_clear >= 0) {
		*p &= ~mask_to_clear;
		len -= bits_to_clear;
		bits_to_clear = BITS_PER_WORD;
		mask_to_clear = ~0UL;
		p++;
	}
	if (len) {
		mask_to_clear &= CCP_BITMAP_LAST_WORD_MASK(size);
		*p &= ~mask_to_clear;
	}
}

static unsigned long
_ccp_find_next_bit(const unsigned long *addr,
		   unsigned long nbits,
		   unsigned long start,
		   unsigned long invert)
{
	unsigned long tmp;

	if (!nbits || start >= nbits)
		return nbits;

	tmp = addr[start / BITS_PER_WORD] ^ invert;

	/* Handle 1st word. */
	tmp &= CCP_BITMAP_FIRST_WORD_MASK(start);
	start = ccp_round_down(start, BITS_PER_WORD);

	while (!tmp) {
		start += BITS_PER_WORD;
		if (start >= nbits)
			return nbits;

		tmp = addr[start / BITS_PER_WORD] ^ invert;
	}

	return RTE_MIN(start + (ffs(tmp) - 1), nbits);
}

static unsigned long
ccp_find_next_bit(const unsigned long *addr,
		  unsigned long size,
		  unsigned long offset)
{
	return _ccp_find_next_bit(addr, size, offset, 0UL);
}

static unsigned long
ccp_find_next_zero_bit(const unsigned long *addr,
		       unsigned long size,
		       unsigned long offset)
{
	return _ccp_find_next_bit(addr, size, offset, ~0UL);
}

/**
 * bitmap_find_next_zero_area - find a contiguous aligned zero area
 * @map: The address to base the search on
 * @size: The bitmap size in bits
 * @start: The bitnumber to start searching at
 * @nr: The number of zeroed bits we're looking for
 */
static unsigned long
ccp_bitmap_find_next_zero_area(unsigned long *map,
			       unsigned long size,
			       unsigned long start,
			       unsigned int nr)
{
	unsigned long index, end, i;

again:
	index = ccp_find_next_zero_bit(map, size, start);

	end = index + nr;
	if (end > size)
		return end;
	i = ccp_find_next_bit(map, end, index);
	if (i < end) {
		start = i + 1;
		goto again;
	}
	return index;
}

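/* Usage sketch (illustrative, not part of the driver): reserving two
 * contiguous free slots in a queue's private map looks roughly like
 *
 *	start = ccp_bitmap_find_next_zero_area(cmd_q->lsbmap, LSB_SIZE, 0, 2);
 *	if (start < LSB_SIZE)
 *		ccp_bitmap_set(cmd_q->lsbmap, start, 2);
 *
 * which is the pattern ccp_lsb_alloc() below follows. The search restarts
 * from i + 1 whenever a set bit is found inside the candidate window, so
 * the returned area is always fully zero.
 */
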
static uint32_t
ccp_lsb_alloc(struct ccp_queue *cmd_q, unsigned int count)
{
	struct ccp_device *ccp;
	int start;

	/* First look at the map for the queue */
	if (cmd_q->lsb >= 0) {
		start = (uint32_t)ccp_bitmap_find_next_zero_area(cmd_q->lsbmap,
								 LSB_SIZE, 0,
								 count);
		if (start < LSB_SIZE) {
			ccp_bitmap_set(cmd_q->lsbmap, start, count);
			return start + cmd_q->lsb * LSB_SIZE;
		}
	}

	/* try to get an entry from the shared blocks */
	ccp = cmd_q->dev;

	rte_spinlock_lock(&ccp->lsb_lock);

	start = (uint32_t)ccp_bitmap_find_next_zero_area(ccp->lsbmap,
							 MAX_LSB_CNT * LSB_SIZE,
							 0, count);
	if (start <= MAX_LSB_CNT * LSB_SIZE) {
		ccp_bitmap_set(ccp->lsbmap, start, count);
		rte_spinlock_unlock(&ccp->lsb_lock);
		return start * LSB_ITEM_SIZE;
	}
	CCP_LOG_ERR("NO LSBs available");

	rte_spinlock_unlock(&ccp->lsb_lock);

	return 0;
}

static void __rte_unused
ccp_lsb_free(struct ccp_queue *cmd_q,
	     unsigned int start,
	     unsigned int count)
{
	int lsbno = start / LSB_SIZE;

	if (!start)
		return;

	if (cmd_q->lsb == lsbno) {
		/* An entry from the private LSB */
		ccp_bitmap_clear(cmd_q->lsbmap, start % LSB_SIZE, count);
	} else {
		/* From the shared LSBs */
		struct ccp_device *ccp = cmd_q->dev;

		rte_spinlock_lock(&ccp->lsb_lock);
		ccp_bitmap_clear(ccp->lsbmap, start, count);
		rte_spinlock_unlock(&ccp->lsb_lock);
	}
}

static int
ccp_find_lsb_regions(struct ccp_queue *cmd_q, uint64_t status)
{
	int q_mask = 1 << cmd_q->id;
	int weight = 0;
	int j;

	/* Build a bit mask to know which LSBs
	 * this queue has access to.
	 * Don't bother with segment 0
	 */
	status >>= LSB_REGION_WIDTH;
	for (j = 1; j < MAX_LSB_CNT; j++) {
		if (status & q_mask)
			ccp_set_bit(&cmd_q->lsbmask, j);

		status >>= LSB_REGION_WIDTH;
	}

	for (j = 0; j < MAX_LSB_CNT; j++)
		if (ccp_get_bit(&cmd_q->lsbmask, j))
			weight++;

	printf("Queue %d can access %d LSB regions of mask %lu\n",
	       (int)cmd_q->id, weight, cmd_q->lsbmask);

	return weight ? 0 : -EINVAL;
}

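/* Reading of the loop above: 'status' packs one access-mask field per LSB
 * region, each LSB_REGION_WIDTH bits wide (the exact width comes from
 * ccp_dev.h). After the j-th right shift, (status & q_mask) reports whether
 * region j is visible to this queue, where q_mask is 1 << cmd_q->id
 * (0x4 for queue 2, for example). Region 0 is skipped by the shift done
 * before the loop.
 */
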
static int
ccp_find_and_assign_lsb_to_q(struct ccp_device *ccp,
			     int lsb_cnt, int n_lsbs,
			     unsigned long *lsb_pub)
{
	unsigned long qlsb = 0;
	int bitno = 0;
	int qlsb_wgt = 0;
	int i, j;

	/*
	 * If the count of potential LSBs available to a queue matches the
	 * ordinal given to us in lsb_cnt:
	 * Copy the mask of possible LSBs for this queue into "qlsb";
	 * For each bit in qlsb, see if the corresponding bit in the
	 * aggregation mask is set; if so, we have a match.
	 * If we have a match, clear the bit in the aggregation to
	 * mark it as no longer available.
	 * If there is no match, clear the bit in qlsb and keep looking.
	 */
	for (i = 0; i < ccp->cmd_q_count; i++) {
		struct ccp_queue *cmd_q = &ccp->cmd_q[i];

		qlsb_wgt = 0;
		for (j = 0; j < MAX_LSB_CNT; j++)
			if (ccp_get_bit(&cmd_q->lsbmask, j))
				qlsb_wgt++;

		if (qlsb_wgt == lsb_cnt) {
			qlsb = cmd_q->lsbmask;

			bitno = ffs(qlsb) - 1;
			while (bitno < MAX_LSB_CNT) {
				if (ccp_get_bit(lsb_pub, bitno)) {
					/* We found an available LSB
					 * that this queue can access
					 */
					cmd_q->lsb = bitno;
					ccp_clear_bit(lsb_pub, bitno);
					break;
				}
				ccp_clear_bit(&qlsb, bitno);
				bitno = ffs(qlsb) - 1;
			}
			if (bitno >= MAX_LSB_CNT)
				return -EINVAL;
			n_lsbs--;
		}
	}
	return n_lsbs;
}

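/* Small example of the matching pass (illustrative numbers): with
 * lsb_cnt == 1 only queues whose lsbmask has exactly one bit set are
 * considered. If queue 0's mask is 0x02 and bit 1 is still set in *lsb_pub,
 * region 1 becomes queue 0's private LSB and is cleared from the public
 * mask, so a later, less-constrained queue can no longer claim it. The
 * updated n_lsbs is returned so the caller can track how many regions
 * remain unassigned.
 */
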
/* For each queue, from the most- to least-constrained:
 * find an LSB that can be assigned to the queue. If there are N queues that
 * can only use M LSBs, where N > M, fail; otherwise, every queue will get a
 * dedicated LSB. Remaining LSB regions become a shared resource.
 * If we have fewer LSBs than queues, all LSB regions become shared
 * resources.
 */
static int
ccp_assign_lsbs(struct ccp_device *ccp)
{
	unsigned long lsb_pub = 0, qlsb = 0;
	int bitno;
	int i, lsb_cnt;
	int n_lsbs = 0;
	int rc = 0;

	rte_spinlock_init(&ccp->lsb_lock);

	/* Create an aggregate bitmap to get a total count of available LSBs */
	for (i = 0; i < ccp->cmd_q_count; i++)
		lsb_pub |= ccp->cmd_q[i].lsbmask;

	for (i = 0; i < MAX_LSB_CNT; i++)
		if (ccp_get_bit(&lsb_pub, i))
			n_lsbs++;

	if (n_lsbs >= ccp->cmd_q_count) {
		/* We have enough LSBS to give every queue a private LSB.
		 * Brute force search to start with the queues that are more
		 * constrained in LSB choice. When an LSB is privately
		 * assigned, it is removed from the public mask.
		 * This is an ugly N squared algorithm with some optimization.
		 */
		for (lsb_cnt = 1; n_lsbs && (lsb_cnt <= MAX_LSB_CNT);
		     lsb_cnt++) {
			rc = ccp_find_and_assign_lsb_to_q(ccp, lsb_cnt, n_lsbs,
							  &lsb_pub);
			if (rc < 0)
				return -EINVAL;
			n_lsbs = rc;
		}
	}

	/* What's left of the LSBs, according to the public mask, now become
	 * shared. Any zero bits in the lsb_pub mask represent an LSB region
	 * that can't be used as a shared resource, so mark the LSB slots for
	 * them as "in use".
	 */
	qlsb = lsb_pub;
	bitno = ccp_find_first_zero_bit(&qlsb, MAX_LSB_CNT);
	while (bitno < MAX_LSB_CNT) {
		ccp_bitmap_set(ccp->lsbmap, bitno * LSB_SIZE, LSB_SIZE);
		ccp_set_bit(&qlsb, bitno);
		bitno = ccp_find_first_zero_bit(&qlsb, MAX_LSB_CNT);
	}

	return rc;
}

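/* Example outcome (illustrative counts): with five queues and eight
 * reachable LSB regions, every queue gets one private region and the
 * remaining regions stay in lsb_pub as shared ones; with more queues than
 * regions the private pass is skipped and every reachable region is shared.
 * Regions no queue can reach are marked busy in ccp->lsbmap so the shared
 * allocator in ccp_lsb_alloc() never hands them out.
 */
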
static int
ccp_add_device(struct ccp_device *dev, int type)
{
	int i;
	uint32_t qmr, status_lo, status_hi, dma_addr_lo, dma_addr_hi;
	uint64_t status;
	struct ccp_queue *cmd_q;
	const struct rte_memzone *q_mz;
	void *vaddr;

	if (dev == NULL)
		return -1;

	dev->id = ccp_dev_id++;
	dev->qidx = 0;
	vaddr = (void *)(dev->pci.mem_resource[2].addr);

	if (type == CCP_VERSION_5B) {
		CCP_WRITE_REG(vaddr, CMD_TRNG_CTL_OFFSET, 0x00012D57);
		CCP_WRITE_REG(vaddr, CMD_CONFIG_0_OFFSET, 0x00000003);
		for (i = 0; i < 12; i++) {
			CCP_WRITE_REG(vaddr, CMD_AES_MASK_OFFSET,
				      CCP_READ_REG(vaddr, TRNG_OUT_REG));
		}
		CCP_WRITE_REG(vaddr, CMD_QUEUE_MASK_OFFSET, 0x0000001F);
		CCP_WRITE_REG(vaddr, CMD_QUEUE_PRIO_OFFSET, 0x00005B6D);
		CCP_WRITE_REG(vaddr, CMD_CMD_TIMEOUT_OFFSET, 0x00000000);

		CCP_WRITE_REG(vaddr, LSB_PRIVATE_MASK_LO_OFFSET, 0x3FFFFFFF);
		CCP_WRITE_REG(vaddr, LSB_PRIVATE_MASK_HI_OFFSET, 0x000003FF);

		CCP_WRITE_REG(vaddr, CMD_CLK_GATE_CTL_OFFSET, 0x00108823);
	}
	CCP_WRITE_REG(vaddr, CMD_REQID_CONFIG_OFFSET, 0x00001249);

	/* Copy the private LSB mask to the public registers */
	status_lo = CCP_READ_REG(vaddr, LSB_PRIVATE_MASK_LO_OFFSET);
	status_hi = CCP_READ_REG(vaddr, LSB_PRIVATE_MASK_HI_OFFSET);
	CCP_WRITE_REG(vaddr, LSB_PUBLIC_MASK_LO_OFFSET, status_lo);
	CCP_WRITE_REG(vaddr, LSB_PUBLIC_MASK_HI_OFFSET, status_hi);
	status = ((uint64_t)status_hi << 30) | ((uint64_t)status_lo);
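	/* The LO register holds the low 30 bits of the LSB access map and the
	 * HI register the upper bits (cf. the 0x3FFFFFFF and 0x000003FF
	 * private-mask values written above for CCP_VERSION_5B), hence the
	 * 30-bit shift when combining the two halves.
	 */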

	dev->cmd_q_count = 0;
	/* Find available queues */
	qmr = CCP_READ_REG(vaddr, Q_MASK_REG);
	for (i = 0; i < MAX_HW_QUEUES; i++) {
		if (!(qmr & (1 << i)))
			continue;
		cmd_q = &dev->cmd_q[dev->cmd_q_count++];
		cmd_q->dev = dev;
		cmd_q->id = i;
		cmd_q->qidx = 0;
		cmd_q->qsize = Q_SIZE(Q_DESC_SIZE);

		cmd_q->reg_base = (uint8_t *)vaddr +
			CMD_Q_STATUS_INCR * (i + 1);

		/* CCP queue memory */
		snprintf(cmd_q->memz_name, sizeof(cmd_q->memz_name),
			 "%s_%d_%s_%d_%s",
			 "ccp_dev",
			 (int)dev->id, "queue",
			 (int)cmd_q->id, "mem");
		q_mz = ccp_queue_dma_zone_reserve(cmd_q->memz_name,
						  cmd_q->qsize, SOCKET_ID_ANY);
		cmd_q->qbase_addr = (void *)q_mz->addr;
		cmd_q->qbase_desc = (void *)q_mz->addr;
		cmd_q->qbase_phys_addr = q_mz->phys_addr;

		cmd_q->qcontrol = 0;
		/* init control reg to zero */
		CCP_WRITE_REG(cmd_q->reg_base, CMD_Q_CONTROL_BASE,
			      cmd_q->qcontrol);

		/* Disable the interrupts */
		CCP_WRITE_REG(cmd_q->reg_base, CMD_Q_INT_ENABLE_BASE, 0x00);
		CCP_READ_REG(cmd_q->reg_base, CMD_Q_INT_STATUS_BASE);
		CCP_READ_REG(cmd_q->reg_base, CMD_Q_STATUS_BASE);

		/* Clear the interrupts */
		CCP_WRITE_REG(cmd_q->reg_base, CMD_Q_INTERRUPT_STATUS_BASE,
			      ALL_INTERRUPTS);

		/* Configure size of each virtual queue accessible to host */
		cmd_q->qcontrol &= ~(CMD_Q_SIZE << CMD_Q_SHIFT);
		cmd_q->qcontrol |= QUEUE_SIZE_VAL << CMD_Q_SHIFT;

		dma_addr_lo = low32_value(cmd_q->qbase_phys_addr);
		CCP_WRITE_REG(cmd_q->reg_base, CMD_Q_TAIL_LO_BASE,
			      (uint32_t)dma_addr_lo);
		CCP_WRITE_REG(cmd_q->reg_base, CMD_Q_HEAD_LO_BASE,
			      (uint32_t)dma_addr_lo);

		dma_addr_hi = high32_value(cmd_q->qbase_phys_addr);
		cmd_q->qcontrol |= (dma_addr_hi << 16);
		CCP_WRITE_REG(cmd_q->reg_base, CMD_Q_CONTROL_BASE,
			      cmd_q->qcontrol);

		/* create LSB Mask map */
		if (ccp_find_lsb_regions(cmd_q, status))
			CCP_LOG_ERR("queue doesn't have lsb regions");
		cmd_q->lsb = -1;

		rte_atomic64_init(&cmd_q->free_slots);
		rte_atomic64_set(&cmd_q->free_slots, (COMMANDS_PER_QUEUE - 1));
		/* unused slot barrier between head & tail */
	}

	if (ccp_assign_lsbs(dev))
		CCP_LOG_ERR("Unable to assign lsb region");

	/* pre-allocate LSB slots */
	for (i = 0; i < dev->cmd_q_count; i++) {
		dev->cmd_q[i].sb_key =
			ccp_lsb_alloc(&dev->cmd_q[i], 1);
		dev->cmd_q[i].sb_iv =
			ccp_lsb_alloc(&dev->cmd_q[i], 1);
		dev->cmd_q[i].sb_sha =
			ccp_lsb_alloc(&dev->cmd_q[i], 2);
		dev->cmd_q[i].sb_hmac =
			ccp_lsb_alloc(&dev->cmd_q[i], 2);
	}

	TAILQ_INSERT_TAIL(&ccp_list, dev, next);
	return 0;
}

static void
ccp_remove_device(struct ccp_device *dev)
{
	if (dev == NULL)
		return;

	TAILQ_REMOVE(&ccp_list, dev, next);
}

static int
is_ccp_device(const char *dirname,
	      const struct rte_pci_id *ccp_id,
	      int *type)
{
	char filename[PATH_MAX];
	const struct rte_pci_id *id;
	uint16_t vendor, device_id;
	int i;
	unsigned long tmp;

	/* get vendor id */
	snprintf(filename, sizeof(filename), "%s/vendor", dirname);
	if (ccp_pci_parse_sysfs_value(filename, &tmp) < 0)
		return 0;
	vendor = (uint16_t)tmp;

	/* get device id */
	snprintf(filename, sizeof(filename), "%s/device", dirname);
	if (ccp_pci_parse_sysfs_value(filename, &tmp) < 0)
		return 0;
	device_id = (uint16_t)tmp;

	for (id = ccp_id, i = 0; id->vendor_id != 0; id++, i++) {
		if (vendor == id->vendor_id &&
		    device_id == id->device_id) {
			*type = i;
			return 1; /* Matched device */
		}
	}
	return 0;
}

static int
ccp_probe_device(const char *dirname, uint16_t domain,
		 uint8_t bus, uint8_t devid,
		 uint8_t function, int ccp_type)
{
	struct ccp_device *ccp_dev = NULL;
	struct rte_pci_device *pci;
	char filename[PATH_MAX];
	unsigned long tmp;
	int uio_fd = -1, i, uio_num;
	char uio_devname[PATH_MAX];
	void *map_addr;

	ccp_dev = rte_zmalloc("ccp_device", sizeof(*ccp_dev),
			      RTE_CACHE_LINE_SIZE);
	if (ccp_dev == NULL)
		goto fail;
	pci = &(ccp_dev->pci);

	pci->addr.domain = domain;
	pci->addr.bus = bus;
	pci->addr.devid = devid;
	pci->addr.function = function;

	/* get vendor id */
	snprintf(filename, sizeof(filename), "%s/vendor", dirname);
	if (ccp_pci_parse_sysfs_value(filename, &tmp) < 0)
		goto fail;
	pci->id.vendor_id = (uint16_t)tmp;

	/* get device id */
	snprintf(filename, sizeof(filename), "%s/device", dirname);
	if (ccp_pci_parse_sysfs_value(filename, &tmp) < 0)
		goto fail;
	pci->id.device_id = (uint16_t)tmp;

	/* get subsystem_vendor id */
	snprintf(filename, sizeof(filename), "%s/subsystem_vendor",
		 dirname);
	if (ccp_pci_parse_sysfs_value(filename, &tmp) < 0)
		goto fail;
	pci->id.subsystem_vendor_id = (uint16_t)tmp;

	/* get subsystem_device id */
	snprintf(filename, sizeof(filename), "%s/subsystem_device",
		 dirname);
	if (ccp_pci_parse_sysfs_value(filename, &tmp) < 0)
		goto fail;
	pci->id.subsystem_device_id = (uint16_t)tmp;

	/* get class id */
	snprintf(filename, sizeof(filename), "%s/class",
		 dirname);
	if (ccp_pci_parse_sysfs_value(filename, &tmp) < 0)
		goto fail;
	/* the least 24 bits are valid: class, subclass, program interface */
	pci->id.class_id = (uint32_t)tmp & RTE_CLASS_ANY_ID;

	/* parse resources */
	snprintf(filename, sizeof(filename), "%s/resource", dirname);
	if (ccp_pci_parse_sysfs_resource(filename, pci) < 0)
		goto fail;

	uio_num = ccp_find_uio_devname(dirname);
	if (uio_num < 0) {
		/*
		 * It may take time for uio device to appear,
		 * wait here and try again
		 */
		uio_num = ccp_find_uio_devname(dirname);
		if (uio_num < 0)
			goto fail;
	}
	snprintf(uio_devname, sizeof(uio_devname), "/dev/uio%u", uio_num);

	uio_fd = open(uio_devname, O_RDWR | O_NONBLOCK);
	if (uio_fd < 0)
		goto fail;
	if (flock(uio_fd, LOCK_EX | LOCK_NB))
		goto fail;

	/* Map the PCI memory resource of device */
	for (i = 0; i < PCI_MAX_RESOURCE; i++) {
		char devname[PATH_MAX];
		int res_fd;

		if (pci->mem_resource[i].phys_addr == 0)
			continue;
		snprintf(devname, sizeof(devname), "%s/resource%d", dirname, i);
		res_fd = open(devname, O_RDWR);
		if (res_fd < 0)
			goto fail;
		map_addr = mmap(NULL, pci->mem_resource[i].len,
				PROT_READ | PROT_WRITE,
				MAP_SHARED, res_fd, 0);
		if (map_addr == MAP_FAILED)
			goto fail;

		pci->mem_resource[i].addr = map_addr;
	}

	/* device is valid, add in list */
	if (ccp_add_device(ccp_dev, ccp_type)) {
		ccp_remove_device(ccp_dev);
		goto fail;
	}

	return 0;
fail:
	CCP_LOG_ERR("CCP Device probe failed");
	if (uio_fd >= 0)
		close(uio_fd);
	rte_free(ccp_dev);
	return -1;
}

int
ccp_probe_devices(const struct rte_pci_id *ccp_id)
{
	int ccp_type = 0;
	struct dirent *d;
	DIR *dir;
	int ret = 0;
	int module_idx = 0;
	uint16_t domain;
	uint8_t bus, devid, function;
	char dirname[PATH_MAX];

	module_idx = ccp_check_pci_uio_module();
	if (module_idx < 0)
		return -1;

	TAILQ_INIT(&ccp_list);
	dir = opendir(SYSFS_PCI_DEVICES);
	if (dir == NULL)
		return -1;
	while ((d = readdir(dir)) != NULL) {
		if (d->d_name[0] == '.')
			continue;
		if (ccp_parse_pci_addr_format(d->d_name, sizeof(d->d_name),
				&domain, &bus, &devid, &function) != 0)
			continue;
		snprintf(dirname, sizeof(dirname), "%s/%s",
			 SYSFS_PCI_DEVICES, d->d_name);
		if (is_ccp_device(dirname, ccp_id, &ccp_type)) {
			printf("CCP : Detected CCP device with ID = 0x%x\n",
			       ccp_id[ccp_type].device_id);
			ret = ccp_probe_device(dirname, domain, bus, devid,