1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(C) 2021 Marvell.
/* Aura ID occupies the low 16 bits of the 64-bit aura handle. */
8 #define ROC_AURA_ID_MASK (BIT_ULL(16) - 1)
/* AURA_OP_LIMIT carries a 36-bit pointer-count limit. */
9 #define ROC_AURA_OP_LIMIT_MASK (BIT_ULL(36) - 1)
/* CN10K batch-alloc: max pointers per BATCH_ALLOC operation. */
11 #define ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS 512
/* CN10K batch-free: max pointers per BATCH_FREE0 LMTST. */
12 #define ROC_CN10K_NPA_BATCH_FREE_MAX_PTRS 15
14 /* 16 CASP instructions can be outstanding in CN9k, but we use only 15
15 * outstanding CASPs as we run out of registers.
 * (Each CASP returns a pointer pair, hence 15 * 2 = 30 pointers.)
 */
17 #define ROC_CN9K_NPA_BULK_ALLOC_MAX_PTRS 30
/*
20 * Generate 64bit handle to have optimized alloc and free aura operation.
21 * 0 - ROC_AURA_ID_MASK for storing the aura_id.
22 * [ROC_AURA_ID_MASK+1, (2^64 - 1)] for storing the lf base address.
23 * This scheme is valid when OS can give ROC_AURA_ID_MASK
24 * aligned address for lf base address.
 */
/* Pack an aura ID (low 16 bits) and the NPA LF base address into a
 * single 64-bit aura handle. Relies on the base address being
 * (ROC_AURA_ID_MASK + 1) aligned so the two fields never overlap.
 * NOTE(review): extraction gap — the opening brace and the declaration
 * of 'val' (original lines 28-30) are missing from this view.
 */
26 static inline uint64_t
27 roc_npa_aura_handle_gen(uint32_t aura_id, uintptr_t addr)
31 val = aura_id & ROC_AURA_ID_MASK;
32 return (uint64_t)addr | val;
/* Extract the aura ID (low 16 bits) from an aura handle. */
35 static inline uint64_t
36 roc_npa_aura_handle_to_aura(uint64_t aura_handle)
38 return aura_handle & ROC_AURA_ID_MASK;
/* Recover the NPA LF base address from an aura handle by clearing the
 * aura-ID bits (valid only because the base is suitably aligned).
 */
41 static inline uintptr_t
42 roc_npa_aura_handle_to_base(uint64_t aura_handle)
44 return (uintptr_t)(aura_handle & ~ROC_AURA_ID_MASK);
/* Allocate (pop) one pointer from the aura via an atomic read of
 * NPA_LF_AURA_OP_ALLOCX(0). When 'drop' is set, bit 63 (DROP) asks the
 * hardware to honor the aura drop threshold.
 * Returns the pointer read back from hardware; presumably 0 when the
 * aura is empty — confirm against the NPA HW spec.
 * NOTE(review): extraction gap — the 'if (drop)' guard and the 'addr'
 * declaration (original lines 51-55) are missing from this view.
 */
47 static inline uint64_t
48 roc_npa_aura_op_alloc(uint64_t aura_handle, const int drop)
50 uint64_t wdata = roc_npa_aura_handle_to_aura(aura_handle);
54 wdata |= BIT_ULL(63); /* DROP */
56 addr = (int64_t *)(roc_npa_aura_handle_to_base(aura_handle) +
57 NPA_LF_AURA_OP_ALLOCX(0));
58 return roc_atomic64_add_nosync(wdata, addr);
/* Free (push) one pointer 'iova' back to the aura by a paired store to
 * NPA_LF_AURA_OP_FREE0/1. When 'fabs' is set, bit 63 (FABS) requests a
 * free with absolute address semantics.
 * NOTE(review): extraction gaps — the return-type line ('static inline
 * void', original line 61), the 'if (fabs)' guard and the 'addr'
 * declaration are missing from this view.
 */
62 roc_npa_aura_op_free(uint64_t aura_handle, const int fabs, uint64_t iova)
64 uint64_t reg = roc_npa_aura_handle_to_aura(aura_handle);
66 roc_npa_aura_handle_to_base(aura_handle) + NPA_LF_AURA_OP_FREE0;
68 reg |= BIT_ULL(63); /* FABS */
70 roc_store_pair(iova, reg, addr);
/* Read the aura's current pointer count. The aura ID is placed in
 * wdata bits [63:44] as required by the OP_CNT atomic read; the result
 * masks down to the 36-bit count field. Bit 42 (OP_ERR) in the response
 * indicates failure — the missing branch presumably returns 0.
 * NOTE(review): extraction gaps — local declarations, the register
 * offset on the 'addr' line (likely NPA_LF_AURA_OP_CNT), and the error
 * branch body are missing from this view.
 */
73 static inline uint64_t
74 roc_npa_aura_op_cnt_get(uint64_t aura_handle)
80 wdata = roc_npa_aura_handle_to_aura(aura_handle) << 44;
81 addr = (int64_t *)(roc_npa_aura_handle_to_base(aura_handle) +
83 reg = roc_atomic64_add_nosync(wdata, addr);
85 if (reg & BIT_ULL(42) /* OP_ERR */)
/* Return only the 36-bit CNT field. */
88 return reg & 0xFFFFFFFFF;
/* Adjust the aura pointer count: write a 36-bit 'count' with the aura
 * ID in bits [63:44]. Bit 43 (CNT_ADD) — set under the missing 'sign'
 * guard — selects add/subtract rather than absolute set.
 * NOTE(review): extraction gaps — the return-type line ('static inline
 * void'), the 'if (sign)' guard, and the register offset (likely
 * NPA_LF_AURA_OP_CNT) after the plt_write64 line are missing.
 */
92 roc_npa_aura_op_cnt_set(uint64_t aura_handle, const int sign, uint64_t count)
94 uint64_t reg = count & (BIT_ULL(36) - 1);
97 reg |= BIT_ULL(43); /* CNT_ADD */
99 reg |= (roc_npa_aura_handle_to_aura(aura_handle) << 44);
101 plt_write64(reg, roc_npa_aura_handle_to_base(aura_handle) +
/* Read the aura's pointer-count limit via an atomic read of
 * NPA_LF_AURA_OP_LIMIT (aura ID in wdata bits [63:44]). On OP_ERR
 * (bit 42) the missing branch presumably returns 0; otherwise the
 * 36-bit limit field is returned.
 * NOTE(review): extraction gap — local declarations and the error
 * branch body are missing from this view.
 */
105 static inline uint64_t
106 roc_npa_aura_op_limit_get(uint64_t aura_handle)
112 wdata = roc_npa_aura_handle_to_aura(aura_handle) << 44;
113 addr = (int64_t *)(roc_npa_aura_handle_to_base(aura_handle) +
114 NPA_LF_AURA_OP_LIMIT);
115 reg = roc_atomic64_add_nosync(wdata, addr);
117 if (reg & BIT_ULL(42) /* OP_ERR */)
120 return reg & ROC_AURA_OP_LIMIT_MASK;
/* Set the aura's pointer-count limit: 36-bit 'limit' with the aura ID
 * in bits [63:44], written to NPA_LF_AURA_OP_LIMIT.
 * NOTE(review): extraction gap — the return-type line ('static inline
 * void') and braces are missing from this view.
 */
124 roc_npa_aura_op_limit_set(uint64_t aura_handle, uint64_t limit)
126 uint64_t reg = limit & ROC_AURA_OP_LIMIT_MASK;
128 reg |= (roc_npa_aura_handle_to_aura(aura_handle) << 44);
130 plt_write64(reg, roc_npa_aura_handle_to_base(aura_handle) +
131 NPA_LF_AURA_OP_LIMIT);
/* Read the number of pointers currently available in the backing pool
 * via NPA_LF_POOL_OP_AVAILABLE (aura ID in wdata bits [63:44]). On
 * OP_ERR (bit 42) the missing branch presumably returns 0; otherwise
 * the 36-bit COUNT field is returned.
 * NOTE(review): extraction gap — local declarations and the error
 * branch body are missing from this view.
 */
134 static inline uint64_t
135 roc_npa_aura_op_available(uint64_t aura_handle)
141 wdata = roc_npa_aura_handle_to_aura(aura_handle) << 44;
142 addr = (int64_t *)(roc_npa_aura_handle_to_base(aura_handle) +
143 NPA_LF_POOL_OP_AVAILABLE);
144 reg = roc_atomic64_add_nosync(wdata, addr);
146 if (reg & BIT_ULL(42) /* OP_ERR */)
149 return reg & 0xFFFFFFFFF;
/* Read the pool operation performance counter (NPA_LF_POOL_OP_PC).
 * The wdata is built through a union over npa_aura_op_wdata_s; the
 * response carries OP_ERR at bit 48 and a 48-bit OP_PC value below it.
 * NOTE(review): 'op_wdata.s.drop |= BIT_ULL(63)' ORs a wide constant
 * into a (presumably narrow) bitfield member — looks suspicious but
 * matches the upstream source; verify against npa_aura_op_wdata_s.
 * NOTE(review): extraction gaps — the union wrapper around 's', the
 * 'if (drop)' guard, the register offset after the 'addr' line, and
 * the OP_ERR branch body are missing from this view.
 */
152 static inline uint64_t
153 roc_npa_pool_op_performance_counter(uint64_t aura_handle, const int drop)
157 struct npa_aura_op_wdata_s s;
163 op_wdata.s.aura = roc_npa_aura_handle_to_aura(aura_handle);
165 op_wdata.s.drop |= BIT_ULL(63); /* DROP */
167 addr = (int64_t *)(roc_npa_aura_handle_to_base(aura_handle) +
170 reg = roc_atomic64_add_nosync(op_wdata.u, addr);
/*
172 * NPA_LF_POOL_OP_PC Read Data
175 * -----------------------------
176 * | Reserved | OP_ERR | OP_PC |
177 * -----------------------------
 */
180 if (reg & BIT_ULL(48) /* OP_ERR */)
/* Return the 48-bit OP_PC field. */
183 return reg & 0xFFFFFFFFFFFF;
/* Issue a CN10K batch allocation of up to 512 pointers into 'buf' via
 * a 128-bit CAS on NPA_LF_AURA_BATCH_ALLOC. The first word of each
 * cache line in 'buf' is zeroed beforehand so completion status can be
 * polled later (see roc_npa_aura_batch_alloc_count/_extract).
 * The CAS result must be ACCEPTED or NOCORE for success; the missing
 * tail presumably returns -1 on other results and 0 otherwise.
 * NOTE(review): extraction gaps — the return-type line, the 'drop'
 * parameter, the union wrapper ('cmp') around compare_s, local
 * declarations, the loop body zeroing buf[i], and the return
 * statements are missing from this view.
 */
187 roc_npa_aura_batch_alloc_issue(uint64_t aura_handle, uint64_t *buf,
188 unsigned int num, const int dis_wait,
196 struct npa_batch_alloc_compare_s compare_s;
199 if (num > ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS)
202 /* Zero first word of every cache line */
203 for (i = 0; i < num; i += (ROC_ALIGN / sizeof(uint64_t)))
206 addr = (int64_t *)(roc_npa_aura_handle_to_base(aura_handle) +
207 NPA_LF_AURA_BATCH_ALLOC);
/* Build the 64-bit compare word describing the request. */
209 cmp.compare_s.aura = roc_npa_aura_handle_to_aura(aura_handle);
210 cmp.compare_s.drop = drop;
211 cmp.compare_s.stype = ALLOC_STYPE_STSTP;
212 cmp.compare_s.dis_wait = dis_wait;
213 cmp.compare_s.count = num;
215 res = roc_atomic64_cas(cmp.u, (uint64_t)buf, addr);
216 if (res != ALLOC_RESULT_ACCEPTED && res != ALLOC_RESULT_NOCORE)
/* Count how many pointers a previously issued batch alloc produced.
 * Hardware writes a status word (ccode + count) into the first 7 bits
 * of each 128-byte cache line of the result buffer; this spins until
 * each line's ccode leaves ALLOC_CCODE_INVAL, then accumulates the
 * per-line counts.
 * NOTE(review): extraction gaps — the early-return for oversized
 * 'num', the 'count = 0' initialization, the 'int ccode' declaration,
 * the 'do {' opener of the poll loop, and the final 'return count;'
 * are missing from this view.
 */
222 static inline unsigned int
223 roc_npa_aura_batch_alloc_count(uint64_t *aligned_buf, unsigned int num)
225 unsigned int count, i;
227 if (num > ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS)
231 /* Check each ROC cache line one by one */
232 for (i = 0; i < num; i += (ROC_ALIGN >> 3)) {
233 struct npa_batch_alloc_status_s *status;
236 status = (struct npa_batch_alloc_status_s *)&aligned_buf[i];
238 /* Status is updated in first 7 bits of each 128 byte cache
239 * line. Wait until the status gets updated.
 */
242 ccode = (volatile int)status->ccode;
243 } while (ccode == ALLOC_CCODE_INVAL);
245 count += status->count;
/* Extract the pointers produced by a batch alloc from the aligned
 * result buffer into 'buf', compacting out the per-cache-line status
 * words and trailing holes. Polls each 128-byte line's ccode until it
 * leaves ALLOC_CCODE_INVAL, then memmove()s that line's 'line_count'
 * pointers to the running output position.
 * NOTE(review): extraction gaps — the 'num' parameter line, the
 * early-return for oversized 'num', 'count = 0' initialization, the
 * 'do {' opener, the status-clearing statement, the 'count +=
 * line_count;' accumulation, and the final 'return count;' are missing
 * from this view.
 */
251 static inline unsigned int
252 roc_npa_aura_batch_alloc_extract(uint64_t *buf, uint64_t *aligned_buf,
255 unsigned int count, i;
257 if (num > ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS)
261 /* Check each ROC cache line one by one */
262 for (i = 0; i < num; i += (ROC_ALIGN >> 3)) {
263 struct npa_batch_alloc_status_s *status;
264 int line_count, ccode;
266 status = (struct npa_batch_alloc_status_s *)&aligned_buf[i];
268 /* Status is updated in first 7 bits of each 128 byte cache
269 * line. Wait until the status gets updated.
 */
272 ccode = (volatile int)status->ccode;
273 } while (ccode == ALLOC_CCODE_INVAL);
275 line_count = status->count;
277 /* Clear the status from the cache line */
281 /* 'Compress' the allocated buffers as there can
282 * be 'holes' at the end of the 128 byte cache
 * line.
 */
285 memmove(&buf[count], &aligned_buf[i],
286 line_count * sizeof(uint64_t));
/* Free 'num' pointers back to the aura one at a time via
 * roc_npa_aura_op_free(); 'fabs' is forwarded to each free.
 * NOTE(review): extraction gaps — the return-type line ('static inline
 * void'), the loop-index declaration, and the closing braces are
 * missing from this view.
 */
295 roc_npa_aura_op_bulk_free(uint64_t aura_handle, uint64_t const *buf,
296 unsigned int num, const int fabs)
300 for (i = 0; i < num; i++) {
301 const uint64_t inbuf = buf[i];
303 roc_npa_aura_op_free(aura_handle, fabs, inbuf);
/* CN10K batch-alloc driver loop: allocate 'num' pointers in chunks of
 * up to ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS using the issue/extract
 * helpers, staging through the 128-byte-aligned 'aligned_buf'. If the
 * hardware returns fewer than requested and 'partial' is false, all
 * pointers allocated so far are freed back ('buf - count' walks back
 * to the start of what was written; fabs=1).
 * NOTE(review): extraction gaps — the trailing 'partial' parameter
 * line, the chunk loop construct, the accumulation of 'count', the
 * shortfall break, and the return statements are missing from this
 * view.
 */
307 static inline unsigned int
308 roc_npa_aura_op_batch_alloc(uint64_t aura_handle, uint64_t *buf,
309 uint64_t *aligned_buf, unsigned int num,
310 const int dis_wait, const int drop,
313 unsigned int count, chunk, num_alloc;
315 /* The buffer should be 128 byte cache line aligned */
316 if (((uint64_t)aligned_buf & (ROC_ALIGN - 1)) != 0)
321 chunk = (num > ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS) ?
322 ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS :
325 if (roc_npa_aura_batch_alloc_issue(aura_handle, aligned_buf,
326 chunk, dis_wait, drop))
329 num_alloc = roc_npa_aura_batch_alloc_extract(buf, aligned_buf,
336 if (num_alloc != chunk)
340 /* If the requested number of pointers was not allocated and if partial
341 * alloc is not desired, then free allocated pointers.
 */
343 if (unlikely(num != 0 && !partial)) {
344 roc_npa_aura_op_bulk_free(aura_handle, buf - count, count, 1);
/* CN10K batch free: push up to 15 pointers to the aura in one LMTST
 * to NPA_LF_AURA_BATCH_FREE0. The first LMT word is the FREE0 header
 * (FABS bit 63, COUNT_EOT bit 32, AURA in bits [19:0]); the pointers
 * follow in lmt_data[1..num]. tar_addr bits [6:4] encode the LMTST
 * size - 1 in 128-bit units, hence '(num >> 1) << 4'.
 * NOTE(review): extraction gaps — the return-type line, the trailing
 * 'lmt_id' parameter, the 'if (fabs)' guard before the bit-63 OR, the
 * odd-count guard (presumably 'if (num & 1)') before the bit-32 OR,
 * and the 'lmt_data[0] = free0;' store are missing from this view.
 */
352 roc_npa_aura_batch_free(uint64_t aura_handle, uint64_t const *buf,
353 unsigned int num, const int fabs, uint64_t lmt_addr,
356 uint64_t addr, tar_addr, free0;
357 volatile uint64_t *lmt_data;
360 if (num > ROC_CN10K_NPA_BATCH_FREE_MAX_PTRS)
363 lmt_data = (uint64_t *)lmt_addr;
365 addr = roc_npa_aura_handle_to_base(aura_handle) +
366 NPA_LF_AURA_BATCH_FREE0;
/*
369 * NPA_LF_AURA_BATCH_FREE0
371 * 63 63 62 33 32 32 31 20 19 0
372 * -----------------------------------------
373 * | FABS | Rsvd | COUNT_EOT | Rsvd | AURA |
374 * -----------------------------------------
 */
376 free0 = roc_npa_aura_handle_to_aura(aura_handle);
378 free0 |= (0x1UL << 63);
380 free0 |= (0x1UL << 32);
382 /* tar_addr[4:6] is LMTST size-1 in units of 128b */
383 tar_addr = addr | ((num >> 1) << 4);
386 for (i = 0; i < num; i++)
387 lmt_data[i + 1] = buf[i];
/* Fire the LMTST; data reaches hardware after this release store. */
389 roc_lmt_submit_steorl(lmt_id, tar_addr);
/* CN10K batch-free driver loop: free 'num' pointers in chunks of up to
 * ROC_CN10K_NPA_BATCH_FREE_MAX_PTRS via roc_npa_aura_batch_free().
 * NOTE(review): extraction gaps — the return-type line, the trailing
 * 'lmt_id' parameter, the surrounding loop construct, and the
 * chunk-advance / memory-barrier tail are missing from this view.
 */
394 roc_npa_aura_op_batch_free(uint64_t aura_handle, uint64_t const *buf,
395 unsigned int num, const int fabs, uint64_t lmt_addr,
401 chunk = (num >= ROC_CN10K_NPA_BATCH_FREE_MAX_PTRS) ?
402 ROC_CN10K_NPA_BATCH_FREE_MAX_PTRS :
405 roc_npa_aura_batch_free(aura_handle, buf, chunk, fabs, lmt_addr,
/* CN9K bulk allocation of pointers from an aura.
 *
 * On aarch64 this dispatches (via a switch on 'num' whose labels are
 * missing from this view) to unrolled inline-asm paths that issue 15,
 * 8, 4, 2 or 1 CASP (compare-and-swap pair) operations against
 * NPA_LF_AURA_OP_ALLOCX(0); each CASP pops two pointers, so the paths
 * deliver 30/16/8/4/2 pointers per asm block. A final scalar
 * roc_npa_aura_op_alloc() handles a single pointer. Afterwards the
 * result array is compacted to drop NULL entries (empty-aura results).
 * On non-aarch64 a plain loop calls roc_npa_aura_op_alloc() per slot.
 *
 * NOTE(review): heavy extraction gaps throughout — the 'drop'
 * parameter, the 'if (drop)' guard, the 'switch (num)' skeleton, the
 * 'asm volatile(' openers, 'break;'s, and the return statements are
 * all missing from this view; only the asm string bodies survive.
 */
413 static inline unsigned int
414 roc_npa_aura_bulk_alloc(uint64_t aura_handle, uint64_t *buf, unsigned int num,
417 #if defined(__aarch64__)
418 uint64_t wdata = roc_npa_aura_handle_to_aura(aura_handle);
419 unsigned int i, count;
423 wdata |= BIT_ULL(63); /* DROP */
425 addr = roc_npa_aura_handle_to_base(aura_handle) +
426 NPA_LF_AURA_OP_ALLOCX(0);
/* 30-pointer path: needs all of x0-x29 to receive 15 CASP result
 * pairs, so dst/loc/wdata and x24-x30 are stashed in SIMD regs
 * v18-v22 for the duration and restored afterwards.
 */
432 "mov v18.d[0], %[dst]\n"
433 "mov v18.d[1], %[loc]\n"
434 "mov v19.d[0], %[wdata]\n"
435 "mov v19.d[1], x30\n"
436 "mov v20.d[0], x24\n"
437 "mov v20.d[1], x25\n"
438 "mov v21.d[0], x26\n"
439 "mov v21.d[1], x27\n"
440 "mov v22.d[0], x28\n"
441 "mov v22.d[1], x29\n"
/* Load wdata into the CASP desired pair, loc into x30. */
442 "mov x28, v19.d[0]\n"
443 "mov x29, v19.d[0]\n"
444 "mov x30, v18.d[1]\n"
/* 15 outstanding CASPs; each returns a pointer pair. */
445 "casp x0, x1, x28, x29, [x30]\n"
446 "casp x2, x3, x28, x29, [x30]\n"
447 "casp x4, x5, x28, x29, [x30]\n"
448 "casp x6, x7, x28, x29, [x30]\n"
449 "casp x8, x9, x28, x29, [x30]\n"
450 "casp x10, x11, x28, x29, [x30]\n"
451 "casp x12, x13, x28, x29, [x30]\n"
452 "casp x14, x15, x28, x29, [x30]\n"
453 "casp x16, x17, x28, x29, [x30]\n"
454 "casp x18, x19, x28, x29, [x30]\n"
455 "casp x20, x21, x28, x29, [x30]\n"
456 "casp x22, x23, x28, x29, [x30]\n"
457 "casp x24, x25, x28, x29, [x30]\n"
458 "casp x26, x27, x28, x29, [x30]\n"
459 "casp x28, x29, x28, x29, [x30]\n"
/* Store all 30 results out to dst. */
460 "mov x30, v18.d[0]\n"
461 "stp x0, x1, [x30]\n"
462 "stp x2, x3, [x30, #16]\n"
463 "stp x4, x5, [x30, #32]\n"
464 "stp x6, x7, [x30, #48]\n"
465 "stp x8, x9, [x30, #64]\n"
466 "stp x10, x11, [x30, #80]\n"
467 "stp x12, x13, [x30, #96]\n"
468 "stp x14, x15, [x30, #112]\n"
469 "stp x16, x17, [x30, #128]\n"
470 "stp x18, x19, [x30, #144]\n"
471 "stp x20, x21, [x30, #160]\n"
472 "stp x22, x23, [x30, #176]\n"
473 "stp x24, x25, [x30, #192]\n"
474 "stp x26, x27, [x30, #208]\n"
475 "stp x28, x29, [x30, #224]\n"
/* Restore stashed registers from v18-v22. */
476 "mov %[dst], v18.d[0]\n"
477 "mov %[loc], v18.d[1]\n"
478 "mov %[wdata], v19.d[0]\n"
479 "mov x30, v19.d[1]\n"
480 "mov x24, v20.d[0]\n"
481 "mov x25, v20.d[1]\n"
482 "mov x26, v21.d[0]\n"
483 "mov x27, v21.d[1]\n"
484 "mov x28, v22.d[0]\n"
485 "mov x29, v22.d[1]\n"
487 : [wdata] "r"(wdata), [loc] "r"(addr), [dst] "r"(buf)
488 : "memory", "x0", "x1", "x2", "x3", "x4", "x5", "x6",
489 "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14",
490 "x15", "x16", "x17", "x18", "x19", "x20", "x21",
491 "x22", "x23", "v18", "v19", "v20", "v21", "v22");
/* 16-pointer path: 8 CASPs, no register spilling needed. */
496 "mov x16, %[wdata]\n"
497 "mov x17, %[wdata]\n"
498 "casp x0, x1, x16, x17, [%[loc]]\n"
499 "casp x2, x3, x16, x17, [%[loc]]\n"
500 "casp x4, x5, x16, x17, [%[loc]]\n"
501 "casp x6, x7, x16, x17, [%[loc]]\n"
502 "casp x8, x9, x16, x17, [%[loc]]\n"
503 "casp x10, x11, x16, x17, [%[loc]]\n"
504 "casp x12, x13, x16, x17, [%[loc]]\n"
505 "casp x14, x15, x16, x17, [%[loc]]\n"
506 "stp x0, x1, [%[dst]]\n"
507 "stp x2, x3, [%[dst], #16]\n"
508 "stp x4, x5, [%[dst], #32]\n"
509 "stp x6, x7, [%[dst], #48]\n"
510 "stp x8, x9, [%[dst], #64]\n"
511 "stp x10, x11, [%[dst], #80]\n"
512 "stp x12, x13, [%[dst], #96]\n"
513 "stp x14, x15, [%[dst], #112]\n"
515 : [wdata] "r" (wdata), [dst] "r" (buf), [loc] "r" (addr)
516 : "memory", "x0", "x1", "x2", "x3", "x4", "x5", "x6",
517 "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14",
/* 8-pointer path: 4 CASPs. */
524 "mov x16, %[wdata]\n"
525 "mov x17, %[wdata]\n"
526 "casp x0, x1, x16, x17, [%[loc]]\n"
527 "casp x2, x3, x16, x17, [%[loc]]\n"
528 "casp x4, x5, x16, x17, [%[loc]]\n"
529 "casp x6, x7, x16, x17, [%[loc]]\n"
530 "stp x0, x1, [%[dst]]\n"
531 "stp x2, x3, [%[dst], #16]\n"
532 "stp x4, x5, [%[dst], #32]\n"
533 "stp x6, x7, [%[dst], #48]\n"
535 : [wdata] "r" (wdata), [dst] "r" (buf), [loc] "r" (addr)
536 : "memory", "x0", "x1", "x2", "x3", "x4", "x5", "x6",
/* 4-pointer path: 2 CASPs. */
543 "mov x16, %[wdata]\n"
544 "mov x17, %[wdata]\n"
545 "casp x0, x1, x16, x17, [%[loc]]\n"
546 "casp x2, x3, x16, x17, [%[loc]]\n"
547 "stp x0, x1, [%[dst]]\n"
548 "stp x2, x3, [%[dst], #16]\n"
550 : [wdata] "r" (wdata), [dst] "r" (buf), [loc] "r" (addr)
551 : "memory", "x0", "x1", "x2", "x3", "x16", "x17"
/* 2-pointer path: a single CASP. */
557 "mov x16, %[wdata]\n"
558 "mov x17, %[wdata]\n"
559 "casp x0, x1, x16, x17, [%[loc]]\n"
560 "stp x0, x1, [%[dst]]\n"
562 : [wdata] "r" (wdata), [dst] "r" (buf), [loc] "r" (addr)
563 : "memory", "x0", "x1", "x16", "x17"
/* Single-pointer fallback uses the scalar alloc op. */
567 buf[0] = roc_npa_aura_op_alloc(aura_handle, drop);
571 /* Pack the pointers */
572 for (i = 0, count = 0; i < num; i++)
574 buf[count++] = buf[i];
/* Non-aarch64 fallback: one scalar alloc per slot.
 * NOTE(review): the loop's empty-pointer break and the surrounding
 * #else/#endif plumbing are missing from this view.
 */
578 unsigned int i, count;
580 for (i = 0, count = 0; i < num; i++) {
581 buf[count] = roc_npa_aura_op_alloc(aura_handle, drop);
/* CN9K bulk-alloc driver loop: allocate 'num' pointers in chunks —
 * either the 30-pointer CASP maximum or the largest power of two not
 * exceeding the remainder (plt_align32prevpow2), matching the unrolled
 * paths in roc_npa_aura_bulk_alloc(). On a shortfall with 'partial'
 * false, everything allocated so far is freed back ('buf - count'
 * rewinds to the start of the output; fabs=1).
 * NOTE(review): extraction gaps — the loop construct, 'count'
 * accumulation, and the return statements are missing from this view.
 */
590 static inline unsigned int
591 roc_npa_aura_op_bulk_alloc(uint64_t aura_handle, uint64_t *buf,
592 unsigned int num, const int drop, const int partial)
594 unsigned int chunk, count, num_alloc;
598 chunk = (num >= ROC_CN9K_NPA_BULK_ALLOC_MAX_PTRS) ?
599 ROC_CN9K_NPA_BULK_ALLOC_MAX_PTRS :
600 plt_align32prevpow2(num);
603 roc_npa_aura_bulk_alloc(aura_handle, buf, chunk, drop);
609 if (unlikely(num_alloc != chunk))
613 /* If the requested number of pointers was not allocated and if partial
614 * alloc is not desired, then free allocated pointers.
 */
616 if (unlikely(num != 0 && !partial)) {
617 roc_npa_aura_op_bulk_free(aura_handle, buf - count, count, 1);
/* Tail of 'struct roc_npa' (the 'struct roc_npa {' opener is missing
 * from this view): a public handle holding the PCI device pointer plus
 * a cache-aligned opaque area reserved for the driver's private state.
 */
625 struct plt_pci_device *pci_dev;
627 #define ROC_NPA_MEM_SZ (1 * 1024)
628 uint8_t reserved[ROC_NPA_MEM_SZ] __plt_cache_aligned;
629 } __plt_cache_aligned;
/* Public ROC NPA API (implemented in the corresponding .c file). */
/* Device bring-up / teardown. */
631 int __roc_api roc_npa_dev_init(struct roc_npa *roc_npa);
632 int __roc_api roc_npa_dev_fini(struct roc_npa *roc_npa);
/* Pool/aura lifecycle and configuration. */
635 int __roc_api roc_npa_pool_create(uint64_t *aura_handle, uint32_t block_size,
636 uint32_t block_count, struct npa_aura_s *aura,
637 struct npa_pool_s *pool);
638 int __roc_api roc_npa_aura_limit_modify(uint64_t aura_handle,
639 uint16_t aura_limit);
640 int __roc_api roc_npa_pool_destroy(uint64_t aura_handle);
641 int __roc_api roc_npa_pool_range_update_check(uint64_t aura_handle);
/* NOTE(review): the remaining parameters of roc_npa_aura_op_range_set
 * (original lines 643-644, presumably the start/end IOVA range) are
 * missing from this view.
 */
642 void __roc_api roc_npa_aura_op_range_set(uint64_t aura_handle,
/* Debug helpers. */
647 int __roc_api roc_npa_ctx_dump(void);
648 int __roc_api roc_npa_dump(void);
650 /* Reset operation performance counter. */
651 int __roc_api roc_npa_pool_op_pc_reset(uint64_t aura_handle);
653 #endif /* _ROC_NPA_H_ */