1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(C) 2021 Marvell.
/* Low 16 bits of an aura handle carry the aura id. */
8 #define ROC_AURA_ID_MASK (BIT_ULL(16) - 1)
/* AURA_OP_LIMIT is a 36-bit value (see roc_npa_aura_op_limit_get/set). */
9 #define ROC_AURA_OP_LIMIT_MASK (BIT_ULL(36) - 1)
11 /* 16 CASP instructions can be outstanding in CN9k, but we use only 15
12 * outstanding CASPs as we run out of registers.
 *
 * Each CASP returns a pair of 64-bit pointers, so 15 CASPs yield
 * 15 * 2 = 30 pointers per bulk-alloc burst.
14 #define ROC_CN9K_NPA_BULK_ALLOC_MAX_PTRS 30
17 * Generate 64bit handle to have optimized alloc and free aura operation.
18 * 0 - ROC_AURA_ID_MASK for storing the aura_id.
19 * [ROC_AURA_ID_MASK+1, (2^64 - 1)] for storing the lf base address.
20 * This scheme is valid when OS can give ROC_AURA_ID_MASK
21 * aligned address for lf base address.
23 static inline uint64_t
24 roc_npa_aura_handle_gen(uint32_t aura_id, uintptr_t addr)
/* Pack the 16-bit aura id into the low bits of the (assumed aligned)
 * LF base address; the two halves are split back out by
 * roc_npa_aura_handle_to_aura()/_to_base() below.
 */
28 val = aura_id & ROC_AURA_ID_MASK;
29 return (uint64_t)addr | val;
32 static inline uint64_t
/* Extract the aura id (low 16 bits) from an aura handle. */
33 roc_npa_aura_handle_to_aura(uint64_t aura_handle)
35 return aura_handle & ROC_AURA_ID_MASK;
38 static inline uintptr_t
/* Extract the LF base address from an aura handle by clearing the
 * aura-id bits (valid because the base is ROC_AURA_ID_MASK aligned).
 */
39 roc_npa_aura_handle_to_base(uint64_t aura_handle)
41 return (uintptr_t)(aura_handle & ~ROC_AURA_ID_MASK)
44 static inline uint64_t
/* Allocate (pop) a single pointer from the aura.
 * @aura_handle: handle built by roc_npa_aura_handle_gen().
 * @drop: when non-zero, sets the DROP flag (bit 63) in the op word.
 * Returns the value read back from the AURA_OP_ALLOCX(0) register via
 * an unordered atomic add (the hardware returns the allocated pointer).
 */
45 roc_npa_aura_op_alloc(uint64_t aura_handle, const int drop)
47 uint64_t wdata = roc_npa_aura_handle_to_aura(aura_handle);
51 wdata |= BIT_ULL(63); /* DROP */
53 addr = (int64_t *)(roc_npa_aura_handle_to_base(aura_handle) +
54 NPA_LF_AURA_OP_ALLOCX(0));
55 return roc_atomic64_add_nosync(wdata, addr);
/* Free (push) a single pointer back to the aura.
 * @fabs: when non-zero, sets the FABS flag (bit 63) in the op word
 *        (flag set is conditional; the guard line is presumably
 *        `if (fabs)` — not visible in this listing).
 * The {iova, op-word} pair is written to the FREE0 register with a
 * store-pair so both halves land atomically.
 */
59 roc_npa_aura_op_free(uint64_t aura_handle, const int fabs, uint64_t iova)
61 uint64_t reg = roc_npa_aura_handle_to_aura(aura_handle);
63 roc_npa_aura_handle_to_base(aura_handle) + NPA_LF_AURA_OP_FREE0;
65 reg |= BIT_ULL(63); /* FABS */
67 roc_store_pair(iova, reg, addr);
70 static inline uint64_t
/* Read the aura's count register.
 * The aura id is placed in bits [63:44] of the op word and the read is
 * issued as an unordered atomic add. Bit 42 of the response flags an
 * operation error; the low 36 bits carry the count.
 */
71 roc_npa_aura_op_cnt_get(uint64_t aura_handle)
77 wdata = roc_npa_aura_handle_to_aura(aura_handle) << 44;
78 addr = (int64_t *)(roc_npa_aura_handle_to_base(aura_handle) +
80 reg = roc_atomic64_add_nosync(wdata, addr);
82 if (reg & BIT_ULL(42) /* OP_ERR */)
/* Mask to the 36-bit count field. */
85 return reg & 0xFFFFFFFFF;
/* Set or adjust the aura's count register.
 * @sign: selects add-vs-set mode — bit 43 (CNT_ADD) is set
 *        conditionally (guard line, presumably `if (sign)`, is not
 *        visible in this listing).
 * @count: 36-bit count value; higher bits are masked off.
 */
89 roc_npa_aura_op_cnt_set(uint64_t aura_handle, const int sign, uint64_t count)
91 uint64_t reg = count & (BIT_ULL(36) - 1);
94 reg |= BIT_ULL(43); /* CNT_ADD */
/* Aura id goes in bits [63:44] of the written word. */
96 reg |= (roc_npa_aura_handle_to_aura(aura_handle) << 44);
98 plt_write64(reg, roc_npa_aura_handle_to_base(aura_handle) +
102 static inline uint64_t
/* Read the aura's limit register (AURA_OP_LIMIT).
 * Same op-word encoding as roc_npa_aura_op_cnt_get(): aura id in bits
 * [63:44], OP_ERR in response bit 42, 36-bit payload in the low bits.
 */
103 roc_npa_aura_op_limit_get(uint64_t aura_handle)
109 wdata = roc_npa_aura_handle_to_aura(aura_handle) << 44;
110 addr = (int64_t *)(roc_npa_aura_handle_to_base(aura_handle) +
111 NPA_LF_AURA_OP_LIMIT);
112 reg = roc_atomic64_add_nosync(wdata, addr);
114 if (reg & BIT_ULL(42) /* OP_ERR */)
117 return reg & ROC_AURA_OP_LIMIT_MASK;
/* Write the aura's limit register.
 * @limit: 36-bit limit value (masked with ROC_AURA_OP_LIMIT_MASK);
 *         aura id is encoded in bits [63:44] of the written word.
 */
121 roc_npa_aura_op_limit_set(uint64_t aura_handle, uint64_t limit)
123 uint64_t reg = limit & ROC_AURA_OP_LIMIT_MASK;
125 reg |= (roc_npa_aura_handle_to_aura(aura_handle) << 44);
127 plt_write64(reg, roc_npa_aura_handle_to_base(aura_handle) +
128 NPA_LF_AURA_OP_LIMIT);
131 static inline uint64_t
/* Read the number of pointers available in the pool backing this aura
 * (POOL_OP_AVAILABLE register). Response bit 42 flags an operation
 * error; the low 36 bits carry the available count.
 */
132 roc_npa_aura_op_available(uint64_t aura_handle)
138 wdata = roc_npa_aura_handle_to_aura(aura_handle) << 44;
139 addr = (int64_t *)(roc_npa_aura_handle_to_base(aura_handle) +
140 NPA_LF_POOL_OP_AVAILABLE);
141 reg = roc_atomic64_add_nosync(wdata, addr);
143 if (reg & BIT_ULL(42) /* OP_ERR */)
/* Mask to the 36-bit count field. */
146 return reg & 0xFFFFFFFFF;
149 static inline uint64_t
/* Read the pool's operation performance counter (POOL_OP_PC).
 * The op word is built via the npa_aura_op_wdata_s layout; @drop
 * selects the DROP-qualified counter (bit set is conditional; the
 * `if (drop)` guard line is not visible in this listing).
 * Returns the 48-bit OP_PC value, or an error-path value when OP_ERR
 * (response bit 48) is set.
 */
150 roc_npa_pool_op_performance_counter(uint64_t aura_handle, const int drop)
154 struct npa_aura_op_wdata_s s;
160 op_wdata.s.aura = roc_npa_aura_handle_to_aura(aura_handle);
162 op_wdata.s.drop |= BIT_ULL(63); /* DROP */
164 addr = (int64_t *)(roc_npa_aura_handle_to_base(aura_handle) +
167 reg = roc_atomic64_add_nosync(op_wdata.u, addr);
169 * NPA_LF_POOL_OP_PC Read Data
172 * -----------------------------
173 * | Reserved | OP_ERR | OP_PC |
174 * -----------------------------
177 if (reg & BIT_ULL(48) /* OP_ERR */)
/* Mask to the 48-bit OP_PC field. */
180 return reg & 0xFFFFFFFFFFFF;
/* Free @num pointers from @buf back to the aura, one store-pair each.
 * @fabs is forwarded to roc_npa_aura_op_free() for every pointer.
 */
184 roc_npa_aura_op_bulk_free(uint64_t aura_handle, uint64_t const *buf,
185 unsigned int num, const int fabs)
189 for (i = 0; i < num; i++) {
190 const uint64_t inbuf = buf[i];
192 roc_npa_aura_op_free(aura_handle, fabs, inbuf);
196 static inline unsigned int
/* Bulk-allocate up to @num pointers into @buf using aarch64 LSE CASP
 * bursts against the AURA_OP_ALLOCX(0) register; each CASP returns a
 * pair of pointers. Non-aarch64 builds fall back to one-at-a-time
 * roc_npa_aura_op_alloc(). Returns the number of pointers obtained.
 * NOTE(review): the burst-size dispatch (presumably a switch on @num)
 * and several guard lines are not visible in this listing.
 */
197 roc_npa_aura_bulk_alloc(uint64_t aura_handle, uint64_t *buf, unsigned int num,
200 #if defined(__aarch64__)
201 uint64_t wdata = roc_npa_aura_handle_to_aura(aura_handle);
202 unsigned int i, count;
206 wdata |= BIT_ULL(63); /* DROP */
208 addr = roc_npa_aura_handle_to_base(aura_handle) +
209 NPA_LF_AURA_OP_ALLOCX(0);
/* 30-pointer burst: the 15 CASPs below consume x0-x29 as result
 * registers, so the asm operands and x24-x30 are first stashed in
 * SIMD scratch registers v18-v22 and restored afterwards.
 */
215 "mov v18.d[0], %[dst]\n"
216 "mov v18.d[1], %[loc]\n"
217 "mov v19.d[0], %[wdata]\n"
218 "mov v19.d[1], x30\n"
219 "mov v20.d[0], x24\n"
220 "mov v20.d[1], x25\n"
221 "mov v21.d[0], x26\n"
222 "mov v21.d[1], x27\n"
223 "mov v22.d[0], x28\n"
224 "mov v22.d[1], x29\n"
/* Load wdata into the compare pair and the register address into
 * x30, then issue 15 CASPs = 30 pointers.
 */
225 "mov x28, v19.d[0]\n"
226 "mov x29, v19.d[0]\n"
227 "mov x30, v18.d[1]\n"
228 "casp x0, x1, x28, x29, [x30]\n"
229 "casp x2, x3, x28, x29, [x30]\n"
230 "casp x4, x5, x28, x29, [x30]\n"
231 "casp x6, x7, x28, x29, [x30]\n"
232 "casp x8, x9, x28, x29, [x30]\n"
233 "casp x10, x11, x28, x29, [x30]\n"
234 "casp x12, x13, x28, x29, [x30]\n"
235 "casp x14, x15, x28, x29, [x30]\n"
236 "casp x16, x17, x28, x29, [x30]\n"
237 "casp x18, x19, x28, x29, [x30]\n"
238 "casp x20, x21, x28, x29, [x30]\n"
239 "casp x22, x23, x28, x29, [x30]\n"
240 "casp x24, x25, x28, x29, [x30]\n"
241 "casp x26, x27, x28, x29, [x30]\n"
242 "casp x28, x29, x28, x29, [x30]\n"
/* Spill all 30 results to the destination buffer. */
243 "mov x30, v18.d[0]\n"
244 "stp x0, x1, [x30]\n"
245 "stp x2, x3, [x30, #16]\n"
246 "stp x4, x5, [x30, #32]\n"
247 "stp x6, x7, [x30, #48]\n"
248 "stp x8, x9, [x30, #64]\n"
249 "stp x10, x11, [x30, #80]\n"
250 "stp x12, x13, [x30, #96]\n"
251 "stp x14, x15, [x30, #112]\n"
252 "stp x16, x17, [x30, #128]\n"
253 "stp x18, x19, [x30, #144]\n"
254 "stp x20, x21, [x30, #160]\n"
255 "stp x22, x23, [x30, #176]\n"
256 "stp x24, x25, [x30, #192]\n"
257 "stp x26, x27, [x30, #208]\n"
258 "stp x28, x29, [x30, #224]\n"
/* Restore operands and the manually saved x24-x30. */
259 "mov %[dst], v18.d[0]\n"
260 "mov %[loc], v18.d[1]\n"
261 "mov %[wdata], v19.d[0]\n"
262 "mov x30, v19.d[1]\n"
263 "mov x24, v20.d[0]\n"
264 "mov x25, v20.d[1]\n"
265 "mov x26, v21.d[0]\n"
266 "mov x27, v21.d[1]\n"
267 "mov x28, v22.d[0]\n"
268 "mov x29, v22.d[1]\n"
270 : [wdata] "r"(wdata), [loc] "r"(addr), [dst] "r"(buf)
271 : "memory", "x0", "x1", "x2", "x3", "x4", "x5", "x6",
272 "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14",
273 "x15", "x16", "x17", "x18", "x19", "x20", "x21",
274 "x22", "x23", "v18", "v19", "v20", "v21", "v22");
/* 8-CASP variant: 16 pointers, no register stash needed. */
279 "mov x16, %[wdata]\n"
280 "mov x17, %[wdata]\n"
281 "casp x0, x1, x16, x17, [%[loc]]\n"
282 "casp x2, x3, x16, x17, [%[loc]]\n"
283 "casp x4, x5, x16, x17, [%[loc]]\n"
284 "casp x6, x7, x16, x17, [%[loc]]\n"
285 "casp x8, x9, x16, x17, [%[loc]]\n"
286 "casp x10, x11, x16, x17, [%[loc]]\n"
287 "casp x12, x13, x16, x17, [%[loc]]\n"
288 "casp x14, x15, x16, x17, [%[loc]]\n"
289 "stp x0, x1, [%[dst]]\n"
290 "stp x2, x3, [%[dst], #16]\n"
291 "stp x4, x5, [%[dst], #32]\n"
292 "stp x6, x7, [%[dst], #48]\n"
293 "stp x8, x9, [%[dst], #64]\n"
294 "stp x10, x11, [%[dst], #80]\n"
295 "stp x12, x13, [%[dst], #96]\n"
296 "stp x14, x15, [%[dst], #112]\n"
298 : [wdata] "r" (wdata), [dst] "r" (buf), [loc] "r" (addr)
299 : "memory", "x0", "x1", "x2", "x3", "x4", "x5", "x6",
300 "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14",
/* 4-CASP variant: 8 pointers. */
307 "mov x16, %[wdata]\n"
308 "mov x17, %[wdata]\n"
309 "casp x0, x1, x16, x17, [%[loc]]\n"
310 "casp x2, x3, x16, x17, [%[loc]]\n"
311 "casp x4, x5, x16, x17, [%[loc]]\n"
312 "casp x6, x7, x16, x17, [%[loc]]\n"
313 "stp x0, x1, [%[dst]]\n"
314 "stp x2, x3, [%[dst], #16]\n"
315 "stp x4, x5, [%[dst], #32]\n"
316 "stp x6, x7, [%[dst], #48]\n"
318 : [wdata] "r" (wdata), [dst] "r" (buf), [loc] "r" (addr)
319 : "memory", "x0", "x1", "x2", "x3", "x4", "x5", "x6",
/* 2-CASP variant: 4 pointers. */
326 "mov x16, %[wdata]\n"
327 "mov x17, %[wdata]\n"
328 "casp x0, x1, x16, x17, [%[loc]]\n"
329 "casp x2, x3, x16, x17, [%[loc]]\n"
330 "stp x0, x1, [%[dst]]\n"
331 "stp x2, x3, [%[dst], #16]\n"
333 : [wdata] "r" (wdata), [dst] "r" (buf), [loc] "r" (addr)
334 : "memory", "x0", "x1", "x2", "x3", "x16", "x17"
/* 1-CASP variant: 2 pointers. */
340 "mov x16, %[wdata]\n"
341 "mov x17, %[wdata]\n"
342 "casp x0, x1, x16, x17, [%[loc]]\n"
343 "stp x0, x1, [%[dst]]\n"
345 : [wdata] "r" (wdata), [dst] "r" (buf), [loc] "r" (addr)
346 : "memory", "x0", "x1", "x16", "x17"
/* Single-pointer remainder uses the scalar alloc op. */
350 buf[0] = roc_npa_aura_op_alloc(aura_handle, drop);
354 /* Pack the pointers */
/* Compact the buffer in place; the per-element guard (presumably
 * skipping failed/NULL allocations) is not visible in this listing.
 */
355 for (i = 0, count = 0; i < num; i++)
357 buf[count++] = buf[i];
/* Scalar fallback path (non-aarch64): allocate one at a time. */
361 unsigned int i, count;
363 for (i = 0, count = 0; i < num; i++) {
364 buf[count] = roc_npa_aura_op_alloc(aura_handle, drop);
373 static inline unsigned int
/* Allocate @num pointers into @buf in chunks sized to match the CASP
 * burst variants of roc_npa_aura_bulk_alloc() (30-pointer max, else
 * the previous power of two). @drop is forwarded to the alloc op.
 * If fewer than @num pointers could be obtained and @partial is zero,
 * everything allocated so far is freed back and the caller gets
 * all-or-nothing semantics. Returns the number of pointers delivered.
 * NOTE(review): the chunking loop body and return statements are not
 * visible in this listing.
 */
374 roc_npa_aura_op_bulk_alloc(uint64_t aura_handle, uint64_t *buf,
375 unsigned int num, const int drop, const int partial)
377 unsigned int chunk, count, num_alloc;
/* Chunk sizes are 30 or a power of two so each request maps onto one
 * of the fixed-size asm burst variants.
 */
381 chunk = (num >= ROC_CN9K_NPA_BULK_ALLOC_MAX_PTRS) ?
382 ROC_CN9K_NPA_BULK_ALLOC_MAX_PTRS :
383 plt_align32prevpow2(num);
386 roc_npa_aura_bulk_alloc(aura_handle, buf, chunk, drop);
/* A short chunk means the aura ran dry; stop requesting more. */
392 if (unlikely(num_alloc != chunk))
396 /* If the requested number of pointers was not allocated and if partial
397 * alloc is not desired, then free allocated pointers.
399 if (unlikely(num != 0 && !partial)) {
/* buf has been advanced past the pointers taken so far; rewind by
 * count to free all of them (fabs=1).
 */
400 roc_npa_aura_op_bulk_free(aura_handle, buf - count, count, 1);
/* Public NPA device object (struct header line not visible here).
 * Callers fill in the PCI device; the reserved area is opaque,
 * cache-aligned storage for the driver's internal state.
 */
408 struct plt_pci_device *pci_dev;
410 #define ROC_NPA_MEM_SZ (1 * 1024)
411 uint8_t reserved[ROC_NPA_MEM_SZ] __plt_cache_aligned;
412 } __plt_cache_aligned;
/* Device init/teardown. */
414 int __roc_api roc_npa_dev_init(struct roc_npa *roc_npa);
415 int __roc_api roc_npa_dev_fini(struct roc_npa *roc_npa);
/* NPA pool/aura create and destroy; on success *aura_handle receives a
 * handle usable with the inline op helpers above.
 */
418 int __roc_api roc_npa_pool_create(uint64_t *aura_handle, uint32_t block_size,
419 uint32_t block_count, struct npa_aura_s *aura,
420 struct npa_pool_s *pool);
421 int __roc_api roc_npa_aura_limit_modify(uint64_t aura_handle,
422 uint16_t aura_limit);
423 int __roc_api roc_npa_pool_destroy(uint64_t aura_handle);
424 int __roc_api roc_npa_pool_range_update_check(uint64_t aura_handle);
425 void __roc_api roc_npa_aura_op_range_set(uint64_t aura_handle,
/* Debug helpers: dump NPA context/registers. */
430 int __roc_api roc_npa_ctx_dump(void);
431 int __roc_api roc_npa_dump(void);
433 /* Reset operation performance counter. */
434 int __roc_api roc_npa_pool_op_pc_reset(uint64_t aura_handle);
436 #endif /* _ROC_NPA_H_ */