/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2021 Marvell.
 */

#ifndef _ROC_NPA_H_
#define _ROC_NPA_H_

#define ROC_AURA_ID_MASK       (BIT_ULL(16) - 1)
#define ROC_AURA_OP_LIMIT_MASK (BIT_ULL(36) - 1)

/* CN9K supports 16 outstanding CASP instructions, but only 15 outstanding
 * CASPs are used here as we run out of general purpose registers. Each CASP
 * returns a pair of pointers, hence a maximum of 30 pointers per bulk alloc.
 */
#define ROC_CN9K_NPA_BULK_ALLOC_MAX_PTRS 30

/*
 * Generate a 64-bit aura handle so that aura alloc and free operations are
 * optimized:
 *  - the low bits (masked by ROC_AURA_ID_MASK) store the aura_id,
 *  - the remaining upper bits store the LF base address.
 * This scheme is valid only when the OS provides an LF base address aligned
 * to (ROC_AURA_ID_MASK + 1).
 */
static inline uint64_t
roc_npa_aura_handle_gen(uint32_t aura_id, uintptr_t addr)
{
        uint64_t val;

        val = aura_id & ROC_AURA_ID_MASK;
        return (uint64_t)addr | val;
}

static inline uint64_t
roc_npa_aura_handle_to_aura(uint64_t aura_handle)
{
        return aura_handle & ROC_AURA_ID_MASK;
}

static inline uintptr_t
roc_npa_aura_handle_to_base(uint64_t aura_handle)
{
        return (uintptr_t)(aura_handle & ~ROC_AURA_ID_MASK);
}
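
/*
 * Illustrative sketch (not part of the API): with an LF base address that is
 * aligned to (ROC_AURA_ID_MASK + 1), both the aura id and the base can be
 * recovered from the packed handle. Here "aura_id" and "lf_base" are
 * hypothetical values supplied by the caller:
 *
 *      uint64_t handle = roc_npa_aura_handle_gen(aura_id, lf_base);
 *
 *      // aura_id == roc_npa_aura_handle_to_aura(handle)
 *      // lf_base == roc_npa_aura_handle_to_base(handle)
 */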

/* Allocate (pop) one pointer from the aura. Returns the allocated pointer,
 * or 0 if no pointer was available or the operation failed. A non-zero drop
 * argument sets the DROP bit in the request word.
 */
static inline uint64_t
roc_npa_aura_op_alloc(uint64_t aura_handle, const int drop)
{
        uint64_t wdata = roc_npa_aura_handle_to_aura(aura_handle);
        int64_t *addr;

        if (drop)
                wdata |= BIT_ULL(63); /* DROP */

        addr = (int64_t *)(roc_npa_aura_handle_to_base(aura_handle) +
                           NPA_LF_AURA_OP_ALLOCX(0));
        return roc_atomic64_add_nosync(wdata, addr);
}

/* Free (push) a pointer back to the aura. A non-zero fabs argument sets the
 * FABS bit in the request word.
 */
static inline void
roc_npa_aura_op_free(uint64_t aura_handle, const int fabs, uint64_t iova)
{
        uint64_t reg = roc_npa_aura_handle_to_aura(aura_handle);
        const uint64_t addr =
                roc_npa_aura_handle_to_base(aura_handle) + NPA_LF_AURA_OP_FREE0;

        if (fabs)
                reg |= BIT_ULL(63); /* FABS */

        roc_store_pair(iova, reg, addr);
}
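
/*
 * Example usage (illustrative only; "handle" is a hypothetical aura handle
 * obtained from roc_npa_pool_create() and error handling is omitted):
 *
 *      uint64_t iova = roc_npa_aura_op_alloc(handle, 0);
 *
 *      if (iova != 0)
 *              roc_npa_aura_op_free(handle, 0, iova);
 */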

static inline uint64_t
roc_npa_aura_op_cnt_get(uint64_t aura_handle)
{
        uint64_t wdata;
        int64_t *addr;
        uint64_t reg;

        wdata = roc_npa_aura_handle_to_aura(aura_handle) << 44;
        addr = (int64_t *)(roc_npa_aura_handle_to_base(aura_handle) +
                           NPA_LF_AURA_OP_CNT);
        reg = roc_atomic64_add_nosync(wdata, addr);

        if (reg & BIT_ULL(42) /* OP_ERR */)
                return 0;
        else
                return reg & 0xFFFFFFFFF;
}

static inline void
roc_npa_aura_op_cnt_set(uint64_t aura_handle, const int sign, uint64_t count)
{
        uint64_t reg = count & (BIT_ULL(36) - 1);

        if (sign)
                reg |= BIT_ULL(43); /* CNT_ADD */

        reg |= (roc_npa_aura_handle_to_aura(aura_handle) << 44);

        plt_write64(reg, roc_npa_aura_handle_to_base(aura_handle) +
                                 NPA_LF_AURA_OP_CNT);
}
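
/*
 * Illustrative sketch ("handle" is a hypothetical aura handle): read the
 * current aura count and write a new one. A non-zero sign argument sets the
 * CNT_ADD bit in the written word; zero leaves it clear.
 *
 *      uint64_t cnt = roc_npa_aura_op_cnt_get(handle);
 *
 *      roc_npa_aura_op_cnt_set(handle, 0, cnt + 1);
 */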

static inline uint64_t
roc_npa_aura_op_limit_get(uint64_t aura_handle)
{
        uint64_t wdata;
        int64_t *addr;
        uint64_t reg;

        wdata = roc_npa_aura_handle_to_aura(aura_handle) << 44;
        addr = (int64_t *)(roc_npa_aura_handle_to_base(aura_handle) +
                           NPA_LF_AURA_OP_LIMIT);
        reg = roc_atomic64_add_nosync(wdata, addr);

        if (reg & BIT_ULL(42) /* OP_ERR */)
                return 0;
        else
                return reg & ROC_AURA_OP_LIMIT_MASK;
}

static inline void
roc_npa_aura_op_limit_set(uint64_t aura_handle, uint64_t limit)
{
        uint64_t reg = limit & ROC_AURA_OP_LIMIT_MASK;

        reg |= (roc_npa_aura_handle_to_aura(aura_handle) << 44);

        plt_write64(reg, roc_npa_aura_handle_to_base(aura_handle) +
                                 NPA_LF_AURA_OP_LIMIT);
}
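
/*
 * Illustrative sketch ("handle" is a hypothetical aura handle and 1024 is an
 * arbitrary example value): write the aura limit and read it back.
 *
 *      roc_npa_aura_op_limit_set(handle, 1024);
 *
 *      uint64_t limit = roc_npa_aura_op_limit_get(handle);
 */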

static inline uint64_t
roc_npa_aura_op_available(uint64_t aura_handle)
{
        uint64_t wdata;
        uint64_t reg;
        int64_t *addr;

        wdata = roc_npa_aura_handle_to_aura(aura_handle) << 44;
        addr = (int64_t *)(roc_npa_aura_handle_to_base(aura_handle) +
                           NPA_LF_POOL_OP_AVAILABLE);
        reg = roc_atomic64_add_nosync(wdata, addr);

        if (reg & BIT_ULL(42) /* OP_ERR */)
                return 0;
        else
                return reg & 0xFFFFFFFFF;
}

static inline uint64_t
roc_npa_pool_op_performance_counter(uint64_t aura_handle, const int drop)
{
        union {
                uint64_t u;
                struct npa_aura_op_wdata_s s;
        } op_wdata;
        int64_t *addr;
        uint64_t reg;

        op_wdata.u = 0;
        op_wdata.s.aura = roc_npa_aura_handle_to_aura(aura_handle);
        if (drop)
                op_wdata.s.drop = 1; /* DROP */

        addr = (int64_t *)(roc_npa_aura_handle_to_base(aura_handle) +
                           NPA_LF_POOL_OP_PC);

        reg = roc_atomic64_add_nosync(op_wdata.u, addr);
        /*
         * NPA_LF_POOL_OP_PC Read Data
         *
         * 63       49 48    48 47     0
         * -----------------------------
         * | Reserved | OP_ERR | OP_PC |
         * -----------------------------
         */

        if (reg & BIT_ULL(48) /* OP_ERR */)
                return 0;
        else
                return reg & 0xFFFFFFFFFFFF;
}
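
/*
 * Illustrative sketch ("handle" is a hypothetical aura handle): read the
 * pool's operation performance counter. As decoded above, a return value of
 * 0 also covers the OP_ERR case.
 *
 *      uint64_t op_pc = roc_npa_pool_op_performance_counter(handle, 0);
 */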

static inline void
roc_npa_aura_op_bulk_free(uint64_t aura_handle, uint64_t const *buf,
                          unsigned int num, const int fabs)
{
        unsigned int i;

        for (i = 0; i < num; i++) {
                const uint64_t inbuf = buf[i];

                roc_npa_aura_op_free(aura_handle, fabs, inbuf);
        }
}

static inline unsigned int
roc_npa_aura_bulk_alloc(uint64_t aura_handle, uint64_t *buf, unsigned int num,
                        const int drop)
{
#if defined(__aarch64__)
        uint64_t wdata = roc_npa_aura_handle_to_aura(aura_handle);
        unsigned int i, count;
        uint64_t addr;

        if (drop)
                wdata |= BIT_ULL(63); /* DROP */

        addr = roc_npa_aura_handle_to_base(aura_handle) +
               NPA_LF_AURA_OP_ALLOCX(0);

        switch (num) {
        case 30:
                asm volatile(
                        ".cpu  generic+lse\n"
                        "mov v18.d[0], %[dst]\n"
                        "mov v18.d[1], %[loc]\n"
                        "mov v19.d[0], %[wdata]\n"
                        "mov v19.d[1], x30\n"
                        "mov v20.d[0], x24\n"
                        "mov v20.d[1], x25\n"
                        "mov v21.d[0], x26\n"
                        "mov v21.d[1], x27\n"
                        "mov v22.d[0], x28\n"
                        "mov v22.d[1], x29\n"
                        "mov x28, v19.d[0]\n"
                        "mov x29, v19.d[0]\n"
                        "mov x30, v18.d[1]\n"
                        "casp x0, x1, x28, x29, [x30]\n"
                        "casp x2, x3, x28, x29, [x30]\n"
                        "casp x4, x5, x28, x29, [x30]\n"
                        "casp x6, x7, x28, x29, [x30]\n"
                        "casp x8, x9, x28, x29, [x30]\n"
                        "casp x10, x11, x28, x29, [x30]\n"
                        "casp x12, x13, x28, x29, [x30]\n"
                        "casp x14, x15, x28, x29, [x30]\n"
                        "casp x16, x17, x28, x29, [x30]\n"
                        "casp x18, x19, x28, x29, [x30]\n"
                        "casp x20, x21, x28, x29, [x30]\n"
                        "casp x22, x23, x28, x29, [x30]\n"
                        "casp x24, x25, x28, x29, [x30]\n"
                        "casp x26, x27, x28, x29, [x30]\n"
                        "casp x28, x29, x28, x29, [x30]\n"
                        "mov x30, v18.d[0]\n"
                        "stp x0, x1, [x30]\n"
                        "stp x2, x3, [x30, #16]\n"
                        "stp x4, x5, [x30, #32]\n"
                        "stp x6, x7, [x30, #48]\n"
                        "stp x8, x9, [x30, #64]\n"
                        "stp x10, x11, [x30, #80]\n"
                        "stp x12, x13, [x30, #96]\n"
                        "stp x14, x15, [x30, #112]\n"
                        "stp x16, x17, [x30, #128]\n"
                        "stp x18, x19, [x30, #144]\n"
                        "stp x20, x21, [x30, #160]\n"
                        "stp x22, x23, [x30, #176]\n"
                        "stp x24, x25, [x30, #192]\n"
                        "stp x26, x27, [x30, #208]\n"
                        "stp x28, x29, [x30, #224]\n"
                        "mov %[dst], v18.d[0]\n"
                        "mov %[loc], v18.d[1]\n"
                        "mov %[wdata], v19.d[0]\n"
                        "mov x30, v19.d[1]\n"
                        "mov x24, v20.d[0]\n"
                        "mov x25, v20.d[1]\n"
                        "mov x26, v21.d[0]\n"
                        "mov x27, v21.d[1]\n"
                        "mov x28, v22.d[0]\n"
                        "mov x29, v22.d[1]\n"
                        :
                        : [wdata] "r"(wdata), [loc] "r"(addr), [dst] "r"(buf)
                        : "memory", "x0", "x1", "x2", "x3", "x4", "x5", "x6",
                          "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14",
                          "x15", "x16", "x17", "x18", "x19", "x20", "x21",
                          "x22", "x23", "v18", "v19", "v20", "v21", "v22");
                break;
        case 16:
                asm volatile(
                        ".cpu  generic+lse\n"
                        "mov x16, %[wdata]\n"
                        "mov x17, %[wdata]\n"
                        "casp x0, x1, x16, x17, [%[loc]]\n"
                        "casp x2, x3, x16, x17, [%[loc]]\n"
                        "casp x4, x5, x16, x17, [%[loc]]\n"
                        "casp x6, x7, x16, x17, [%[loc]]\n"
                        "casp x8, x9, x16, x17, [%[loc]]\n"
                        "casp x10, x11, x16, x17, [%[loc]]\n"
                        "casp x12, x13, x16, x17, [%[loc]]\n"
                        "casp x14, x15, x16, x17, [%[loc]]\n"
                        "stp x0, x1, [%[dst]]\n"
                        "stp x2, x3, [%[dst], #16]\n"
                        "stp x4, x5, [%[dst], #32]\n"
                        "stp x6, x7, [%[dst], #48]\n"
                        "stp x8, x9, [%[dst], #64]\n"
                        "stp x10, x11, [%[dst], #80]\n"
                        "stp x12, x13, [%[dst], #96]\n"
                        "stp x14, x15, [%[dst], #112]\n"
                        :
                        : [wdata] "r" (wdata), [dst] "r" (buf), [loc] "r" (addr)
                        : "memory", "x0", "x1", "x2", "x3", "x4", "x5", "x6",
                          "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14",
                          "x15", "x16", "x17"
                );
                break;
        case 8:
                asm volatile(
                        ".cpu  generic+lse\n"
                        "mov x16, %[wdata]\n"
                        "mov x17, %[wdata]\n"
                        "casp x0, x1, x16, x17, [%[loc]]\n"
                        "casp x2, x3, x16, x17, [%[loc]]\n"
                        "casp x4, x5, x16, x17, [%[loc]]\n"
                        "casp x6, x7, x16, x17, [%[loc]]\n"
                        "stp x0, x1, [%[dst]]\n"
                        "stp x2, x3, [%[dst], #16]\n"
                        "stp x4, x5, [%[dst], #32]\n"
                        "stp x6, x7, [%[dst], #48]\n"
                        :
                        : [wdata] "r" (wdata), [dst] "r" (buf), [loc] "r" (addr)
                        : "memory", "x0", "x1", "x2", "x3", "x4", "x5", "x6",
                          "x7", "x16", "x17"
                );
                break;
        case 4:
                asm volatile(
                        ".cpu  generic+lse\n"
                        "mov x16, %[wdata]\n"
                        "mov x17, %[wdata]\n"
                        "casp x0, x1, x16, x17, [%[loc]]\n"
                        "casp x2, x3, x16, x17, [%[loc]]\n"
                        "stp x0, x1, [%[dst]]\n"
                        "stp x2, x3, [%[dst], #16]\n"
                        :
                        : [wdata] "r" (wdata), [dst] "r" (buf), [loc] "r" (addr)
                        : "memory", "x0", "x1", "x2", "x3", "x16", "x17"
                );
                break;
        case 2:
                asm volatile(
                        ".cpu  generic+lse\n"
                        "mov x16, %[wdata]\n"
                        "mov x17, %[wdata]\n"
                        "casp x0, x1, x16, x17, [%[loc]]\n"
                        "stp x0, x1, [%[dst]]\n"
                        :
                        : [wdata] "r" (wdata), [dst] "r" (buf), [loc] "r" (addr)
                        : "memory", "x0", "x1", "x16", "x17"
                );
                break;
        case 1:
                buf[0] = roc_npa_aura_op_alloc(aura_handle, drop);
                return !!buf[0];
        }

        /* Pack the pointers */
        for (i = 0, count = 0; i < num; i++)
                if (buf[i])
                        buf[count++] = buf[i];

        return count;
#else
        unsigned int i, count;

        for (i = 0, count = 0; i < num; i++) {
                buf[count] = roc_npa_aura_op_alloc(aura_handle, drop);
                if (buf[count])
                        count++;
        }

        return count;
#endif
}

static inline unsigned int
roc_npa_aura_op_bulk_alloc(uint64_t aura_handle, uint64_t *buf,
                           unsigned int num, const int drop, const int partial)
{
        unsigned int chunk, count, num_alloc;

        count = 0;
        while (num) {
                chunk = (num >= ROC_CN9K_NPA_BULK_ALLOC_MAX_PTRS) ?
                                      ROC_CN9K_NPA_BULK_ALLOC_MAX_PTRS :
                                      plt_align32prevpow2(num);

                num_alloc =
                        roc_npa_aura_bulk_alloc(aura_handle, buf, chunk, drop);

                count += num_alloc;
                buf += num_alloc;
                num -= num_alloc;

                if (unlikely(num_alloc != chunk))
                        break;
        }

        /* If the requested number of pointers was not allocated and if partial
         * alloc is not desired, then free allocated pointers.
         */
        if (unlikely(num != 0 && !partial)) {
                roc_npa_aura_op_bulk_free(aura_handle, buf - count, count, 1);
                count = 0;
        }

        return count;
}
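
/*
 * Example usage (illustrative only; "handle" is a hypothetical aura handle):
 * allocate up to 64 pointers, accept a partial result, then return whatever
 * was obtained to the aura.
 *
 *      uint64_t bufs[64];
 *      unsigned int n;
 *
 *      n = roc_npa_aura_op_bulk_alloc(handle, bufs, 64, 0, 1);
 *      roc_npa_aura_op_bulk_free(handle, bufs, n, 0);
 */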

struct roc_npa {
        struct plt_pci_device *pci_dev;

#define ROC_NPA_MEM_SZ (1 * 1024)
        uint8_t reserved[ROC_NPA_MEM_SZ] __plt_cache_aligned;
} __plt_cache_aligned;

int __roc_api roc_npa_dev_init(struct roc_npa *roc_npa);
int __roc_api roc_npa_dev_fini(struct roc_npa *roc_npa);

/* NPA pool */
int __roc_api roc_npa_pool_create(uint64_t *aura_handle, uint32_t block_size,
                                  uint32_t block_count, struct npa_aura_s *aura,
                                  struct npa_pool_s *pool);
int __roc_api roc_npa_aura_limit_modify(uint64_t aura_handle,
                                        uint16_t aura_limit);
int __roc_api roc_npa_pool_destroy(uint64_t aura_handle);
int __roc_api roc_npa_pool_range_update_check(uint64_t aura_handle);
void __roc_api roc_npa_aura_op_range_set(uint64_t aura_handle,
                                         uint64_t start_iova,
                                         uint64_t end_iova);
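
/*
 * Illustrative pool lifecycle sketch (error handling omitted; the block size,
 * block count and zero-initialized aura/pool contexts are example values
 * only, not a recommended configuration):
 *
 *      struct npa_aura_s aura;
 *      struct npa_pool_s pool;
 *      uint64_t handle;
 *      int rc;
 *
 *      memset(&aura, 0, sizeof(aura));
 *      memset(&pool, 0, sizeof(pool));
 *      rc = roc_npa_pool_create(&handle, 2048, 1024, &aura, &pool);
 *      if (rc == 0) {
 *              // ... use the roc_npa_aura_op_* helpers with handle ...
 *              roc_npa_pool_destroy(handle);
 *      }
 */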

/* Debug */
int __roc_api roc_npa_ctx_dump(void);
int __roc_api roc_npa_dump(void);

/* Reset operation performance counter. */
int __roc_api roc_npa_pool_op_pc_reset(uint64_t aura_handle);

#endif /* _ROC_NPA_H_ */