/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2019 Marvell International Ltd.
 */

#include <rte_mempool.h>
#include <rte_vect.h>

#include "otx2_mempool.h"

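/*
 * rte_mempool ops backed by the OCTEON TX2 NPA (Network Pool Accelerator).
 * Each mempool maps to a hardware aura/pool pair: the pool owns the buffer
 * memory and its free-pointer stack, while the aura fronts it with counters
 * and allocation/free doorbells driven by plain load/store operations.
 */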
static int __rte_hot
otx2_npa_enq(struct rte_mempool *mp, void * const *obj_table, unsigned int n)
{
	unsigned int index;
	const uint64_t aura_handle = mp->pool_id;
	const uint64_t reg = npa_lf_aura_handle_to_aura(aura_handle);
	const uint64_t addr = npa_lf_aura_handle_to_base(aura_handle) +
				 NPA_LF_AURA_OP_FREE0;

	/* Ensure mbuf init changes are written before the free pointers
	 * are enqueued to the stack.
	 */
	rte_io_wmb();
	for (index = 0; index < n; index++)
		otx2_store_pair((uint64_t)obj_table[index], reg, addr);

	return 0;
}

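/*
 * A single allocation is a 64-bit atomic add to the aura's OP_ALLOCX
 * register: the load half of the atomic returns the popped buffer pointer,
 * or 0 when the pool is empty. Hence the NULL checks below.
 */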
static __rte_noinline int
npa_lf_aura_op_alloc_one(const int64_t wdata, int64_t * const addr,
			 void **obj_table, uint8_t i)
{
	uint8_t retry = 4;

	do {
		obj_table[i] = (void *)otx2_atomic64_add_nosync(wdata, addr);
		if (obj_table[i] != NULL)
			return 0;

	} while (retry--);

	return -ENOENT;
}

#if defined(RTE_ARCH_ARM64)
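/*
 * arm64 fast path: an LSE CASP issues a 128-bit atomic to the OP_ALLOCX
 * location, popping two pointers per instruction. The bulk routine below
 * unrolls this for power-of-two burst sizes.
 */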
static __rte_noinline int
npa_lf_aura_op_search_alloc(const int64_t wdata, int64_t * const addr,
			    void **obj_table, unsigned int n)
{
	uint8_t i;

	for (i = 0; i < n; i++) {
		if (obj_table[i] != NULL)
			continue;
		if (npa_lf_aura_op_alloc_one(wdata, addr, obj_table, i))
			return -ENOENT;
	}

	return 0;
}

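/*
 * Bulk allocation notes:
 * - wdata (the aura id) is pinned in x26/x27 so every casp can reuse it.
 * - Popped pointers land in x0..x23 register pairs and are shuffled into
 *   v16-v23 with fmov, interleaved with further casp ops to hide latency.
 * - 'failed' accumulates a bitwise AND of every popped pointer; a failed
 *   pop yields 0 and forces the AND to 0. This assumes valid pointers from
 *   one pool share their upper address bits (all buffers come from a single
 *   contiguous memzone), so ANDing them never produces zero.
 */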
static __rte_noinline int
npa_lf_aura_op_alloc_bulk(const int64_t wdata, int64_t * const addr,
			  unsigned int n, void **obj_table)
{
	register const uint64_t wdata64 __asm("x26") = wdata;
	register const uint64_t wdata128 __asm("x27") = wdata;
	uint64x2_t failed = vdupq_n_u64(~0);

	switch (n) {
	case 32:
	{
		asm volatile (
		".cpu  generic+lse\n"
74 "casp x0, x1, %[wdata64], %[wdata128], [%[loc]]\n"
75 "casp x2, x3, %[wdata64], %[wdata128], [%[loc]]\n"
76 "casp x4, x5, %[wdata64], %[wdata128], [%[loc]]\n"
77 "casp x6, x7, %[wdata64], %[wdata128], [%[loc]]\n"
78 "casp x8, x9, %[wdata64], %[wdata128], [%[loc]]\n"
79 "casp x10, x11, %[wdata64], %[wdata128], [%[loc]]\n"
80 "casp x12, x13, %[wdata64], %[wdata128], [%[loc]]\n"
81 "casp x14, x15, %[wdata64], %[wdata128], [%[loc]]\n"
82 "casp x16, x17, %[wdata64], %[wdata128], [%[loc]]\n"
83 "casp x18, x19, %[wdata64], %[wdata128], [%[loc]]\n"
84 "casp x20, x21, %[wdata64], %[wdata128], [%[loc]]\n"
85 "casp x22, x23, %[wdata64], %[wdata128], [%[loc]]\n"
88 "casp x0, x1, %[wdata64], %[wdata128], [%[loc]]\n"
91 "casp x2, x3, %[wdata64], %[wdata128], [%[loc]]\n"
94 "casp x4, x5, %[wdata64], %[wdata128], [%[loc]]\n"
97 "casp x6, x7, %[wdata64], %[wdata128], [%[loc]]\n"
98 "and %[failed].16B, %[failed].16B, v16.16B\n"
99 "and %[failed].16B, %[failed].16B, v17.16B\n"
100 "and %[failed].16B, %[failed].16B, v18.16B\n"
101 "and %[failed].16B, %[failed].16B, v19.16B\n"
103 "fmov v20.D[1], x9\n"
105 "fmov v21.D[1], x11\n"
107 "fmov v22.D[1], x13\n"
109 "fmov v23.D[1], x15\n"
110 "and %[failed].16B, %[failed].16B, v20.16B\n"
111 "and %[failed].16B, %[failed].16B, v21.16B\n"
112 "and %[failed].16B, %[failed].16B, v22.16B\n"
113 "and %[failed].16B, %[failed].16B, v23.16B\n"
114 "st1 { v16.2d, v17.2d, v18.2d, v19.2d}, [%[dst]], 64\n"
115 "st1 { v20.2d, v21.2d, v22.2d, v23.2d}, [%[dst]], 64\n"
117 "fmov v16.D[1], x17\n"
119 "fmov v17.D[1], x19\n"
121 "fmov v18.D[1], x21\n"
123 "fmov v19.D[1], x23\n"
124 "and %[failed].16B, %[failed].16B, v16.16B\n"
125 "and %[failed].16B, %[failed].16B, v17.16B\n"
126 "and %[failed].16B, %[failed].16B, v18.16B\n"
127 "and %[failed].16B, %[failed].16B, v19.16B\n"
129 "fmov v20.D[1], x1\n"
131 "fmov v21.D[1], x3\n"
133 "fmov v22.D[1], x5\n"
135 "fmov v23.D[1], x7\n"
136 "and %[failed].16B, %[failed].16B, v20.16B\n"
137 "and %[failed].16B, %[failed].16B, v21.16B\n"
138 "and %[failed].16B, %[failed].16B, v22.16B\n"
139 "and %[failed].16B, %[failed].16B, v23.16B\n"
140 "st1 { v16.2d, v17.2d, v18.2d, v19.2d}, [%[dst]], 64\n"
141 "st1 { v20.2d, v21.2d, v22.2d, v23.2d}, [%[dst]], 64\n"
142 : "+Q" (*addr), [failed] "=&w" (failed)
143 : [wdata64] "r" (wdata64), [wdata128] "r" (wdata128),
144 [dst] "r" (obj_table), [loc] "r" (addr)
145 : "memory", "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
146 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16",
147 "x17", "x18", "x19", "x20", "x21", "x22", "x23", "v16", "v17",
148 "v18", "v19", "v20", "v21", "v22", "v23"
156 "casp x0, x1, %[wdata64], %[wdata128], [%[loc]]\n"
157 "casp x2, x3, %[wdata64], %[wdata128], [%[loc]]\n"
158 "casp x4, x5, %[wdata64], %[wdata128], [%[loc]]\n"
159 "casp x6, x7, %[wdata64], %[wdata128], [%[loc]]\n"
160 "casp x8, x9, %[wdata64], %[wdata128], [%[loc]]\n"
161 "casp x10, x11, %[wdata64], %[wdata128], [%[loc]]\n"
162 "casp x12, x13, %[wdata64], %[wdata128], [%[loc]]\n"
163 "casp x14, x15, %[wdata64], %[wdata128], [%[loc]]\n"
165 "fmov v16.D[1], x1\n"
167 "fmov v17.D[1], x3\n"
169 "fmov v18.D[1], x5\n"
171 "fmov v19.D[1], x7\n"
172 "and %[failed].16B, %[failed].16B, v16.16B\n"
173 "and %[failed].16B, %[failed].16B, v17.16B\n"
174 "and %[failed].16B, %[failed].16B, v18.16B\n"
175 "and %[failed].16B, %[failed].16B, v19.16B\n"
177 "fmov v20.D[1], x9\n"
179 "fmov v21.D[1], x11\n"
181 "fmov v22.D[1], x13\n"
183 "fmov v23.D[1], x15\n"
184 "and %[failed].16B, %[failed].16B, v20.16B\n"
185 "and %[failed].16B, %[failed].16B, v21.16B\n"
186 "and %[failed].16B, %[failed].16B, v22.16B\n"
187 "and %[failed].16B, %[failed].16B, v23.16B\n"
188 "st1 { v16.2d, v17.2d, v18.2d, v19.2d}, [%[dst]], 64\n"
189 "st1 { v20.2d, v21.2d, v22.2d, v23.2d}, [%[dst]], 64\n"
190 : "+Q" (*addr), [failed] "=&w" (failed)
191 : [wdata64] "r" (wdata64), [wdata128] "r" (wdata128),
192 [dst] "r" (obj_table), [loc] "r" (addr)
193 : "memory", "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
194 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "v16",
195 "v17", "v18", "v19", "v20", "v21", "v22", "v23"
203 "casp x0, x1, %[wdata64], %[wdata128], [%[loc]]\n"
204 "casp x2, x3, %[wdata64], %[wdata128], [%[loc]]\n"
205 "casp x4, x5, %[wdata64], %[wdata128], [%[loc]]\n"
206 "casp x6, x7, %[wdata64], %[wdata128], [%[loc]]\n"
208 "fmov v16.D[1], x1\n"
210 "fmov v17.D[1], x3\n"
212 "fmov v18.D[1], x5\n"
214 "fmov v19.D[1], x7\n"
215 "and %[failed].16B, %[failed].16B, v16.16B\n"
216 "and %[failed].16B, %[failed].16B, v17.16B\n"
217 "and %[failed].16B, %[failed].16B, v18.16B\n"
218 "and %[failed].16B, %[failed].16B, v19.16B\n"
219 "st1 { v16.2d, v17.2d, v18.2d, v19.2d}, [%[dst]], 64\n"
220 : "+Q" (*addr), [failed] "=&w" (failed)
221 : [wdata64] "r" (wdata64), [wdata128] "r" (wdata128),
222 [dst] "r" (obj_table), [loc] "r" (addr)
223 : "memory", "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
224 "v16", "v17", "v18", "v19"
232 "casp x0, x1, %[wdata64], %[wdata128], [%[loc]]\n"
233 "casp x2, x3, %[wdata64], %[wdata128], [%[loc]]\n"
235 "fmov v16.D[1], x1\n"
237 "fmov v17.D[1], x3\n"
238 "and %[failed].16B, %[failed].16B, v16.16B\n"
239 "and %[failed].16B, %[failed].16B, v17.16B\n"
240 "st1 { v16.2d, v17.2d}, [%[dst]], 32\n"
241 : "+Q" (*addr), [failed] "=&w" (failed)
242 : [wdata64] "r" (wdata64), [wdata128] "r" (wdata128),
243 [dst] "r" (obj_table), [loc] "r" (addr)
244 : "memory", "x0", "x1", "x2", "x3", "v16", "v17"
252 "casp x0, x1, %[wdata64], %[wdata128], [%[loc]]\n"
254 "fmov v16.D[1], x1\n"
255 "and %[failed].16B, %[failed].16B, v16.16B\n"
256 "st1 { v16.2d}, [%[dst]], 16\n"
257 : "+Q" (*addr), [failed] "=&w" (failed)
258 : [wdata64] "r" (wdata64), [wdata128] "r" (wdata128),
259 [dst] "r" (obj_table), [loc] "r" (addr)
260 : "memory", "x0", "x1", "v16"
	case 1:
		return npa_lf_aura_op_alloc_one(wdata, addr, obj_table, 0);
	}

	if (unlikely(!(vgetq_lane_u64(failed, 0) & vgetq_lane_u64(failed, 1))))
		return npa_lf_aura_op_search_alloc(wdata, addr, (void **)
			((char *)obj_table - (sizeof(uint64_t) * n)), n);

	return 0;
}

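/*
 * On a partial bulk failure the obj_table holds a mix of valid pointers
 * and NULLs; return the valid ones to the aura before reporting -ENOENT.
 */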
static __rte_noinline void
otx2_npa_clear_alloc(struct rte_mempool *mp, void **obj_table, unsigned int n)
{
	unsigned int i;

	for (i = 0; i < n; i++) {
		if (obj_table[i] != NULL) {
			otx2_npa_enq(mp, &obj_table[i], 1);
			obj_table[i] = NULL;
		}
	}
}

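/*
 * Dequeue by splitting the burst into chunks the bulk routine can handle:
 * 32 at a time while more than 31 remain, then the largest power of two
 * not exceeding the remainder, which the switch in the bulk routine (down
 * to case 1) can always serve.
 */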
static __rte_noinline int __rte_hot
otx2_npa_deq_arm64(struct rte_mempool *mp, void **obj_table, unsigned int n)
{
	const int64_t wdata = npa_lf_aura_handle_to_aura(mp->pool_id);
	void **obj_table_bak = obj_table;
	const unsigned int nfree = n;
	unsigned int parts;

	int64_t * const addr = (int64_t * const)
		(npa_lf_aura_handle_to_base(mp->pool_id) +
		 NPA_LF_AURA_OP_ALLOCX(0));
	while (n) {
		parts = n > 31 ? 32 : rte_align32prevpow2(n);
		n -= parts;
		if (unlikely(npa_lf_aura_op_alloc_bulk(wdata, addr,
				parts, obj_table))) {
			otx2_npa_clear_alloc(mp, obj_table_bak, nfree - n);
			return -ENOENT;
		}
		obj_table += parts;
	}

	return 0;
}

#else

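/*
 * Fallback for non-arm64 builds: pop one pointer at a time and unwind on
 * failure so a burst either fully succeeds or fully fails.
 */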
static inline int __rte_hot
otx2_npa_deq(struct rte_mempool *mp, void **obj_table, unsigned int n)
{
	const int64_t wdata = npa_lf_aura_handle_to_aura(mp->pool_id);
	unsigned int index;

	int64_t * const addr = (int64_t *)
		(npa_lf_aura_handle_to_base(mp->pool_id) +
		 NPA_LF_AURA_OP_ALLOCX(0));
	for (index = 0; index < n; index++, obj_table++) {
		/* npa_lf_aura_op_alloc_one() stores the popped pointer
		 * into *obj_table and returns non-zero on failure.
		 */
		if (npa_lf_aura_op_alloc_one(wdata, addr, obj_table, 0)) {
			/* Undo the partial allocation */
			for (; index > 0; index--) {
				obj_table--;
				otx2_npa_enq(mp, obj_table, 1);
			}
			return -ENOENT;
		}
	}

	return 0;
}

#endif

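/*
 * Note: the available count read below is a point-in-time hardware
 * snapshot and can be stale by the time the caller acts on it.
 */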
static unsigned int
otx2_npa_get_count(const struct rte_mempool *mp)
{
	return (unsigned int)npa_lf_aura_op_available(mp->pool_id);
}

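/*
 * Context management goes through the AF admin queue over the shared
 * mailbox: an INIT for the aura context and an INIT for the pool context
 * are batched into one mailbox message and acknowledged individually
 * (hence the rc == 2 check on the reply count below).
 */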
static int
npa_lf_aura_pool_init(struct otx2_mbox *mbox, uint32_t aura_id,
		      struct npa_aura_s *aura, struct npa_pool_s *pool)
{
	struct npa_aq_enq_req *aura_init_req, *pool_init_req;
	struct npa_aq_enq_rsp *aura_init_rsp, *pool_init_rsp;
	struct otx2_mbox_dev *mdev = &mbox->dev[0];
	struct otx2_idev_cfg *idev;
	int rc, off;

	idev = otx2_intra_dev_get_cfg();
	if (idev == NULL)
		return -ENOMEM;

	aura_init_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox);

	aura_init_req->aura_id = aura_id;
	aura_init_req->ctype = NPA_AQ_CTYPE_AURA;
	aura_init_req->op = NPA_AQ_INSTOP_INIT;
	otx2_mbox_memcpy(&aura_init_req->aura, aura, sizeof(*aura));

	pool_init_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox);

	pool_init_req->aura_id = aura_id;
	pool_init_req->ctype = NPA_AQ_CTYPE_POOL;
	pool_init_req->op = NPA_AQ_INSTOP_INIT;
	otx2_mbox_memcpy(&pool_init_req->pool, pool, sizeof(*pool));

	otx2_mbox_msg_send(mbox, 0);
	rc = otx2_mbox_wait_for_rsp(mbox, 0);
	if (rc < 0)
		return rc;

	off = mbox->rx_start +
	      RTE_ALIGN(sizeof(struct mbox_hdr), MBOX_MSG_ALIGN);
	aura_init_rsp = (struct npa_aq_enq_rsp *)((uintptr_t)mdev->mbase + off);
	off = mbox->rx_start + aura_init_rsp->hdr.next_msgoff;
	pool_init_rsp = (struct npa_aq_enq_rsp *)((uintptr_t)mdev->mbase + off);

	/* Both context INITs must have been acknowledged successfully */
	if (rc == 2 && aura_init_rsp->hdr.rc == 0 && pool_init_rsp->hdr.rc == 0)
		rc = 0;
	else
		return NPA_LF_ERR_AURA_POOL_INIT;

	if (!(idev->npa_lock_mask & BIT_ULL(aura_id)))
		return 0;

	aura_init_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox);
	aura_init_req->aura_id = aura_id;
	aura_init_req->ctype = NPA_AQ_CTYPE_AURA;
	aura_init_req->op = NPA_AQ_INSTOP_LOCK;

	pool_init_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox);
	if (!pool_init_req) {
		/* The shared memory buffer can be full.
		 * Flush it and retry.
		 */
		otx2_mbox_msg_send(mbox, 0);
		rc = otx2_mbox_wait_for_rsp(mbox, 0);
		if (rc < 0) {
			otx2_err("Failed to LOCK AURA context");
			return -ENOMEM;
		}

		pool_init_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox);
		if (!pool_init_req) {
			otx2_err("Failed to LOCK POOL context");
			return -ENOMEM;
		}
	}
	pool_init_req->aura_id = aura_id;
	pool_init_req->ctype = NPA_AQ_CTYPE_POOL;
	pool_init_req->op = NPA_AQ_INSTOP_LOCK;

	rc = otx2_mbox_process(mbox);
	if (rc < 0) {
		otx2_err("Failed to lock POOL ctx to NDC");
		return -ENOMEM;
	}

	return 0;
}

static int
npa_lf_aura_pool_fini(struct otx2_mbox *mbox,
		      uint32_t aura_id,
		      uint64_t aura_handle)
{
	struct npa_aq_enq_req *aura_req, *pool_req;
	struct npa_aq_enq_rsp *aura_rsp, *pool_rsp;
	struct otx2_mbox_dev *mdev = &mbox->dev[0];
	struct ndc_sync_op *ndc_req;
	struct otx2_idev_cfg *idev;
	int rc, off;

	idev = otx2_intra_dev_get_cfg();
	if (idev == NULL)
		return -EINVAL;

	/* Procedure for disabling an aura/pool */
	rte_delay_us(10);
	npa_lf_aura_op_alloc(aura_handle, 0); /* dummy pop */

	pool_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox);
	pool_req->aura_id = aura_id;
	pool_req->ctype = NPA_AQ_CTYPE_POOL;
	pool_req->op = NPA_AQ_INSTOP_WRITE;
	pool_req->pool.ena = 0;
	pool_req->pool_mask.ena = ~pool_req->pool_mask.ena;

	aura_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox);
	aura_req->aura_id = aura_id;
	aura_req->ctype = NPA_AQ_CTYPE_AURA;
	aura_req->op = NPA_AQ_INSTOP_WRITE;
	aura_req->aura.ena = 0;
	aura_req->aura_mask.ena = ~aura_req->aura_mask.ena;

	otx2_mbox_msg_send(mbox, 0);
	rc = otx2_mbox_wait_for_rsp(mbox, 0);
	if (rc < 0)
		return rc;

	off = mbox->rx_start +
	      RTE_ALIGN(sizeof(struct mbox_hdr), MBOX_MSG_ALIGN);
	pool_rsp = (struct npa_aq_enq_rsp *)((uintptr_t)mdev->mbase + off);

	off = mbox->rx_start + pool_rsp->hdr.next_msgoff;
	aura_rsp = (struct npa_aq_enq_rsp *)((uintptr_t)mdev->mbase + off);

	if (rc != 2 || aura_rsp->hdr.rc != 0 || pool_rsp->hdr.rc != 0)
		return NPA_LF_ERR_AURA_POOL_FINI;

	/* Sync NDC-NPA for LF */
	ndc_req = otx2_mbox_alloc_msg_ndc_sync_op(mbox);
	ndc_req->npa_lf_sync = 1;

	rc = otx2_mbox_process(mbox);
	if (rc) {
		otx2_err("Error on NDC-NPA LF sync, rc %d", rc);
		return NPA_LF_ERR_AURA_POOL_FINI;
	}

	if (!(idev->npa_lock_mask & BIT_ULL(aura_id)))
		return 0;

	aura_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox);
	aura_req->aura_id = aura_id;
	aura_req->ctype = NPA_AQ_CTYPE_AURA;
	aura_req->op = NPA_AQ_INSTOP_UNLOCK;

	rc = otx2_mbox_process(mbox);
	if (rc < 0) {
		otx2_err("Failed to unlock AURA ctx to NDC");
		return -EINVAL;
	}

	pool_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox);
	pool_req->aura_id = aura_id;
	pool_req->ctype = NPA_AQ_CTYPE_POOL;
	pool_req->op = NPA_AQ_INSTOP_UNLOCK;

	rc = otx2_mbox_process(mbox);
	if (rc < 0) {
		otx2_err("Failed to unlock POOL ctx to NDC");
		return -EINVAL;
	}

	return 0;
}

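/*
 * Each pool keeps its free-pointer stack in a DMA-able memzone; the name
 * is derived from pf_func and the pool id so it can be found again at free.
 */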
static inline char *
npa_lf_stack_memzone_name(struct otx2_npa_lf *lf, int pool_id, char *name)
{
	snprintf(name, RTE_MEMZONE_NAMESIZE, "otx2_npa_stack_%x_%d",
		 lf->pf_func, pool_id);

	return name;
}

static inline const struct rte_memzone *
npa_lf_stack_dma_alloc(struct otx2_npa_lf *lf, char *name,
		       int pool_id, size_t size)
{
	return rte_memzone_reserve_aligned(
		npa_lf_stack_memzone_name(lf, pool_id, name), size, 0,
		RTE_MEMZONE_IOVA_CONTIG, OTX2_ALIGN);
}

static inline int
npa_lf_stack_dma_free(struct otx2_npa_lf *lf, char *name, int pool_id)
{
	const struct rte_memzone *mz;

	mz = rte_memzone_lookup(npa_lf_stack_memzone_name(lf, pool_id, name));
	if (mz == NULL)
		return -EINVAL;

	return rte_memzone_free(mz);
}

static inline int
bitmap_ctzll(uint64_t slab)
{
	if (slab == 0)
		return 0;

	return __builtin_ctzll(slab);
}

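/*
 * Aura/pool pair allocation: pick a free id from the resource bitmap,
 * reserve the stack memzone, fill in the aura and pool contexts, then
 * issue the INIT ops. Aura id and pool id are kept equal (1:1 pairing).
 */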
static int
npa_lf_aura_pool_pair_alloc(struct otx2_npa_lf *lf, const uint32_t block_size,
			    const uint32_t block_count, struct npa_aura_s *aura,
			    struct npa_pool_s *pool, uint64_t *aura_handle)
{
	int rc, aura_id, pool_id, stack_size, alloc_size;
	char name[RTE_MEMZONE_NAMESIZE];
	const struct rte_memzone *mz;
	uint64_t slab;
	uint32_t pos;

	/* Sanity check */
	if (!lf || !block_size || !block_count ||
	    !pool || !aura || !aura_handle)
		return NPA_LF_ERR_PARAM;

	/* Block size should be cache line aligned and in range of 128B-128KB */
	if (block_size % OTX2_ALIGN || block_size < 128 ||
	    block_size > 128 * 1024)
		return NPA_LF_ERR_INVALID_BLOCK_SZ;

	pos = 0;
	slab = 0;
	/* Scan from the beginning */
	__rte_bitmap_scan_init(lf->npa_bmp);
	/* Scan bitmap to get the free pool */
	rc = rte_bitmap_scan(lf->npa_bmp, &pos, &slab);
	/* Empty bitmap */
	if (rc == 0) {
		otx2_err("Mempools exhausted, use 'max_pools' devargs to increase");
		return -ERANGE;
	}

	/* Get aura_id from resource bitmap */
	aura_id = pos + bitmap_ctzll(slab);
	/* Mark pool as reserved */
	rte_bitmap_clear(lf->npa_bmp, aura_id);

	/* Each aura has its own separate pool (aura-pool pair) */
	pool_id = aura_id;
	rc = (aura_id < 0 || pool_id >= (int)lf->nr_pools || aura_id >=
	      (int)BIT_ULL(6 + lf->aura_sz)) ? NPA_LF_ERR_AURA_ID_ALLOC : 0;
	if (rc)
		goto exit;

	/* Allocate stack memory */
	stack_size = (block_count + lf->stack_pg_ptrs - 1) / lf->stack_pg_ptrs;
	alloc_size = stack_size * lf->stack_pg_bytes;

	mz = npa_lf_stack_dma_alloc(lf, name, pool_id, alloc_size);
	if (mz == NULL) {
		rc = -ENOMEM;
		goto exit;
	}

	/* Update aura fields */
	aura->pool_addr = pool_id;/* AF will translate to associated poolctx */
	aura->ena = 1;
	aura->shift = __builtin_clz(block_count) - 8;
	aura->limit = block_count;
	aura->pool_caching = 1;
	aura->err_int_ena = BIT(NPA_AURA_ERR_INT_AURA_ADD_OVER);
	aura->err_int_ena |= BIT(NPA_AURA_ERR_INT_AURA_ADD_UNDER);
	aura->err_int_ena |= BIT(NPA_AURA_ERR_INT_AURA_FREE_UNDER);
	aura->err_int_ena |= BIT(NPA_AURA_ERR_INT_POOL_DIS);
	/* Many to one reduction */
	aura->err_qint_idx = aura_id % lf->qints;

	/* Update pool fields */
	pool->stack_base = mz->iova;
	pool->ena = 1;
	pool->buf_size = block_size / OTX2_ALIGN;
	pool->stack_max_pages = stack_size;
	pool->shift = __builtin_clz(block_count) - 8;
	pool->ptr_start = 0;
	pool->ptr_end = ~0;
	pool->stack_caching = 1;
	pool->err_int_ena = BIT(NPA_POOL_ERR_INT_OVFLS);
	pool->err_int_ena |= BIT(NPA_POOL_ERR_INT_RANGE);
	pool->err_int_ena |= BIT(NPA_POOL_ERR_INT_PERR);

	/* Many to one reduction */
	pool->err_qint_idx = pool_id % lf->qints;

	/* Issue AURA_INIT and POOL_INIT op */
	rc = npa_lf_aura_pool_init(lf->mbox, aura_id, aura, pool);
	if (rc)
		goto stack_mem_free;

	*aura_handle = npa_lf_aura_handle_gen(aura_id, lf->base);

	/* Update aura count */
	npa_lf_aura_op_cnt_set(*aura_handle, 0, block_count);
	/* Read it back to make sure aura count is updated */
	npa_lf_aura_op_cnt_get(*aura_handle);

	return 0;

stack_mem_free:
	rte_memzone_free(mz);
exit:
	/* Mark the aura id as free again */
	rte_bitmap_set(lf->npa_bmp, aura_id);

	return rc;
}

static int
npa_lf_aura_pool_pair_free(struct otx2_npa_lf *lf, uint64_t aura_handle)
{
	char name[RTE_MEMZONE_NAMESIZE];
	int aura_id, pool_id, rc;

	if (!lf || !aura_handle)
		return NPA_LF_ERR_PARAM;

	aura_id = pool_id = npa_lf_aura_handle_to_aura(aura_handle);
	rc = npa_lf_aura_pool_fini(lf->mbox, aura_id, aura_handle);
	rc |= npa_lf_stack_dma_free(lf, name, pool_id);

	rte_bitmap_set(lf->npa_bmp, aura_id);

	return rc;
}

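/*
 * Verify that the valid pointer range programmed by
 * npa_lf_aura_op_range_set() has actually taken effect: read the pool
 * context back through the mailbox and compare it with the cached limits.
 */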
static int
npa_lf_aura_range_update_check(uint64_t aura_handle)
{
	uint64_t aura_id = npa_lf_aura_handle_to_aura(aura_handle);
	struct otx2_npa_lf *lf = otx2_npa_lf_obj_get();
	struct npa_aura_lim *lim = lf->aura_lim;
	__otx2_io struct npa_pool_s *pool;
	struct npa_aq_enq_req *req;
	struct npa_aq_enq_rsp *rsp;
	int rc;

	req = otx2_mbox_alloc_msg_npa_aq_enq(lf->mbox);

	req->aura_id = aura_id;
	req->ctype = NPA_AQ_CTYPE_POOL;
	req->op = NPA_AQ_INSTOP_READ;

	rc = otx2_mbox_process_msg(lf->mbox, (void *)&rsp);
	if (rc) {
		otx2_err("Failed to get pool(0x%"PRIx64") context", aura_id);
		return rc;
	}

	pool = &rsp->pool;
	if (lim[aura_id].ptr_start != pool->ptr_start ||
	    lim[aura_id].ptr_end != pool->ptr_end) {
		otx2_err("Range update failed on pool(0x%"PRIx64")", aura_id);
		return -ERANGE;
	}

	return 0;
}

static int
otx2_npa_alloc(struct rte_mempool *mp)
{
	uint32_t block_size, block_count;
	uint64_t aura_handle = 0;
	struct otx2_npa_lf *lf;
	struct npa_aura_s aura;
	struct npa_pool_s pool;
	size_t padding;
	int rc;

	lf = otx2_npa_lf_obj_get();
	if (lf == NULL) {
		rc = -EINVAL;
		goto error;
	}

	block_size = mp->elt_size + mp->header_size + mp->trailer_size;
	/*
	 * OCTEON TX2 has an 8-set, 41-way L1D cache; VA<9:7> bits dictate
	 * the set selection.
	 * Add additional padding to ensure that the element size always
	 * occupies an odd number of cachelines, ensuring even distribution
	 * of elements among L1D cache sets.
	 */
	padding = ((block_size / RTE_CACHE_LINE_SIZE) % 2) ? 0 :
				RTE_CACHE_LINE_SIZE;
	mp->trailer_size += padding;
	block_size += padding;
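	/*
	 * Example: a 2048B block spans 16 cache lines (even), so 128B is
	 * added and consecutive elements start 17 lines apart; 17 mod 8 = 1,
	 * so elements rotate through all 8 sets instead of hammering one.
	 */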

	block_count = mp->size;

	if (block_size % OTX2_ALIGN != 0) {
		otx2_err("Block size should be multiple of 128B");
		rc = -ERANGE;
		goto error;
	}

	memset(&aura, 0, sizeof(struct npa_aura_s));
	memset(&pool, 0, sizeof(struct npa_pool_s));
	pool.nat_align = 1;
	pool.buf_offset = 1;

	if ((uint32_t)pool.buf_offset * OTX2_ALIGN != mp->header_size) {
		otx2_err("Unsupported mp->header_size=%d", mp->header_size);
		rc = -EINVAL;
		goto error;
	}

	/* Use driver specific mp->pool_config to override aura config */
	if (mp->pool_config != NULL)
		memcpy(&aura, mp->pool_config, sizeof(struct npa_aura_s));

	rc = npa_lf_aura_pool_pair_alloc(lf, block_size, block_count,
					 &aura, &pool, &aura_handle);
	if (rc) {
		otx2_err("Failed to alloc pool or aura rc=%d", rc);
		goto error;
	}

	/* Store aura_handle for future queue operations */
	mp->pool_id = aura_handle;
	otx2_npa_dbg("lf=%p block_sz=%d block_count=%d aura_handle=0x%"PRIx64,
		     lf, block_size, block_count, aura_handle);

	/* Just hold the reference of the object */
	otx2_npa_lf_obj_ref();
	return 0;

error:
	return rc;
}

static void
otx2_npa_free(struct rte_mempool *mp)
{
	struct otx2_npa_lf *lf = otx2_npa_lf_obj_get();
	int rc = 0;

	otx2_npa_dbg("lf=%p aura_handle=0x%"PRIx64, lf, mp->pool_id);
	if (lf != NULL)
		rc = npa_lf_aura_pool_pair_free(lf, mp->pool_id);

	if (rc)
		otx2_err("Failed to free pool or aura rc=%d", rc);

	/* Release the reference of npalf */
	otx2_npa_lf_fini();
}

static ssize_t
otx2_npa_calc_mem_size(const struct rte_mempool *mp, uint32_t obj_num,
		       uint32_t pg_shift, size_t *min_chunk_size, size_t *align)
{
	size_t total_elt_sz;

	/* Need space for one more obj on each chunk to fulfill
	 * alignment requirements.
	 */
	total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
	return rte_mempool_op_calc_mem_size_helper(mp, obj_num, pg_shift,
						   total_elt_sz, min_chunk_size,
						   align);
}

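/* With 128B cache lines, IOVA bits <9:7> give the L1D set index (0-7). */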
static inline int
otx2_npa_l1d_way_set_get(uint64_t iova)
{
	return (iova >> rte_log2_u32(RTE_CACHE_LINE_SIZE)) & 0x7;
}

static int
otx2_npa_populate(struct rte_mempool *mp, unsigned int max_objs, void *vaddr,
		  rte_iova_t iova, size_t len,
		  rte_mempool_populate_obj_cb_t *obj_cb, void *obj_cb_arg)
{
#define OTX2_L1D_NB_SETS	8
	uint64_t distribution[OTX2_L1D_NB_SETS];
	rte_iova_t start_iova;
	size_t total_elt_sz;
	uint8_t set;
	size_t off;
	int i;

	if (iova == RTE_BAD_IOVA)
		return -EINVAL;

	total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;

	/* Align object start address to a multiple of total_elt_sz */
	off = total_elt_sz - ((((uintptr_t)vaddr - 1) % total_elt_sz) + 1);
	if (len < off)
		return -EINVAL;

	vaddr = (char *)vaddr + off;
	iova += off;
	len -= off;

	memset(distribution, 0, sizeof(uint64_t) * OTX2_L1D_NB_SETS);
	start_iova = iova;
	while (start_iova < iova + len) {
		set = otx2_npa_l1d_way_set_get(start_iova + mp->header_size);
		distribution[set]++;
		start_iova += total_elt_sz;
	}

	otx2_npa_dbg("iova %"PRIx64", aligned iova %"PRIx64"", iova - off,
		     iova);
	otx2_npa_dbg("length %"PRIu64", aligned length %"PRIu64"",
		     (uint64_t)(len + off), (uint64_t)len);
	otx2_npa_dbg("element size %"PRIu64"", (uint64_t)total_elt_sz);
	otx2_npa_dbg("requested objects %"PRIu64", possible objects %"PRIu64"",
		     (uint64_t)max_objs, (uint64_t)(len / total_elt_sz));
	otx2_npa_dbg("L1D set distribution :");
	for (i = 0; i < OTX2_L1D_NB_SETS; i++)
		otx2_npa_dbg("set[%d] : objects : %"PRIu64"", i,
			     distribution[i]);

	npa_lf_aura_op_range_set(mp->pool_id, iova, iova + len);
	if (npa_lf_aura_range_update_check(mp->pool_id) < 0)
		return -EBUSY;

	return rte_mempool_op_populate_helper(mp,
					RTE_MEMPOOL_POPULATE_F_ALIGN_OBJ,
					max_objs, vaddr, iova, len,
					obj_cb, obj_cb_arg);
}

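/*
 * Exported mempool ops. A minimal usage sketch (hypothetical application
 * code): call rte_mempool_set_ops_byname(mp, "octeontx2_npa", NULL) on an
 * empty mempool before populating it, or build DPDK with this driver as
 * the default mempool ops.
 */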
static struct rte_mempool_ops otx2_npa_ops = {
	.name = "octeontx2_npa",
	.alloc = otx2_npa_alloc,
	.free = otx2_npa_free,
	.enqueue = otx2_npa_enq,
	.get_count = otx2_npa_get_count,
	.calc_mem_size = otx2_npa_calc_mem_size,
	.populate = otx2_npa_populate,
#if defined(RTE_ARCH_ARM64)
	.dequeue = otx2_npa_deq_arm64,
#else
	.dequeue = otx2_npa_deq,
#endif
};

MEMPOOL_REGISTER_OPS(otx2_npa_ops);