/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2019 Marvell International Ltd.
 */

#include <rte_mempool.h>
#include <rte_vect.h>

#include "otx2_mempool.h"
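/* Fast-path enqueue: a buffer is freed by writing the pointer together
 * with the aura id as one 128-bit store pair to the aura's OP_FREE0
 * register, so no software freelist is maintained here.
 */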
static int __rte_hot
otx2_npa_enq(struct rte_mempool *mp, void * const *obj_table, unsigned int n)
{
	const uint64_t aura_handle = mp->pool_id;
	const uint64_t reg = npa_lf_aura_handle_to_aura(aura_handle);
	const uint64_t addr = npa_lf_aura_handle_to_base(aura_handle) +
				 NPA_LF_AURA_OP_FREE0;
	unsigned int index;

	for (index = 0; index < n; index++)
		otx2_store_pair((uint64_t)obj_table[index], reg, addr);

	return 0;
}
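/* Single-object allocation: an atomic add to the aura's ALLOCX register
 * returns the next free buffer pointer, or 0 when the aura is empty.
 * A few retries paper over transient emptiness caused by concurrent
 * frees that have not landed yet.
 */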
static __rte_noinline int
npa_lf_aura_op_alloc_one(const int64_t wdata, int64_t * const addr,
			 void **obj_table, uint8_t i)
{
	uint8_t retry = 4;

	do {
		obj_table[i] = (void *)otx2_atomic64_add_nosync(wdata, addr);
		if (obj_table[i] != NULL)
			return 0;
	} while (retry--);

	return -ENOENT;
}
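/* After a partially failed bulk allocation, retry just the slots that
 * came back NULL; a hard failure here makes the caller roll back all
 * buffers allocated so far.
 */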
#if defined(RTE_ARCH_ARM64)
static __rte_noinline int
npa_lf_aura_op_search_alloc(const int64_t wdata, int64_t * const addr,
			    void **obj_table, unsigned int n)
{
	unsigned int i;

	for (i = 0; i < n; i++) {
		if (obj_table[i] != NULL)
			continue;
		if (npa_lf_aura_op_alloc_one(wdata, addr, obj_table, i))
			return -ENOENT;
	}

	return 0;
}
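/* Bulk allocation via LSE CASP. Each 128-bit CASP issued to the aura's
 * ALLOCX register is handled by the NPA coprocessor as a request for two
 * buffers: the "old data" loaded back into the destination register pair
 * is the pair of allocated pointers (0 for an empty aura), while wdata,
 * pinned in x26/x27, carries the aura id and drop bits. Every result
 * pair is copied to a NEON register and ANDed into `failed`; valid
 * pointers share nonzero address bits, so a zero lane indicates at least
 * one failed CASP. Note that the st1 post-index writeback advances the
 * dst register behind the compiler's back; the failure path at the end
 * compensates by stepping obj_table back by n * 8 bytes before
 * rescanning.
 */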
static __rte_noinline int
npa_lf_aura_op_alloc_bulk(const int64_t wdata, int64_t * const addr,
			  unsigned int n, void **obj_table)
{
	register const uint64_t wdata64 __asm("x26") = wdata;
	register const uint64_t wdata128 __asm("x27") = wdata;
	uint64x2_t failed = vdupq_n_u64(~0);

	switch (n) {
	case 32:
	{
		asm volatile (
		".cpu  generic+lse\n"
70 "casp x0, x1, %[wdata64], %[wdata128], [%[loc]]\n"
71 "casp x2, x3, %[wdata64], %[wdata128], [%[loc]]\n"
72 "casp x4, x5, %[wdata64], %[wdata128], [%[loc]]\n"
73 "casp x6, x7, %[wdata64], %[wdata128], [%[loc]]\n"
74 "casp x8, x9, %[wdata64], %[wdata128], [%[loc]]\n"
75 "casp x10, x11, %[wdata64], %[wdata128], [%[loc]]\n"
76 "casp x12, x13, %[wdata64], %[wdata128], [%[loc]]\n"
77 "casp x14, x15, %[wdata64], %[wdata128], [%[loc]]\n"
78 "casp x16, x17, %[wdata64], %[wdata128], [%[loc]]\n"
79 "casp x18, x19, %[wdata64], %[wdata128], [%[loc]]\n"
80 "casp x20, x21, %[wdata64], %[wdata128], [%[loc]]\n"
81 "casp x22, x23, %[wdata64], %[wdata128], [%[loc]]\n"
84 "casp x0, x1, %[wdata64], %[wdata128], [%[loc]]\n"
87 "casp x2, x3, %[wdata64], %[wdata128], [%[loc]]\n"
90 "casp x4, x5, %[wdata64], %[wdata128], [%[loc]]\n"
93 "casp x6, x7, %[wdata64], %[wdata128], [%[loc]]\n"
94 "and %[failed].16B, %[failed].16B, v16.16B\n"
95 "and %[failed].16B, %[failed].16B, v17.16B\n"
96 "and %[failed].16B, %[failed].16B, v18.16B\n"
97 "and %[failed].16B, %[failed].16B, v19.16B\n"
101 "fmov v21.D[1], x11\n"
103 "fmov v22.D[1], x13\n"
105 "fmov v23.D[1], x15\n"
106 "and %[failed].16B, %[failed].16B, v20.16B\n"
107 "and %[failed].16B, %[failed].16B, v21.16B\n"
108 "and %[failed].16B, %[failed].16B, v22.16B\n"
109 "and %[failed].16B, %[failed].16B, v23.16B\n"
110 "st1 { v16.2d, v17.2d, v18.2d, v19.2d}, [%[dst]], 64\n"
111 "st1 { v20.2d, v21.2d, v22.2d, v23.2d}, [%[dst]], 64\n"
113 "fmov v16.D[1], x17\n"
115 "fmov v17.D[1], x19\n"
117 "fmov v18.D[1], x21\n"
119 "fmov v19.D[1], x23\n"
120 "and %[failed].16B, %[failed].16B, v16.16B\n"
121 "and %[failed].16B, %[failed].16B, v17.16B\n"
122 "and %[failed].16B, %[failed].16B, v18.16B\n"
123 "and %[failed].16B, %[failed].16B, v19.16B\n"
125 "fmov v20.D[1], x1\n"
127 "fmov v21.D[1], x3\n"
129 "fmov v22.D[1], x5\n"
131 "fmov v23.D[1], x7\n"
132 "and %[failed].16B, %[failed].16B, v20.16B\n"
133 "and %[failed].16B, %[failed].16B, v21.16B\n"
134 "and %[failed].16B, %[failed].16B, v22.16B\n"
135 "and %[failed].16B, %[failed].16B, v23.16B\n"
136 "st1 { v16.2d, v17.2d, v18.2d, v19.2d}, [%[dst]], 64\n"
137 "st1 { v20.2d, v21.2d, v22.2d, v23.2d}, [%[dst]], 64\n"
138 : "+Q" (*addr), [failed] "=&w" (failed)
139 : [wdata64] "r" (wdata64), [wdata128] "r" (wdata128),
140 [dst] "r" (obj_table), [loc] "r" (addr)
141 : "memory", "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
142 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16",
143 "x17", "x18", "x19", "x20", "x21", "x22", "x23", "v16", "v17",
144 "v18", "v19", "v20", "v21", "v22", "v23"
152 "casp x0, x1, %[wdata64], %[wdata128], [%[loc]]\n"
153 "casp x2, x3, %[wdata64], %[wdata128], [%[loc]]\n"
154 "casp x4, x5, %[wdata64], %[wdata128], [%[loc]]\n"
155 "casp x6, x7, %[wdata64], %[wdata128], [%[loc]]\n"
156 "casp x8, x9, %[wdata64], %[wdata128], [%[loc]]\n"
157 "casp x10, x11, %[wdata64], %[wdata128], [%[loc]]\n"
158 "casp x12, x13, %[wdata64], %[wdata128], [%[loc]]\n"
159 "casp x14, x15, %[wdata64], %[wdata128], [%[loc]]\n"
161 "fmov v16.D[1], x1\n"
163 "fmov v17.D[1], x3\n"
165 "fmov v18.D[1], x5\n"
167 "fmov v19.D[1], x7\n"
168 "and %[failed].16B, %[failed].16B, v16.16B\n"
169 "and %[failed].16B, %[failed].16B, v17.16B\n"
170 "and %[failed].16B, %[failed].16B, v18.16B\n"
171 "and %[failed].16B, %[failed].16B, v19.16B\n"
173 "fmov v20.D[1], x9\n"
175 "fmov v21.D[1], x11\n"
177 "fmov v22.D[1], x13\n"
179 "fmov v23.D[1], x15\n"
180 "and %[failed].16B, %[failed].16B, v20.16B\n"
181 "and %[failed].16B, %[failed].16B, v21.16B\n"
182 "and %[failed].16B, %[failed].16B, v22.16B\n"
183 "and %[failed].16B, %[failed].16B, v23.16B\n"
184 "st1 { v16.2d, v17.2d, v18.2d, v19.2d}, [%[dst]], 64\n"
185 "st1 { v20.2d, v21.2d, v22.2d, v23.2d}, [%[dst]], 64\n"
186 : "+Q" (*addr), [failed] "=&w" (failed)
187 : [wdata64] "r" (wdata64), [wdata128] "r" (wdata128),
188 [dst] "r" (obj_table), [loc] "r" (addr)
189 : "memory", "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
190 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "v16",
191 "v17", "v18", "v19", "v20", "v21", "v22", "v23"
199 "casp x0, x1, %[wdata64], %[wdata128], [%[loc]]\n"
200 "casp x2, x3, %[wdata64], %[wdata128], [%[loc]]\n"
201 "casp x4, x5, %[wdata64], %[wdata128], [%[loc]]\n"
202 "casp x6, x7, %[wdata64], %[wdata128], [%[loc]]\n"
204 "fmov v16.D[1], x1\n"
206 "fmov v17.D[1], x3\n"
208 "fmov v18.D[1], x5\n"
210 "fmov v19.D[1], x7\n"
211 "and %[failed].16B, %[failed].16B, v16.16B\n"
212 "and %[failed].16B, %[failed].16B, v17.16B\n"
213 "and %[failed].16B, %[failed].16B, v18.16B\n"
214 "and %[failed].16B, %[failed].16B, v19.16B\n"
215 "st1 { v16.2d, v17.2d, v18.2d, v19.2d}, [%[dst]], 64\n"
216 : "+Q" (*addr), [failed] "=&w" (failed)
217 : [wdata64] "r" (wdata64), [wdata128] "r" (wdata128),
218 [dst] "r" (obj_table), [loc] "r" (addr)
219 : "memory", "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
220 "v16", "v17", "v18", "v19"
228 "casp x0, x1, %[wdata64], %[wdata128], [%[loc]]\n"
229 "casp x2, x3, %[wdata64], %[wdata128], [%[loc]]\n"
231 "fmov v16.D[1], x1\n"
233 "fmov v17.D[1], x3\n"
234 "and %[failed].16B, %[failed].16B, v16.16B\n"
235 "and %[failed].16B, %[failed].16B, v17.16B\n"
236 "st1 { v16.2d, v17.2d}, [%[dst]], 32\n"
237 : "+Q" (*addr), [failed] "=&w" (failed)
238 : [wdata64] "r" (wdata64), [wdata128] "r" (wdata128),
239 [dst] "r" (obj_table), [loc] "r" (addr)
240 : "memory", "x0", "x1", "x2", "x3", "v16", "v17"
248 "casp x0, x1, %[wdata64], %[wdata128], [%[loc]]\n"
250 "fmov v16.D[1], x1\n"
251 "and %[failed].16B, %[failed].16B, v16.16B\n"
252 "st1 { v16.2d}, [%[dst]], 16\n"
253 : "+Q" (*addr), [failed] "=&w" (failed)
254 : [wdata64] "r" (wdata64), [wdata128] "r" (wdata128),
255 [dst] "r" (obj_table), [loc] "r" (addr)
256 : "memory", "x0", "x1", "v16"
	case 1:
		return npa_lf_aura_op_alloc_one(wdata, addr, obj_table, 0);
	}

	if (unlikely(!(vgetq_lane_u64(failed, 0) & vgetq_lane_u64(failed, 1))))
		return npa_lf_aura_op_search_alloc(wdata, addr, (void **)
			((char *)obj_table - (sizeof(uint64_t) * n)), n);

	return 0;
}
static __rte_noinline void
otx2_npa_clear_alloc(struct rte_mempool *mp, void **obj_table, unsigned int n)
{
	unsigned int i;

	for (i = 0; i < n; i++) {
		if (obj_table[i] != NULL) {
			otx2_npa_enq(mp, &obj_table[i], 1);
			obj_table[i] = NULL;
		}
	}
}
static __rte_noinline int __rte_hot
otx2_npa_deq_arm64(struct rte_mempool *mp, void **obj_table, unsigned int n)
{
	const int64_t wdata = npa_lf_aura_handle_to_aura(mp->pool_id);
	void **obj_table_bak = obj_table;
	const unsigned int nfree = n;
	unsigned int parts;

	int64_t * const addr = (int64_t * const)
		(npa_lf_aura_handle_to_base(mp->pool_id) +
			NPA_LF_AURA_OP_ALLOCX(0));
	while (n) {
		parts = n > 31 ? 32 : rte_align32prevpow2(n);
		n -= parts;
		if (unlikely(npa_lf_aura_op_alloc_bulk(wdata, addr,
				parts, obj_table))) {
			otx2_npa_clear_alloc(mp, obj_table_bak, nfree - n);
			return -ENOENT;
		}
		obj_table += parts;
	}

	return 0;
}

#else
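/* Scalar fallback used on non-arm64 builds: allocate one pointer at a
 * time and, on failure, return everything allocated so far to the aura.
 */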
static inline int __rte_hot
otx2_npa_deq(struct rte_mempool *mp, void **obj_table, unsigned int n)
{
	const int64_t wdata = npa_lf_aura_handle_to_aura(mp->pool_id);
	unsigned int index;

	int64_t * const addr = (int64_t *)
			(npa_lf_aura_handle_to_base(mp->pool_id) +
				NPA_LF_AURA_OP_ALLOCX(0));
	for (index = 0; index < n; index++, obj_table++) {
		/* npa_lf_aura_op_alloc_one() stores the allocated pointer
		 * into *obj_table and returns 0 on success.
		 */
		if (npa_lf_aura_op_alloc_one(wdata, addr, obj_table, 0)) {
			for (; index > 0; index--) {
				obj_table--;
				otx2_npa_enq(mp, obj_table, 1);
			}
			return -ENOENT;
		}
	}

	return 0;
}

#endif
static unsigned int
otx2_npa_get_count(const struct rte_mempool *mp)
{
	return (unsigned int)npa_lf_aura_op_available(mp->pool_id);
}
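/* Create an aura/pool pair through the AF admin queue. Both INIT
 * requests are batched into a single mailbox send, so a return value of
 * 2 from otx2_mbox_wait_for_rsp() means both responses arrived.
 */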
static int
npa_lf_aura_pool_init(struct otx2_mbox *mbox, uint32_t aura_id,
		      struct npa_aura_s *aura, struct npa_pool_s *pool)
{
	struct npa_aq_enq_req *aura_init_req, *pool_init_req;
	struct npa_aq_enq_rsp *aura_init_rsp, *pool_init_rsp;
	struct otx2_mbox_dev *mdev = &mbox->dev[0];
	struct otx2_idev_cfg *idev;
	int rc, off;

	idev = otx2_intra_dev_get_cfg();
	if (idev == NULL)
		return -ENOMEM;

	aura_init_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox);

	aura_init_req->aura_id = aura_id;
	aura_init_req->ctype = NPA_AQ_CTYPE_AURA;
	aura_init_req->op = NPA_AQ_INSTOP_INIT;
	otx2_mbox_memcpy(&aura_init_req->aura, aura, sizeof(*aura));

	pool_init_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox);

	pool_init_req->aura_id = aura_id;
	pool_init_req->ctype = NPA_AQ_CTYPE_POOL;
	pool_init_req->op = NPA_AQ_INSTOP_INIT;
	otx2_mbox_memcpy(&pool_init_req->pool, pool, sizeof(*pool));

	otx2_mbox_msg_send(mbox, 0);
	rc = otx2_mbox_wait_for_rsp(mbox, 0);
	if (rc < 0)
		return rc;

	off = mbox->rx_start +
			RTE_ALIGN(sizeof(struct mbox_hdr), MBOX_MSG_ALIGN);
	aura_init_rsp = (struct npa_aq_enq_rsp *)((uintptr_t)mdev->mbase + off);
	off = mbox->rx_start + aura_init_rsp->hdr.next_msgoff;
	pool_init_rsp = (struct npa_aq_enq_rsp *)((uintptr_t)mdev->mbase + off);

	if (rc == 2 && aura_init_rsp->hdr.rc == 0 && pool_init_rsp->hdr.rc == 0)
		rc = 0;
	else
		return NPA_LF_ERR_AURA_POOL_INIT;
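	/* Optionally pin the new contexts in the NDC cache; this runs only
	 * when the user requested it for this aura via the npa_lock_mask
	 * devargs.
	 */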
	if (!(idev->npa_lock_mask & BIT_ULL(aura_id)))
		return 0;

	aura_init_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox);
	aura_init_req->aura_id = aura_id;
	aura_init_req->ctype = NPA_AQ_CTYPE_AURA;
	aura_init_req->op = NPA_AQ_INSTOP_LOCK;

	pool_init_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox);
	if (!pool_init_req) {
		/* The shared memory buffer can be full.
		 * Flush it and retry.
		 */
		otx2_mbox_msg_send(mbox, 0);
		rc = otx2_mbox_wait_for_rsp(mbox, 0);
		if (rc < 0) {
			otx2_err("Failed to LOCK AURA context");
			return -ENOMEM;
		}

		pool_init_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox);
		if (!pool_init_req) {
			otx2_err("Failed to LOCK POOL context");
			return -ENOMEM;
		}
	}
	pool_init_req->aura_id = aura_id;
	pool_init_req->ctype = NPA_AQ_CTYPE_POOL;
	pool_init_req->op = NPA_AQ_INSTOP_LOCK;

	rc = otx2_mbox_process(mbox);
	if (rc < 0) {
		otx2_err("Failed to lock POOL ctx to NDC");
		return -ENOMEM;
	}

	return 0;
}
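/* Teardown mirrors init: quiesce the pool with a short delay and one
 * dummy alloc, disable pool and aura via masked WRITE ops, sync NDC-NPA,
 * and unlock any contexts that were locked at init time.
 */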
static int
npa_lf_aura_pool_fini(struct otx2_mbox *mbox,
		      uint32_t aura_id,
		      uint64_t aura_handle)
{
	struct npa_aq_enq_req *aura_req, *pool_req;
	struct npa_aq_enq_rsp *aura_rsp, *pool_rsp;
	struct otx2_mbox_dev *mdev = &mbox->dev[0];
	struct ndc_sync_op *ndc_req;
	struct otx2_idev_cfg *idev;
	int rc, off;

	idev = otx2_intra_dev_get_cfg();
	if (idev == NULL)
		return -EINVAL;

	/* Procedure for disabling an aura/pool */
	rte_delay_us(10);
	npa_lf_aura_op_alloc(aura_handle, 0);

	pool_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox);
	pool_req->aura_id = aura_id;
	pool_req->ctype = NPA_AQ_CTYPE_POOL;
	pool_req->op = NPA_AQ_INSTOP_WRITE;
	pool_req->pool.ena = 0;
	pool_req->pool_mask.ena = ~pool_req->pool_mask.ena;

	aura_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox);
	aura_req->aura_id = aura_id;
	aura_req->ctype = NPA_AQ_CTYPE_AURA;
	aura_req->op = NPA_AQ_INSTOP_WRITE;
	aura_req->aura.ena = 0;
	aura_req->aura_mask.ena = ~aura_req->aura_mask.ena;

	otx2_mbox_msg_send(mbox, 0);
	rc = otx2_mbox_wait_for_rsp(mbox, 0);
	if (rc < 0)
		return rc;

	off = mbox->rx_start +
			RTE_ALIGN(sizeof(struct mbox_hdr), MBOX_MSG_ALIGN);
	pool_rsp = (struct npa_aq_enq_rsp *)((uintptr_t)mdev->mbase + off);

	off = mbox->rx_start + pool_rsp->hdr.next_msgoff;
	aura_rsp = (struct npa_aq_enq_rsp *)((uintptr_t)mdev->mbase + off);

	if (rc != 2 || aura_rsp->hdr.rc != 0 || pool_rsp->hdr.rc != 0)
		return NPA_LF_ERR_AURA_POOL_FINI;
	/* Sync NDC-NPA for LF */
	ndc_req = otx2_mbox_alloc_msg_ndc_sync_op(mbox);
	ndc_req->npa_lf_sync = 1;

	rc = otx2_mbox_process(mbox);
	if (rc) {
		otx2_err("Error on NDC-NPA LF sync, rc %d", rc);
		return NPA_LF_ERR_AURA_POOL_FINI;
	}

	if (!(idev->npa_lock_mask & BIT_ULL(aura_id)))
		return 0;

	aura_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox);
	aura_req->aura_id = aura_id;
	aura_req->ctype = NPA_AQ_CTYPE_AURA;
	aura_req->op = NPA_AQ_INSTOP_UNLOCK;

	rc = otx2_mbox_process(mbox);
	if (rc < 0) {
		otx2_err("Failed to unlock AURA ctx to NDC");
		return -EINVAL;
	}

	pool_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox);
	pool_req->aura_id = aura_id;
	pool_req->ctype = NPA_AQ_CTYPE_POOL;
	pool_req->op = NPA_AQ_INSTOP_UNLOCK;

	rc = otx2_mbox_process(mbox);
	if (rc < 0) {
		otx2_err("Failed to unlock POOL ctx to NDC");
		return -EINVAL;
	}

	return 0;
}
static inline char *
npa_lf_stack_memzone_name(struct otx2_npa_lf *lf, int pool_id, char *name)
{
	snprintf(name, RTE_MEMZONE_NAMESIZE, "otx2_npa_stack_%x_%d",
		 lf->pf_func, pool_id);

	return name;
}
static inline const struct rte_memzone *
npa_lf_stack_dma_alloc(struct otx2_npa_lf *lf, char *name,
		       int pool_id, size_t size)
{
	return rte_memzone_reserve_aligned(
		npa_lf_stack_memzone_name(lf, pool_id, name), size, 0,
			RTE_MEMZONE_IOVA_CONTIG, OTX2_ALIGN);
}
static inline int
npa_lf_stack_dma_free(struct otx2_npa_lf *lf, char *name, int pool_id)
{
	const struct rte_memzone *mz;

	mz = rte_memzone_lookup(npa_lf_stack_memzone_name(lf, pool_id, name));
	if (mz == NULL)
		return -EINVAL;

	return rte_memzone_free(mz);
}
static inline int
bitmap_ctzll(uint64_t slab)
{
	if (slab == 0)
		return 0;

	return __builtin_ctzll(slab);
}
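/* Reserve a free aura/pool pair (aura N is always paired with pool N),
 * size and allocate the DMA memory backing the pool's pointer stack,
 * fill in both contexts and ask the AF to initialize them. The stack is
 * sized in whole pages: for example, if stack_pg_ptrs were 27 pointers
 * per page, block_count = 1000 would need (1000 + 26) / 27 = 38 pages.
 */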
static int
npa_lf_aura_pool_pair_alloc(struct otx2_npa_lf *lf, const uint32_t block_size,
			    const uint32_t block_count, struct npa_aura_s *aura,
			    struct npa_pool_s *pool, uint64_t *aura_handle)
{
	int rc, aura_id, pool_id, stack_size, alloc_size;
	char name[RTE_MEMZONE_NAMESIZE];
	const struct rte_memzone *mz;
	uint64_t slab;
	uint32_t pos;

	/* Sanity check */
	if (!lf || !block_size || !block_count ||
	    !pool || !aura || !aura_handle)
		return NPA_LF_ERR_PARAM;

	/* Block size should be cache line aligned and in range of 128B-128KB */
	if (block_size % OTX2_ALIGN || block_size < 128 ||
	    block_size > 128 * 1024)
		return NPA_LF_ERR_INVALID_BLOCK_SZ;

	pos = 0;
	slab = 0;
	/* Scan from the beginning */
	__rte_bitmap_scan_init(lf->npa_bmp);
	/* Scan bitmap to get the free pool */
	rc = rte_bitmap_scan(lf->npa_bmp, &pos, &slab);
	/* Empty bitmap */
	if (rc == 0) {
		otx2_err("Mempools exhausted; increase the 'max_pools' devargs");
		return -ERANGE;
	}

	/* Get aura_id from resource bitmap */
	aura_id = pos + bitmap_ctzll(slab);
	/* Mark pool as reserved */
	rte_bitmap_clear(lf->npa_bmp, aura_id);

	/* Each aura is paired with its own pool in this configuration */
	pool_id = aura_id;
	rc = (aura_id < 0 || pool_id >= (int)lf->nr_pools || aura_id >=
	      (int)BIT_ULL(6 + lf->aura_sz)) ? NPA_LF_ERR_AURA_ID_ALLOC : 0;
	if (rc)
		goto exit;
	/* Allocate stack memory */
	stack_size = (block_count + lf->stack_pg_ptrs - 1) / lf->stack_pg_ptrs;
	alloc_size = stack_size * lf->stack_pg_bytes;

	mz = npa_lf_stack_dma_alloc(lf, name, pool_id, alloc_size);
	if (mz == NULL) {
		rc = -ENOMEM;
		goto aura_res_put;
	}
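	/* The shift programmed below is a pure function of the pool depth:
	 * e.g. for block_count = 4096, __builtin_clz(4096) = 19, so
	 * shift = 19 - 8 = 11; deeper pools get smaller shift values.
	 */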
	/* Update aura fields */
	aura->pool_addr = pool_id; /* AF will translate to associated poolctx */
	aura->ena = 1;
	aura->shift = __builtin_clz(block_count) - 8;
	aura->limit = block_count;
	aura->pool_caching = 1;
	aura->err_int_ena = BIT(NPA_AURA_ERR_INT_AURA_ADD_OVER);
	aura->err_int_ena |= BIT(NPA_AURA_ERR_INT_AURA_ADD_UNDER);
	aura->err_int_ena |= BIT(NPA_AURA_ERR_INT_AURA_FREE_UNDER);
	aura->err_int_ena |= BIT(NPA_AURA_ERR_INT_POOL_DIS);
	/* Many to one reduction */
	aura->err_qint_idx = aura_id % lf->qints;
	/* Update pool fields */
	pool->stack_base = mz->iova;
	pool->ena = 1;
	pool->buf_size = block_size / OTX2_ALIGN;
	pool->stack_max_pages = stack_size;
	pool->shift = __builtin_clz(block_count) - 8;
	pool->ptr_start = 0;
	pool->ptr_end = ~0;
	pool->stack_caching = 1;
	pool->err_int_ena = BIT(NPA_POOL_ERR_INT_OVFLS);
	pool->err_int_ena |= BIT(NPA_POOL_ERR_INT_RANGE);
	pool->err_int_ena |= BIT(NPA_POOL_ERR_INT_PERR);

	/* Many to one reduction */
	pool->err_qint_idx = pool_id % lf->qints;
	/* Issue AURA_INIT and POOL_INIT op */
	rc = npa_lf_aura_pool_init(lf->mbox, aura_id, aura, pool);
	if (rc)
		goto stack_mem_free;

	*aura_handle = npa_lf_aura_handle_gen(aura_id, lf->base);

	/* Update aura count */
	npa_lf_aura_op_cnt_set(*aura_handle, 0, block_count);
	/* Read it back to make sure aura count is updated */
	npa_lf_aura_op_cnt_get(*aura_handle);

	return 0;

stack_mem_free:
	rte_memzone_free(mz);
aura_res_put:
	rte_bitmap_set(lf->npa_bmp, aura_id);
exit:
	return rc;
}
static int
npa_lf_aura_pool_pair_free(struct otx2_npa_lf *lf, uint64_t aura_handle)
{
	char name[RTE_MEMZONE_NAMESIZE];
	int aura_id, pool_id, rc;

	if (!lf || !aura_handle)
		return NPA_LF_ERR_PARAM;

	aura_id = pool_id = npa_lf_aura_handle_to_aura(aura_handle);
	rc = npa_lf_aura_pool_fini(lf->mbox, aura_id, aura_handle);
	rc |= npa_lf_stack_dma_free(lf, name, pool_id);

	rte_bitmap_set(lf->npa_bmp, aura_id);

	return rc;
}
static int
npa_lf_aura_range_update_check(uint64_t aura_handle)
{
	uint64_t aura_id = npa_lf_aura_handle_to_aura(aura_handle);
	struct otx2_npa_lf *lf = otx2_npa_lf_obj_get();
	struct npa_aura_lim *lim = lf->aura_lim;
	__otx2_io struct npa_pool_s *pool;
	struct npa_aq_enq_req *req;
	struct npa_aq_enq_rsp *rsp;
	int rc;

	req = otx2_mbox_alloc_msg_npa_aq_enq(lf->mbox);

	req->aura_id = aura_id;
	req->ctype = NPA_AQ_CTYPE_POOL;
	req->op = NPA_AQ_INSTOP_READ;

	rc = otx2_mbox_process_msg(lf->mbox, (void *)&rsp);
	if (rc) {
		otx2_err("Failed to get pool(0x%"PRIx64") context", aura_id);
		return rc;
	}

	pool = &rsp->pool;

	if (lim[aura_id].ptr_start != pool->ptr_start ||
	    lim[aura_id].ptr_end != pool->ptr_end) {
		otx2_err("Range update failed on pool(0x%"PRIx64")", aura_id);
		return -ERANGE;
	}

	return 0;
}
static int
otx2_npa_alloc(struct rte_mempool *mp)
{
	uint32_t block_size, block_count;
	uint64_t aura_handle = 0;
	struct otx2_npa_lf *lf;
	struct npa_aura_s aura;
	struct npa_pool_s pool;
	size_t padding;
	int rc;

	lf = otx2_npa_lf_obj_get();
	if (lf == NULL) {
		rc = -EINVAL;
		goto error;
	}

	block_size = mp->elt_size + mp->header_size + mp->trailer_size;
	/*
	 * OCTEON TX2 has 8 sets, 41 ways L1D cache, VA<9:7> bits dictate
	 * the set selection.
	 * Add additional padding to ensure that the element size always
	 * occupies odd number of cachelines to ensure even distribution
	 * of elements among L1D cache sets.
	 */
	padding = ((block_size / RTE_CACHE_LINE_SIZE) % 2) ? 0 :
				RTE_CACHE_LINE_SIZE;
	mp->trailer_size += padding;
	block_size += padding;
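	/* Example with 128 B cache lines: a 2048 B element spans 16 lines
	 * (even), so one extra line is added for a 2176 B stride (17 lines,
	 * odd); a 2176 B element gets no padding. An odd stride makes
	 * consecutive elements rotate through all 8 L1D sets.
	 */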
	block_count = mp->size;

	if (block_size % OTX2_ALIGN != 0) {
		otx2_err("Block size should be multiple of 128B");
		rc = -ERANGE;
		goto error;
	}

	memset(&aura, 0, sizeof(struct npa_aura_s));
	memset(&pool, 0, sizeof(struct npa_pool_s));
	pool.nat_align = 1;
	pool.buf_offset = 1;

	if ((uint32_t)pool.buf_offset * OTX2_ALIGN != mp->header_size) {
		otx2_err("Unsupported mp->header_size=%d", mp->header_size);
		rc = -EINVAL;
		goto error;
	}

	/* Use driver specific mp->pool_config to override aura config */
	if (mp->pool_config != NULL)
		memcpy(&aura, mp->pool_config, sizeof(struct npa_aura_s));

	rc = npa_lf_aura_pool_pair_alloc(lf, block_size, block_count,
					 &aura, &pool, &aura_handle);
	if (rc) {
		otx2_err("Failed to alloc pool or aura rc=%d", rc);
		goto error;
	}

	/* Store aura_handle for future queue operations */
	mp->pool_id = aura_handle;
	otx2_npa_dbg("lf=%p block_sz=%d block_count=%d aura_handle=0x%"PRIx64,
		     lf, block_size, block_count, aura_handle);

	/* Just hold the reference of the object */
	otx2_npa_lf_obj_ref();
	return 0;

error:
	return rc;
}
static void
otx2_npa_free(struct rte_mempool *mp)
{
	struct otx2_npa_lf *lf = otx2_npa_lf_obj_get();
	int rc = 0;

	otx2_npa_dbg("lf=%p aura_handle=0x%"PRIx64, lf, mp->pool_id);
	if (lf != NULL)
		rc = npa_lf_aura_pool_pair_free(lf, mp->pool_id);

	if (rc)
		otx2_err("Failed to free pool or aura rc=%d", rc);

	/* Release the reference of npalf */
	otx2_npa_lf_fini();
}
static ssize_t
otx2_npa_calc_mem_size(const struct rte_mempool *mp, uint32_t obj_num,
		       uint32_t pg_shift, size_t *min_chunk_size, size_t *align)
{
	size_t total_elt_sz;

	/* Need space for one more obj on each chunk to fulfill
	 * alignment requirements.
	 */
	total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
	return rte_mempool_op_calc_mem_size_helper(mp, obj_num, pg_shift,
						   total_elt_sz,
						   min_chunk_size, align);
}
static uint8_t
otx2_npa_l1d_way_set_get(uint64_t iova)
{
	return (iova >> rte_log2_u32(RTE_CACHE_LINE_SIZE)) & 0x7;
}
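/* With 128 B cache lines this extracts IOVA bits <9:7>, i.e. the L1D set
 * the address maps to; populate() uses it only to log how evenly objects
 * spread across the 8 sets.
 */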
static int
otx2_npa_populate(struct rte_mempool *mp, unsigned int max_objs, void *vaddr,
		  rte_iova_t iova, size_t len,
		  rte_mempool_populate_obj_cb_t *obj_cb, void *obj_cb_arg)
{
#define OTX2_L1D_NB_SETS	8
	uint64_t distribution[OTX2_L1D_NB_SETS];
	rte_iova_t start_iova;
	size_t total_elt_sz;
	uint8_t set;
	size_t off;
	int i;

	if (iova == RTE_BAD_IOVA)
		return -EINVAL;

	total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;

	/* Align object start address to a multiple of total_elt_sz */
	off = total_elt_sz - ((((uintptr_t)vaddr - 1) % total_elt_sz) + 1);

	if (len < off)
		return -EINVAL;

	vaddr = (char *)vaddr + off;
	iova += off;
	len -= off;

	memset(distribution, 0, sizeof(uint64_t) * OTX2_L1D_NB_SETS);
	start_iova = iova;
	while (start_iova < iova + len) {
		set = otx2_npa_l1d_way_set_get(start_iova + mp->header_size);
		distribution[set]++;
		start_iova += total_elt_sz;
	}

	otx2_npa_dbg("iova %"PRIx64", aligned iova %"PRIx64"", iova - off,
		     iova);
	otx2_npa_dbg("length %"PRIu64", aligned length %"PRIu64"",
		     (uint64_t)(len + off), (uint64_t)len);
	otx2_npa_dbg("element size %"PRIu64"", (uint64_t)total_elt_sz);
	otx2_npa_dbg("requested objects %"PRIu64", possible objects %"PRIu64"",
		     (uint64_t)max_objs, (uint64_t)(len / total_elt_sz));
	otx2_npa_dbg("L1D set distribution :");
	for (i = 0; i < OTX2_L1D_NB_SETS; i++)
		otx2_npa_dbg("set[%d] : objects : %"PRIu64"", i,
			     distribution[i]);

	npa_lf_aura_op_range_set(mp->pool_id, iova, iova + len);

	if (npa_lf_aura_range_update_check(mp->pool_id) < 0)
		return -EBUSY;

	return rte_mempool_op_populate_helper(mp,
					RTE_MEMPOOL_POPULATE_F_ALIGN_OBJ,
					max_objs, vaddr, iova, len,
					obj_cb, obj_cb_arg);
}
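/* Applications pick these ops by name, typically with
 * rte_mempool_set_ops_byname(mp, "octeontx2_npa", NULL) or by building
 * DPDK with RTE_MBUF_DEFAULT_MEMPOOL_OPS set to "octeontx2_npa".
 */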
static struct rte_mempool_ops otx2_npa_ops = {
	.name = "octeontx2_npa",
	.alloc = otx2_npa_alloc,
	.free = otx2_npa_free,
	.enqueue = otx2_npa_enq,
	.get_count = otx2_npa_get_count,
	.calc_mem_size = otx2_npa_calc_mem_size,
	.populate = otx2_npa_populate,
#if defined(RTE_ARCH_ARM64)
	.dequeue = otx2_npa_deq_arm64,
#else
	.dequeue = otx2_npa_deq,
#endif
};

MEMPOOL_REGISTER_OPS(otx2_npa_ops);