1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(C) 2019 Marvell International Ltd.
5 #include <rte_mempool.h>
8 #include "otx2_mempool.h"
/* Enqueue (free) 'n' objects back to the NPA aura backing this mempool.
 * The aura id and the hardware free-register address are both derived
 * from mp->pool_id (the encoded aura handle).
 * NOTE(review): surrounding lines are elided; the free-register offset
 * added to 'addr' and the return value are not visible here — the
 * visible code shows no failure path, so presumably it always returns 0;
 * confirm in the full file.
 */
11 otx2_npa_enq(struct rte_mempool *mp, void * const *obj_table, unsigned int n)
13 unsigned int index; const uint64_t aura_handle = mp->pool_id;
/* Aura id extracted from the handle; stored alongside each pointer. */
14 const uint64_t reg = npa_lf_aura_handle_to_aura(aura_handle);
15 const uint64_t addr = npa_lf_aura_handle_to_base(aura_handle) +
/* One paired (128-bit) store per object: {object address, aura id}
 * written to the NPA free doorbell address. */
18 for (index = 0; index < n; index++)
19 otx2_store_pair((uint64_t)obj_table[index], reg, addr);
/* Allocate a single object from the aura by issuing an atomic add of
 * 'wdata' (the aura id word) to the NPA ALLOC register at 'addr'; the
 * hardware returns the allocated buffer pointer as the load result,
 * which is stored into obj_table[i].
 * NOTE(review): the visible check treats a non-NULL result as success;
 * the success/failure return values themselves are in elided lines —
 * callers below treat non-zero as failure, so presumably this returns 0
 * on success and non-zero when the aura is empty.
 */
24 static __rte_noinline int
25 npa_lf_aura_op_alloc_one(const int64_t wdata, int64_t * const addr,
26 void **obj_table, uint8_t i)
/* Atomic LDADD-style op: hardware pops one pointer from the aura. */
31 obj_table[i] = (void *)otx2_atomic64_add_nosync(wdata, addr);
32 if (obj_table[i] != NULL)
40 #if defined(RTE_ARCH_ARM64)
/* Retry pass after a partial bulk allocation: walk the result table and
 * re-attempt a single allocation for every slot the bulk path left NULL.
 * Slots already holding a pointer are kept as-is. Returns non-zero (via
 * elided lines) if any retry fails, leaving the table partially filled
 * for the caller to clean up.
 */
41 static __rte_noinline int
42 npa_lf_aura_op_search_alloc(const int64_t wdata, int64_t * const addr,
43 void **obj_table, unsigned int n)
47 for (i = 0; i < n; i++) {
/* Slot filled by the bulk CASP path — skip it. */
48 if (obj_table[i] != NULL)
/* Per-slot retry; bail out on the first hard failure. */
50 if (npa_lf_aura_op_alloc_one(wdata, addr, obj_table, i))
/* Bulk-allocate objects from the aura using ARMv8.1-A CASP
 * (compare-and-swap pair) exchanges against the single NPA ALLOC
 * register at 'addr'. Each CASP pops up to two buffer pointers from the
 * aura into a GPR pair; the pairs are then moved into NEON registers,
 * stored to obj_table, and AND-accumulated into 'failed' so that a NULL
 * (failed) allocation anywhere clears bits in the accumulator.
 * NOTE(review): the switch on 'n' selecting the 32/16/8/4/2/1 unrolled
 * variants is elided; 'n' is presumably restricted to these power-of-two
 * sizes by the caller (otx2_npa_deq_arm64 computes such sizes).
 */
57 static __rte_noinline int
58 npa_lf_aura_op_alloc_bulk(const int64_t wdata, int64_t * const addr,
59 unsigned int n, void **obj_table)
/* The same aura-id word is pinned in two fixed registers (x26/x27) so
 * every CASP can name an even/odd register pair as required by the ISA. */
61 register const uint64_t wdata64 __asm("x26") = wdata;
62 register const uint64_t wdata128 __asm("x27") = wdata;
/* All-ones accumulator: stays non-zero in both lanes only if every
 * returned pointer was non-NULL. */
63 uint64x2_t failed = vdupq_n_u64(~0);
/* ---- 32-object variant: 16 CASP exchanges (x0..x23 plus reuse). ---- */
70 "casp x0, x1, %[wdata64], %[wdata128], [%[loc]]\n"
71 "casp x2, x3, %[wdata64], %[wdata128], [%[loc]]\n"
72 "casp x4, x5, %[wdata64], %[wdata128], [%[loc]]\n"
73 "casp x6, x7, %[wdata64], %[wdata128], [%[loc]]\n"
74 "casp x8, x9, %[wdata64], %[wdata128], [%[loc]]\n"
75 "casp x10, x11, %[wdata64], %[wdata128], [%[loc]]\n"
76 "casp x12, x13, %[wdata64], %[wdata128], [%[loc]]\n"
77 "casp x14, x15, %[wdata64], %[wdata128], [%[loc]]\n"
78 "casp x16, x17, %[wdata64], %[wdata128], [%[loc]]\n"
79 "casp x18, x19, %[wdata64], %[wdata128], [%[loc]]\n"
80 "casp x20, x21, %[wdata64], %[wdata128], [%[loc]]\n"
81 "casp x22, x23, %[wdata64], %[wdata128], [%[loc]]\n"
/* First batch of GPR pairs is parked in v16..v19 (elided fmov lines move
 * the even registers into D[0]) so x0..x7 can be reused for more CASPs. */
84 "casp x0, x1, %[wdata64], %[wdata128], [%[loc]]\n"
87 "casp x2, x3, %[wdata64], %[wdata128], [%[loc]]\n"
90 "casp x4, x5, %[wdata64], %[wdata128], [%[loc]]\n"
93 "casp x6, x7, %[wdata64], %[wdata128], [%[loc]]\n"
/* Fold results into the failure accumulator: any NULL pointer zeroes
 * bits of 'failed'. */
94 "and %[failed].16B, %[failed].16B, v16.16B\n"
95 "and %[failed].16B, %[failed].16B, v17.16B\n"
96 "and %[failed].16B, %[failed].16B, v18.16B\n"
97 "and %[failed].16B, %[failed].16B, v19.16B\n"
/* Pack x8..x15 into v20..v23 (odd register goes to lane D[1]). */
101 "fmov v21.D[1], x11\n"
103 "fmov v22.D[1], x13\n"
105 "fmov v23.D[1], x15\n"
106 "and %[failed].16B, %[failed].16B, v20.16B\n"
107 "and %[failed].16B, %[failed].16B, v21.16B\n"
108 "and %[failed].16B, %[failed].16B, v22.16B\n"
109 "and %[failed].16B, %[failed].16B, v23.16B\n"
/* Flush the first 16 pointers to obj_table (post-incremented dst). */
110 "st1 { v16.2d, v17.2d, v18.2d, v19.2d}, [%[dst]], 64\n"
111 "st1 { v20.2d, v21.2d, v22.2d, v23.2d}, [%[dst]], 64\n"
/* Pack x16..x23 (second half of the first CASP burst) into v16..v19. */
113 "fmov v16.D[1], x17\n"
115 "fmov v17.D[1], x19\n"
117 "fmov v18.D[1], x21\n"
119 "fmov v19.D[1], x23\n"
120 "and %[failed].16B, %[failed].16B, v16.16B\n"
121 "and %[failed].16B, %[failed].16B, v17.16B\n"
122 "and %[failed].16B, %[failed].16B, v18.16B\n"
123 "and %[failed].16B, %[failed].16B, v19.16B\n"
/* Pack the reissued x0..x7 results into v20..v23. */
125 "fmov v20.D[1], x1\n"
127 "fmov v21.D[1], x3\n"
129 "fmov v22.D[1], x5\n"
131 "fmov v23.D[1], x7\n"
132 "and %[failed].16B, %[failed].16B, v20.16B\n"
133 "and %[failed].16B, %[failed].16B, v21.16B\n"
134 "and %[failed].16B, %[failed].16B, v22.16B\n"
135 "and %[failed].16B, %[failed].16B, v23.16B\n"
/* Flush the remaining 16 pointers. */
136 "st1 { v16.2d, v17.2d, v18.2d, v19.2d}, [%[dst]], 64\n"
137 "st1 { v20.2d, v21.2d, v22.2d, v23.2d}, [%[dst]], 64\n"
/* "+Q"(*addr) tells the compiler the ALLOC register location is
 * read/written; 'memory' plus explicit GPR/NEON clobbers cover the rest. */
138 : "+Q" (*addr), [failed] "=&w" (failed)
139 : [wdata64] "r" (wdata64), [wdata128] "r" (wdata128),
140 [dst] "r" (obj_table), [loc] "r" (addr)
141 : "memory", "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
142 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16",
143 "x17", "x18", "x19", "x20", "x21", "x22", "x23", "v16", "v17",
144 "v18", "v19", "v20", "v21", "v22", "v23"
/* ---- 16-object variant: 8 CASPs, same pack/AND/store pattern. ---- */
152 "casp x0, x1, %[wdata64], %[wdata128], [%[loc]]\n"
153 "casp x2, x3, %[wdata64], %[wdata128], [%[loc]]\n"
154 "casp x4, x5, %[wdata64], %[wdata128], [%[loc]]\n"
155 "casp x6, x7, %[wdata64], %[wdata128], [%[loc]]\n"
156 "casp x8, x9, %[wdata64], %[wdata128], [%[loc]]\n"
157 "casp x10, x11, %[wdata64], %[wdata128], [%[loc]]\n"
158 "casp x12, x13, %[wdata64], %[wdata128], [%[loc]]\n"
159 "casp x14, x15, %[wdata64], %[wdata128], [%[loc]]\n"
161 "fmov v16.D[1], x1\n"
163 "fmov v17.D[1], x3\n"
165 "fmov v18.D[1], x5\n"
167 "fmov v19.D[1], x7\n"
168 "and %[failed].16B, %[failed].16B, v16.16B\n"
169 "and %[failed].16B, %[failed].16B, v17.16B\n"
170 "and %[failed].16B, %[failed].16B, v18.16B\n"
171 "and %[failed].16B, %[failed].16B, v19.16B\n"
173 "fmov v20.D[1], x9\n"
175 "fmov v21.D[1], x11\n"
177 "fmov v22.D[1], x13\n"
179 "fmov v23.D[1], x15\n"
180 "and %[failed].16B, %[failed].16B, v20.16B\n"
181 "and %[failed].16B, %[failed].16B, v21.16B\n"
182 "and %[failed].16B, %[failed].16B, v22.16B\n"
183 "and %[failed].16B, %[failed].16B, v23.16B\n"
184 "st1 { v16.2d, v17.2d, v18.2d, v19.2d}, [%[dst]], 64\n"
185 "st1 { v20.2d, v21.2d, v22.2d, v23.2d}, [%[dst]], 64\n"
186 : "+Q" (*addr), [failed] "=&w" (failed)
187 : [wdata64] "r" (wdata64), [wdata128] "r" (wdata128),
188 [dst] "r" (obj_table), [loc] "r" (addr)
189 : "memory", "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
190 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "v16",
191 "v17", "v18", "v19", "v20", "v21", "v22", "v23"
/* ---- 8-object variant: 4 CASPs. ---- */
199 "casp x0, x1, %[wdata64], %[wdata128], [%[loc]]\n"
200 "casp x2, x3, %[wdata64], %[wdata128], [%[loc]]\n"
201 "casp x4, x5, %[wdata64], %[wdata128], [%[loc]]\n"
202 "casp x6, x7, %[wdata64], %[wdata128], [%[loc]]\n"
204 "fmov v16.D[1], x1\n"
206 "fmov v17.D[1], x3\n"
208 "fmov v18.D[1], x5\n"
210 "fmov v19.D[1], x7\n"
211 "and %[failed].16B, %[failed].16B, v16.16B\n"
212 "and %[failed].16B, %[failed].16B, v17.16B\n"
213 "and %[failed].16B, %[failed].16B, v18.16B\n"
214 "and %[failed].16B, %[failed].16B, v19.16B\n"
215 "st1 { v16.2d, v17.2d, v18.2d, v19.2d}, [%[dst]], 64\n"
216 : "+Q" (*addr), [failed] "=&w" (failed)
217 : [wdata64] "r" (wdata64), [wdata128] "r" (wdata128),
218 [dst] "r" (obj_table), [loc] "r" (addr)
219 : "memory", "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
220 "v16", "v17", "v18", "v19"
/* ---- 4-object variant: 2 CASPs. ---- */
228 "casp x0, x1, %[wdata64], %[wdata128], [%[loc]]\n"
229 "casp x2, x3, %[wdata64], %[wdata128], [%[loc]]\n"
231 "fmov v16.D[1], x1\n"
233 "fmov v17.D[1], x3\n"
234 "and %[failed].16B, %[failed].16B, v16.16B\n"
235 "and %[failed].16B, %[failed].16B, v17.16B\n"
236 "st1 { v16.2d, v17.2d}, [%[dst]], 32\n"
237 : "+Q" (*addr), [failed] "=&w" (failed)
238 : [wdata64] "r" (wdata64), [wdata128] "r" (wdata128),
239 [dst] "r" (obj_table), [loc] "r" (addr)
240 : "memory", "x0", "x1", "x2", "x3", "v16", "v17"
/* ---- 2-object variant: a single CASP. ---- */
248 "casp x0, x1, %[wdata64], %[wdata128], [%[loc]]\n"
250 "fmov v16.D[1], x1\n"
251 "and %[failed].16B, %[failed].16B, v16.16B\n"
252 "st1 { v16.2d}, [%[dst]], 16\n"
253 : "+Q" (*addr), [failed] "=&w" (failed)
254 : [wdata64] "r" (wdata64), [wdata128] "r" (wdata128),
255 [dst] "r" (obj_table), [loc] "r" (addr)
256 : "memory", "x0", "x1", "v16"
/* n == 1: no pairing possible, fall back to the scalar helper. */
261 return npa_lf_aura_op_alloc_one(wdata, addr, obj_table, 0);
/* If any allocation returned NULL, a lane of 'failed' contains zero
 * bits; rescan the just-written window (dst advanced past it, so step
 * back n pointers) and retry the empty slots one by one. */
264 if (unlikely(!(vgetq_lane_u64(failed, 0) & vgetq_lane_u64(failed, 1))))
265 return npa_lf_aura_op_search_alloc(wdata, addr, (void **)
266 ((char *)obj_table - (sizeof(uint64_t) * n)), n);
/* Undo a partially-successful bulk allocation: return every non-NULL
 * entry of obj_table back to the aura so no buffers are leaked when the
 * dequeue path ultimately reports failure.
 */
271 static __rte_noinline void
272 otx2_npa_clear_alloc(struct rte_mempool *mp, void **obj_table, unsigned int n)
276 for (i = 0; i < n; i++) {
/* NULL slots were never allocated — nothing to free for those. */
277 if (obj_table[i] != NULL) {
278 otx2_npa_enq(mp, &obj_table[i], 1);
/* ARM64 dequeue: satisfy a request for 'n' objects by repeatedly calling
 * the CASP bulk-alloc helper with power-of-two chunk sizes (capped at
 * 32, the largest unrolled variant). On any chunk failure, everything
 * allocated so far (nfree - n entries from the saved table base) is
 * returned to the aura.
 * NOTE(review): the loop decrementing 'n' and advancing obj_table is in
 * elided lines; only the chunk-size computation and failure path are
 * visible here.
 */
284 static __rte_noinline int __rte_hot
285 otx2_npa_deq_arm64(struct rte_mempool *mp, void **obj_table, unsigned int n)
287 const int64_t wdata = npa_lf_aura_handle_to_aura(mp->pool_id);
/* Keep the original table base and count for rollback on failure. */
288 void **obj_table_bak = obj_table;
289 const unsigned int nfree = n;
/* Address of the aura's ALLOC register (index 0). */
292 int64_t * const addr = (int64_t * const)
293 (npa_lf_aura_handle_to_base(mp->pool_id) +
294 NPA_LF_AURA_OP_ALLOCX(0));
/* Chunk size: 32 for large requests, else the largest power of two
 * not exceeding the remaining count. */
296 parts = n > 31 ? 32 : rte_align32prevpow2(n);
298 if (unlikely(npa_lf_aura_op_alloc_bulk(wdata, addr,
299 parts, obj_table))) {
/* nfree - n objects were already allocated in earlier chunks. */
300 otx2_npa_clear_alloc(mp, obj_table_bak, nfree - n);
/* Generic (non-ARM64) dequeue: allocate 'n' objects one at a time via
 * the atomic single-alloc helper. On the first failure, the visible
 * inner loop walks back over the already-filled slots and frees them so
 * the mempool sees an all-or-nothing result.
 * NOTE(review): the pointer rewind inside the rollback loop and the
 * error return value are in elided lines — the visible
 * `otx2_npa_enq(mp, obj_table, 1)` presumably follows an elided
 * `obj_table--`; confirm in the full file.
 */
311 static inline int __rte_hot
312 otx2_npa_deq(struct rte_mempool *mp, void **obj_table, unsigned int n)
314 const int64_t wdata = npa_lf_aura_handle_to_aura(mp->pool_id);
/* ALLOC register address for this aura. */
318 int64_t * const addr = (int64_t *)
319 (npa_lf_aura_handle_to_base(mp->pool_id) +
320 NPA_LF_AURA_OP_ALLOCX(0));
321 for (index = 0; index < n; index++, obj_table++) {
322 obj = npa_lf_aura_op_alloc_one(wdata, addr, obj_table, 0);
/* Failure: free the 'index' objects already allocated. */
324 for (; index > 0; index--) {
326 otx2_npa_enq(mp, obj_table, 1);
330 *obj_table = (void *)obj;
/* Report the number of objects currently available in the aura backing
 * this mempool, as read from the NPA hardware count register. */
339 otx2_npa_get_count(const struct rte_mempool *mp)
341 return (unsigned int)npa_lf_aura_op_available(mp->pool_id);
/* Initialize an aura/pool pair in NPA via the AF mailbox: batch an
 * AURA INIT and a POOL INIT admin-queue request into one mailbox send,
 * wait for both responses, and succeed only if both report rc == 0.
 * Returns 0 on success, NPA_LF_ERR_AURA_POOL_INIT otherwise.
 * NOTE(review): NULL checks on the alloc_msg results are elided here.
 */
345 npa_lf_aura_pool_init(struct otx2_mbox *mbox, uint32_t aura_id,
346 struct npa_aura_s *aura, struct npa_pool_s *pool)
348 struct npa_aq_enq_req *aura_init_req, *pool_init_req;
349 struct npa_aq_enq_rsp *aura_init_rsp, *pool_init_rsp;
350 struct otx2_mbox_dev *mdev = &mbox->dev[0];
/* First message: AURA context INIT with the caller-supplied template. */
353 aura_init_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox);
355 aura_init_req->aura_id = aura_id;
356 aura_init_req->ctype = NPA_AQ_CTYPE_AURA;
357 aura_init_req->op = NPA_AQ_INSTOP_INIT;
358 otx2_mbox_memcpy(&aura_init_req->aura, aura, sizeof(*aura));
/* Second message in the same batch: POOL context INIT (same id —
 * each aura is paired 1:1 with a pool in this driver). */
360 pool_init_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox);
362 pool_init_req->aura_id = aura_id;
363 pool_init_req->ctype = NPA_AQ_CTYPE_POOL;
364 pool_init_req->op = NPA_AQ_INSTOP_INIT;
365 otx2_mbox_memcpy(&pool_init_req->pool, pool, sizeof(*pool));
367 otx2_mbox_msg_send(mbox, 0);
/* rc is the number of responses received when positive. */
368 rc = otx2_mbox_wait_for_rsp(mbox, 0);
/* Walk the response area manually: first rsp sits after the mbox
 * header, second follows at its next_msgoff. */
372 off = mbox->rx_start +
373 RTE_ALIGN(sizeof(struct mbox_hdr), MBOX_MSG_ALIGN);
374 aura_init_rsp = (struct npa_aq_enq_rsp *)((uintptr_t)mdev->mbase + off);
375 off = mbox->rx_start + aura_init_rsp->hdr.next_msgoff;
376 pool_init_rsp = (struct npa_aq_enq_rsp *)((uintptr_t)mdev->mbase + off);
/* Both responses present (rc == 2) and both successful. */
378 if (rc == 2 && aura_init_rsp->hdr.rc == 0 && pool_init_rsp->hdr.rc == 0)
381 return NPA_LF_ERR_AURA_POOL_INIT;
/* Tear down an aura/pool pair: disable the pool and the aura through
 * masked WRITE admin-queue ops (batched in one mailbox send), then issue
 * an NDC-NPA cache sync for the LF. Returns 0 on success or
 * NPA_LF_ERR_AURA_POOL_FINI on any step failing.
 */
385 npa_lf_aura_pool_fini(struct otx2_mbox *mbox,
387 uint64_t aura_handle)
389 struct npa_aq_enq_req *aura_req, *pool_req;
390 struct npa_aq_enq_rsp *aura_rsp, *pool_rsp;
391 struct otx2_mbox_dev *mdev = &mbox->dev[0];
392 struct ndc_sync_op *ndc_req;
395 /* Procedure for disabling an aura/pool */
/* Dummy alloc drains/settles hardware state before disabling. */
397 npa_lf_aura_op_alloc(aura_handle, 0);
/* WRITE op with ena=0; the all-ones mask limits the write to the
 * 'ena' field only. */
399 pool_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox);
400 pool_req->aura_id = aura_id;
401 pool_req->ctype = NPA_AQ_CTYPE_POOL;
402 pool_req->op = NPA_AQ_INSTOP_WRITE;
403 pool_req->pool.ena = 0;
404 pool_req->pool_mask.ena = ~pool_req->pool_mask.ena;
/* Same masked-disable for the aura context. */
406 aura_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox);
407 aura_req->aura_id = aura_id;
408 aura_req->ctype = NPA_AQ_CTYPE_AURA;
409 aura_req->op = NPA_AQ_INSTOP_WRITE;
410 aura_req->aura.ena = 0;
411 aura_req->aura_mask.ena = ~aura_req->aura_mask.ena;
413 otx2_mbox_msg_send(mbox, 0);
/* rc is the number of responses received when positive. */
414 rc = otx2_mbox_wait_for_rsp(mbox, 0);
/* Responses arrive in send order: pool first, then aura. */
418 off = mbox->rx_start +
419 RTE_ALIGN(sizeof(struct mbox_hdr), MBOX_MSG_ALIGN);
420 pool_rsp = (struct npa_aq_enq_rsp *)((uintptr_t)mdev->mbase + off);
422 off = mbox->rx_start + pool_rsp->hdr.next_msgoff;
423 aura_rsp = (struct npa_aq_enq_rsp *)((uintptr_t)mdev->mbase + off);
425 if (rc != 2 || aura_rsp->hdr.rc != 0 || pool_rsp->hdr.rc != 0)
426 return NPA_LF_ERR_AURA_POOL_FINI;
428 /* Sync NDC-NPA for LF */
429 ndc_req = otx2_mbox_alloc_msg_ndc_sync_op(mbox);
430 ndc_req->npa_lf_sync = 1;
432 rc = otx2_mbox_process(mbox);
434 otx2_err("Error on NDC-NPA LF sync, rc %d", rc);
435 return NPA_LF_ERR_AURA_POOL_FINI;
/* Build the per-pool stack memzone name ("otx2_npa_stack_<pf_func>_<id>")
 * into the caller-provided 'name' buffer (RTE_MEMZONE_NAMESIZE bytes);
 * presumably returns 'name' (return line is elided). */
441 npa_lf_stack_memzone_name(struct otx2_npa_lf *lf, int pool_id, char *name)
443 snprintf(name, RTE_MEMZONE_NAMESIZE, "otx2_npa_stack_%x_%d",
444 lf->pf_func, pool_id);
/* Reserve an IOVA-contiguous, OTX2_ALIGN-aligned memzone to hold the
 * pool's stack pages (hardware DMAs the free-pointer stack here).
 * Returns the memzone or NULL on failure, per rte_memzone semantics. */
449 static inline const struct rte_memzone *
450 npa_lf_stack_dma_alloc(struct otx2_npa_lf *lf, char *name,
451 int pool_id, size_t size)
453 return rte_memzone_reserve_aligned(
454 npa_lf_stack_memzone_name(lf, pool_id, name), size, 0,
455 RTE_MEMZONE_IOVA_CONTIG, OTX2_ALIGN);
/* Look up the pool's stack memzone by its deterministic name and free
 * it. NOTE(review): the NULL-lookup handling between the lookup and the
 * free is elided — presumably an early return when 'mz' is NULL. */
459 npa_lf_stack_dma_free(struct otx2_npa_lf *lf, char *name, int pool_id)
461 const struct rte_memzone *mz;
463 mz = rte_memzone_lookup(npa_lf_stack_memzone_name(lf, pool_id, name));
467 return rte_memzone_free(mz);
/* Index of the lowest set bit in 'slab'. NOTE(review): the guard for
 * slab == 0 (where __builtin_ctzll is undefined) is in elided lines. */
471 bitmap_ctzll(uint64_t slab)
476 return __builtin_ctzll(slab);
/* Allocate and initialize one aura/pool pair:
 *  1. validate arguments and block size (cache-line multiple, 128B-128KB),
 *  2. claim a free aura id from the LF resource bitmap,
 *  3. reserve DMA memory for the pool's pointer stack,
 *  4. fill the aura and pool context templates,
 *  5. issue AURA_INIT/POOL_INIT via mailbox and publish the handle and
 *     initial object count.
 * On failure the elided error labels free the memzone and re-set the
 * bitmap bit (visible at the tail). Returns 0 or an NPA_LF_ERR_* code.
 */
480 npa_lf_aura_pool_pair_alloc(struct otx2_npa_lf *lf, const uint32_t block_size,
481 const uint32_t block_count, struct npa_aura_s *aura,
482 struct npa_pool_s *pool, uint64_t *aura_handle)
484 int rc, aura_id, pool_id, stack_size, alloc_size;
485 char name[RTE_MEMZONE_NAMESIZE];
486 const struct rte_memzone *mz;
491 if (!lf || !block_size || !block_count ||
492 !pool || !aura || !aura_handle)
493 return NPA_LF_ERR_PARAM;
495 /* Block size should be cache line aligned and in range of 128B-128KB */
496 if (block_size % OTX2_ALIGN || block_size < 128 ||
497 block_size > 128 * 1024)
498 return NPA_LF_ERR_INVALID_BLOCK_SZ;
501 /* Scan from the beginning */
502 __rte_bitmap_scan_init(lf->npa_bmp)
503 /* Scan bitmap to get the free pool */
504 rc = rte_bitmap_scan(lf->npa_bmp, &pos, &slab);
507 otx2_err("Mempools exhausted, 'max_pools' devargs to increase");
511 /* Get aura_id from resource bitmap */
512 aura_id = pos + bitmap_ctzll(slab);
513 /* Mark pool as reserved */
514 rte_bitmap_clear(lf->npa_bmp, aura_id);
516 /* Configuration based on each aura has separate pool(aura-pool pair) */
/* Range check: id must fit the LF's pool count and the aura size
 * encoding (2^(6 + aura_sz) auras). */
518 rc = (aura_id < 0 || pool_id >= (int)lf->nr_pools || aura_id >=
519 (int)BIT_ULL(6 + lf->aura_sz)) ? NPA_LF_ERR_AURA_ID_ALLOC : 0;
523 /* Allocate stack memory */
/* Stack pages needed = ceil(block_count / pointers-per-page). */
524 stack_size = (block_count + lf->stack_pg_ptrs - 1) / lf->stack_pg_ptrs;
525 alloc_size = stack_size * lf->stack_pg_bytes;
527 mz = npa_lf_stack_dma_alloc(lf, name, pool_id, alloc_size);
533 /* Update aura fields */
534 aura->pool_addr = pool_id;/* AF will translate to associated poolctx */
/* shift ~ log2(block_count) - 8; the clamp for small counts is in
 * elided lines — presumably floored at 0. */
536 aura->shift = __builtin_clz(block_count) - 8;
537 aura->limit = block_count;
538 aura->pool_caching = 1;
539 aura->err_int_ena = BIT(NPA_AURA_ERR_INT_AURA_ADD_OVER);
540 aura->err_int_ena |= BIT(NPA_AURA_ERR_INT_AURA_ADD_UNDER);
541 aura->err_int_ena |= BIT(NPA_AURA_ERR_INT_AURA_FREE_UNDER);
542 aura->err_int_ena |= BIT(NPA_AURA_ERR_INT_POOL_DIS);
543 /* Many to one reduction */
544 aura->err_qint_idx = aura_id % lf->qints;
546 /* Update pool fields */
547 pool->stack_base = mz->iova;
/* Hardware expresses buffer size in OTX2_ALIGN (128B) units. */
549 pool->buf_size = block_size / OTX2_ALIGN;
550 pool->stack_max_pages = stack_size;
551 pool->shift = __builtin_clz(block_count) - 8;
554 pool->stack_caching = 1;
555 pool->err_int_ena = BIT(NPA_POOL_ERR_INT_OVFLS);
556 pool->err_int_ena |= BIT(NPA_POOL_ERR_INT_RANGE);
557 pool->err_int_ena |= BIT(NPA_POOL_ERR_INT_PERR);
559 /* Many to one reduction */
560 pool->err_qint_idx = pool_id % lf->qints;
562 /* Issue AURA_INIT and POOL_INIT op */
563 rc = npa_lf_aura_pool_init(lf->mbox, aura_id, aura, pool);
567 *aura_handle = npa_lf_aura_handle_gen(aura_id, lf->base);
569 /* Update aura count */
570 npa_lf_aura_op_cnt_set(*aura_handle, 0, block_count);
571 /* Read it back to make sure aura count is updated */
572 npa_lf_aura_op_cnt_get(*aura_handle);
/* Error unwind (labels elided): release memzone, then bitmap bit. */
577 rte_memzone_free(mz);
579 rte_bitmap_set(lf->npa_bmp, aura_id);
/* Free an aura/pool pair: disable contexts via the mailbox, free the
 * stack memzone, and return the aura id to the resource bitmap. The OR
 * of the two return codes means either failure is reported, but cleanup
 * still proceeds through all steps.
 */
585 npa_lf_aura_pool_pair_free(struct otx2_npa_lf *lf, uint64_t aura_handle)
587 char name[RTE_MEMZONE_NAMESIZE];
588 int aura_id, pool_id, rc;
590 if (!lf || !aura_handle)
591 return NPA_LF_ERR_PARAM;
/* Aura and pool share one id in this driver's 1:1 pairing. */
593 aura_id = pool_id = npa_lf_aura_handle_to_aura(aura_handle);
594 rc = npa_lf_aura_pool_fini(lf->mbox, aura_id, aura_handle);
595 rc |= npa_lf_stack_dma_free(lf, name, pool_id);
/* Mark the id free again regardless of rc. */
597 rte_bitmap_set(lf->npa_bmp, aura_id);
/* Verify that the buffer address range programmed into the hardware pool
 * context matches the range the driver cached in lf->aura_lim: read the
 * POOL context back through the mailbox and compare ptr_start/ptr_end.
 * Returns negative (via elided lines) on mismatch or mailbox failure.
 */
603 npa_lf_aura_range_update_check(uint64_t aura_handle)
605 uint64_t aura_id = npa_lf_aura_handle_to_aura(aura_handle);
606 struct otx2_npa_lf *lf = otx2_npa_lf_obj_get();
607 struct npa_aura_lim *lim = lf->aura_lim;
/* __otx2_io: pool context is read from device-shared memory. */
608 __otx2_io struct npa_pool_s *pool;
609 struct npa_aq_enq_req *req;
610 struct npa_aq_enq_rsp *rsp;
/* READ op on the POOL context for this aura id. */
613 req = otx2_mbox_alloc_msg_npa_aq_enq(lf->mbox);
615 req->aura_id = aura_id;
616 req->ctype = NPA_AQ_CTYPE_POOL;
617 req->op = NPA_AQ_INSTOP_READ;
619 rc = otx2_mbox_process_msg(lf->mbox, (void *)&rsp);
621 otx2_err("Failed to get pool(0x%"PRIx64") context", aura_id);
/* Compare the hardware-visible range against the cached limits. */
627 if (lim[aura_id].ptr_start != pool->ptr_start ||
628 lim[aura_id].ptr_end != pool->ptr_end) {
629 otx2_err("Range update failed on pool(0x%"PRIx64")", aura_id);
/* rte_mempool_ops 'alloc' hook: create the NPA aura/pool pair backing
 * this mempool. Computes the element block size (with trailer padding so
 * each element spans an odd number of cache lines, spreading elements
 * across L1D sets), builds the aura/pool context templates, and stores
 * the resulting aura handle in mp->pool_id.
 */
637 otx2_npa_alloc(struct rte_mempool *mp)
639 uint32_t block_size, block_count;
640 struct otx2_npa_lf *lf;
641 struct npa_aura_s aura;
642 struct npa_pool_s pool;
643 uint64_t aura_handle;
647 lf = otx2_npa_lf_obj_get();
/* Full per-element footprint: header + object + trailer. */
653 block_size = mp->elt_size + mp->header_size + mp->trailer_size;
655 * OCTEON TX2 has 8 sets, 41 ways L1D cache, VA<9:7> bits dictate
657 * Add additional padding to ensure that the element size always
658 * occupies odd number of cachelines to ensure even distribution
659 * of elements among L1D cache sets.
/* Zero padding when already an odd number of cache lines; the padded
 * amount (elided on the next original line) is otherwise one line. */
661 padding = ((block_size / RTE_CACHE_LINE_SIZE) % 2) ? 0 :
663 mp->trailer_size += padding;
664 block_size += padding;
666 block_count = mp->size;
668 if (block_size % OTX2_ALIGN != 0) {
669 otx2_err("Block size should be multiple of 128B");
674 memset(&aura, 0, sizeof(struct npa_aura_s));
675 memset(&pool, 0, sizeof(struct npa_pool_s));
/* Hardware buf_offset is in OTX2_ALIGN units; reject header sizes it
 * cannot represent exactly. */
679 if ((uint32_t)pool.buf_offset * OTX2_ALIGN != mp->header_size) {
680 otx2_err("Unsupported mp->header_size=%d", mp->header_size);
685 /* Use driver specific mp->pool_config to override aura config */
686 if (mp->pool_config != NULL)
687 memcpy(&aura, mp->pool_config, sizeof(struct npa_aura_s));
689 rc = npa_lf_aura_pool_pair_alloc(lf, block_size, block_count,
690 &aura, &pool, &aura_handle);
692 otx2_err("Failed to alloc pool or aura rc=%d", rc);
696 /* Store aura_handle for future queue operations */
697 mp->pool_id = aura_handle;
698 otx2_npa_dbg("lf=%p block_sz=%d block_count=%d aura_handle=0x%"PRIx64,
699 lf, block_size, block_count, aura_handle);
701 /* Just hold the reference of the object */
702 otx2_npa_lf_obj_ref();
/* rte_mempool_ops 'free' hook: tear down the aura/pool pair identified
 * by mp->pool_id and drop the NPA LF object reference taken in alloc.
 */
711 otx2_npa_free(struct rte_mempool *mp)
713 struct otx2_npa_lf *lf = otx2_npa_lf_obj_get();
714 otx2_npa_dbg("lf=%p aura_handle=0x%"PRIx64, lf, mp->pool_id);
716 rc = npa_lf_aura_pool_pair_free(lf, mp->pool_id);
/* Failure is logged only; teardown continues regardless. */
719 otx2_err("Failed to free pool or aura rc=%d", rc);
721 /* Release the reference of npalf */
/* rte_mempool_ops 'calc_mem_size' hook: delegate to the mempool helper
 * using the full element size, so chunk sizing accounts for per-object
 * alignment (space for one extra object per chunk, per the comment). */
726 otx2_npa_calc_mem_size(const struct rte_mempool *mp, uint32_t obj_num,
727 uint32_t pg_shift, size_t *min_chunk_size, size_t *align)
731 /* Need space for one more obj on each chunk to fulfill
732 * alignment requirements.
734 total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
735 return rte_mempool_op_calc_mem_size_helper(mp, obj_num, pg_shift,
736 total_elt_sz, min_chunk_size,
/* L1D cache-set index (0-7) for an IOVA: the 3 address bits immediately
 * above the cache-line offset select one of the 8 L1D sets. */
741 otx2_npa_l1d_way_set_get(uint64_t iova)
743 return (iova >> rte_log2_u32(RTE_CACHE_LINE_SIZE)) & 0x7;
/* rte_mempool_ops 'populate' hook: align the chunk start to a multiple
 * of the element size, log the predicted L1D cache-set distribution of
 * object headers (debug aid for the odd-cacheline padding scheme),
 * program the aura's valid buffer address range into hardware, verify
 * the range took effect, and finally delegate object placement to the
 * generic aligned populate helper.
 */
747 otx2_npa_populate(struct rte_mempool *mp, unsigned int max_objs, void *vaddr,
748 rte_iova_t iova, size_t len,
749 rte_mempool_populate_obj_cb_t *obj_cb, void *obj_cb_arg)
751 #define OTX2_L1D_NB_SETS 8
752 uint64_t distribution[OTX2_L1D_NB_SETS];
753 rte_iova_t start_iova;
/* Physical (IOVA) address required: hardware stores raw buffer
 * pointers, so an unknown IOVA cannot be populated. */
759 if (iova == RTE_BAD_IOVA)
762 total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
764 /* Align object start address to a multiple of total_elt_sz */
765 off = total_elt_sz - ((((uintptr_t)vaddr - 1) % total_elt_sz) + 1);
/* Advance vaddr (and, in elided lines, iova/len) past the pad. */
771 vaddr = (char *)vaddr + off;
/* Histogram of which L1D set each element's header lands in. */
775 memset(distribution, 0, sizeof(uint64_t) * OTX2_L1D_NB_SETS);
777 while (start_iova < iova + len) {
778 set = otx2_npa_l1d_way_set_get(start_iova + mp->header_size);
780 start_iova += total_elt_sz;
783 otx2_npa_dbg("iova %"PRIx64", aligned iova %"PRIx64"", iova - off,
785 otx2_npa_dbg("length %"PRIu64", aligned length %"PRIu64"",
786 (uint64_t)(len + off), (uint64_t)len);
787 otx2_npa_dbg("element size %"PRIu64"", (uint64_t)total_elt_sz);
788 otx2_npa_dbg("requested objects %"PRIu64", possible objects %"PRIu64"",
789 (uint64_t)max_objs, (uint64_t)(len / total_elt_sz));
790 otx2_npa_dbg("L1D set distribution :");
791 for (i = 0; i < OTX2_L1D_NB_SETS; i++)
792 otx2_npa_dbg("set[%d] : objects : %"PRIu64"", i,
/* Program [iova, iova+len) as the aura's valid pointer range. */
795 npa_lf_aura_op_range_set(mp->pool_id, iova, iova + len);
/* Read the pool context back to confirm the range was applied. */
797 if (npa_lf_aura_range_update_check(mp->pool_id) < 0)
800 return rte_mempool_op_populate_helper(mp,
801 RTE_MEMPOOL_POPULATE_F_ALIGN_OBJ,
802 max_objs, vaddr, iova, len,
/* Mempool ops vtable registered with the rte_mempool framework under the
 * name "octeontx2_npa". The dequeue hook is chosen at compile time: the
 * CASP-based bulk path on ARM64, the scalar atomic path elsewhere
 * (the #else between the two .dequeue lines is elided here). */
806 static struct rte_mempool_ops otx2_npa_ops = {
807 .name = "octeontx2_npa",
808 .alloc = otx2_npa_alloc,
809 .free = otx2_npa_free,
810 .enqueue = otx2_npa_enq,
811 .get_count = otx2_npa_get_count,
812 .calc_mem_size = otx2_npa_calc_mem_size,
813 .populate = otx2_npa_populate,
814 #if defined(RTE_ARCH_ARM64)
815 .dequeue = otx2_npa_deq_arm64,
817 .dequeue = otx2_npa_deq,
/* Constructor-time registration with the mempool ops table. */
821 MEMPOOL_REGISTER_OPS(otx2_npa_ops);