diff --git a/drivers/mempool/octeontx2/otx2_mempool_ops.c b/drivers/mempool/octeontx2/otx2_mempool_ops.c
index c59bd73c03..3aea92a01b 100644
--- a/drivers/mempool/octeontx2/otx2_mempool_ops.c
+++ b/drivers/mempool/octeontx2/otx2_mempool_ops.c
@@ -37,6 +37,277 @@ npa_lf_aura_op_alloc_one(const int64_t wdata, int64_t * const addr,
 	return -ENOENT;
 }
 
+#if defined(RTE_ARCH_ARM64)
+static __rte_noinline int
+npa_lf_aura_op_search_alloc(const int64_t wdata, int64_t * const addr,
+		void **obj_table, unsigned int n)
+{
+	uint8_t i;
+
+	for (i = 0; i < n; i++) {
+		if (obj_table[i] != NULL)
+			continue;
+		if (npa_lf_aura_op_alloc_one(wdata, addr, obj_table, i))
+			return -ENOENT;
+	}
+
+	return 0;
+}
+
+static __rte_noinline int
+npa_lf_aura_op_alloc_bulk(const int64_t wdata, int64_t * const addr,
+		unsigned int n, void **obj_table)
+{
+	register const uint64_t wdata64 __asm("x26") = wdata;
+	register const uint64_t wdata128 __asm("x27") = wdata;
+	uint64x2_t failed = vdupq_n_u64(~0);
+
+	switch (n) {
+	case 32:
+	{
+		asm volatile (
+		".cpu generic+lse\n"
+		"casp x0, x1, %[wdata64], %[wdata128], [%[loc]]\n"
+		"casp x2, x3, %[wdata64], %[wdata128], [%[loc]]\n"
+		"casp x4, x5, %[wdata64], %[wdata128], [%[loc]]\n"
+		"casp x6, x7, %[wdata64], %[wdata128], [%[loc]]\n"
+		"casp x8, x9, %[wdata64], %[wdata128], [%[loc]]\n"
+		"casp x10, x11, %[wdata64], %[wdata128], [%[loc]]\n"
+		"casp x12, x13, %[wdata64], %[wdata128], [%[loc]]\n"
+		"casp x14, x15, %[wdata64], %[wdata128], [%[loc]]\n"
+		"casp x16, x17, %[wdata64], %[wdata128], [%[loc]]\n"
+		"casp x18, x19, %[wdata64], %[wdata128], [%[loc]]\n"
+		"casp x20, x21, %[wdata64], %[wdata128], [%[loc]]\n"
+		"casp x22, x23, %[wdata64], %[wdata128], [%[loc]]\n"
+		"fmov d16, x0\n"
+		"fmov v16.D[1], x1\n"
+		"casp x0, x1, %[wdata64], %[wdata128], [%[loc]]\n"
+		"fmov d17, x2\n"
+		"fmov v17.D[1], x3\n"
+		"casp x2, x3, %[wdata64], %[wdata128], [%[loc]]\n"
+		"fmov d18, x4\n"
+		"fmov v18.D[1], x5\n"
+		"casp x4, x5, %[wdata64], %[wdata128], [%[loc]]\n"
+		"fmov d19, x6\n"
+		"fmov v19.D[1], x7\n"
+		"casp x6, x7, %[wdata64], %[wdata128], [%[loc]]\n"
+		"and %[failed].16B, %[failed].16B, v16.16B\n"
+		"and %[failed].16B, %[failed].16B, v17.16B\n"
+		"and %[failed].16B, %[failed].16B, v18.16B\n"
+		"and %[failed].16B, %[failed].16B, v19.16B\n"
+		"fmov d20, x8\n"
+		"fmov v20.D[1], x9\n"
+		"fmov d21, x10\n"
+		"fmov v21.D[1], x11\n"
+		"fmov d22, x12\n"
+		"fmov v22.D[1], x13\n"
+		"fmov d23, x14\n"
+		"fmov v23.D[1], x15\n"
+		"and %[failed].16B, %[failed].16B, v20.16B\n"
+		"and %[failed].16B, %[failed].16B, v21.16B\n"
+		"and %[failed].16B, %[failed].16B, v22.16B\n"
+		"and %[failed].16B, %[failed].16B, v23.16B\n"
+		"st1 { v16.2d, v17.2d, v18.2d, v19.2d}, [%[dst]], 64\n"
+		"st1 { v20.2d, v21.2d, v22.2d, v23.2d}, [%[dst]], 64\n"
+		"fmov d16, x16\n"
+		"fmov v16.D[1], x17\n"
+		"fmov d17, x18\n"
+		"fmov v17.D[1], x19\n"
+		"fmov d18, x20\n"
+		"fmov v18.D[1], x21\n"
+		"fmov d19, x22\n"
+		"fmov v19.D[1], x23\n"
+		"and %[failed].16B, %[failed].16B, v16.16B\n"
+		"and %[failed].16B, %[failed].16B, v17.16B\n"
+		"and %[failed].16B, %[failed].16B, v18.16B\n"
+		"and %[failed].16B, %[failed].16B, v19.16B\n"
+		"fmov d20, x0\n"
+		"fmov v20.D[1], x1\n"
+		"fmov d21, x2\n"
+		"fmov v21.D[1], x3\n"
+		"fmov d22, x4\n"
+		"fmov v22.D[1], x5\n"
+		"fmov d23, x6\n"
+		"fmov v23.D[1], x7\n"
+		"and %[failed].16B, %[failed].16B, v20.16B\n"
+		"and %[failed].16B, %[failed].16B, v21.16B\n"
+		"and %[failed].16B, %[failed].16B, v22.16B\n"
+		"and %[failed].16B, %[failed].16B, v23.16B\n"
+		"st1 { v16.2d, v17.2d, v18.2d, v19.2d}, [%[dst]], 64\n"
+		"st1 { v20.2d, v21.2d, v22.2d, v23.2d}, [%[dst]], 64\n"
+		: "+Q" (*addr), [failed] "=&w" (failed)
+		: [wdata64] "r" (wdata64), [wdata128] "r" (wdata128),
+		[dst] "r" (obj_table), [loc] "r" (addr)
+		: "memory", "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
+		"x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16",
+		"x17", "x18", "x19", "x20", "x21", "x22", "x23", "v16", "v17",
+		"v18", "v19", "v20", "v21", "v22", "v23"
+		);
+		break;
+	}
+	case 16:
+	{
+		asm volatile (
+		".cpu generic+lse\n"
+		"casp x0, x1, %[wdata64], %[wdata128], [%[loc]]\n"
+		"casp x2, x3, %[wdata64], %[wdata128], [%[loc]]\n"
+		"casp x4, x5, %[wdata64], %[wdata128], [%[loc]]\n"
+		"casp x6, x7, %[wdata64], %[wdata128], [%[loc]]\n"
+		"casp x8, x9, %[wdata64], %[wdata128], [%[loc]]\n"
+		"casp x10, x11, %[wdata64], %[wdata128], [%[loc]]\n"
+		"casp x12, x13, %[wdata64], %[wdata128], [%[loc]]\n"
+		"casp x14, x15, %[wdata64], %[wdata128], [%[loc]]\n"
+		"fmov d16, x0\n"
+		"fmov v16.D[1], x1\n"
+		"fmov d17, x2\n"
+		"fmov v17.D[1], x3\n"
+		"fmov d18, x4\n"
+		"fmov v18.D[1], x5\n"
+		"fmov d19, x6\n"
+		"fmov v19.D[1], x7\n"
+		"and %[failed].16B, %[failed].16B, v16.16B\n"
+		"and %[failed].16B, %[failed].16B, v17.16B\n"
+		"and %[failed].16B, %[failed].16B, v18.16B\n"
+		"and %[failed].16B, %[failed].16B, v19.16B\n"
+		"fmov d20, x8\n"
+		"fmov v20.D[1], x9\n"
+		"fmov d21, x10\n"
+		"fmov v21.D[1], x11\n"
+		"fmov d22, x12\n"
+		"fmov v22.D[1], x13\n"
+		"fmov d23, x14\n"
+		"fmov v23.D[1], x15\n"
+		"and %[failed].16B, %[failed].16B, v20.16B\n"
+		"and %[failed].16B, %[failed].16B, v21.16B\n"
+		"and %[failed].16B, %[failed].16B, v22.16B\n"
+		"and %[failed].16B, %[failed].16B, v23.16B\n"
+		"st1 { v16.2d, v17.2d, v18.2d, v19.2d}, [%[dst]], 64\n"
+		"st1 { v20.2d, v21.2d, v22.2d, v23.2d}, [%[dst]], 64\n"
+		: "+Q" (*addr), [failed] "=&w" (failed)
+		: [wdata64] "r" (wdata64), [wdata128] "r" (wdata128),
+		[dst] "r" (obj_table), [loc] "r" (addr)
+		: "memory", "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
+		"x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "v16",
+		"v17", "v18", "v19", "v20", "v21", "v22", "v23"
+		);
+		break;
+	}
+	case 8:
+	{
+		asm volatile (
+		".cpu generic+lse\n"
+		"casp x0, x1, %[wdata64], %[wdata128], [%[loc]]\n"
+		"casp x2, x3, %[wdata64], %[wdata128], [%[loc]]\n"
+		"casp x4, x5, %[wdata64], %[wdata128], [%[loc]]\n"
+		"casp x6, x7, %[wdata64], %[wdata128], [%[loc]]\n"
+		"fmov d16, x0\n"
+		"fmov v16.D[1], x1\n"
+		"fmov d17, x2\n"
+		"fmov v17.D[1], x3\n"
+		"fmov d18, x4\n"
+		"fmov v18.D[1], x5\n"
+		"fmov d19, x6\n"
+		"fmov v19.D[1], x7\n"
+		"and %[failed].16B, %[failed].16B, v16.16B\n"
+		"and %[failed].16B, %[failed].16B, v17.16B\n"
+		"and %[failed].16B, %[failed].16B, v18.16B\n"
+		"and %[failed].16B, %[failed].16B, v19.16B\n"
+		"st1 { v16.2d, v17.2d, v18.2d, v19.2d}, [%[dst]], 64\n"
+		: "+Q" (*addr), [failed] "=&w" (failed)
+		: [wdata64] "r" (wdata64), [wdata128] "r" (wdata128),
+		[dst] "r" (obj_table), [loc] "r" (addr)
+		: "memory", "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
+		"v16", "v17", "v18", "v19"
+		);
+		break;
+	}
+	case 4:
+	{
+		asm volatile (
+		".cpu generic+lse\n"
+		"casp x0, x1, %[wdata64], %[wdata128], [%[loc]]\n"
+		"casp x2, x3, %[wdata64], %[wdata128], [%[loc]]\n"
+		"fmov d16, x0\n"
+		"fmov v16.D[1], x1\n"
+		"fmov d17, x2\n"
+		"fmov v17.D[1], x3\n"
+		"and %[failed].16B, %[failed].16B, v16.16B\n"
+		"and %[failed].16B, %[failed].16B, v17.16B\n"
+		"st1 { v16.2d, v17.2d}, [%[dst]], 32\n"
+		: "+Q" (*addr), [failed] "=&w" (failed)
+		: [wdata64] "r" (wdata64), [wdata128] "r" (wdata128),
+		[dst] "r" (obj_table), [loc] "r" (addr)
+		: "memory", "x0", "x1", "x2", "x3", "v16", "v17"
+		);
+		break;
+	}
+	case 2:
+	{
+		asm volatile (
+		".cpu generic+lse\n"
+		"casp x0, x1, %[wdata64], %[wdata128], [%[loc]]\n"
+		"fmov d16, x0\n"
+		"fmov v16.D[1], x1\n"
+		"and %[failed].16B, %[failed].16B, v16.16B\n"
+		"st1 { v16.2d}, [%[dst]], 16\n"
+		: "+Q" (*addr), [failed] "=&w" (failed)
+		: [wdata64] "r" (wdata64), [wdata128] "r" (wdata128),
+		[dst] "r" (obj_table), [loc] "r" (addr)
+		: "memory", "x0", "x1", "v16"
+		);
+		break;
+	}
+	case 1:
+		return npa_lf_aura_op_alloc_one(wdata, addr, obj_table, 0);
+	}
+
+	if (unlikely(!(vgetq_lane_u64(failed, 0) & vgetq_lane_u64(failed, 1))))
+		return npa_lf_aura_op_search_alloc(wdata, addr, (void **)
+			((char *)obj_table - (sizeof(uint64_t) * n)), n);
+
+	return 0;
+}
+
+static __rte_noinline void
+otx2_npa_clear_alloc(struct rte_mempool *mp, void **obj_table, unsigned int n)
+{
+	unsigned int i;
+
+	for (i = 0; i < n; i++) {
+		if (obj_table[i] != NULL) {
+			otx2_npa_enq(mp, &obj_table[i], 1);
+			obj_table[i] = NULL;
+		}
+	}
+}
+
+static __rte_noinline int __hot
+otx2_npa_deq_arm64(struct rte_mempool *mp, void **obj_table, unsigned int n)
+{
+	const int64_t wdata = npa_lf_aura_handle_to_aura(mp->pool_id);
+	void **obj_table_bak = obj_table;
+	const unsigned int nfree = n;
+	unsigned int parts;
+
+	int64_t * const addr = (int64_t * const)
+		(npa_lf_aura_handle_to_base(mp->pool_id) +
+		NPA_LF_AURA_OP_ALLOCX(0));
+	while (n) {
+		parts = n > 31 ? 32 : rte_align32prevpow2(n);
+		n -= parts;
+		if (unlikely(npa_lf_aura_op_alloc_bulk(wdata, addr,
+				parts, obj_table))) {
+			otx2_npa_clear_alloc(mp, obj_table_bak, nfree - n);
+			return -ENOENT;
+		}
+		obj_table += parts;
+	}
+
+	return 0;
+}
+
+#else
+
 static inline int __hot
 otx2_npa_deq(struct rte_mempool *mp, void **obj_table, unsigned int n)
 {
@@ -44,7 +315,7 @@ otx2_npa_deq(struct rte_mempool *mp, void **obj_table, unsigned int n)
 	unsigned int index;
 	uint64_t obj;
 
-	int64_t * const addr = (int64_t * const)
+	int64_t * const addr = (int64_t *)
 		(npa_lf_aura_handle_to_base(mp->pool_id) +
 		NPA_LF_AURA_OP_ALLOCX(0));
 	for (index = 0; index < n; index++, obj_table++) {
@@ -62,6 +333,8 @@ otx2_npa_deq(struct rte_mempool *mp, void **obj_table, unsigned int n)
 	return 0;
 }
 
+#endif
+
 static unsigned int
 otx2_npa_get_count(const struct rte_mempool *mp)
 {
@@ -82,14 +355,14 @@ npa_lf_aura_pool_init(struct otx2_mbox *mbox, uint32_t aura_id,
 	aura_init_req->aura_id = aura_id;
 	aura_init_req->ctype = NPA_AQ_CTYPE_AURA;
 	aura_init_req->op = NPA_AQ_INSTOP_INIT;
-	memcpy(&aura_init_req->aura, aura, sizeof(*aura));
+	otx2_mbox_memcpy(&aura_init_req->aura, aura, sizeof(*aura));
 
 	pool_init_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox);
 
 	pool_init_req->aura_id = aura_id;
 	pool_init_req->ctype = NPA_AQ_CTYPE_POOL;
 	pool_init_req->op = NPA_AQ_INSTOP_INIT;
-	memcpy(&pool_init_req->pool, pool, sizeof(*pool));
+	otx2_mbox_memcpy(&pool_init_req->pool, pool, sizeof(*pool));
 
 	otx2_mbox_msg_send(mbox, 0);
 	rc = otx2_mbox_wait_for_rsp(mbox, 0);
@@ -326,6 +599,40 @@ npa_lf_aura_pool_pair_free(struct otx2_npa_lf *lf, uint64_t aura_handle)
 	return rc;
 }
 
+static int
+npa_lf_aura_range_update_check(uint64_t aura_handle)
+{
+	uint64_t aura_id = npa_lf_aura_handle_to_aura(aura_handle);
+	struct otx2_npa_lf *lf = otx2_npa_lf_obj_get();
+	struct npa_aura_lim *lim = lf->aura_lim;
+	__otx2_io struct npa_pool_s *pool;
+	struct npa_aq_enq_req *req;
+	struct npa_aq_enq_rsp *rsp;
+	int rc;
+
+	req = otx2_mbox_alloc_msg_npa_aq_enq(lf->mbox);
+
+	req->aura_id = aura_id;
+	req->ctype = NPA_AQ_CTYPE_POOL;
+	req->op = NPA_AQ_INSTOP_READ;
+
+	rc = otx2_mbox_process_msg(lf->mbox, (void *)&rsp);
+	if (rc) {
+		otx2_err("Failed to get pool(0x%"PRIx64") context", aura_id);
+		return rc;
+	}
+
+	pool = &rsp->pool;
+
+	if (lim[aura_id].ptr_start != pool->ptr_start ||
+	    lim[aura_id].ptr_end != pool->ptr_end) {
+		otx2_err("Range update failed on pool(0x%"PRIx64")", aura_id);
+		return -ERANGE;
+	}
+
+	return 0;
+}
+
 static int
 otx2_npa_alloc(struct rte_mempool *mp)
 {
@@ -406,24 +713,12 @@ static ssize_t
 otx2_npa_calc_mem_size(const struct rte_mempool *mp, uint32_t obj_num,
 		       uint32_t pg_shift, size_t *min_chunk_size, size_t *align)
 {
-	ssize_t mem_size;
-
 	/*
 	 * Simply need space for one more object to be able to
 	 * fulfill alignment requirements.
 	 */
-	mem_size = rte_mempool_op_calc_mem_size_default(mp, obj_num + 1,
-							pg_shift,
-							min_chunk_size, align);
-	if (mem_size >= 0) {
-		/*
-		 * Memory area which contains objects must be physically
-		 * contiguous.
-		 */
-		*min_chunk_size = mem_size;
-	}
-
-	return mem_size;
+	return rte_mempool_op_calc_mem_size_helper(mp, obj_num + 1, pg_shift,
+						   min_chunk_size, align);
 }
 
 static int
@@ -451,7 +746,10 @@ otx2_npa_populate(struct rte_mempool *mp, unsigned int max_objs, void *vaddr,
 
 	npa_lf_aura_op_range_set(mp->pool_id, iova, iova + len);
 
-	return rte_mempool_op_populate_default(mp, max_objs, vaddr, iova, len,
+	if (npa_lf_aura_range_update_check(mp->pool_id) < 0)
+		return -EBUSY;
+
+	return rte_mempool_op_populate_helper(mp, max_objs, vaddr, iova, len,
 					       obj_cb, obj_cb_arg);
 }
 
@@ -463,7 +761,11 @@ static struct rte_mempool_ops otx2_npa_ops = {
 	.get_count = otx2_npa_get_count,
 	.calc_mem_size = otx2_npa_calc_mem_size,
 	.populate = otx2_npa_populate,
+#if defined(RTE_ARCH_ARM64)
+	.dequeue = otx2_npa_deq_arm64,
+#else
 	.dequeue = otx2_npa_deq,
+#endif
 };
 
 MEMPOOL_REGISTER_OPS(otx2_npa_ops);
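
Note on the dequeue fast path added above: the NPA returns zero for an
allocation from an exhausted aura, so npa_lf_aura_op_alloc_bulk() folds
every 128-bit casp result into the `failed' vector with vector ANDs and
falls back to npa_lf_aura_op_search_alloc() only when the accumulated
AND collapses to zero. Below is a minimal scalar sketch of the same
idea; burst_failed() and its test values are illustrative, not driver
code, and it relies on the same assumption the driver makes, namely that
valid pointers drawn from one pool region always share at least one set
bit.

#include <stdint.h>
#include <stdio.h>

/* AND-accumulate a burst of raw allocation results; 0 == failed. */
static int
burst_failed(const uint64_t *results, unsigned int n)
{
	uint64_t acc = ~0ULL;
	unsigned int i;

	for (i = 0; i < n; i++)
		acc &= results[i];	/* any zero result zeroes the AND */

	/* Valid pointers from one pool region share set bits. */
	return acc == 0;
}

int main(void)
{
	uint64_t ok[4] = { 0x840000100040, 0x840000100080,
			   0x8400001000c0, 0x840000100100 };
	uint64_t bad[4] = { 0x840000100040, 0, 0x8400001000c0,
			    0x840000100100 };

	printf("ok burst failed? %d\n", burst_failed(ok, 4));	/* 0 */
	printf("bad burst failed? %d\n", burst_failed(bad, 4));	/* 1 */
	return 0;
}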
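
Similarly, otx2_npa_deq_arm64() splits a request of arbitrary size into
the power-of-two bursts that the inline asm cases implement (32, 16, 8,
4, 2, 1) via rte_align32prevpow2(). A standalone sketch of that
splitting loop follows, with prevpow2() as a hypothetical local
stand-in for the DPDK helper:

#include <stdint.h>
#include <stdio.h>

/* Stand-in for rte_align32prevpow2(): round down to a power of two. */
static uint32_t
prevpow2(uint32_t x)
{
	x |= x >> 1;
	x |= x >> 2;
	x |= x >> 4;
	x |= x >> 8;
	x |= x >> 16;
	return x - (x >> 1);
}

int main(void)
{
	unsigned int n = 45;	/* arbitrary dequeue request */

	/* Same split as the driver loop: 45 -> 32, 8, 4, 1. */
	while (n) {
		unsigned int parts = n > 31 ? 32 : prevpow2(n);

		n -= parts;
		printf("burst of %u\n", parts);
	}
	return 0;
}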