X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fmempool%2Focteontx2%2Fotx2_mempool_ops.c;h=5229a7cfba05f756e0033741fde3d7de51c3a5c1;hb=53313910bcfd5a05cc07ef5db97e7c65db7a93a3;hp=a60a77a4e182013ea7edd7a6a017c0e9fe81d41e;hpb=a78ffb5dd4699f54cd8cc56d032133d2aeff601d;p=dpdk.git diff --git a/drivers/mempool/octeontx2/otx2_mempool_ops.c b/drivers/mempool/octeontx2/otx2_mempool_ops.c index a60a77a4e1..5229a7cfba 100644 --- a/drivers/mempool/octeontx2/otx2_mempool_ops.c +++ b/drivers/mempool/octeontx2/otx2_mempool_ops.c @@ -7,7 +7,7 @@ #include "otx2_mempool.h" -static int __hot +static int __rte_hot otx2_npa_enq(struct rte_mempool *mp, void * const *obj_table, unsigned int n) { unsigned int index; const uint64_t aura_handle = mp->pool_id; @@ -54,7 +54,7 @@ npa_lf_aura_op_search_alloc(const int64_t wdata, int64_t * const addr, return 0; } -static __rte_always_inline int +static __rte_noinline int npa_lf_aura_op_alloc_bulk(const int64_t wdata, int64_t * const addr, unsigned int n, void **obj_table) { @@ -281,7 +281,7 @@ otx2_npa_clear_alloc(struct rte_mempool *mp, void **obj_table, unsigned int n) } } -static __rte_noinline int __hot +static __rte_noinline int __rte_hot otx2_npa_deq_arm64(struct rte_mempool *mp, void **obj_table, unsigned int n) { const int64_t wdata = npa_lf_aura_handle_to_aura(mp->pool_id); @@ -308,7 +308,7 @@ otx2_npa_deq_arm64(struct rte_mempool *mp, void **obj_table, unsigned int n) #else -static inline int __hot +static inline int __rte_hot otx2_npa_deq(struct rte_mempool *mp, void **obj_table, unsigned int n) { const int64_t wdata = npa_lf_aura_handle_to_aura(mp->pool_id); @@ -348,21 +348,26 @@ npa_lf_aura_pool_init(struct otx2_mbox *mbox, uint32_t aura_id, struct npa_aq_enq_req *aura_init_req, *pool_init_req; struct npa_aq_enq_rsp *aura_init_rsp, *pool_init_rsp; struct otx2_mbox_dev *mdev = &mbox->dev[0]; + struct otx2_idev_cfg *idev; int rc, off; + idev = otx2_intra_dev_get_cfg(); + if (idev == NULL) + return -ENOMEM; + aura_init_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox); aura_init_req->aura_id = aura_id; aura_init_req->ctype = NPA_AQ_CTYPE_AURA; aura_init_req->op = NPA_AQ_INSTOP_INIT; - memcpy(&aura_init_req->aura, aura, sizeof(*aura)); + otx2_mbox_memcpy(&aura_init_req->aura, aura, sizeof(*aura)); pool_init_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox); pool_init_req->aura_id = aura_id; pool_init_req->ctype = NPA_AQ_CTYPE_POOL; pool_init_req->op = NPA_AQ_INSTOP_INIT; - memcpy(&pool_init_req->pool, pool, sizeof(*pool)); + otx2_mbox_memcpy(&pool_init_req->pool, pool, sizeof(*pool)); otx2_mbox_msg_send(mbox, 0); rc = otx2_mbox_wait_for_rsp(mbox, 0); @@ -379,6 +384,44 @@ npa_lf_aura_pool_init(struct otx2_mbox *mbox, uint32_t aura_id, return 0; else return NPA_LF_ERR_AURA_POOL_INIT; + + if (!(idev->npa_lock_mask & BIT_ULL(aura_id))) + return 0; + + aura_init_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox); + aura_init_req->aura_id = aura_id; + aura_init_req->ctype = NPA_AQ_CTYPE_AURA; + aura_init_req->op = NPA_AQ_INSTOP_LOCK; + + pool_init_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox); + if (!pool_init_req) { + /* The shared memory buffer can be full. 
+ * Flush it and retry + */ + otx2_mbox_msg_send(mbox, 0); + rc = otx2_mbox_wait_for_rsp(mbox, 0); + if (rc < 0) { + otx2_err("Failed to LOCK AURA context"); + return -ENOMEM; + } + + pool_init_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox); + if (!pool_init_req) { + otx2_err("Failed to LOCK POOL context"); + return -ENOMEM; + } + } + pool_init_req->aura_id = aura_id; + pool_init_req->ctype = NPA_AQ_CTYPE_POOL; + pool_init_req->op = NPA_AQ_INSTOP_LOCK; + + rc = otx2_mbox_process(mbox); + if (rc < 0) { + otx2_err("Failed to lock POOL ctx to NDC"); + return -ENOMEM; + } + + return 0; } static int @@ -390,8 +433,13 @@ npa_lf_aura_pool_fini(struct otx2_mbox *mbox, struct npa_aq_enq_rsp *aura_rsp, *pool_rsp; struct otx2_mbox_dev *mdev = &mbox->dev[0]; struct ndc_sync_op *ndc_req; + struct otx2_idev_cfg *idev; int rc, off; + idev = otx2_intra_dev_get_cfg(); + if (idev == NULL) + return -EINVAL; + /* Procedure for disabling an aura/pool */ rte_delay_us(10); npa_lf_aura_op_alloc(aura_handle, 0); @@ -434,6 +482,32 @@ npa_lf_aura_pool_fini(struct otx2_mbox *mbox, otx2_err("Error on NDC-NPA LF sync, rc %d", rc); return NPA_LF_ERR_AURA_POOL_FINI; } + + if (!(idev->npa_lock_mask & BIT_ULL(aura_id))) + return 0; + + aura_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox); + aura_req->aura_id = aura_id; + aura_req->ctype = NPA_AQ_CTYPE_AURA; + aura_req->op = NPA_AQ_INSTOP_UNLOCK; + + rc = otx2_mbox_process(mbox); + if (rc < 0) { + otx2_err("Failed to unlock AURA ctx to NDC"); + return -EINVAL; + } + + pool_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox); + pool_req->aura_id = aura_id; + pool_req->ctype = NPA_AQ_CTYPE_POOL; + pool_req->op = NPA_AQ_INSTOP_UNLOCK; + + rc = otx2_mbox_process(mbox); + if (rc < 0) { + otx2_err("Failed to unlock POOL ctx to NDC"); + return -EINVAL; + } + return 0; } @@ -605,9 +679,9 @@ npa_lf_aura_range_update_check(uint64_t aura_handle) uint64_t aura_id = npa_lf_aura_handle_to_aura(aura_handle); struct otx2_npa_lf *lf = otx2_npa_lf_obj_get(); struct npa_aura_lim *lim = lf->aura_lim; + __otx2_io struct npa_pool_s *pool; struct npa_aq_enq_req *req; struct npa_aq_enq_rsp *rsp; - struct npa_pool_s *pool; int rc; req = otx2_mbox_alloc_msg_npa_aq_enq(lf->mbox); @@ -637,10 +711,11 @@ static int otx2_npa_alloc(struct rte_mempool *mp) { uint32_t block_size, block_count; + uint64_t aura_handle = 0; struct otx2_npa_lf *lf; struct npa_aura_s aura; struct npa_pool_s pool; - uint64_t aura_handle; + size_t padding; int rc; lf = otx2_npa_lf_obj_get(); @@ -650,6 +725,18 @@ otx2_npa_alloc(struct rte_mempool *mp) } block_size = mp->elt_size + mp->header_size + mp->trailer_size; + /* + * OCTEON TX2 has 8 sets, 41 ways L1D cache, VA<9:7> bits dictate + * the set selection. + * Add additional padding to ensure that the element size always + * occupies odd number of cachelines to ensure even distribution + * of elements among L1D cache sets. + */ + padding = ((block_size / RTE_CACHE_LINE_SIZE) % 2) ? 0 : + RTE_CACHE_LINE_SIZE; + mp->trailer_size += padding; + block_size += padding; + block_count = mp->size; if (block_size % OTX2_ALIGN != 0) { @@ -713,24 +800,21 @@ static ssize_t otx2_npa_calc_mem_size(const struct rte_mempool *mp, uint32_t obj_num, uint32_t pg_shift, size_t *min_chunk_size, size_t *align) { - ssize_t mem_size; + size_t total_elt_sz; - /* - * Simply need space for one more object to be able to - * fulfill alignment requirements. + /* Need space for one more obj on each chunk to fulfill + * alignment requirements. 
*/ - mem_size = rte_mempool_op_calc_mem_size_default(mp, obj_num + 1, - pg_shift, - min_chunk_size, align); - if (mem_size >= 0) { - /* - * Memory area which contains objects must be physically - * contiguous. - */ - *min_chunk_size = mem_size; - } + total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size; + return rte_mempool_op_calc_mem_size_helper(mp, obj_num, pg_shift, + total_elt_sz, min_chunk_size, + align); +} - return mem_size; +static uint8_t +otx2_npa_l1d_way_set_get(uint64_t iova) +{ + return (iova >> rte_log2_u32(RTE_CACHE_LINE_SIZE)) & 0x7; } static int @@ -738,8 +822,13 @@ otx2_npa_populate(struct rte_mempool *mp, unsigned int max_objs, void *vaddr, rte_iova_t iova, size_t len, rte_mempool_populate_obj_cb_t *obj_cb, void *obj_cb_arg) { +#define OTX2_L1D_NB_SETS 8 + uint64_t distribution[OTX2_L1D_NB_SETS]; + rte_iova_t start_iova; size_t total_elt_sz; + uint8_t set; size_t off; + int i; if (iova == RTE_BAD_IOVA) return -EINVAL; @@ -747,22 +836,45 @@ otx2_npa_populate(struct rte_mempool *mp, unsigned int max_objs, void *vaddr, total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size; /* Align object start address to a multiple of total_elt_sz */ - off = total_elt_sz - ((uintptr_t)vaddr % total_elt_sz); + off = total_elt_sz - ((((uintptr_t)vaddr - 1) % total_elt_sz) + 1); if (len < off) return -EINVAL; + vaddr = (char *)vaddr + off; iova += off; len -= off; + memset(distribution, 0, sizeof(uint64_t) * OTX2_L1D_NB_SETS); + start_iova = iova; + while (start_iova < iova + len) { + set = otx2_npa_l1d_way_set_get(start_iova + mp->header_size); + distribution[set]++; + start_iova += total_elt_sz; + } + + otx2_npa_dbg("iova %"PRIx64", aligned iova %"PRIx64"", iova - off, + iova); + otx2_npa_dbg("length %"PRIu64", aligned length %"PRIu64"", + (uint64_t)(len + off), (uint64_t)len); + otx2_npa_dbg("element size %"PRIu64"", (uint64_t)total_elt_sz); + otx2_npa_dbg("requested objects %"PRIu64", possible objects %"PRIu64"", + (uint64_t)max_objs, (uint64_t)(len / total_elt_sz)); + otx2_npa_dbg("L1D set distribution :"); + for (i = 0; i < OTX2_L1D_NB_SETS; i++) + otx2_npa_dbg("set[%d] : objects : %"PRIu64"", i, + distribution[i]); + npa_lf_aura_op_range_set(mp->pool_id, iova, iova + len); if (npa_lf_aura_range_update_check(mp->pool_id) < 0) return -EBUSY; - return rte_mempool_op_populate_default(mp, max_objs, vaddr, iova, len, - obj_cb, obj_cb_arg); + return rte_mempool_op_populate_helper(mp, + RTE_MEMPOOL_POPULATE_F_ALIGN_OBJ, + max_objs, vaddr, iova, len, + obj_cb, obj_cb_arg); } static struct rte_mempool_ops otx2_npa_ops = {
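
Note: the following standalone sketch is not part of the patch above. It illustrates why otx2_npa_alloc() pads the block size to an odd number of cache lines and what the distribution[] histogram in otx2_npa_populate() measures. The 128-byte line size, the 8 L1D sets (VA<9:7> selects the set, per the comment in the patch) and the base address/block size below are assumptions chosen for the example, not values read from hardware or from the driver.

/*
 * Sketch: with 8 sets selected by address bits <9:7>, stepping through
 * memory in a stride that is an even number of cache lines revisits only
 * a subset of the L1D sets; an odd stride cycles through all 8 of them.
 */
#include <inttypes.h>
#include <stdio.h>
#include <string.h>

#define L1D_NB_SETS	8
#define CACHE_LINE_SIZE	128	/* assumed RTE_CACHE_LINE_SIZE on this SoC */

/* Same bit extraction as otx2_npa_l1d_way_set_get(): VA<9:7> -> set index */
static uint8_t l1d_set_get(uint64_t addr)
{
	return (addr >> 7) & (L1D_NB_SETS - 1);
}

static void show_distribution(const char *tag, size_t elt_sz, int nb_objs)
{
	uint64_t distribution[L1D_NB_SETS];
	uint64_t addr = 0x100000;	/* arbitrary cache-line aligned base */
	int i;

	memset(distribution, 0, sizeof(distribution));
	for (i = 0; i < nb_objs; i++) {
		distribution[l1d_set_get(addr)]++;
		addr += elt_sz;
	}

	printf("%s: elt_sz=%zu (%zu cache lines):", tag, elt_sz,
	       elt_sz / CACHE_LINE_SIZE);
	for (i = 0; i < L1D_NB_SETS; i++)
		printf(" set[%d]=%" PRIu64, i, distribution[i]);
	printf("\n");
}

int main(void)
{
	size_t block_size = 2048;	/* even number of cache lines */
	/* Same padding rule as the patch: force an odd cache-line count */
	size_t padding = ((block_size / CACHE_LINE_SIZE) % 2) ? 0 :
			 CACHE_LINE_SIZE;

	show_distribution("unpadded", block_size, 1024);
	show_distribution("padded  ", block_size + padding, 1024);
	return 0;
}

With the unpadded 2048-byte element every object lands in the same set (stride of 16 lines is a multiple of 8), while the padded 2176-byte element spreads the 1024 objects evenly across all 8 sets, which is the behaviour the added trailer padding is after.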
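
A second standalone check, again not taken from the patch, for the reworked start-address alignment in otx2_npa_populate(). The original expression returned a full total_elt_sz of skipped space even when vaddr was already a multiple of the element size; the new expression returns 0 in that case and the same offset otherwise. The element size and addresses below are arbitrary example values.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Old form: off = total_elt_sz - (vaddr % total_elt_sz) */
static uintptr_t off_old(uintptr_t vaddr, uintptr_t total_elt_sz)
{
	return total_elt_sz - (vaddr % total_elt_sz);
}

/* New form: off = total_elt_sz - (((vaddr - 1) % total_elt_sz) + 1) */
static uintptr_t off_new(uintptr_t vaddr, uintptr_t total_elt_sz)
{
	return total_elt_sz - (((vaddr - 1) % total_elt_sz) + 1);
}

int main(void)
{
	const uintptr_t elt = 2304;	/* example total element size */

	/* Already aligned start: the old code wasted one whole element. */
	assert(off_old(10 * elt, elt) == elt);
	assert(off_new(10 * elt, elt) == 0);

	/* Unaligned start: both forms agree. */
	assert(off_old(10 * elt + 100, elt) == elt - 100);
	assert(off_new(10 * elt + 100, elt) == elt - 100);

	printf("offset computations behave as expected\n");
	return 0;
}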