X-Git-Url: http://git.droids-corp.org/?a=blobdiff_plain;f=drivers%2Fmempool%2Focteontx2%2Fotx2_mempool_ops.c;h=5229a7cfba05f756e0033741fde3d7de51c3a5c1;hb=53313910bcfd5a05cc07ef5db97e7c65db7a93a3;hp=a60a77a4e182013ea7edd7a6a017c0e9fe81d41e;hpb=a78ffb5dd4699f54cd8cc56d032133d2aeff601d;p=dpdk.git diff --git a/drivers/mempool/octeontx2/otx2_mempool_ops.c b/drivers/mempool/octeontx2/otx2_mempool_ops.c index a60a77a4e1..5229a7cfba 100644 --- a/drivers/mempool/octeontx2/otx2_mempool_ops.c +++ b/drivers/mempool/octeontx2/otx2_mempool_ops.c @@ -7,7 +7,7 @@ #include "otx2_mempool.h" -static int __hot +static int __rte_hot otx2_npa_enq(struct rte_mempool *mp, void * const *obj_table, unsigned int n) { unsigned int index; const uint64_t aura_handle = mp->pool_id; @@ -54,7 +54,7 @@ npa_lf_aura_op_search_alloc(const int64_t wdata, int64_t * const addr, return 0; } -static __rte_always_inline int +static __rte_noinline int npa_lf_aura_op_alloc_bulk(const int64_t wdata, int64_t * const addr, unsigned int n, void **obj_table) { @@ -281,7 +281,7 @@ otx2_npa_clear_alloc(struct rte_mempool *mp, void **obj_table, unsigned int n) } } -static __rte_noinline int __hot +static __rte_noinline int __rte_hot otx2_npa_deq_arm64(struct rte_mempool *mp, void **obj_table, unsigned int n) { const int64_t wdata = npa_lf_aura_handle_to_aura(mp->pool_id); @@ -308,7 +308,7 @@ otx2_npa_deq_arm64(struct rte_mempool *mp, void **obj_table, unsigned int n) #else -static inline int __hot +static inline int __rte_hot otx2_npa_deq(struct rte_mempool *mp, void **obj_table, unsigned int n) { const int64_t wdata = npa_lf_aura_handle_to_aura(mp->pool_id); @@ -348,21 +348,26 @@ npa_lf_aura_pool_init(struct otx2_mbox *mbox, uint32_t aura_id, struct npa_aq_enq_req *aura_init_req, *pool_init_req; struct npa_aq_enq_rsp *aura_init_rsp, *pool_init_rsp; struct otx2_mbox_dev *mdev = &mbox->dev[0]; + struct otx2_idev_cfg *idev; int rc, off; + idev = otx2_intra_dev_get_cfg(); + if (idev == NULL) + return -ENOMEM; + aura_init_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox); aura_init_req->aura_id = aura_id; aura_init_req->ctype = NPA_AQ_CTYPE_AURA; aura_init_req->op = NPA_AQ_INSTOP_INIT; - memcpy(&aura_init_req->aura, aura, sizeof(*aura)); + otx2_mbox_memcpy(&aura_init_req->aura, aura, sizeof(*aura)); pool_init_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox); pool_init_req->aura_id = aura_id; pool_init_req->ctype = NPA_AQ_CTYPE_POOL; pool_init_req->op = NPA_AQ_INSTOP_INIT; - memcpy(&pool_init_req->pool, pool, sizeof(*pool)); + otx2_mbox_memcpy(&pool_init_req->pool, pool, sizeof(*pool)); otx2_mbox_msg_send(mbox, 0); rc = otx2_mbox_wait_for_rsp(mbox, 0); @@ -379,6 +384,44 @@ npa_lf_aura_pool_init(struct otx2_mbox *mbox, uint32_t aura_id, return 0; else return NPA_LF_ERR_AURA_POOL_INIT; + + if (!(idev->npa_lock_mask & BIT_ULL(aura_id))) + return 0; + + aura_init_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox); + aura_init_req->aura_id = aura_id; + aura_init_req->ctype = NPA_AQ_CTYPE_AURA; + aura_init_req->op = NPA_AQ_INSTOP_LOCK; + + pool_init_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox); + if (!pool_init_req) { + /* The shared memory buffer can be full. 
+ * Flush it and retry + */ + otx2_mbox_msg_send(mbox, 0); + rc = otx2_mbox_wait_for_rsp(mbox, 0); + if (rc < 0) { + otx2_err("Failed to LOCK AURA context"); + return -ENOMEM; + } + + pool_init_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox); + if (!pool_init_req) { + otx2_err("Failed to LOCK POOL context"); + return -ENOMEM; + } + } + pool_init_req->aura_id = aura_id; + pool_init_req->ctype = NPA_AQ_CTYPE_POOL; + pool_init_req->op = NPA_AQ_INSTOP_LOCK; + + rc = otx2_mbox_process(mbox); + if (rc < 0) { + otx2_err("Failed to lock POOL ctx to NDC"); + return -ENOMEM; + } + + return 0; } static int @@ -390,8 +433,13 @@ npa_lf_aura_pool_fini(struct otx2_mbox *mbox, struct npa_aq_enq_rsp *aura_rsp, *pool_rsp; struct otx2_mbox_dev *mdev = &mbox->dev[0]; struct ndc_sync_op *ndc_req; + struct otx2_idev_cfg *idev; int rc, off; + idev = otx2_intra_dev_get_cfg(); + if (idev == NULL) + return -EINVAL; + /* Procedure for disabling an aura/pool */ rte_delay_us(10); npa_lf_aura_op_alloc(aura_handle, 0); @@ -434,6 +482,32 @@ npa_lf_aura_pool_fini(struct otx2_mbox *mbox, otx2_err("Error on NDC-NPA LF sync, rc %d", rc); return NPA_LF_ERR_AURA_POOL_FINI; } + + if (!(idev->npa_lock_mask & BIT_ULL(aura_id))) + return 0; + + aura_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox); + aura_req->aura_id = aura_id; + aura_req->ctype = NPA_AQ_CTYPE_AURA; + aura_req->op = NPA_AQ_INSTOP_UNLOCK; + + rc = otx2_mbox_process(mbox); + if (rc < 0) { + otx2_err("Failed to unlock AURA ctx to NDC"); + return -EINVAL; + } + + pool_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox); + pool_req->aura_id = aura_id; + pool_req->ctype = NPA_AQ_CTYPE_POOL; + pool_req->op = NPA_AQ_INSTOP_UNLOCK; + + rc = otx2_mbox_process(mbox); + if (rc < 0) { + otx2_err("Failed to unlock POOL ctx to NDC"); + return -EINVAL; + } + return 0; } @@ -605,9 +679,9 @@ npa_lf_aura_range_update_check(uint64_t aura_handle) uint64_t aura_id = npa_lf_aura_handle_to_aura(aura_handle); struct otx2_npa_lf *lf = otx2_npa_lf_obj_get(); struct npa_aura_lim *lim = lf->aura_lim; + __otx2_io struct npa_pool_s *pool; struct npa_aq_enq_req *req; struct npa_aq_enq_rsp *rsp; - struct npa_pool_s *pool; int rc; req = otx2_mbox_alloc_msg_npa_aq_enq(lf->mbox); @@ -637,10 +711,11 @@ static int otx2_npa_alloc(struct rte_mempool *mp) { uint32_t block_size, block_count; + uint64_t aura_handle = 0; struct otx2_npa_lf *lf; struct npa_aura_s aura; struct npa_pool_s pool; - uint64_t aura_handle; + size_t padding; int rc; lf = otx2_npa_lf_obj_get(); @@ -650,6 +725,18 @@ otx2_npa_alloc(struct rte_mempool *mp) } block_size = mp->elt_size + mp->header_size + mp->trailer_size; + /* + * OCTEON TX2 has 8 sets, 41 ways L1D cache, VA<9:7> bits dictate + * the set selection. + * Add additional padding to ensure that the element size always + * occupies odd number of cachelines to ensure even distribution + * of elements among L1D cache sets. + */ + padding = ((block_size / RTE_CACHE_LINE_SIZE) % 2) ? 0 : + RTE_CACHE_LINE_SIZE; + mp->trailer_size += padding; + block_size += padding; + block_count = mp->size; if (block_size % OTX2_ALIGN != 0) { @@ -713,24 +800,21 @@ static ssize_t otx2_npa_calc_mem_size(const struct rte_mempool *mp, uint32_t obj_num, uint32_t pg_shift, size_t *min_chunk_size, size_t *align) { - ssize_t mem_size; + size_t total_elt_sz; - /* - * Simply need space for one more object to be able to - * fulfill alignment requirements. + /* Need space for one more obj on each chunk to fulfill + * alignment requirements. 
*/ - mem_size = rte_mempool_op_calc_mem_size_default(mp, obj_num + 1, - pg_shift, - min_chunk_size, align); - if (mem_size >= 0) { - /* - * Memory area which contains objects must be physically - * contiguous. - */ - *min_chunk_size = mem_size; - } + total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size; + return rte_mempool_op_calc_mem_size_helper(mp, obj_num, pg_shift, + total_elt_sz, min_chunk_size, + align); +} - return mem_size; +static uint8_t +otx2_npa_l1d_way_set_get(uint64_t iova) +{ + return (iova >> rte_log2_u32(RTE_CACHE_LINE_SIZE)) & 0x7; } static int @@ -738,8 +822,13 @@ otx2_npa_populate(struct rte_mempool *mp, unsigned int max_objs, void *vaddr, rte_iova_t iova, size_t len, rte_mempool_populate_obj_cb_t *obj_cb, void *obj_cb_arg) { +#define OTX2_L1D_NB_SETS 8 + uint64_t distribution[OTX2_L1D_NB_SETS]; + rte_iova_t start_iova; size_t total_elt_sz; + uint8_t set; size_t off; + int i; if (iova == RTE_BAD_IOVA) return -EINVAL; @@ -747,22 +836,45 @@ otx2_npa_populate(struct rte_mempool *mp, unsigned int max_objs, void *vaddr, total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size; /* Align object start address to a multiple of total_elt_sz */ - off = total_elt_sz - ((uintptr_t)vaddr % total_elt_sz); + off = total_elt_sz - ((((uintptr_t)vaddr - 1) % total_elt_sz) + 1); if (len < off) return -EINVAL; + vaddr = (char *)vaddr + off; iova += off; len -= off; + memset(distribution, 0, sizeof(uint64_t) * OTX2_L1D_NB_SETS); + start_iova = iova; + while (start_iova < iova + len) { + set = otx2_npa_l1d_way_set_get(start_iova + mp->header_size); + distribution[set]++; + start_iova += total_elt_sz; + } + + otx2_npa_dbg("iova %"PRIx64", aligned iova %"PRIx64"", iova - off, + iova); + otx2_npa_dbg("length %"PRIu64", aligned length %"PRIu64"", + (uint64_t)(len + off), (uint64_t)len); + otx2_npa_dbg("element size %"PRIu64"", (uint64_t)total_elt_sz); + otx2_npa_dbg("requested objects %"PRIu64", possible objects %"PRIu64"", + (uint64_t)max_objs, (uint64_t)(len / total_elt_sz)); + otx2_npa_dbg("L1D set distribution :"); + for (i = 0; i < OTX2_L1D_NB_SETS; i++) + otx2_npa_dbg("set[%d] : objects : %"PRIu64"", i, + distribution[i]); + npa_lf_aura_op_range_set(mp->pool_id, iova, iova + len); if (npa_lf_aura_range_update_check(mp->pool_id) < 0) return -EBUSY; - return rte_mempool_op_populate_default(mp, max_objs, vaddr, iova, len, - obj_cb, obj_cb_arg); + return rte_mempool_op_populate_helper(mp, + RTE_MEMPOOL_POPULATE_F_ALIGN_OBJ, + max_objs, vaddr, iova, len, + obj_cb, obj_cb_arg); } static struct rte_mempool_ops otx2_npa_ops = {
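
Note: the following standalone sketch is not part of the patch above. It illustrates why otx2_npa_alloc() pads the block size to an odd number of cache lines and what the distribution[] histogram in otx2_npa_populate() measures. The 128-byte line size, the 8 L1D sets (VA<9:7> selects the set, per the comment in the patch) and the base address/block size below are assumptions chosen for the example, not values read from hardware or from the driver.

/*
 * Sketch: with 8 sets selected by address bits <9:7>, stepping through
 * memory in a stride that is an even number of cache lines revisits only
 * a subset of the L1D sets; an odd stride cycles through all 8 of them.
 */
#include <inttypes.h>
#include <stdio.h>
#include <string.h>

#define L1D_NB_SETS	8
#define CACHE_LINE_SIZE	128	/* assumed RTE_CACHE_LINE_SIZE on this SoC */

/* Same bit extraction as otx2_npa_l1d_way_set_get(): VA<9:7> -> set index */
static uint8_t l1d_set_get(uint64_t addr)
{
	return (addr >> 7) & (L1D_NB_SETS - 1);
}

static void show_distribution(const char *tag, size_t elt_sz, int nb_objs)
{
	uint64_t distribution[L1D_NB_SETS];
	uint64_t addr = 0x100000;	/* arbitrary cache-line aligned base */
	int i;

	memset(distribution, 0, sizeof(distribution));
	for (i = 0; i < nb_objs; i++) {
		distribution[l1d_set_get(addr)]++;
		addr += elt_sz;
	}

	printf("%s: elt_sz=%zu (%zu cache lines):", tag, elt_sz,
	       elt_sz / CACHE_LINE_SIZE);
	for (i = 0; i < L1D_NB_SETS; i++)
		printf(" set[%d]=%" PRIu64, i, distribution[i]);
	printf("\n");
}

int main(void)
{
	size_t block_size = 2048;	/* even number of cache lines */
	/* Same padding rule as the patch: force an odd cache-line count */
	size_t padding = ((block_size / CACHE_LINE_SIZE) % 2) ? 0 :
			 CACHE_LINE_SIZE;

	show_distribution("unpadded", block_size, 1024);
	show_distribution("padded  ", block_size + padding, 1024);
	return 0;
}

With the unpadded 2048-byte element every object lands in the same set (stride of 16 lines is a multiple of 8), while the padded 2176-byte element spreads the 1024 objects evenly across all 8 sets, which is the behaviour the added trailer padding is after.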
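
A second standalone check, again not taken from the patch, for the reworked start-address alignment in otx2_npa_populate(). The original expression returned a full total_elt_sz of skipped space even when vaddr was already a multiple of the element size; the new expression returns 0 in that case and the same offset otherwise. The element size and addresses below are arbitrary example values.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Old form: off = total_elt_sz - (vaddr % total_elt_sz) */
static uintptr_t off_old(uintptr_t vaddr, uintptr_t total_elt_sz)
{
	return total_elt_sz - (vaddr % total_elt_sz);
}

/* New form: off = total_elt_sz - (((vaddr - 1) % total_elt_sz) + 1) */
static uintptr_t off_new(uintptr_t vaddr, uintptr_t total_elt_sz)
{
	return total_elt_sz - (((vaddr - 1) % total_elt_sz) + 1);
}

int main(void)
{
	const uintptr_t elt = 2304;	/* example total element size */

	/* Already aligned start: the old code wasted one whole element. */
	assert(off_old(10 * elt, elt) == elt);
	assert(off_new(10 * elt, elt) == 0);

	/* Unaligned start: both forms agree. */
	assert(off_old(10 * elt + 100, elt) == elt - 100);
	assert(off_new(10 * elt + 100, elt) == elt - 100);

	printf("offset computations behave as expected\n");
	return 0;
}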