From: Omkar Maslekar Date: Thu, 15 Oct 2020 23:20:03 +0000 (-0700) Subject: eal: add cache line demotion API X-Git-Url: http://git.droids-corp.org/?a=commitdiff_plain;h=4ffc2276e20b80d019d2190f35f4b6093bed6554;p=dpdk.git eal: add cache line demotion API rte_cldemote is similar to a prefetch hint - in reverse. On x86, cldemote(addr) enables software to hint to hardware that a cache line is likely to be shared. This is quite useful in core-to-core communications where a cache line is likely to be shared. ARM and PPC implementations are provided as no-ops; real implementations can be added if equivalent instructions become available on those architectures. Signed-off-by: Omkar Maslekar Acked-by: Bruce Richardson Acked-by: David Christensen Acked-by: Jerin Jacob Reviewed-by: Ruifeng Wang --- diff --git a/app/test/test_prefetch.c b/app/test/test_prefetch.c index 32e08f8afe..5489885b51 100644 --- a/app/test/test_prefetch.c +++ b/app/test/test_prefetch.c @@ -30,6 +30,8 @@ test_prefetch(void) rte_prefetch1_write(&a); rte_prefetch2_write(&a); + rte_cldemote(&a); + return 0; } diff --git a/doc/guides/rel_notes/release_20_11.rst b/doc/guides/rel_notes/release_20_11.rst index cda5b2f5b2..48717ee536 100644 --- a/doc/guides/rel_notes/release_20_11.rst +++ b/doc/guides/rel_notes/release_20_11.rst @@ -68,6 +68,15 @@ New Features which allow the programmer to prefetch a cache line and also indicate the intention to write. +* **Added the rte_cldemote API.** + + Added a hardware hint CLDEMOTE, which is similar to prefetch in reverse. + CLDEMOTE moves the cache line to the more remote cache, where it expects + sharing to be efficient. Moving the cache line to a level more distant from + the processor helps to accelerate core-to-core communication. + This API is specific to x86 and implemented as a stub for other + architectures. + * **Updated CRC modules of the net library.** * Added runtime selection of the optimal architecture-specific CRC path. 
diff --git a/lib/librte_eal/arm/include/rte_prefetch_32.h b/lib/librte_eal/arm/include/rte_prefetch_32.h index e53420a0ba..303caaa780 100644 --- a/lib/librte_eal/arm/include/rte_prefetch_32.h +++ b/lib/librte_eal/arm/include/rte_prefetch_32.h @@ -33,6 +33,13 @@ static inline void rte_prefetch_non_temporal(const volatile void *p) rte_prefetch0(p); } +__rte_experimental +static inline void +rte_cldemote(const volatile void *p) +{ + RTE_SET_USED(p); +} + #ifdef __cplusplus } #endif diff --git a/lib/librte_eal/arm/include/rte_prefetch_64.h b/lib/librte_eal/arm/include/rte_prefetch_64.h index fc2b391aa8..e28b66fee0 100644 --- a/lib/librte_eal/arm/include/rte_prefetch_64.h +++ b/lib/librte_eal/arm/include/rte_prefetch_64.h @@ -32,6 +32,13 @@ static inline void rte_prefetch_non_temporal(const volatile void *p) asm volatile ("PRFM PLDL1STRM, [%0]" : : "r" (p)); } +__rte_experimental +static inline void +rte_cldemote(const volatile void *p) +{ + RTE_SET_USED(p); +} + #ifdef __cplusplus } #endif diff --git a/lib/librte_eal/include/generic/rte_prefetch.h b/lib/librte_eal/include/generic/rte_prefetch.h index df9764e0bc..f9fab5e359 100644 --- a/lib/librte_eal/include/generic/rte_prefetch.h +++ b/lib/librte_eal/include/generic/rte_prefetch.h @@ -116,4 +116,22 @@ rte_prefetch2_write(const void *p) __builtin_prefetch(p, 1, 1); } +/** + * @warning + * @b EXPERIMENTAL: this API may change, or be removed, without prior notice + * + * Demote a cache line to a more distant level of cache from the processor. + * CLDEMOTE hints to hardware to move (demote) a cache line from the closest to + * the processor to a level more distant from the processor. It is a hint and + * not guaranteed. rte_cldemote is intended to move the cache line to the more + * remote cache, where it expects sharing to be efficient and to indicate that + * a line may be accessed by a different core in the future. 
+ * + * @param p + * Address to demote + */ +__rte_experimental +static inline void +rte_cldemote(const volatile void *p); + #endif /* _RTE_PREFETCH_H_ */ diff --git a/lib/librte_eal/ppc/include/rte_prefetch.h b/lib/librte_eal/ppc/include/rte_prefetch.h index 9ba07c815d..6df8087e41 100644 --- a/lib/librte_eal/ppc/include/rte_prefetch.h +++ b/lib/librte_eal/ppc/include/rte_prefetch.h @@ -34,6 +34,13 @@ static inline void rte_prefetch_non_temporal(const volatile void *p) rte_prefetch0(p); } +__rte_experimental +static inline void +rte_cldemote(const volatile void *p) +{ + RTE_SET_USED(p); +} + #ifdef __cplusplus } #endif diff --git a/lib/librte_eal/x86/include/rte_prefetch.h b/lib/librte_eal/x86/include/rte_prefetch.h index 384c6b3ef2..53404989dd 100644 --- a/lib/librte_eal/x86/include/rte_prefetch.h +++ b/lib/librte_eal/x86/include/rte_prefetch.h @@ -32,6 +32,17 @@ static inline void rte_prefetch_non_temporal(const volatile void *p) asm volatile ("prefetchnta %[p]" : : [p] "m" (*(const volatile char *)p)); } +/* + * We use raw byte codes for now as only the newest compiler + * versions support this instruction natively. + */ +__rte_experimental +static inline void +rte_cldemote(const volatile void *p) +{ + asm volatile(".byte 0x0f, 0x1c, 0x06" :: "S" (p)); +} + #ifdef __cplusplus } #endif