[Arm-dev] [PATCH v1 47/87] ARM64:spinlocks: Fix up for WFE and improve performance slightly.

Thu Aug 13 13:18:44 UTC 2015
Vadim Lomovtsev <Vadim.Lomovtsev at caviumnetworks.com>

From: Andrew Pinski <apinski at cavium.com>

In the previous patch, I made the mistake of putting the WFE after the delay,
which meant that if we enabled the WFE, we would get the same bad performance as
before. Also make more use of the flags register so that instructions can be
fused together.

Signed-off-by: Andrew Pinski <apinski at cavium.com>
Signed-off-by: Vadim Lomovtsev <Vadim.Lomovtsev at caviumnetworks.com>
---
 arch/arm64/include/asm/spinlock.h | 41 +++++++++++++++++++++++----------------
 1 file changed, 24 insertions(+), 17 deletions(-)

diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h
index d867547..ad629ee 100644
--- a/arch/arm64/include/asm/spinlock.h
+++ b/arch/arm64/include/asm/spinlock.h
@@ -46,31 +46,38 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
 	/* Did we get the lock? */
 "	eor	%w1, %w0, %w0, ror #16\n"
 "	cbz	%w1, 3f\n"
+	/* Put the current ticket into %w2 */
+"	uxth	%w2, %w0\n"
+	/* Put our ticket into %w0 */
+"	lsr	%w0, %w0, 16\n"
 	/*
 	 * No: spin on the owner. Send a local event to avoid missing an
 	 * unlock before the exclusive load.
 	 */
 "	sevl\n"
+	/* Wait for event, we might not be the current ticket. */
+"2:	wfe\n"
 	/* Delay if our ticket is not the next ticket. */
-"	uxth	%w2, %w0\n"
-"	lsr	%w0, %w0, 16\n"
 	/* %w2 is the difference between our ticket and the current ticket. */
-"2:	sub	%w2, %w0, %w2\n"
+"2:	subs	%w2, %w0, %w2\n"
 	/* If the tickets have wrapped, then we need to add USHORT_MAX.  */
-"	cmp	%w2, wzr\n"
-"	b.lt	5f\n"
-"6:	sub	%w2, %w2, 1\n"
-"	cbz	%w2, 7f\n"
-	/* Multiply by 64, a good estimate of how long an lock/unlock will take. */
-"	lsl	%w2, %w2, 6\n"
+"	b.mi	5f\n"
+	/* Subtract one from the difference. */
+"6:	subs	%w2, %w2, 1\n"
+	/* Don't wait if we are the next ticket. */
+"	b.eq	7f\n"
+	/* Multiply by 80, a good estimate of how long a lock/unlock will take. */
+"	lsl	%w2, %w2, #4\n"
+"	add	%w2, %w2, %w2, lsl #2\n"
 	/* Spin until we get 0. */
-"4:	sub	%w2, %w2, 1\n"
-"	cbnz	%w2, 4b\n"
-	/* Wait for event, we might not be the current ticket. */
-"7:	wfe\n"
-"	ldaxrh	%w2, %4\n"
-"	eor	%w1, %w2, %w0\n"
-"	cbnz	%w1, 2b\n"
+"4:	subs	%w2, %w2, #1\n"
+"	b.ne	4b\n"
+
+	/* Get the current ticket. */
+"7:	ldaxrh	%w2, %4\n"
+	/* See if we get the ticket, otherwise loop. */
+"	cmp	%w2, %w0\n"
+"	b.ne	2b\n"
 "	b	3f\n"
 	/* Wrap case, add USHORT_MAX to wrap around again. */
 "5:	mov	%w1, 0xffff\n"
@@ -80,7 +87,7 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
 "3:"
 	: "=&r" (lockval), "=&r" (newval), "=&r" (tmp), "+Q" (*lock)
 	: "Q" (lock->owner), "I" (1 << TICKET_SHIFT)
-	: "memory");
+	: "memory", "cc");
 }
 
 static inline int arch_spin_trylock(arch_spinlock_t *lock)
-- 
2.4.3