[compiler-rt] r200394 - Optimized implementation of __clzdi2 and __clzsi2 for ARM. Written in
Joerg Sonnenberger
joerg at bec.de
Wed Jan 29 05:46:28 PST 2014
Author: joerg
Date: Wed Jan 29 07:46:28 2014
New Revision: 200394
URL: http://llvm.org/viewvc/llvm-project?rev=200394&view=rev
Log:
Optimized implementation of __clzdi2 and __clzsi2 for ARM. Written in
collaboration with Matt Thomas.
Differential Revision: http://llvm-reviews.chandlerc.com/D2630
Added:
compiler-rt/trunk/lib/arm/clzdi2.S
compiler-rt/trunk/lib/arm/clzsi2.S
Added: compiler-rt/trunk/lib/arm/clzdi2.S
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/arm/clzdi2.S?rev=200394&view=auto
==============================================================================
--- compiler-rt/trunk/lib/arm/clzdi2.S (added)
+++ compiler-rt/trunk/lib/arm/clzdi2.S Wed Jan 29 07:46:28 2014
@@ -0,0 +1,95 @@
+/* ===-- clzdi2.c - Implement __clzdi2 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements count leading zeros for 64bit arguments.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+#include "../assembly.h"
+
+#ifdef ARM_HAS_BX
+#define JMP(r) bx r
+#else
+#define JMP(r) mov pc, r
+#endif
+
+ .syntax unified
+
+ .text
+ .align 2
+DEFINE_COMPILERRT_FUNCTION(__clzdi2)
+#ifdef __ARM_FEATURE_CLZ
+#ifdef __ARMEB__
+ cmp r0, 0
+ itee ne
+ clzne r0, r0
+ clzeq r0, r1
+ addeq r0, r0, 32
+#else
+ cmp r1, 0
+ itee ne
+ clzne r0, r1
+ clzeq r0, r0
+ addeq r0, r0, 32
+#endif
+ JMP(lr)
+#else
+ /* Assumption: n != 0 */
+
+ /*
+ * r0: n
+ * r1: upper half of n, overwritten after check
+ * r1: count of leading zeros in n + 1
+ * r2: scratch register for shifted r0
+ */
+#ifdef __ARMEB__
+ cmp r0, 0
+ moveq r0, r1
+#else
+ cmp r1, 0
+ movne r0, r1
+#endif
+ movne r1, 1
+ moveq r1, 33
+
+ /*
+ * Basic block:
+ * if ((r0 >> SHIFT) == 0)
+ * r1 += SHIFT;
+ * else
+ * r0 >>= SHIFT;
+ * for descending powers of two as SHIFT.
+ */
+#define BLOCK(shift) \
+ lsrs r2, r0, shift; \
+ movne r0, r2; \
+ addeq r1, shift \
+
+ BLOCK(16)
+ BLOCK(8)
+ BLOCK(4)
+ BLOCK(2)
+
+ /*
+ * The basic block invariants at this point are (r0 >> 2) == 0 and
+ * r0 != 0. This means 1 <= r0 <= 3 and 0 <= (r0 >> 1) <= 1.
+ *
+ * r0 | (r0 >> 1) == 0 | (r0 >> 1) == 1 | -(r0 >> 1) | 1 - (r0 >> 1)
+ * ---+----------------+----------------+------------+--------------
+ * 1 | 1 | 0 | 0 | 1
+ * 2 | 0 | 1 | -1 | 0
+ * 3 | 0 | 1 | -1 | 0
+ *
+ * The r1's initial value of 1 compensates for the 1 here.
+ */
+ sub r0, r1, r0, lsr #1
+
+ JMP(lr)
+#endif // __ARM_FEATURE_CLZ
+END_COMPILERRT_FUNCTION(__clzdi2)
Added: compiler-rt/trunk/lib/arm/clzsi2.S
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/arm/clzsi2.S?rev=200394&view=auto
==============================================================================
--- compiler-rt/trunk/lib/arm/clzsi2.S (added)
+++ compiler-rt/trunk/lib/arm/clzsi2.S Wed Jan 29 07:46:28 2014
@@ -0,0 +1,75 @@
+/* ===-- clzsi2.c - Implement __clzsi2 -------------------------------------===
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements count leading zeros for 32bit arguments.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+#include "../assembly.h"
+
+#ifdef ARM_HAS_BX
+#define JMP(r) bx r
+#else
+#define JMP(r) mov pc, r
+#endif
+
+ .syntax unified
+
+ .text
+ .align 2
+DEFINE_COMPILERRT_FUNCTION(__clzsi2)
+#ifdef __ARM_FEATURE_CLZ
+ clz r0, r0
+ JMP(lr)
+#else
+ /* Assumption: n != 0 */
+
+ /*
+ * r0: n
+ * r1: count of leading zeros in n + 1
+ * r2: scratch register for shifted r0
+ */
+ mov r1, 1
+
+ /*
+ * Basic block:
+ * if ((r0 >> SHIFT) == 0)
+ * r1 += SHIFT;
+ * else
+ * r0 >>= SHIFT;
+ * for descending powers of two as SHIFT.
+ */
+
+#define BLOCK(shift) \
+ lsrs r2, r0, shift; \
+ movne r0, r2; \
+ addeq r1, shift \
+
+ BLOCK(16)
+ BLOCK(8)
+ BLOCK(4)
+ BLOCK(2)
+
+ /*
+ * The basic block invariants at this point are (r0 >> 2) == 0 and
+ * r0 != 0. This means 1 <= r0 <= 3 and 0 <= (r0 >> 1) <= 1.
+ *
+ * r0 | (r0 >> 1) == 0 | (r0 >> 1) == 1 | -(r0 >> 1) | 1 - (r0 >> 1)
+ * ---+----------------+----------------+------------+--------------
+ * 1 | 1 | 0 | 0 | 1
+ * 2 | 0 | 1 | -1 | 0
+ * 3 | 0 | 1 | -1 | 0
+ *
+ * The r1's initial value of 1 compensates for the 1 here.
+ */
+ sub r0, r1, r0, lsr #1
+
+ JMP(lr)
+#endif // __ARM_FEATURE_CLZ
+END_COMPILERRT_FUNCTION(__clzsi2)
More information about the llvm-commits
mailing list