[compiler-rt] r200394 - Optimized implementation of __clzdi2 and __clzsi2 for ARM. Written in

Joerg Sonnenberger joerg at bec.de
Wed Jan 29 05:46:28 PST 2014


Author: joerg
Date: Wed Jan 29 07:46:28 2014
New Revision: 200394

URL: http://llvm.org/viewvc/llvm-project?rev=200394&view=rev
Log:
Optimized implementation of __clzdi2 and __clzsi2 for ARM. Written in
collaboration with Matt Thomas.

Differential Revision: http://llvm-reviews.chandlerc.com/D2630

Added:
    compiler-rt/trunk/lib/arm/clzdi2.S
    compiler-rt/trunk/lib/arm/clzsi2.S

Added: compiler-rt/trunk/lib/arm/clzdi2.S
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/arm/clzdi2.S?rev=200394&view=auto
==============================================================================
--- compiler-rt/trunk/lib/arm/clzdi2.S (added)
+++ compiler-rt/trunk/lib/arm/clzdi2.S Wed Jan 29 07:46:28 2014
@@ -0,0 +1,95 @@
+/* ===-- clzdi2.S - Implement __clzdi2 -------------------------------------===
+ *
+ *               The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements count leading zeros for 64bit arguments.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+#include "../assembly.h"
+
+#ifdef ARM_HAS_BX	/* not defined in this file; presumably supplied by the build or assembly.h -- TODO confirm */
+#define	JMP(r)	bx	r	/* JMP(r): branch to the address held in r using BX */
+#else
+#define	JMP(r)	mov	pc, r	/* JMP(r) without BX: branch by copying r into pc */
+#endif
+
+	.syntax unified	/* unified (UAL) assembler syntax for everything below */
+
+	.text	/* emit the function into the code section */
+	.align	2	/* presumably a power-of-two exponent, i.e. 4-byte alignment -- TODO confirm for the target assembler */
+DEFINE_COMPILERRT_FUNCTION(__clzdi2)	/* 64-bit n in r0/r1 -> count of leading zero bits in r0 */
+#ifdef __ARM_FEATURE_CLZ	/* path that uses the CLZ instruction */
+#ifdef __ARMEB__	/* this branch treats r0 as the upper word of n */
+	cmp	r0, 0		/* is the upper word zero? */
+	itee ne			/* IT block: next instruction is NE, the two after it are EQ */
+	clzne	r0, r0		/* upper != 0: result = clz(upper) */
+	clzeq	r0, r1		/* upper == 0: result = clz(lower) ... */
+	addeq	r0, r0, 32	/* ... plus the 32 zero bits of the upper word */
+#else	/* this branch treats r1 as the upper word of n */
+	cmp	r1, 0		/* is the upper word zero? */
+	itee ne			/* IT block: next instruction is NE, the two after it are EQ */
+	clzne	r0, r1		/* upper != 0: result = clz(upper) */
+	clzeq	r0, r0		/* upper == 0: result = clz(lower) ... */
+	addeq	r0, r0, 32	/* ... plus the 32 zero bits of the upper word */
+#endif
+	JMP(lr)			/* return; result is in r0 */
+#else	/* no CLZ instruction: shift-based binary search for the top set bit */
+	/* Assumption: n != 0 (for n == 0 the steps below produce 63, not 64) */
+
+	/*
+	 * r0: after the check, the most significant non-zero word of n
+	 * r1: on entry, the second word of n; overwritten after the check
+	 * r1: afterwards, (leading zeros counted so far) + 1
+	 * r2: scratch register for shifted r0
+	 */
+#ifdef __ARMEB__
+	cmp	r0, 0		/* upper word (r0 in this branch) == 0? */
+	moveq	r0, r1		/* yes: search the lower word instead */
+#else
+	cmp	r1, 0		/* upper word (r1 in this branch) == 0? */
+	movne	r0, r1		/* no: search the upper word instead of the lower */
+#endif
+	movne	r1, 1		/* upper != 0: count starts at 0 + 1 (flags are still those of the cmp) */
+	moveq	r1, 33		/* upper == 0: 32 zero bits already known, + 1 */
+
+	/*
+	 * Basic block (lsrs sets the Z flag that selects movne or addeq):
+	 * if ((r0 >> SHIFT) == 0)
+	 *   r1 += SHIFT;
+	 * else
+	 *   r0 >>= SHIFT;
+	 * for descending powers of two as SHIFT.
+	 */
+#define BLOCK(shift) \
+	lsrs	r2, r0, shift; \
+	movne	r0, r2; \
+	addeq	r1, shift \
+
+	BLOCK(16)		/* afterwards r0 < 2^16 */
+	BLOCK(8)		/* afterwards r0 < 2^8 */
+	BLOCK(4)		/* afterwards r0 < 2^4 */
+	BLOCK(2)		/* afterwards r0 < 2^2 */
+
+	/*
+	 * The basic block invariants at this point are (r0 >> 2) == 0 and
+	 * r0 != 0. This means 1 <= r0 <= 3 and 0 <= (r0 >> 1) <= 1.
+	 *
+	 * r0 | (r0 >> 1) == 0 | (r0 >> 1) == 1 | -(r0 >> 1) | 1 - (r0 >> 1)
+	 * ---+----------------+----------------+------------+--------------
+	 * 1  | 1              | 0              | 0          | 1
+	 * 2  | 0              | 1              | -1         | 0
+	 * 3  | 0              | 1              | -1         | 0
+	 *
+	 * r1's initial bias of 1 supplies the "1" of the last column.
+	 */
+	sub	r0, r1, r0, lsr #1	/* r0 = r1 - (r0 >> 1): the final count */
+
+	JMP(lr)			/* return; result is in r0 */
+#endif // __ARM_FEATURE_CLZ
+END_COMPILERRT_FUNCTION(__clzdi2)

Added: compiler-rt/trunk/lib/arm/clzsi2.S
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/arm/clzsi2.S?rev=200394&view=auto
==============================================================================
--- compiler-rt/trunk/lib/arm/clzsi2.S (added)
+++ compiler-rt/trunk/lib/arm/clzsi2.S Wed Jan 29 07:46:28 2014
@@ -0,0 +1,75 @@
+/* ===-- clzsi2.S - Implement __clzsi2 -------------------------------------===
+ *
+ *               The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements count leading zeros for 32bit arguments.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+#include "../assembly.h"
+
+#ifdef ARM_HAS_BX	/* not defined in this file; presumably supplied by the build or assembly.h -- TODO confirm */
+#define	JMP(r)	bx	r	/* JMP(r): branch to the address held in r using BX */
+#else
+#define	JMP(r)	mov	pc, r	/* JMP(r) without BX: branch by copying r into pc */
+#endif
+
+	.syntax unified	/* unified (UAL) assembler syntax for everything below */
+
+	.text	/* emit the function into the code section */
+	.align	2	/* presumably a power-of-two exponent, i.e. 4-byte alignment -- TODO confirm for the target assembler */
+DEFINE_COMPILERRT_FUNCTION(__clzsi2)	/* 32-bit n in r0 -> count of leading zero bits in r0 */
+#ifdef __ARM_FEATURE_CLZ	/* path that uses the CLZ instruction */
+	clz	r0, r0		/* result = clz(n) */
+	JMP(lr)			/* return; result is in r0 */
+#else	/* no CLZ instruction: shift-based binary search for the top set bit */
+	/* Assumption: n != 0 (for n == 0 the steps below produce 31, not 32) */
+
+	/*
+	 * r0: n, narrowed step by step towards its highest set bit
+	 * r1: (leading zeros counted so far) + 1
+	 * r2: scratch register for shifted r0
+	 */
+	mov	r1, 1		/* count starts at 0 + 1; the extra 1 is consumed by the final sub */
+
+	/*
+	 * Basic block (lsrs sets the Z flag that selects movne or addeq):
+	 * if ((r0 >> SHIFT) == 0)
+	 *   r1 += SHIFT;
+	 * else
+	 *   r0 >>= SHIFT;
+	 * for descending powers of two as SHIFT.
+	 */
+
+#define BLOCK(shift) \
+	lsrs	r2, r0, shift; \
+	movne	r0, r2; \
+	addeq	r1, shift \
+
+	BLOCK(16)		/* afterwards r0 < 2^16 */
+	BLOCK(8)		/* afterwards r0 < 2^8 */
+	BLOCK(4)		/* afterwards r0 < 2^4 */
+	BLOCK(2)		/* afterwards r0 < 2^2 */
+
+	/*
+	 * The basic block invariants at this point are (r0 >> 2) == 0 and
+	 * r0 != 0. This means 1 <= r0 <= 3 and 0 <= (r0 >> 1) <= 1.
+	 *
+	 * r0 | (r0 >> 1) == 0 | (r0 >> 1) == 1 | -(r0 >> 1) | 1 - (r0 >> 1)
+	 * ---+----------------+----------------+------------+--------------
+	 * 1  | 1              | 0              | 0          | 1
+	 * 2  | 0              | 1              | -1         | 0
+	 * 3  | 0              | 1              | -1         | 0
+	 *
+	 * r1's initial bias of 1 supplies the "1" of the last column.
+	 */
+	sub	r0, r1, r0, lsr #1	/* r0 = r1 - (r0 >> 1): the final count */
+
+	JMP(lr)			/* return; result is in r0 */
+#endif // __ARM_FEATURE_CLZ
+END_COMPILERRT_FUNCTION(__clzsi2)





More information about the llvm-commits mailing list