[PATCH] Implement __clzdi2 and __clzsi2 for early ARM architectures that lack the clz instruction

Joerg Sonnenberger joerg at NetBSD.org
Mon Jan 27 11:46:57 PST 2014


Hi t.p.northover, compnerd,

Implement __clzdi2 and __clzsi2 for early ARM architectures that lack the clz instruction. __clzdi2 needs special care for endianness: the upper 32 bits of the argument arrive in r0 on big-endian targets and in r1 on little-endian targets.

http://llvm-reviews.chandlerc.com/D2630

Files:
  lib/arm/clzdi2.S
  lib/arm/clzsi2.S

Index: lib/arm/clzdi2.S
===================================================================
--- lib/arm/clzdi2.S
+++ lib/arm/clzdi2.S
@@ -0,0 +1,116 @@
+/* ===-- clzdi2.S - Implement __clzdi2 -------------------------------------===
+ *
+ *               The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements count leading zeros for 64bit arguments.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+#include "../assembly.h"
+
+	.syntax unified
+
+	.text
+	.align	2
+/*
+ * __clzdi2: count the leading zero bits of the 64-bit value n in r0/r1.
+ *
+ * In:       r0 holds the upper 32 bits of n when __ARMEB__ is defined and
+ *           r1 the lower 32 bits; the roles are swapped otherwise.
+ * Out:      r0 = number of leading zero bits in n.
+ * Clobbers: condition flags; the fallback path also overwrites r1 and r2.
+ */
+DEFINE_COMPILERRT_FUNCTION(__clzdi2)
+#ifdef __ARM_FEATURE_CLZ
+	/*
+	 * CLZ is available: count in the upper word if it is non-zero, otherwise
+	 * count in the lower word and add 32. The flags set by CMP select both
+	 * the CLZ operand and the final ADDEQ.
+	 */
+#ifdef __ARMEB__
+	cmp	r0, #0
+	it ne
+	clzne	r0, r0
+	it eq
+	clzeq	r0, r1
+#else
+	cmp	r1, #0
+	it ne
+	clzne	r0, r1
+	it eq
+	clzeq	r0, r0
+#endif
+	it eq
+	addeq	r0, r0, #32
+	bx	lr
+#else
+	/* Assumption: n != 0 (for n == 0 this path returns 63). */
+
+	/*
+	 * r0: after the check below, the 32-bit word that is searched: the
+	 *     upper half of n if it is non-zero, otherwise the lower half
+	 * r1: after the check below, the count of leading zeros in n + 1
+	 * r2: scratch register for shifted r0
+	 */
+#ifdef __ARMEB__
+	cmp	r0, #0
+	moveq	r0, r1
+#else
+	cmp	r1, #0
+	movne	r0, r1
+#endif
+	/* EQ here means the upper half is zero: 32 leading zeros are known. */
+	movne	r1, #1
+	moveq	r1, #33
+
+	/*
+	 * Basic block:
+	 * if ((r0 >> SHIFT) == 0)
+	 *   r1 += SHIFT;
+	 * else
+	 *   r0 >>= SHIFT;
+	 * for descending powers of two as SHIFT.
+	 *
+	 * IMM supplies the immediate prefix: a literal # inside a function-like
+	 * macro would be parsed as the preprocessor stringify operator.
+	 */
+#define	IMM	#
+
+#define block(shift) \
+	lsrs	r2, r0, IMM shift; \
+	movne	r0, r2; \
+	addeq	r1, IMM shift
+
+	block(16)
+	block(8)
+	block(4)
+	block(2)
+
+	/*
+	 * The basic block invariants at this point are (r0 >> 2) == 0 and
+	 * r0 != 0. This means 1 <= r0 <= 3 and 0 <= (r0 >> 1) <= 1.
+	 *
+	 * r0 | (r0 >> 1) == 0 | (r0 >> 1) == 1 | -(r0 >> 1) | 1 - (r0 >> 1)
+	 * ---+----------------+----------------+------------+--------------
+	 * 1  | 1              | 0              | 0          | 1
+	 * 2  | 0              | 1              | -1         | 0
+	 * 3  | 0              | 1              | -1         | 0
+	 *
+	 * The last column is the number of leading zeros still uncounted; the
+	 * initial value of 1 in r1 supplies its constant term, so subtracting
+	 * (r0 >> 1) from r1 yields the final count.
+	 */
+	sub	r0, r1, r0, lsr #1
+
+#  ifdef ARM_HAS_BX
+	bx	lr
+#  else
+	mov	pc, lr
+#  endif
+#endif // __ARM_FEATURE_CLZ
+END_COMPILERRT_FUNCTION(__clzdi2)
Index: lib/arm/clzsi2.S
===================================================================
--- lib/arm/clzsi2.S
+++ lib/arm/clzsi2.S
@@ -0,0 +1,86 @@
+/* ===-- clzsi2.S - Implement __clzsi2 -------------------------------------===
+ *
+ *               The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements count leading zeros for 32bit arguments.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+#include "../assembly.h"
+
+	.syntax unified
+
+	.text
+	.align	2
+/*
+ * __clzsi2: count the leading zero bits of the 32-bit value n in r0.
+ *
+ * In:       r0 = n.
+ * Out:      r0 = number of leading zero bits in n.
+ * Clobbers: the fallback path overwrites r1, r2 and the condition flags.
+ */
+DEFINE_COMPILERRT_FUNCTION(__clzsi2)
+#ifdef __ARM_FEATURE_CLZ
+	clz	r0, r0
+	bx	lr
+#else
+	/* Assumption: n != 0 (for n == 0 this path returns 31). */
+
+	/*
+	 * r0: n
+	 * r1: count of leading zeros in n + 1
+	 * r2: scratch register for shifted r0
+	 */
+	mov	r1, #1
+
+	/*
+	 * Basic block:
+	 * if ((r0 >> SHIFT) == 0)
+	 *   r1 += SHIFT;
+	 * else
+	 *   r0 >>= SHIFT;
+	 * for descending powers of two as SHIFT.
+	 *
+	 * IMM supplies the immediate prefix: a literal # inside a function-like
+	 * macro would be parsed as the preprocessor stringify operator.
+	 */
+#define	IMM	#
+
+#define block(shift) \
+	lsrs	r2, r0, IMM shift; \
+	movne	r0, r2; \
+	addeq	r1, IMM shift
+
+	block(16)
+	block(8)
+	block(4)
+	block(2)
+
+	/*
+	 * The basic block invariants at this point are (r0 >> 2) == 0 and
+	 * r0 != 0. This means 1 <= r0 <= 3 and 0 <= (r0 >> 1) <= 1.
+	 *
+	 * r0 | (r0 >> 1) == 0 | (r0 >> 1) == 1 | -(r0 >> 1) | 1 - (r0 >> 1)
+	 * ---+----------------+----------------+------------+--------------
+	 * 1  | 1              | 0              | 0          | 1
+	 * 2  | 0              | 1              | -1         | 0
+	 * 3  | 0              | 1              | -1         | 0
+	 *
+	 * The last column is the number of leading zeros still uncounted; the
+	 * initial value of 1 in r1 supplies its constant term, so subtracting
+	 * (r0 >> 1) from r1 yields the final count.
+	 */
+	sub	r0, r1, r0, lsr #1
+
+#  ifdef ARM_HAS_BX
+	bx	lr
+#  else
+	mov	pc, lr
+#  endif
+#endif // __ARM_FEATURE_CLZ
+END_COMPILERRT_FUNCTION(__clzsi2)
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D2630.1.patch
Type: text/x-patch
Size: 4267 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20140127/93e3bfa3/attachment.bin>


More information about the llvm-commits mailing list