[compiler-rt] r288777 - builtins: Add ARM Thumb1 implementation for uidiv and uidivmod
Zhao, Weiming via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 7 10:25:18 PST 2016
Hi Reid,
Sorry for the trouble. It looks like the condition for CLZ availability
is not right.
Please review the fix below.
https://reviews.llvm.org/D27530
Weiming
On 12/7/2016 9:47 AM, Reid Kleckner wrote:
> With this CL, we can no longer build compiler-rt with clang from XCode
> 7.0:
> https://build.chromium.org/p/chromium.fyi/builders/ClangToTMac%20%28dbg%29/builds/7185/steps/gclient%20runhooks/logs/stdio
>
> FAILED:
> lib/builtins/CMakeFiles/clang_rt.hard_pic_armv7em_macho_embedded.dir/arm/clzdi2.S.o
>
> /Applications/Xcode7.0.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/cc
> ...
> /b/c/b/ClangToTMac__dbg_/src/third_party/llvm/compiler-rt/lib/builtins/arm/clzdi2.S:58:2:
> error: predicated instructions must be in IT block
> movne r0, r1
> ^
> /b/c/b/ClangToTMac__dbg_/src/third_party/llvm/compiler-rt/lib/builtins/arm/clzdi2.S:60:2:
> error: predicated instructions must be in IT block
> movne r1, 1
> ^
> ...
>
> With the changed __ARM_FEATURE_CLZ ifdef, now something goes wrong.
>
> On Mon, Dec 5, 2016 at 11:13 PM, Weiming Zhao via llvm-commits
> <llvm-commits at lists.llvm.org <mailto:llvm-commits at lists.llvm.org>> wrote:
>
> Author: weimingz
> Date: Tue Dec 6 01:13:15 2016
> New Revision: 288777
>
> URL: http://llvm.org/viewvc/llvm-project?rev=288777&view=rev
> <http://llvm.org/viewvc/llvm-project?rev=288777&view=rev>
> Log:
> builtins: Add ARM Thumb1 implementation for uidiv and uidivmod
>
> This is a resubmit of r288710 due to breakage of Darwin armv7em.
>
> Modified:
> compiler-rt/trunk/lib/builtins/arm/aeabi_uidivmod.S
> compiler-rt/trunk/lib/builtins/arm/udivsi3.S
> compiler-rt/trunk/lib/builtins/assembly.h
>
> Modified: compiler-rt/trunk/lib/builtins/arm/aeabi_uidivmod.S
> URL:
> http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/builtins/arm/aeabi_uidivmod.S?rev=288777&r1=288776&r2=288777&view=diff
> <http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/builtins/arm/aeabi_uidivmod.S?rev=288777&r1=288776&r2=288777&view=diff>
> ==============================================================================
> --- compiler-rt/trunk/lib/builtins/arm/aeabi_uidivmod.S (original)
> +++ compiler-rt/trunk/lib/builtins/arm/aeabi_uidivmod.S Tue Dec 6
> 01:13:15 2016
> @@ -23,6 +23,20 @@
> .syntax unified
> .p2align 2
> DEFINE_COMPILERRT_FUNCTION(__aeabi_uidivmod)
> +#if __ARM_ARCH_ISA_THUMB == 1
> + cmp r0, r1
> + bcc LOCAL_LABEL(case_denom_larger)
> + push {r0, r1, lr}
> + bl SYMBOL_NAME(__aeabi_uidiv)
> + pop {r1, r2, r3}
> + muls r2, r2, r0 // r2 = quot * denom
> + subs r1, r1, r2
> + JMP (r3)
> +LOCAL_LABEL(case_denom_larger):
> + movs r1, r0
> + movs r0, #0
> + JMP (lr)
> +#else
> push { lr }
> sub sp, sp, #4
> mov r2, sp
> @@ -35,6 +49,7 @@ DEFINE_COMPILERRT_FUNCTION(__aeabi_uidiv
> ldr r1, [sp]
> add sp, sp, #4
> pop { pc }
> +#endif
> END_COMPILERRT_FUNCTION(__aeabi_uidivmod)
>
> NO_EXEC_STACK_DIRECTIVE
>
> Modified: compiler-rt/trunk/lib/builtins/arm/udivsi3.S
> URL:
> http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/builtins/arm/udivsi3.S?rev=288777&r1=288776&r2=288777&view=diff
> <http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/builtins/arm/udivsi3.S?rev=288777&r1=288776&r2=288777&view=diff>
> ==============================================================================
> --- compiler-rt/trunk/lib/builtins/arm/udivsi3.S (original)
> +++ compiler-rt/trunk/lib/builtins/arm/udivsi3.S Tue Dec 6
> 01:13:15 2016
> @@ -40,12 +40,26 @@ DEFINE_COMPILERRT_FUNCTION(__udivsi3)
> #else
> cmp r1, #1
> bcc LOCAL_LABEL(divby0)
> +#if __ARM_ARCH_ISA_THUMB == 1
> + bne LOCAL_LABEL(num_neq_denom)
> + JMP(lr)
> +LOCAL_LABEL(num_neq_denom):
> +#else
> IT(eq)
> JMPc(lr, eq)
> +#endif
> cmp r0, r1
> +#if __ARM_ARCH_ISA_THUMB == 1
> + bhs LOCAL_LABEL(num_ge_denom)
> + movs r0, #0
> + JMP(lr)
> +LOCAL_LABEL(num_ge_denom):
> +#else
> ITT(cc)
> movcc r0, #0
> JMPc(lr, cc)
> +#endif
> +
> /*
> * Implement division using binary long division algorithm.
> *
> @@ -62,7 +76,7 @@ DEFINE_COMPILERRT_FUNCTION(__udivsi3)
> * that (r0 << shift) < 2 * r1. The quotient is stored in r3.
> */
>
> -# ifdef __ARM_FEATURE_CLZ
> +# if defined(__ARM_FEATURE_CLZ)
> clz ip, r0
> clz r3, r1
> /* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */
> @@ -77,49 +91,128 @@ DEFINE_COMPILERRT_FUNCTION(__udivsi3)
> sub ip, ip, r3, lsl #3
> mov r3, #0
> bx ip
> -# else
> +# else /* No CLZ Feature */
> # if __ARM_ARCH_ISA_THUMB == 2
> # error THUMB mode requires CLZ or UDIV
> # endif
> +# if __ARM_ARCH_ISA_THUMB == 1
> +# define BLOCK_SIZE 10
> +# else
> +# define BLOCK_SIZE 12
> +# endif
> +
> mov r2, r0
> +# if __ARM_ARCH_ISA_THUMB == 1
> + mov ip, r0
> + adr r0, LOCAL_LABEL(div0block)
> + adds r0, #1
> +# else
> adr ip, LOCAL_LABEL(div0block)
> -
> - lsr r3, r2, #16
> +# endif
> + lsrs r3, r2, #16
> cmp r3, r1
> +# if __ARM_ARCH_ISA_THUMB == 1
> + blo LOCAL_LABEL(skip_16)
> + movs r2, r3
> + subs r0, r0, #(16 * BLOCK_SIZE)
> +LOCAL_LABEL(skip_16):
> +# else
> movhs r2, r3
> - subhs ip, ip, #(16 * 12)
> + subhs ip, ip, #(16 * BLOCK_SIZE)
> +# endif
>
> - lsr r3, r2, #8
> + lsrs r3, r2, #8
> cmp r3, r1
> +# if __ARM_ARCH_ISA_THUMB == 1
> + blo LOCAL_LABEL(skip_8)
> + movs r2, r3
> + subs r0, r0, #(8 * BLOCK_SIZE)
> +LOCAL_LABEL(skip_8):
> +# else
> movhs r2, r3
> - subhs ip, ip, #(8 * 12)
> + subhs ip, ip, #(8 * BLOCK_SIZE)
> +# endif
>
> - lsr r3, r2, #4
> + lsrs r3, r2, #4
> cmp r3, r1
> +# if __ARM_ARCH_ISA_THUMB == 1
> + blo LOCAL_LABEL(skip_4)
> + movs r2, r3
> + subs r0, r0, #(4 * BLOCK_SIZE)
> +LOCAL_LABEL(skip_4):
> +# else
> movhs r2, r3
> - subhs ip, #(4 * 12)
> + subhs ip, #(4 * BLOCK_SIZE)
> +# endif
>
> - lsr r3, r2, #2
> + lsrs r3, r2, #2
> cmp r3, r1
> +# if __ARM_ARCH_ISA_THUMB == 1
> + blo LOCAL_LABEL(skip_2)
> + movs r2, r3
> + subs r0, r0, #(2 * BLOCK_SIZE)
> +LOCAL_LABEL(skip_2):
> +# else
> movhs r2, r3
> - subhs ip, ip, #(2 * 12)
> + subhs ip, ip, #(2 * BLOCK_SIZE)
> +# endif
>
> /* Last block, no need to update r2 or r3. */
> +# if __ARM_ARCH_ISA_THUMB == 1
> + lsrs r3, r2, #1
> + cmp r3, r1
> + blo LOCAL_LABEL(skip_1)
> + subs r0, r0, #(1 * BLOCK_SIZE)
> +LOCAL_LABEL(skip_1):
> + movs r2, r0
> + mov r0, ip
> + movs r3, #0
> + JMP (r2)
> +
> +# else
> cmp r1, r2, lsr #1
> - subls ip, ip, #(1 * 12)
> + subls ip, ip, #(1 * BLOCK_SIZE)
>
> - mov r3, #0
> + movs r3, #0
>
> JMP(ip)
> -# endif
> +# endif
> +# endif /* __ARM_FEATURE_CLZ */
> +
>
> #define IMM #
> + /* due to the range limit of branch in Thumb1, we have to
> place the
> + block closer */
> +LOCAL_LABEL(divby0):
> + movs r0, #0
> +# if defined(__ARM_EABI__)
> + bl __aeabi_idiv0 // due to relocation limit, can't use b.
> +# endif
> + JMP(lr)
>
> +
> +#if __ARM_ARCH_ISA_THUMB == 1
> +#define block(shift) \
> + lsls r2, r1, IMM shift; \
> + cmp r0, r2; \
> + blo LOCAL_LABEL(block_skip_##shift);
> \
> + subs r0, r0, r2; \
> + LOCAL_LABEL(block_skip_##shift) :;
> \
> + adcs r3, r3 /* same as ((r3 << 1) | Carry). Carry is set
> if r0 >= r2. */
> +
> + /* TODO: if current location counter is not not word
> aligned, we don't
> + need the .p2align and nop */
> + /* Label div0block must be word-aligned. First align block
> 31 */
> + .p2align 2
> + nop /* Padding to align div0block as 31 blocks = 310 bytes */
> +
> +#else
> #define block(shift) \
> cmp r0, r1, lsl IMM shift; \
> ITT(hs); \
> WIDE(addhs) r3, r3, IMM (1 << shift);
> \
> WIDE(subhs) r0, r0, r1, lsl IMM shift
> +#endif
>
> block(31)
> block(30)
> @@ -159,12 +252,14 @@ LOCAL_LABEL(div0block):
> JMP(lr)
> #endif /* __ARM_ARCH_EXT_IDIV__ */
>
> +#if __ARM_ARCH_EXT_IDIV__
> LOCAL_LABEL(divby0):
> - mov r0, #0
> -#ifdef __ARM_EABI__
> - b __aeabi_idiv0
> -#else
> - JMP(lr)
> + mov r0, #0
> +# ifdef __ARM_EABI__
> + b __aeabi_idiv0
> +# else
> + JMP(lr)
> +# endif
> #endif
>
> END_COMPILERRT_FUNCTION(__udivsi3)
>
> Modified: compiler-rt/trunk/lib/builtins/assembly.h
> URL:
> http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/builtins/assembly.h?rev=288777&r1=288776&r2=288777&view=diff
> <http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/builtins/assembly.h?rev=288777&r1=288776&r2=288777&view=diff>
> ==============================================================================
> --- compiler-rt/trunk/lib/builtins/assembly.h (original)
> +++ compiler-rt/trunk/lib/builtins/assembly.h Tue Dec 6 01:13:15 2016
> @@ -71,7 +71,8 @@
> #define ARM_HAS_BX
> #endif
> #if !defined(__ARM_FEATURE_CLZ) && \
> - (__ARM_ARCH >= 6 || (__ARM_ARCH == 5 &&
> !defined(__ARM_ARCH_5__)))
> + ((__ARM_ARCH >= 6 && __ARM_ARCH_PROFILE != 'M') ||
> \
> + (__ARM_ARCH == 5 && !defined(__ARM_ARCH_5__)))
> #define __ARM_FEATURE_CLZ
> #endif
>
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org <mailto:llvm-commits at lists.llvm.org>
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
> <http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits>
>
>
--
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20161207/14b6a645/attachment.html>
More information about the llvm-commits
mailing list