[compiler-rt] r288777 - builtins: Add ARM Thumb1 implementation for uidiv and uidivmod

Zhao, Weiming via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 7 10:25:18 PST 2016


Hi Reid,

Sorry for the trouble. It looks like the condition for CLZ availability is 
not right.

Please review the fix below.

https://reviews.llvm.org/D27530


Weiming


On 12/7/2016 9:47 AM, Reid Kleckner wrote:
> With this CL, we can no longer build compiler-rt with clang from XCode 
> 7.0:
> https://build.chromium.org/p/chromium.fyi/builders/ClangToTMac%20%28dbg%29/builds/7185/steps/gclient%20runhooks/logs/stdio
>
> FAILED: 
> lib/builtins/CMakeFiles/clang_rt.hard_pic_armv7em_macho_embedded.dir/arm/clzdi2.S.o 
>
> /Applications/Xcode7.0.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/cc 
>    ...
> /b/c/b/ClangToTMac__dbg_/src/third_party/llvm/compiler-rt/lib/builtins/arm/clzdi2.S:58:2: 
> error: predicated instructions must be in IT block
>  movne r0, r1
>  ^
> /b/c/b/ClangToTMac__dbg_/src/third_party/llvm/compiler-rt/lib/builtins/arm/clzdi2.S:60:2: 
> error: predicated instructions must be in IT block
>  movne r1, 1
>  ^
> ...
>
> With the changed __ARM_FEATURE_CLZ ifdef, now something goes wrong.
>
> On Mon, Dec 5, 2016 at 11:13 PM, Weiming Zhao via llvm-commits 
> <llvm-commits at lists.llvm.org <mailto:llvm-commits at lists.llvm.org>> wrote:
>
>     Author: weimingz
>     Date: Tue Dec  6 01:13:15 2016
>     New Revision: 288777
>
>     URL: http://llvm.org/viewvc/llvm-project?rev=288777&view=rev
>     <http://llvm.org/viewvc/llvm-project?rev=288777&view=rev>
>     Log:
>     builtins: Add ARM Thumb1 implementation for uidiv and uidivmod
>
>     This is a resubmit of r288710 due to breakage of Darwin armv7em.
>
>     Modified:
>         compiler-rt/trunk/lib/builtins/arm/aeabi_uidivmod.S
>         compiler-rt/trunk/lib/builtins/arm/udivsi3.S
>         compiler-rt/trunk/lib/builtins/assembly.h
>
>     Modified: compiler-rt/trunk/lib/builtins/arm/aeabi_uidivmod.S
>     URL:
>     http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/builtins/arm/aeabi_uidivmod.S?rev=288777&r1=288776&r2=288777&view=diff
>     <http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/builtins/arm/aeabi_uidivmod.S?rev=288777&r1=288776&r2=288777&view=diff>
>     ==============================================================================
>     --- compiler-rt/trunk/lib/builtins/arm/aeabi_uidivmod.S (original)
>     +++ compiler-rt/trunk/lib/builtins/arm/aeabi_uidivmod.S Tue Dec  6
>     01:13:15 2016
>     @@ -23,6 +23,20 @@
>              .syntax unified
>              .p2align 2
>      DEFINE_COMPILERRT_FUNCTION(__aeabi_uidivmod)
>     +#if __ARM_ARCH_ISA_THUMB == 1
>     +        cmp     r0, r1
>     +        bcc     LOCAL_LABEL(case_denom_larger)
>     +        push    {r0, r1, lr}
>     +        bl      SYMBOL_NAME(__aeabi_uidiv)
>     +        pop     {r1, r2, r3}
>     +        muls    r2, r2, r0 // r2 = quot * denom
>     +        subs    r1, r1, r2
>     +        JMP     (r3)
>     +LOCAL_LABEL(case_denom_larger):
>     +        movs    r1, r0
>     +        movs    r0, #0
>     +        JMP     (lr)
>     +#else
>              push    { lr }
>              sub     sp, sp, #4
>              mov     r2, sp
>     @@ -35,6 +49,7 @@ DEFINE_COMPILERRT_FUNCTION(__aeabi_uidiv
>              ldr     r1, [sp]
>              add     sp, sp, #4
>              pop     { pc }
>     +#endif
>      END_COMPILERRT_FUNCTION(__aeabi_uidivmod)
>
>      NO_EXEC_STACK_DIRECTIVE
>
>     Modified: compiler-rt/trunk/lib/builtins/arm/udivsi3.S
>     URL:
>     http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/builtins/arm/udivsi3.S?rev=288777&r1=288776&r2=288777&view=diff
>     <http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/builtins/arm/udivsi3.S?rev=288777&r1=288776&r2=288777&view=diff>
>     ==============================================================================
>     --- compiler-rt/trunk/lib/builtins/arm/udivsi3.S (original)
>     +++ compiler-rt/trunk/lib/builtins/arm/udivsi3.S Tue Dec  6
>     01:13:15 2016
>     @@ -40,12 +40,26 @@ DEFINE_COMPILERRT_FUNCTION(__udivsi3)
>      #else
>             cmp     r1, #1
>             bcc     LOCAL_LABEL(divby0)
>     +#if __ARM_ARCH_ISA_THUMB == 1
>     +       bne LOCAL_LABEL(num_neq_denom)
>     +       JMP(lr)
>     +LOCAL_LABEL(num_neq_denom):
>     +#else
>             IT(eq)
>             JMPc(lr, eq)
>     +#endif
>             cmp     r0, r1
>     +#if __ARM_ARCH_ISA_THUMB == 1
>     +       bhs LOCAL_LABEL(num_ge_denom)
>     +       movs r0, #0
>     +       JMP(lr)
>     +LOCAL_LABEL(num_ge_denom):
>     +#else
>             ITT(cc)
>             movcc   r0, #0
>             JMPc(lr, cc)
>     +#endif
>     +
>             /*
>              * Implement division using binary long division algorithm.
>              *
>     @@ -62,7 +76,7 @@ DEFINE_COMPILERRT_FUNCTION(__udivsi3)
>              * that (r0 << shift) < 2 * r1. The quotient is stored in r3.
>              */
>
>     -#  ifdef __ARM_FEATURE_CLZ
>     +#  if defined(__ARM_FEATURE_CLZ)
>             clz     ip, r0
>             clz     r3, r1
>             /* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */
>     @@ -77,49 +91,128 @@ DEFINE_COMPILERRT_FUNCTION(__udivsi3)
>             sub     ip, ip, r3, lsl #3
>             mov     r3, #0
>             bx      ip
>     -#  else
>     +#  else /* No CLZ Feature */
>      #    if __ARM_ARCH_ISA_THUMB == 2
>      #    error THUMB mode requires CLZ or UDIV
>      #    endif
>     +#    if __ARM_ARCH_ISA_THUMB == 1
>     +#      define BLOCK_SIZE 10
>     +#    else
>     +#      define BLOCK_SIZE 12
>     +#    endif
>     +
>             mov     r2, r0
>     +#    if __ARM_ARCH_ISA_THUMB == 1
>     +       mov ip, r0
>     +       adr r0, LOCAL_LABEL(div0block)
>     +       adds r0, #1
>     +#    else
>             adr     ip, LOCAL_LABEL(div0block)
>     -
>     -       lsr     r3, r2, #16
>     +#    endif
>     +       lsrs    r3, r2, #16
>             cmp     r3, r1
>     +#    if __ARM_ARCH_ISA_THUMB == 1
>     +       blo LOCAL_LABEL(skip_16)
>     +       movs r2, r3
>     +       subs r0, r0, #(16 * BLOCK_SIZE)
>     +LOCAL_LABEL(skip_16):
>     +#    else
>             movhs   r2, r3
>     -       subhs   ip, ip, #(16 * 12)
>     +       subhs   ip, ip, #(16 * BLOCK_SIZE)
>     +#    endif
>
>     -       lsr     r3, r2, #8
>     +       lsrs    r3, r2, #8
>             cmp     r3, r1
>     +#    if __ARM_ARCH_ISA_THUMB == 1
>     +       blo LOCAL_LABEL(skip_8)
>     +       movs r2, r3
>     +       subs r0, r0, #(8 * BLOCK_SIZE)
>     +LOCAL_LABEL(skip_8):
>     +#    else
>             movhs   r2, r3
>     -       subhs   ip, ip, #(8 * 12)
>     +       subhs   ip, ip, #(8 * BLOCK_SIZE)
>     +#    endif
>
>     -       lsr     r3, r2, #4
>     +       lsrs    r3, r2, #4
>             cmp     r3, r1
>     +#    if __ARM_ARCH_ISA_THUMB == 1
>     +       blo LOCAL_LABEL(skip_4)
>     +       movs r2, r3
>     +       subs r0, r0, #(4 * BLOCK_SIZE)
>     +LOCAL_LABEL(skip_4):
>     +#    else
>             movhs   r2, r3
>     -       subhs   ip, #(4 * 12)
>     +       subhs   ip, #(4 * BLOCK_SIZE)
>     +#    endif
>
>     -       lsr     r3, r2, #2
>     +       lsrs    r3, r2, #2
>             cmp     r3, r1
>     +#    if __ARM_ARCH_ISA_THUMB == 1
>     +       blo LOCAL_LABEL(skip_2)
>     +       movs r2, r3
>     +       subs r0, r0, #(2 * BLOCK_SIZE)
>     +LOCAL_LABEL(skip_2):
>     +#    else
>             movhs   r2, r3
>     -       subhs   ip, ip, #(2 * 12)
>     +       subhs   ip, ip, #(2 * BLOCK_SIZE)
>     +#    endif
>
>             /* Last block, no need to update r2 or r3. */
>     +#    if __ARM_ARCH_ISA_THUMB == 1
>     +       lsrs r3, r2, #1
>     +       cmp r3, r1
>     +       blo LOCAL_LABEL(skip_1)
>     +       subs r0, r0, #(1 * BLOCK_SIZE)
>     +LOCAL_LABEL(skip_1):
>     +       movs r2, r0
>     +       mov r0, ip
>     +       movs r3, #0
>     +       JMP (r2)
>     +
>     +#    else
>             cmp     r1, r2, lsr #1
>     -       subls   ip, ip, #(1 * 12)
>     +       subls   ip, ip, #(1 * BLOCK_SIZE)
>
>     -       mov     r3, #0
>     +       movs    r3, #0
>
>             JMP(ip)
>     -#  endif
>     +#    endif
>     +#  endif /* __ARM_FEATURE_CLZ */
>     +
>
>      #define        IMM     #
>     +       /* due to the range limit of branch in Thumb1, we have to
>     place the
>     +                block closer */
>     +LOCAL_LABEL(divby0):
>     +       movs    r0, #0
>     +#      if defined(__ARM_EABI__)
>     +       bl      __aeabi_idiv0 // due to relocation limit, can't use b.
>     +#      endif
>     +       JMP(lr)
>
>     +
>     +#if __ARM_ARCH_ISA_THUMB == 1
>     +#define block(shift)                    \
>     +       lsls r2, r1, IMM shift;                         \
>     +       cmp r0, r2;                         \
>     +       blo LOCAL_LABEL(block_skip_##shift);                      
>            \
>     +       subs r0, r0, r2;                          \
>     +       LOCAL_LABEL(block_skip_##shift) :;                        
>          \
>     +       adcs r3, r3 /* same as ((r3 << 1) | Carry). Carry is set
>     if r0 >= r2. */
>     +
>     +       /* TODO: if current location counter is not word
>     aligned, we don't
>     +                need the .p2align and nop */
>     +       /* Label div0block must be word-aligned. First align block
>     31 */
>     +       .p2align 2
>     +       nop /* Padding to align div0block as 31 blocks = 310 bytes */
>     +
>     +#else
>      #define block(shift)                    \
>             cmp     r0, r1, lsl IMM shift;                    \
>             ITT(hs);                    \
>             WIDE(addhs)     r3, r3, IMM (1 << shift);                
>             \
>             WIDE(subhs)     r0, r0, r1, lsl IMM shift
>     +#endif
>
>             block(31)
>             block(30)
>     @@ -159,12 +252,14 @@ LOCAL_LABEL(div0block):
>             JMP(lr)
>      #endif /* __ARM_ARCH_EXT_IDIV__ */
>
>     +#if __ARM_ARCH_EXT_IDIV__
>      LOCAL_LABEL(divby0):
>     -       mov     r0, #0
>     -#ifdef __ARM_EABI__
>     -       b       __aeabi_idiv0
>     -#else
>     -       JMP(lr)
>     +        mov     r0, #0
>     +#  ifdef __ARM_EABI__
>     +        b       __aeabi_idiv0
>     +#  else
>     +        JMP(lr)
>     +#  endif
>      #endif
>
>      END_COMPILERRT_FUNCTION(__udivsi3)
>
>     Modified: compiler-rt/trunk/lib/builtins/assembly.h
>     URL:
>     http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/builtins/assembly.h?rev=288777&r1=288776&r2=288777&view=diff
>     <http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/builtins/assembly.h?rev=288777&r1=288776&r2=288777&view=diff>
>     ==============================================================================
>     --- compiler-rt/trunk/lib/builtins/assembly.h (original)
>     +++ compiler-rt/trunk/lib/builtins/assembly.h Tue Dec 6 01:13:15 2016
>     @@ -71,7 +71,8 @@
>      #define ARM_HAS_BX
>      #endif
>      #if !defined(__ARM_FEATURE_CLZ) &&                            \
>     -    (__ARM_ARCH >= 6 || (__ARM_ARCH == 5 &&
>     !defined(__ARM_ARCH_5__)))
>     +    ((__ARM_ARCH >= 6 && __ARM_ARCH_PROFILE != 'M') ||           
>                  \
>     +     (__ARM_ARCH == 5 && !defined(__ARM_ARCH_5__)))
>      #define __ARM_FEATURE_CLZ
>      #endif
>
>
>
>     _______________________________________________
>     llvm-commits mailing list
>     llvm-commits at lists.llvm.org <mailto:llvm-commits at lists.llvm.org>
>     http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>     <http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits>
>
>

-- 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by The Linux Foundation

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20161207/14b6a645/attachment.html>


More information about the llvm-commits mailing list