[compiler-rt] r288710 - builtins: Add ARM Thumb1 implementation for uidiv and uidivmod

Chris Bieneman via llvm-commits llvm-commits at lists.llvm.org
Mon Dec 5 21:53:01 PST 2016


The Darwin buildbots have been broken since 2:40 this afternoon as a result of this patch, so I've reverted it in r288773.

Build logs for the failure are here:

http://lab.llvm.org:8080/green/job/clang-stage2-configure-Rlto_build/12035/console

The specific error is:

/Users/buildslave/jenkins/sharedspace/clang-R_master@2/clang-build/Build/./bin/clang    -O2 -g -DNDEBUG    -isysroot /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.11.sdk -Oz -Wall -fomit-frame-pointer -ffreestanding -arch armv7em -static -mfloat-abi=soft -MMD -MT lib/builtins/CMakeFiles/clang_rt.soft_static_armv7em_macho_embedded.dir/arm/udivsi3.S.o -MF lib/builtins/CMakeFiles/clang_rt.soft_static_armv7em_macho_embedded.dir/arm/udivsi3.S.o.d -o lib/builtins/CMakeFiles/clang_rt.soft_static_armv7em_macho_embedded.dir/arm/udivsi3.S.o -c '/Users/buildslave/jenkins/sharedspace/clang-R_master@2/llvm/projects/compiler-rt/lib/builtins/arm/udivsi3.S'
/Users/buildslave/jenkins/sharedspace/clang-R_master@2/llvm/projects/compiler-rt/lib/builtins/arm/udivsi3.S:40:1: error: assembler local symbol 'L_divby0' not defined

^

-Chris

> On Dec 5, 2016, at 1:40 PM, Weiming Zhao via llvm-commits <llvm-commits at lists.llvm.org> wrote:
> 
> Author: weimingz
> Date: Mon Dec  5 15:40:36 2016
> New Revision: 288710
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=288710&view=rev
> Log:
> builtins: Add ARM Thumb1 implementation for uidiv and uidivmod
> 
> Summary:
> The current uidiv supports archs without clz. However, the asm is for thumb2/arm.
> For uidivmod, the existing code calls the C version of uidivmodsi4, which then calls uidiv. The extra push/pop/bl makes it less efficient.
> 
> Reviewers: jmolloy, jroelofs, joerg, compnerd, rengolin
> 
> Subscribers: llvm-commits, aemerson
> 
> Differential Revision: https://reviews.llvm.org/D27309
> 
> Modified:
>    compiler-rt/trunk/lib/builtins/arm/aeabi_uidivmod.S
>    compiler-rt/trunk/lib/builtins/arm/udivsi3.S
>    compiler-rt/trunk/lib/builtins/assembly.h
> 
> Modified: compiler-rt/trunk/lib/builtins/arm/aeabi_uidivmod.S
> URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/builtins/arm/aeabi_uidivmod.S?rev=288710&r1=288709&r2=288710&view=diff
> ==============================================================================
> --- compiler-rt/trunk/lib/builtins/arm/aeabi_uidivmod.S (original)
> +++ compiler-rt/trunk/lib/builtins/arm/aeabi_uidivmod.S Mon Dec  5 15:40:36 2016
> @@ -23,6 +23,20 @@
>         .syntax unified
>         .p2align 2
> DEFINE_COMPILERRT_FUNCTION(__aeabi_uidivmod)
> +#if __ARM_ARCH_ISA_THUMB == 1
> +        cmp     r0, r1
> +        bcc     LOCAL_LABEL(case_denom_larger)
> +        push    {r0, r1, lr}
> +        bl      SYMBOL_NAME(__aeabi_uidiv)
> +        pop     {r1, r2, r3}
> +        muls    r2, r2, r0 // r2 = quot * denom
> +        subs    r1, r1, r2
> +        JMP     (r3)
> +LOCAL_LABEL(case_denom_larger):
> +        movs    r1, r0
> +        movs    r0, #0
> +        JMP     (lr)
> +#else
>         push    { lr }
>         sub     sp, sp, #4
>         mov     r2, sp
> @@ -35,6 +49,7 @@ DEFINE_COMPILERRT_FUNCTION(__aeabi_uidiv
>         ldr     r1, [sp]
>         add     sp, sp, #4
>         pop     { pc }
> +#endif
> END_COMPILERRT_FUNCTION(__aeabi_uidivmod)
> 
> NO_EXEC_STACK_DIRECTIVE
> 
> Modified: compiler-rt/trunk/lib/builtins/arm/udivsi3.S
> URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/builtins/arm/udivsi3.S?rev=288710&r1=288709&r2=288710&view=diff
> ==============================================================================
> --- compiler-rt/trunk/lib/builtins/arm/udivsi3.S (original)
> +++ compiler-rt/trunk/lib/builtins/arm/udivsi3.S Mon Dec  5 15:40:36 2016
> @@ -40,12 +40,26 @@ DEFINE_COMPILERRT_FUNCTION(__udivsi3)
> #else
> 	cmp	r1, #1
> 	bcc	LOCAL_LABEL(divby0)
> +#if __ARM_ARCH_ISA_THUMB == 1
> +	bne LOCAL_LABEL(num_neq_denom)
> +	JMP(lr)
> +LOCAL_LABEL(num_neq_denom):
> +#else
> 	IT(eq)
> 	JMPc(lr, eq)
> +#endif
> 	cmp	r0, r1
> +#if __ARM_ARCH_ISA_THUMB == 1
> +	bhs LOCAL_LABEL(num_ge_denom)
> +	movs r0, #0
> +	JMP(lr)
> +LOCAL_LABEL(num_ge_denom):
> +#else
> 	ITT(cc)
> 	movcc	r0, #0
> 	JMPc(lr, cc)
> +#endif
> +
> 	/*
> 	 * Implement division using binary long division algorithm.
> 	 *
> @@ -62,7 +76,7 @@ DEFINE_COMPILERRT_FUNCTION(__udivsi3)
> 	 * that (r0 << shift) < 2 * r1. The quotient is stored in r3.
> 	 */
> 
> -#  ifdef __ARM_FEATURE_CLZ
> +#  if defined(__ARM_FEATURE_CLZ)
> 	clz	ip, r0
> 	clz	r3, r1
> 	/* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */
> @@ -77,49 +91,128 @@ DEFINE_COMPILERRT_FUNCTION(__udivsi3)
> 	sub	ip, ip, r3, lsl #3
> 	mov	r3, #0
> 	bx	ip
> -#  else
> +#  else /* No CLZ Feature */
> #    if __ARM_ARCH_ISA_THUMB == 2
> #    error THUMB mode requires CLZ or UDIV
> #    endif
> +#    if __ARM_ARCH_ISA_THUMB == 1
> +#      define BLOCK_SIZE 10
> +#    else
> +#      define BLOCK_SIZE 12
> +#    endif
> +
> 	mov	r2, r0
> +#    if __ARM_ARCH_ISA_THUMB == 1
> +	mov ip, r0
> +	adr r0, LOCAL_LABEL(div0block)
> +	adds r0, #1
> +#    else
> 	adr	ip, LOCAL_LABEL(div0block)
> -
> -	lsr	r3, r2, #16
> +#    endif
> +	lsrs	r3, r2, #16
> 	cmp	r3, r1
> +#    if __ARM_ARCH_ISA_THUMB == 1
> +	blo LOCAL_LABEL(skip_16)
> +	movs r2, r3
> +	subs r0, r0, #(16 * BLOCK_SIZE)
> +LOCAL_LABEL(skip_16):
> +#    else
> 	movhs	r2, r3
> -	subhs	ip, ip, #(16 * 12)
> +	subhs	ip, ip, #(16 * BLOCK_SIZE)
> +#    endif
> 
> -	lsr	r3, r2, #8
> +	lsrs	r3, r2, #8
> 	cmp	r3, r1
> +#    if __ARM_ARCH_ISA_THUMB == 1
> +	blo LOCAL_LABEL(skip_8)
> +	movs r2, r3
> +	subs r0, r0, #(8 * BLOCK_SIZE)
> +LOCAL_LABEL(skip_8):
> +#    else
> 	movhs	r2, r3
> -	subhs	ip, ip, #(8 * 12)
> +	subhs	ip, ip, #(8 * BLOCK_SIZE)
> +#    endif
> 
> -	lsr	r3, r2, #4
> +	lsrs	r3, r2, #4
> 	cmp	r3, r1
> +#    if __ARM_ARCH_ISA_THUMB == 1
> +	blo LOCAL_LABEL(skip_4)
> +	movs r2, r3
> +	subs r0, r0, #(4 * BLOCK_SIZE)
> +LOCAL_LABEL(skip_4):
> +#    else
> 	movhs	r2, r3
> -	subhs	ip, #(4 * 12)
> +	subhs	ip, #(4 * BLOCK_SIZE)
> +#    endif
> 
> -	lsr	r3, r2, #2
> +	lsrs	r3, r2, #2
> 	cmp	r3, r1
> +#    if __ARM_ARCH_ISA_THUMB == 1
> +	blo LOCAL_LABEL(skip_2)
> +	movs r2, r3
> +	subs r0, r0, #(2 * BLOCK_SIZE)
> +LOCAL_LABEL(skip_2):
> +#    else
> 	movhs	r2, r3
> -	subhs	ip, ip, #(2 * 12)
> +	subhs	ip, ip, #(2 * BLOCK_SIZE)
> +#    endif
> 
> 	/* Last block, no need to update r2 or r3. */
> +#    if __ARM_ARCH_ISA_THUMB == 1
> +	lsrs r3, r2, #1
> +	cmp r3, r1
> +	blo LOCAL_LABEL(skip_1)
> +	subs r0, r0, #(1 * BLOCK_SIZE)
> +LOCAL_LABEL(skip_1):
> +	movs r2, r0
> +	mov r0, ip
> +	movs r3, #0
> +	JMP (r2)
> +
> +#    else
> 	cmp	r1, r2, lsr #1
> -	subls	ip, ip, #(1 * 12)
> +	subls	ip, ip, #(1 * BLOCK_SIZE)
> 
> -	mov	r3, #0
> +	movs	r3, #0
> 
> 	JMP(ip)
> -#  endif
> +#    endif
> +#  endif /* __ARM_FEATURE_CLZ */
> +
> 
> #define	IMM	#
> +	/* due to the range limit of branch in Thumb1, we have to place the
> +		 block closer */
> +LOCAL_LABEL(divby0):
> +	movs	r0, #0
> +#      if defined(__ARM_EABI__)
> +	bl	__aeabi_idiv0 // due to relocation limit, can't use b.
> +#      endif
> +	JMP(lr)
> +
> 
> +#if __ARM_ARCH_ISA_THUMB == 1
> +#define block(shift)                                                           \
> +	lsls r2, r1, IMM shift;                                                      \
> +	cmp r0, r2;                                                                  \
> +	blo LOCAL_LABEL(block_skip_##shift);                                         \
> +	subs r0, r0, r2;                                                             \
> +	LOCAL_LABEL(block_skip_##shift) :;                                           \
> +	adcs r3, r3 /* same as ((r3 << 1) | Carry). Carry is set if r0 >= r2. */
> +
> +	/* TODO: if current location counter is not not word aligned, we don't
> +		 need the .p2align and nop */
> +	/* Label div0block must be word-aligned. First align block 31 */
> +	.p2align 2
> +	nop /* Padding to align div0block as 31 blocks = 310 bytes */
> +
> +#else
> #define block(shift)                                                           \
> 	cmp	r0, r1, lsl IMM shift;                                         \
> 	ITT(hs);                                                               \
> 	WIDE(addhs)	r3, r3, IMM (1 << shift);                              \
> 	WIDE(subhs)	r0, r0, r1, lsl IMM shift
> +#endif
> 
> 	block(31)
> 	block(30)
> @@ -159,14 +252,6 @@ LOCAL_LABEL(div0block):
> 	JMP(lr)
> #endif /* __ARM_ARCH_EXT_IDIV__ */
> 
> -LOCAL_LABEL(divby0):
> -	mov	r0, #0
> -#ifdef __ARM_EABI__
> -	b	__aeabi_idiv0
> -#else
> -	JMP(lr)
> -#endif
> -
> END_COMPILERRT_FUNCTION(__udivsi3)
> 
> NO_EXEC_STACK_DIRECTIVE
> 
> Modified: compiler-rt/trunk/lib/builtins/assembly.h
> URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/builtins/assembly.h?rev=288710&r1=288709&r2=288710&view=diff
> ==============================================================================
> --- compiler-rt/trunk/lib/builtins/assembly.h (original)
> +++ compiler-rt/trunk/lib/builtins/assembly.h Mon Dec  5 15:40:36 2016
> @@ -71,7 +71,8 @@
> #define ARM_HAS_BX
> #endif
> #if !defined(__ARM_FEATURE_CLZ) &&                                             \
> -    (__ARM_ARCH >= 6 || (__ARM_ARCH == 5 && !defined(__ARM_ARCH_5__)))
> +    ((__ARM_ARCH >= 6 && __ARM_ARCH_PROFILE != 'M') ||                         \
> +     (__ARM_ARCH == 5 && !defined(__ARM_ARCH_5__)))
> #define __ARM_FEATURE_CLZ
> #endif
> 
> 
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20161205/e90e6240/attachment.html>


More information about the llvm-commits mailing list