[llvm-commits] [llvm] r106299 - in /llvm/trunk: lib/Target/ARM/ARMISelLowering.cpp test/CodeGen/ARM/call-tc.ll test/CodeGen/ARM/ifcvt6-tc.ll test/CodeGen/ARM/insn-sched1-tc.ll test/CodeGen/ARM/ldm-tc.ll test/CodeGen/Thumb2/thumb2-call-tc.ll test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll

Evan Cheng evan.cheng at apple.com
Fri Jun 18 20:16:32 PDT 2010


On Jun 18, 2010, at 5:10 PM, Dale Johannesen wrote:

> 
> On Jun 18, 2010, at 4:50 PM PDT, Evan Cheng wrote:
> 
>> Thanks Dale. Have you updated ARMCodeEmitter to handle tail call instructions?
>> 
>> Evan
> 
> No.  It might just work as all the tail call patterns are duplicates of existing instructions, but we know better, don't we?  How do you test

Ok.

> the JIT?  The usual cross build uses an lli built on host, which won't work no matter where you run it.

Just build llvm for ARM and test the JIT on devices. :-)

Evan

> 
>> On Jun 18, 2010, at 12:00 PM, Dale Johannesen wrote:
>> 
>>> Author: johannes
>>> Date: Fri Jun 18 14:00:18 2010
>>> New Revision: 106299
>>> 
>>> URL: http://llvm.org/viewvc/llvm-project?rev=106299&view=rev
>>> Log:
>>> Enable tail calls on ARM by default, with some
>>> basic tests.
>>> 
>>> This has been well tested on Darwin but not elsewhere.
>>> It should work provided the linker correctly resolves
>>> B.W  <label in other function>
>>> which it has not seen before, at least from llvm-based
>>> compilers.  I'm leaving the arm-tail-calls switch in
>>> until I see if there are any problems because of that;
>>> it might need to be disabled for some environments.
>>> 
>>> 
>>> Added:
>>>  llvm/trunk/test/CodeGen/ARM/call-tc.ll
>>>  llvm/trunk/test/CodeGen/ARM/ifcvt6-tc.ll
>>>  llvm/trunk/test/CodeGen/ARM/insn-sched1-tc.ll
>>>  llvm/trunk/test/CodeGen/ARM/ldm-tc.ll
>>>  llvm/trunk/test/CodeGen/Thumb2/thumb2-call-tc.ll
>>>  llvm/trunk/test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll
>>> Modified:
>>>  llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
>>> 
>>> Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=106299&r1=106298&r2=106299&view=diff
>>> ==============================================================================
>>> --- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)
>>> +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Fri Jun 18 14:00:18 2010
>>> @@ -55,7 +55,7 @@
>>> static cl::opt<bool>
>>> EnableARMTailCalls("arm-tail-calls", cl::Hidden,
>>> cl::desc("Generate tail calls (TEMPORARY OPTION)."),
>>> -  cl::init(false));
>>> +  cl::init(true));
>>> 
>>> static cl::opt<bool>
>>> EnableARMLongCalls("arm-long-calls", cl::Hidden,
>>> 
>>> Added: llvm/trunk/test/CodeGen/ARM/call-tc.ll
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/call-tc.ll?rev=106299&view=auto
>>> ==============================================================================
>>> --- llvm/trunk/test/CodeGen/ARM/call-tc.ll (added)
>>> +++ llvm/trunk/test/CodeGen/ARM/call-tc.ll Fri Jun 18 14:00:18 2010
>>> @@ -0,0 +1,36 @@
>>> +; RUN: llc < %s -march=arm | FileCheck %s -check-prefix=CHECKV4
>>> +; RUN: llc < %s -march=arm -mattr=+v5t | FileCheck %s -check-prefix=CHECKV5
>>> +; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi\
>>> +; RUN:   -relocation-model=pic | FileCheck %s -check-prefix=CHECKELF
>>> +
>>> +@t = weak global i32 ()* null           ; <i32 ()**> [#uses=1]
>>> +
>>> +declare void @g(i32, i32, i32, i32)
>>> +
>>> +define void @f() {
>>> +; CHECKELF: PLT
>>> +        call void @g( i32 1, i32 2, i32 3, i32 4 )
>>> +        ret void
>>> +}
>>> +
>>> +define void @g.upgrd.1() {
>>> +; CHECKV4: bx r0 @ TAILCALL
>>> +; CHECKV5: bx r0 @ TAILCALL
>>> +        %tmp = load i32 ()** @t         ; <i32 ()*> [#uses=1]
>>> +        %tmp.upgrd.2 = tail call i32 %tmp( )            ; <i32> [#uses=0]
>>> +        ret void
>>> +}
>>> +
>>> +define i32* @m_231b(i32, i32, i32*, i32*, i32*) nounwind {
>>> +; CHECKV4: m_231b
>>> +; CHECKV4: bx r{{.*}}
>>> +BB0:
>>> +  %5 = inttoptr i32 %0 to i32*                    ; <i32*> [#uses=1]
>>> +  %t35 = volatile load i32* %5                    ; <i32> [#uses=1]
>>> +  %6 = inttoptr i32 %t35 to i32**                 ; <i32**> [#uses=1]
>>> +  %7 = getelementptr i32** %6, i32 86             ; <i32**> [#uses=1]
>>> +  %8 = load i32** %7                              ; <i32*> [#uses=1]
>>> +  %9 = bitcast i32* %8 to i32* (i32, i32*, i32, i32*, i32*, i32*)* ; <i32* (i32, i32*, i32, i32*, i32*, i32*)*> [#uses=1]
>>> +  %10 = call i32* %9(i32 %0, i32* null, i32 %1, i32* %2, i32* %3, i32* %4) ; <i32*> [#uses=1]
>>> +  ret i32* %10
>>> +}
>>> 
>>> Added: llvm/trunk/test/CodeGen/ARM/ifcvt6-tc.ll
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/ifcvt6-tc.ll?rev=106299&view=auto
>>> ==============================================================================
>>> --- llvm/trunk/test/CodeGen/ARM/ifcvt6-tc.ll (added)
>>> +++ llvm/trunk/test/CodeGen/ARM/ifcvt6-tc.ll Fri Jun 18 14:00:18 2010
>>> @@ -0,0 +1,23 @@
>>> +; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
>>> +; RUN:   grep cmpne | count 1
>>> +; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
>>> +; RUN:   grep bhi | count 1
>>> +; Here, tail call wins over eliminating branches.  It is 1 fewer instruction
>>> +; and removes all stack accesses, so seems like a win.
>>> +
>>> +define void @foo(i32 %X, i32 %Y) {
>>> +entry:
>>> +	%tmp1 = icmp ult i32 %X, 4		; <i1> [#uses=1]
>>> +	%tmp4 = icmp eq i32 %Y, 0		; <i1> [#uses=1]
>>> +	%tmp7 = or i1 %tmp4, %tmp1		; <i1> [#uses=1]
>>> +	br i1 %tmp7, label %cond_true, label %UnifiedReturnBlock
>>> +
>>> +cond_true:		; preds = %entry
>>> +	%tmp10 = tail call i32 (...)* @bar( )		; <i32> [#uses=0]
>>> +	ret void
>>> +
>>> +UnifiedReturnBlock:		; preds = %entry
>>> +	ret void
>>> +}
>>> +
>>> +declare i32 @bar(...)
>>> 
>>> Added: llvm/trunk/test/CodeGen/ARM/insn-sched1-tc.ll
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/insn-sched1-tc.ll?rev=106299&view=auto
>>> ==============================================================================
>>> --- llvm/trunk/test/CodeGen/ARM/insn-sched1-tc.ll (added)
>>> +++ llvm/trunk/test/CodeGen/ARM/insn-sched1-tc.ll Fri Jun 18 14:00:18 2010
>>> @@ -0,0 +1,11 @@
>>> +; RUN: llc < %s -march=arm -mattr=+v6
>>> +; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6 |\
>>> +; RUN:   grep mov | count 2
>>> +
>>> +define i32 @test(i32 %x) {
>>> +        %tmp = trunc i32 %x to i16              ; <i16> [#uses=1]
>>> +        %tmp2 = tail call i32 @f( i32 1, i16 %tmp )             ; <i32> [#uses=1]
>>> +        ret i32 %tmp2
>>> +}
>>> +
>>> +declare i32 @f(i32, i16)
>>> 
>>> Added: llvm/trunk/test/CodeGen/ARM/ldm-tc.ll
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/ldm-tc.ll?rev=106299&view=auto
>>> ==============================================================================
>>> --- llvm/trunk/test/CodeGen/ARM/ldm-tc.ll (added)
>>> +++ llvm/trunk/test/CodeGen/ARM/ldm-tc.ll Fri Jun 18 14:00:18 2010
>>> @@ -0,0 +1,37 @@
>>> +; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s
>>> +
>>> +@X = external global [0 x i32]          ; <[0 x i32]*> [#uses=5]
>>> +
>>> +define i32 @t1() {
>>> +; CHECK: t1:
>>> +; CHECK: ldmia
>>> +        %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 0)            ; <i32> [#uses=1]
>>> +        %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1)           ; <i32> [#uses=1]
>>> +        %tmp4 = tail call i32 @f1( i32 %tmp, i32 %tmp3 )                ; <i32> [#uses=1]
>>> +        ret i32 %tmp4
>>> +}
>>> +
>>> +define i32 @t2() {
>>> +; CHECK: t2:
>>> +; CHECK: ldmia
>>> +        %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2)            ; <i32> [#uses=1]
>>> +        %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3)           ; <i32> [#uses=1]
>>> +        %tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 4)           ; <i32> [#uses=1]
>>> +        %tmp6 = tail call i32 @f2( i32 %tmp, i32 %tmp3, i32 %tmp5 )             ; <i32> [#uses=1]
>>> +        ret i32 %tmp6
>>> +}
>>> +
>>> +define i32 @t3() {
>>> +; CHECK: t3:
>>> +; CHECK: ldmib
>>> +; CHECK: b.w _f2 @ TAILCALL
>>> +        %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1)            ; <i32> [#uses=1]
>>> +        %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2)           ; <i32> [#uses=1]
>>> +        %tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3)           ; <i32> [#uses=1]
>>> +        %tmp6 = tail call i32 @f2( i32 %tmp, i32 %tmp3, i32 %tmp5 )             ; <i32> [#uses=1]
>>> +        ret i32 %tmp6
>>> +}
>>> +
>>> +declare i32 @f1(i32, i32)
>>> +
>>> +declare i32 @f2(i32, i32, i32)
>>> 
>>> Added: llvm/trunk/test/CodeGen/Thumb2/thumb2-call-tc.ll
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/thumb2-call-tc.ll?rev=106299&view=auto
>>> ==============================================================================
>>> --- llvm/trunk/test/CodeGen/Thumb2/thumb2-call-tc.ll (added)
>>> +++ llvm/trunk/test/CodeGen/Thumb2/thumb2-call-tc.ll Fri Jun 18 14:00:18 2010
>>> @@ -0,0 +1,27 @@
>>> +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+thumb2 | FileCheck %s -check-prefix=DARWIN
>>> +; RUN: llc < %s -mtriple=thumbv7-linux -mattr=+thumb2 | FileCheck %s -check-prefix=LINUX
>>> +
>>> +@t = weak global i32 ()* null           ; <i32 ()**> [#uses=1]
>>> +
>>> +declare void @g(i32, i32, i32, i32)
>>> +
>>> +define void @f() {
>>> +; DARWIN: f:
>>> +; DARWIN: blx _g
>>> +
>>> +; LINUX: f:
>>> +; LINUX: bl g
>>> +        call void @g( i32 1, i32 2, i32 3, i32 4 )
>>> +        ret void
>>> +}
>>> +
>>> +define void @h() {
>>> +; DARWIN: h:
>>> +; DARWIN: bx r0 @ TAILCALL
>>> +
>>> +; LINUX: h:
>>> +; LINUX: bx r0 @ TAILCALL
>>> +        %tmp = load i32 ()** @t         ; <i32 ()*> [#uses=1]
>>> +        %tmp.upgrd.2 = tail call i32 %tmp( )            ; <i32> [#uses=0]
>>> +        ret void
>>> +}
>>> 
>>> Added: llvm/trunk/test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll?rev=106299&view=auto
>>> ==============================================================================
>>> --- llvm/trunk/test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll (added)
>>> +++ llvm/trunk/test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll Fri Jun 18 14:00:18 2010
>>> @@ -0,0 +1,86 @@
>>> +; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
>>> +
>>> +define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
>>> +; CHECK: t1:
>>> +; CHECK: it ne
>>> +; CHECK: cmpne
>>> +	switch i32 %c, label %cond_next [
>>> +		 i32 1, label %cond_true
>>> +		 i32 7, label %cond_true
>>> +	]
>>> +
>>> +cond_true:
>>> +	%tmp12 = add i32 %a, 1
>>> +	%tmp1518 = add i32 %tmp12, %b
>>> +	ret i32 %tmp1518
>>> +
>>> +cond_next:
>>> +	%tmp15 = add i32 %b, %a
>>> +	ret i32 %tmp15
>>> +}
>>> +
>>> +; FIXME: Check for # of unconditional branch after adding branch folding post ifcvt.
>>> +define i32 @t2(i32 %a, i32 %b) nounwind {
>>> +entry:
>>> +; CHECK: t2:
>>> +; CHECK: ite gt
>>> +; CHECK: subgt
>>> +; CHECK: suble
>>> +	%tmp1434 = icmp eq i32 %a, %b		; <i1> [#uses=1]
>>> +	br i1 %tmp1434, label %bb17, label %bb.outer
>>> +
>>> +bb.outer:		; preds = %cond_false, %entry
>>> +	%b_addr.021.0.ph = phi i32 [ %b, %entry ], [ %tmp10, %cond_false ]		; <i32> [#uses=5]
>>> +	%a_addr.026.0.ph = phi i32 [ %a, %entry ], [ %a_addr.026.0, %cond_false ]		; <i32> [#uses=1]
>>> +	br label %bb
>>> +
>>> +bb:		; preds = %cond_true, %bb.outer
>>> +	%indvar = phi i32 [ 0, %bb.outer ], [ %indvar.next, %cond_true ]		; <i32> [#uses=2]
>>> +	%tmp. = sub i32 0, %b_addr.021.0.ph		; <i32> [#uses=1]
>>> +	%tmp.40 = mul i32 %indvar, %tmp.		; <i32> [#uses=1]
>>> +	%a_addr.026.0 = add i32 %tmp.40, %a_addr.026.0.ph		; <i32> [#uses=6]
>>> +	%tmp3 = icmp sgt i32 %a_addr.026.0, %b_addr.021.0.ph		; <i1> [#uses=1]
>>> +	br i1 %tmp3, label %cond_true, label %cond_false
>>> +
>>> +cond_true:		; preds = %bb
>>> +	%tmp7 = sub i32 %a_addr.026.0, %b_addr.021.0.ph		; <i32> [#uses=2]
>>> +	%tmp1437 = icmp eq i32 %tmp7, %b_addr.021.0.ph		; <i1> [#uses=1]
>>> +	%indvar.next = add i32 %indvar, 1		; <i32> [#uses=1]
>>> +	br i1 %tmp1437, label %bb17, label %bb
>>> +
>>> +cond_false:		; preds = %bb
>>> +	%tmp10 = sub i32 %b_addr.021.0.ph, %a_addr.026.0		; <i32> [#uses=2]
>>> +	%tmp14 = icmp eq i32 %a_addr.026.0, %tmp10		; <i1> [#uses=1]
>>> +	br i1 %tmp14, label %bb17, label %bb.outer
>>> +
>>> +bb17:		; preds = %cond_false, %cond_true, %entry
>>> +	%a_addr.026.1 = phi i32 [ %a, %entry ], [ %tmp7, %cond_true ], [ %a_addr.026.0, %cond_false ]		; <i32> [#uses=1]
>>> +	ret i32 %a_addr.026.1
>>> +}
>>> +
>>> +@x = external global i32*		; <i32**> [#uses=1]
>>> +
>>> +define void @foo(i32 %a) nounwind {
>>> +entry:
>>> +	%tmp = load i32** @x		; <i32*> [#uses=1]
>>> +	store i32 %a, i32* %tmp
>>> +	ret void
>>> +}
>>> +
>>> +; Tail call prevents use of ifcvt in this one.  Seems like a win though.
>>> +define void @t3(i32 %a, i32 %b) nounwind {
>>> +entry:
>>> +; CHECK: t3:
>>> +; CHECK-NOT: it lt
>>> +; CHECK-NOT: poplt
>>> +; CHECK: b.w _foo @ TAILCALL
>>> +	%tmp1 = icmp sgt i32 %a, 10		; <i1> [#uses=1]
>>> +	br i1 %tmp1, label %cond_true, label %UnifiedReturnBlock
>>> +
>>> +cond_true:		; preds = %entry
>>> +	tail call void @foo( i32 %b )
>>> +	ret void
>>> +
>>> +UnifiedReturnBlock:		; preds = %entry
>>> +	ret void
>>> +}
>>> 
>>> 
>>> _______________________________________________
>>> llvm-commits mailing list
>>> llvm-commits at cs.uiuc.edu
>>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>> 
> 





More information about the llvm-commits mailing list