[llvm] r186725 - R600: Don't emit empty then clause and use alu_pop_after
Vincent Lejeune
vljn at ovi.com
Fri Jul 19 14:45:15 PDT 2013
Author: vljn
Date: Fri Jul 19 16:45:15 2013
New Revision: 186725
URL: http://llvm.org/viewvc/llvm-project?rev=186725&view=rev
Log:
R600: Don't emit empty then clause and use alu_pop_after
Added:
llvm/trunk/test/CodeGen/R600/r600cfg.ll
Modified:
llvm/trunk/lib/Target/R600/AMDILCFGStructurizer.cpp
llvm/trunk/lib/Target/R600/R600ControlFlowFinalizer.cpp
llvm/trunk/lib/Target/R600/R600Instructions.td
llvm/trunk/test/CodeGen/R600/jump-address.ll
llvm/trunk/test/CodeGen/R600/loop-address.ll
Modified: llvm/trunk/lib/Target/R600/AMDILCFGStructurizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDILCFGStructurizer.cpp?rev=186725&r1=186724&r2=186725&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/AMDILCFGStructurizer.cpp (original)
+++ llvm/trunk/lib/Target/R600/AMDILCFGStructurizer.cpp Fri Jul 19 16:45:15 2013
@@ -1039,8 +1039,11 @@ int AMDGPUCFGStructurizer::ifPatternMatc
} else if (FalseMBB->succ_size() == 1
&& *FalseMBB->succ_begin() == TrueMBB) {
// Triangle pattern, true is empty
- LandBlk = TrueMBB;
- TrueMBB = NULL;
+ // We reverse the predicate to make a triangle, empty false pattern;
+ std::swap(TrueMBB, FalseMBB);
+ reversePredicateSetter(MBB->end());
+ LandBlk = FalseMBB;
+ FalseMBB = NULL;
} else if (FalseMBB->succ_size() == 1
&& isSameloopDetachedContbreak(TrueMBB, FalseMBB)) {
LandBlk = *FalseMBB->succ_begin();
@@ -1456,6 +1459,7 @@ void AMDGPUCFGStructurizer::mergeSerialB
void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI,
MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB,
MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB) {
+ assert (TrueMBB);
DEBUG(
dbgs() << "ifPattern BB" << MBB->getNumber();
dbgs() << "{ ";
Modified: llvm/trunk/lib/Target/R600/R600ControlFlowFinalizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600ControlFlowFinalizer.cpp?rev=186725&r1=186724&r2=186725&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/R600ControlFlowFinalizer.cpp (original)
+++ llvm/trunk/lib/Target/R600/R600ControlFlowFinalizer.cpp Fri Jul 19 16:45:15 2013
@@ -347,6 +347,9 @@ public:
MaxStack = 1;
}
std::vector<ClauseFile> FetchClauses, AluClauses;
+ std::vector<MachineInstr *> LastAlu(1);
+ std::vector<MachineInstr *> ToPopAfter;
+
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E;) {
if (TII->usesTextureCache(I) || TII->usesVertexCache(I)) {
@@ -357,6 +360,10 @@ public:
}
MachineBasicBlock::iterator MI = I;
+ if (MI->getOpcode() != AMDGPU::ENDIF)
+ LastAlu.back() = 0;
+ if (MI->getOpcode() == AMDGPU::CF_ALU)
+ LastAlu.back() = MI;
I++;
switch (MI->getOpcode()) {
case AMDGPU::CF_ALU_PUSH_BEFORE:
@@ -403,6 +410,7 @@ public:
break;
}
case AMDGPU::IF_PREDICATE_SET: {
+ LastAlu.push_back(0);
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
getHWInstrDesc(CF_JUMP))
.addImm(0)
@@ -420,7 +428,7 @@ public:
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
getHWInstrDesc(CF_ELSE))
.addImm(0)
- .addImm(1);
+ .addImm(0);
DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
IfThenElseStack.push_back(MIb);
MI->eraseFromParent();
@@ -429,17 +437,24 @@ public:
}
case AMDGPU::ENDIF: {
CurrentStack--;
+ if (LastAlu.back()) {
+ ToPopAfter.push_back(LastAlu.back());
+ } else {
+ MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
+ getHWInstrDesc(CF_POP))
+ .addImm(CfCount + 1)
+ .addImm(1);
+ (void)MIb;
+ DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
+ CfCount++;
+ }
+
MachineInstr *IfOrElseInst = IfThenElseStack.back();
IfThenElseStack.pop_back();
- CounterPropagateAddr(IfOrElseInst, CfCount + 1);
- MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
- getHWInstrDesc(CF_POP))
- .addImm(CfCount + 1)
- .addImm(1);
- (void)MIb;
- DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
+ CounterPropagateAddr(IfOrElseInst, CfCount);
+ IfOrElseInst->getOperand(1).setImm(1);
+ LastAlu.pop_back();
MI->eraseFromParent();
- CfCount++;
break;
}
case AMDGPU::PREDICATED_BREAK: {
@@ -484,6 +499,21 @@ public:
break;
}
}
+ for (unsigned i = 0, e = ToPopAfter.size(); i < e; ++i) {
+ MachineInstr *Alu = ToPopAfter[i];
+ BuildMI(MBB, Alu, MBB.findDebugLoc((MachineBasicBlock::iterator)Alu),
+ TII->get(AMDGPU::CF_ALU_POP_AFTER))
+ .addImm(Alu->getOperand(0).getImm())
+ .addImm(Alu->getOperand(1).getImm())
+ .addImm(Alu->getOperand(2).getImm())
+ .addImm(Alu->getOperand(3).getImm())
+ .addImm(Alu->getOperand(4).getImm())
+ .addImm(Alu->getOperand(5).getImm())
+ .addImm(Alu->getOperand(6).getImm())
+ .addImm(Alu->getOperand(7).getImm())
+ .addImm(Alu->getOperand(8).getImm());
+ Alu->eraseFromParent();
+ }
MFI->StackSize = getHWStackSize(MaxStack, HasPush);
}
Modified: llvm/trunk/lib/Target/R600/R600Instructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600Instructions.td?rev=186725&r1=186724&r2=186725&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/R600Instructions.td (original)
+++ llvm/trunk/lib/Target/R600/R600Instructions.td Fri Jul 19 16:45:15 2013
@@ -624,6 +624,7 @@ ins, AsmPrint, [] >, CF_WORD0_EG, CF_WOR
def CF_ALU : ALU_CLAUSE<8, "ALU">;
def CF_ALU_PUSH_BEFORE : ALU_CLAUSE<9, "ALU_PUSH_BEFORE">;
+def CF_ALU_POP_AFTER : ALU_CLAUSE<10, "ALU_POP_AFTER">;
def FETCH_CLAUSE : AMDGPUInst <(outs),
(ins i32imm:$addr), "Fetch clause starting at $addr:", [] > {
Modified: llvm/trunk/test/CodeGen/R600/jump-address.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/jump-address.ll?rev=186725&r1=186724&r2=186725&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/R600/jump-address.ll (original)
+++ llvm/trunk/test/CodeGen/R600/jump-address.ll Fri Jul 19 16:45:15 2013
@@ -1,6 +1,6 @@
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-; CHECK: JUMP @7
+; CHECK: JUMP @5
; CHECK: EXPORT
; CHECK-NOT: EXPORT
Modified: llvm/trunk/test/CodeGen/R600/loop-address.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/loop-address.ll?rev=186725&r1=186724&r2=186725&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/R600/loop-address.ll (original)
+++ llvm/trunk/test/CodeGen/R600/loop-address.ll Fri Jul 19 16:45:15 2013
@@ -2,12 +2,11 @@
;CHECK: TEX
;CHECK: ALU_PUSH
-;CHECK: JUMP @4
-;CHECK: ELSE @16
+;CHECK: JUMP @15
;CHECK: TEX
-;CHECK: LOOP_START_DX10 @15
-;CHECK: LOOP_BREAK @14
-;CHECK: POP @16
+;CHECK: LOOP_START_DX10 @14
+;CHECK: LOOP_BREAK @13
+;CHECK: POP @15
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64"
target triple = "r600--"
Added: llvm/trunk/test/CodeGen/R600/r600cfg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/r600cfg.ll?rev=186725&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/r600cfg.ll (added)
+++ llvm/trunk/test/CodeGen/R600/r600cfg.ll Fri Jul 19 16:45:15 2013
@@ -0,0 +1,124 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood
+;REQUIRES: asserts
+
+define void @main() #0 {
+main_body:
+ %0 = call float @llvm.R600.load.input(i32 4)
+ %1 = call float @llvm.R600.load.input(i32 5)
+ %2 = call float @llvm.R600.load.input(i32 6)
+ %3 = call float @llvm.R600.load.input(i32 7)
+ %4 = bitcast float %0 to i32
+ %5 = icmp eq i32 %4, 0
+ %6 = sext i1 %5 to i32
+ %7 = bitcast i32 %6 to float
+ %8 = bitcast float %7 to i32
+ %9 = icmp ne i32 %8, 0
+ %. = select i1 %9, float 0x36A0000000000000, float %0
+ br label %LOOP
+
+LOOP: ; preds = %LOOP47, %main_body
+ %temp12.0 = phi float [ 0x36A0000000000000, %main_body ], [ %temp12.1, %LOOP47 ]
+ %temp8.0 = phi float [ 0.000000e+00, %main_body ], [ %38, %LOOP47 ]
+ %temp4.1 = phi float [ %., %main_body ], [ %52, %LOOP47 ]
+ %10 = bitcast float %temp4.1 to i32
+ %11 = icmp eq i32 %10, 1
+ %12 = sext i1 %11 to i32
+ %13 = bitcast i32 %12 to float
+ %14 = bitcast float %13 to i32
+ %15 = icmp ne i32 %14, 0
+ br i1 %15, label %IF41, label %ENDIF40
+
+IF41: ; preds = %LOOP
+ %16 = insertelement <4 x float> undef, float %0, i32 0
+ %17 = insertelement <4 x float> %16, float %temp8.0, i32 1
+ %18 = insertelement <4 x float> %17, float %temp12.0, i32 2
+ %19 = insertelement <4 x float> %18, float 0.000000e+00, i32 3
+ call void @llvm.R600.store.stream.output(<4 x float> %19, i32 0, i32 0, i32 1)
+ %20 = insertelement <4 x float> undef, float %0, i32 0
+ %21 = insertelement <4 x float> %20, float %temp8.0, i32 1
+ %22 = insertelement <4 x float> %21, float %temp12.0, i32 2
+ %23 = insertelement <4 x float> %22, float 0.000000e+00, i32 3
+ call void @llvm.R600.store.stream.output(<4 x float> %23, i32 0, i32 0, i32 2)
+ %24 = insertelement <4 x float> undef, float %0, i32 0
+ %25 = insertelement <4 x float> %24, float %temp8.0, i32 1
+ %26 = insertelement <4 x float> %25, float %temp12.0, i32 2
+ %27 = insertelement <4 x float> %26, float 0.000000e+00, i32 3
+ call void @llvm.R600.store.stream.output(<4 x float> %27, i32 0, i32 0, i32 4)
+ %28 = insertelement <4 x float> undef, float 0.000000e+00, i32 0
+ %29 = insertelement <4 x float> %28, float 0.000000e+00, i32 1
+ %30 = insertelement <4 x float> %29, float 0.000000e+00, i32 2
+ %31 = insertelement <4 x float> %30, float 0.000000e+00, i32 3
+ call void @llvm.R600.store.swizzle(<4 x float> %31, i32 60, i32 1)
+ %32 = insertelement <4 x float> undef, float %0, i32 0
+ %33 = insertelement <4 x float> %32, float %temp8.0, i32 1
+ %34 = insertelement <4 x float> %33, float %temp12.0, i32 2
+ %35 = insertelement <4 x float> %34, float 0.000000e+00, i32 3
+ call void @llvm.R600.store.swizzle(<4 x float> %35, i32 0, i32 2)
+ ret void
+
+ENDIF40: ; preds = %LOOP
+ %36 = bitcast float %temp8.0 to i32
+ %37 = add i32 %36, 1
+ %38 = bitcast i32 %37 to float
+ %39 = bitcast float %temp4.1 to i32
+ %40 = urem i32 %39, 2
+ %41 = bitcast i32 %40 to float
+ %42 = bitcast float %41 to i32
+ %43 = icmp eq i32 %42, 0
+ %44 = sext i1 %43 to i32
+ %45 = bitcast i32 %44 to float
+ %46 = bitcast float %45 to i32
+ %47 = icmp ne i32 %46, 0
+ %48 = bitcast float %temp4.1 to i32
+ br i1 %47, label %IF44, label %ELSE45
+
+IF44: ; preds = %ENDIF40
+ %49 = udiv i32 %48, 2
+ br label %ENDIF43
+
+ELSE45: ; preds = %ENDIF40
+ %50 = mul i32 3, %48
+ %51 = add i32 %50, 1
+ br label %ENDIF43
+
+ENDIF43: ; preds = %ELSE45, %IF44
+ %.sink = phi i32 [ %49, %IF44 ], [ %51, %ELSE45 ]
+ %52 = bitcast i32 %.sink to float
+ %53 = load <4 x float> addrspace(8)* null
+ %54 = extractelement <4 x float> %53, i32 0
+ %55 = bitcast float %54 to i32
+ br label %LOOP47
+
+LOOP47: ; preds = %ENDIF48, %ENDIF43
+ %temp12.1 = phi float [ %temp12.0, %ENDIF43 ], [ %67, %ENDIF48 ]
+ %temp28.0 = phi float [ 0.000000e+00, %ENDIF43 ], [ %70, %ENDIF48 ]
+ %56 = bitcast float %temp28.0 to i32
+ %57 = icmp uge i32 %56, %55
+ %58 = sext i1 %57 to i32
+ %59 = bitcast i32 %58 to float
+ %60 = bitcast float %59 to i32
+ %61 = icmp ne i32 %60, 0
+ br i1 %61, label %LOOP, label %ENDIF48
+
+ENDIF48: ; preds = %LOOP47
+ %62 = bitcast float %temp12.1 to i32
+ %63 = mul i32 %62, 2
+ %64 = bitcast i32 %63 to float
+ %65 = bitcast float %64 to i32
+ %66 = urem i32 %65, 2147483647
+ %67 = bitcast i32 %66 to float
+ %68 = bitcast float %temp28.0 to i32
+ %69 = add i32 %68, 1
+ %70 = bitcast i32 %69 to float
+ br label %LOOP47
+}
+
+; Function Attrs: readnone
+declare float @llvm.R600.load.input(i32) #1
+
+declare void @llvm.R600.store.stream.output(<4 x float>, i32, i32, i32)
+
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+
+attributes #0 = { "ShaderType"="1" }
+attributes #1 = { readnone }
More information about the llvm-commits mailing list