[llvm-branch-commits] [llvm-branch] r277619 - Merging r277500:
Hans Wennborg via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Aug 3 11:09:48 PDT 2016
Author: hans
Date: Wed Aug 3 13:09:48 2016
New Revision: 277619
URL: http://llvm.org/viewvc/llvm-project?rev=277619&view=rev
Log:
Merging r277500:
------------------------------------------------------------------------
r277500 | nha | 2016-08-02 12:17:37 -0700 (Tue, 02 Aug 2016) | 17 lines
AMDGPU: Track physical registers in SIWholeQuadMode
Summary:
There are cases where uniform branch conditions are computed in VGPRs, and
we didn't correctly mark those as WQM.
The stray change in basic-branch.ll is because invoking the LiveIntervals
analysis leads to the detection of a dead register that would otherwise not
be seen at -O0.
This is a candidate for the 3.9 branch, as it fixes a possible hang.
Reviewers: arsenm, tstellarAMD, mareko
Subscribers: arsenm, llvm-commits, kzhuravl
Differential Revision: https://reviews.llvm.org/D22673
------------------------------------------------------------------------
Modified:
llvm/branches/release_39/ (props changed)
llvm/branches/release_39/lib/Target/AMDGPU/SIWholeQuadMode.cpp
llvm/branches/release_39/test/CodeGen/AMDGPU/basic-branch.ll
llvm/branches/release_39/test/CodeGen/AMDGPU/wqm.ll
Propchange: llvm/branches/release_39/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Wed Aug 3 13:09:48 2016
@@ -1,3 +1,3 @@
/llvm/branches/Apple/Pertwee:110850,110961
/llvm/branches/type-system-rewrite:133420-134817
-/llvm/trunk:155241,275868-275870,275879,275898,275928,275935,275946,275978,275981,276015,276077,276109,276119,276181,276209,276236-276237,276358,276364,276368,276389,276435,276438,276479,276510,276648,276740,276956,276980,277114,277135,277371
+/llvm/trunk:155241,275868-275870,275879,275898,275928,275935,275946,275978,275981,276015,276077,276109,276119,276181,276209,276236-276237,276358,276364,276368,276389,276435,276438,276479,276510,276648,276740,276956,276980,277114,277135,277371,277500
Modified: llvm/branches/release_39/lib/Target/AMDGPU/SIWholeQuadMode.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_39/lib/Target/AMDGPU/SIWholeQuadMode.cpp?rev=277619&r1=277618&r2=277619&view=diff
==============================================================================
--- llvm/branches/release_39/lib/Target/AMDGPU/SIWholeQuadMode.cpp (original)
+++ llvm/branches/release_39/lib/Target/AMDGPU/SIWholeQuadMode.cpp Wed Aug 3 13:09:48 2016
@@ -94,12 +94,15 @@ private:
const SIInstrInfo *TII;
const SIRegisterInfo *TRI;
MachineRegisterInfo *MRI;
+ LiveIntervals *LIS;
DenseMap<const MachineInstr *, InstrInfo> Instructions;
DenseMap<MachineBasicBlock *, BlockInfo> Blocks;
SmallVector<const MachineInstr *, 2> ExecExports;
SmallVector<MachineInstr *, 1> LiveMaskQueries;
+ void markInstruction(MachineInstr &MI, char Flag,
+ std::vector<WorkItem> &Worklist);
char scanInstructions(MachineFunction &MF, std::vector<WorkItem> &Worklist);
void propagateInstruction(MachineInstr &MI, std::vector<WorkItem> &Worklist);
void propagateBlock(MachineBasicBlock &MBB, std::vector<WorkItem> &Worklist);
@@ -126,6 +129,7 @@ public:
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<LiveIntervals>();
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -135,8 +139,11 @@ public:
char SIWholeQuadMode::ID = 0;
-INITIALIZE_PASS(SIWholeQuadMode, DEBUG_TYPE,
- "SI Whole Quad Mode", false, false)
+INITIALIZE_PASS_BEGIN(SIWholeQuadMode, DEBUG_TYPE, "SI Whole Quad Mode", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(SIWholeQuadMode, DEBUG_TYPE, "SI Whole Quad Mode", false,
+ false)
char &llvm::SIWholeQuadModeID = SIWholeQuadMode::ID;
@@ -144,6 +151,23 @@ FunctionPass *llvm::createSIWholeQuadMod
return new SIWholeQuadMode;
}
+void SIWholeQuadMode::markInstruction(MachineInstr &MI, char Flag,
+ std::vector<WorkItem> &Worklist) {
+ InstrInfo &II = Instructions[&MI];
+
+ assert(Flag == StateWQM || Flag == StateExact);
+
+ // Ignore if the instruction is already marked. The typical case is that we
+ // mark an instruction WQM multiple times, but for atomics it can happen that
+ // Flag is StateWQM, but Needs is already set to StateExact. In this case,
+ // letting the atomic run in StateExact is correct as per the relevant specs.
+ if (II.Needs)
+ return;
+
+ II.Needs = Flag;
+ Worklist.push_back(&MI);
+}
+
// Scan instructions to determine which ones require an Exact execmask and
// which ones seed WQM requirements.
char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
@@ -192,8 +216,7 @@ char SIWholeQuadMode::scanInstructions(M
continue;
}
- Instructions[&MI].Needs = Flags;
- Worklist.push_back(&MI);
+ markInstruction(MI, Flags, Worklist);
GlobalFlags |= Flags;
}
@@ -249,32 +272,35 @@ void SIWholeQuadMode::propagateInstructi
if (!Use.isReg() || !Use.isUse())
continue;
- // At this point, physical registers appear as inputs or outputs
- // and following them makes no sense (and would in fact be incorrect
- // when the same VGPR is used as both an output and an input that leads
- // to a NeedsWQM instruction).
- //
- // Note: VCC appears e.g. in 64-bit addition with carry - theoretically we
- // have to trace this, in practice it happens for 64-bit computations like
- // pointers where both dwords are followed already anyway.
- if (!TargetRegisterInfo::isVirtualRegister(Use.getReg()))
- continue;
-
- for (MachineInstr &DefMI : MRI->def_instructions(Use.getReg())) {
- InstrInfo &DefII = Instructions[&DefMI];
+ unsigned Reg = Use.getReg();
- // Obviously skip if DefMI is already flagged as NeedWQM.
- //
- // The instruction might also be flagged as NeedExact. This happens when
- // the result of an atomic is used in a WQM computation. In this case,
- // the atomic must not run for helper pixels and the WQM result is
- // undefined.
- if (DefII.Needs != 0)
+ // Handle physical registers that we need to track; this is mostly relevant
+ // for VCC, which can appear as the (implicit) input of a uniform branch,
+ // e.g. when a loop counter is stored in a VGPR.
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (Reg == AMDGPU::EXEC)
continue;
- DefII.Needs = StateWQM;
- Worklist.push_back(&DefMI);
+ for (MCRegUnitIterator RegUnit(Reg, TRI); RegUnit.isValid(); ++RegUnit) {
+ LiveRange &LR = LIS->getRegUnit(*RegUnit);
+ const VNInfo *Value = LR.Query(LIS->getInstructionIndex(MI)).valueIn();
+ if (!Value)
+ continue;
+
+ // Since we're in machine SSA, we do not need to track physical
+ // registers across basic blocks.
+ if (Value->isPHIDef())
+ continue;
+
+ markInstruction(*LIS->getInstructionFromIndex(Value->def), StateWQM,
+ Worklist);
+ }
+
+ continue;
}
+
+ for (MachineInstr &DefMI : MRI->def_instructions(Use.getReg()))
+ markInstruction(DefMI, StateWQM, Worklist);
}
}
@@ -468,6 +494,7 @@ bool SIWholeQuadMode::runOnMachineFuncti
TII = ST.getInstrInfo();
TRI = &TII->getRegisterInfo();
MRI = &MF.getRegInfo();
+ LIS = &getAnalysis<LiveIntervals>();
char GlobalFlags = analyzeFunction(MF);
if (!(GlobalFlags & StateWQM)) {
Modified: llvm/branches/release_39/test/CodeGen/AMDGPU/basic-branch.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_39/test/CodeGen/AMDGPU/basic-branch.ll?rev=277619&r1=277618&r2=277619&view=diff
==============================================================================
--- llvm/branches/release_39/test/CodeGen/AMDGPU/basic-branch.ll (original)
+++ llvm/branches/release_39/test/CodeGen/AMDGPU/basic-branch.ll Wed Aug 3 13:09:48 2016
@@ -6,7 +6,6 @@
; GCN-LABEL: {{^}}test_branch:
; GCNNOOPT: v_writelane_b32
; GCNNOOPT: v_writelane_b32
-; GCNNOOPT: v_writelane_b32
; GCN: s_cbranch_scc1 [[END:BB[0-9]+_[0-9]+]]
; GCN: ; BB#1
Modified: llvm/branches/release_39/test/CodeGen/AMDGPU/wqm.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_39/test/CodeGen/AMDGPU/wqm.ll?rev=277619&r1=277618&r2=277619&view=diff
==============================================================================
--- llvm/branches/release_39/test/CodeGen/AMDGPU/wqm.ll (original)
+++ llvm/branches/release_39/test/CodeGen/AMDGPU/wqm.ll Wed Aug 3 13:09:48 2016
@@ -350,6 +350,44 @@ main_body:
ret float %s
}
+; CHECK-LABEL: {{^}}test_loop_vcc:
+; CHECK-NEXT: ; %entry
+; CHECK-NEXT: s_mov_b64 [[LIVE:s\[[0-9]+:[0-9]+\]]], exec
+; CHECK: s_wqm_b64 exec, exec
+; CHECK: s_and_b64 exec, exec, [[LIVE]]
+; CHECK: image_store
+; CHECK: s_wqm_b64 exec, exec
+; CHECK: v_mov_b32_e32 [[CTR:v[0-9]+]], -2
+; CHECK: s_branch [[LOOPHDR:BB[0-9]+_[0-9]+]]
+
+; CHECK: [[LOOPHDR]]: ; %loop
+; CHECK: v_add_i32_e32 [[CTR]], vcc, 2, [[CTR]]
+; CHECK: v_cmp_lt_i32_e32 vcc, 7, [[CTR]]
+; CHECK: s_cbranch_vccz
+; CHECK: ; %break
+
+; CHECK: ; return
+define amdgpu_ps <4 x float> @test_loop_vcc(<4 x float> %in) nounwind {
+entry:
+ call void @llvm.amdgcn.image.store.v4i32(<4 x float> %in, <4 x i32> undef, <8 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0)
+ br label %loop
+
+loop:
+ %ctr.iv = phi i32 [ 0, %entry ], [ %ctr.next, %body ]
+ %c.iv = phi <4 x float> [ %in, %entry ], [ %c.next, %body ]
+ %cc = icmp sgt i32 %ctr.iv, 7
+ br i1 %cc, label %break, label %body
+
+body:
+ %c.i = bitcast <4 x float> %c.iv to <4 x i32>
+ %c.next = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %c.i, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+ %ctr.next = add i32 %ctr.iv, 2
+ br label %loop
+
+break:
+ ret <4 x float> %c.iv
+}
+
declare void @llvm.amdgcn.image.store.v4i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
declare <4 x float> @llvm.amdgcn.image.load.v4i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #2
More information about the llvm-branch-commits
mailing list