[llvm] r277500 - AMDGPU: Track physical registers in SIWholeQuadMode
Nicolai Hähnle via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 3 05:07:27 PDT 2016
Hi Hans,
this is a bug fix that should go into the 3.9 release branch.
Thanks,
Nicolai
On 02.08.2016 21:17, Nicolai Haehnle via llvm-commits wrote:
> Author: nha
> Date: Tue Aug 2 14:17:37 2016
> New Revision: 277500
>
> URL: http://llvm.org/viewvc/llvm-project?rev=277500&view=rev
> Log:
> AMDGPU: Track physical registers in SIWholeQuadMode
>
> Summary:
> There are cases where uniform branch conditions are computed in VGPRs, and
> we didn't correctly mark those as WQM.
>
> The stray change in basic-branch.ll is because invoking the LiveIntervals
> analysis leads to the detection of a dead register that would otherwise not
> be seen at -O0.
>
> This is a candidate for the 3.9 branch, as it fixes a possible hang.
>
> Reviewers: arsenm, tstellarAMD, mareko
>
> Subscribers: arsenm, llvm-commits, kzhuravl
>
> Differential Revision: https://reviews.llvm.org/D22673
>
> Modified:
> llvm/trunk/lib/Target/AMDGPU/SIWholeQuadMode.cpp
> llvm/trunk/test/CodeGen/AMDGPU/basic-branch.ll
> llvm/trunk/test/CodeGen/AMDGPU/wqm.ll
>
> Modified: llvm/trunk/lib/Target/AMDGPU/SIWholeQuadMode.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIWholeQuadMode.cpp?rev=277500&r1=277499&r2=277500&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/SIWholeQuadMode.cpp (original)
> +++ llvm/trunk/lib/Target/AMDGPU/SIWholeQuadMode.cpp Tue Aug 2 14:17:37 2016
> @@ -94,12 +94,15 @@ private:
> const SIInstrInfo *TII;
> const SIRegisterInfo *TRI;
> MachineRegisterInfo *MRI;
> + LiveIntervals *LIS;
>
> DenseMap<const MachineInstr *, InstrInfo> Instructions;
> DenseMap<MachineBasicBlock *, BlockInfo> Blocks;
> SmallVector<const MachineInstr *, 2> ExecExports;
> SmallVector<MachineInstr *, 1> LiveMaskQueries;
>
> + void markInstruction(MachineInstr &MI, char Flag,
> + std::vector<WorkItem> &Worklist);
> char scanInstructions(MachineFunction &MF, std::vector<WorkItem> &Worklist);
> void propagateInstruction(MachineInstr &MI, std::vector<WorkItem> &Worklist);
> void propagateBlock(MachineBasicBlock &MBB, std::vector<WorkItem> &Worklist);
> @@ -126,6 +129,7 @@ public:
> }
>
> void getAnalysisUsage(AnalysisUsage &AU) const override {
> + AU.addRequired<LiveIntervals>();
> AU.setPreservesCFG();
> MachineFunctionPass::getAnalysisUsage(AU);
> }
> @@ -135,8 +139,11 @@ public:
>
> char SIWholeQuadMode::ID = 0;
>
> -INITIALIZE_PASS(SIWholeQuadMode, DEBUG_TYPE,
> - "SI Whole Quad Mode", false, false)
> +INITIALIZE_PASS_BEGIN(SIWholeQuadMode, DEBUG_TYPE, "SI Whole Quad Mode", false,
> + false)
> +INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
> +INITIALIZE_PASS_END(SIWholeQuadMode, DEBUG_TYPE, "SI Whole Quad Mode", false,
> + false)
>
> char &llvm::SIWholeQuadModeID = SIWholeQuadMode::ID;
>
> @@ -144,6 +151,23 @@ FunctionPass *llvm::createSIWholeQuadMod
> return new SIWholeQuadMode;
> }
>
> +void SIWholeQuadMode::markInstruction(MachineInstr &MI, char Flag,
> + std::vector<WorkItem> &Worklist) {
> + InstrInfo &II = Instructions[&MI];
> +
> + assert(Flag == StateWQM || Flag == StateExact);
> +
> + // Ignore if the instruction is already marked. The typical case is that we
> + // mark an instruction WQM multiple times, but for atomics it can happen that
> + // Flag is StateWQM, but Needs is already set to StateExact. In this case,
> + // letting the atomic run in StateExact is correct as per the relevant specs.
> + if (II.Needs)
> + return;
> +
> + II.Needs = Flag;
> + Worklist.push_back(&MI);
> +}
> +
> // Scan instructions to determine which ones require an Exact execmask and
> // which ones seed WQM requirements.
> char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
> @@ -192,8 +216,7 @@ char SIWholeQuadMode::scanInstructions(M
> continue;
> }
>
> - Instructions[&MI].Needs = Flags;
> - Worklist.push_back(&MI);
> + markInstruction(MI, Flags, Worklist);
> GlobalFlags |= Flags;
> }
>
> @@ -249,32 +272,35 @@ void SIWholeQuadMode::propagateInstructi
> if (!Use.isReg() || !Use.isUse())
> continue;
>
> - // At this point, physical registers appear as inputs or outputs
> - // and following them makes no sense (and would in fact be incorrect
> - // when the same VGPR is used as both an output and an input that leads
> - // to a NeedsWQM instruction).
> - //
> - // Note: VCC appears e.g. in 64-bit addition with carry - theoretically we
> - // have to trace this, in practice it happens for 64-bit computations like
> - // pointers where both dwords are followed already anyway.
> - if (!TargetRegisterInfo::isVirtualRegister(Use.getReg()))
> - continue;
> -
> - for (MachineInstr &DefMI : MRI->def_instructions(Use.getReg())) {
> - InstrInfo &DefII = Instructions[&DefMI];
> + unsigned Reg = Use.getReg();
>
> - // Obviously skip if DefMI is already flagged as NeedWQM.
> - //
> - // The instruction might also be flagged as NeedExact. This happens when
> - // the result of an atomic is used in a WQM computation. In this case,
> - // the atomic must not run for helper pixels and the WQM result is
> - // undefined.
> - if (DefII.Needs != 0)
> + // Handle physical registers that we need to track; this is mostly relevant
> + // for VCC, which can appear as the (implicit) input of a uniform branch,
> + // e.g. when a loop counter is stored in a VGPR.
> + if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
> + if (Reg == AMDGPU::EXEC)
> continue;
>
> - DefII.Needs = StateWQM;
> - Worklist.push_back(&DefMI);
> + for (MCRegUnitIterator RegUnit(Reg, TRI); RegUnit.isValid(); ++RegUnit) {
> + LiveRange &LR = LIS->getRegUnit(*RegUnit);
> + const VNInfo *Value = LR.Query(LIS->getInstructionIndex(MI)).valueIn();
> + if (!Value)
> + continue;
> +
> + // Since we're in machine SSA, we do not need to track physical
> + // registers across basic blocks.
> + if (Value->isPHIDef())
> + continue;
> +
> + markInstruction(*LIS->getInstructionFromIndex(Value->def), StateWQM,
> + Worklist);
> + }
> +
> + continue;
> }
> +
> + for (MachineInstr &DefMI : MRI->def_instructions(Use.getReg()))
> + markInstruction(DefMI, StateWQM, Worklist);
> }
> }
>
> @@ -471,6 +497,7 @@ bool SIWholeQuadMode::runOnMachineFuncti
> TII = ST.getInstrInfo();
> TRI = &TII->getRegisterInfo();
> MRI = &MF.getRegInfo();
> + LIS = &getAnalysis<LiveIntervals>();
>
> char GlobalFlags = analyzeFunction(MF);
> if (!(GlobalFlags & StateWQM)) {
>
> Modified: llvm/trunk/test/CodeGen/AMDGPU/basic-branch.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/basic-branch.ll?rev=277500&r1=277499&r2=277500&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/AMDGPU/basic-branch.ll (original)
> +++ llvm/trunk/test/CodeGen/AMDGPU/basic-branch.ll Tue Aug 2 14:17:37 2016
> @@ -6,7 +6,6 @@
> ; GCN-LABEL: {{^}}test_branch:
> ; GCNNOOPT: v_writelane_b32
> ; GCNNOOPT: v_writelane_b32
> -; GCNNOOPT: v_writelane_b32
> ; GCN: s_cbranch_scc1 [[END:BB[0-9]+_[0-9]+]]
>
> ; GCN: ; BB#1
>
> Modified: llvm/trunk/test/CodeGen/AMDGPU/wqm.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/wqm.ll?rev=277500&r1=277499&r2=277500&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/AMDGPU/wqm.ll (original)
> +++ llvm/trunk/test/CodeGen/AMDGPU/wqm.ll Tue Aug 2 14:17:37 2016
> @@ -350,6 +350,44 @@ main_body:
> ret float %s
> }
>
> +; CHECK-LABEL: {{^}}test_loop_vcc:
> +; CHECK-NEXT: ; %entry
> +; CHECK-NEXT: s_mov_b64 [[LIVE:s\[[0-9]+:[0-9]+\]]], exec
> +; CHECK: s_wqm_b64 exec, exec
> +; CHECK: s_and_b64 exec, exec, [[LIVE]]
> +; CHECK: image_store
> +; CHECK: s_wqm_b64 exec, exec
> +; CHECK: v_mov_b32_e32 [[CTR:v[0-9]+]], -2
> +; CHECK: s_branch [[LOOPHDR:BB[0-9]+_[0-9]+]]
> +
> +; CHECK: [[LOOPHDR]]: ; %loop
> +; CHECK: v_add_i32_e32 [[CTR]], vcc, 2, [[CTR]]
> +; CHECK: v_cmp_lt_i32_e32 vcc, 7, [[CTR]]
> +; CHECK: s_cbranch_vccz
> +; CHECK: ; %break
> +
> +; CHECK: ; return
> +define amdgpu_ps <4 x float> @test_loop_vcc(<4 x float> %in) nounwind {
> +entry:
> + call void @llvm.amdgcn.image.store.v4i32(<4 x float> %in, <4 x i32> undef, <8 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0)
> + br label %loop
> +
> +loop:
> + %ctr.iv = phi i32 [ 0, %entry ], [ %ctr.next, %body ]
> + %c.iv = phi <4 x float> [ %in, %entry ], [ %c.next, %body ]
> + %cc = icmp sgt i32 %ctr.iv, 7
> + br i1 %cc, label %break, label %body
> +
> +body:
> + %c.i = bitcast <4 x float> %c.iv to <4 x i32>
> + %c.next = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %c.i, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
> + %ctr.next = add i32 %ctr.iv, 2
> + br label %loop
> +
> +break:
> + ret <4 x float> %c.iv
> +}
> +
> declare void @llvm.amdgcn.image.store.v4i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
>
> declare <4 x float> @llvm.amdgcn.image.load.v4i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #2
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
More information about the llvm-commits
mailing list