[llvm] r277500 - AMDGPU: Track physical registers in SIWholeQuadMode

Hans Wennborg via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 3 09:04:04 PDT 2016


Sounds good to me if Tom approves.

On Wed, Aug 3, 2016 at 5:07 AM, Nicolai Hähnle <nhaehnle at gmail.com> wrote:
> Hi Hans,
>
> this is a bug fix that should go into the 3.9 release branch.
>
> Thanks,
> Nicolai
>
>
> On 02.08.2016 21:17, Nicolai Haehnle via llvm-commits wrote:
>>
>> Author: nha
>> Date: Tue Aug  2 14:17:37 2016
>> New Revision: 277500
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=277500&view=rev
>> Log:
>> AMDGPU: Track physical registers in SIWholeQuadMode
>>
>> Summary:
>> There are cases where uniform branch conditions are computed in VGPRs, and
>> we didn't correctly mark those as WQM.
>>
>> The stray change in basic-branch.ll is because invoking the LiveIntervals
>> analysis leads to the detection of a dead register that would otherwise
>> not
>> be seen at -O0.
>>
>> This is a candidate for the 3.9 branch, as it fixes a possible hang.
>>
>> Reviewers: arsenm, tstellarAMD, mareko
>>
>> Subscribers: arsenm, llvm-commits, kzhuravl
>>
>> Differential Revision: https://reviews.llvm.org/D22673
>>
>> Modified:
>>     llvm/trunk/lib/Target/AMDGPU/SIWholeQuadMode.cpp
>>     llvm/trunk/test/CodeGen/AMDGPU/basic-branch.ll
>>     llvm/trunk/test/CodeGen/AMDGPU/wqm.ll
>>
>> Modified: llvm/trunk/lib/Target/AMDGPU/SIWholeQuadMode.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIWholeQuadMode.cpp?rev=277500&r1=277499&r2=277500&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/Target/AMDGPU/SIWholeQuadMode.cpp (original)
>> +++ llvm/trunk/lib/Target/AMDGPU/SIWholeQuadMode.cpp Tue Aug  2 14:17:37
>> 2016
>> @@ -94,12 +94,15 @@ private:
>>    const SIInstrInfo *TII;
>>    const SIRegisterInfo *TRI;
>>    MachineRegisterInfo *MRI;
>> +  LiveIntervals *LIS;
>>
>>    DenseMap<const MachineInstr *, InstrInfo> Instructions;
>>    DenseMap<MachineBasicBlock *, BlockInfo> Blocks;
>>    SmallVector<const MachineInstr *, 2> ExecExports;
>>    SmallVector<MachineInstr *, 1> LiveMaskQueries;
>>
>> +  void markInstruction(MachineInstr &MI, char Flag,
>> +                       std::vector<WorkItem> &Worklist);
>>    char scanInstructions(MachineFunction &MF, std::vector<WorkItem>
>> &Worklist);
>>    void propagateInstruction(MachineInstr &MI, std::vector<WorkItem>
>> &Worklist);
>>    void propagateBlock(MachineBasicBlock &MBB, std::vector<WorkItem>
>> &Worklist);
>> @@ -126,6 +129,7 @@ public:
>>    }
>>
>>    void getAnalysisUsage(AnalysisUsage &AU) const override {
>> +    AU.addRequired<LiveIntervals>();
>>      AU.setPreservesCFG();
>>      MachineFunctionPass::getAnalysisUsage(AU);
>>    }
>> @@ -135,8 +139,11 @@ public:
>>
>>  char SIWholeQuadMode::ID = 0;
>>
>> -INITIALIZE_PASS(SIWholeQuadMode, DEBUG_TYPE,
>> -                "SI Whole Quad Mode", false, false)
>> +INITIALIZE_PASS_BEGIN(SIWholeQuadMode, DEBUG_TYPE, "SI Whole Quad Mode",
>> false,
>> +                      false)
>> +INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
>> +INITIALIZE_PASS_END(SIWholeQuadMode, DEBUG_TYPE, "SI Whole Quad Mode",
>> false,
>> +                    false)
>>
>>  char &llvm::SIWholeQuadModeID = SIWholeQuadMode::ID;
>>
>> @@ -144,6 +151,23 @@ FunctionPass *llvm::createSIWholeQuadMod
>>    return new SIWholeQuadMode;
>>  }
>>
>> +void SIWholeQuadMode::markInstruction(MachineInstr &MI, char Flag,
>> +                                      std::vector<WorkItem> &Worklist) {
>> +  InstrInfo &II = Instructions[&MI];
>> +
>> +  assert(Flag == StateWQM || Flag == StateExact);
>> +
>> +  // Ignore if the instruction is already marked. The typical case is
>> that we
>> +  // mark an instruction WQM multiple times, but for atomics it can
>> happen that
>> +  // Flag is StateWQM, but Needs is already set to StateExact. In this
>> case,
>> +  // letting the atomic run in StateExact is correct as per the relevant
>> specs.
>> +  if (II.Needs)
>> +    return;
>> +
>> +  II.Needs = Flag;
>> +  Worklist.push_back(&MI);
>> +}
>> +
>>  // Scan instructions to determine which ones require an Exact execmask
>> and
>>  // which ones seed WQM requirements.
>>  char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
>> @@ -192,8 +216,7 @@ char SIWholeQuadMode::scanInstructions(M
>>            continue;
>>        }
>>
>> -      Instructions[&MI].Needs = Flags;
>> -      Worklist.push_back(&MI);
>> +      markInstruction(MI, Flags, Worklist);
>>        GlobalFlags |= Flags;
>>      }
>>
>> @@ -249,32 +272,35 @@ void SIWholeQuadMode::propagateInstructi
>>      if (!Use.isReg() || !Use.isUse())
>>        continue;
>>
>> -    // At this point, physical registers appear as inputs or outputs
>> -    // and following them makes no sense (and would in fact be incorrect
>> -    // when the same VGPR is used as both an output and an input that
>> leads
>> -    // to a NeedsWQM instruction).
>> -    //
>> -    // Note: VCC appears e.g. in 64-bit addition with carry -
>> theoretically we
>> -    // have to trace this, in practice it happens for 64-bit computations
>> like
>> -    // pointers where both dwords are followed already anyway.
>> -    if (!TargetRegisterInfo::isVirtualRegister(Use.getReg()))
>> -      continue;
>> -
>> -    for (MachineInstr &DefMI : MRI->def_instructions(Use.getReg())) {
>> -      InstrInfo &DefII = Instructions[&DefMI];
>> +    unsigned Reg = Use.getReg();
>>
>> -      // Obviously skip if DefMI is already flagged as NeedWQM.
>> -      //
>> -      // The instruction might also be flagged as NeedExact. This happens
>> when
>> -      // the result of an atomic is used in a WQM computation. In this
>> case,
>> -      // the atomic must not run for helper pixels and the WQM result is
>> -      // undefined.
>> -      if (DefII.Needs != 0)
>> +    // Handle physical registers that we need to track; this is mostly
>> relevant
>> +    // for VCC, which can appear as the (implicit) input of a uniform
>> branch,
>> +    // e.g. when a loop counter is stored in a VGPR.
>> +    if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
>> +      if (Reg == AMDGPU::EXEC)
>>          continue;
>>
>> -      DefII.Needs = StateWQM;
>> -      Worklist.push_back(&DefMI);
>> +      for (MCRegUnitIterator RegUnit(Reg, TRI); RegUnit.isValid();
>> ++RegUnit) {
>> +        LiveRange &LR = LIS->getRegUnit(*RegUnit);
>> +        const VNInfo *Value =
>> LR.Query(LIS->getInstructionIndex(MI)).valueIn();
>> +        if (!Value)
>> +          continue;
>> +
>> +        // Since we're in machine SSA, we do not need to track physical
>> +        // registers across basic blocks.
>> +        if (Value->isPHIDef())
>> +          continue;
>> +
>> +        markInstruction(*LIS->getInstructionFromIndex(Value->def),
>> StateWQM,
>> +                        Worklist);
>> +      }
>> +
>> +      continue;
>>      }
>> +
>> +    for (MachineInstr &DefMI : MRI->def_instructions(Use.getReg()))
>> +      markInstruction(DefMI, StateWQM, Worklist);
>>    }
>>  }
>>
>> @@ -471,6 +497,7 @@ bool SIWholeQuadMode::runOnMachineFuncti
>>    TII = ST.getInstrInfo();
>>    TRI = &TII->getRegisterInfo();
>>    MRI = &MF.getRegInfo();
>> +  LIS = &getAnalysis<LiveIntervals>();
>>
>>    char GlobalFlags = analyzeFunction(MF);
>>    if (!(GlobalFlags & StateWQM)) {
>>
>> Modified: llvm/trunk/test/CodeGen/AMDGPU/basic-branch.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/basic-branch.ll?rev=277500&r1=277499&r2=277500&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/CodeGen/AMDGPU/basic-branch.ll (original)
>> +++ llvm/trunk/test/CodeGen/AMDGPU/basic-branch.ll Tue Aug  2 14:17:37
>> 2016
>> @@ -6,7 +6,6 @@
>>  ; GCN-LABEL: {{^}}test_branch:
>>  ; GCNNOOPT: v_writelane_b32
>>  ; GCNNOOPT: v_writelane_b32
>> -; GCNNOOPT: v_writelane_b32
>>  ; GCN: s_cbranch_scc1 [[END:BB[0-9]+_[0-9]+]]
>>
>>  ; GCN: ; BB#1
>>
>> Modified: llvm/trunk/test/CodeGen/AMDGPU/wqm.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/wqm.ll?rev=277500&r1=277499&r2=277500&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/CodeGen/AMDGPU/wqm.ll (original)
>> +++ llvm/trunk/test/CodeGen/AMDGPU/wqm.ll Tue Aug  2 14:17:37 2016
>> @@ -350,6 +350,44 @@ main_body:
>>    ret float %s
>>  }
>>
>> +; CHECK-LABEL: {{^}}test_loop_vcc:
>> +; CHECK-NEXT: ; %entry
>> +; CHECK-NEXT: s_mov_b64 [[LIVE:s\[[0-9]+:[0-9]+\]]], exec
>> +; CHECK: s_wqm_b64 exec, exec
>> +; CHECK: s_and_b64 exec, exec, [[LIVE]]
>> +; CHECK: image_store
>> +; CHECK: s_wqm_b64 exec, exec
>> +; CHECK: v_mov_b32_e32 [[CTR:v[0-9]+]], -2
>> +; CHECK: s_branch [[LOOPHDR:BB[0-9]+_[0-9]+]]
>> +
>> +; CHECK: [[LOOPHDR]]: ; %loop
>> +; CHECK: v_add_i32_e32 [[CTR]], vcc, 2, [[CTR]]
>> +; CHECK: v_cmp_lt_i32_e32 vcc, 7, [[CTR]]
>> +; CHECK: s_cbranch_vccz
>> +; CHECK: ; %break
>> +
>> +; CHECK: ; return
>> +define amdgpu_ps <4 x float> @test_loop_vcc(<4 x float> %in) nounwind {
>> +entry:
>> +  call void @llvm.amdgcn.image.store.v4i32(<4 x float> %in, <4 x i32>
>> undef, <8 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0)
>> +  br label %loop
>> +
>> +loop:
>> +  %ctr.iv = phi i32 [ 0, %entry ], [ %ctr.next, %body ]
>> +  %c.iv = phi <4 x float> [ %in, %entry ], [ %c.next, %body ]
>> +  %cc = icmp sgt i32 %ctr.iv, 7
>> +  br i1 %cc, label %break, label %body
>> +
>> +body:
>> +  %c.i = bitcast <4 x float> %c.iv to <4 x i32>
>> +  %c.next = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %c.i,
>> <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0,
>> i32 0, i32 0)
>> +  %ctr.next = add i32 %ctr.iv, 2
>> +  br label %loop
>> +
>> +break:
>> +  ret <4 x float> %c.iv
>> +}
>> +
>>  declare void @llvm.amdgcn.image.store.v4i32(<4 x float>, <4 x i32>, <8 x
>> i32>, i32, i1, i1, i1, i1) #1
>>
>>  declare <4 x float> @llvm.amdgcn.image.load.v4i32(<4 x i32>, <8 x i32>,
>> i32, i1, i1, i1, i1) #2
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at lists.llvm.org
>> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>>
>


More information about the llvm-commits mailing list