[PATCH] D31350: AMDGPU: Fix issue where the common dominator of two incoming blocks terminates with a uniform branch.
Wei Ding via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 6 13:00:08 PDT 2017
wdng updated this revision to Diff 94425.
wdng added a comment.
Address code review comments: check the terminators of all predecessors.
Repository:
rL LLVM
https://reviews.llvm.org/D31350
Files:
lib/Target/AMDGPU/SIFixSGPRCopies.cpp
test/CodeGen/AMDGPU/sgprcopies.ll
Index: test/CodeGen/AMDGPU/sgprcopies.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/sgprcopies.ll
@@ -0,0 +1,58 @@
+; RUN: llc -march=amdgcn < %s | FileCheck -check-prefix=SI %s
+
+; SI-LABEL: {{^}}sgprcopies
+; SI: BB0_2
+; SI: v_add
+define void @sgprcopies(i32 addrspace(1)* nocapture %out, i32 %width, float %xPos, float %yPos, float %xStep, float %yStep, i32 %maxIter) { ; runs a counted loop and stores the resulting count through %out
+entry:
+ %conv = call i32 @llvm.amdgcn.workitem.id.x() #1
+ %rem = urem i32 %conv, %width
+ %div = udiv i32 %conv, %width
+ %conv1 = sitofp i32 %rem to float
+ %0 = tail call float @llvm.fmuladd.f32(float %xStep, float %conv1, float %xPos)
+ %conv2 = sitofp i32 %div to float
+ %1 = tail call float @llvm.fmuladd.f32(float %yStep, float %conv2, float %yPos)
+ %mul334 = fmul float %1, %1
+ %2 = tail call float @llvm.fmuladd.f32(float %0, float %0, float %mul334)
+ %cmp35 = fcmp ole float %2, 4.000000e+00
+ %cmp536 = icmp ne i32 %maxIter, 0
+ %or.cond37 = and i1 %cmp536, %cmp35
+ br i1 %or.cond37, label %for.body.preheader, label %for.end ; condition depends on the workitem id through %rem and %div
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %x.040 = phi float [ %call8, %for.body ], [ %0, %for.body.preheader ] ; loop-carried value, seeded with %0
+ %iter.039 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] ; iteration counter, starts at 0
+ %y.038 = phi float [ %call9, %for.body ], [ %1, %for.body.preheader ] ; loop-carried value, seeded with %1
+ %sub = fsub float -0.000000e+00, %y.038
+ %call7 = tail call float @llvm.fmuladd.f32(float %x.040, float %x.040, float %0) #1
+ %call8 = tail call float @llvm.fmuladd.f32(float %sub, float %y.038, float %call7) #1
+ %mul = fmul float %x.040, 2.000000e+00
+ %call9 = tail call float @llvm.fmuladd.f32(float %mul, float %y.038, float %1) #1
+ %inc = add nuw i32 %iter.039, 1
+ %mul3 = fmul float %call9, %call9
+ %3 = tail call float @llvm.fmuladd.f32(float %call8, float %call8, float %mul3)
+ %cmp = fcmp ole float %3, 4.000000e+00
+ %cmp5 = icmp ult i32 %inc, %maxIter
+ %or.cond = and i1 %cmp5, %cmp
+ br i1 %or.cond, label %for.body, label %for.end.loopexit ; take the back edge while %inc < %maxIter and %3 <= 4.0
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ %iter.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %for.end.loopexit ] ; 0 when the loop was skipped, otherwise the final %inc
+ %idxprom = ashr exact i32 %conv, 32 ; NOTE(review): shift amount equals the i32 bit width - confirm this is intended
+ %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %idxprom
+ store i32 %iter.0.lcssa, i32 addrspace(1)* %arrayidx, align 4
+ ret void
+}
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.amdgcn.workitem.id.x() #0
+declare float @llvm.fmuladd.f32(float, float, float) #1
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { readnone }
Index: lib/Target/AMDGPU/SIFixSGPRCopies.cpp
===================================================================
--- lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -327,6 +327,15 @@
return true;
}
+static bool hasTerminator(MachineBasicBlock *MBB, const TargetRegisterInfo *TRI) { // true if any predecessor of MBB passes hasTerminatorThatModifiesExec
+ for (MachineBasicBlock *Pred : MBB->predecessors()) { // inspects MBB's predecessors, never MBB itself
+ if (hasTerminatorThatModifiesExec(*Pred, *TRI))
+ return true; // stop at the first matching predecessor
+ }
+
+ return false; // also the result when MBB has no predecessors
+}
+
bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -382,9 +391,7 @@
if (MI.getNumExplicitOperands() == 5) {
MachineBasicBlock *MBB0 = MI.getOperand(2).getMBB();
MachineBasicBlock *MBB1 = MI.getOperand(4).getMBB();
-
- MachineBasicBlock *NCD = MDT->findNearestCommonDominator(MBB0, MBB1);
- if (NCD && !hasTerminatorThatModifiesExec(*NCD, *TRI)) {
+ if (!hasTerminator(MBB0, TRI) && !hasTerminator(MBB1, TRI)) {
DEBUG(dbgs() << "Not fixing PHI for uniform branch: " << MI << '\n');
break;
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D31350.94425.patch
Type: text/x-patch
Size: 4032 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170406/67de57c9/attachment.bin>
More information about the llvm-commits
mailing list