[PATCH] D77344: [AMDGPU] Propagate AGPR RC from PHI to its PHI operands
Stanislav Mekhanoshin via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 3 11:54:12 PDT 2020
This revision was automatically updated to reflect the committed changes.
Closed by commit rG0462795095e5: [AMDGPU] Propagate AGPR RC from PHI to its PHI operands (authored by rampitec).
Herald added a project: LLVM.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D77344/new/
https://reviews.llvm.org/D77344
Files:
llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
llvm/test/CodeGen/AMDGPU/mfma-loop.ll
Index: llvm/test/CodeGen/AMDGPU/mfma-loop.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/mfma-loop.ll
+++ llvm/test/CodeGen/AMDGPU/mfma-loop.ll
@@ -487,5 +487,50 @@
ret void
}
+; GCN-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}}
+
+; Check that we do not copy agprs to vgprs and back in an outer loop.
+
+; GCN: [[OUTER_LOOP:BB[0-9_]+]]:
+; GCN-NOT: v_accvgpr
+; GCN: [[INNER_LOOP:BB[0-9_]+]]:
+; GCN-NOT: v_accvgpr
+; GCN: v_mfma_f32_32x32x1f32
+; GCN-NOT: v_accvgpr
+; GCN: s_cbranch_scc1 [[INNER_LOOP]]
+; GCN-NOT: v_accvgpr
+; GCN: s_cbranch_scc1 [[OUTER_LOOP]]
+
+; Final result should be read only once after the loop.
+
+; GCN-COUNT-32: v_accvgpr_read_b32
+
+define amdgpu_kernel void @test_mfma_nested_loop_zeroinit(<32 x float> addrspace(1)* %arg) {
+entry:
+ br label %for.cond.preheader
+
+for.cond.preheader:
+ %phi.0 = phi <32 x float> [ zeroinitializer, %entry ], [ %mai.1, %inner.exit ]
+ %c.0 = phi i32 [ 0, %entry ], [ %inc.0, %inner.exit ]
+ br label %inner.for.cond.preheader
+
+inner.for.cond.preheader:
+ %phi = phi <32 x float> [ %phi.0, %for.cond.preheader ], [ %mai.1, %inner.for.cond.preheader ]
+ %c = phi i32 [ 0, %for.cond.preheader ], [ %inc, %inner.for.cond.preheader ]
+ %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %phi, i32 0, i32 0, i32 0)
+ %inc = add nuw nsw i32 %c, 1
+ %cc = icmp eq i32 %inc, 16
+ br i1 %cc, label %inner.exit, label %inner.for.cond.preheader
+
+inner.exit:
+ %inc.0 = add nuw nsw i32 %c.0, 1
+ %cc.0 = icmp eq i32 %inc.0, 16
+ br i1 %cc.0, label %exit, label %for.cond.preheader
+
+exit:
+ store <32 x float> %mai.1, <32 x float> addrspace(1)* %arg
+ ret void
+}
+
declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>, i32, i32, i32)
declare i32 @llvm.amdgcn.workitem.id.x()
Index: llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -766,6 +766,7 @@
bool AllAGPRUses = true;
SetVector<const MachineInstr *> worklist;
SmallSet<const MachineInstr *, 4> Visited;
+ SetVector<MachineInstr *> PHIOperands;
worklist.insert(&MI);
Visited.insert(&MI);
while (!worklist.empty()) {
@@ -810,6 +811,11 @@
if (AllAGPRUses && numVGPRUses && !TRI->hasAGPRs(RC0)) {
LLVM_DEBUG(dbgs() << "Moving PHI to AGPR: " << MI);
MRI->setRegClass(PHIRes, TRI->getEquivalentAGPRClass(RC0));
+ for (unsigned I = 1, N = MI.getNumOperands(); I != N; I += 2) {
+ MachineInstr *DefMI = MRI->getVRegDef(MI.getOperand(I).getReg());
+ if (DefMI && DefMI->isPHI())
+ PHIOperands.insert(DefMI);
+ }
}
bool hasVGPRInput = false;
@@ -845,4 +851,8 @@
TII->legalizeOperands(MI, MDT);
}
+ // Propagate the register class back to PHI operands which are themselves PHIs.
+ while (!PHIOperands.empty()) {
+ processPHINode(*PHIOperands.pop_back_val());
+ }
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D77344.254874.patch
Type: text/x-patch
Size: 3062 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200403/b82d4bab/attachment.bin>
More information about the llvm-commits
mailing list