[PATCH] D34727: AMDGPU/SI: In SIFixSGPRCopies, skip fixing a PHI under a uniform branch only when its sources and destination are all SGPRs
Changpeng Fang via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 28 14:04:51 PDT 2017
cfang updated this revision to Diff 104503.
cfang added a comment.
Updated based on Matt's review.
For the test:
1. I need details on how to change the test function to a kernel, especially what the argument should be.
- I tried a few options, but it seems we cannot reproduce the issue with the kernel version, possibly because the code is optimized away.
2. It is not clear to me what should be merged!
https://reviews.llvm.org/D34727
Files:
lib/Target/AMDGPU/SIFixSGPRCopies.cpp
test/CodeGen/AMDGPU/vgpr-to-sgpr-phi.ll
Index: test/CodeGen/AMDGPU/vgpr-to-sgpr-phi.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/vgpr-to-sgpr-phi.ll
@@ -0,0 +1,33 @@
+; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}vgpr_to_sgpr_phi:
+; GCN: BB0_1:
+; GCN: v_add_i32_e32
+; GCN: s_branch BB0_1
+define void @vgpr_to_sgpr_phi(i32* nocapture %arg) local_unnamed_addr #0 {
+bb:
+ br label %bb1
+
+bb1: ; preds = %bb8, %bb
+ %tmp = phi i32 [ 8, %bb ], [ %tmp9, %bb8 ]
+ %tmp2 = add nsw i32 %tmp, -1
+ %tmp3 = getelementptr inbounds i32, i32* %arg, i32 %tmp2
+ br i1 undef, label %bb4, label %bb8
+
+bb4: ; preds = %bb1
+ %tmp5 = load i32, i32* %tmp3, align 4
+ %tmp6 = tail call i32 @llvm.amdgcn.ubfe.i32(i32 %tmp5, i32 16, i32 8) #2
+ %tmp7 = zext i32 %tmp6 to i64
+ br label %bb8
+
+bb8: ; preds = %bb4, %bb1
+ %tmp9 = add nsw i32 %tmp, 2
+ br label %bb1
+}
+
+; Function Attrs: nounwind readnone speculatable
+declare i32 @llvm.amdgcn.ubfe.i32(i32, i32, i32) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone speculatable }
+attributes #2 = { nounwind readnone }
Index: lib/Target/AMDGPU/SIFixSGPRCopies.cpp
===================================================================
--- lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -604,7 +604,8 @@
// We don't need to fix the PHI if the common dominator of the
// two incoming blocks terminates with a uniform branch.
- if (MI.getNumExplicitOperands() == 5) {
+ bool HasVGPROperand = phiHasVGPROperands(MI, MRI, TRI, TII);
+ if (MI.getNumExplicitOperands() == 5 && !HasVGPROperand) {
MachineBasicBlock *MBB0 = MI.getOperand(2).getMBB();
MachineBasicBlock *MBB1 = MI.getOperand(4).getMBB();
@@ -649,8 +650,7 @@
// is no chance for values to be over-written.
SmallSet<unsigned, 8> Visited;
- if (phiHasVGPROperands(MI, MRI, TRI, TII) ||
- !phiHasBreakDef(MI, MRI, Visited)) {
+ if (HasVGPROperand || !phiHasBreakDef(MI, MRI, Visited)) {
DEBUG(dbgs() << "Fixing PHI: " << MI);
TII->moveToVALU(MI);
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D34727.104503.patch
Type: text/x-patch
Size: 2379 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170628/cff721d5/attachment.bin>
More information about the llvm-commits
mailing list