[PATCH] D34727: AMDGPU/SI: Skip fixing a PHI under a uniform branch in SIFixSGPRCopies only when sources and destination are all SGPRs
Changpeng Fang via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 27 16:52:01 PDT 2017
cfang created this revision.
Herald added subscribers: t-tye, tpr, dstuttard, yaxunl, nhaehnle, wdng, kzhuravl.
Even when the PHI is under a uniform branch, we still have to fix it if it has at least one VGPR operand.
https://reviews.llvm.org/D34727
Files:
lib/Target/AMDGPU/SIFixSGPRCopies.cpp
test/CodeGen/AMDGPU/vgpr-to-sgpr-phi.ll
Index: test/CodeGen/AMDGPU/vgpr-to-sgpr-phi.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/vgpr-to-sgpr-phi.ll
@@ -0,0 +1,40 @@
+; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s
+
+; CHECK-LABEL: {{^}}vgpr_to_sgpr_phi:
+; CHECK: BB0_1:
+; CHECK: v_add_i32_e32
+; CHECK: s_branch BB0_1
+define void @vgpr_to_sgpr_phi(i32* nocapture) local_unnamed_addr #0 {
+ br label %2
+
+; <label>:2: ; preds = %10, %1
+ %3 = phi i32 [ 8, %1 ], [ %11, %10 ]
+ %4 = add nsw i32 %3, -1
+ %5 = getelementptr inbounds i32, i32* %0, i32 %4
+ br i1 undef, label %6, label %10
+
+; <label>:6: ; preds = %2
+ %7 = load i32, i32* %5, align 4, !tbaa !1
+ %8 = tail call i32 @llvm.amdgcn.ubfe.i32(i32 %7, i32 16, i32 8) #2
+ %9 = zext i32 %8 to i64
+ br label %10
+
+; <label>:10: ; preds = %6, %2
+ %11 = add nsw i32 %3, 2
+ br label %2
+}
+
+; Function Attrs: nounwind readnone speculatable
+declare i32 @llvm.amdgcn.ubfe.i32(i32, i32, i32) #1
+
+attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="fiji" "target-features"="+16-bit-insts,+dpp,+fp64-fp16-denormals,+s-memrealtime,-fp32-denormals" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone speculatable }
+attributes #2 = { nounwind readnone }
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!2, !2, i64 0}
+!2 = !{!"int", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}
Index: lib/Target/AMDGPU/SIFixSGPRCopies.cpp
===================================================================
--- lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -604,7 +604,8 @@
// We don't need to fix the PHI if the common dominator of the
// two incoming blocks terminates with a uniform branch.
- if (MI.getNumExplicitOperands() == 5) {
+ if (MI.getNumExplicitOperands() == 5 &&
+ !phiHasVGPROperands(MI, MRI, TRI, TII)) {
MachineBasicBlock *MBB0 = MI.getOperand(2).getMBB();
MachineBasicBlock *MBB1 = MI.getOperand(4).getMBB();
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D34727.104312.patch
Type: text/x-patch
Size: 2586 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170627/9ae58357/attachment.bin>
More information about the llvm-commits
mailing list