[PATCH] D34727: AMDGPU/SI: In SIFixSGPRCopies, skip fixing a PHI under a uniform branch only when its sources and destination are all SGPRs

Tue Jun 27 16:52:01 PDT 2017

cfang created this revision.
Herald added subscribers: t-tye, tpr, dstuttard, yaxunl, nhaehnle, wdng, kzhuravl.

A PHI under a uniform branch can be left unfixed only when all of its operands are SGPRs. We still have to fix the PHI if there is at least one VGPR operand.


https://reviews.llvm.org/D34727

Files:
  lib/Target/AMDGPU/SIFixSGPRCopies.cpp
  test/CodeGen/AMDGPU/vgpr-to-sgpr-phi.ll


Index: test/CodeGen/AMDGPU/vgpr-to-sgpr-phi.ll
===================================================================

--- /dev/null
+++ test/CodeGen/AMDGPU/vgpr-to-sgpr-phi.ll
@@ -0,0 +1,40 @@
+; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s
+
+; CHECK-LABEL: {{^}}vgpr_to_sgpr_phi:
+; CHECK: BB0_1:
+; CHECK: v_add_i32_e32
+; CHECK: s_branch BB0_1
+define void @vgpr_to_sgpr_phi(i32* nocapture) local_unnamed_addr #0 {
+  br label %2
+
+; <label>:2:                                      ; preds = %10, %1
+  %3 = phi i32 [ 8, %1 ], [ %11, %10 ]
+  %4 = add nsw i32 %3, -1
+  %5 = getelementptr inbounds i32, i32* %0, i32 %4
+  br i1 undef, label %6, label %10
+
+; <label>:6:                                      ; preds = %2
+  %7 = load i32, i32* %5, align 4, !tbaa !1
+  %8 = tail call i32 @llvm.amdgcn.ubfe.i32(i32 %7, i32 16, i32 8) #2
+  %9 = zext i32 %8 to i64
+  br label %10
+
+; <label>:10:                                     ; preds = %6, %2
+  %11 = add nsw i32 %3, 2
+  br label %2
+}
+
+; Function Attrs: nounwind readnone speculatable
+declare i32 @llvm.amdgcn.ubfe.i32(i32, i32, i32) #1
+
+attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="fiji" "target-features"="+16-bit-insts,+dpp,+fp64-fp16-denormals,+s-memrealtime,-fp32-denormals" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone speculatable }
+attributes #2 = { nounwind readnone }
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!2, !2, i64 0}
+!2 = !{!"int", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}
Index: lib/Target/AMDGPU/SIFixSGPRCopies.cpp
===================================================================
--- lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -604,7 +604,8 @@
 
         // We don't need to fix the PHI if the common dominator of the
         // two incoming blocks terminates with a uniform branch.
-        if (MI.getNumExplicitOperands() == 5) {
+        if (MI.getNumExplicitOperands() == 5 &&
+            !phiHasVGPROperands(MI, MRI, TRI, TII)) {
           MachineBasicBlock *MBB0 = MI.getOperand(2).getMBB();
           MachineBasicBlock *MBB1 = MI.getOperand(4).getMBB();
 


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D34727.104312.patch
Type: text/x-patch
Size: 2586 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170627/9ae58357/attachment.bin>