[PATCH] D34727: AMDGPU/SI: Don't fix a PHI under uniform branch in SIFixSGPRCopies only when sources and destination are all sgprs

Changpeng Fang via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 28 14:04:51 PDT 2017


cfang updated this revision to Diff 104503.
cfang added a comment.

Updated based on Matt's review.

For the test:

1. I need details on how to change the test function to a kernel, especially what the arguments should be.
  - I tried a few options, but the issue could not be reproduced with the kernel version, possibly because the code is optimized away.

2. It is not clear what should be merged!


https://reviews.llvm.org/D34727

Files:
  lib/Target/AMDGPU/SIFixSGPRCopies.cpp
  test/CodeGen/AMDGPU/vgpr-to-sgpr-phi.ll


Index: test/CodeGen/AMDGPU/vgpr-to-sgpr-phi.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/vgpr-to-sgpr-phi.ll
@@ -0,0 +1,33 @@
+; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}vgpr_to_sgpr_phi:
+; GCN: BB0_1:
+; GCN: v_add_i32_e32
+; GCN: s_branch BB0_1
+define void @vgpr_to_sgpr_phi(i32* nocapture %arg) local_unnamed_addr #0 {
+bb:
+  br label %bb1
+
+bb1:                                              ; preds = %bb8, %bb
+  %tmp = phi i32 [ 8, %bb ], [ %tmp9, %bb8 ]
+  %tmp2 = add nsw i32 %tmp, -1
+  %tmp3 = getelementptr inbounds i32, i32* %arg, i32 %tmp2
+  br i1 undef, label %bb4, label %bb8
+
+bb4:                                              ; preds = %bb1
+  %tmp5 = load i32, i32* %tmp3, align 4
+  %tmp6 = tail call i32 @llvm.amdgcn.ubfe.i32(i32 %tmp5, i32 16, i32 8) #2
+  %tmp7 = zext i32 %tmp6 to i64
+  br label %bb8
+
+bb8:                                              ; preds = %bb4, %bb1
+  %tmp9 = add nsw i32 %tmp, 2
+  br label %bb1
+}
+
+; Function Attrs: nounwind readnone speculatable
+declare i32 @llvm.amdgcn.ubfe.i32(i32, i32, i32) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone speculatable }
+attributes #2 = { nounwind readnone }
Index: lib/Target/AMDGPU/SIFixSGPRCopies.cpp
===================================================================
--- lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -604,7 +604,8 @@
 
         // We don't need to fix the PHI if the common dominator of the
         // two incoming blocks terminates with a uniform branch.
-        if (MI.getNumExplicitOperands() == 5) {
+        bool HasVGPROperand = phiHasVGPROperands(MI, MRI, TRI, TII);
+        if (MI.getNumExplicitOperands() == 5 && !HasVGPROperand) {
           MachineBasicBlock *MBB0 = MI.getOperand(2).getMBB();
           MachineBasicBlock *MBB1 = MI.getOperand(4).getMBB();
 
@@ -649,8 +650,7 @@
         // is no chance for values to be over-written.
 
         SmallSet<unsigned, 8> Visited;
-        if (phiHasVGPROperands(MI, MRI, TRI, TII) ||
-            !phiHasBreakDef(MI, MRI, Visited)) {
+        if (HasVGPROperand || !phiHasBreakDef(MI, MRI, Visited)) {
           DEBUG(dbgs() << "Fixing PHI: " << MI);
           TII->moveToVALU(MI);
         }


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D34727.104503.patch
Type: text/x-patch
Size: 2379 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170628/cff721d5/attachment.bin>


More information about the llvm-commits mailing list