[PATCH] D35393: AMDGPU: Preserve undef flag in eliminateFrameIndex

Thu Jul 13 17:09:31 PDT 2017

arsenm created this revision.
Herald added subscribers: t-tye, tpr, dstuttard, yaxunl, wdng, kzhuravl.

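Preserve the undef flag on the vdata operand by copying the whole
operand instead of re-adding only its register.
This fixes verifier errors in some call tests.
Not sure why we haven't run into this before.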


https://reviews.llvm.org/D35393

Files:
  lib/Target/AMDGPU/SIRegisterInfo.cpp
  test/CodeGen/AMDGPU/frame-index-elimination.ll


Index: test/CodeGen/AMDGPU/frame-index-elimination.ll
===================================================================

--- test/CodeGen/AMDGPU/frame-index-elimination.ll
+++ test/CodeGen/AMDGPU/frame-index-elimination.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mattr=-promote-alloca -amdgpu-function-calls -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
 
 ; Test that non-entry function frame indices are expanded properly to
 ; give an index relative to the scratch wave offset register
@@ -165,4 +165,28 @@
   ret void
 }
 
+declare void @func(<4 x float>* nocapture) #0
+
+; The undef flag was not preserved in eliminateFrameIndex when handling
+; the stores in the middle block (bb4).
+
+; GCN-LABEL: {{^}}undefined_stack_store_reg:
+define void @undefined_stack_store_reg(float %arg, i32 %arg1) #0 {
+bb:
+  %tmp = alloca <4 x float>, align 16
+  %tmp2 = insertelement <4 x float> undef, float %arg, i32 0
+  store <4 x float> %tmp2, <4 x float>* undef
+  %tmp3 = icmp eq i32 %arg1, 0
+  br i1 %tmp3, label %bb4, label %bb5
+
+bb4:
+  call void @func(<4 x float>* nonnull undef)
+  store <4 x float> %tmp2, <4 x float>* %tmp, align 16
+  call void @func(<4 x float>* nonnull %tmp)
+  br label %bb5
+
+bb5:
+  ret void
+}
+
 attributes #0 = { nounwind }
Index: lib/Target/AMDGPU/SIRegisterInfo.cpp
===================================================================
--- lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -479,17 +479,16 @@
   if (LoadStoreOp == -1)
     return false;
 
-  unsigned Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata)->getReg();
-
+  const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
   BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
-      .addReg(Reg, getDefRegState(!IsStore))
-      .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
-      .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
-      .addImm(Offset)
-      .addImm(0) // glc
-      .addImm(0) // slc
-      .addImm(0) // tfe
-      .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+    .add(*Reg)
+    .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
+    .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
+    .addImm(Offset)
+    .addImm(0) // glc
+    .addImm(0) // slc
+    .addImm(0) // tfe
+    .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
   return true;
 }
 


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D35393.106559.patch
Type: text/x-patch
Size: 2593 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170714/4acf048d/attachment.bin>