[PATCH] D35393: AMDGPU: Preserve undef flag in eliminateFrameIndex
Matt Arsenault via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 13 17:09:31 PDT 2017
arsenm created this revision.
Herald added subscribers: t-tye, tpr, dstuttard, yaxunl, wdng, kzhuravl.
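Fixes verifier errors in some call tests by copying the original vdata operand (including its undef flag) into the rebuilt load/store, instead of re-adding only the register.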
Not sure why we haven't run into this before.
https://reviews.llvm.org/D35393
Files:
lib/Target/AMDGPU/SIRegisterInfo.cpp
test/CodeGen/AMDGPU/frame-index-elimination.ll
Index: test/CodeGen/AMDGPU/frame-index-elimination.ll
===================================================================
--- test/CodeGen/AMDGPU/frame-index-elimination.ll
+++ test/CodeGen/AMDGPU/frame-index-elimination.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mattr=-promote-alloca -amdgpu-function-calls -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
; Test that non-entry function frame indices are expanded properly to
; give an index relative to the scratch wave offset register
@@ -165,4 +165,28 @@
ret void
}
+declare void @func(<4 x float>* nocapture) #0
+
+; undef flag not preserved in eliminateFrameIndex when handling the
+; stores in the middle block.
+
+; GCN-LABEL: {{^}}undefined_stack_store_reg:
+define void @undefined_stack_store_reg(float %arg, i32 %arg1) #0 {
+bb:
+ %tmp = alloca <4 x float>, align 16
+ %tmp2 = insertelement <4 x float> undef, float %arg, i32 0
+ store <4 x float> %tmp2, <4 x float>* undef
+ %tmp3 = icmp eq i32 %arg1, 0
+ br i1 %tmp3, label %bb4, label %bb5
+
+bb4:
+ call void @func(<4 x float>* nonnull undef)
+ store <4 x float> %tmp2, <4 x float>* %tmp, align 16
+ call void @func(<4 x float>* nonnull %tmp)
+ br label %bb5
+
+bb5:
+ ret void
+}
+
attributes #0 = { nounwind }
Index: lib/Target/AMDGPU/SIRegisterInfo.cpp
===================================================================
--- lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -479,17 +479,16 @@
if (LoadStoreOp == -1)
return false;
- unsigned Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata)->getReg();
-
+ const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
- .addReg(Reg, getDefRegState(!IsStore))
- .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
- .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
- .addImm(Offset)
- .addImm(0) // glc
- .addImm(0) // slc
- .addImm(0) // tfe
- .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ .add(*Reg)
+ .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
+ .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
+ .addImm(Offset)
+ .addImm(0) // glc
+ .addImm(0) // slc
+ .addImm(0) // tfe
+ .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
return true;
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D35393.106559.patch
Type: text/x-patch
Size: 2593 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170714/4acf048d/attachment.bin>
More information about the llvm-commits
mailing list