[PATCH] D73831: AMDGPU/GFX10: Fix NSA reassign pass when operands are undef
Nicolai Hähnle via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Sat Feb 1 13:47:06 PST 2020
This revision was automatically updated to reflect the committed changes.
Closed by commit rGba8110161dfb: AMDGPU/GFX10: Fix NSA reassign pass when operands are undef (authored by nhaehnle).
Changed prior to commit:
https://reviews.llvm.org/D73831?vs=241878&id=241903#toc
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D73831/new/
https://reviews.llvm.org/D73831
Files:
llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.nsa.ll
Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.nsa.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.nsa.ll
+++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.nsa.ll
@@ -80,6 +80,45 @@
ret <2 x float> %r
}
+; Test that undef inputs with NSA are handled safely; these tests used to crash.
+
+; GCN-LABEL: {{^}}sample_undef_undef_undef_undef:
+; GCN: image_sample_c_b v0, v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY
+define amdgpu_ps float @sample_undef_undef_undef_undef(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp) {
+ %r = call float @llvm.amdgcn.image.sample.c.b.1darray.f32.f32.f32(i32 1, float undef, float undef, float undef, float undef, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
+ ret float %r
+}
+
+; GCN-LABEL: {{^}}sample_undef_undef_undef_def:
+; NONSA: v_mov_b32_e32 v3, v0
+; NONSA: image_sample_c_b v0, v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY
+; NSA: image_sample_c_b v0, [v0, v0, v0, v0], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY
+define amdgpu_ps float @sample_undef_undef_undef_def(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %layer) {
+ %r = call float @llvm.amdgcn.image.sample.c.b.1darray.f32.f32.f32(i32 1, float undef, float undef, float undef, float %layer, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
+ ret float %r
+}
+
+; GCN-LABEL: {{^}}sample_undef_undef_undef_def_rnd:
+; GCN: v_rndne_f32_e32 v3, v0
+; GCN: image_sample_c_b v0, v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY
+define amdgpu_ps float @sample_undef_undef_undef_def_rnd(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %layer) {
+ %layer_rnd = call float @llvm.rint.f32(float %layer)
+ %r = call float @llvm.amdgcn.image.sample.c.b.1darray.f32.f32.f32(i32 1, float undef, float undef, float undef, float %layer_rnd, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
+ ret float %r
+}
+
+; GCN-LABEL: {{^}}sample_def_undef_undef_undef:
+; GCN: v_add_f32_e32 v0, 1.0, v0
+; GCN: image_sample_c_b v0, v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY
+define amdgpu_ps float @sample_def_undef_undef_undef(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %z0) {
+ ; The NSA reassign pass is conservative (quite reasonably!) when one of the operands
+ ; comes directly from a function argument (via COPY). To test that NSA can be
+ ; eliminated in the presence of undef, just add an arbitrary intermediate
+ ; computation.
+ %c0 = fadd float %z0, 1.0
+ %r = call float @llvm.amdgcn.image.sample.c.b.1darray.f32.f32.f32(i32 1, float %c0, float undef, float undef, float undef, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
+ ret float %r
+}
declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
@@ -88,4 +127,8 @@
declare float @llvm.amdgcn.image.sample.3d.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare float @llvm.amdgcn.image.sample.c.l.3d.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare float @llvm.rint.f32(float) #2
+declare float @llvm.amdgcn.image.sample.c.b.1darray.f32.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1
+
attributes #1 = { nounwind readonly }
+attributes #2 = { nounwind readnone speculatable willreturn }
Index: llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp
+++ llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp
@@ -286,8 +286,15 @@
}
Intervals.push_back(LI);
OrigRegs.push_back(VRM->getPhys(Reg));
- MinInd = I ? std::min(MinInd, LI->beginIndex()) : LI->beginIndex();
- MaxInd = I ? std::max(MaxInd, LI->endIndex()) : LI->endIndex();
+ if (LI->empty()) {
+ // The address input is undef, so it doesn't contribute to the relevant
+ // range. Seed a reasonable index range if required.
+ if (I == 0)
+ MinInd = MaxInd = LIS->getInstructionIndex(*MI);
+ continue;
+ }
+ MinInd = I != 0 ? std::min(MinInd, LI->beginIndex()) : LI->beginIndex();
+ MaxInd = I != 0 ? std::max(MaxInd, LI->endIndex()) : LI->endIndex();
}
if (Intervals.empty())
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D73831.241903.patch
Type: text/x-patch
Size: 4506 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200201/22a6a2d7/attachment.bin>
More information about the llvm-commits mailing list