[PATCH] D73831: AMDGPU/GFX10: Fix NSA reassign pass when operands are undef

Sat Feb 1 07:03:47 PST 2020

nhaehnle created this revision.
nhaehnle added reviewers: arsenm, rampitec, mareko.
Herald added subscribers: kerbowa, hiraditya, t-tye, tpr, dstuttard, yaxunl, wdng, jvesely, kzhuravl.
Herald added a project: LLVM.

Virtual registers that are undef have an empty LiveInterval at this
point, which means beginIndex() and endIndex() cannot be called on
them. Those indices are only needed to determine the range in which to
scan for other affected NSA instructions, and undef operands cannot
contribute to that range.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D73831

Files:
  llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp
  llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.nsa.ll


Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.nsa.ll
===================================================================

--- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.nsa.ll
+++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.nsa.ll
@@ -80,6 +80,45 @@
   ret <2 x float> %r
 }
 
+; Test that undef inputs with NSA are handled safely; these tests used to crash.
+
+; GCN-LABEL: {{^}}sample_undef_undef_undef_undef:
+; GCN: image_sample_c_b v0, v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY
+define amdgpu_ps float @sample_undef_undef_undef_undef(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp) {
+  %r = call float @llvm.amdgcn.image.sample.c.b.1darray.f32.f32.f32(i32 1, float undef, float undef, float undef, float undef, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
+  ret float %r
+}
+
+; GCN-LABEL: {{^}}sample_undef_undef_undef_def:
+; NONSA: v_mov_b32_e32 v3, v0
+; NONSA: image_sample_c_b v0, v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY
+; NSA: image_sample_c_b v0, [v0, v0, v0, v0], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY
+define amdgpu_ps float @sample_undef_undef_undef_def(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %layer) {
+  %r = call float @llvm.amdgcn.image.sample.c.b.1darray.f32.f32.f32(i32 1, float undef, float undef, float undef, float %layer, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
+  ret float %r
+}
+
+; GCN-LABEL: {{^}}sample_undef_undef_undef_def_rnd:
+; GCN: v_rndne_f32_e32 v3, v0
+; GCN: image_sample_c_b v0, v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY
+define amdgpu_ps float @sample_undef_undef_undef_def_rnd(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %layer) {
+  %layer_rnd = call float @llvm.rint.f32(float %layer)
+  %r = call float @llvm.amdgcn.image.sample.c.b.1darray.f32.f32.f32(i32 1, float undef, float undef, float undef, float %layer_rnd, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
+  ret float %r
+}
+
+; GCN-LABEL: {{^}}sample_def_undef_undef_undef:
+; GCN: v_add_f32_e32 v0, 1.0, v0
+; GCN: image_sample_c_b v0, v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY
+define amdgpu_ps float @sample_def_undef_undef_undef(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %z0) {
+  ; The NSA reassign pass is conservative (quite reasonably!) when one of the operands
+  ; comes directly from a function argument (via COPY). To test that NSA can be
+  ; eliminated in the presence of undef, just add an arbitrary intermediate
+  ; computation.
+  %c0 = fadd float %z0, 1.0
+  %r = call float @llvm.amdgcn.image.sample.c.b.1darray.f32.f32.f32(i32 1, float %c0, float undef, float undef, float undef, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
+  ret float %r
+}
 
 declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
@@ -88,4 +127,8 @@
 declare float @llvm.amdgcn.image.sample.3d.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 declare float @llvm.amdgcn.image.sample.c.l.3d.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 
+declare float @llvm.rint.f32(float) #2
+declare float @llvm.amdgcn.image.sample.c.b.1darray.f32.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1
+
 attributes #1 = { nounwind readonly }
+attributes #2 = { nounwind readnone speculatable willreturn }
Index: llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp
+++ llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp
@@ -286,6 +286,13 @@
       }
       Intervals.push_back(LI);
       OrigRegs.push_back(VRM->getPhys(Reg));
+      if (LI->empty()) {
+        // The address input is undef, so it doesn't contribute to the relevant
+        // range. Seed a reasonable index range if required.
+        if (!I)
+          MinInd = MaxInd = LIS->getInstructionIndex(*MI);
+        continue;
+      }
       MinInd = I ? std::min(MinInd, LI->beginIndex()) : LI->beginIndex();
       MaxInd = I ? std::max(MaxInd, LI->endIndex()) : LI->endIndex();
     }


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D73831.241878.patch
Type: text/x-patch
Size: 4316 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200201/47312cbf/attachment.bin>