[PATCH] D73831: AMDGPU/GFX10: Fix NSA reassign pass when operands are undef

Sat Feb 1 13:47:06 PST 2020

This revision was automatically updated to reflect the committed changes.
Closed by commit rGba8110161dfb: AMDGPU/GFX10: Fix NSA reassign pass when operands are undef (authored by nhaehnle).

Changed prior to commit:
  https://reviews.llvm.org/D73831?vs=241878&id=241903#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D73831/new/

https://reviews.llvm.org/D73831

Files:
  llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp
  llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.nsa.ll


Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.nsa.ll
===================================================================

--- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.nsa.ll
+++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.nsa.ll
@@ -80,6 +80,45 @@
   ret <2 x float> %r
 }
 
+; Test that undef inputs with NSA are handled safely; these tests used to crash.
+
+; GCN-LABEL: {{^}}sample_undef_undef_undef_undef:
+; GCN: image_sample_c_b v0, v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY
+define amdgpu_ps float @sample_undef_undef_undef_undef(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp) {
+  %r = call float @llvm.amdgcn.image.sample.c.b.1darray.f32.f32.f32(i32 1, float undef, float undef, float undef, float undef, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
+  ret float %r
+}
+
+; GCN-LABEL: {{^}}sample_undef_undef_undef_def:
+; NONSA: v_mov_b32_e32 v3, v0
+; NONSA: image_sample_c_b v0, v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY
+; NSA: image_sample_c_b v0, [v0, v0, v0, v0], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY
+define amdgpu_ps float @sample_undef_undef_undef_def(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %layer) {
+  %r = call float @llvm.amdgcn.image.sample.c.b.1darray.f32.f32.f32(i32 1, float undef, float undef, float undef, float %layer, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
+  ret float %r
+}
+
+; GCN-LABEL: {{^}}sample_undef_undef_undef_def_rnd:
+; GCN: v_rndne_f32_e32 v3, v0
+; GCN: image_sample_c_b v0, v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY
+define amdgpu_ps float @sample_undef_undef_undef_def_rnd(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %layer) {
+  %layer_rnd = call float @llvm.rint.f32(float %layer)
+  %r = call float @llvm.amdgcn.image.sample.c.b.1darray.f32.f32.f32(i32 1, float undef, float undef, float undef, float %layer_rnd, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
+  ret float %r
+}
+
+; GCN-LABEL: {{^}}sample_def_undef_undef_undef:
+; GCN: v_add_f32_e32 v0, 1.0, v0
+; GCN: image_sample_c_b v0, v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY
+define amdgpu_ps float @sample_def_undef_undef_undef(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %z0) {
+  ; The NSA reassign pass is conservative (quite reasonably!) when one of the operands
+  ; comes directly from a function argument (via COPY). To test that NSA can be
+  ; eliminated in the presence of undef, just add an arbitrary intermediate
+  ; computation.
+  %c0 = fadd float %z0, 1.0
+  %r = call float @llvm.amdgcn.image.sample.c.b.1darray.f32.f32.f32(i32 1, float %c0, float undef, float undef, float undef, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
+  ret float %r
+}
 
 declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
@@ -88,4 +127,8 @@
 declare float @llvm.amdgcn.image.sample.3d.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 declare float @llvm.amdgcn.image.sample.c.l.3d.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 
+declare float @llvm.rint.f32(float) #2
+declare float @llvm.amdgcn.image.sample.c.b.1darray.f32.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1
+
 attributes #1 = { nounwind readonly }
+attributes #2 = { nounwind readnone speculatable willreturn }
Index: llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp
+++ llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp
@@ -286,8 +286,15 @@
       }
       Intervals.push_back(LI);
       OrigRegs.push_back(VRM->getPhys(Reg));
-      MinInd = I ? std::min(MinInd, LI->beginIndex()) : LI->beginIndex();
-      MaxInd = I ? std::max(MaxInd, LI->endIndex()) : LI->endIndex();
+      if (LI->empty()) {
+        // The address input is undef, so it doesn't contribute to the relevant
+        // range. For the first operand, seed the range with MI's index.
+        if (I == 0)
+          MinInd = MaxInd = LIS->getInstructionIndex(*MI);
+        continue;
+      }
+      MinInd = I != 0 ? std::min(MinInd, LI->beginIndex()) : LI->beginIndex();
+      MaxInd = I != 0 ? std::max(MaxInd, LI->endIndex()) : LI->endIndex();
     }
 
     if (Intervals.empty())


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D73831.241903.patch
Type: text/x-patch
Size: 4506 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200201/22a6a2d7/attachment.bin>