[llvm] r295878 - AMDGPU: Don't look at chain users when adjusting writemask

Wed Feb 22 13:16:42 PST 2017

Author: arsenm
Date: Wed Feb 22 15:16:41 2017
New Revision: 295878

URL: http://llvm.org/viewvc/llvm-project?rev=295878&view=rev
Log:
AMDGPU: Don't look at chain users when adjusting writemask

Fixes not adjusting using new intrinsics with chains.

Modified:
    llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
    llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.ll

Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=295878&r1=295877&r2=295878&view=diff
==============================================================================

--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Wed Feb 22 15:16:41 2017
@@ -4487,6 +4487,10 @@ void SITargetLowering::adjustWritemask(M
   for (SDNode::use_iterator I = Node->use_begin(), E = Node->use_end();
        I != E; ++I) {
 
+    // Don't look at users of the chain.
+    if (I.getUse().getResNo() != 0)
+      continue;
+
     // Abort if we can't understand the usage
     if (!I->isMachineOpcode() ||
         I->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG)

Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.ll?rev=295878&r1=295877&r2=295878&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.ll Wed Feb 22 15:16:41 2017
@@ -199,6 +199,92 @@ main_body:
   ret void
 }
 
+; GCN-LABEL: {{^}}adjust_writemask_sample_0:
+; GCN: image_sample v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x1{{$}}
+define void @adjust_writemask_sample_0(float addrspace(1)* %out) {
+main_body:
+  %r = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0)
+  %elt0 = extractelement <4 x float> %r, i32 0
+  store float %elt0, float addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}adjust_writemask_sample_01:
+; GCN: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x3{{$}}
+define void @adjust_writemask_sample_01(float addrspace(1)* %out) {
+main_body:
+  %r = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0)
+  %elt0 = extractelement <4 x float> %r, i32 0
+  %elt1 = extractelement <4 x float> %r, i32 1
+  store volatile float %elt0, float addrspace(1)* %out
+  store volatile float %elt1, float addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}adjust_writemask_sample_012:
+; GCN: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x7{{$}}
+define void @adjust_writemask_sample_012(float addrspace(1)* %out) {
+main_body:
+  %r = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0)
+  %elt0 = extractelement <4 x float> %r, i32 0
+  %elt1 = extractelement <4 x float> %r, i32 1
+  %elt2 = extractelement <4 x float> %r, i32 2
+  store volatile float %elt0, float addrspace(1)* %out
+  store volatile float %elt1, float addrspace(1)* %out
+  store volatile float %elt2, float addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}adjust_writemask_sample_12:
+; GCN: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x6{{$}}
+define void @adjust_writemask_sample_12(float addrspace(1)* %out) {
+main_body:
+  %r = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0)
+  %elt1 = extractelement <4 x float> %r, i32 1
+  %elt2 = extractelement <4 x float> %r, i32 2
+  store volatile float %elt1, float addrspace(1)* %out
+  store volatile float %elt2, float addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}adjust_writemask_sample_03:
+; GCN: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x9{{$}}
+define void @adjust_writemask_sample_03(float addrspace(1)* %out) {
+main_body:
+  %r = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0)
+  %elt0 = extractelement <4 x float> %r, i32 0
+  %elt3 = extractelement <4 x float> %r, i32 3
+  store volatile float %elt0, float addrspace(1)* %out
+  store volatile float %elt3, float addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}adjust_writemask_sample_13:
+; GCN: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0xa{{$}}
+define void @adjust_writemask_sample_13(float addrspace(1)* %out) {
+main_body:
+  %r = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0)
+  %elt1 = extractelement <4 x float> %r, i32 1
+  %elt3 = extractelement <4 x float> %r, i32 3
+  store volatile float %elt1, float addrspace(1)* %out
+  store volatile float %elt3, float addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}adjust_writemask_sample_123:
+; GCN: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0xe{{$}}
+define void @adjust_writemask_sample_123(float addrspace(1)* %out) {
+main_body:
+  %r = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0)
+  %elt1 = extractelement <4 x float> %r, i32 1
+  %elt2 = extractelement <4 x float> %r, i32 2
+  %elt3 = extractelement <4 x float> %r, i32 3
+  store volatile float %elt1, float addrspace(1)* %out
+  store volatile float %elt2, float addrspace(1)* %out
+  store volatile float %elt3, float addrspace(1)* %out
+  ret void
+}
+
 declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0
 declare <4 x float> @llvm.amdgcn.image.sample.cl.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0
 declare <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #0