[PATCH] D46051: [AMDGPU] Don't force WQM for DS op

Mon May 7 06:26:27 PDT 2018

This revision was automatically updated to reflect the committed changes.
Closed by commit rL331633: [AMDGPU] Don't force WQM for DS op (authored by tpr).

Changed prior to commit:
  https://reviews.llvm.org/D46051?vs=143891&id=145455#toc

Repository:
  rL LLVM

https://reviews.llvm.org/D46051

Files:
  llvm/trunk/lib/Target/AMDGPU/SIWholeQuadMode.cpp
  llvm/trunk/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll
  llvm/trunk/test/CodeGen/AMDGPU/spill-m0.ll


Index: llvm/trunk/lib/Target/AMDGPU/SIWholeQuadMode.cpp
===================================================================

--- llvm/trunk/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ llvm/trunk/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -325,9 +325,7 @@
       unsigned Opcode = MI.getOpcode();
       char Flags = 0;
 
-      if (TII->isDS(Opcode) && CallingConv == CallingConv::AMDGPU_PS) {
-        Flags = StateWQM;
-      } else if (TII->isWQM(Opcode)) {
+      if (TII->isWQM(Opcode)) {
         // Sampling instructions don't need to produce results for all pixels
         // in a quad, they just require all inputs of a quad to have been
         // computed for derivatives.
Index: llvm/trunk/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll
+++ llvm/trunk/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll
@@ -355,7 +355,7 @@
 
 ; GCN: v_mov_b32_e32 v0, 2.0
 ; GCN: s_or_b64 exec, exec
-; GCN: s_and_b64 exec, exec
+; GCN-NOT: s_and_b64 exec, exec
 ; GCN: v_mov_b32_e32 v0, 1.0
 
 ; GCN: {{^BB[0-9]+_[0-9]+}}: ; %UnifiedReturnBlock
Index: llvm/trunk/test/CodeGen/AMDGPU/spill-m0.ll
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/spill-m0.ll
+++ llvm/trunk/test/CodeGen/AMDGPU/spill-m0.ll
@@ -95,7 +95,8 @@
 
 if:                                               ; preds = %main_body
   %lds_ptr = getelementptr [64 x float], [64 x float] addrspace(3)* @lds, i32 0, i32 0
-  %lds_data = load float, float addrspace(3)* %lds_ptr
+  %lds_data_ = load float, float addrspace(3)* %lds_ptr
+  %lds_data = call float @llvm.amdgcn.wqm.f32(float %lds_data_)
   br label %endif
 
 else:                                             ; preds = %main_body
@@ -208,6 +209,7 @@
 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
 declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
 declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
+declare float @llvm.amdgcn.wqm.f32(float) #1
 
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D46051.145455.patch
Type: text/x-patch
Size: 2208 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180507/0d27b969/attachment.bin>