[PATCH] D46051: [AMDGPU] Don't force WQM for DS op

Wed Apr 25 03:24:00 PDT 2018

tpr created this revision.
Herald added subscribers: llvm-commits, t-tye, dstuttard, yaxunl, nhaehnle, wdng, kzhuravl, arsenm, qcolombet.

Previously, all DS ops forced WQM in a pixel shader. That was a hack to
allow for graphics frontends using ds_swizzle to implement explicit
derivatives, on SI/CI at least where DPP is not available. But it forced
WQM for _any_ DS op.

With this commit, DS ops no longer force WQM. Both graphics frontends
(Mesa and LLPC) need to change to issue an explicit llvm.amdgcn.wqm
intrinsic call when calculating explicit derivatives.


Repository:
  rL LLVM

https://reviews.llvm.org/D46051

Files:
  lib/Target/AMDGPU/SIWholeQuadMode.cpp
  test/CodeGen/AMDGPU/multi-divergent-exit-region.ll
  test/CodeGen/AMDGPU/spill-m0.ll


Index: test/CodeGen/AMDGPU/spill-m0.ll
===================================================================

--- test/CodeGen/AMDGPU/spill-m0.ll
+++ test/CodeGen/AMDGPU/spill-m0.ll
@@ -95,7 +95,8 @@
 
 if:                                               ; preds = %main_body
   %lds_ptr = getelementptr [64 x float], [64 x float] addrspace(3)* @lds, i32 0, i32 0
-  %lds_data = load float, float addrspace(3)* %lds_ptr
+  %lds_data_ = load float, float addrspace(3)* %lds_ptr
+  %lds_data = call float @llvm.amdgcn.wqm.f32(float %lds_data_)
   br label %endif
 
 else:                                             ; preds = %main_body
@@ -208,6 +209,7 @@
 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
 declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
 declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
+declare float @llvm.amdgcn.wqm.f32(float) #1
 
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
Index: test/CodeGen/AMDGPU/multi-divergent-exit-region.ll
===================================================================
--- test/CodeGen/AMDGPU/multi-divergent-exit-region.ll
+++ test/CodeGen/AMDGPU/multi-divergent-exit-region.ll
@@ -355,7 +355,6 @@
 
 ; GCN: v_mov_b32_e32 v0, 2.0
 ; GCN: s_or_b64 exec, exec
-; GCN: s_and_b64 exec, exec
 ; GCN: v_mov_b32_e32 v0, 1.0
 
 ; GCN: {{^BB[0-9]+_[0-9]+}}: ; %UnifiedReturnBlock
Index: lib/Target/AMDGPU/SIWholeQuadMode.cpp
===================================================================
--- lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -325,9 +325,7 @@
       unsigned Opcode = MI.getOpcode();
       char Flags = 0;
 
-      if (TII->isDS(Opcode) && CallingConv == CallingConv::AMDGPU_PS) {
-        Flags = StateWQM;
-      } else if (TII->isWQM(Opcode)) {
+      if (TII->isWQM(Opcode)) {
         // Sampling instructions don't need to produce results for all pixels
         // in a quad, they just require all inputs of a quad to have been
         // computed for derivatives.


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D46051.143891.patch
Type: text/x-patch
Size: 2076 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180425/2978e3f4/attachment.bin>