[PATCH] D57894: AMDGPU: Fix @llvm.amdgcn.wqm.vote implementation

Connor Abbott via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 7 07:31:45 PST 2019


cwabbott created this revision.
cwabbott added reviewers: arsenm, nhaehnle, mareko.
Herald added subscribers: llvm-commits, t-tye, tpr, dstuttard, yaxunl, wdng, jvesely, kzhuravl.
Herald added a project: LLVM.

My understanding, and the current behavior of the backend, is that
booleans lowered to bitmasks have undefined values in bits corresponding
to lanes that aren't active. If we just lower the intrinsic directly to
S_WQM_B64 and the bits for inactive lanes happen to be 1, the result may
be wrong whenever not every thread in a quad is active. Fix this by
AND'ing the input with EXEC to mask out the garbage lanes.

This fixes some VK conformance tests when making the clustered subgroup
reduce operations use @llvm.amdgcn.wqm.vote. I added an extra test
(phi_not) which miscompiles without this patch.

One other way to handle this would be to change the way we lower boolean
operations like NOT so that inactive lanes are always 0. As far as I can
see, in terms of code quality, this would only prevent a few transforms,
like S_ORN2_B64 for src0 | ~src1. The main change would be lowering NOT
to an XOR with EXEC. If we instead keep the approach in this patch, we'd
have to do an analysis to prove that the AND is redundant in order to get
rid of these extra instructions.
Thoughts?


Repository:
  rL LLVM

https://reviews.llvm.org/D57894

Files:
  lib/Target/AMDGPU/SOPInstructions.td
  test/CodeGen/AMDGPU/llvm.amdgcn.wqm.vote.ll


Index: test/CodeGen/AMDGPU/llvm.amdgcn.wqm.vote.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.amdgcn.wqm.vote.ll
+++ test/CodeGen/AMDGPU/llvm.amdgcn.wqm.vote.ll
@@ -2,7 +2,8 @@
 
 ;CHECK-LABEL: {{^}}ret:
 ;CHECK: v_cmp_eq_u32_e32 [[CMP:[^,]+]], v0, v1
-;CHECK: s_wqm_b64 [[WQM:[^,]+]], [[CMP]]
+;CHECK: s_and_b64 [[AND:[^,]+]], [[CMP]], exec
+;CHECK: s_wqm_b64 [[WQM:[^,]+]], [[AND]]
 ;CHECK: v_cndmask_b32_e64 v0, 0, 1.0, [[WQM]]
 define amdgpu_ps float @ret(i32 %v0, i32 %v1) #1 {
 main_body:
@@ -32,7 +33,8 @@
 
 ;CHECK-LABEL: {{^}}kill:
 ;CHECK: v_cmp_eq_u32_e32 [[CMP:[^,]+]], v0, v1
-;CHECK: s_wqm_b64 [[WQM:[^,]+]], [[CMP]]
+;CHECK: s_and_b64 [[AND:[^,]+]], [[CMP]], exec
+;CHECK: s_wqm_b64 [[WQM:[^,]+]], [[AND]]
 ;CHECK: s_and_b64 exec, exec, [[WQM]]
 ;CHECK: s_endpgm
 define amdgpu_ps void @kill(i32 %v0, i32 %v1) #1 {
@@ -43,6 +45,32 @@
   ret void
 }
 
+;CHECK-LABEL: {{^}}phi_not:
+;CHECK: s_xor_b64 [[NOT:[^,]+]], {{[^,]+}}, -1
+;CHECK-NEXT: s_and_b64 [[AND:[^,]+]], [[NOT]], exec
+;CHECK-NEXT: s_wqm_b64 [[WQM:[^,]+]], [[AND]]
+;CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, [[WQM]]
+define amdgpu_ps float @phi_not(i32 %v0, i32 %v1, i32 %v2) {
+  %cc = icmp eq i32 %v0, 0
+
+  br i1 %cc, label %if, label %else
+
+  if:
+  %tmp1 = icmp ne i32 %v1, 1
+  br label %endif
+
+  else:
+  %tmp2 = icmp eq i32 %v2, 2
+  br label %endif
+
+  endif:
+  %sel = phi i1 [ %tmp1, %if ], [ %tmp2, %else ]
+  %not = xor i1 %sel, true
+  %w = call i1 @llvm.amdgcn.wqm.vote(i1 %not)
+  %r = select i1 %w, float 1.0, float 0.0
+  ret float %r
+}
+
 declare void @llvm.amdgcn.kill(i1) #1
 declare i1 @llvm.amdgcn.wqm.vote(i1)
 
Index: lib/Target/AMDGPU/SOPInstructions.td
===================================================================
--- lib/Target/AMDGPU/SOPInstructions.td
+++ lib/Target/AMDGPU/SOPInstructions.td
@@ -146,9 +146,7 @@
     [(set i64:$sdst, (not i64:$src0))]
   >;
   def S_WQM_B32 : SOP1_32 <"s_wqm_b32">;
-  def S_WQM_B64 : SOP1_64 <"s_wqm_b64",
-    [(set i1:$sdst, (int_amdgcn_wqm_vote i1:$src0))]
-  >;
+  def S_WQM_B64 : SOP1_64 <"s_wqm_b64">;
 } // End Defs = [SCC]
 
 
@@ -1037,6 +1035,15 @@
   (S_GETREG_B32 (as_i16imm $simm16))
 >;
 
+//===----------------------------------------------------------------------===//
+// WQM Intrinsic Pattern.
+//===----------------------------------------------------------------------===//
+def : GCNPat<
+  (i1 (int_amdgcn_wqm_vote i1:$src0)),
+  (S_WQM_B64 (S_AND_B64 $src0, (i64 EXEC)))
+>;
+
+
 //===----------------------------------------------------------------------===//
 // SOP1 Patterns
 //===----------------------------------------------------------------------===//


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D57894.185761.patch
Type: text/x-patch
Size: 2696 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20190207/989759cd/attachment.bin>


More information about the llvm-commits mailing list