[PATCH] D97392: [amdgpu] Atomic should be source of divergence.

Wed Feb 24 08:13:48 PST 2021

hliao created this revision.
Herald added subscribers: kerbowa, jfb, hiraditya, t-tye, tpr, dstuttard, yaxunl, nhaehnle, jvesely, kzhuravl, arsenm.
hliao requested review of this revision.
Herald added subscribers: llvm-commits, wdng.
Herald added a project: LLVM.

Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D97392

Files:
  llvm/lib/Target/AMDGPU/SIISelLowering.cpp
  llvm/test/CodeGen/AMDGPU/dag-divergence-atomic.ll


Index: llvm/test/CodeGen/AMDGPU/dag-divergence-atomic.ll
===================================================================

--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/dag-divergence-atomic.ll
@@ -0,0 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=gfx906 -o - %s | FileCheck %s
+
+%S = type <{ float, double }>
+
+; The result of an atomic op should not be used as a uniform value.
+define protected amdgpu_kernel void @foo(i32 addrspace(1)* %p, %S addrspace(1)* %q) {
+; CHECK-LABEL: foo:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; CHECK-NEXT:    v_mov_b32_e32 v2, 0
+; CHECK-NEXT:    v_mov_b32_e32 v3, 1
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    global_atomic_add v2, v2, v3, s[0:1] glc
+; CHECK-NEXT:    v_mov_b32_e32 v0, s2
+; CHECK-NEXT:    v_mov_b32_e32 v1, s3
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], v2, 12, v[0:1]
+; CHECK-NEXT:    v_mov_b32_e32 v2, 1.0
+; CHECK-NEXT:    global_store_dword v[0:1], v2, off
+; CHECK-NEXT:    s_endpgm
+  %n32 = atomicrmw add i32 addrspace(1)* %p, i32 1 monotonic
+  %n64 = zext i32 %n32 to i64
+  %p1 = getelementptr inbounds %S, %S addrspace(1)* %q, i64 %n64, i32 0
+  store float 1.0, float addrspace(1)* %p1
+  ret void
+}
Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -11799,6 +11799,29 @@
   case ISD::INTRINSIC_W_CHAIN:
     return AMDGPU::isIntrinsicSourceOfDivergence(
         cast<ConstantSDNode>(N->getOperand(1))->getZExtValue());
+  case ISD::ATOMIC_SWAP:
+  case ISD::ATOMIC_LOAD_ADD:
+  case ISD::ATOMIC_LOAD_SUB:
+  case ISD::ATOMIC_LOAD_AND:
+  case ISD::ATOMIC_LOAD_CLR:
+  case ISD::ATOMIC_LOAD_OR:
+  case ISD::ATOMIC_LOAD_XOR:
+  case ISD::ATOMIC_LOAD_NAND:
+  case ISD::ATOMIC_LOAD_MIN:
+  case ISD::ATOMIC_LOAD_MAX:
+  case ISD::ATOMIC_LOAD_UMIN:
+  case ISD::ATOMIC_LOAD_UMAX:
+  case ISD::ATOMIC_LOAD_FADD:
+  case ISD::ATOMIC_LOAD_FSUB:
+    // TODO: May need to check operand values to determine divergence. Some
+    // atomic ops, like CLR, are always divergent. The divergence of other
+    // atomic ops is value-dependent. For instance, atomic<op> p, v is always
+    // uniform if p is uniform and v is 0 for ADD/SUB/XOR/FADD/FSUB. But for
+    // any non-zero v, those atomic ops are always divergent, no matter the
+    // divergence of p or v. The remaining atomic ops only depend on the
+    // divergence of their operands. For example, atomic<op> p, v is always
+    // uniform if p and v are uniform.
+    return true;
   }
   return false;
 }


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D97392.326099.patch
Type: text/x-patch
Size: 2784 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210224/ea455590/attachment.bin>