[PATCH] D31731: [AMDGPU] Eliminate barrier if workgroup size is not greater than wavefront size

Wed Apr 5 16:08:07 PDT 2017

rampitec created this revision.
Herald added subscribers: t-tye, tpr, dstuttard, yaxunl, nhaehnle, wdng.

If the workgroup size is known to be not greater than the wavefront size,
the s_barrier instruction is not needed, since all threads are guaranteed
to come to the same point at the same time.

Note that the fence, which is translated into s_waitcnt, still remains,
since it is a separate IR instruction.


Repository:
  rL LLVM

https://reviews.llvm.org/D31731

Files:
  lib/Target/AMDGPU/SIISelLowering.cpp
  test/CodeGen/AMDGPU/barrier-elimination.ll
  test/CodeGen/AMDGPU/indirect-private-64.ll


Index: test/CodeGen/AMDGPU/indirect-private-64.ll
===================================================================

--- test/CodeGen/AMDGPU/indirect-private-64.ll
+++ test/CodeGen/AMDGPU/indirect-private-64.ll
@@ -121,4 +121,4 @@
 }
 
 attributes #0 = { convergent nounwind }
-attributes #1 = { nounwind "amdgpu-waves-per-eu"="1,2" "amdgpu-flat-work-group-size"="64,64" }
+attributes #1 = { nounwind "amdgpu-waves-per-eu"="1,2" "amdgpu-flat-work-group-size"="64,128" }
Index: test/CodeGen/AMDGPU/barrier-elimination.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/barrier-elimination.ll
@@ -0,0 +1,29 @@
+; RUN: llc -march=amdgcn < %s | FileCheck %s
+
+; CHECK-LABEL: {{^}}unknown_wgs:
+; CHECK: s_barrier
+define amdgpu_kernel void @unknown_wgs() {
+  tail call void @llvm.amdgcn.s.barrier() #0
+  ret void
+}
+
+; CHECK-LABEL: {{^}}flat_wgs_attr_32_128:
+; CHECK: s_barrier
+define amdgpu_kernel void @flat_wgs_attr_32_128() #1 {
+  tail call void @llvm.amdgcn.s.barrier() #0
+  ret void
+}
+
+; CHECK-LABEL: {{^}}flat_wgs_attr_32_64:
+; CHECK: :
+; CHECK-NEXT: s_endpgm
+define amdgpu_kernel void @flat_wgs_attr_32_64() #2 {
+  tail call void @llvm.amdgcn.s.barrier() #0
+  ret void
+}
+
+declare void @llvm.amdgcn.s.barrier() #0
+
+attributes #0 = { convergent nounwind }
+attributes #1 = { nounwind "amdgpu-flat-work-group-size"="32,128" }
+attributes #2 = { nounwind "amdgpu-flat-work-group-size"="32,64" }
Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -3159,6 +3159,16 @@
     SDValue Cast = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Src);
     return DAG.getNode(AMDGPUISD::KILL, DL, MVT::Other, Chain, Cast);
   }
+  case Intrinsic::amdgcn_s_barrier: {
+    if (getTargetMachine().getOptLevel() > CodeGenOpt::None) {
+      const MachineFunction &MF = DAG.getMachineFunction();
+      const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+      unsigned WGSize = ST.getFlatWorkGroupSizes(*MF.getFunction()).second;
+      if (WGSize <= ST.getWavefrontSize())
+        return Op.getOperand(0); // Chain
+    }
+    return SDValue();
+  };
   default:
     return Op;
   }


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D31731.94300.patch
Type: text/x-patch
Size: 2295 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170405/c420e411/attachment.bin>