[llvm] r299659 - [AMDGPU] Eliminate barrier if workgroup size is not greater than wavefront size
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 6 09:48:30 PDT 2017
Author: rampitec
Date: Thu Apr 6 11:48:30 2017
New Revision: 299659
URL: http://llvm.org/viewvc/llvm-project?rev=299659&view=rev
Log:
[AMDGPU] Eliminate barrier if workgroup size is not greater than wavefront size
If a workgroup size is known to be not greater than the wavefront size,
the s_barrier instruction is not needed, since all threads are guaranteed
to reach the same point at the same time.
Differential Revision: https://reviews.llvm.org/D31731
Added:
llvm/trunk/test/CodeGen/AMDGPU/barrier-elimination.ll
Modified:
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/trunk/test/CodeGen/AMDGPU/indirect-private-64.ll
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=299659&r1=299658&r2=299659&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Thu Apr 6 11:48:30 2017
@@ -3159,6 +3159,17 @@ SDValue SITargetLowering::LowerINTRINSIC
SDValue Cast = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Src);
return DAG.getNode(AMDGPUISD::KILL, DL, MVT::Other, Chain, Cast);
}
+ case Intrinsic::amdgcn_s_barrier: {
+ if (getTargetMachine().getOptLevel() > CodeGenOpt::None) {
+ const MachineFunction &MF = DAG.getMachineFunction();
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+ unsigned WGSize = ST.getFlatWorkGroupSizes(*MF.getFunction()).second;
+ if (WGSize <= ST.getWavefrontSize())
+ return SDValue(DAG.getMachineNode(AMDGPU::WAVE_BARRIER, DL, MVT::Other,
+ Op.getOperand(0)), 0);
+ }
+ return SDValue();
+ };
default:
return Op;
}
Added: llvm/trunk/test/CodeGen/AMDGPU/barrier-elimination.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/barrier-elimination.ll?rev=299659&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/barrier-elimination.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/barrier-elimination.ll Thu Apr 6 11:48:30 2017
@@ -0,0 +1,30 @@
+; RUN: llc -march=amdgcn < %s | FileCheck %s
+
+; CHECK-LABEL: {{^}}unknown_wgs:
+; CHECK: s_barrier
+define amdgpu_kernel void @unknown_wgs() {
+ tail call void @llvm.amdgcn.s.barrier() #0
+ ret void
+}
+
+; CHECK-LABEL: {{^}}flat_wgs_attr_32_128:
+; CHECK: s_barrier
+define amdgpu_kernel void @flat_wgs_attr_32_128() #1 {
+ tail call void @llvm.amdgcn.s.barrier() #0
+ ret void
+}
+
+; CHECK-LABEL: {{^}}flat_wgs_attr_32_64:
+; CHECK: :
+; CHECK-NEXT: ; wave barrier
+; CHECK-NEXT: s_endpgm
+define amdgpu_kernel void @flat_wgs_attr_32_64() #2 {
+ tail call void @llvm.amdgcn.s.barrier() #0
+ ret void
+}
+
+declare void @llvm.amdgcn.s.barrier() #0
+
+attributes #0 = { convergent nounwind }
+attributes #1 = { nounwind "amdgpu-flat-work-group-size"="32,128" }
+attributes #2 = { nounwind "amdgpu-flat-work-group-size"="32,64" }
Modified: llvm/trunk/test/CodeGen/AMDGPU/indirect-private-64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/indirect-private-64.ll?rev=299659&r1=299658&r2=299659&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/indirect-private-64.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/indirect-private-64.ll Thu Apr 6 11:48:30 2017
@@ -121,4 +121,4 @@ define amdgpu_kernel void @private_acces
}
attributes #0 = { convergent nounwind }
-attributes #1 = { nounwind "amdgpu-waves-per-eu"="1,2" "amdgpu-flat-work-group-size"="64,64" }
+attributes #1 = { nounwind "amdgpu-waves-per-eu"="1,2" "amdgpu-flat-work-group-size"="64,128" }
More information about the llvm-commits
mailing list