[llvm] 831457c - [AMDGPU][GlobalISel] Eliminate barrier if workgroup size is not greater than wavefront size
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 26 05:52:26 PDT 2020
Author: Jay Foad
Date: 2020-08-26T13:47:51+01:00
New Revision: 831457c6d59edb0e381917b35ca6099f9b86c6e8
URL: https://github.com/llvm/llvm-project/commit/831457c6d59edb0e381917b35ca6099f9b86c6e8
DIFF: https://github.com/llvm/llvm-project/commit/831457c6d59edb0e381917b35ca6099f9b86c6e8.diff
LOG: [AMDGPU][GlobalISel] Eliminate barrier if workgroup size is not greater than wavefront size
If the workgroup size is known to be not greater than the wavefront size,
the s_barrier instruction is not needed, since all threads are guaranteed
to come to the same point at the same time.
This is the same optimization that was implemented for SelectionDAG in
D31731.
Differential Revision: https://reviews.llvm.org/D86609
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
llvm/test/CodeGen/AMDGPU/barrier-elimination.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 880c7e4d44c7..3f39f6f21c1c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1422,6 +1422,20 @@ bool AMDGPUInstructionSelector::selectDSAppendConsume(MachineInstr &MI,
return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
}
+bool AMDGPUInstructionSelector::selectSBarrier(MachineInstr &MI) const {
+ if (TM.getOptLevel() > CodeGenOpt::None) {
+ unsigned WGSize = STI.getFlatWorkGroupSizes(MF->getFunction()).second;
+ if (WGSize <= STI.getWavefrontSize()) {
+ MachineBasicBlock *MBB = MI.getParent();
+ const DebugLoc &DL = MI.getDebugLoc();
+ BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::WAVE_BARRIER));
+ MI.eraseFromParent();
+ return true;
+ }
+ }
+ return selectImpl(MI, *CoverageInfo);
+}
+
static bool parseTexFail(uint64_t TexFailCtrl, bool &TFE, bool &LWE,
bool &IsTexFail) {
if (TexFailCtrl)
@@ -1726,6 +1740,8 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
return selectDSAppendConsume(I, true);
case Intrinsic::amdgcn_ds_consume:
return selectDSAppendConsume(I, false);
+ case Intrinsic::amdgcn_s_barrier:
+ return selectSBarrier(I);
default: {
return selectImpl(I, *CoverageInfo);
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 447893891147..2176e2b54951 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -118,6 +118,7 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
bool selectDSOrderedIntrinsic(MachineInstr &MI, Intrinsic::ID IID) const;
bool selectDSGWSIntrinsic(MachineInstr &MI, Intrinsic::ID IID) const;
bool selectDSAppendConsume(MachineInstr &MI, bool IsAppend) const;
+ bool selectSBarrier(MachineInstr &MI) const;
bool selectImageIntrinsic(MachineInstr &MI,
const AMDGPU::ImageDimIntrinsicInfo *Intr) const;
diff --git a/llvm/test/CodeGen/AMDGPU/barrier-elimination.ll b/llvm/test/CodeGen/AMDGPU/barrier-elimination.ll
index c526baaab9cd..84b17f08bc3b 100644
--- a/llvm/test/CodeGen/AMDGPU/barrier-elimination.ll
+++ b/llvm/test/CodeGen/AMDGPU/barrier-elimination.ll
@@ -1,4 +1,5 @@
; RUN: llc -march=amdgcn < %s | FileCheck %s
+; RUN: llc -march=amdgcn < %s -global-isel | FileCheck %s
; CHECK-LABEL: {{^}}unknown_wgs:
; CHECK: s_barrier
More information about the llvm-commits mailing list