[llvm] r273937 - AMDGPU: Move subtarget feature checks into passes
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 27 13:32:14 PDT 2016
Author: arsenm
Date: Mon Jun 27 15:32:13 2016
New Revision: 273937
URL: http://llvm.org/viewvc/llvm-project?rev=273937&view=rev
Log:
AMDGPU: Move subtarget feature checks into passes
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPU.td
llvm/trunk/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
llvm/trunk/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
llvm/trunk/test/CodeGen/AMDGPU/captured-frame-index.ll
llvm/trunk/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
llvm/trunk/test/CodeGen/AMDGPU/extload-private.ll
llvm/trunk/test/CodeGen/AMDGPU/parallelandifcollapse.ll
llvm/trunk/test/CodeGen/AMDGPU/structurize1.ll
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPU.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPU.td?rev=273937&r1=273936&r2=273937&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPU.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPU.td Mon Jun 27 15:32:13 2016
@@ -241,12 +241,6 @@ def FeatureEnableUnsafeDSOffsetFolding :
"Force using DS instruction immediate offsets on SI"
>;
-def FeatureIfCvt : SubtargetFeature <"disable-ifcvt",
- "EnableIfCvt",
- "false",
- "Disable the if conversion pass"
->;
-
def FeatureEnableSIScheduler : SubtargetFeature<"si-scheduler",
"EnableSIScheduler",
"true",
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp?rev=273937&r1=273936&r2=273937&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp Mon Jun 27 15:32:13 2016
@@ -124,6 +124,10 @@ bool AMDGPUPromoteAlloca::runOnFunction(
if (!TM || skipFunction(F))
return false;
+ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(F);
+ if (!ST.isPromoteAllocaEnabled())
+ return false;
+
FunctionType *FTy = F.getFunctionType();
// If the function has any arguments in the local address space, then it's
@@ -139,8 +143,6 @@ bool AMDGPUPromoteAlloca::runOnFunction(
}
}
- const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(F);
-
LocalMemLimit = ST.getLocalMemorySize();
if (LocalMemLimit == 0)
return false;
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp?rev=273937&r1=273936&r2=273937&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp Mon Jun 27 15:32:13 2016
@@ -105,7 +105,6 @@ AMDGPUSubtarget::AMDGPUSubtarget(const T
EnableVGPRSpilling(false),
EnablePromoteAlloca(false),
- EnableIfCvt(true),
EnableLoadStoreOpt(false),
EnableUnsafeDSOffsetFolding(false),
EnableSIScheduler(false),
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h?rev=273937&r1=273936&r2=273937&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h Mon Jun 27 15:32:13 2016
@@ -82,7 +82,6 @@ protected:
// Used as options.
bool EnableVGPRSpilling;
bool EnablePromoteAlloca;
- bool EnableIfCvt;
bool EnableLoadStoreOpt;
bool EnableUnsafeDSOffsetFolding;
bool EnableSIScheduler;
@@ -222,10 +221,6 @@ public:
return EnablePromoteAlloca;
}
- bool isIfCvtEnabled() const {
- return EnableIfCvt;
- }
-
bool unsafeDSOffsetFoldingEnabled() const {
return EnableUnsafeDSOffsetFolding;
}
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp?rev=273937&r1=273936&r2=273937&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp Mon Jun 27 15:32:13 2016
@@ -45,6 +45,18 @@ static cl::opt<bool> EnableR600Structuri
cl::desc("Use StructurizeCFG IR pass"),
cl::init(true));
+static cl::opt<bool> EnableSROA(
+ "amdgpu-sroa",
+ cl::desc("Run SROA after promote alloca pass"),
+ cl::ReallyHidden,
+ cl::init(true));
+
+static cl::opt<bool> EnableR600IfConvert(
+ "r600-if-convert",
+ cl::desc("Use if conversion pass"),
+ cl::ReallyHidden,
+ cl::init(true));
+
extern "C" void LLVMInitializeAMDGPUTarget() {
// Register the target
RegisterTargetMachine<R600TargetMachine> X(TheAMDGPUTarget);
@@ -212,12 +224,7 @@ public:
}
ScheduleDAGInstrs *
- createMachineScheduler(MachineSchedContext *C) const override {
- const SISubtarget *ST = getGCNTargetMachine().getSubtargetImpl();
- if (ST->enableSIScheduler())
- return createSIMachineScheduler(C);
- return nullptr;
- }
+ createMachineScheduler(MachineSchedContext *C) const override;
bool addPreISel() override;
void addMachineSSAOptimization() override;
@@ -285,10 +292,11 @@ void AMDGPUPassConfig::addIRPasses() {
addPass(createAMDGPUOpenCLImageTypeLoweringPass());
const AMDGPUTargetMachine &TM = getAMDGPUTargetMachine();
- const AMDGPUSubtarget &ST = *TM.getSubtargetImpl();
- if (TM.getOptLevel() > CodeGenOpt::None && ST.isPromoteAllocaEnabled()) {
+ if (TM.getOptLevel() > CodeGenOpt::None) {
addPass(createAMDGPUPromoteAlloca(&TM));
- addPass(createSROAPass());
+
+ if (EnableSROA)
+ addPass(createSROAPass());
}
addStraightLineScalarOptimizationPasses();
@@ -344,9 +352,8 @@ void R600PassConfig::addPreRegAlloc() {
}
void R600PassConfig::addPreSched2() {
- const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
addPass(createR600EmitClauseMarkers(), false);
- if (ST.isIfCvtEnabled())
+ if (EnableR600IfConvert)
addPass(&IfConverterID, false);
addPass(createR600ClauseMergePass(*TM), false);
}
@@ -367,6 +374,14 @@ TargetPassConfig *R600TargetMachine::cre
// GCN Pass Setup
//===----------------------------------------------------------------------===//
+ScheduleDAGInstrs *GCNPassConfig::createMachineScheduler(
+ MachineSchedContext *C) const {
+ const SISubtarget &ST = C->MF->getSubtarget<SISubtarget>();
+ if (ST.enableSIScheduler())
+ return createSIMachineScheduler(C);
+ return nullptr;
+}
+
bool GCNPassConfig::addPreISel() {
AMDGPUPassConfig::addPreISel();
@@ -415,8 +430,6 @@ bool GCNPassConfig::addRegBankSelect() {
#endif
void GCNPassConfig::addPreRegAlloc() {
- const SISubtarget &ST = *getGCNTargetMachine().getSubtargetImpl();
-
// This needs to be run directly before register allocation because
// earlier passes might recompute live intervals.
// TODO: handle CodeGenOpt::None; fast RA ignores spill weights set by the pass
@@ -424,15 +437,18 @@ void GCNPassConfig::addPreRegAlloc() {
insertPass(&MachineSchedulerID, &SIFixControlFlowLiveIntervalsID);
}
- if (getOptLevel() > CodeGenOpt::None && ST.loadStoreOptEnabled()) {
+ if (getOptLevel() > CodeGenOpt::None) {
// Don't do this with no optimizations since it throws away debug info by
// merging nonadjacent loads.
// This should be run after scheduling, but before register allocation. It
// also need extra copies to the address operand to be eliminated.
+
+ // FIXME: Move pre-RA and remove extra reg coalescer run.
insertPass(&MachineSchedulerID, &SILoadStoreOptimizerID);
insertPass(&MachineSchedulerID, &RegisterCoalescerID);
}
+
addPass(createSIShrinkInstructionsPass());
addPass(createSIWholeQuadModePass());
}
Modified: llvm/trunk/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp?rev=273937&r1=273936&r2=273937&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp Mon Jun 27 15:32:13 2016
@@ -412,6 +412,9 @@ bool SILoadStoreOptimizer::runOnMachineF
return false;
const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
+ if (!STM.loadStoreOptEnabled())
+ return false;
+
TII = STM.getInstrInfo();
TRI = &TII->getRegisterInfo();
Modified: llvm/trunk/test/CodeGen/AMDGPU/captured-frame-index.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/captured-frame-index.ll?rev=273937&r1=273936&r2=273937&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/captured-frame-index.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/captured-frame-index.ll Mon Jun 27 15:32:13 2016
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; GCN-LABEL: {{^}}stored_fi_to_lds:
; GCN: s_load_dword [[LDSPTR:s[0-9]+]]
Modified: llvm/trunk/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/cgp-addressing-modes.ll?rev=273937&r1=273936&r2=273937&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/cgp-addressing-modes.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/cgp-addressing-modes.ll Mon Jun 27 15:32:13 2016
@@ -1,9 +1,9 @@
; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tahiti < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-SI %s
; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=bonaire < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-CI %s
; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tonga < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-VI %s
-; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
-; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-promote-alloca -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; OPT-LABEL: @test_sink_global_small_offset_i32(
; OPT-CI-NOT: getelementptr i32, i32 addrspace(1)* %in
Modified: llvm/trunk/test/CodeGen/AMDGPU/extload-private.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/extload-private.ll?rev=273937&r1=273936&r2=273937&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/extload-private.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/extload-private.ll Mon Jun 27 15:32:13 2016
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=amdgcn -mcpu=SI -mattr=-promote-alloca -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
-; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-promote-alloca -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}load_i8_sext_private:
; SI: buffer_load_sbyte v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen
@@ -39,7 +39,7 @@ entry:
define void @load_i16_zext_private(i32 addrspace(1)* %out) {
entry:
%tmp0 = alloca i16
- %tmp1 = load i16, i16* %tmp0
+ %tmp1 = load volatile i16, i16* %tmp0
%tmp2 = zext i16 %tmp1 to i32
store i32 %tmp2, i32 addrspace(1)* %out
ret void
Modified: llvm/trunk/test/CodeGen/AMDGPU/parallelandifcollapse.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/parallelandifcollapse.ll?rev=273937&r1=273936&r2=273937&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/parallelandifcollapse.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/parallelandifcollapse.ll Mon Jun 27 15:32:13 2016
@@ -1,5 +1,4 @@
-; Function Attrs: nounwind
-; RUN: llc -march=r600 -mcpu=redwood -mattr=-promote-alloca < %s | FileCheck %s
+; RUN: llc -march=r600 -mcpu=redwood -mattr=-promote-alloca -amdgpu-sroa=0 < %s | FileCheck %s
;
; CFG flattening should use parallel-and mode to generate branch conditions and
; then merge if-regions with the same bodies.
Modified: llvm/trunk/test/CodeGen/AMDGPU/structurize1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/structurize1.ll?rev=273937&r1=273936&r2=273937&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/structurize1.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/structurize1.ll Mon Jun 27 15:32:13 2016
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=r600 -mattr=disable-ifcvt -mcpu=redwood | FileCheck %s
+; RUN: llc -march=r600 -mcpu=redwood -r600-if-convert=0 < %s | FileCheck %s
; This tests for abug where the AMDILCFGStructurizer was crashing on loops
; like this:
More information about the llvm-commits mailing list