[llvm] r333605 - AMDGPU: Split AMDGPUTTI into GCNTTI and R600TTI
Tom Stellard via llvm-commits
llvm-commits at lists.llvm.org
Wed May 30 15:55:36 PDT 2018
Author: tstellar
Date: Wed May 30 15:55:35 2018
New Revision: 333605
URL: http://llvm.org/viewvc/llvm-project?rev=333605&view=rev
Log:
AMDGPU: Split AMDGPUTTI into GCNTTI and R600TTI
Reviewers: arsenm, nhaehnle
Reviewed By: arsenm
Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, llvm-commits, t-tye
Differential Revision: https://reviews.llvm.org/D47359
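In outline, the change keeps AMDGPUTTIImpl only as a small shared helper for the
unrolling heuristics and adds two free-standing implementations that embed it.
A simplified sketch of the resulting class layout (bodies elided; the full
declarations are in the AMDGPUTargetTransformInfo.h diff below):

  class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
  public:
    // Only the generation-independent logic stays here.
    void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                 TTI::UnrollingPreferences &UP);
  };

  class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
    AMDGPUTTIImpl CommonTTI; // shared unrolling logic, by composition
    // ... all GCN (SI+) cost-model queries ...
  };

  class R600TTIImpl final : public BasicTTIImplBase<R600TTIImpl> {
    AMDGPUTTIImpl CommonTTI;
    // ... pre-GCN (R600) queries, with the old generation checks folded in ...
  };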
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.h
llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp?rev=333605&r1=333604&r2=333605&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp Wed May 30 15:55:35 2018
@@ -439,6 +439,11 @@ const R600Subtarget *R600TargetMachine::
return I.get();
}
+TargetTransformInfo
+R600TargetMachine::getTargetTransformInfo(const Function &F) {
+ return TargetTransformInfo(R600TTIImpl(this, F));
+}
+
//===----------------------------------------------------------------------===//
// GCN Target Machine (SI+)
//===----------------------------------------------------------------------===//
@@ -472,6 +477,11 @@ const SISubtarget *GCNTargetMachine::get
return I.get();
}
+TargetTransformInfo
+GCNTargetMachine::getTargetTransformInfo(const Function &F) {
+ return TargetTransformInfo(GCNTTIImpl(this, F));
+}
+
//===----------------------------------------------------------------------===//
// AMDGPU Pass Setup
//===----------------------------------------------------------------------===//
@@ -561,11 +571,6 @@ public:
} // end anonymous namespace
-TargetTransformInfo
-AMDGPUTargetMachine::getTargetTransformInfo(const Function &F) {
- return TargetTransformInfo(AMDGPUTTIImpl(this, F));
-}
-
void AMDGPUPassConfig::addEarlyCSEOrGVNPass() {
if (getOptLevel() == CodeGenOpt::Aggressive)
addPass(createGVNPass());
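For context, these per-subclass overrides are what TargetTransformInfoWrapperPass
ends up returning when a pass asks for TTI. A hypothetical consumer-side sketch
(MyPass is illustrative, not part of this patch):

  // Inside a legacy-PM function pass (hypothetical usage):
  bool MyPass::runOnFunction(Function &F) {
    const TargetTransformInfo &TTI =
        getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
    // Now routes to GCNTTIImpl on SI+ subtargets and to R600TTIImpl on R600.
    unsigned Regs = TTI.getNumberOfRegisters(/*Vector=*/false);
    (void)Regs; // a real pass would use this to drive its heuristics
    return false;
  }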
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.h?rev=333605&r1=333604&r2=333605&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.h Wed May 30 15:55:35 2018
@@ -55,7 +55,6 @@ public:
const AMDGPUIntrinsicInfo *getIntrinsicInfo() const override {
return &IntrinsicInfo;
}
- TargetTransformInfo getTargetTransformInfo(const Function &F) override;
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF.get();
@@ -91,6 +90,8 @@ public:
const R600Subtarget *getSubtargetImpl(const Function &) const override;
+ TargetTransformInfo getTargetTransformInfo(const Function &F) override;
+
bool isMachineVerifierClean() const override {
return false;
}
@@ -114,6 +115,8 @@ public:
const SISubtarget *getSubtargetImpl(const Function &) const override;
+ TargetTransformInfo getTargetTransformInfo(const Function &F) override;
+
bool useIPRA() const override {
return true;
}
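Net effect of this header change, in outline: the virtual override moves from
the common base class down to each concrete target machine (simplified from the
hunks above):

  class AMDGPUTargetMachine : public LLVMTargetMachine {
    // no longer declares getTargetTransformInfo()
  };

  class R600TargetMachine final : public AMDGPUTargetMachine {
    TargetTransformInfo getTargetTransformInfo(const Function &F) override;
  };

  class GCNTargetMachine final : public AMDGPUTargetMachine {
    TargetTransformInfo getTargetTransformInfo(const Function &F) override;
  };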
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp?rev=333605&r1=333604&r2=333605&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp Wed May 30 15:55:35 2018
@@ -211,32 +211,27 @@ void AMDGPUTTIImpl::getUnrollingPreferen
}
}
-unsigned AMDGPUTTIImpl::getHardwareNumberOfRegisters(bool Vec) const {
+unsigned GCNTTIImpl::getHardwareNumberOfRegisters(bool Vec) const {
// The concept of vector registers doesn't really exist. Some packed vector
// operations operate on the normal 32-bit registers.
-
- // Number of VGPRs on SI.
- if (ST->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)
- return 256;
-
- return 4 * 128; // XXX - 4 channels. Should these count as vector instead?
+ return 256;
}
-unsigned AMDGPUTTIImpl::getNumberOfRegisters(bool Vec) const {
+unsigned GCNTTIImpl::getNumberOfRegisters(bool Vec) const {
// This is really the number of registers to fill when vectorizing /
// interleaving loops, so we lie to avoid trying to use all registers.
return getHardwareNumberOfRegisters(Vec) >> 3;
}
-unsigned AMDGPUTTIImpl::getRegisterBitWidth(bool Vector) const {
+unsigned GCNTTIImpl::getRegisterBitWidth(bool Vector) const {
return 32;
}
-unsigned AMDGPUTTIImpl::getMinVectorRegisterBitWidth() const {
+unsigned GCNTTIImpl::getMinVectorRegisterBitWidth() const {
return 32;
}
-unsigned AMDGPUTTIImpl::getLoadVectorFactor(unsigned VF, unsigned LoadSize,
+unsigned GCNTTIImpl::getLoadVectorFactor(unsigned VF, unsigned LoadSize,
unsigned ChainSizeInBytes,
VectorType *VecTy) const {
unsigned VecRegBitWidth = VF * LoadSize;
@@ -247,7 +242,7 @@ unsigned AMDGPUTTIImpl::getLoadVectorFac
return VF;
}
-unsigned AMDGPUTTIImpl::getStoreVectorFactor(unsigned VF, unsigned StoreSize,
+unsigned GCNTTIImpl::getStoreVectorFactor(unsigned VF, unsigned StoreSize,
unsigned ChainSizeInBytes,
VectorType *VecTy) const {
unsigned VecRegBitWidth = VF * StoreSize;
@@ -257,13 +252,11 @@ unsigned AMDGPUTTIImpl::getStoreVectorFa
return VF;
}
-unsigned AMDGPUTTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
+unsigned GCNTTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
AMDGPUAS AS = ST->getAMDGPUAS();
if (AddrSpace == AS.GLOBAL_ADDRESS ||
AddrSpace == AS.CONSTANT_ADDRESS ||
AddrSpace == AS.CONSTANT_ADDRESS_32BIT) {
- if (ST->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
- return 128;
return 512;
}
@@ -275,16 +268,10 @@ unsigned AMDGPUTTIImpl::getLoadStoreVecR
if (AddrSpace == AS.PRIVATE_ADDRESS)
return 8 * ST->getMaxPrivateElementSize();
- if (ST->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS &&
- (AddrSpace == AS.PARAM_D_ADDRESS ||
- AddrSpace == AS.PARAM_I_ADDRESS ||
- (AddrSpace >= AS.CONSTANT_BUFFER_0 &&
- AddrSpace <= AS.CONSTANT_BUFFER_15)))
- return 128;
llvm_unreachable("unhandled address space");
}
-bool AMDGPUTTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
+bool GCNTTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
unsigned Alignment,
unsigned AddrSpace) const {
// We allow vectorization of flat stores, even though we may need to decompose
@@ -297,19 +284,19 @@ bool AMDGPUTTIImpl::isLegalToVectorizeMe
return true;
}
-bool AMDGPUTTIImpl::isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
+bool GCNTTIImpl::isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
unsigned Alignment,
unsigned AddrSpace) const {
return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
}
-bool AMDGPUTTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
+bool GCNTTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
unsigned Alignment,
unsigned AddrSpace) const {
return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
}
-unsigned AMDGPUTTIImpl::getMaxInterleaveFactor(unsigned VF) {
+unsigned GCNTTIImpl::getMaxInterleaveFactor(unsigned VF) {
// Disable unrolling if the loop is not vectorized.
// TODO: Enable this again.
if (VF == 1)
@@ -318,7 +305,7 @@ unsigned AMDGPUTTIImpl::getMaxInterleave
return 8;
}
-bool AMDGPUTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
+bool GCNTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
MemIntrinsicInfo &Info) const {
switch (Inst->getIntrinsicID()) {
case Intrinsic::amdgcn_atomic_inc:
@@ -347,7 +334,7 @@ bool AMDGPUTTIImpl::getTgtMemIntrinsic(I
}
}
-int AMDGPUTTIImpl::getArithmeticInstrCost(
+int GCNTTIImpl::getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args ) {
@@ -457,7 +444,7 @@ int AMDGPUTTIImpl::getArithmeticInstrCos
Opd1PropInfo, Opd2PropInfo);
}
-unsigned AMDGPUTTIImpl::getCFInstrCost(unsigned Opcode) {
+unsigned GCNTTIImpl::getCFInstrCost(unsigned Opcode) {
// XXX - For some reason this isn't called for switch.
switch (Opcode) {
case Instruction::Br:
@@ -468,7 +455,7 @@ unsigned AMDGPUTTIImpl::getCFInstrCost(u
}
}
-int AMDGPUTTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *Ty,
+int GCNTTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *Ty,
bool IsPairwise) {
EVT OrigTy = TLI->getValueType(DL, Ty);
@@ -483,7 +470,7 @@ int AMDGPUTTIImpl::getArithmeticReductio
return LT.first * getFullRateInstrCost();
}
-int AMDGPUTTIImpl::getMinMaxReductionCost(Type *Ty, Type *CondTy,
+int GCNTTIImpl::getMinMaxReductionCost(Type *Ty, Type *CondTy,
bool IsPairwise,
bool IsUnsigned) {
EVT OrigTy = TLI->getValueType(DL, Ty);
@@ -499,7 +486,7 @@ int AMDGPUTTIImpl::getMinMaxReductionCos
return LT.first * getHalfRateInstrCost();
}
-int AMDGPUTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
+int GCNTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
unsigned Index) {
switch (Opcode) {
case Instruction::ExtractElement:
@@ -554,7 +541,7 @@ static bool isArgPassedInSGPR(const Argu
/// \returns true if the result of the value could potentially be
/// different across workitems in a wavefront.
-bool AMDGPUTTIImpl::isSourceOfDivergence(const Value *V) const {
+bool GCNTTIImpl::isSourceOfDivergence(const Value *V) const {
if (const Argument *A = dyn_cast<Argument>(V))
return !isArgPassedInSGPR(A);
@@ -584,7 +571,7 @@ bool AMDGPUTTIImpl::isSourceOfDivergence
return false;
}
-bool AMDGPUTTIImpl::isAlwaysUniform(const Value *V) const {
+bool GCNTTIImpl::isAlwaysUniform(const Value *V) const {
if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(V)) {
switch (Intrinsic->getIntrinsicID()) {
default:
@@ -597,7 +584,7 @@ bool AMDGPUTTIImpl::isAlwaysUniform(cons
return false;
}
-unsigned AMDGPUTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
+unsigned GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) {
if (ST->hasVOP3PInsts()) {
VectorType *VT = cast<VectorType>(Tp);
@@ -620,7 +607,7 @@ unsigned AMDGPUTTIImpl::getShuffleCost(T
return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
}
-bool AMDGPUTTIImpl::areInlineCompatible(const Function *Caller,
+bool GCNTTIImpl::areInlineCompatible(const Function *Caller,
const Function *Callee) const {
const TargetMachine &TM = getTLI()->getTargetMachine();
const FeatureBitset &CallerBits =
@@ -632,3 +619,114 @@ bool AMDGPUTTIImpl::areInlineCompatible(
FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList;
return ((RealCallerBits & RealCalleeBits) == RealCalleeBits);
}
+
+void GCNTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::UnrollingPreferences &UP) {
+ CommonTTI.getUnrollingPreferences(L, SE, UP);
+}
+
+unsigned R600TTIImpl::getHardwareNumberOfRegisters(bool Vec) const {
+ return 4 * 128; // XXX - 4 channels. Should these count as vector instead?
+}
+
+unsigned R600TTIImpl::getNumberOfRegisters(bool Vec) const {
+ return getHardwareNumberOfRegisters(Vec);
+}
+
+unsigned R600TTIImpl::getRegisterBitWidth(bool Vector) const {
+ return 32;
+}
+
+unsigned R600TTIImpl::getMinVectorRegisterBitWidth() const {
+ return 32;
+}
+
+unsigned R600TTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
+ AMDGPUAS AS = ST->getAMDGPUAS();
+ if (AddrSpace == AS.GLOBAL_ADDRESS ||
+ AddrSpace == AS.CONSTANT_ADDRESS)
+ return 128;
+ if (AddrSpace == AS.LOCAL_ADDRESS ||
+ AddrSpace == AS.REGION_ADDRESS)
+ return 64;
+ if (AddrSpace == AS.PRIVATE_ADDRESS)
+ return 32;
+
+ if ((AddrSpace == AS.PARAM_D_ADDRESS ||
+ AddrSpace == AS.PARAM_I_ADDRESS ||
+ (AddrSpace >= AS.CONSTANT_BUFFER_0 &&
+ AddrSpace <= AS.CONSTANT_BUFFER_15)))
+ return 128;
+ llvm_unreachable("unhandled address space");
+}
+
+bool R600TTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
+ unsigned Alignment,
+ unsigned AddrSpace) const {
+ // We allow vectorization of flat stores, even though we may need to decompose
+ // them later if they may access private memory. We don't have enough context
+ // here, and legalization can handle it.
+ if (AddrSpace == ST->getAMDGPUAS().PRIVATE_ADDRESS)
+ return false;
+ return true;
+}
+
+bool R600TTIImpl::isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
+ unsigned Alignment,
+ unsigned AddrSpace) const {
+ return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
+}
+
+bool R600TTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
+ unsigned Alignment,
+ unsigned AddrSpace) const {
+ return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
+}
+
+unsigned R600TTIImpl::getMaxInterleaveFactor(unsigned VF) {
+ // Disable unrolling if the loop is not vectorized.
+ // TODO: Enable this again.
+ if (VF == 1)
+ return 1;
+
+ return 8;
+}
+
+unsigned R600TTIImpl::getCFInstrCost(unsigned Opcode) {
+ // XXX - For some reason this isn't called for switch.
+ switch (Opcode) {
+ case Instruction::Br:
+ case Instruction::Ret:
+ return 10;
+ default:
+ return BaseT::getCFInstrCost(Opcode);
+ }
+}
+
+int R600TTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
+ unsigned Index) {
+ switch (Opcode) {
+ case Instruction::ExtractElement:
+ case Instruction::InsertElement: {
+ unsigned EltSize
+ = DL.getTypeSizeInBits(cast<VectorType>(ValTy)->getElementType());
+ if (EltSize < 32) {
+ return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
+ }
+
+ // Extracts are just reads of a subregister, so are free. Inserts are
+ // considered free because we don't want to have any cost for scalarizing
+ // operations, and we don't have to copy into a different register class.
+
+ // Dynamic indexing isn't free and is best avoided.
+ return Index == ~0u ? 2 : 0;
+ }
+ default:
+ return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
+ }
+}
+
+void R600TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::UnrollingPreferences &UP) {
+ CommonTTI.getUnrollingPreferences(L, SE, UP);
+}
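With the run-time generation checks gone, each implementation hard-codes its own
limits; roughly, summarizing the hunks above:

  // GCN  (SI+):  256 VGPRs reported; 32-bit registers; up to 512-bit
  //              vectorized global/constant loads and stores.
  // R600:        4 * 128 registers; 32-bit registers; 128-bit global/constant
  //              and param/constant-buffer accesses, 64-bit local/region,
  //              32-bit private.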
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h?rev=333605&r1=333604&r2=333605&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h Wed May 30 15:55:35 2018
@@ -47,6 +47,29 @@ class AMDGPUTTIImpl final : public Basic
const AMDGPUSubtarget *ST;
const AMDGPUTargetLowering *TLI;
+
+public:
+ explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
+ : BaseT(TM, F.getParent()->getDataLayout()),
+ ST(TM->getSubtargetImpl(F)),
+ TLI(ST->getTargetLowering()) {}
+
+ const AMDGPUSubtarget *getST() const { return ST; }
+ const AMDGPUTargetLowering *getTLI() const { return TLI; }
+
+ void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::UnrollingPreferences &UP);
+};
+
+class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
+ using BaseT = BasicTTIImplBase<GCNTTIImpl>;
+ using TTI = TargetTransformInfo;
+
+ friend BaseT;
+
+ const AMDGPUSubtarget *ST;
+ const AMDGPUTargetLowering *TLI;
+ AMDGPUTTIImpl CommonTTI;
bool IsGraphicsShader;
const FeatureBitset InlineFeatureIgnoreList = {
@@ -99,10 +122,11 @@ class AMDGPUTTIImpl final : public Basic
}
public:
- explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
+ explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
: BaseT(TM, F.getParent()->getDataLayout()),
ST(TM->getSubtargetImpl(F)),
TLI(ST->getTargetLowering()),
+ CommonTTI(TM, F),
IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())) {}
bool hasBranchDivergence() { return true; }
@@ -182,6 +206,46 @@ public:
bool IsUnsigned);
};
+class R600TTIImpl final : public BasicTTIImplBase<R600TTIImpl> {
+ using BaseT = BasicTTIImplBase<R600TTIImpl>;
+ using TTI = TargetTransformInfo;
+
+ friend BaseT;
+
+ const AMDGPUSubtarget *ST;
+ const AMDGPUTargetLowering *TLI;
+ AMDGPUTTIImpl CommonTTI;
+
+public:
+ explicit R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
+ : BaseT(TM, F.getParent()->getDataLayout()),
+ ST(TM->getSubtargetImpl(F)),
+ TLI(ST->getTargetLowering()),
+ CommonTTI(TM, F) {}
+
+ const AMDGPUSubtarget *getST() const { return ST; }
+ const AMDGPUTargetLowering *getTLI() const { return TLI; }
+
+ void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::UnrollingPreferences &UP);
+ unsigned getHardwareNumberOfRegisters(bool Vec) const;
+ unsigned getNumberOfRegisters(bool Vec) const;
+ unsigned getRegisterBitWidth(bool Vector) const;
+ unsigned getMinVectorRegisterBitWidth() const;
+ unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
+ bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, unsigned Alignment,
+ unsigned AddrSpace) const;
+ bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
+ unsigned Alignment,
+ unsigned AddrSpace) const;
+ bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
+ unsigned Alignment,
+ unsigned AddrSpace) const;
+ unsigned getMaxInterleaveFactor(unsigned VF);
+ unsigned getCFInstrCost(unsigned Opcode);
+ int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
+};
+
} // end namespace llvm
#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
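As an aside, both new classes follow the usual CRTP shape of BasicTTIImplBase,
where a derived method shadows the base's default and callers going through the
derived type get the target-specific answer. A minimal self-contained sketch of
that pattern (illustrative names only, not LLVM code):

  #include <iostream>

  // Generic base provides a conservative default cost.
  template <typename Derived> struct TTIBase {
    unsigned getCFInstrCost(unsigned Opcode) const { return 1; }
  };

  // Derived class shadows the default, as R600TTIImpl::getCFInstrCost does.
  struct R600Like : TTIBase<R600Like> {
    unsigned getCFInstrCost(unsigned Opcode) const { return 10; }
  };

  int main() {
    R600Like TTI;
    std::cout << TTI.getCFInstrCost(1) << "\n"; // prints 10
  }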