[llvm] [LowerMemIntrinsics][NFC] Use Align in TTI::getMemcpyLoopLoweringType (PR #100984)
Fabian Ritter via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 29 01:24:02 PDT 2024
https://github.com/ritter-x2a created https://github.com/llvm/llvm-project/pull/100984
...and also in TTI::getMemcpyLoopResidualLoweringType.
>From a9a9c2ebe5f76821e6f8949e67e615afa345c332 Mon Sep 17 00:00:00 2001
From: Fabian Ritter <fabian.ritter at amd.com>
Date: Mon, 29 Jul 2024 04:17:17 -0400
Subject: [PATCH] [LowerMemIntrinsics][NFC] Use Align in
TTI::getMemcpyLoopLoweringType
...and also in TTI::getMemcpyLoopResidualLoweringType.
---
.../llvm/Analysis/TargetTransformInfo.h | 12 ++++-----
.../llvm/Analysis/TargetTransformInfoImpl.h | 4 +--
llvm/lib/Analysis/TargetTransformInfo.cpp | 4 +--
.../AMDGPU/AMDGPUTargetTransformInfo.cpp | 12 ++++-----
.../Target/AMDGPU/AMDGPUTargetTransformInfo.h | 11 ++++----
.../Transforms/Utils/LowerMemIntrinsics.cpp | 26 +++++++++----------
6 files changed, 34 insertions(+), 35 deletions(-)
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 2411b2b31d293..38e8b9da21397 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1587,7 +1587,7 @@ class TargetTransformInfo {
/// \returns The type to use in a loop expansion of a memcpy call.
Type *getMemcpyLoopLoweringType(
LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
- unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
+ unsigned DestAddrSpace, Align SrcAlign, Align DestAlign,
std::optional<uint32_t> AtomicElementSize = std::nullopt) const;
/// \param[out] OpsOut The operand types to copy RemainingBytes of memory.
@@ -1599,7 +1599,7 @@ class TargetTransformInfo {
void getMemcpyLoopResidualLoweringType(
SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
- unsigned SrcAlign, unsigned DestAlign,
+ Align SrcAlign, Align DestAlign,
std::optional<uint32_t> AtomicCpySize = std::nullopt) const;
/// \returns True if the two functions have compatible attributes for inlining
@@ -2133,13 +2133,13 @@ class TargetTransformInfo::Concept {
Type *ExpectedType) = 0;
virtual Type *getMemcpyLoopLoweringType(
LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
- unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
+ unsigned DestAddrSpace, Align SrcAlign, Align DestAlign,
std::optional<uint32_t> AtomicElementSize) const = 0;
virtual void getMemcpyLoopResidualLoweringType(
SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
- unsigned SrcAlign, unsigned DestAlign,
+ Align SrcAlign, Align DestAlign,
std::optional<uint32_t> AtomicCpySize) const = 0;
virtual bool areInlineCompatible(const Function *Caller,
const Function *Callee) const = 0;
@@ -2838,7 +2838,7 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
}
Type *getMemcpyLoopLoweringType(
LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
- unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
+ unsigned DestAddrSpace, Align SrcAlign, Align DestAlign,
std::optional<uint32_t> AtomicElementSize) const override {
return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace,
DestAddrSpace, SrcAlign, DestAlign,
@@ -2847,7 +2847,7 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
void getMemcpyLoopResidualLoweringType(
SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
- unsigned SrcAlign, unsigned DestAlign,
+ Align SrcAlign, Align DestAlign,
std::optional<uint32_t> AtomicCpySize) const override {
Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
SrcAddrSpace, DestAddrSpace,
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 00efa474a91b5..899c5041aba4d 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -839,7 +839,7 @@ class TargetTransformInfoImplBase {
Type *
getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
unsigned SrcAddrSpace, unsigned DestAddrSpace,
- unsigned SrcAlign, unsigned DestAlign,
+ Align SrcAlign, Align DestAlign,
std::optional<uint32_t> AtomicElementSize) const {
return AtomicElementSize ? Type::getIntNTy(Context, *AtomicElementSize * 8)
: Type::getInt8Ty(Context);
@@ -848,7 +848,7 @@ class TargetTransformInfoImplBase {
void getMemcpyLoopResidualLoweringType(
SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
- unsigned SrcAlign, unsigned DestAlign,
+ Align SrcAlign, Align DestAlign,
std::optional<uint32_t> AtomicCpySize) const {
unsigned OpSizeInBytes = AtomicCpySize ? *AtomicCpySize : 1;
Type *OpType = Type::getIntNTy(Context, OpSizeInBytes * 8);
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 6a0fa98089ba5..dcde78925bfa9 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1198,7 +1198,7 @@ Value *TargetTransformInfo::getOrCreateResultFromMemIntrinsic(
Type *TargetTransformInfo::getMemcpyLoopLoweringType(
LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
- unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
+ unsigned DestAddrSpace, Align SrcAlign, Align DestAlign,
std::optional<uint32_t> AtomicElementSize) const {
return TTIImpl->getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace,
DestAddrSpace, SrcAlign, DestAlign,
@@ -1208,7 +1208,7 @@ Type *TargetTransformInfo::getMemcpyLoopLoweringType(
void TargetTransformInfo::getMemcpyLoopResidualLoweringType(
SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
- unsigned SrcAlign, unsigned DestAlign,
+ Align SrcAlign, Align DestAlign,
std::optional<uint32_t> AtomicCpySize) const {
TTIImpl->getMemcpyLoopResidualLoweringType(
OpsOut, Context, RemainingBytes, SrcAddrSpace, DestAddrSpace, SrcAlign,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 0b1ecc002ae25..2b18d501e60b5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -418,19 +418,19 @@ int64_t GCNTTIImpl::getMaxMemIntrinsicInlineSizeThreshold() const {
// FIXME: This could use fine tuning and microbenchmarks.
Type *GCNTTIImpl::getMemcpyLoopLoweringType(
LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
- unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
+ unsigned DestAddrSpace, Align SrcAlign, Align DestAlign,
std::optional<uint32_t> AtomicElementSize) const {
if (AtomicElementSize)
return Type::getIntNTy(Context, *AtomicElementSize * 8);
- unsigned MinAlign = std::min(SrcAlign, DestAlign);
+ Align MinAlign = std::min(SrcAlign, DestAlign);
// A (multi-)dword access at an address == 2 (mod 4) will be decomposed by the
// hardware into byte accesses. If you assume all alignments are equally
// probable, it's more efficient on average to use short accesses for this
// case.
- if (MinAlign == 2)
+ if (MinAlign == Align(2))
return Type::getInt16Ty(Context);
// Not all subtargets have 128-bit DS instructions, and we currently don't
@@ -450,7 +450,7 @@ Type *GCNTTIImpl::getMemcpyLoopLoweringType(
void GCNTTIImpl::getMemcpyLoopResidualLoweringType(
SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
- unsigned SrcAlign, unsigned DestAlign,
+ Align SrcAlign, Align DestAlign,
std::optional<uint32_t> AtomicCpySize) const {
assert(RemainingBytes < 16);
@@ -459,9 +459,9 @@ void GCNTTIImpl::getMemcpyLoopResidualLoweringType(
OpsOut, Context, RemainingBytes, SrcAddrSpace, DestAddrSpace, SrcAlign,
DestAlign, AtomicCpySize);
- unsigned MinAlign = std::min(SrcAlign, DestAlign);
+ Align MinAlign = std::min(SrcAlign, DestAlign);
- if (MinAlign != 2) {
+ if (MinAlign != Align(2)) {
Type *I64Ty = Type::getInt64Ty(Context);
while (RemainingBytes >= 8) {
OpsOut.push_back(I64Ty);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index b423df17302ca..01df2e6caaba1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -137,15 +137,16 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
unsigned AddrSpace) const;
int64_t getMaxMemIntrinsicInlineSizeThreshold() const;
- Type *getMemcpyLoopLoweringType(
- LLVMContext & Context, Value * Length, unsigned SrcAddrSpace,
- unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
- std::optional<uint32_t> AtomicElementSize) const;
+ Type *
+ getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
+ unsigned SrcAddrSpace, unsigned DestAddrSpace,
+ Align SrcAlign, Align DestAlign,
+ std::optional<uint32_t> AtomicElementSize) const;
void getMemcpyLoopResidualLoweringType(
SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
- unsigned SrcAlign, unsigned DestAlign,
+ Align SrcAlign, Align DestAlign,
std::optional<uint32_t> AtomicCpySize) const;
unsigned getMaxInterleaveFactor(ElementCount VF);
diff --git a/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
index ee0d95bbce64f..f0c7e31b9c223 100644
--- a/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
+++ b/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
@@ -45,8 +45,7 @@ void llvm::createMemCpyLoopKnownSize(
Type *TypeOfCopyLen = CopyLen->getType();
Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
- Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value(),
- AtomicElementSize);
+ Ctx, CopyLen, SrcAS, DstAS, SrcAlign, DstAlign, AtomicElementSize);
assert((!AtomicElementSize || !LoopOpType->isVectorTy()) &&
"Atomic memcpy lowering is not supported for vector operand type");
@@ -111,8 +110,8 @@ void llvm::createMemCpyLoopKnownSize(
SmallVector<Type *, 5> RemainingOps;
TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
- SrcAS, DstAS, SrcAlign.value(),
- DstAlign.value(), AtomicElementSize);
+ SrcAS, DstAS, SrcAlign, DstAlign,
+ AtomicElementSize);
for (auto *OpTy : RemainingOps) {
Align PartSrcAlign(commonAlignment(SrcAlign, BytesCopied));
@@ -197,8 +196,7 @@ void llvm::createMemCpyLoopUnknownSize(
unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
- Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value(),
- AtomicElementSize);
+ Ctx, CopyLen, SrcAS, DstAS, SrcAlign, DstAlign, AtomicElementSize);
assert((!AtomicElementSize || !LoopOpType->isVectorTy()) &&
"Atomic memcpy lowering is not supported for vector operand type");
unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
@@ -411,8 +409,8 @@ static void createMemMoveLoopUnknownSize(Instruction *InsertBefore,
unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
- Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
- Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value());
+ Type *LoopOpType = TTI.getMemcpyLoopLoweringType(Ctx, CopyLen, SrcAS, DstAS,
+ SrcAlign, DstAlign);
unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
Type *Int8Type = Type::getInt8Ty(Ctx);
bool LoopOpIsInt8 = LoopOpType == Int8Type;
@@ -668,8 +666,8 @@ static void createMemMoveLoopKnownSize(Instruction *InsertBefore,
unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
- Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
- Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value());
+ Type *LoopOpType = TTI.getMemcpyLoopLoweringType(Ctx, CopyLen, SrcAS, DstAS,
+ SrcAlign, DstAlign);
unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
// Calculate the loop trip count and remaining bytes to copy after the loop.
@@ -737,8 +735,8 @@ static void createMemMoveLoopKnownSize(Instruction *InsertBefore,
IRBuilder<> BwdResBuilder(CopyBackwardsBB->getFirstNonPHI());
SmallVector<Type *, 5> RemainingOps;
TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
- SrcAS, DstAS, PartSrcAlign.value(),
- PartDstAlign.value());
+ SrcAS, DstAS, PartSrcAlign,
+ PartDstAlign);
for (auto *OpTy : RemainingOps) {
// reverse the order of the emitted operations
BwdResBuilder.SetInsertPoint(CopyBackwardsBB->getFirstNonPHI());
@@ -818,8 +816,8 @@ static void createMemMoveLoopKnownSize(Instruction *InsertBefore,
IRBuilder<> FwdResBuilder(FwdResidualBB->getTerminator());
SmallVector<Type *, 5> RemainingOps;
TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
- SrcAS, DstAS, PartSrcAlign.value(),
- PartDstAlign.value());
+ SrcAS, DstAS, PartSrcAlign,
+ PartDstAlign);
for (auto *OpTy : RemainingOps)
GenerateResidualLdStPair(OpTy, FwdResBuilder, BytesCopied);
}
More information about the llvm-commits
mailing list