[llvm] Revert "Revert "[llvm][ARM]Add widen global arrays pass"" (PR #113288)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 22 02:46:15 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-arm
Author: Nashe Mncube (nasherm)
<details>
<summary>Changes</summary>
Reverts llvm/llvm-project#<!-- -->112701
Also adds a config file for the tests so that they pass on the buildbots.
---
Patch is 34.02 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/113288.diff
18 Files Affected:
- (modified) llvm/include/llvm/Analysis/TargetTransformInfo.h (+11)
- (modified) llvm/include/llvm/Analysis/TargetTransformInfoImpl.h (+4)
- (modified) llvm/lib/Analysis/TargetTransformInfo.cpp (+6)
- (modified) llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp (+33)
- (modified) llvm/lib/Target/ARM/ARMTargetTransformInfo.h (+3)
- (modified) llvm/lib/Transforms/IPO/GlobalOpt.cpp (+165)
- (added) llvm/test/Transforms/GlobalOpt/ARM/arm-widen-dest-non-array.ll (+39)
- (added) llvm/test/Transforms/GlobalOpt/ARM/arm-widen-global-dest.ll (+28)
- (added) llvm/test/Transforms/GlobalOpt/ARM/arm-widen-non-byte-array.ll (+22)
- (added) llvm/test/Transforms/GlobalOpt/ARM/arm-widen-non-const-global.ll (+21)
- (added) llvm/test/Transforms/GlobalOpt/ARM/arm-widen-string-multi-use.ll (+33)
- (added) llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-1.ll (+21)
- (added) llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-2.ll (+21)
- (added) llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-lengths-dont-match.ll (+26)
- (added) llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-more-than-64-bytes.ll (+28)
- (added) llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-ptrtoint.ll (+54)
- (added) llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-struct-test.ll (+45)
- (added) llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-volatile.ll (+29)
``````````diff
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 0459941fe05cdc..0dc513d8e65b76 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1819,6 +1819,10 @@ class TargetTransformInfo {
/// \return The maximum number of function arguments the target supports.
unsigned getMaxNumArgs() const;
+ /// \return The number of padding bytes to append to a global array that
+ /// occupies \p Size bytes and has type \p ArrayType, so that it reaches the
+ /// boundary chosen by the target. Zero means no padding, which is the default.
+ unsigned getNumBytesToPadGlobalArray(unsigned Size, Type *ArrayType) const;
+
/// @}
private:
@@ -2225,6 +2229,8 @@ class TargetTransformInfo::Concept {
getVPLegalizationStrategy(const VPIntrinsic &PI) const = 0;
virtual bool hasArmWideBranch(bool Thumb) const = 0;
virtual unsigned getMaxNumArgs() const = 0;
+ // Pure-virtual counterpart of getNumBytesToPadGlobalArray; concrete models
+ // must override it.
+ virtual unsigned getNumBytesToPadGlobalArray(unsigned Size,
+ Type *ArrayType) const = 0;
};
template <typename T>
@@ -3026,6 +3032,11 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
unsigned getMaxNumArgs() const override {
return Impl.getMaxNumArgs();
}
+
+ // Forwards the query unchanged to the wrapped implementation object.
+ unsigned getNumBytesToPadGlobalArray(unsigned Size,
+ Type *ArrayType) const override {
+ return Impl.getNumBytesToPadGlobalArray(Size, ArrayType);
+ }
};
template <typename T>
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index dbdfb4d8cdfa32..0b7792f89a05c4 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -1006,6 +1006,10 @@ class TargetTransformInfoImplBase {
unsigned getMaxNumArgs() const { return UINT_MAX; }
+ // Base behaviour: report zero padding bytes regardless of the arguments.
+ unsigned getNumBytesToPadGlobalArray(unsigned Size, Type *ArrayType) const {
+ return 0;
+ }
+
protected:
// Obtain the minimum required size to hold the value (without the sign)
// In case of a vector it returns the min required size for one element.
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index a47462b61e03b2..60704733637614 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1383,6 +1383,12 @@ bool TargetTransformInfo::isVectorShiftByScalarCheap(Type *Ty) const {
return TTIImpl->isVectorShiftByScalarCheap(Ty);
}
+// Public entry point; the answer comes from the implementation behind TTIImpl.
+unsigned
+TargetTransformInfo::getNumBytesToPadGlobalArray(unsigned Size,
+ Type *ArrayType) const {
+ return TTIImpl->getNumBytesToPadGlobalArray(Size, ArrayType);
+}
+
TargetTransformInfo::Concept::~Concept() = default;
TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 835ae98efb852d..9f6e5e5ab1421c 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -56,6 +56,10 @@ static cl::opt<bool>
AllowWLSLoops("allow-arm-wlsloops", cl::Hidden, cl::init(true),
cl::desc("Enable the generation of WLS loops"));
+// Hidden switch, on by default, spelled "widen-global-strings" on the command
+// line. ARMTTIImpl::getNumBytesToPadGlobalArray returns no padding when it is
+// off.
+static cl::opt<bool> UseWidenGlobalArrays(
+ "widen-global-strings", cl::Hidden, cl::init(true),
+ cl::desc("Enable the widening of global strings to alignment boundaries"));
+
extern cl::opt<TailPredication::Mode> EnableTailPredication;
extern cl::opt<bool> EnableMaskedGatherScatters;
@@ -2805,3 +2809,32 @@ bool ARMTTIImpl::isProfitableToSinkOperands(Instruction *I,
}
return true;
}
+
+/// Computes how many bytes should be appended to a global integer array so
+/// that its size becomes a multiple of 4.
+///
+/// \param Size      Current size of the array in bytes.
+/// \param ArrayType Type of the array; only arrays of integers are padded.
+/// \return The number of padding bytes (1-3), or 0 when padding is disabled,
+/// the type is not an integer array, the size is already a multiple of 4, or
+/// the padded size would exceed the inline memcpy threshold.
+unsigned ARMTTIImpl::getNumBytesToPadGlobalArray(unsigned Size,
+                                                 Type *ArrayType) const {
+  if (!UseWidenGlobalArrays) {
+    LLVM_DEBUG(dbgs() << "Padding global arrays disabled\n");
+    // The function returns a byte count, so spell "none" as 0 rather than
+    // as a boolean.
+    return 0;
+  }
+
+  // Don't modify non-integer array types.
+  if (!ArrayType || !ArrayType->isArrayTy() ||
+      !ArrayType->getArrayElementType()->isIntegerTy())
+    return 0;
+
+  // We pad to 4 byte boundaries; sizes already on one need nothing.
+  const unsigned Remainder = Size % 4;
+  if (Remainder == 0)
+    return 0;
+
+  const unsigned NumBytesToPad = 4 - Remainder;
+  // Do the addition in 64 bits so that a Size close to the unsigned maximum
+  // cannot wrap around and slip under the threshold check below.
+  const uint64_t NewSize = static_cast<uint64_t>(Size) + NumBytesToPad;
+
+  // Max number of bytes that memcpy allows for lowering to load/stores before
+  // it uses library function (__aeabi_memcpy).
+  unsigned MaxMemIntrinsicSize = getMaxMemIntrinsicInlineSizeThreshold();
+
+  if (NewSize > MaxMemIntrinsicSize)
+    return 0;
+
+  return NumBytesToPad;
+}
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index b0a75134ee02b7..3a4f940088b2e3 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -337,6 +337,9 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
bool isProfitableToSinkOperands(Instruction *I,
SmallVectorImpl<Use *> &Ops) const;
+
+ /// Number of bytes to pad a global array of \p Size bytes with; the
+ /// definition lives in ARMTargetTransformInfo.cpp.
+ unsigned getNumBytesToPadGlobalArray(unsigned Size, Type *ArrayType) const;
+
/// @}
};
diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index aae4926e027ff4..4647c65a5c850f 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -92,6 +92,8 @@ STATISTIC(NumInternalFunc, "Number of internal functions");
STATISTIC(NumColdCC, "Number of functions marked coldcc");
STATISTIC(NumIFuncsResolved, "Number of statically resolved IFuncs");
STATISTIC(NumIFuncsDeleted, "Number of IFuncs removed");
+// Incremented once each time tryWidenGlobalArrayAndDests replaces a global
+// with its padded copy.
+STATISTIC(NumGlobalArraysPadded,
+ "Number of global arrays padded to alignment boundary");
static cl::opt<bool>
EnableColdCCStressTest("enable-coldcc-stress-test",
@@ -2029,6 +2031,165 @@ OptimizeFunctions(Module &M,
return Changed;
}
+/// Returns true when \p CI is non-null and directly calls the llvm.memcpy
+/// intrinsic; returns false for null, indirect calls and any other callee.
+static bool callInstIsMemcpy(CallInst *CI) {
+  if (CI == nullptr)
+    return false;
+
+  const Function *Callee = CI->getCalledFunction();
+  return Callee != nullptr && Callee->isIntrinsic() &&
+         Callee->getIntrinsicID() == Intrinsic::memcpy;
+}
+
+/// Tells whether the destination of the memcpy call \p CI may be enlarged.
+/// That requires the volatile flag (operand 3) to be a constant other than
+/// one, and the destination (operand 0) to be a static alloca of array type.
+static bool destArrayCanBeWidened(CallInst *CI) {
+  Value *VolatileOp = CI->getArgOperand(3);
+  Value *DestOp = CI->getArgOperand(0);
+
+  auto *VolatileFlag = dyn_cast<ConstantInt>(VolatileOp);
+  auto *Dest = dyn_cast<AllocaInst>(DestOp);
+  if (!Dest || !VolatileFlag)
+    return false;
+
+  // A volatile copy must be left exactly as written.
+  if (VolatileFlag->isOne())
+    return false;
+
+  return Dest->isStaticAlloca() && Dest->getAllocatedType()->isArrayTy();
+}
+
+/// Creates a padded, constant replacement for \p OldVar in the module that
+/// owns \p F.
+///
+/// The new initializer is a byte array made of the first
+/// \p NumBytesToCopy + \p NumBytesToPad bytes of the old raw data with
+/// \p NumBytesToPad null bytes appended. The new global copies the old one's
+/// attributes and takes over its name.
+///
+/// \return The new global, or nullptr when \p OldVar has no initializer or
+/// its initializer is not a ConstantDataArray.
+static GlobalVariable *widenGlobalVariable(GlobalVariable *OldVar, Function *F,
+                                           unsigned NumBytesToPad,
+                                           unsigned NumBytesToCopy) {
+  auto *OldInit = OldVar->hasInitializer()
+                      ? dyn_cast<ConstantDataArray>(OldVar->getInitializer())
+                      : nullptr;
+  if (!OldInit)
+    return nullptr;
+
+  // Update to be word aligned (memcpy(...,X,...)): take the raw initializer
+  // bytes and append the requested number of null bytes.
+  StringRef RawBytes = OldInit->getRawDataValues();
+  std::vector<uint8_t> PaddedBytes(RawBytes.begin(), RawBytes.end());
+  PaddedBytes.insert(PaddedBytes.end(), NumBytesToPad, '\0');
+  ArrayRef<uint8_t> NewData(PaddedBytes.data(),
+                            NumBytesToCopy + NumBytesToPad);
+
+  // Create new padded version of global variable.
+  Constant *NewInit = ConstantDataArray::get(F->getContext(), NewData);
+  auto *NewGV = new GlobalVariable(*(F->getParent()), NewInit->getType(),
+                                   /*isConstant=*/true, OldVar->getLinkage(),
+                                   NewInit, NewInit->getName());
+
+  // Copy any other attributes from the original global variable,
+  // e.g. unnamed_addr, and then take over its name.
+  NewGV->copyAttributesFrom(OldVar);
+  NewGV->takeName(OldVar);
+  return NewGV;
+}
+
+/// Replaces the alloca that is the destination of the memcpy \p CI with a
+/// larger array alloca of the same element type, name and alignment.
+///
+/// The new element count is \p NumBytesToCopy + \p NumBytesToPad divided by
+/// the element byte size of \p SourceDataArray, rounded up. Nothing happens
+/// when the destination operand is not an alloca.
+static void widenDestArray(CallInst *CI, const unsigned NumBytesToPad,
+                           const unsigned NumBytesToCopy,
+                           ConstantDataArray *SourceDataArray) {
+  auto *OldAlloca = dyn_cast<AllocaInst>(CI->getArgOperand(0));
+  if (!OldAlloca)
+    return;
+
+  const unsigned ElemBytes = SourceDataArray->getElementByteSize();
+  const unsigned PaddedBytes = NumBytesToCopy + NumBytesToPad;
+  const unsigned PaddedElems = divideCeil(PaddedBytes, ElemBytes);
+
+  // Update destination array to be word aligned (memcpy(X,...,...)).
+  // The replacement is inserted right before the alloca it supersedes.
+  auto *ElemTy = OldAlloca->getAllocatedType()->getArrayElementType();
+  IRBuilder<> Builder(OldAlloca);
+  AllocaInst *NewAlloca =
+      Builder.CreateAlloca(ArrayType::get(ElemTy, PaddedElems));
+  NewAlloca->takeName(OldAlloca);
+  NewAlloca->setAlignment(OldAlloca->getAlign());
+  OldAlloca->replaceAllUsesWith(NewAlloca);
+  OldAlloca->eraseFromParent();
+}
+
+/// Pads \p SourceVar and every widenable memcpy destination that reads it.
+///
+/// A padded copy of the global is created first. Each user that is a memcpy
+/// with a widenable destination and \p SourceVar as its source then gets its
+/// destination alloca enlarged and its length operand set to
+/// \p NumBytesToCopy + \p NumBytesToPad. Finally all uses of the old global
+/// are redirected to the padded copy.
+///
+/// \return false when no padded global could be created, true otherwise.
+static bool tryWidenGlobalArrayAndDests(Function *F, GlobalVariable *SourceVar,
+                                        const unsigned NumBytesToPad,
+                                        const unsigned NumBytesToCopy,
+                                        ConstantInt *BytesToCopyOp,
+                                        ConstantDataArray *SourceDataArray) {
+  GlobalVariable *PaddedGV =
+      widenGlobalVariable(SourceVar, F, NumBytesToPad, NumBytesToCopy);
+  if (PaddedGV == nullptr)
+    return false;
+
+  // Update arguments of remaining uses that are memcpys.
+  for (auto *U : SourceVar->users()) {
+    auto *Memcpy = dyn_cast<CallInst>(U);
+    const bool Eligible = callInstIsMemcpy(Memcpy) &&
+                          destArrayCanBeWidened(Memcpy) &&
+                          Memcpy->getArgOperand(1) == SourceVar;
+    if (!Eligible)
+      continue;
+
+    widenDestArray(Memcpy, NumBytesToPad, NumBytesToCopy, SourceDataArray);
+
+    // The copy now also transfers the padding bytes.
+    Memcpy->setArgOperand(2, ConstantInt::get(BytesToCopyOp->getType(),
+                                              NumBytesToCopy + NumBytesToPad));
+  }
+
+  SourceVar->replaceAllUsesWith(PaddedGV);
+  ++NumGlobalArraysPadded;
+  return true;
+}
+
+/// Tries to pad the constant global array \p GV, together with the stack
+/// arrays it is copied into, up to the boundary requested by the target.
+///
+/// Only local, unnamed_addr, constant globals initialized with a
+/// ConstantDataArray are considered. The users are scanned for a memcpy from
+/// \p GV into a widenable alloca with a constant length, where the number of
+/// copied elements, the destination size and the source size all agree. The
+/// first such memcpy for which the target requests padding triggers the
+/// transformation.
+///
+/// \param GV     Candidate global variable.
+/// \param GetTTI Callback returning the TargetTransformInfo for a function.
+/// \return true if the global was widened, false otherwise.
+static bool tryWidenGlobalArraysUsedByMemcpy(
+    GlobalVariable *GV,
+    function_ref<TargetTransformInfo &(Function &)> GetTTI) {
+
+  if (!GV->hasInitializer() || !GV->isConstant() || !GV->hasLocalLinkage() ||
+      !GV->hasGlobalUnnamedAddr())
+    return false;
+
+  // The initializer is the same for every user, so inspect it only once.
+  auto *SourceDataArray = dyn_cast<ConstantDataArray>(GV->getInitializer());
+  if (!SourceDataArray)
+    return false;
+
+  for (auto *User : GV->users()) {
+    CallInst *CI = dyn_cast<CallInst>(User);
+    if (!callInstIsMemcpy(CI) || !destArrayCanBeWidened(CI))
+      continue;
+
+    // Only memcpys that read from GV are of interest; this mirrors the check
+    // made when the individual calls are rewritten.
+    if (CI->getArgOperand(1) != GV)
+      continue;
+
+    auto *BytesToCopyOp = dyn_cast<ConstantInt>(CI->getArgOperand(2));
+    if (!BytesToCopyOp)
+      continue;
+
+    unsigned NumBytesToCopy = BytesToCopyOp->getZExtValue();
+
+    // destArrayCanBeWidened() has already established that the destination is
+    // an alloca of array type, so an asserting cast is the right tool here.
+    auto *Alloca = cast<AllocaInst>(CI->getArgOperand(0));
+    uint64_t DZSize = Alloca->getAllocatedType()->getArrayNumElements();
+    uint64_t SZSize = SourceDataArray->getType()->getNumElements();
+    unsigned ElementByteWidth = SourceDataArray->getElementByteSize();
+    // Calculate the number of elements to copy while avoiding floored
+    // division of integers returning wrong values i.e. copying one byte
+    // from an array of i16 would yield 0 elements to copy as opposed to 1.
+    unsigned NumElementsToCopy = divideCeil(NumBytesToCopy, ElementByteWidth);
+
+    // For safety purposes lets add a constraint and only pad when
+    // NumElementsToCopy == destination array size ==
+    // source which is a constant
+    if (NumElementsToCopy != DZSize || DZSize != SZSize)
+      continue;
+
+    // Ask the target on behalf of the function that contains the memcpy.
+    // The callee is only the llvm.memcpy declaration, which carries none of
+    // the caller's target attributes.
+    Function *Caller = CI->getFunction();
+    unsigned NumBytesToPad = GetTTI(*Caller).getNumBytesToPadGlobalArray(
+        NumBytesToCopy, SourceDataArray->getType());
+    if (NumBytesToPad) {
+      return tryWidenGlobalArrayAndDests(Caller, GV, NumBytesToPad,
+                                         NumBytesToCopy, BytesToCopyOp,
+                                         SourceDataArray);
+    }
+  }
+  return false;
+}
+
static bool
OptimizeGlobalVars(Module &M,
function_ref<TargetTransformInfo &(Function &)> GetTTI,
@@ -2058,6 +2219,10 @@ OptimizeGlobalVars(Module &M,
continue;
}
+ // For global variable arrays called in a memcpy
+ // we try to pad to nearest valid alignment boundary
+ Changed |= tryWidenGlobalArraysUsedByMemcpy(&GV, GetTTI);
+
Changed |= processGlobal(GV, GetTTI, GetTLI, LookupDomTree);
}
return Changed;
diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-dest-non-array.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-dest-non-array.ll
new file mode 100644
index 00000000000000..ab04e0a5bc697e
--- /dev/null
+++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-dest-non-array.ll
@@ -0,0 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s
+
+@.i8 = private unnamed_addr constant [3 x i8] [i8 1, i8 2, i8 3] , align 1
+
+; The destination is a struct alloca rather than an array. The expected
+; output keeps the alloca type and the 3-byte copy length unchanged.
+define void @memcpy_struct() {
+; CHECK-LABEL: define void @memcpy_struct() local_unnamed_addr {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[SOMETHING:%.*]] = alloca { i8, i8, i8 }, align 1
+; CHECK-NEXT: [[CALL1:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING]])
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING]], ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 3, i1 false)
+; CHECK-NEXT: ret void
+;
+entry:
+ %something = alloca {i8, i8, i8}, align 1
+ %call1 = call i32 @bar(ptr nonnull %something)
+ call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) %something, ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 3, i1 false)
+ ret void
+}
+
+
+@.i8_multi = private unnamed_addr constant [2 x [3 x i8]] [[3 x i8] [i8 1, i8 2, i8 3], [3 x i8] [i8 4, i8 5, i8 6]] , align 1
+
+; Source and destination are two-dimensional arrays ([2 x [3 x i8]]). The
+; expected output keeps the alloca type and the 3-byte copy length unchanged.
+define void @memcpy_array_multidimensional() {
+; CHECK-LABEL: define void @memcpy_array_multidimensional() local_unnamed_addr {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[SOMETHING:%.*]] = alloca [2 x [3 x i8]], align 1
+; CHECK-NEXT: [[CALL1:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING]])
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING]], ptr noundef nonnull align 1 dereferenceable(3) @.i8_multi, i32 3, i1 false)
+; CHECK-NEXT: ret void
+;
+entry:
+ %something = alloca [2 x [3 x i8]], align 1
+ %call1 = call i32 @bar(ptr nonnull %something)
+ call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) %something, ptr noundef nonnull align 1 dereferenceable(3) @.i8_multi, i32 3, i1 false)
+ ret void
+}
+
+declare i32 @bar(...)
diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-global-dest.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-global-dest.ll
new file mode 100644
index 00000000000000..f435ffdeed2c8e
--- /dev/null
+++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-global-dest.ll
@@ -0,0 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s
+
+; CHECK: [3 x i8]
+@other = private unnamed_addr global [3 x i8] [i8 1, i8 2, i8 3] , align 1
+; CHECK: [4 x i8]
+@.i8 = private unnamed_addr constant [3 x i8] [i8 1, i8 2, i8 3] , align 1
+
+; Two copies read from @.i8. The copy into the global @other is expected to
+; keep its length of 3, while the copy into the stack array is expected to
+; become a 4-byte copy into an alloca of [4 x i8].
+define void @memcpy_multiple() {
+; CHECK-LABEL: define void @memcpy_multiple() local_unnamed_addr {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[SOMETHING:%.*]] = alloca [4 x i8], align 1
+; CHECK-NEXT: [[CALL2:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING]])
+; CHECK-NEXT: [[CALL3:%.*]] = call i32 @bar(ptr nonnull @other)
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) @other, ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 3, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING]], ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 4, i1 false)
+; CHECK-NEXT: ret void
+;
+entry:
+ %something = alloca [3 x i8], align 1
+ %call1 = call i32 @bar(ptr nonnull %something)
+ %call2 = call i32 @bar(ptr nonnull @other)
+ call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) @other, ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 3, i1 false)
+ call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) %something, ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 3, i1 false)
+ ret void
+}
+
+declare i32 @bar(...)
diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-non-byte-array.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-non-byte-array.ll
new file mode 100644
index 00000000000000..c7ca7271fd3d27
--- /dev/null
+++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-non-byte-array.ll
@@ -0,0 +1,22 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s
+
+@.i16 = private unnamed_addr constant [5 x i16] [i16 1, i16 2, i16 3, i16 4, i16 5] , align 1
+
+; Copies 10 bytes from a [5 x i16] constant. The expected output grows the
+; destination alloca to [6 x i16] and the copy length to 12 bytes.
+define void @memcpy_i16_array() {
+; CHECK-LABEL: define void @memcpy_i16_array() local_unnamed_addr {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[SOMETHING1:%.*]] = alloca [6 x i16], align 1
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(10) [[SOMETHING1]], ptr noundef nonnull align 1 dereferenceable(10) @.i16, i32 12, i1 false)
+; CHECK-NEXT: [[CALL2:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING1]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %something = alloca [5 x i16], align 1
+ call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(10) %something, ptr noundef nonnull align 1 dereferenceable(10) @.i16, i32 10, i1 false)
+ %call2 = call i32 @bar(ptr nonnull %something)
+ ret void
+}
+
+
+declare i32 @bar(...)
diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-non-const-global.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-non-const-global.ll
new file mode 100644
index 00000000000000..3d9c42fe1f3dd8
--- /dev/null
+++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-non-const-global.ll
@@ -0,0 +1,21 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s
+
+@.str = unnamed_addr global [3 x i8] c"12\00", align 1
+
+; The source @.str is declared as a non-constant global. The expected output
+; keeps the [3 x i8] alloca and the 3-byte copy length unchanged.
+define void @foo() {
+; CHECK-LABEL: define void @foo() local_unnamed_addr {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[SOMETHING:%.*]] = alloca [3 x i8], align 1
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING]], ptr noundef nonnull align 1 dereferenceable(3) @.str, i32 3, i1 false)
+; CHECK-NEXT: [[CALL1:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %something = alloca [3 x i8], align 1
+ call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) %something, ptr noundef nonnull align 1 dereferenceable(3) @.str, i32 3, i1 false)
+ %call1 = call i32 @bar(ptr nonnull %something)
+ ret void
+}
+
+declare i32 @bar(...)
diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-string-multi-use.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-string-multi-use.ll
new file mode 100644
index 00000000000000..e37925a78d2c3a
--- /dev/null
+++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-string-multi-use.ll
@@ -0,0 +1,33 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s
+
+@.i8 = private unnamed_addr constant [3 x i8] [i8 1, i8 2, i8 3] , align 1
+
+define void @memcpy_multiple() {
+; CHECK-LABEL: define void @memcpy_multiple() local_unnamed_addr {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[SOMETHING2:%.*]] = alloca [4 x i8], align 1
+; CHECK-NEXT: [[SOMETHING1:%.*]] = alloca [4 x i8], align 1
+; CHECK-NEXT: [[SOMETHING3:%.*]] = alloca [4 x i8], align 1
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING2]], ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 4, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING1]], ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 4, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferencea...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/113288
More information about the llvm-commits
mailing list