[llvm] Revert "Revert "[llvm][ARM]Add widen global arrays pass"" (PR #113288)

via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 22 02:46:15 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-arm

Author: Nashe Mncube (nasherm)

<details>
<summary>Changes</summary>

Reverts llvm/llvm-project#<!-- -->112701

Also adds config file for tests so that they pass buildbot

---

Patch is 34.02 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/113288.diff


18 Files Affected:

- (modified) llvm/include/llvm/Analysis/TargetTransformInfo.h (+11) 
- (modified) llvm/include/llvm/Analysis/TargetTransformInfoImpl.h (+4) 
- (modified) llvm/lib/Analysis/TargetTransformInfo.cpp (+6) 
- (modified) llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp (+33) 
- (modified) llvm/lib/Target/ARM/ARMTargetTransformInfo.h (+3) 
- (modified) llvm/lib/Transforms/IPO/GlobalOpt.cpp (+165) 
- (added) llvm/test/Transforms/GlobalOpt/ARM/arm-widen-dest-non-array.ll (+39) 
- (added) llvm/test/Transforms/GlobalOpt/ARM/arm-widen-global-dest.ll (+28) 
- (added) llvm/test/Transforms/GlobalOpt/ARM/arm-widen-non-byte-array.ll (+22) 
- (added) llvm/test/Transforms/GlobalOpt/ARM/arm-widen-non-const-global.ll (+21) 
- (added) llvm/test/Transforms/GlobalOpt/ARM/arm-widen-string-multi-use.ll (+33) 
- (added) llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-1.ll (+21) 
- (added) llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-2.ll (+21) 
- (added) llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-lengths-dont-match.ll (+26) 
- (added) llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-more-than-64-bytes.ll (+28) 
- (added) llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-ptrtoint.ll (+54) 
- (added) llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-struct-test.ll (+45) 
- (added) llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-volatile.ll (+29) 


``````````diff
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 0459941fe05cdc..0dc513d8e65b76 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1819,6 +1819,10 @@ class TargetTransformInfo {
   /// \return The maximum number of function arguments the target supports.
   unsigned getMaxNumArgs() const;
 
+  /// \return For an array of given Size, return alignment boundary to
+  /// pad to. Default is no padding.
+  unsigned getNumBytesToPadGlobalArray(unsigned Size, Type *ArrayType) const;
+
   /// @}
 
 private:
@@ -2225,6 +2229,8 @@ class TargetTransformInfo::Concept {
   getVPLegalizationStrategy(const VPIntrinsic &PI) const = 0;
   virtual bool hasArmWideBranch(bool Thumb) const = 0;
   virtual unsigned getMaxNumArgs() const = 0;
+  virtual unsigned getNumBytesToPadGlobalArray(unsigned Size,
+                                               Type *ArrayType) const = 0;
 };
 
 template <typename T>
@@ -3026,6 +3032,11 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
   unsigned getMaxNumArgs() const override {
     return Impl.getMaxNumArgs();
   }
+
+  unsigned getNumBytesToPadGlobalArray(unsigned Size,
+                                       Type *ArrayType) const override {
+    return Impl.getNumBytesToPadGlobalArray(Size, ArrayType);
+  }
 };
 
 template <typename T>
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index dbdfb4d8cdfa32..0b7792f89a05c4 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -1006,6 +1006,10 @@ class TargetTransformInfoImplBase {
 
   unsigned getMaxNumArgs() const { return UINT_MAX; }
 
+  unsigned getNumBytesToPadGlobalArray(unsigned Size, Type *ArrayType) const {
+    return 0;
+  }
+
 protected:
   // Obtain the minimum required size to hold the value (without the sign)
   // In case of a vector it returns the min required size for one element.
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index a47462b61e03b2..60704733637614 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1383,6 +1383,12 @@ bool TargetTransformInfo::isVectorShiftByScalarCheap(Type *Ty) const {
   return TTIImpl->isVectorShiftByScalarCheap(Ty);
 }
 
+unsigned
+TargetTransformInfo::getNumBytesToPadGlobalArray(unsigned Size,
+                                                 Type *ArrayType) const {
+  return TTIImpl->getNumBytesToPadGlobalArray(Size, ArrayType);
+}
+
 TargetTransformInfo::Concept::~Concept() = default;
 
 TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 835ae98efb852d..9f6e5e5ab1421c 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -56,6 +56,10 @@ static cl::opt<bool>
     AllowWLSLoops("allow-arm-wlsloops", cl::Hidden, cl::init(true),
                   cl::desc("Enable the generation of WLS loops"));
 
+static cl::opt<bool> UseWidenGlobalArrays(
+    "widen-global-strings", cl::Hidden, cl::init(true),
+    cl::desc("Enable the widening of global strings to alignment boundaries"));
+
 extern cl::opt<TailPredication::Mode> EnableTailPredication;
 
 extern cl::opt<bool> EnableMaskedGatherScatters;
@@ -2805,3 +2809,32 @@ bool ARMTTIImpl::isProfitableToSinkOperands(Instruction *I,
   }
   return true;
 }
+
+unsigned ARMTTIImpl::getNumBytesToPadGlobalArray(unsigned Size,
+                                                 Type *ArrayType) const {
+  if (!UseWidenGlobalArrays) {
+    LLVM_DEBUG(dbgs() << "Padding global arrays disabled\n");
+    return false;
+  }
+
+  // Don't modify none integer array types
+  if (!ArrayType || !ArrayType->isArrayTy() ||
+      !ArrayType->getArrayElementType()->isIntegerTy())
+    return 0;
+
+  // We pad to 4 byte boundaries
+  if (Size % 4 == 0)
+    return 0;
+
+  unsigned NumBytesToPad = 4 - (Size % 4);
+  unsigned NewSize = Size + NumBytesToPad;
+
+  // Max number of bytes that memcpy allows for lowering to load/stores before
+  // it uses library function (__aeabi_memcpy).
+  unsigned MaxMemIntrinsicSize = getMaxMemIntrinsicInlineSizeThreshold();
+
+  if (NewSize > MaxMemIntrinsicSize)
+    return 0;
+
+  return NumBytesToPad;
+}
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index b0a75134ee02b7..3a4f940088b2e3 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -337,6 +337,9 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
 
   bool isProfitableToSinkOperands(Instruction *I,
                                   SmallVectorImpl<Use *> &Ops) const;
+
+  unsigned getNumBytesToPadGlobalArray(unsigned Size, Type *ArrayType) const;
+
   /// @}
 };
 
diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index aae4926e027ff4..4647c65a5c850f 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -92,6 +92,8 @@ STATISTIC(NumInternalFunc, "Number of internal functions");
 STATISTIC(NumColdCC, "Number of functions marked coldcc");
 STATISTIC(NumIFuncsResolved, "Number of statically resolved IFuncs");
 STATISTIC(NumIFuncsDeleted, "Number of IFuncs removed");
+STATISTIC(NumGlobalArraysPadded,
+          "Number of global arrays padded to alignment boundary");
 
 static cl::opt<bool>
     EnableColdCCStressTest("enable-coldcc-stress-test",
@@ -2029,6 +2031,165 @@ OptimizeFunctions(Module &M,
   return Changed;
 }
 
+static bool callInstIsMemcpy(CallInst *CI) {
+  if (!CI)
+    return false;
+
+  Function *F = CI->getCalledFunction();
+  if (!F || !F->isIntrinsic() || F->getIntrinsicID() != Intrinsic::memcpy)
+    return false;
+
+  return true;
+}
+
+static bool destArrayCanBeWidened(CallInst *CI) {
+  auto *IsVolatile = dyn_cast<ConstantInt>(CI->getArgOperand(3));
+  auto *Alloca = dyn_cast<AllocaInst>(CI->getArgOperand(0));
+
+  if (!Alloca || !IsVolatile || IsVolatile->isOne())
+    return false;
+
+  if (!Alloca->isStaticAlloca())
+    return false;
+
+  if (!Alloca->getAllocatedType()->isArrayTy())
+    return false;
+
+  return true;
+}
+
+static GlobalVariable *widenGlobalVariable(GlobalVariable *OldVar, Function *F,
+                                           unsigned NumBytesToPad,
+                                           unsigned NumBytesToCopy) {
+  if (!OldVar->hasInitializer())
+    return nullptr;
+
+  ConstantDataArray *DataArray =
+      dyn_cast<ConstantDataArray>(OldVar->getInitializer());
+  if (!DataArray)
+    return nullptr;
+
+  // Update to be word aligned (memcpy(...,X,...))
+  // create replacement with padded null bytes.
+  StringRef Data = DataArray->getRawDataValues();
+  std::vector<uint8_t> StrData(Data.begin(), Data.end());
+  for (unsigned int p = 0; p < NumBytesToPad; p++)
+    StrData.push_back('\0');
+  auto Arr = ArrayRef(StrData.data(), NumBytesToCopy + NumBytesToPad);
+  // Create new padded version of global variable.
+  Constant *SourceReplace = ConstantDataArray::get(F->getContext(), Arr);
+  GlobalVariable *NewGV = new GlobalVariable(
+      *(F->getParent()), SourceReplace->getType(), true, OldVar->getLinkage(),
+      SourceReplace, SourceReplace->getName());
+  // Copy any other attributes from original global variable
+  // e.g. unamed_addr
+  NewGV->copyAttributesFrom(OldVar);
+  NewGV->takeName(OldVar);
+  return NewGV;
+}
+
+static void widenDestArray(CallInst *CI, const unsigned NumBytesToPad,
+                           const unsigned NumBytesToCopy,
+                           ConstantDataArray *SourceDataArray) {
+
+  auto *Alloca = dyn_cast<AllocaInst>(CI->getArgOperand(0));
+  if (Alloca) {
+    unsigned ElementByteWidth = SourceDataArray->getElementByteSize();
+    unsigned int TotalBytes = NumBytesToCopy + NumBytesToPad;
+    unsigned NumElementsToCopy = divideCeil(TotalBytes, ElementByteWidth);
+    // Update destination array to be word aligned (memcpy(X,...,...))
+    IRBuilder<> BuildAlloca(Alloca);
+    AllocaInst *NewAlloca = BuildAlloca.CreateAlloca(ArrayType::get(
+        Alloca->getAllocatedType()->getArrayElementType(), NumElementsToCopy));
+    NewAlloca->takeName(Alloca);
+    NewAlloca->setAlignment(Alloca->getAlign());
+    Alloca->replaceAllUsesWith(NewAlloca);
+    Alloca->eraseFromParent();
+  }
+}
+
+static bool tryWidenGlobalArrayAndDests(Function *F, GlobalVariable *SourceVar,
+                                        const unsigned NumBytesToPad,
+                                        const unsigned NumBytesToCopy,
+                                        ConstantInt *BytesToCopyOp,
+                                        ConstantDataArray *SourceDataArray) {
+  auto *NewSourceGV =
+      widenGlobalVariable(SourceVar, F, NumBytesToPad, NumBytesToCopy);
+  if (!NewSourceGV)
+    return false;
+
+  // Update arguments of remaining uses  that
+  // are memcpys.
+  for (auto *User : SourceVar->users()) {
+    auto *CI = dyn_cast<CallInst>(User);
+    if (!callInstIsMemcpy(CI) || !destArrayCanBeWidened(CI))
+      continue;
+
+    if (CI->getArgOperand(1) != SourceVar)
+      continue;
+
+    widenDestArray(CI, NumBytesToPad, NumBytesToCopy, SourceDataArray);
+
+    CI->setArgOperand(2, ConstantInt::get(BytesToCopyOp->getType(),
+                                          NumBytesToCopy + NumBytesToPad));
+  }
+  SourceVar->replaceAllUsesWith(NewSourceGV);
+
+  NumGlobalArraysPadded++;
+  return true;
+}
+
+static bool tryWidenGlobalArraysUsedByMemcpy(
+    GlobalVariable *GV,
+    function_ref<TargetTransformInfo &(Function &)> GetTTI) {
+
+  if (!GV->hasInitializer() || !GV->isConstant() || !GV->hasLocalLinkage() ||
+      !GV->hasGlobalUnnamedAddr())
+    return false;
+
+  for (auto *User : GV->users()) {
+    CallInst *CI = dyn_cast<CallInst>(User);
+    if (!callInstIsMemcpy(CI) || !destArrayCanBeWidened(CI))
+      continue;
+
+    Function *F = CI->getCalledFunction();
+
+    auto *BytesToCopyOp = dyn_cast<ConstantInt>(CI->getArgOperand(2));
+    if (!BytesToCopyOp)
+      continue;
+
+    ConstantDataArray *SourceDataArray =
+        dyn_cast<ConstantDataArray>(GV->getInitializer());
+    if (!SourceDataArray)
+      continue;
+
+    unsigned NumBytesToCopy = BytesToCopyOp->getZExtValue();
+
+    auto *Alloca = dyn_cast<AllocaInst>(CI->getArgOperand(0));
+    uint64_t DZSize = Alloca->getAllocatedType()->getArrayNumElements();
+    uint64_t SZSize = SourceDataArray->getType()->getNumElements();
+    unsigned ElementByteWidth = SourceDataArray->getElementByteSize();
+    // Calculate the number of elements to copy while avoiding floored
+    // division of integers returning wrong values i.e. copying one byte
+    // from an array of i16 would yield 0 elements to copy as supposed to 1.
+    unsigned NumElementsToCopy = divideCeil(NumBytesToCopy, ElementByteWidth);
+
+    // For safety purposes lets add a constraint and only pad when
+    // NumElementsToCopy == destination array size ==
+    // source which is a constant
+    if (NumElementsToCopy != DZSize || DZSize != SZSize)
+      continue;
+
+    unsigned NumBytesToPad = GetTTI(*F).getNumBytesToPadGlobalArray(
+        NumBytesToCopy, SourceDataArray->getType());
+    if (NumBytesToPad) {
+      return tryWidenGlobalArrayAndDests(F, GV, NumBytesToPad, NumBytesToCopy,
+                                         BytesToCopyOp, SourceDataArray);
+    }
+  }
+  return false;
+}
+
 static bool
 OptimizeGlobalVars(Module &M,
                    function_ref<TargetTransformInfo &(Function &)> GetTTI,
@@ -2058,6 +2219,10 @@ OptimizeGlobalVars(Module &M,
       continue;
     }
 
+    // For global variable arrays called in a memcpy
+    // we try to pad to nearest valid alignment boundary
+    Changed |= tryWidenGlobalArraysUsedByMemcpy(&GV, GetTTI);
+
     Changed |= processGlobal(GV, GetTTI, GetTLI, LookupDomTree);
   }
   return Changed;
diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-dest-non-array.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-dest-non-array.ll
new file mode 100644
index 00000000000000..ab04e0a5bc697e
--- /dev/null
+++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-dest-non-array.ll
@@ -0,0 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s
+
+ at .i8 = private unnamed_addr constant [3 x i8] [i8 1, i8 2, i8 3] , align 1
+
+define  void @memcpy_struct()  {
+; CHECK-LABEL: define void @memcpy_struct() local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[SOMETHING:%.*]] = alloca { i8, i8, i8 }, align 1
+; CHECK-NEXT:    [[CALL1:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING]])
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING]], ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 3, i1 false)
+; CHECK-NEXT:    ret void
+;
+entry:
+  %something = alloca {i8, i8, i8}, align 1
+  %call1 = call i32 @bar(ptr nonnull %something)
+  call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) %something, ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 3, i1 false)
+  ret void
+}
+
+
+ at .i8_multi = private unnamed_addr constant [2 x [3 x i8]] [[3 x i8] [i8 1, i8 2, i8 3], [3 x i8] [i8 4, i8 5, i8 6]] , align 1
+
+define  void @memcpy_array_multidimensional()  {
+; CHECK-LABEL: define void @memcpy_array_multidimensional() local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[SOMETHING:%.*]] = alloca [2 x [3 x i8]], align 1
+; CHECK-NEXT:    [[CALL1:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING]])
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING]], ptr noundef nonnull align 1 dereferenceable(3) @.i8_multi, i32 3, i1 false)
+; CHECK-NEXT:    ret void
+;
+entry:
+  %something = alloca [2 x [3 x i8]], align 1
+  %call1 = call i32 @bar(ptr nonnull %something)
+  call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) %something, ptr noundef nonnull align 1 dereferenceable(3) @.i8_multi, i32 3, i1 false)
+  ret void
+}
+
+declare i32 @bar(...)
diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-global-dest.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-global-dest.ll
new file mode 100644
index 00000000000000..f435ffdeed2c8e
--- /dev/null
+++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-global-dest.ll
@@ -0,0 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s
+
+; CHECK: [3 x i8]
+ at other = private unnamed_addr global [3 x i8] [i8 1, i8 2, i8 3] , align 1
+; CHECK: [4 x i8]
+ at .i8 = private unnamed_addr constant [3 x i8] [i8 1, i8 2, i8 3] , align 1
+
+define  void @memcpy_multiple()  {
+; CHECK-LABEL: define void @memcpy_multiple() local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[SOMETHING:%.*]] = alloca [4 x i8], align 1
+; CHECK-NEXT:    [[CALL2:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING]])
+; CHECK-NEXT:    [[CALL3:%.*]] = call i32 @bar(ptr nonnull @other)
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) @other, ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 3, i1 false)
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING]], ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 4, i1 false)
+; CHECK-NEXT:    ret void
+;
+entry:
+  %something = alloca [3 x i8], align 1
+  %call1 = call i32 @bar(ptr nonnull %something)
+  %call2 = call i32 @bar(ptr nonnull @other)
+  call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) @other, ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 3, i1 false)
+  call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) %something, ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 3, i1 false)
+  ret void
+}
+
+declare i32 @bar(...)
diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-non-byte-array.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-non-byte-array.ll
new file mode 100644
index 00000000000000..c7ca7271fd3d27
--- /dev/null
+++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-non-byte-array.ll
@@ -0,0 +1,22 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s
+
+ at .i16 = private unnamed_addr constant [5 x i16] [i16 1, i16 2, i16 3, i16 4, i16 5] , align 1
+
+define  void @memcpy_i16_array()  {
+; CHECK-LABEL: define void @memcpy_i16_array() local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[SOMETHING1:%.*]] = alloca [6 x i16], align 1
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(10) [[SOMETHING1]], ptr noundef nonnull align 1 dereferenceable(10) @.i16, i32 12, i1 false)
+; CHECK-NEXT:    [[CALL2:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING1]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %something = alloca [5 x i16], align 1
+  call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(10) %something, ptr noundef nonnull align 1 dereferenceable(10) @.i16, i32 10, i1 false)
+  %call2 = call i32 @bar(ptr nonnull %something)
+  ret void
+}
+
+
+declare i32 @bar(...)
diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-non-const-global.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-non-const-global.ll
new file mode 100644
index 00000000000000..3d9c42fe1f3dd8
--- /dev/null
+++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-non-const-global.ll
@@ -0,0 +1,21 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s
+
+ at .str = unnamed_addr global [3 x i8] c"12\00", align 1
+
+define  void @foo()  {
+; CHECK-LABEL: define void @foo() local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[SOMETHING:%.*]] = alloca [3 x i8], align 1
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING]], ptr noundef nonnull align 1 dereferenceable(3) @.str, i32 3, i1 false)
+; CHECK-NEXT:    [[CALL1:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %something = alloca [3 x i8], align 1
+  call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) %something, ptr noundef nonnull align 1 dereferenceable(3) @.str, i32 3, i1 false)
+  %call1 = call i32 @bar(ptr nonnull %something)
+  ret void
+}
+
+declare i32 @bar(...)
diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-string-multi-use.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-string-multi-use.ll
new file mode 100644
index 00000000000000..e37925a78d2c3a
--- /dev/null
+++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-string-multi-use.ll
@@ -0,0 +1,33 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s
+
+ at .i8 = private unnamed_addr constant [3 x i8] [i8 1, i8 2, i8 3] , align 1
+
+define  void @memcpy_multiple()  {
+; CHECK-LABEL: define void @memcpy_multiple() local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[SOMETHING2:%.*]] = alloca [4 x i8], align 1
+; CHECK-NEXT:    [[SOMETHING1:%.*]] = alloca [4 x i8], align 1
+; CHECK-NEXT:    [[SOMETHING3:%.*]] = alloca [4 x i8], align 1
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING2]], ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 4, i1 false)
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING1]], ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 4, i1 false)
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferencea...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/113288


More information about the llvm-commits mailing list