[llvm] [llvm][ARM]Add ARM widen strings pass (PR #107120)

Nashe Mncube via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 13 04:27:20 PDT 2024


https://github.com/nasherm updated https://github.com/llvm/llvm-project/pull/107120

>From cc8bf21cbdda1b8ae6338602dc06b8ab139bb168 Mon Sep 17 00:00:00 2001
From: nasmnc01 <nashe.mncube at arm.com>
Date: Tue, 3 Sep 2024 16:09:43 +0100
Subject: [PATCH 1/3] [llvm][ARM]Add ARM widen strings pass

- The pass optimizes memcpy calls by padding their destinations and
  sources out to a full word, so that the ARM backend generates
  full-word loads instead of single-byte (ldrb) and/or half-word (ldrh)
  loads. It only pads the destination when it is a stack-allocated,
  constant-size array, and the source when it is a constant string. The
  heuristic that decides whether to pad is very basic and could be
  improved so that more cases can be padded.
- The pass works at the mid-end level.

Change-Id: I1c6371f0962e7ad3c166602b800d041ac1cc7b04
---
 .../llvm/Transforms/Scalar/ARMWidenStrings.h  |  30 +++
 llvm/lib/Passes/PassBuilder.cpp               |   1 +
 llvm/lib/Passes/PassRegistry.def              |   1 +
 .../lib/Transforms/Scalar/ARMWidenStrings.cpp | 227 ++++++++++++++++++
 llvm/lib/Transforms/Scalar/CMakeLists.txt     |   1 +
 .../ARMWidenStrings/arm-widen-strings-1.ll    |  25 ++
 .../ARMWidenStrings/arm-widen-strings-2.ll    |  22 ++
 .../arm-widen-strings-lengths-dont-match.ll   |  28 +++
 .../arm-widen-strings-more-than-64-bytes.ll   |  29 +++
 .../arm-widen-strings-ptrtoint.ll             |  42 ++++
 .../arm-widen-strings-struct-test.ll          |  52 ++++
 .../arm-widen-strings-volatile.ll             |  29 +++
 12 files changed, 487 insertions(+)
 create mode 100755 llvm/include/llvm/Transforms/Scalar/ARMWidenStrings.h
 create mode 100644 llvm/lib/Transforms/Scalar/ARMWidenStrings.cpp
 create mode 100644 llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-1.ll
 create mode 100644 llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-2.ll
 create mode 100644 llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-lengths-dont-match.ll
 create mode 100644 llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-more-than-64-bytes.ll
 create mode 100644 llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-ptrtoint.ll
 create mode 100644 llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-struct-test.ll
 create mode 100644 llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-volatile.ll

diff --git a/llvm/include/llvm/Transforms/Scalar/ARMWidenStrings.h b/llvm/include/llvm/Transforms/Scalar/ARMWidenStrings.h
new file mode 100755
index 00000000000000..3bda666660144a
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Scalar/ARMWidenStrings.h
@@ -0,0 +1,30 @@
+//===- ARMWidenStrings.h --------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the interface for the ArmWidenStrings pass
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_ARMWIDENSTRINGS_H
+#define LLVM_TRANSFORMS_SCALAR_ARMWIDENSTRINGS_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class Module;
+
+class ARMWidenStringsPass : public PassInfoMixin<ARMWidenStringsPass> {
+public:
+  ARMWidenStringsPass() = default;
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_ARMWIDENSTRINGS_H
\ No newline at end of file
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 1df1449fce597c..6b989231cb9861 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -207,6 +207,7 @@
 #include "llvm/Transforms/Instrumentation/ThreadSanitizer.h"
 #include "llvm/Transforms/ObjCARC.h"
 #include "llvm/Transforms/Scalar/ADCE.h"
+#include "llvm/Transforms/Scalar/ARMWidenStrings.h"
 #include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h"
 #include "llvm/Transforms/Scalar/AnnotationRemarks.h"
 #include "llvm/Transforms/Scalar/BDCE.h"
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index d6067089c6b5c1..55566f43e5435d 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -489,6 +489,7 @@ FUNCTION_PASS("view-dom-only", DomOnlyViewer())
 FUNCTION_PASS("view-post-dom", PostDomViewer())
 FUNCTION_PASS("view-post-dom-only", PostDomOnlyViewer())
 FUNCTION_PASS("wasm-eh-prepare", WasmEHPreparePass())
+FUNCTION_PASS("arm-widen-strings", ARMWidenStringsPass())
 #undef FUNCTION_PASS
 
 #ifndef FUNCTION_PASS_WITH_PARAMS
diff --git a/llvm/lib/Transforms/Scalar/ARMWidenStrings.cpp b/llvm/lib/Transforms/Scalar/ARMWidenStrings.cpp
new file mode 100644
index 00000000000000..dd06c2a7ea10d1
--- /dev/null
+++ b/llvm/lib/Transforms/Scalar/ARMWidenStrings.cpp
@@ -0,0 +1,227 @@
+// ARMWidenStrings.cpp - Widen strings to word boundaries to speed up
+// programs that use simple strcpy's with constant strings as source
+// and stack allocated array for destination.
+
+#define DEBUG_TYPE "arm-widen-strings"
+
+#include "llvm/Transforms/Scalar/ARMWidenStrings.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/ValueSymbolTable.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Triple.h"
+#include "llvm/Transforms/Scalar.h"
+
+using namespace llvm;
+
+cl::opt<bool> DisableARMWidenStrings("disable-arm-widen-strings",
+                                     cl::init(false));
+
+namespace {
+
+class ARMWidenStrings {
+public:
+  /*
+  Max number of bytes that memcpy allows for lowering to load/stores before it
+  uses library function (__aeabi_memcpy).  This is the same value returned by
+  ARMSubtarget::getMaxInlineSizeThreshold which I would have called in place of
+  the constant int but can't get access to the subtarget info class from the
+  midend.
+  */
+  const unsigned int MemcpyInliningLimit = 64;
+
+  bool run(Function &F);
+};
+
+static bool IsCharArray(Type *t) {
+  const unsigned int CHAR_BIT_SIZE = 8;
+  return t && t->isArrayTy() && t->getArrayElementType()->isIntegerTy() &&
+         t->getArrayElementType()->getIntegerBitWidth() == CHAR_BIT_SIZE;
+}
+
+bool ARMWidenStrings::run(Function &F) {
+  if (DisableARMWidenStrings) {
+    return false;
+  }
+
+  LLVM_DEBUG(dbgs() << "Running ARMWidenStrings on module " << F.getName()
+                    << "\n");
+
+  for (Function::iterator b = F.begin(); b != F.end(); ++b) {
+    for (BasicBlock::iterator i = b->begin(); i != b->end(); ++i) {
+      CallInst *CI = dyn_cast<CallInst>(i);
+      if (!CI) {
+        continue;
+      }
+
+      Function *CallMemcpy = CI->getCalledFunction();
+      // find out if the current call instruction is a call to llvm memcpy
+      // intrinsics
+      if (CallMemcpy == NULL || !CallMemcpy->isIntrinsic() ||
+          CallMemcpy->getIntrinsicID() != Intrinsic::memcpy) {
+        continue;
+      }
+
+      LLVM_DEBUG(dbgs() << "Found call to strcpy/memcpy:\n" << *CI << "\n");
+
+      auto *Alloca = dyn_cast<AllocaInst>(CI->getArgOperand(0));
+      auto *SourceVar = dyn_cast<GlobalVariable>(CI->getArgOperand(1));
+      auto *BytesToCopy = dyn_cast<ConstantInt>(CI->getArgOperand(2));
+      auto *IsVolatile = dyn_cast<ConstantInt>(CI->getArgOperand(3));
+
+      if (!BytesToCopy) {
+        LLVM_DEBUG(dbgs() << "Number of bytes to copy is null\n");
+        continue;
+      }
+
+      uint64_t NumBytesToCopy = BytesToCopy->getZExtValue();
+
+      if (!Alloca) {
+        LLVM_DEBUG(dbgs() << "Destination isn't a Alloca\n");
+        continue;
+      }
+
+      if (!SourceVar) {
+        LLVM_DEBUG(dbgs() << "Source isn't a global constant variable\n");
+        continue;
+      }
+
+      if (!IsVolatile || IsVolatile->isOne()) {
+        LLVM_DEBUG(
+            dbgs() << "Not widening strings for this memcpy because it's "
+                      "a volatile operations\n");
+        continue;
+      }
+
+      if (NumBytesToCopy % 4 == 0) {
+        LLVM_DEBUG(dbgs() << "Bytes to copy in strcpy/memcpy is already word "
+                             "aligned so nothing to do here.\n");
+        continue;
+      }
+
+      if (!SourceVar->hasInitializer() || !SourceVar->isConstant() ||
+          !SourceVar->hasLocalLinkage() || !SourceVar->hasGlobalUnnamedAddr()) {
+        LLVM_DEBUG(dbgs() << "Source is not constant global, thus it's "
+                             "mutable therefore it's not safe to pad\n");
+        continue;
+      }
+
+      ConstantDataArray *SourceDataArray =
+          dyn_cast<ConstantDataArray>(SourceVar->getInitializer());
+      if (!SourceDataArray || !IsCharArray(SourceDataArray->getType())) {
+        LLVM_DEBUG(dbgs() << "Source isn't a constant data array\n");
+        continue;
+      }
+
+      if (!Alloca->isStaticAlloca()) {
+        LLVM_DEBUG(dbgs() << "Destination allocation isn't a static "
+                             "constant which is locally allocated in this "
+                             "function, so skipping.\n");
+        continue;
+      }
+
+      // Make sure destination is definitley a char array.
+      if (!IsCharArray(Alloca->getAllocatedType())) {
+        LLVM_DEBUG(dbgs() << "Destination doesn't look like a constant char (8 "
+                             "bits) array\n");
+        continue;
+      }
+      LLVM_DEBUG(dbgs() << "With Alloca: " << *Alloca << "\n");
+
+      uint64_t DZSize = Alloca->getAllocatedType()->getArrayNumElements();
+      uint64_t SZSize = SourceDataArray->getType()->getNumElements();
+
+      // For safety purposes lets add a constraint and only padd when
+      // num bytes to copy == destination array size == source string
+      // which is a constant
+      LLVM_DEBUG(dbgs() << "Number of bytes to copy is: " << NumBytesToCopy
+                        << "\n");
+      LLVM_DEBUG(dbgs() << "Size of destination array is: " << DZSize << "\n");
+      LLVM_DEBUG(dbgs() << "Size of source array is: " << SZSize << "\n");
+      if (NumBytesToCopy != DZSize || DZSize != SZSize) {
+        LLVM_DEBUG(dbgs() << "Size of number of bytes to copy, destination "
+                             "array and source string don't match, so "
+                             "skipping\n");
+        continue;
+      }
+      LLVM_DEBUG(dbgs() << "Going to widen.\n");
+      unsigned int NumBytesToPad = 4 - (NumBytesToCopy % 4);
+      LLVM_DEBUG(dbgs() << "Number of bytes to pad by is " << NumBytesToPad
+                        << "\n");
+      unsigned int TotalBytes = NumBytesToCopy + NumBytesToPad;
+
+      if (TotalBytes > MemcpyInliningLimit) {
+        LLVM_DEBUG(
+            dbgs() << "Not going to pad because total number of bytes is "
+                   << TotalBytes
+                   << "  which be greater than the inlining "
+                      "limit for memcpy which is "
+                   << MemcpyInliningLimit << "\n");
+        continue;
+      }
+
+      // update destination char array to be word aligned (memcpy(X,...,...))
+      IRBuilder<> BuildAlloca(Alloca);
+      AllocaInst *NewAlloca = cast<AllocaInst>(BuildAlloca.CreateAlloca(
+          ArrayType::get(Alloca->getAllocatedType()->getArrayElementType(),
+                         NumBytesToCopy + NumBytesToPad)));
+      NewAlloca->takeName(Alloca);
+      NewAlloca->setAlignment(Alloca->getAlign());
+      Alloca->replaceAllUsesWith(NewAlloca);
+
+      LLVM_DEBUG(dbgs() << "Updating users of destination stack object to use "
+                        << "new size\n");
+
+      // update source to be word aligned (memcpy(...,X,...))
+      // create replacement string with padded null bytes.
+      StringRef Data = SourceDataArray->getRawDataValues();
+      std::vector<uint8_t> StrData(Data.begin(), Data.end());
+      for (unsigned int p = 0; p < NumBytesToPad; p++)
+        StrData.push_back('\0');
+      auto Arr = ArrayRef(StrData.data(), TotalBytes);
+
+      // create new padded version of global variable string.
+      Constant *SourceReplace = ConstantDataArray::get(F.getContext(), Arr);
+      GlobalVariable *NewGV = new GlobalVariable(
+          *F.getParent(), SourceReplace->getType(), true,
+          SourceVar->getLinkage(), SourceReplace, SourceReplace->getName());
+
+      // copy any other attributes from original global variable string
+      // e.g. unamed_addr
+      NewGV->copyAttributesFrom(SourceVar);
+      NewGV->takeName(SourceVar);
+
+      // replace intrinsic source.
+      CI->setArgOperand(1, NewGV);
+
+      // Update number of bytes to copy (memcpy(...,...,X))
+      CI->setArgOperand(2,
+                        ConstantInt::get(BytesToCopy->getType(), TotalBytes));
+      LLVM_DEBUG(dbgs() << "Padded dest/source and increased number of bytes:\n"
+                        << *CI << "\n"
+                        << *NewAlloca << "\n");
+    }
+  }
+  return true;
+}
+
+} // end of anonymous namespace
+
+PreservedAnalyses ARMWidenStringsPass::run(Function &F,
+                                           FunctionAnalysisManager &AM) {
+  if (!ARMWidenStrings().run(F))
+    return PreservedAnalyses::all();
+
+  return PreservedAnalyses::none();
+}
diff --git a/llvm/lib/Transforms/Scalar/CMakeLists.txt b/llvm/lib/Transforms/Scalar/CMakeLists.txt
index 939a1457239567..a9607e4ebc6583 100644
--- a/llvm/lib/Transforms/Scalar/CMakeLists.txt
+++ b/llvm/lib/Transforms/Scalar/CMakeLists.txt
@@ -2,6 +2,7 @@ add_llvm_component_library(LLVMScalarOpts
   ADCE.cpp
   AlignmentFromAssumptions.cpp
   AnnotationRemarks.cpp
+  ARMWidenStrings.cpp
   BDCE.cpp
   CallSiteSplitting.cpp
   ConstantHoisting.cpp
diff --git a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-1.ll b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-1.ll
new file mode 100644
index 00000000000000..e11cf372c36a6e
--- /dev/null
+++ b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-1.ll
@@ -0,0 +1,25 @@
+; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes="default<O2>,arm-widen-strings" -S | FileCheck %s
+; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes="default<O0>" -S | FileCheck %s --check-prefix=TURNED-OFF
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+
+; CHECK: [12 x i8]
+; TURNED-OFF-NOT: [12 x i8]
+ at .str = private unnamed_addr constant [10 x i8] c"123456789\00", align 1
+
+; Function Attrs: nounwind
+define hidden void @foo() #0 {
+entry:
+; CHECK: %something = alloca [12 x i8]
+; TURNED-OFF-NOT: %something = alloca [12 x i8]
+  %something = alloca [10 x i8], align 1
+  %arraydecay = getelementptr inbounds [10 x i8], ptr %something, i32 0, i32 0
+; CHECK: @llvm.memcpy.p0.p0.i32
+  %call = call ptr @strcpy(ptr %arraydecay, ptr @.str)
+  %arraydecay1 = getelementptr inbounds [10 x i8], ptr %something, i32 0, i32 0
+  %call2 = call i32 @bar(ptr %arraydecay1)
+  ret void
+}
+
+declare ptr @strcpy(ptr, ptr) #1
+
+declare i32 @bar(...) #1
diff --git a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-2.ll b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-2.ll
new file mode 100644
index 00000000000000..2df8108f445fe1
--- /dev/null
+++ b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-2.ll
@@ -0,0 +1,22 @@
+; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes="default<O2>,arm-widen-strings" -S | FileCheck %s
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+
+; CHECK: [64 x i8]
+ at .str = private unnamed_addr constant [62 x i8] c"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\00", align 1
+
+; Function Attrs: nounwind
+define hidden void @foo() #0 {
+entry:
+; CHECK: %something = alloca [64 x i8]
+  %something = alloca [62 x i8], align 1
+  %arraydecay = getelementptr inbounds [62 x i8], ptr %something, i32 0, i32 0
+; CHECK: @llvm.memcpy.p0.p0.i32
+  %call = call ptr @strcpy(ptr %arraydecay, ptr @.str)
+  %arraydecay1 = getelementptr inbounds [62 x i8], ptr %something, i32 0, i32 0
+  %call2 = call i32 @bar(ptr %arraydecay1)
+  ret void
+}
+
+declare ptr @strcpy(ptr, ptr) #1
+
+declare i32 @bar(...) #1
diff --git a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-lengths-dont-match.ll b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-lengths-dont-match.ll
new file mode 100644
index 00000000000000..a0c1e213298167
--- /dev/null
+++ b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-lengths-dont-match.ll
@@ -0,0 +1,28 @@
+; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=arm-widen-strings -S | FileCheck %s
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "thumbv6m-arm-none-eabi"
+
+; CHECK: [17 x i8]
+ at .str = private unnamed_addr constant [17 x i8] c"aaaaaaaaaaaaaaaa\00", align 1
+
+; Function Attrs: nounwind
+define hidden void @foo() local_unnamed_addr #0 {
+entry:
+  %something = alloca [20 x i8], align 1
+  call void @llvm.lifetime.start(i64 20, ptr nonnull %something) #3
+  call void @llvm.memcpy.p0i8.p0i8.i32(ptr align 1 nonnull %something, ptr align 1 @.str, i32 17, i1 false)
+  %call2 = call i32 @bar(ptr nonnull %something) #3
+  call void @llvm.lifetime.end(i64 20, ptr nonnull %something) #3
+  ret void
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start(i64, ptr nocapture) #1
+
+declare i32 @bar(...) local_unnamed_addr #2
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end(i64, ptr nocapture) #1
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(ptr nocapture writeonly, ptr nocapture readonly, i32, i1) #1
diff --git a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-more-than-64-bytes.ll b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-more-than-64-bytes.ll
new file mode 100644
index 00000000000000..67cb99023c5328
--- /dev/null
+++ b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-more-than-64-bytes.ll
@@ -0,0 +1,29 @@
+; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=arm-widen-strings -S | FileCheck %s
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "thumbv6m-arm-none-eabi"
+
+; CHECK: [65 x i8]
+; CHECK-NOT: [68 x i8]
+ at .str = private unnamed_addr constant [65 x i8] c"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazzz\00", align 1
+
+; Function Attrs: nounwind
+define hidden void @foo() local_unnamed_addr #0 {
+entry:
+  %something = alloca [65 x i8], align 1
+  call void @llvm.lifetime.start(i64 65, ptr nonnull %something) #3
+  call void @llvm.memcpy.p0i8.p0i8.i32(ptr align 1 nonnull %something, ptr align 1 @.str, i32 65, i1 false)
+  %call2 = call i32 @bar(ptr nonnull %something) #3
+  call void @llvm.lifetime.end(i64 65, ptr nonnull %something) #3
+  ret void
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start(i64, ptr nocapture) #1
+
+declare i32 @bar(...) local_unnamed_addr #2
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end(i64, ptr nocapture) #1
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(ptr nocapture writeonly, ptr nocapture readonly, i32, i1) #1
diff --git a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-ptrtoint.ll b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-ptrtoint.ll
new file mode 100644
index 00000000000000..3f02c02ad845b2
--- /dev/null
+++ b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-ptrtoint.ll
@@ -0,0 +1,42 @@
+; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=arm-widen-strings -S | FileCheck %s
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+
+; CHECK: [48 x i8]
+ at f.string1 = private unnamed_addr constant [45 x i8] c"The quick brown dog jumps over the lazy fox.\00", align 1
+
+; Function Attrs: nounwind
+define hidden i32 @f() {
+entry:
+  %string1 = alloca [45 x i8], align 1
+  %pos = alloca i32, align 4
+  %token = alloca ptr, align 4
+  call void @llvm.lifetime.start.p0i8(i64 45, ptr %string1)
+  call void @llvm.memcpy.p0i8.p0i8.i32(ptr align 1 %string1, ptr align 1 @f.string1, i32 45, i1 false)
+  call void @llvm.lifetime.start.p0i8(i64 4, ptr %pos)
+  call void @llvm.lifetime.start.p0i8(i64 4, ptr %token)
+  %call = call ptr @strchr(ptr %string1, i32 101)
+  store ptr %call, ptr %token, align 4
+  %0 = load ptr, ptr %token, align 4
+  %sub.ptr.lhs.cast = ptrtoint ptr %0 to i32
+  %sub.ptr.rhs.cast = ptrtoint ptr %string1 to i32
+  %sub.ptr.sub = sub i32 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
+  %add = add nsw i32 %sub.ptr.sub, 1
+  store i32 %add, ptr %pos, align 4
+  %1 = load i32, ptr %pos, align 4
+  call void @llvm.lifetime.end.p0i8(i64 4, ptr %token)
+  call void @llvm.lifetime.end.p0i8(i64 4, ptr %pos)
+  call void @llvm.lifetime.end.p0i8(i64 45, ptr %string1)
+  ret i32 %1
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64, ptr nocapture)
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(ptr nocapture writeonly, ptr nocapture readonly, i32, i1)
+
+; Function Attrs: nounwind
+declare ptr @strchr(ptr, i32)
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64, ptr nocapture)
diff --git a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-struct-test.ll b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-struct-test.ll
new file mode 100644
index 00000000000000..937bfaecd8e3e9
--- /dev/null
+++ b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-struct-test.ll
@@ -0,0 +1,52 @@
+; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=arm-widen-strings -S | FileCheck %s
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "thumbv6m-arm-none-eabi"
+
+%struct.P = type { i32, [13 x i8] }
+
+; CHECK-NOT: [16 x i8]
+ at .str = private unnamed_addr constant [13 x i8] c"hello world\0A\00", align 1
+ at .str.1 = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1
+ at __ARM_use_no_argv = global i32 1, section ".ARM.use_no_argv", align 4
+ at llvm.used = appending global [1 x ptr] [ptr @__ARM_use_no_argv], section "llvm.metadata"
+
+; Function Attrs: nounwind
+define hidden i32 @main() local_unnamed_addr #0 {
+entry:
+  %p = alloca %struct.P, align 4
+  call void @llvm.lifetime.start(i64 20, ptr nonnull %p) #2
+  store i32 10, ptr %p, align 4, !tbaa !3
+  %arraydecay = getelementptr inbounds %struct.P, ptr %p, i32 0, i32 1, i32 0
+  call void @llvm.memcpy.p0i8.p0i8.i32(ptr align 1 %arraydecay, ptr align 1 @.str, i32 13, i1 false)
+  %puts = call i32 @puts(ptr %arraydecay)
+  call void @llvm.lifetime.end(i64 20, ptr nonnull %p) #2
+  ret i32 0
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start(i64, ptr nocapture) #1
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end(i64, ptr nocapture) #1
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(ptr nocapture writeonly, ptr nocapture readonly, i32, i1) #1
+
+; Function Attrs: nounwind
+declare i32 @puts(ptr nocapture readonly) #2
+
+attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-m0" "target-features"="+strict-align" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { argmemonly nounwind }
+attributes #2 = { nounwind }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 1, !"min_enum_size", i32 4}
+!2 = !{!"Component: ARM Compiler 6 devbuild Tool: armclang [devbuild]"}
+!3 = !{!4, !5, i64 0}
+!4 = !{!"P", !5, i64 0, !6, i64 4}
+!5 = !{!"int", !6, i64 0}
+!6 = !{!"omnipotent char", !7, i64 0}
+!7 = !{!"Simple C/C++ TBAA"}
diff --git a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-volatile.ll b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-volatile.ll
new file mode 100644
index 00000000000000..6cbd823a18c367
--- /dev/null
+++ b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-volatile.ll
@@ -0,0 +1,29 @@
+; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=arm-widen-strings -S | FileCheck %s
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "thumbv6m-arm-none-eabi"
+
+; CHECK-NOT: [64 x i8]
+ at .str = private unnamed_addr constant [62 x i8] c"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\00", align 1
+
+; Function Attrs: nounwind
+define hidden void @foo() local_unnamed_addr #0 {
+entry:
+  %something = alloca [62 x i8], align 1
+  %0 = getelementptr inbounds [62 x i8], ptr %something, i32 0, i32 0
+  call void @llvm.lifetime.start(i64 62, ptr nonnull %0) #3
+  call void @llvm.memcpy.p0i8.p0i8.i32(ptr align 1 nonnull %0, ptr align 1 @.str, i32 62, i1 true)
+  %call2 = call i32 @bar(ptr nonnull %0) #3
+  call void @llvm.lifetime.end(i64 62, ptr nonnull %0) #3
+  ret void
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start(i64, ptr nocapture) #1
+
+declare i32 @bar(...) local_unnamed_addr #2
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end(i64, ptr nocapture) #1
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(ptr nocapture writeonly, ptr nocapture readonly, i32, i1) #1

>From 3f2fea46fdb265ac83d913bf21ea7443798a87d3 Mon Sep 17 00:00:00 2001
From: nasmnc01 <nashe.mncube at arm.com>
Date: Wed, 11 Sep 2024 16:54:42 +0100
Subject: [PATCH 2/3] Responding to review comments

Change-Id: I492ea4e5b6f589e5d877eeb6be31f7ab4720be9b
---
 .../lib/Transforms/Scalar/ARMWidenStrings.cpp | 61 +++++--------------
 1 file changed, 15 insertions(+), 46 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/ARMWidenStrings.cpp b/llvm/lib/Transforms/Scalar/ARMWidenStrings.cpp
index dd06c2a7ea10d1..1439e8af04292e 100644
--- a/llvm/lib/Transforms/Scalar/ARMWidenStrings.cpp
+++ b/llvm/lib/Transforms/Scalar/ARMWidenStrings.cpp
@@ -1,6 +1,16 @@
-// ARMWidenStrings.cpp - Widen strings to word boundaries to speed up
-// programs that use simple strcpy's with constant strings as source
-// and stack allocated array for destination.
+//===- ARMWidenStrings.cpp - Widen strings to ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Widen strings to word boundaries to speed up  programs that use simple
+// strcpy's with constant strings as source and stack allocated array for
+// destination.
+//
+//===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "arm-widen-strings"
 
@@ -25,8 +35,7 @@
 
 using namespace llvm;
 
-cl::opt<bool> DisableARMWidenStrings("disable-arm-widen-strings",
-                                     cl::init(false));
+cl::opt<bool> DisableARMWidenStrings("disable-arm-widen-strings");
 
 namespace {
 
@@ -73,71 +82,53 @@ bool ARMWidenStrings::run(Function &F) {
         continue;
       }
 
-      LLVM_DEBUG(dbgs() << "Found call to strcpy/memcpy:\n" << *CI << "\n");
-
       auto *Alloca = dyn_cast<AllocaInst>(CI->getArgOperand(0));
       auto *SourceVar = dyn_cast<GlobalVariable>(CI->getArgOperand(1));
       auto *BytesToCopy = dyn_cast<ConstantInt>(CI->getArgOperand(2));
       auto *IsVolatile = dyn_cast<ConstantInt>(CI->getArgOperand(3));
 
       if (!BytesToCopy) {
-        LLVM_DEBUG(dbgs() << "Number of bytes to copy is null\n");
         continue;
       }
 
       uint64_t NumBytesToCopy = BytesToCopy->getZExtValue();
 
       if (!Alloca) {
-        LLVM_DEBUG(dbgs() << "Destination isn't a Alloca\n");
         continue;
       }
 
+      // Source isn't a global constant variable
       if (!SourceVar) {
-        LLVM_DEBUG(dbgs() << "Source isn't a global constant variable\n");
         continue;
       }
 
       if (!IsVolatile || IsVolatile->isOne()) {
-        LLVM_DEBUG(
-            dbgs() << "Not widening strings for this memcpy because it's "
-                      "a volatile operations\n");
         continue;
       }
 
       if (NumBytesToCopy % 4 == 0) {
-        LLVM_DEBUG(dbgs() << "Bytes to copy in strcpy/memcpy is already word "
-                             "aligned so nothing to do here.\n");
         continue;
       }
 
       if (!SourceVar->hasInitializer() || !SourceVar->isConstant() ||
           !SourceVar->hasLocalLinkage() || !SourceVar->hasGlobalUnnamedAddr()) {
-        LLVM_DEBUG(dbgs() << "Source is not constant global, thus it's "
-                             "mutable therefore it's not safe to pad\n");
         continue;
       }
 
       ConstantDataArray *SourceDataArray =
           dyn_cast<ConstantDataArray>(SourceVar->getInitializer());
       if (!SourceDataArray || !IsCharArray(SourceDataArray->getType())) {
-        LLVM_DEBUG(dbgs() << "Source isn't a constant data array\n");
         continue;
       }
 
       if (!Alloca->isStaticAlloca()) {
-        LLVM_DEBUG(dbgs() << "Destination allocation isn't a static "
-                             "constant which is locally allocated in this "
-                             "function, so skipping.\n");
         continue;
       }
 
       // Make sure destination is definitley a char array.
       if (!IsCharArray(Alloca->getAllocatedType())) {
-        LLVM_DEBUG(dbgs() << "Destination doesn't look like a constant char (8 "
-                             "bits) array\n");
         continue;
       }
-      LLVM_DEBUG(dbgs() << "With Alloca: " << *Alloca << "\n");
 
       uint64_t DZSize = Alloca->getAllocatedType()->getArrayNumElements();
       uint64_t SZSize = SourceDataArray->getType()->getNumElements();
@@ -145,29 +136,13 @@ bool ARMWidenStrings::run(Function &F) {
       // For safety purposes lets add a constraint and only padd when
       // num bytes to copy == destination array size == source string
       // which is a constant
-      LLVM_DEBUG(dbgs() << "Number of bytes to copy is: " << NumBytesToCopy
-                        << "\n");
-      LLVM_DEBUG(dbgs() << "Size of destination array is: " << DZSize << "\n");
-      LLVM_DEBUG(dbgs() << "Size of source array is: " << SZSize << "\n");
       if (NumBytesToCopy != DZSize || DZSize != SZSize) {
-        LLVM_DEBUG(dbgs() << "Size of number of bytes to copy, destination "
-                             "array and source string don't match, so "
-                             "skipping\n");
         continue;
       }
-      LLVM_DEBUG(dbgs() << "Going to widen.\n");
       unsigned int NumBytesToPad = 4 - (NumBytesToCopy % 4);
-      LLVM_DEBUG(dbgs() << "Number of bytes to pad by is " << NumBytesToPad
-                        << "\n");
       unsigned int TotalBytes = NumBytesToCopy + NumBytesToPad;
 
       if (TotalBytes > MemcpyInliningLimit) {
-        LLVM_DEBUG(
-            dbgs() << "Not going to pad because total number of bytes is "
-                   << TotalBytes
-                   << "  which be greater than the inlining "
-                      "limit for memcpy which is "
-                   << MemcpyInliningLimit << "\n");
         continue;
       }
 
@@ -180,9 +155,6 @@ bool ARMWidenStrings::run(Function &F) {
       NewAlloca->setAlignment(Alloca->getAlign());
       Alloca->replaceAllUsesWith(NewAlloca);
 
-      LLVM_DEBUG(dbgs() << "Updating users of destination stack object to use "
-                        << "new size\n");
-
       // update source to be word aligned (memcpy(...,X,...))
       // create replacement string with padded null bytes.
       StringRef Data = SourceDataArray->getRawDataValues();
@@ -208,9 +180,6 @@ bool ARMWidenStrings::run(Function &F) {
       // Update number of bytes to copy (memcpy(...,...,X))
       CI->setArgOperand(2,
                         ConstantInt::get(BytesToCopy->getType(), TotalBytes));
-      LLVM_DEBUG(dbgs() << "Padded dest/source and increased number of bytes:\n"
-                        << *CI << "\n"
-                        << *NewAlloca << "\n");
     }
   }
   return true;

>From 3b0405bfe44c2cdab939a58a60896268b122e0fa Mon Sep 17 00:00:00 2001
From: nasmnc01 <nashe.mncube at arm.com>
Date: Fri, 13 Sep 2024 12:24:32 +0100
Subject: [PATCH 3/3] Making ARMWidenStrings to be target independent

Change-Id: Ic6ed9a549e39020e8c04b38bc21ba8162b4ebfd9
---
 .../llvm/Analysis/TargetTransformInfo.h       |   8 +
 .../llvm/Analysis/TargetTransformInfoImpl.h   |   2 +
 .../llvm/Transforms/Scalar/ARMWidenStrings.h  |  30 ---
 llvm/lib/Analysis/TargetTransformInfo.cpp     |   4 +
 llvm/lib/Passes/PassBuilder.cpp               |   1 -
 llvm/lib/Passes/PassRegistry.def              |   1 -
 .../lib/Target/ARM/ARMTargetTransformInfo.cpp |   6 +
 llvm/lib/Target/ARM/ARMTargetTransformInfo.h  |   2 +
 llvm/lib/Transforms/IPO/GlobalOpt.cpp         | 149 +++++++++++++
 .../lib/Transforms/Scalar/ARMWidenStrings.cpp | 196 ------------------
 llvm/lib/Transforms/Scalar/CMakeLists.txt     |   1 -
 .../ARMWidenStrings/arm-widen-strings-1.ll    |   2 +-
 .../ARMWidenStrings/arm-widen-strings-2.ll    |   2 +-
 .../arm-widen-strings-lengths-dont-match.ll   |   2 +-
 .../arm-widen-strings-more-than-64-bytes.ll   |   2 +-
 .../arm-widen-strings-ptrtoint.ll             |   2 +-
 .../arm-widen-strings-struct-test.ll          |   2 +-
 .../arm-widen-strings-volatile.ll             |   2 +-
 18 files changed, 178 insertions(+), 236 deletions(-)
 delete mode 100755 llvm/include/llvm/Transforms/Scalar/ARMWidenStrings.h
 delete mode 100644 llvm/lib/Transforms/Scalar/ARMWidenStrings.cpp

diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index b2124c6106198e..2acdd561f61ce0 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1786,6 +1786,9 @@ class TargetTransformInfo {
   /// \return The maximum number of function arguments the target supports.
   unsigned getMaxNumArgs() const;
 
+  /// \return true if global strings should be padded to an alignment boundary
+  bool useWidenGlobalStrings() const;
+
   /// @}
 
 private:
@@ -2179,6 +2182,7 @@ class TargetTransformInfo::Concept {
   getVPLegalizationStrategy(const VPIntrinsic &PI) const = 0;
   virtual bool hasArmWideBranch(bool Thumb) const = 0;
   virtual unsigned getMaxNumArgs() const = 0;
+  virtual bool useWidenGlobalStrings() const = 0;
 };
 
 template <typename T>
@@ -2952,6 +2956,10 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
   unsigned getMaxNumArgs() const override {
     return Impl.getMaxNumArgs();
   }
+
+  bool useWidenGlobalStrings() const override {
+    return Impl.useWidenGlobalStrings();
+  }
 };
 
 template <typename T>
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 90eef93a2a54d5..ac899608be0efd 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -973,6 +973,8 @@ class TargetTransformInfoImplBase {
 
   unsigned getMaxNumArgs() const { return UINT_MAX; }
 
+  bool useWidenGlobalStrings() const { return false; }
+
 protected:
   // Obtain the minimum required size to hold the value (without the sign)
   // In case of a vector it returns the min required size for one element.
diff --git a/llvm/include/llvm/Transforms/Scalar/ARMWidenStrings.h b/llvm/include/llvm/Transforms/Scalar/ARMWidenStrings.h
deleted file mode 100755
index 3bda666660144a..00000000000000
--- a/llvm/include/llvm/Transforms/Scalar/ARMWidenStrings.h
+++ /dev/null
@@ -1,30 +0,0 @@
-//===- ARMWidenStrings.h --------------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file provides the interface for the ArmWidenStrings pass
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TRANSFORMS_SCALAR_ARMWIDENSTRINGS_H
-#define LLVM_TRANSFORMS_SCALAR_ARMWIDENSTRINGS_H
-
-#include "llvm/IR/PassManager.h"
-
-namespace llvm {
-
-class Module;
-
-class ARMWidenStringsPass : public PassInfoMixin<ARMWidenStringsPass> {
-public:
-  ARMWidenStringsPass() = default;
-  PreservedAnalyses run(Function &F, FunctionAnalysisManager &);
-};
-
-} // end namespace llvm
-
-#endif // LLVM_TRANSFORMS_SCALAR_ARMWIDENSTRINGS_H
\ No newline at end of file
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 2c26493bd3f1ca..e06d7bbb119dab 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1348,6 +1348,10 @@ bool TargetTransformInfo::hasActiveVectorLength(unsigned Opcode, Type *DataType,
   return TTIImpl->hasActiveVectorLength(Opcode, DataType, Alignment);
 }
 
+bool TargetTransformInfo::useWidenGlobalStrings() const {
+  return TTIImpl->useWidenGlobalStrings();
+}
+
 TargetTransformInfo::Concept::~Concept() = default;
 
 TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 6b989231cb9861..1df1449fce597c 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -207,7 +207,6 @@
 #include "llvm/Transforms/Instrumentation/ThreadSanitizer.h"
 #include "llvm/Transforms/ObjCARC.h"
 #include "llvm/Transforms/Scalar/ADCE.h"
-#include "llvm/Transforms/Scalar/ARMWidenStrings.h"
 #include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h"
 #include "llvm/Transforms/Scalar/AnnotationRemarks.h"
 #include "llvm/Transforms/Scalar/BDCE.h"
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 55566f43e5435d..d6067089c6b5c1 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -489,7 +489,6 @@ FUNCTION_PASS("view-dom-only", DomOnlyViewer())
 FUNCTION_PASS("view-post-dom", PostDomViewer())
 FUNCTION_PASS("view-post-dom-only", PostDomOnlyViewer())
 FUNCTION_PASS("wasm-eh-prepare", WasmEHPreparePass())
-FUNCTION_PASS("arm-widen-strings", ARMWidenStringsPass())
 #undef FUNCTION_PASS
 
 #ifndef FUNCTION_PASS_WITH_PARAMS
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 912569a8fec118..7bc91e2935f3dc 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -56,6 +56,10 @@ static cl::opt<bool>
     AllowWLSLoops("allow-arm-wlsloops", cl::Hidden, cl::init(true),
                   cl::desc("Enable the generation of WLS loops"));
 
+static cl::opt<unsigned> UseWidenGlobalStrings(
+    "widen-global-strings", cl::Hidden, cl::init(true),
+    cl::desc("Enable the widening of global strings to alignment boundaries"));
+
 extern cl::opt<TailPredication::Mode> EnableTailPredication;
 
 extern cl::opt<bool> EnableMaskedGatherScatters;
@@ -2644,3 +2648,5 @@ bool ARMTTIImpl::hasArmWideBranch(bool Thumb) const {
     return ST->hasARMOps();
   }
 }
+
+bool ARMTTIImpl::useWidenGlobalStrings() const { return UseWidenGlobalStrings; }
\ No newline at end of file
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index bea088065172e0..29b9d8a35eb5eb 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -334,6 +334,8 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
 
   bool hasArmWideBranch(bool Thumb) const;
 
+  bool useWidenGlobalStrings() const;
+
   /// @}
 };
 
diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index aae4926e027ff4..84c1585fede11c 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -92,6 +92,8 @@ STATISTIC(NumInternalFunc, "Number of internal functions");
 STATISTIC(NumColdCC, "Number of functions marked coldcc");
 STATISTIC(NumIFuncsResolved, "Number of statically resolved IFuncs");
 STATISTIC(NumIFuncsDeleted, "Number of IFuncs removed");
+STATISTIC(NumGlobalStringsPadded,
+          "Number of global strings padded to alignment boundary");
 
 static cl::opt<bool>
     EnableColdCCStressTest("enable-coldcc-stress-test",
@@ -2029,6 +2031,145 @@ OptimizeFunctions(Module &M,
   return Changed;
 }
 
+static bool IsCharArray(Type *t) {
+  const unsigned int CHAR_BIT_SIZE = 8;
+  return t && t->isArrayTy() && t->getArrayElementType()->isIntegerTy() &&
+         t->getArrayElementType()->getIntegerBitWidth() == CHAR_BIT_SIZE;
+}
+
+static bool
+tryWidenGlobalStrings(Function &F,
+                      function_ref<TargetTransformInfo &(Function &)> GetTTI) {
+  bool changed = false;
+
+  for (Function::iterator b = F.begin(); b != F.end(); ++b) {
+    for (BasicBlock::iterator i = b->begin(); i != b->end(); ++i) {
+      CallInst *CI = dyn_cast<CallInst>(i);
+      if (!CI) {
+        continue;
+      }
+
+      TargetTransformInfo &TTI = GetTTI(F);
+
+      Function *CallMemcpy = CI->getCalledFunction();
+      // find out if the current call instruction is a call to llvm memcpy
+      // intrinsics
+      if (CallMemcpy == NULL || !CallMemcpy->isIntrinsic() ||
+          CallMemcpy->getIntrinsicID() != Intrinsic::memcpy) {
+        continue;
+      }
+
+      auto *Alloca = dyn_cast<AllocaInst>(CI->getArgOperand(0));
+      auto *SourceVar = dyn_cast<GlobalVariable>(CI->getArgOperand(1));
+      auto *BytesToCopy = dyn_cast<ConstantInt>(CI->getArgOperand(2));
+      auto *IsVolatile = dyn_cast<ConstantInt>(CI->getArgOperand(3));
+
+      if (!BytesToCopy) {
+        continue;
+      }
+
+      uint64_t NumBytesToCopy = BytesToCopy->getZExtValue();
+
+      if (!Alloca) {
+        continue;
+      }
+
+      // Source isn't a global constant variable
+      if (!SourceVar) {
+        continue;
+      }
+
+      if (!IsVolatile || IsVolatile->isOne()) {
+        continue;
+      }
+
+      if (NumBytesToCopy % 4 == 0) {
+        continue;
+      }
+
+      if (!SourceVar->hasInitializer() || !SourceVar->isConstant() ||
+          !SourceVar->hasLocalLinkage() || !SourceVar->hasGlobalUnnamedAddr()) {
+        continue;
+      }
+
+      ConstantDataArray *SourceDataArray =
+          dyn_cast<ConstantDataArray>(SourceVar->getInitializer());
+      if (!SourceDataArray || !IsCharArray(SourceDataArray->getType())) {
+        continue;
+      }
+
+      if (!Alloca->isStaticAlloca()) {
+        continue;
+      }
+
+      // Make sure destination is definitely a char array.
+      if (!IsCharArray(Alloca->getAllocatedType())) {
+        continue;
+      }
+
+      uint64_t DZSize = Alloca->getAllocatedType()->getArrayNumElements();
+      uint64_t SZSize = SourceDataArray->getType()->getNumElements();
+
+      // For safety purposes, let's add a constraint and only pad when
+      // num bytes to copy == destination array size == source string size,
+      // all of which are constants
+      if (NumBytesToCopy != DZSize || DZSize != SZSize) {
+        continue;
+      }
+      unsigned int NumBytesToPad = 4 - (NumBytesToCopy % 4);
+      unsigned int TotalBytes = NumBytesToCopy + NumBytesToPad;
+
+      /*
+      Max number of bytes that memcpy allows for lowering to load/stores before
+      it uses library function (__aeabi_memcpy).
+      */
+      unsigned MaxMemIntrinsicSize =
+          TTI.getMaxMemIntrinsicInlineSizeThreshold();
+      if (TotalBytes > MaxMemIntrinsicSize) {
+        continue;
+      }
+
+      // update destination char array to be word aligned (memcpy(X,...,...))
+      IRBuilder<> BuildAlloca(Alloca);
+      AllocaInst *NewAlloca = cast<AllocaInst>(BuildAlloca.CreateAlloca(
+          ArrayType::get(Alloca->getAllocatedType()->getArrayElementType(),
+                         NumBytesToCopy + NumBytesToPad)));
+      NewAlloca->takeName(Alloca);
+      NewAlloca->setAlignment(Alloca->getAlign());
+      Alloca->replaceAllUsesWith(NewAlloca);
+
+      // update source to be word aligned (memcpy(...,X,...))
+      // create replacement string with padded null bytes.
+      StringRef Data = SourceDataArray->getRawDataValues();
+      std::vector<uint8_t> StrData(Data.begin(), Data.end());
+      for (unsigned int p = 0; p < NumBytesToPad; p++)
+        StrData.push_back('\0');
+      auto Arr = ArrayRef(StrData.data(), TotalBytes);
+
+      // create new padded version of global variable string.
+      Constant *SourceReplace = ConstantDataArray::get(F.getContext(), Arr);
+      GlobalVariable *NewGV = new GlobalVariable(
+          *F.getParent(), SourceReplace->getType(), true,
+          SourceVar->getLinkage(), SourceReplace, SourceReplace->getName());
+
+      // copy any other attributes from original global variable string
+      // e.g. unnamed_addr
+      NewGV->copyAttributesFrom(SourceVar);
+      NewGV->takeName(SourceVar);
+
+      // replace intrinsic source.
+      CI->setArgOperand(1, NewGV);
+
+      // Update number of bytes to copy (memcpy(...,...,X))
+      CI->setArgOperand(2,
+                        ConstantInt::get(BytesToCopy->getType(), TotalBytes));
+      NumGlobalStringsPadded++;
+      changed |= true;
+    }
+  }
+  return changed;
+}
+
 static bool
 OptimizeGlobalVars(Module &M,
                    function_ref<TargetTransformInfo &(Function &)> GetTTI,
@@ -2058,6 +2199,14 @@ OptimizeGlobalVars(Module &M,
       continue;
     }
 
+    // Pad global strings if allowed
+    for (Function &F : llvm::make_early_inc_range(M)) {
+      TargetTransformInfo &TTI = GetTTI(F);
+      if (TTI.useWidenGlobalStrings()) {
+        Changed |= tryWidenGlobalStrings(F, GetTTI);
+      }
+    }
+
     Changed |= processGlobal(GV, GetTTI, GetTLI, LookupDomTree);
   }
   return Changed;
diff --git a/llvm/lib/Transforms/Scalar/ARMWidenStrings.cpp b/llvm/lib/Transforms/Scalar/ARMWidenStrings.cpp
deleted file mode 100644
index 1439e8af04292e..00000000000000
--- a/llvm/lib/Transforms/Scalar/ARMWidenStrings.cpp
+++ /dev/null
@@ -1,196 +0,0 @@
-//===- ARMWidenStrings.cpp - Widen strings to ---------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Widen strings to word boundaries to speed up  programs that use simple
-// strcpy's with constant strings as source and stack allocated array for
-// destination.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "arm-widen-strings"
-
-#include "llvm/Transforms/Scalar/ARMWidenStrings.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Operator.h"
-#include "llvm/IR/ValueSymbolTable.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/TargetParser/Triple.h"
-#include "llvm/Transforms/Scalar.h"
-
-using namespace llvm;
-
-cl::opt<bool> DisableARMWidenStrings("disable-arm-widen-strings");
-
-namespace {
-
-class ARMWidenStrings {
-public:
-  /*
-  Max number of bytes that memcpy allows for lowering to load/stores before it
-  uses library function (__aeabi_memcpy).  This is the same value returned by
-  ARMSubtarget::getMaxInlineSizeThreshold which I would have called in place of
-  the constant int but can't get access to the subtarget info class from the
-  midend.
-  */
-  const unsigned int MemcpyInliningLimit = 64;
-
-  bool run(Function &F);
-};
-
-static bool IsCharArray(Type *t) {
-  const unsigned int CHAR_BIT_SIZE = 8;
-  return t && t->isArrayTy() && t->getArrayElementType()->isIntegerTy() &&
-         t->getArrayElementType()->getIntegerBitWidth() == CHAR_BIT_SIZE;
-}
-
-bool ARMWidenStrings::run(Function &F) {
-  if (DisableARMWidenStrings) {
-    return false;
-  }
-
-  LLVM_DEBUG(dbgs() << "Running ARMWidenStrings on module " << F.getName()
-                    << "\n");
-
-  for (Function::iterator b = F.begin(); b != F.end(); ++b) {
-    for (BasicBlock::iterator i = b->begin(); i != b->end(); ++i) {
-      CallInst *CI = dyn_cast<CallInst>(i);
-      if (!CI) {
-        continue;
-      }
-
-      Function *CallMemcpy = CI->getCalledFunction();
-      // find out if the current call instruction is a call to llvm memcpy
-      // intrinsics
-      if (CallMemcpy == NULL || !CallMemcpy->isIntrinsic() ||
-          CallMemcpy->getIntrinsicID() != Intrinsic::memcpy) {
-        continue;
-      }
-
-      auto *Alloca = dyn_cast<AllocaInst>(CI->getArgOperand(0));
-      auto *SourceVar = dyn_cast<GlobalVariable>(CI->getArgOperand(1));
-      auto *BytesToCopy = dyn_cast<ConstantInt>(CI->getArgOperand(2));
-      auto *IsVolatile = dyn_cast<ConstantInt>(CI->getArgOperand(3));
-
-      if (!BytesToCopy) {
-        continue;
-      }
-
-      uint64_t NumBytesToCopy = BytesToCopy->getZExtValue();
-
-      if (!Alloca) {
-        continue;
-      }
-
-      // Source isn't a global constant variable
-      if (!SourceVar) {
-        continue;
-      }
-
-      if (!IsVolatile || IsVolatile->isOne()) {
-        continue;
-      }
-
-      if (NumBytesToCopy % 4 == 0) {
-        continue;
-      }
-
-      if (!SourceVar->hasInitializer() || !SourceVar->isConstant() ||
-          !SourceVar->hasLocalLinkage() || !SourceVar->hasGlobalUnnamedAddr()) {
-        continue;
-      }
-
-      ConstantDataArray *SourceDataArray =
-          dyn_cast<ConstantDataArray>(SourceVar->getInitializer());
-      if (!SourceDataArray || !IsCharArray(SourceDataArray->getType())) {
-        continue;
-      }
-
-      if (!Alloca->isStaticAlloca()) {
-        continue;
-      }
-
-      // Make sure destination is definitley a char array.
-      if (!IsCharArray(Alloca->getAllocatedType())) {
-        continue;
-      }
-
-      uint64_t DZSize = Alloca->getAllocatedType()->getArrayNumElements();
-      uint64_t SZSize = SourceDataArray->getType()->getNumElements();
-
-      // For safety purposes lets add a constraint and only padd when
-      // num bytes to copy == destination array size == source string
-      // which is a constant
-      if (NumBytesToCopy != DZSize || DZSize != SZSize) {
-        continue;
-      }
-      unsigned int NumBytesToPad = 4 - (NumBytesToCopy % 4);
-      unsigned int TotalBytes = NumBytesToCopy + NumBytesToPad;
-
-      if (TotalBytes > MemcpyInliningLimit) {
-        continue;
-      }
-
-      // update destination char array to be word aligned (memcpy(X,...,...))
-      IRBuilder<> BuildAlloca(Alloca);
-      AllocaInst *NewAlloca = cast<AllocaInst>(BuildAlloca.CreateAlloca(
-          ArrayType::get(Alloca->getAllocatedType()->getArrayElementType(),
-                         NumBytesToCopy + NumBytesToPad)));
-      NewAlloca->takeName(Alloca);
-      NewAlloca->setAlignment(Alloca->getAlign());
-      Alloca->replaceAllUsesWith(NewAlloca);
-
-      // update source to be word aligned (memcpy(...,X,...))
-      // create replacement string with padded null bytes.
-      StringRef Data = SourceDataArray->getRawDataValues();
-      std::vector<uint8_t> StrData(Data.begin(), Data.end());
-      for (unsigned int p = 0; p < NumBytesToPad; p++)
-        StrData.push_back('\0');
-      auto Arr = ArrayRef(StrData.data(), TotalBytes);
-
-      // create new padded version of global variable string.
-      Constant *SourceReplace = ConstantDataArray::get(F.getContext(), Arr);
-      GlobalVariable *NewGV = new GlobalVariable(
-          *F.getParent(), SourceReplace->getType(), true,
-          SourceVar->getLinkage(), SourceReplace, SourceReplace->getName());
-
-      // copy any other attributes from original global variable string
-      // e.g. unamed_addr
-      NewGV->copyAttributesFrom(SourceVar);
-      NewGV->takeName(SourceVar);
-
-      // replace intrinsic source.
-      CI->setArgOperand(1, NewGV);
-
-      // Update number of bytes to copy (memcpy(...,...,X))
-      CI->setArgOperand(2,
-                        ConstantInt::get(BytesToCopy->getType(), TotalBytes));
-    }
-  }
-  return true;
-}
-
-} // end of anonymous namespace
-
-PreservedAnalyses ARMWidenStringsPass::run(Function &F,
-                                           FunctionAnalysisManager &AM) {
-  if (!ARMWidenStrings().run(F))
-    return PreservedAnalyses::all();
-
-  return PreservedAnalyses::none();
-}
diff --git a/llvm/lib/Transforms/Scalar/CMakeLists.txt b/llvm/lib/Transforms/Scalar/CMakeLists.txt
index a9607e4ebc6583..939a1457239567 100644
--- a/llvm/lib/Transforms/Scalar/CMakeLists.txt
+++ b/llvm/lib/Transforms/Scalar/CMakeLists.txt
@@ -2,7 +2,6 @@ add_llvm_component_library(LLVMScalarOpts
   ADCE.cpp
   AlignmentFromAssumptions.cpp
   AnnotationRemarks.cpp
-  ARMWidenStrings.cpp
   BDCE.cpp
   CallSiteSplitting.cpp
   ConstantHoisting.cpp
diff --git a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-1.ll b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-1.ll
index e11cf372c36a6e..6a8adf1af57a49 100644
--- a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-1.ll
+++ b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-1.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes="default<O2>,arm-widen-strings" -S | FileCheck %s
+; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes="default<O2>,globalopt" -S | FileCheck %s
 ; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes="default<O0>" -S | FileCheck %s --check-prefix=TURNED-OFF
 target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
 
diff --git a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-2.ll b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-2.ll
index 2df8108f445fe1..46bc715b8f7501 100644
--- a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-2.ll
+++ b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-2.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes="default<O2>,arm-widen-strings" -S | FileCheck %s
+; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes="default<O2>,globalopt" -S | FileCheck %s
 target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
 
 ; CHECK: [64 x i8]
diff --git a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-lengths-dont-match.ll b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-lengths-dont-match.ll
index a0c1e213298167..d5545cb9d6b88d 100644
--- a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-lengths-dont-match.ll
+++ b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-lengths-dont-match.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=arm-widen-strings -S | FileCheck %s
+; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=globalopt -S | FileCheck %s
 target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
 target triple = "thumbv6m-arm-none-eabi"
 
diff --git a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-more-than-64-bytes.ll b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-more-than-64-bytes.ll
index 67cb99023c5328..de11c4a899c8d6 100644
--- a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-more-than-64-bytes.ll
+++ b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-more-than-64-bytes.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=arm-widen-strings -S | FileCheck %s
+; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=globalopt -S | FileCheck %s
 target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
 target triple = "thumbv6m-arm-none-eabi"
 
diff --git a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-ptrtoint.ll b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-ptrtoint.ll
index 3f02c02ad845b2..1ec13eb72a6e29 100644
--- a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-ptrtoint.ll
+++ b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-ptrtoint.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=arm-widen-strings -S | FileCheck %s
+; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=globalopt -S | FileCheck %s
 target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
 
 ; CHECK: [48 x i8]
diff --git a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-struct-test.ll b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-struct-test.ll
index 937bfaecd8e3e9..7e9ddf7b1a8798 100644
--- a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-struct-test.ll
+++ b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-struct-test.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=arm-widen-strings -S | FileCheck %s
+; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=globalopt -S | FileCheck %s
 target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
 target triple = "thumbv6m-arm-none-eabi"
 
diff --git a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-volatile.ll b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-volatile.ll
index 6cbd823a18c367..24e9131b11907b 100644
--- a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-volatile.ll
+++ b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-volatile.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=arm-widen-strings -S | FileCheck %s
+; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=globalopt -S | FileCheck %s
 target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
 target triple = "thumbv6m-arm-none-eabi"
 



More information about the llvm-commits mailing list