[llvm] [llvm][ARM]Add ARM widen strings pass (PR #107120)

Tue Sep 10 11:30:40 PDT 2024

================
@@ -0,0 +1,227 @@
+// ARMWidenStrings.cpp - Widen strings to word boundaries to speed up
+// programs that use simple strcpy calls with constant strings as the source
+// and a stack-allocated array as the destination.
+
+#define DEBUG_TYPE "arm-widen-strings"
+
+#include "llvm/Transforms/Scalar/ARMWidenStrings.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/ValueSymbolTable.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Triple.h"
+#include "llvm/Transforms/Scalar.h"
+
+using namespace llvm;
+
+cl::opt<bool> DisableARMWidenStrings("disable-arm-widen-strings",
+                                     cl::init(false));
+
+namespace {
+
+/// Function-level driver for the ARM string-widening transform; the work is
+/// done by run(), which is defined out of line in this file.
+class ARMWidenStrings {
+public:
+  /// Maximum number of bytes a memcpy may copy and still be lowered to inline
+  /// loads/stores rather than a library call (__aeabi_memcpy). This mirrors
+  /// the value returned by ARMSubtarget::getMaxInlineSizeThreshold, which
+  /// cannot be queried here because the subtarget info class is not
+  /// accessible from the mid-end.
+  ///
+  /// Declared static constexpr so the limit is a single compile-time constant
+  /// rather than a per-object const member (which would also implicitly
+  /// delete the class's copy-assignment operator).
+  static constexpr unsigned int MemcpyInliningLimit = 64;
+
+  /// Runs the transform over \p F. Returns false without inspecting \p F when
+  /// the pass is disabled via -disable-arm-widen-strings.
+  bool run(Function &F);
+};
+
+/// Returns true when \p t is a non-null array type whose element type is an
+/// 8-bit integer, i.e. a char array.
+static bool IsCharArray(Type *t) {
+  constexpr unsigned CharBitWidth = 8;
+  // Reject null and non-array types before touching the element type.
+  if (!t || !t->isArrayTy())
+    return false;
+  // isIntegerTy(N) is true only for an integer type of exactly N bits.
+  return t->getArrayElementType()->isIntegerTy(CharBitWidth);
+}
+
+bool ARMWidenStrings::run(Function &F) {
+  if (DisableARMWidenStrings) {
+    return false;
+  }
+
+  LLVM_DEBUG(dbgs() << "Running ARMWidenStrings on module " << F.getName()
+                    << "\n");
+
+  for (Function::iterator b = F.begin(); b != F.end(); ++b) {
+    for (BasicBlock::iterator i = b->begin(); i != b->end(); ++i) {
+      CallInst *CI = dyn_cast<CallInst>(i);
+      if (!CI) {
+        continue;
+      }
+
+      Function *CallMemcpy = CI->getCalledFunction();
+      // find out if the current call instruction is a call to llvm memcpy
+      // intrinsics
+      if (CallMemcpy == NULL || !CallMemcpy->isIntrinsic() ||
+          CallMemcpy->getIntrinsicID() != Intrinsic::memcpy) {
+        continue;
+      }
+
+      LLVM_DEBUG(dbgs() << "Found call to strcpy/memcpy:\n" << *CI << "\n");
+
+      auto *Alloca = dyn_cast<AllocaInst>(CI->getArgOperand(0));
+      auto *SourceVar = dyn_cast<GlobalVariable>(CI->getArgOperand(1));
+      auto *BytesToCopy = dyn_cast<ConstantInt>(CI->getArgOperand(2));
+      auto *IsVolatile = dyn_cast<ConstantInt>(CI->getArgOperand(3));
+
+      if (!BytesToCopy) {
+        LLVM_DEBUG(dbgs() << "Number of bytes to copy is null\n");
----------------
efriedma-quic wrote:

Consider cutting down the DEBUG lines; printing a message for every memcpy is extremely noisy (and not particularly useful without printing the instruction in question).

https://github.com/llvm/llvm-project/pull/107120