[llvm] [llvm][ARM]Add ARM widen strings pass (PR #107120)
Nashe Mncube via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 9 07:32:45 PDT 2024
https://github.com/nasherm updated https://github.com/llvm/llvm-project/pull/107120
>From cc8bf21cbdda1b8ae6338602dc06b8ab139bb168 Mon Sep 17 00:00:00 2001
From: nasmnc01 <nashe.mncube at arm.com>
Date: Tue, 3 Sep 2024 16:09:43 +0100
Subject: [PATCH] [llvm][ARM]Add ARM widen strings pass
- Pass optimizes memcpy's by padding out destinations and sources to a
full word to make ARM backend generate full word loads instead of
loading a single byte (ldrb) and/or half word (ldrh). Only pads
destination when it's a stack allocated constant size array and source
when it's constant string. Heuristic to decide whether to pad or not
is very basic and could be improved to allow more examples to be
padded.
- Pass works at the midend level
Change-Id: I1c6371f0962e7ad3c166602b800d041ac1cc7b04
---
.../llvm/Transforms/Scalar/ARMWidenStrings.h | 30 +++
llvm/lib/Passes/PassBuilder.cpp | 1 +
llvm/lib/Passes/PassRegistry.def | 1 +
.../lib/Transforms/Scalar/ARMWidenStrings.cpp | 227 ++++++++++++++++++
llvm/lib/Transforms/Scalar/CMakeLists.txt | 1 +
.../ARMWidenStrings/arm-widen-strings-1.ll | 25 ++
.../ARMWidenStrings/arm-widen-strings-2.ll | 22 ++
.../arm-widen-strings-lengths-dont-match.ll | 28 +++
.../arm-widen-strings-more-than-64-bytes.ll | 29 +++
.../arm-widen-strings-ptrtoint.ll | 42 ++++
.../arm-widen-strings-struct-test.ll | 52 ++++
.../arm-widen-strings-volatile.ll | 29 +++
12 files changed, 487 insertions(+)
create mode 100755 llvm/include/llvm/Transforms/Scalar/ARMWidenStrings.h
create mode 100644 llvm/lib/Transforms/Scalar/ARMWidenStrings.cpp
create mode 100644 llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-1.ll
create mode 100644 llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-2.ll
create mode 100644 llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-lengths-dont-match.ll
create mode 100644 llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-more-than-64-bytes.ll
create mode 100644 llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-ptrtoint.ll
create mode 100644 llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-struct-test.ll
create mode 100644 llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-volatile.ll
diff --git a/llvm/include/llvm/Transforms/Scalar/ARMWidenStrings.h b/llvm/include/llvm/Transforms/Scalar/ARMWidenStrings.h
new file mode 100755
index 00000000000000..3bda666660144a
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Scalar/ARMWidenStrings.h
@@ -0,0 +1,30 @@
+//===- ARMWidenStrings.h --------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the interface for the ArmWidenStrings pass
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_ARMWIDENSTRINGS_H
+#define LLVM_TRANSFORMS_SCALAR_ARMWIDENSTRINGS_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class Module;
+
+class ARMWidenStringsPass : public PassInfoMixin<ARMWidenStringsPass> {
+public:
+ ARMWidenStringsPass() = default;
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_ARMWIDENSTRINGS_H
\ No newline at end of file
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 1df1449fce597c..6b989231cb9861 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -207,6 +207,7 @@
#include "llvm/Transforms/Instrumentation/ThreadSanitizer.h"
#include "llvm/Transforms/ObjCARC.h"
#include "llvm/Transforms/Scalar/ADCE.h"
+#include "llvm/Transforms/Scalar/ARMWidenStrings.h"
#include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h"
#include "llvm/Transforms/Scalar/AnnotationRemarks.h"
#include "llvm/Transforms/Scalar/BDCE.h"
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index d6067089c6b5c1..55566f43e5435d 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -489,6 +489,7 @@ FUNCTION_PASS("view-dom-only", DomOnlyViewer())
FUNCTION_PASS("view-post-dom", PostDomViewer())
FUNCTION_PASS("view-post-dom-only", PostDomOnlyViewer())
FUNCTION_PASS("wasm-eh-prepare", WasmEHPreparePass())
+FUNCTION_PASS("arm-widen-strings", ARMWidenStringsPass())
#undef FUNCTION_PASS
#ifndef FUNCTION_PASS_WITH_PARAMS
diff --git a/llvm/lib/Transforms/Scalar/ARMWidenStrings.cpp b/llvm/lib/Transforms/Scalar/ARMWidenStrings.cpp
new file mode 100644
index 00000000000000..dd06c2a7ea10d1
--- /dev/null
+++ b/llvm/lib/Transforms/Scalar/ARMWidenStrings.cpp
@@ -0,0 +1,227 @@
+// ARMWidenStrings.cpp - Widen strings to word boundaries to speed up
+// programs that use simple strcpy's with constant strings as source
+// and stack allocated array for destination.
+
+#define DEBUG_TYPE "arm-widen-strings"
+
+#include "llvm/Transforms/Scalar/ARMWidenStrings.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/ValueSymbolTable.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Triple.h"
+#include "llvm/Transforms/Scalar.h"
+
+using namespace llvm;
+
+cl::opt<bool> DisableARMWidenStrings("disable-arm-widen-strings",
+ cl::init(false));
+
+namespace {
+
+class ARMWidenStrings {
+public:
+ /*
+ Max number of bytes that memcpy allows for lowering to load/stores before it
+ uses library function (__aeabi_memcpy). This is the same value returned by
+ ARMSubtarget::getMaxInlineSizeThreshold which I would have called in place of
+ the constant int but can't get access to the subtarget info class from the
+ midend.
+ */
+ const unsigned int MemcpyInliningLimit = 64;
+
+ bool run(Function &F);
+};
+
+static bool IsCharArray(Type *t) {
+ const unsigned int CHAR_BIT_SIZE = 8;
+ return t && t->isArrayTy() && t->getArrayElementType()->isIntegerTy() &&
+ t->getArrayElementType()->getIntegerBitWidth() == CHAR_BIT_SIZE;
+}
+
+bool ARMWidenStrings::run(Function &F) {
+ if (DisableARMWidenStrings) {
+ return false;
+ }
+
+ LLVM_DEBUG(dbgs() << "Running ARMWidenStrings on module " << F.getName()
+ << "\n");
+
+ for (Function::iterator b = F.begin(); b != F.end(); ++b) {
+ for (BasicBlock::iterator i = b->begin(); i != b->end(); ++i) {
+ CallInst *CI = dyn_cast<CallInst>(i);
+ if (!CI) {
+ continue;
+ }
+
+ Function *CallMemcpy = CI->getCalledFunction();
+ // find out if the current call instruction is a call to llvm memcpy
+ // intrinsics
+ if (CallMemcpy == NULL || !CallMemcpy->isIntrinsic() ||
+ CallMemcpy->getIntrinsicID() != Intrinsic::memcpy) {
+ continue;
+ }
+
+ LLVM_DEBUG(dbgs() << "Found call to strcpy/memcpy:\n" << *CI << "\n");
+
+ auto *Alloca = dyn_cast<AllocaInst>(CI->getArgOperand(0));
+ auto *SourceVar = dyn_cast<GlobalVariable>(CI->getArgOperand(1));
+ auto *BytesToCopy = dyn_cast<ConstantInt>(CI->getArgOperand(2));
+ auto *IsVolatile = dyn_cast<ConstantInt>(CI->getArgOperand(3));
+
+ if (!BytesToCopy) {
+ LLVM_DEBUG(dbgs() << "Number of bytes to copy is null\n");
+ continue;
+ }
+
+ uint64_t NumBytesToCopy = BytesToCopy->getZExtValue();
+
+ if (!Alloca) {
+ LLVM_DEBUG(dbgs() << "Destination isn't a Alloca\n");
+ continue;
+ }
+
+ if (!SourceVar) {
+ LLVM_DEBUG(dbgs() << "Source isn't a global constant variable\n");
+ continue;
+ }
+
+ if (!IsVolatile || IsVolatile->isOne()) {
+ LLVM_DEBUG(
+ dbgs() << "Not widening strings for this memcpy because it's "
+ "a volatile operations\n");
+ continue;
+ }
+
+ if (NumBytesToCopy % 4 == 0) {
+ LLVM_DEBUG(dbgs() << "Bytes to copy in strcpy/memcpy is already word "
+ "aligned so nothing to do here.\n");
+ continue;
+ }
+
+ if (!SourceVar->hasInitializer() || !SourceVar->isConstant() ||
+ !SourceVar->hasLocalLinkage() || !SourceVar->hasGlobalUnnamedAddr()) {
+ LLVM_DEBUG(dbgs() << "Source is not constant global, thus it's "
+ "mutable therefore it's not safe to pad\n");
+ continue;
+ }
+
+ ConstantDataArray *SourceDataArray =
+ dyn_cast<ConstantDataArray>(SourceVar->getInitializer());
+ if (!SourceDataArray || !IsCharArray(SourceDataArray->getType())) {
+ LLVM_DEBUG(dbgs() << "Source isn't a constant data array\n");
+ continue;
+ }
+
+ if (!Alloca->isStaticAlloca()) {
+ LLVM_DEBUG(dbgs() << "Destination allocation isn't a static "
+ "constant which is locally allocated in this "
+ "function, so skipping.\n");
+ continue;
+ }
+
+ // Make sure destination is definitley a char array.
+ if (!IsCharArray(Alloca->getAllocatedType())) {
+ LLVM_DEBUG(dbgs() << "Destination doesn't look like a constant char (8 "
+ "bits) array\n");
+ continue;
+ }
+ LLVM_DEBUG(dbgs() << "With Alloca: " << *Alloca << "\n");
+
+ uint64_t DZSize = Alloca->getAllocatedType()->getArrayNumElements();
+ uint64_t SZSize = SourceDataArray->getType()->getNumElements();
+
+ // For safety purposes lets add a constraint and only padd when
+ // num bytes to copy == destination array size == source string
+ // which is a constant
+ LLVM_DEBUG(dbgs() << "Number of bytes to copy is: " << NumBytesToCopy
+ << "\n");
+ LLVM_DEBUG(dbgs() << "Size of destination array is: " << DZSize << "\n");
+ LLVM_DEBUG(dbgs() << "Size of source array is: " << SZSize << "\n");
+ if (NumBytesToCopy != DZSize || DZSize != SZSize) {
+ LLVM_DEBUG(dbgs() << "Size of number of bytes to copy, destination "
+ "array and source string don't match, so "
+ "skipping\n");
+ continue;
+ }
+ LLVM_DEBUG(dbgs() << "Going to widen.\n");
+ unsigned int NumBytesToPad = 4 - (NumBytesToCopy % 4);
+ LLVM_DEBUG(dbgs() << "Number of bytes to pad by is " << NumBytesToPad
+ << "\n");
+ unsigned int TotalBytes = NumBytesToCopy + NumBytesToPad;
+
+ if (TotalBytes > MemcpyInliningLimit) {
+ LLVM_DEBUG(
+ dbgs() << "Not going to pad because total number of bytes is "
+ << TotalBytes
+ << " which be greater than the inlining "
+ "limit for memcpy which is "
+ << MemcpyInliningLimit << "\n");
+ continue;
+ }
+
+ // update destination char array to be word aligned (memcpy(X,...,...))
+ IRBuilder<> BuildAlloca(Alloca);
+ AllocaInst *NewAlloca = cast<AllocaInst>(BuildAlloca.CreateAlloca(
+ ArrayType::get(Alloca->getAllocatedType()->getArrayElementType(),
+ NumBytesToCopy + NumBytesToPad)));
+ NewAlloca->takeName(Alloca);
+ NewAlloca->setAlignment(Alloca->getAlign());
+ Alloca->replaceAllUsesWith(NewAlloca);
+
+ LLVM_DEBUG(dbgs() << "Updating users of destination stack object to use "
+ << "new size\n");
+
+ // update source to be word aligned (memcpy(...,X,...))
+ // create replacement string with padded null bytes.
+ StringRef Data = SourceDataArray->getRawDataValues();
+ std::vector<uint8_t> StrData(Data.begin(), Data.end());
+ for (unsigned int p = 0; p < NumBytesToPad; p++)
+ StrData.push_back('\0');
+ auto Arr = ArrayRef(StrData.data(), TotalBytes);
+
+ // create new padded version of global variable string.
+ Constant *SourceReplace = ConstantDataArray::get(F.getContext(), Arr);
+ GlobalVariable *NewGV = new GlobalVariable(
+ *F.getParent(), SourceReplace->getType(), true,
+ SourceVar->getLinkage(), SourceReplace, SourceReplace->getName());
+
+ // copy any other attributes from original global variable string
+ // e.g. unamed_addr
+ NewGV->copyAttributesFrom(SourceVar);
+ NewGV->takeName(SourceVar);
+
+ // replace intrinsic source.
+ CI->setArgOperand(1, NewGV);
+
+ // Update number of bytes to copy (memcpy(...,...,X))
+ CI->setArgOperand(2,
+ ConstantInt::get(BytesToCopy->getType(), TotalBytes));
+ LLVM_DEBUG(dbgs() << "Padded dest/source and increased number of bytes:\n"
+ << *CI << "\n"
+ << *NewAlloca << "\n");
+ }
+ }
+ return true;
+}
+
+} // end of anonymous namespace
+
+PreservedAnalyses ARMWidenStringsPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ if (!ARMWidenStrings().run(F))
+ return PreservedAnalyses::all();
+
+ return PreservedAnalyses::none();
+}
diff --git a/llvm/lib/Transforms/Scalar/CMakeLists.txt b/llvm/lib/Transforms/Scalar/CMakeLists.txt
index 939a1457239567..a9607e4ebc6583 100644
--- a/llvm/lib/Transforms/Scalar/CMakeLists.txt
+++ b/llvm/lib/Transforms/Scalar/CMakeLists.txt
@@ -2,6 +2,7 @@ add_llvm_component_library(LLVMScalarOpts
ADCE.cpp
AlignmentFromAssumptions.cpp
AnnotationRemarks.cpp
+ ARMWidenStrings.cpp
BDCE.cpp
CallSiteSplitting.cpp
ConstantHoisting.cpp
diff --git a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-1.ll b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-1.ll
new file mode 100644
index 00000000000000..e11cf372c36a6e
--- /dev/null
+++ b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-1.ll
@@ -0,0 +1,25 @@
+; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes="default<O2>,arm-widen-strings" -S | FileCheck %s
+; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes="default<O0>" -S | FileCheck %s --check-prefix=TURNED-OFF
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+
+; CHECK: [12 x i8]
+; TURNED-OFF-NOT: [12 x i8]
+ at .str = private unnamed_addr constant [10 x i8] c"123456789\00", align 1
+
+; Function Attrs: nounwind
+define hidden void @foo() #0 {
+entry:
+; CHECK: %something = alloca [12 x i8]
+; TURNED-OFF-NOT: %something = alloca [12 x i8]
+ %something = alloca [10 x i8], align 1
+ %arraydecay = getelementptr inbounds [10 x i8], ptr %something, i32 0, i32 0
+; CHECK: @llvm.memcpy.p0.p0.i32
+ %call = call ptr @strcpy(ptr %arraydecay, ptr @.str)
+ %arraydecay1 = getelementptr inbounds [10 x i8], ptr %something, i32 0, i32 0
+ %call2 = call i32 @bar(ptr %arraydecay1)
+ ret void
+}
+
+declare ptr @strcpy(ptr, ptr) #1
+
+declare i32 @bar(...) #1
diff --git a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-2.ll b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-2.ll
new file mode 100644
index 00000000000000..2df8108f445fe1
--- /dev/null
+++ b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-2.ll
@@ -0,0 +1,22 @@
+; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes="default<O2>,arm-widen-strings" -S | FileCheck %s
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+
+; CHECK: [64 x i8]
+ at .str = private unnamed_addr constant [62 x i8] c"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\00", align 1
+
+; Function Attrs: nounwind
+define hidden void @foo() #0 {
+entry:
+; CHECK: %something = alloca [64 x i8]
+ %something = alloca [62 x i8], align 1
+ %arraydecay = getelementptr inbounds [62 x i8], ptr %something, i32 0, i32 0
+; CHECK: @llvm.memcpy.p0.p0.i32
+ %call = call ptr @strcpy(ptr %arraydecay, ptr @.str)
+ %arraydecay1 = getelementptr inbounds [62 x i8], ptr %something, i32 0, i32 0
+ %call2 = call i32 @bar(ptr %arraydecay1)
+ ret void
+}
+
+declare ptr @strcpy(ptr, ptr) #1
+
+declare i32 @bar(...) #1
diff --git a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-lengths-dont-match.ll b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-lengths-dont-match.ll
new file mode 100644
index 00000000000000..a0c1e213298167
--- /dev/null
+++ b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-lengths-dont-match.ll
@@ -0,0 +1,28 @@
+; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=arm-widen-strings -S | FileCheck %s
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "thumbv6m-arm-none-eabi"
+
+; CHECK: [17 x i8]
+ at .str = private unnamed_addr constant [17 x i8] c"aaaaaaaaaaaaaaaa\00", align 1
+
+; Function Attrs: nounwind
+define hidden void @foo() local_unnamed_addr #0 {
+entry:
+ %something = alloca [20 x i8], align 1
+ call void @llvm.lifetime.start(i64 20, ptr nonnull %something) #3
+ call void @llvm.memcpy.p0i8.p0i8.i32(ptr align 1 nonnull %something, ptr align 1 @.str, i32 17, i1 false)
+ %call2 = call i32 @bar(ptr nonnull %something) #3
+ call void @llvm.lifetime.end(i64 20, ptr nonnull %something) #3
+ ret void
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start(i64, ptr nocapture) #1
+
+declare i32 @bar(...) local_unnamed_addr #2
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end(i64, ptr nocapture) #1
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(ptr nocapture writeonly, ptr nocapture readonly, i32, i1) #1
diff --git a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-more-than-64-bytes.ll b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-more-than-64-bytes.ll
new file mode 100644
index 00000000000000..67cb99023c5328
--- /dev/null
+++ b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-more-than-64-bytes.ll
@@ -0,0 +1,29 @@
+; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=arm-widen-strings -S | FileCheck %s
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "thumbv6m-arm-none-eabi"
+
+; CHECK: [65 x i8]
+; CHECK-NOT: [68 x i8]
+ at .str = private unnamed_addr constant [65 x i8] c"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazzz\00", align 1
+
+; Function Attrs: nounwind
+define hidden void @foo() local_unnamed_addr #0 {
+entry:
+ %something = alloca [65 x i8], align 1
+ call void @llvm.lifetime.start(i64 65, ptr nonnull %something) #3
+ call void @llvm.memcpy.p0i8.p0i8.i32(ptr align 1 nonnull %something, ptr align 1 @.str, i32 65, i1 false)
+ %call2 = call i32 @bar(ptr nonnull %something) #3
+ call void @llvm.lifetime.end(i64 65, ptr nonnull %something) #3
+ ret void
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start(i64, ptr nocapture) #1
+
+declare i32 @bar(...) local_unnamed_addr #2
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end(i64, ptr nocapture) #1
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(ptr nocapture writeonly, ptr nocapture readonly, i32, i1) #1
diff --git a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-ptrtoint.ll b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-ptrtoint.ll
new file mode 100644
index 00000000000000..3f02c02ad845b2
--- /dev/null
+++ b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-ptrtoint.ll
@@ -0,0 +1,42 @@
+; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=arm-widen-strings -S | FileCheck %s
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+
+; CHECK: [48 x i8]
+ at f.string1 = private unnamed_addr constant [45 x i8] c"The quick brown dog jumps over the lazy fox.\00", align 1
+
+; Function Attrs: nounwind
+define hidden i32 @f() {
+entry:
+ %string1 = alloca [45 x i8], align 1
+ %pos = alloca i32, align 4
+ %token = alloca ptr, align 4
+ call void @llvm.lifetime.start.p0i8(i64 45, ptr %string1)
+ call void @llvm.memcpy.p0i8.p0i8.i32(ptr align 1 %string1, ptr align 1 @f.string1, i32 45, i1 false)
+ call void @llvm.lifetime.start.p0i8(i64 4, ptr %pos)
+ call void @llvm.lifetime.start.p0i8(i64 4, ptr %token)
+ %call = call ptr @strchr(ptr %string1, i32 101)
+ store ptr %call, ptr %token, align 4
+ %0 = load ptr, ptr %token, align 4
+ %sub.ptr.lhs.cast = ptrtoint ptr %0 to i32
+ %sub.ptr.rhs.cast = ptrtoint ptr %string1 to i32
+ %sub.ptr.sub = sub i32 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
+ %add = add nsw i32 %sub.ptr.sub, 1
+ store i32 %add, ptr %pos, align 4
+ %1 = load i32, ptr %pos, align 4
+ call void @llvm.lifetime.end.p0i8(i64 4, ptr %token)
+ call void @llvm.lifetime.end.p0i8(i64 4, ptr %pos)
+ call void @llvm.lifetime.end.p0i8(i64 45, ptr %string1)
+ ret i32 %1
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64, ptr nocapture)
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(ptr nocapture writeonly, ptr nocapture readonly, i32, i1)
+
+; Function Attrs: nounwind
+declare ptr @strchr(ptr, i32)
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64, ptr nocapture)
diff --git a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-struct-test.ll b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-struct-test.ll
new file mode 100644
index 00000000000000..937bfaecd8e3e9
--- /dev/null
+++ b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-struct-test.ll
@@ -0,0 +1,52 @@
+; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=arm-widen-strings -S | FileCheck %s
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "thumbv6m-arm-none-eabi"
+
+%struct.P = type { i32, [13 x i8] }
+
+; CHECK-NOT: [16 x i8]
+ at .str = private unnamed_addr constant [13 x i8] c"hello world\0A\00", align 1
+ at .str.1 = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1
+ at __ARM_use_no_argv = global i32 1, section ".ARM.use_no_argv", align 4
+ at llvm.used = appending global [1 x ptr] [ptr @__ARM_use_no_argv], section "llvm.metadata"
+
+; Function Attrs: nounwind
+define hidden i32 @main() local_unnamed_addr #0 {
+entry:
+ %p = alloca %struct.P, align 4
+ call void @llvm.lifetime.start(i64 20, ptr nonnull %p) #2
+ store i32 10, ptr %p, align 4, !tbaa !3
+ %arraydecay = getelementptr inbounds %struct.P, ptr %p, i32 0, i32 1, i32 0
+ call void @llvm.memcpy.p0i8.p0i8.i32(ptr align 1 %arraydecay, ptr align 1 @.str, i32 13, i1 false)
+ %puts = call i32 @puts(ptr %arraydecay)
+ call void @llvm.lifetime.end(i64 20, ptr nonnull %p) #2
+ ret i32 0
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start(i64, ptr nocapture) #1
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end(i64, ptr nocapture) #1
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(ptr nocapture writeonly, ptr nocapture readonly, i32, i1) #1
+
+; Function Attrs: nounwind
+declare i32 @puts(ptr nocapture readonly) #2
+
+attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-m0" "target-features"="+strict-align" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { argmemonly nounwind }
+attributes #2 = { nounwind }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 1, !"min_enum_size", i32 4}
+!2 = !{!"Component: ARM Compiler 6 devbuild Tool: armclang [devbuild]"}
+!3 = !{!4, !5, i64 0}
+!4 = !{!"P", !5, i64 0, !6, i64 4}
+!5 = !{!"int", !6, i64 0}
+!6 = !{!"omnipotent char", !7, i64 0}
+!7 = !{!"Simple C/C++ TBAA"}
diff --git a/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-volatile.ll b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-volatile.ll
new file mode 100644
index 00000000000000..6cbd823a18c367
--- /dev/null
+++ b/llvm/test/Transforms/ARMWidenStrings/arm-widen-strings-volatile.ll
@@ -0,0 +1,29 @@
+; RUN: opt < %s -mtriple=arm-arm-none-eabi -passes=arm-widen-strings -S | FileCheck %s
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "thumbv6m-arm-none-eabi"
+
+; CHECK-NOT: [64 x i8]
+ at .str = private unnamed_addr constant [62 x i8] c"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\00", align 1
+
+; Function Attrs: nounwind
+define hidden void @foo() local_unnamed_addr #0 {
+entry:
+ %something = alloca [62 x i8], align 1
+ %0 = getelementptr inbounds [62 x i8], ptr %something, i32 0, i32 0
+ call void @llvm.lifetime.start(i64 62, ptr nonnull %0) #3
+ call void @llvm.memcpy.p0i8.p0i8.i32(ptr align 1 nonnull %0, ptr align 1 @.str, i32 62, i1 true)
+ %call2 = call i32 @bar(ptr nonnull %0) #3
+ call void @llvm.lifetime.end(i64 62, ptr nonnull %0) #3
+ ret void
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start(i64, ptr nocapture) #1
+
+declare i32 @bar(...) local_unnamed_addr #2
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end(i64, ptr nocapture) #1
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(ptr nocapture writeonly, ptr nocapture readonly, i32, i1) #1
More information about the llvm-commits
mailing list