[llvm] 9959eee - [Attributor] Make sure Heap2Stack works properly on a GPU target

Johannes Doerfert via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 17 23:10:49 PDT 2021


Author: Johannes Doerfert
Date: 2021-06-18T01:07:52-05:00
New Revision: 9959eee0015a97f3eb6bdb1b7b1f500190d66f53

URL: https://github.com/llvm/llvm-project/commit/9959eee0015a97f3eb6bdb1b7b1f500190d66f53
DIFF: https://github.com/llvm/llvm-project/commit/9959eee0015a97f3eb6bdb1b7b1f500190d66f53.diff

LOG: [Attributor] Make sure Heap2Stack works properly on a GPU target

If the target stack is not accessible between different running
"threads" we have to make sure not to create allocas for mallocs
that might be used by multiple "threads". The "use check" is
sufficient to prevent this but if we apply the "free check" we have
to make sure the pointer is not communicated to others before
the free is reached.

Differential Revision: https://reviews.llvm.org/D98608

Added: 
    llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll

Modified: 
    llvm/include/llvm/Transforms/IPO/Attributor.h
    llvm/lib/Transforms/IPO/AttributorAttributes.cpp
    llvm/test/Transforms/Attributor/depgraph.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index 0f26e136c587..cb8f3cd80226 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -102,6 +102,7 @@
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Triple.h"
 #include "llvm/ADT/iterator.h"
 #include "llvm/Analysis/AssumeBundleQueries.h"
 #include "llvm/Analysis/CFG.h"
@@ -818,7 +819,7 @@ struct InformationCache {
             [&](const Function &F) {
               return AG.getAnalysis<PostDominatorTreeAnalysis>(F);
             }),
-        AG(AG), CGSCC(CGSCC) {
+        AG(AG), CGSCC(CGSCC), TargetTriple(M.getTargetTriple()) {
     if (CGSCC)
       initializeModuleSlice(*CGSCC);
   }
@@ -968,6 +969,14 @@ struct InformationCache {
     return ModuleSlice.count(const_cast<Function *>(&F));
   }
 
+  /// Return true if the stack (llvm::Alloca) can be accessed by other threads.
+  bool stackIsAccessibleByOtherThreads() { return !targetIsGPU(); }
+
+  /// Return true if the target is a GPU.
+  bool targetIsGPU() {
+    return TargetTriple.isAMDGPU() || TargetTriple.isNVPTX();
+  }
+
 private:
   struct FunctionInfo {
     ~FunctionInfo();
@@ -1031,6 +1040,9 @@ struct InformationCache {
   DenseMap<std::pair<const Instruction *, const Instruction *>, bool>
       PotentiallyReachableMap;
 
+  /// The triple describing the target machine.
+  Triple TargetTriple;
+
   /// Give the Attributor access to the members so
   /// Attributor::identifyDefaultAbstractAttributes(...) can initialize them.
   friend struct Attributor;

diff  --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 9832b2d6da34..bb2cc3f7659d 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -5016,7 +5016,8 @@ struct AAHeapToStackImpl : public AAHeapToStack {
       : AAHeapToStack(IRP, A) {}
 
   const std::string getAsStr() const override {
-    return "[H2S] Mallocs: " + std::to_string(MallocCalls.size());
+    return "[H2S] Mallocs Good/Bad: " + std::to_string(MallocCalls.size()) +
+           "/" + std::to_string(BadMallocCalls.size());
   }
 
   bool isAssumedHeapToStack(CallBase &CB) const override {
@@ -5125,10 +5126,29 @@ ChangeStatus AAHeapToStackImpl::updateImpl(Attributor &A) {
   MustBeExecutedContextExplorer &Explorer =
       A.getInfoCache().getMustBeExecutedContextExplorer();
 
+  bool StackIsAccessibleByOtherThreads =
+      A.getInfoCache().stackIsAccessibleByOtherThreads();
+
   auto FreeCheck = [&](Instruction &I) {
+    // If the stack is not accessible by other threads, the "must-free" logic
+    // doesn't apply as the pointer could be shared and needs to be placed in
+    // "shareable" memory.
+    if (!StackIsAccessibleByOtherThreads) {
+      auto &NoSyncAA =
+          A.getAAFor<AANoSync>(*this, getIRPosition(), DepClassTy::OPTIONAL);
+      if (!NoSyncAA.isAssumedNoSync()) {
+        LLVM_DEBUG(
+            dbgs() << "[H2S] found an escaping use, stack is not accessible by "
+                      "other threads and function is not nosync:\n");
+        return false;
+      }
+    }
     const auto &Frees = FreesForMalloc.lookup(&I);
-    if (Frees.size() != 1)
+    if (Frees.size() != 1) {
+      LLVM_DEBUG(dbgs() << "[H2S] did not find one free call but "
+                        << Frees.size() << "\n");
       return false;
+    }
     Instruction *UniqueFree = *Frees.begin();
     return Explorer.findInContextOf(UniqueFree, I.getNextNode());
   };
@@ -5169,12 +5189,12 @@ ChangeStatus AAHeapToStackImpl::updateImpl(Attributor &A) {
 
         const auto &NoCaptureAA = A.getAAFor<AANoCapture>(
             *this, IRPosition::callsite_argument(*CB, ArgNo),
-            DepClassTy::REQUIRED);
+            DepClassTy::OPTIONAL);
 
         // If a callsite argument use is nofree, we are fine.
         const auto &ArgNoFreeAA = A.getAAFor<AANoFree>(
             *this, IRPosition::callsite_argument(*CB, ArgNo),
-            DepClassTy::REQUIRED);
+            DepClassTy::OPTIONAL);
 
         if (!NoCaptureAA.isAssumedNoCapture() ||
             !ArgNoFreeAA.isAssumedNoFree()) {

diff  --git a/llvm/test/Transforms/Attributor/depgraph.ll b/llvm/test/Transforms/Attributor/depgraph.ll
index ad85e7c6c095..cd47eeebd68e 100644
--- a/llvm/test/Transforms/Attributor/depgraph.ll
+++ b/llvm/test/Transforms/Attributor/depgraph.ll
@@ -124,7 +124,7 @@ define i32* @checkAndAdvance(i32* align 16 %0) {
 ; GRAPH-NEXT:   updates [AAMemoryLocation] for CtxI '  %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state memory:argument
 ; GRAPH-NEXT:   updates [AAMemoryLocation] for CtxI '  %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state memory:argument
 ; GRAPH-EMPTY:
-; GRAPH-NEXT: [AAHeapToStack] for CtxI '  %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance at -1]} with state [H2S] Mallocs: 0
+; GRAPH-NEXT: [AAHeapToStack] for CtxI '  %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance at -1]} with state [H2S] Mallocs Good/Bad: 0/1
 ; GRAPH-EMPTY:
 ; GRAPH-NEXT: [AAValueSimplify] for CtxI '  %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance at -1]} with state simplified
 ; GRAPH-EMPTY:

diff  --git a/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll b/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll
new file mode 100644
index 000000000000..e6a8d2510a37
--- /dev/null
+++ b/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll
@@ -0,0 +1,644 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
+; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=5 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=5 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
+; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM
+
+target triple = "amdgcn-amd-amdhsa"
+
+declare noalias i8* @malloc(i64)
+
+declare void @nocapture_func_frees_pointer(i8* nocapture)
+
+declare void @func_throws(...)
+
+declare void @sync_func(i8* %p)
+
+declare void @sync_will_return(i8* %p) willreturn nounwind
+
+declare void @no_sync_func(i8* nocapture %p) nofree nosync willreturn
+
+declare void @nofree_func(i8* nocapture %p) nofree  nosync willreturn
+
+declare void @foo(i32* %p)
+
+declare void @foo_nounw(i32* %p) nounwind nofree
+
+declare i32 @no_return_call() noreturn
+
+declare void @free(i8* nocapture)
+
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) nounwind
+
+define void @nofree_arg_only(i8* %p1, i8* %p2) {
+; CHECK-LABEL: define {{[^@]+}}@nofree_arg_only
+; CHECK-SAME: (i8* nocapture nofree [[P1:%.*]], i8* nocapture [[P2:%.*]]) {
+; CHECK-NEXT:    tail call void @free(i8* nocapture [[P2]])
+; CHECK-NEXT:    tail call void @nofree_func(i8* nocapture nofree [[P1]])
+; CHECK-NEXT:    ret void
+;
+  tail call void @free(i8* %p2)
+  tail call void @nofree_func(i8* %p1)
+  ret void
+}
+
+; TEST 1 - negative, pointer freed in another function.
+
+define void @test1() {
+; CHECK-LABEL: define {{[^@]+}}@test1() {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 noundef 4)
+; CHECK-NEXT:    tail call void @nocapture_func_frees_pointer(i8* noalias nocapture [[TMP1]])
+; CHECK-NEXT:    tail call void (...) @func_throws()
+; CHECK-NEXT:    tail call void @free(i8* noalias nocapture [[TMP1]])
+; CHECK-NEXT:    ret void
+;
+  %1 = tail call noalias i8* @malloc(i64 4)
+  tail call void @nocapture_func_frees_pointer(i8* %1)
+  tail call void (...) @func_throws()
+  tail call void @free(i8* %1)
+  ret void
+}
+
+; TEST 2 - negative, call to a sync function.
+
+define void @test2() {
+; CHECK-LABEL: define {{[^@]+}}@test2() {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 noundef 4)
+; CHECK-NEXT:    tail call void @sync_func(i8* [[TMP1]])
+; CHECK-NEXT:    tail call void @free(i8* nocapture [[TMP1]])
+; CHECK-NEXT:    ret void
+;
+  %1 = tail call noalias i8* @malloc(i64 4)
+  tail call void @sync_func(i8* %1)
+  tail call void @free(i8* %1)
+  ret void
+}
+
+; TEST 3 - 1 malloc, 1 free
+
+define void @test3() {
+; CHECK-LABEL: define {{[^@]+}}@test3() {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 noundef 4)
+; CHECK-NEXT:    tail call void @no_sync_func(i8* noalias nocapture nofree [[TMP1]])
+; CHECK-NEXT:    tail call void @free(i8* noalias nocapture [[TMP1]])
+; CHECK-NEXT:    ret void
+;
+  %1 = tail call noalias i8* @malloc(i64 4)
+  tail call void @no_sync_func(i8* %1)
+  tail call void @free(i8* %1)
+  ret void
+}
+
+define void @test3a(i8* %p) {
+; IS________OPM-LABEL: define {{[^@]+}}@test3a
+; IS________OPM-SAME: (i8* nocapture [[P:%.*]]) {
+; IS________OPM-NEXT:    [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 noundef 4)
+; IS________OPM-NEXT:    tail call void @nofree_arg_only(i8* nocapture nofree [[TMP1]], i8* nocapture [[P]])
+; IS________OPM-NEXT:    tail call void @free(i8* noalias nocapture [[TMP1]])
+; IS________OPM-NEXT:    ret void
+;
+; IS________NPM-LABEL: define {{[^@]+}}@test3a
+; IS________NPM-SAME: (i8* nocapture [[P:%.*]]) {
+; IS________NPM-NEXT:    [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 noundef 4)
+; IS________NPM-NEXT:    tail call void @nofree_arg_only(i8* noalias nocapture nofree [[TMP1]], i8* nocapture [[P]])
+; IS________NPM-NEXT:    tail call void @free(i8* noalias nocapture [[TMP1]])
+; IS________NPM-NEXT:    ret void
+;
+  %1 = tail call noalias i8* @malloc(i64 4)
+  tail call void @nofree_arg_only(i8* %1, i8* %p)
+  tail call void @free(i8* %1)
+  ret void
+}
+
+declare noalias i8* @aligned_alloc(i64, i64)
+
+define void @test3b(i8* %p) {
+; IS________OPM-LABEL: define {{[^@]+}}@test3b
+; IS________OPM-SAME: (i8* nocapture [[P:%.*]]) {
+; IS________OPM-NEXT:    [[TMP1:%.*]] = tail call noalias i8* @aligned_alloc(i64 noundef 32, i64 noundef 128)
+; IS________OPM-NEXT:    tail call void @nofree_arg_only(i8* nocapture nofree [[TMP1]], i8* nocapture [[P]])
+; IS________OPM-NEXT:    tail call void @free(i8* noalias nocapture [[TMP1]])
+; IS________OPM-NEXT:    ret void
+;
+; IS________NPM-LABEL: define {{[^@]+}}@test3b
+; IS________NPM-SAME: (i8* nocapture [[P:%.*]]) {
+; IS________NPM-NEXT:    [[TMP1:%.*]] = tail call noalias i8* @aligned_alloc(i64 noundef 32, i64 noundef 128)
+; IS________NPM-NEXT:    tail call void @nofree_arg_only(i8* noalias nocapture nofree [[TMP1]], i8* nocapture [[P]])
+; IS________NPM-NEXT:    tail call void @free(i8* noalias nocapture [[TMP1]])
+; IS________NPM-NEXT:    ret void
+;
+  %1 = tail call noalias i8* @aligned_alloc(i64 32, i64 128)
+  tail call void @nofree_arg_only(i8* %1, i8* %p)
+  tail call void @free(i8* %1)
+  ret void
+}
+
+; leave alone non-constant alignments.
+define void @test3c(i64 %alignment) {
+; CHECK-LABEL: define {{[^@]+}}@test3c
+; CHECK-SAME: (i64 [[ALIGNMENT:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call noalias i8* @aligned_alloc(i64 [[ALIGNMENT]], i64 noundef 128)
+; CHECK-NEXT:    tail call void @free(i8* noalias nocapture [[TMP1]])
+; CHECK-NEXT:    ret void
+;
+  %1 = tail call noalias i8* @aligned_alloc(i64 %alignment, i64 128)
+  tail call void @free(i8* %1)
+  ret void
+}
+
+declare noalias i8* @calloc(i64, i64)
+
+define void @test0() {
+; CHECK-LABEL: define {{[^@]+}}@test0() {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call noalias i8* @calloc(i64 noundef 2, i64 noundef 4)
+; CHECK-NEXT:    tail call void @no_sync_func(i8* noalias nocapture nofree [[TMP1]])
+; CHECK-NEXT:    tail call void @free(i8* noalias nocapture [[TMP1]])
+; CHECK-NEXT:    ret void
+;
+  %1 = tail call noalias i8* @calloc(i64 2, i64 4)
+  tail call void @no_sync_func(i8* %1)
+  tail call void @free(i8* %1)
+  ret void
+}
+
+; TEST 4
+define void @test4() {
+; CHECK-LABEL: define {{[^@]+}}@test4() {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 noundef 4)
+; CHECK-NEXT:    tail call void @nofree_func(i8* noalias nocapture nofree [[TMP1]])
+; CHECK-NEXT:    ret void
+;
+  %1 = tail call noalias i8* @malloc(i64 4)
+  tail call void @nofree_func(i8* %1)
+  ret void
+}
+
+; TEST 5 - not all exit paths have a call to free, but all uses of malloc
+; are in nofree functions and are not captured
+
+define void @test5(i32, i8* %p) {
+; IS________OPM-LABEL: define {{[^@]+}}@test5
+; IS________OPM-SAME: (i32 [[TMP0:%.*]], i8* nocapture [[P:%.*]]) {
+; IS________OPM-NEXT:    [[TMP2:%.*]] = tail call noalias i8* @malloc(i64 noundef 4)
+; IS________OPM-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0
+; IS________OPM-NEXT:    br i1 [[TMP3]], label [[TMP5:%.*]], label [[TMP4:%.*]]
+; IS________OPM:       4:
+; IS________OPM-NEXT:    tail call void @nofree_func(i8* noalias nocapture nofree [[TMP2]])
+; IS________OPM-NEXT:    br label [[TMP6:%.*]]
+; IS________OPM:       5:
+; IS________OPM-NEXT:    tail call void @nofree_arg_only(i8* nocapture nofree [[TMP2]], i8* nocapture [[P]])
+; IS________OPM-NEXT:    tail call void @free(i8* noalias nocapture [[TMP2]])
+; IS________OPM-NEXT:    br label [[TMP6]]
+; IS________OPM:       6:
+; IS________OPM-NEXT:    ret void
+;
+; IS________NPM-LABEL: define {{[^@]+}}@test5
+; IS________NPM-SAME: (i32 [[TMP0:%.*]], i8* nocapture [[P:%.*]]) {
+; IS________NPM-NEXT:    [[TMP2:%.*]] = tail call noalias i8* @malloc(i64 noundef 4)
+; IS________NPM-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0
+; IS________NPM-NEXT:    br i1 [[TMP3]], label [[TMP5:%.*]], label [[TMP4:%.*]]
+; IS________NPM:       4:
+; IS________NPM-NEXT:    tail call void @nofree_func(i8* noalias nocapture nofree [[TMP2]])
+; IS________NPM-NEXT:    br label [[TMP6:%.*]]
+; IS________NPM:       5:
+; IS________NPM-NEXT:    tail call void @nofree_arg_only(i8* noalias nocapture nofree [[TMP2]], i8* nocapture [[P]])
+; IS________NPM-NEXT:    tail call void @free(i8* noalias nocapture [[TMP2]])
+; IS________NPM-NEXT:    br label [[TMP6]]
+; IS________NPM:       6:
+; IS________NPM-NEXT:    ret void
+;
+  %2 = tail call noalias i8* @malloc(i64 4)
+  %3 = icmp eq i32 %0, 0
+  br i1 %3, label %5, label %4
+
+4:                                                ; preds = %1
+  tail call void @nofree_func(i8* %2)
+  br label %6
+
+5:                                                ; preds = %1
+  tail call void @nofree_arg_only(i8* %2, i8* %p)
+  tail call void @free(i8* %2)
+  br label %6
+
+6:                                                ; preds = %5, %4
+  ret void
+}
+
+; TEST 6 - all exit paths have a call to free
+
+define void @test6(i32) {
+; CHECK-LABEL: define {{[^@]+}}@test6
+; CHECK-SAME: (i32 [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call noalias i8* @malloc(i64 noundef 4)
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP5:%.*]], label [[TMP4:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    tail call void @nofree_func(i8* noalias nocapture nofree [[TMP2]])
+; CHECK-NEXT:    tail call void @free(i8* noalias nocapture [[TMP2]])
+; CHECK-NEXT:    br label [[TMP6:%.*]]
+; CHECK:       5:
+; CHECK-NEXT:    tail call void @free(i8* noalias nocapture [[TMP2]])
+; CHECK-NEXT:    br label [[TMP6]]
+; CHECK:       6:
+; CHECK-NEXT:    ret void
+;
+  %2 = tail call noalias i8* @malloc(i64 4)
+  %3 = icmp eq i32 %0, 0
+  br i1 %3, label %5, label %4
+
+4:                                                ; preds = %1
+  tail call void @nofree_func(i8* %2)
+  tail call void @free(i8* %2)
+  br label %6
+
+5:                                                ; preds = %1
+  tail call void @free(i8* %2)
+  br label %6
+
+6:                                                ; preds = %5, %4
+  ret void
+}
+
+; TEST 7 - free is dead.
+
+define void @test7() {
+; CHECK: Function Attrs: noreturn
+; CHECK-LABEL: define {{[^@]+}}@test7
+; CHECK-SAME: () #[[ATTR3:[0-9]+]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 noundef 4)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call i32 @no_return_call() #[[ATTR3]]
+; CHECK-NEXT:    unreachable
+;
+  %1 = tail call noalias i8* @malloc(i64 4)
+  tail call i32 @no_return_call()
+  tail call void @free(i8* %1)
+  ret void
+}
+
+; TEST 8 - Negative: bitcast pointer used in capture function
+
+define void @test8() {
+; CHECK-LABEL: define {{[^@]+}}@test8() {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 noundef 4)
+; CHECK-NEXT:    tail call void @no_sync_func(i8* noalias nocapture nofree [[TMP1]])
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
+; CHECK-NEXT:    store i32 10, i32* [[TMP2]], align 4
+; CHECK-NEXT:    tail call void @foo(i32* noundef align 4 [[TMP2]])
+; CHECK-NEXT:    tail call void @free(i8* nocapture noundef nonnull align 4 dereferenceable(4) [[TMP1]])
+; CHECK-NEXT:    ret void
+;
+  %1 = tail call noalias i8* @malloc(i64 4)
+  tail call void @no_sync_func(i8* %1)
+  %2 = bitcast i8* %1 to i32*
+  store i32 10, i32* %2
+  %3 = load i32, i32* %2
+  tail call void @foo(i32* %2)
+  tail call void @free(i8* %1)
+  ret void
+}
+
+; TEST 9 - FIXME: malloc should be converted.
+define void @test9() {
+; CHECK-LABEL: define {{[^@]+}}@test9() {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 noundef 4)
+; CHECK-NEXT:    tail call void @no_sync_func(i8* noalias nocapture nofree [[TMP1]])
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
+; CHECK-NEXT:    store i32 10, i32* [[TMP2]], align 4
+; CHECK-NEXT:    tail call void @foo_nounw(i32* nofree noundef align 4 [[TMP2]]) #[[ATTR5:[0-9]+]]
+; CHECK-NEXT:    tail call void @free(i8* nocapture noundef nonnull align 4 dereferenceable(4) [[TMP1]])
+; CHECK-NEXT:    ret void
+;
+  %1 = tail call noalias i8* @malloc(i64 4)
+  tail call void @no_sync_func(i8* %1)
+  %2 = bitcast i8* %1 to i32*
+  store i32 10, i32* %2
+  %3 = load i32, i32* %2
+  tail call void @foo_nounw(i32* %2)
+  tail call void @free(i8* %1)
+  ret void
+}
+
+; TEST 10 - 1 malloc, 1 free
+
+define i32 @test10() {
+; CHECK-LABEL: define {{[^@]+}}@test10() {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 noundef 4)
+; CHECK-NEXT:    tail call void @no_sync_func(i8* noalias nocapture nofree [[TMP1]])
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
+; CHECK-NEXT:    store i32 10, i32* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
+; CHECK-NEXT:    tail call void @free(i8* noalias nocapture noundef nonnull align 4 dereferenceable(4) [[TMP1]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+  %1 = tail call noalias i8* @malloc(i64 4)
+  tail call void @no_sync_func(i8* %1)
+  %2 = bitcast i8* %1 to i32*
+  store i32 10, i32* %2
+  %3 = load i32, i32* %2
+  tail call void @free(i8* %1)
+  ret i32 %3
+}
+
+define i32 @test_lifetime() {
+; CHECK-LABEL: define {{[^@]+}}@test_lifetime() {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 noundef 4)
+; CHECK-NEXT:    tail call void @no_sync_func(i8* noalias nocapture nofree [[TMP1]])
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noalias nocapture nofree noundef nonnull align 4 dereferenceable(4) [[TMP1]])
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
+; CHECK-NEXT:    store i32 10, i32* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
+; CHECK-NEXT:    tail call void @free(i8* noalias nocapture noundef nonnull align 4 dereferenceable(4) [[TMP1]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+  %1 = tail call noalias i8* @malloc(i64 4)
+  tail call void @no_sync_func(i8* %1)
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* %1)
+  %2 = bitcast i8* %1 to i32*
+  store i32 10, i32* %2
+  %3 = load i32, i32* %2
+  tail call void @free(i8* %1)
+  ret i32 %3
+}
+
+; TEST 11
+
+define void @test11() {
+; CHECK-LABEL: define {{[^@]+}}@test11() {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 noundef 4)
+; CHECK-NEXT:    tail call void @sync_will_return(i8* [[TMP1]]) #[[ATTR5]]
+; CHECK-NEXT:    tail call void @free(i8* nocapture [[TMP1]])
+; CHECK-NEXT:    ret void
+;
+  %1 = tail call noalias i8* @malloc(i64 4)
+  tail call void @sync_will_return(i8* %1)
+  tail call void @free(i8* %1)
+  ret void
+}
+
+; TEST 12
+define i32 @irreducible_cfg(i32 %0) {
+; CHECK-LABEL: define {{[^@]+}}@irreducible_cfg
+; CHECK-SAME: (i32 [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[TMP2:%.*]] = call noalias i8* @malloc(i64 noundef 4)
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
+; CHECK-NEXT:    store i32 10, i32* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[TMP0]], 1
+; CHECK-NEXT:    br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP7:%.*]]
+; CHECK:       5:
+; CHECK-NEXT:    [[TMP6:%.*]] = add nsw i32 [[TMP0]], 5
+; CHECK-NEXT:    br label [[TMP13:%.*]]
+; CHECK:       7:
+; CHECK-NEXT:    br label [[TMP8:%.*]]
+; CHECK:       8:
+; CHECK-NEXT:    [[DOT0:%.*]] = phi i32 [ [[TMP14:%.*]], [[TMP13]] ], [ 1, [[TMP7]] ]
+; CHECK-NEXT:    [[TMP9:%.*]] = load i32, i32* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP10:%.*]] = add nsw i32 [[TMP9]], -1
+; CHECK-NEXT:    store i32 [[TMP10]], i32* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP11:%.*]] = icmp ne i32 [[TMP9]], 0
+; CHECK-NEXT:    br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP15:%.*]]
+; CHECK:       12:
+; CHECK-NEXT:    br label [[TMP13]]
+; CHECK:       13:
+; CHECK-NEXT:    [[DOT1:%.*]] = phi i32 [ [[TMP6]], [[TMP5]] ], [ [[DOT0]], [[TMP12]] ]
+; CHECK-NEXT:    [[TMP14]] = add nsw i32 [[DOT1]], 1
+; CHECK-NEXT:    br label [[TMP8]]
+; CHECK:       15:
+; CHECK-NEXT:    [[TMP16:%.*]] = load i32, i32* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i32* [[TMP3]] to i8*
+; CHECK-NEXT:    call void @free(i8* nocapture noundef [[TMP17]])
+; CHECK-NEXT:    [[TMP18:%.*]] = load i32, i32* [[TMP3]], align 4
+; CHECK-NEXT:    ret i32 [[TMP18]]
+;
+  %2 = call noalias i8* @malloc(i64 4)
+  %3 = bitcast i8* %2 to i32*
+  store i32 10, i32* %3, align 4
+  %4 = icmp eq i32 %0, 1
+  br i1 %4, label %5, label %7
+
+5:                                                ; preds = %1
+  %6 = add nsw i32 %0, 5
+  br label %13
+
+7:                                                ; preds = %1
+  br label %8
+
+8:                                                ; preds = %13, %7
+  %.0 = phi i32 [ %14, %13 ], [ 1, %7 ]
+  %9 = load i32, i32* %3, align 4
+  %10 = add nsw i32 %9, -1
+  store i32 %10, i32* %3, align 4
+  %11 = icmp ne i32 %9, 0
+  br i1 %11, label %12, label %15
+
+12:                                               ; preds = %8
+  br label %13
+
+13:                                               ; preds = %12, %5
+  %.1 = phi i32 [ %6, %5 ], [ %.0, %12 ]
+  %14 = add nsw i32 %.1, 1
+  br label %8
+
+15:                                               ; preds = %8
+  %16 = load i32, i32* %3, align 4
+  %17 = bitcast i32* %3 to i8*
+  call void @free(i8* %17)
+  %18 = load i32, i32* %3, align 4
+  ret i32 %18
+}
+
+
+define i32 @malloc_in_loop(i32 %0) {
+; CHECK-LABEL: define {{[^@]+}}@malloc_in_loop
+; CHECK-SAME: (i32 [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[TMP2:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = alloca i32*, align 8
+; CHECK-NEXT:    store i32 [[TMP0]], i32* [[TMP2]], align 4
+; CHECK-NEXT:    br label [[TMP4:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP6:%.*]] = add nsw i32 [[TMP5]], -1
+; CHECK-NEXT:    store i32 [[TMP6]], i32* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp sgt i32 [[TMP6]], 0
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11:%.*]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = call noalias i8* @malloc(i64 noundef 4)
+; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i8* [[TMP9]] to i32*
+; CHECK-NEXT:    store i32 1, i32* [[TMP10]], align 8
+; CHECK-NEXT:    br label [[TMP4]]
+; CHECK:       11:
+; CHECK-NEXT:    ret i32 5
+;
+  %2 = alloca i32, align 4
+  %3 = alloca i32*, align 8
+  store i32 %0, i32* %2, align 4
+  br label %4
+
+4:                                                ; preds = %8, %1
+  %5 = load i32, i32* %2, align 4
+  %6 = add nsw i32 %5, -1
+  store i32 %6, i32* %2, align 4
+  %7 = icmp sgt i32 %6, 0
+  br i1 %7, label %8, label %11
+
+8:                                                ; preds = %4
+  %9 = call noalias i8* @malloc(i64 4)
+  %10 = bitcast i8* %9 to i32*
+  store i32 1, i32* %10, align 8
+  br label %4
+
+11:                                               ; preds = %4
+  ret i32 5
+}
+
+; Malloc/Calloc too large
+define i32 @test13() {
+; CHECK-LABEL: define {{[^@]+}}@test13() {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 noundef 256)
+; CHECK-NEXT:    tail call void @no_sync_func(i8* noalias nocapture nofree [[TMP1]])
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
+; CHECK-NEXT:    store i32 10, i32* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
+; CHECK-NEXT:    tail call void @free(i8* noalias nocapture noundef nonnull align 4 dereferenceable(4) [[TMP1]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+  %1 = tail call noalias i8* @malloc(i64 256)
+  tail call void @no_sync_func(i8* %1)
+  %2 = bitcast i8* %1 to i32*
+  store i32 10, i32* %2
+  %3 = load i32, i32* %2
+  tail call void @free(i8* %1)
+  ret i32 %3
+}
+
+define i32 @test_sle() {
+; CHECK-LABEL: define {{[^@]+}}@test_sle() {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 noundef -1)
+; CHECK-NEXT:    tail call void @no_sync_func(i8* noalias nocapture nofree [[TMP1]])
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
+; CHECK-NEXT:    store i32 10, i32* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
+; CHECK-NEXT:    tail call void @free(i8* noalias nocapture noundef nonnull align 4 dereferenceable(4) [[TMP1]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+  %1 = tail call noalias i8* @malloc(i64 -1)
+  tail call void @no_sync_func(i8* %1)
+  %2 = bitcast i8* %1 to i32*
+  store i32 10, i32* %2
+  %3 = load i32, i32* %2
+  tail call void @free(i8* %1)
+  ret i32 %3
+}
+
+define i32 @test_overflow() {
+; CHECK-LABEL: define {{[^@]+}}@test_overflow() {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call noalias i8* @calloc(i64 noundef 65537, i64 noundef 65537)
+; CHECK-NEXT:    tail call void @no_sync_func(i8* noalias nocapture nofree [[TMP1]])
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32*
+; CHECK-NEXT:    store i32 10, i32* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
+; CHECK-NEXT:    tail call void @free(i8* noalias nocapture noundef nonnull align 4 dereferenceable(4) [[TMP1]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+  %1 = tail call noalias i8* @calloc(i64 65537, i64 65537)
+  tail call void @no_sync_func(i8* %1)
+  %2 = bitcast i8* %1 to i32*
+  store i32 10, i32* %2
+  %3 = load i32, i32* %2
+  tail call void @free(i8* %1)
+  ret i32 %3
+}
+
+define void @test14() {
+; CHECK-LABEL: define {{[^@]+}}@test14() {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call noalias i8* @calloc(i64 noundef 64, i64 noundef 4)
+; CHECK-NEXT:    tail call void @no_sync_func(i8* noalias nocapture nofree [[TMP1]])
+; CHECK-NEXT:    tail call void @free(i8* noalias nocapture [[TMP1]])
+; CHECK-NEXT:    ret void
+;
+  %1 = tail call noalias i8* @calloc(i64 64, i64 4)
+  tail call void @no_sync_func(i8* %1)
+  tail call void @free(i8* %1)
+  ret void
+}
+
+define void @test15(i64 %S) {
+; CHECK-LABEL: define {{[^@]+}}@test15
+; CHECK-SAME: (i64 [[S:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 [[S]])
+; CHECK-NEXT:    tail call void @no_sync_func(i8* noalias nocapture nofree [[TMP1]])
+; CHECK-NEXT:    tail call void @free(i8* noalias nocapture [[TMP1]])
+; CHECK-NEXT:    ret void
+;
+  %1 = tail call noalias i8* @malloc(i64 %S)
+  tail call void @no_sync_func(i8* %1)
+  tail call void @free(i8* %1)
+  ret void
+}
+
+define void @test16a(i8 %v, i8** %P) {
+; CHECK-LABEL: define {{[^@]+}}@test16a
+; CHECK-SAME: (i8 [[V:%.*]], i8** nocapture nofree readnone [[P:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 noundef 4)
+; CHECK-NEXT:    store i8 [[V]], i8* [[TMP1]], align 1
+; CHECK-NEXT:    tail call void @no_sync_func(i8* noalias nocapture nofree noundef nonnull dereferenceable(1) [[TMP1]])
+; CHECK-NEXT:    tail call void @free(i8* noalias nocapture noundef nonnull dereferenceable(1) [[TMP1]])
+; CHECK-NEXT:    ret void
+;
+  %1 = tail call noalias i8* @malloc(i64 4)
+  store i8 %v, i8* %1
+  tail call void @no_sync_func(i8* %1)
+  tail call void @free(i8* nonnull dereferenceable(1) %1)
+  ret void
+}
+
+define void @test16b(i8 %v, i8** %P) {
+; CHECK-LABEL: define {{[^@]+}}@test16b
+; CHECK-SAME: (i8 [[V:%.*]], i8** nocapture nofree writeonly [[P:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 noundef 4)
+; CHECK-NEXT:    store i8* [[TMP1]], i8** [[P]], align 8
+; CHECK-NEXT:    tail call void @no_sync_func(i8* nocapture nofree [[TMP1]])
+; CHECK-NEXT:    tail call void @free(i8* nocapture [[TMP1]])
+; CHECK-NEXT:    ret void
+;
+  %1 = tail call noalias i8* @malloc(i64 4)
+  store i8* %1, i8** %P
+  tail call void @no_sync_func(i8* %1)
+  tail call void @free(i8* %1)
+  ret void
+}
+
+define void @test16c(i8 %v, i8** %P) {
+; CHECK-LABEL: define {{[^@]+}}@test16c
+; CHECK-SAME: (i8 [[V:%.*]], i8** nocapture nofree writeonly [[P:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 noundef 4)
+; CHECK-NEXT:    store i8* [[TMP1]], i8** [[P]], align 8
+; CHECK-NEXT:    tail call void @no_sync_func(i8* nocapture nofree [[TMP1]]) #[[ATTR5]]
+; CHECK-NEXT:    tail call void @free(i8* nocapture [[TMP1]])
+; CHECK-NEXT:    ret void
+;
+  %1 = tail call noalias i8* @malloc(i64 4)
+  store i8* %1, i8** %P
+  tail call void @no_sync_func(i8* %1) nounwind
+  tail call void @free(i8* %1)
+  ret void
+}
+
+define void @test16d(i8 %v, i8** %P) {
+; CHECK-LABEL: define {{[^@]+}}@test16d
+; CHECK-SAME: (i8 [[V:%.*]], i8** nocapture nofree writeonly [[P:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 noundef 4)
+; CHECK-NEXT:    store i8* [[TMP1]], i8** [[P]], align 8
+; CHECK-NEXT:    ret void
+;
+  %1 = tail call noalias i8* @malloc(i64 4)
+  store i8* %1, i8** %P
+  ret void
+}
+;.
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind willreturn }
+; CHECK: attributes #[[ATTR1:[0-9]+]] = { nofree nosync willreturn }
+; CHECK: attributes #[[ATTR2:[0-9]+]] = { nofree nounwind }
+; CHECK: attributes #[[ATTR3]] = { noreturn }
+; CHECK: attributes #[[ATTR4:[0-9]+]] = { argmemonly nofree nosync nounwind willreturn }
+; CHECK: attributes #[[ATTR5]] = { nounwind }
+;.


        


More information about the llvm-commits mailing list