[llvm] 499f1ca - [GlobalOpt] Use generic type when converting malloc to global

Nikita Popov via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 17 00:55:42 PST 2022


Author: Nikita Popov
Date: 2022-01-17T09:55:33+01:00
New Revision: 499f1ca79f232faae09b1793a994d1a22ba403cd

URL: https://github.com/llvm/llvm-project/commit/499f1ca79f232faae09b1793a994d1a22ba403cd
DIFF: https://github.com/llvm/llvm-project/commit/499f1ca79f232faae09b1793a994d1a22ba403cd.diff

LOG: [GlobalOpt] Use generic type when converting malloc to global

The malloc to global transform currently determines the type of the
global by looking at bitcasts of the malloc. This is limited (the
transform fails if there are multiple different types) and
incompatible with opaque pointers.

My initial approach was to construct an appropriate struct type
based on usage in loads/stores. What this patch does instead is
to always create an [i8 x AllocSize] global, without trying to
guess types at all.

This does mean that other transforms that require a certain global
type may break. I fixed two of these in D117034 and D117223, which
I believe should be sufficient to avoid regressions. In particular,
the global SRA change should end up splitting the global into
naturally-typed sub-globals, at which point all other optimizations
should work.

Differential Revision: https://reviews.llvm.org/D117092

Added: 
    llvm/test/Transforms/GlobalOpt/malloc-promote-opaque-ptr.ll

Modified: 
    llvm/lib/Transforms/IPO/GlobalOpt.cpp
    llvm/test/Transforms/GlobalOpt/2021-08-03-StoreOnceLoadMultiCasts.ll
    llvm/test/Transforms/GlobalOpt/malloc-promote-5.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index 0125d34efd810..1740a709a3146 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -835,18 +835,15 @@ static void ConstantPropUsersOf(Value *V, const DataLayout &DL,
 /// to actually DO the malloc.  Instead, turn the malloc into a global, and any
 /// loads of GV as uses of the new global.
 static GlobalVariable *
-OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
-                              ConstantInt *NElements, const DataLayout &DL,
+OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI,
+                              uint64_t AllocSize, const DataLayout &DL,
                               TargetLibraryInfo *TLI) {
   LLVM_DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << "  CALL = " << *CI
                     << '\n');
 
-  Type *GlobalType;
-  if (NElements->getZExtValue() == 1)
-    GlobalType = AllocTy;
-  else
-    // If we have an array allocation, the global variable is of an array.
-    GlobalType = ArrayType::get(AllocTy, NElements->getZExtValue());
+  // Create global of type [AllocSize x i8].
+  Type *GlobalType = ArrayType::get(Type::getInt8Ty(GV->getContext()),
+                                    AllocSize);
 
   // Create the new global variable.  The contents of the malloc'd memory is
   // undefined, so initialize with an undef value.
@@ -1006,95 +1003,26 @@ valueIsOnlyUsedLocallyOrStoredToOneGlobal(const CallInst *CI,
   return true;
 }
 
-/// getMallocType - Returns the PointerType resulting from the malloc call.
-/// The PointerType depends on the number of bitcast uses of the malloc call:
-///   0: PointerType is the calls' return type.
-///   1: PointerType is the bitcast's result type.
-///  >1: Unique PointerType cannot be determined, return NULL.
-static PointerType *getMallocType(const CallInst *CI,
-                                  const TargetLibraryInfo *TLI) {
-  assert(isMallocLikeFn(CI, TLI) && "getMallocType and not malloc call");
-
-  PointerType *MallocType = nullptr;
-  unsigned NumOfBitCastUses = 0;
-
-  // Determine if CallInst has a bitcast use.
-  for (const User *U : CI->users())
-    if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
-      MallocType = cast<PointerType>(BCI->getDestTy());
-      NumOfBitCastUses++;
-    }
-
-  // Malloc call has 1 bitcast use, so type is the bitcast's destination type.
-  if (NumOfBitCastUses == 1)
-    return MallocType;
-
-  // Malloc call was not bitcast, so type is the malloc function's return type.
-  if (NumOfBitCastUses == 0)
-    return cast<PointerType>(CI->getType());
-
-  // Type could not be determined.
-  return nullptr;
-}
-
-/// getMallocAllocatedType - Returns the Type allocated by malloc call.
-/// The Type depends on the number of bitcast uses of the malloc call:
-///   0: PointerType is the malloc calls' return type.
-///   1: PointerType is the bitcast's result type.
-///  >1: Unique PointerType cannot be determined, return NULL.
-static Type *getMallocAllocatedType(const CallInst *CI,
-                                    const TargetLibraryInfo *TLI) {
-  PointerType *PT = getMallocType(CI, TLI);
-  return PT ? PT->getElementType() : nullptr;
-}
-
-static Value *computeArraySize(const CallInst *CI, const DataLayout &DL,
-                               const TargetLibraryInfo *TLI,
-                               bool LookThroughSExt = false) {
-  if (!CI)
-    return nullptr;
-
-  // The size of the malloc's result type must be known to determine array size.
-  Type *T = getMallocAllocatedType(CI, TLI);
-  if (!T || !T->isSized())
-    return nullptr;
-
-  unsigned ElementSize = DL.getTypeAllocSize(T);
-  if (StructType *ST = dyn_cast<StructType>(T))
-    ElementSize = DL.getStructLayout(ST)->getSizeInBytes();
-
-  // If malloc call's arg can be determined to be a multiple of ElementSize,
-  // return the multiple.  Otherwise, return NULL.
-  Value *MallocArg = CI->getArgOperand(0);
-  Value *Multiple = nullptr;
-  if (ComputeMultiple(MallocArg, ElementSize, Multiple, LookThroughSExt))
-    return Multiple;
-
-  return nullptr;
-}
-
-/// getMallocArraySize - Returns the array size of a malloc call.  If the
-/// argument passed to malloc is a multiple of the size of the malloced type,
-/// then return that multiple.  For non-array mallocs, the multiple is
-/// constant 1.  Otherwise, return NULL for mallocs whose array size cannot be
-/// determined.
-static Value *getMallocArraySize(CallInst *CI, const DataLayout &DL,
-                                 const TargetLibraryInfo *TLI,
-                                 bool LookThroughSExt) {
-  assert(isMallocLikeFn(CI, TLI) && "getMallocArraySize and not malloc call");
-  return computeArraySize(CI, DL, TLI, LookThroughSExt);
-}
-
-
-/// This function is called when we see a pointer global variable with a single
-/// value stored it that is a malloc or cast of malloc.
+/// If we have a global that is only initialized with a fixed size malloc,
+/// transform the program to use global memory instead of malloc'd memory.
+/// This eliminates dynamic allocation, avoids an indirection accessing the
+/// data, and exposes the resultant global to further GlobalOpt.
 static bool tryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI,
-                                               Type *AllocTy,
                                                AtomicOrdering Ordering,
                                                const DataLayout &DL,
                                                TargetLibraryInfo *TLI) {
-  // If this is a malloc of an abstract type, don't touch it.
-  if (!AllocTy->isSized())
+  // TODO: This can be generalized to calloc-like functions by using
+  // getInitialValueOfAllocation() for the global initialization.
+  assert(isMallocLikeFn(CI, TLI) && "Must be malloc-like call");
+
+  uint64_t AllocSize;
+  if (!getObjectSize(CI, AllocSize, DL, TLI, ObjectSizeOpts()) && AllocSize > 0)
+    return false;
+
+  // Restrict this transformation to only working on small allocations
+  // (2048 bytes currently), as we don't want to introduce a 16M global or
+  // something.
+  if (AllocSize >= 2048)
     return false;
 
   // We can't optimize this global unless all uses of it are *known* to be
@@ -1113,25 +1041,8 @@ static bool tryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI,
   if (!valueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV))
     return false;
 
-  // If we have a global that is only initialized with a fixed size malloc,
-  // transform the program to use global memory instead of malloc'd memory.
-  // This eliminates dynamic allocation, avoids an indirection accessing the
-  // data, and exposes the resultant global to further GlobalOpt.
-  // We cannot optimize the malloc if we cannot determine malloc array size.
-  Value *NElems = getMallocArraySize(CI, DL, TLI, true);
-  if (!NElems)
-    return false;
-
-  if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems))
-    // Restrict this transformation to only working on small allocations
-    // (2048 bytes currently), as we don't want to introduce a 16M global or
-    // something.
-    if (NElements->getZExtValue() * DL.getTypeAllocSize(AllocTy) < 2048) {
-      OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, DL, TLI);
-      return true;
-    }
-
-  return false;
+  OptimizeGlobalAddressOfMalloc(GV, CI, AllocSize, DL, TLI);
+  return true;
 }
 
 // Try to optimize globals based on the knowledge that only one value (besides
@@ -1163,9 +1074,7 @@ optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
     } else if (isMallocLikeFn(StoredOnceVal, GetTLI)) {
       if (auto *CI = dyn_cast<CallInst>(StoredOnceVal)) {
         auto *TLI = &GetTLI(*CI->getFunction());
-        Type *MallocType = getMallocAllocatedType(CI, TLI);
-        if (MallocType && tryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType,
-                                                             Ordering, DL, TLI))
+        if (tryToOptimizeStoreOfMallocToGlobal(GV, CI, Ordering, DL, TLI))
           return true;
       }
     }

diff  --git a/llvm/test/Transforms/GlobalOpt/2021-08-03-StoreOnceLoadMultiCasts.ll b/llvm/test/Transforms/GlobalOpt/2021-08-03-StoreOnceLoadMultiCasts.ll
index e2c90c3851250..7acd56d78d10d 100644
--- a/llvm/test/Transforms/GlobalOpt/2021-08-03-StoreOnceLoadMultiCasts.ll
+++ b/llvm/test/Transforms/GlobalOpt/2021-08-03-StoreOnceLoadMultiCasts.ll
@@ -8,9 +8,9 @@ define signext i32 @f() local_unnamed_addr {
 ; CHECK-LABEL: @f(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    call void @f1()
-; CHECK-NEXT:    store i32 1, i32* @g.body, align 4
+; CHECK-NEXT:    store i32 1, i32* bitcast ([4 x i8]* @g.body to i32*), align 4
 ; CHECK-NEXT:    call void @f1()
-; CHECK-NEXT:    store i8 2, i8* bitcast (i32* @g.body to i8*), align 4
+; CHECK-NEXT:    store i8 2, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @g.body, i32 0, i32 0), align 4
 ; CHECK-NEXT:    ret i32 1
 ;
 entry:
@@ -30,7 +30,7 @@ define signext i32 @main() {
 ; CHECK-LABEL: @main(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CALL:%.*]] = call signext i32 @f()
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @g.body, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* bitcast ([4 x i8]* @g.body to i32*), align 4
 ; CHECK-NEXT:    ret i32 [[TMP0]]
 ;
 entry:

diff  --git a/llvm/test/Transforms/GlobalOpt/malloc-promote-5.ll b/llvm/test/Transforms/GlobalOpt/malloc-promote-5.ll
index efd74cafa60d2..ba59f054830fc 100644
--- a/llvm/test/Transforms/GlobalOpt/malloc-promote-5.ll
+++ b/llvm/test/Transforms/GlobalOpt/malloc-promote-5.ll
@@ -7,11 +7,7 @@
 define signext i32 @f() local_unnamed_addr {
 ; CHECK-LABEL: @f(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = call i8* @malloc(i64 4)
-; CHECK-NEXT:    [[B:%.*]] = bitcast i8* [[CALL]] to i32*
-; CHECK-NEXT:    store i32* [[B]], i32** @g, align 8
-; CHECK-NEXT:    [[B2:%.*]] = bitcast i8* [[CALL]] to i16*
-; CHECK-NEXT:    store i16 -1, i16* [[B2]], align 2
+; CHECK-NEXT:    store i16 -1, i16* bitcast ([4 x i8]* @g.body to i16*), align 2
 ; CHECK-NEXT:    ret i32 0
 ;
 entry:
@@ -28,14 +24,11 @@ define signext i32 @main() {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CALL:%.*]] = call signext i32 @f()
 ; CHECK-NEXT:    call void @f1()
-; CHECK-NEXT:    [[V0:%.*]] = load i32*, i32** @g, align 8
-; CHECK-NEXT:    store i32 1, i32* [[V0]], align 4
+; CHECK-NEXT:    store i32 1, i32* bitcast ([4 x i8]* @g.body to i32*), align 4
 ; CHECK-NEXT:    call void @f1()
-; CHECK-NEXT:    [[V1:%.*]] = load i8*, i8** bitcast (i32** @g to i8**), align 8
-; CHECK-NEXT:    store i8 2, i8* [[V1]], align 4
+; CHECK-NEXT:    store i8 2, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @g.body, i32 0, i32 0), align 4
 ; CHECK-NEXT:    call void @f1()
-; CHECK-NEXT:    [[V2:%.*]] = load i32*, i32** @g, align 8
-; CHECK-NEXT:    [[RES:%.*]] = load i32, i32* [[V2]], align 4
+; CHECK-NEXT:    [[RES:%.*]] = load i32, i32* bitcast ([4 x i8]* @g.body to i32*), align 4
 ; CHECK-NEXT:    ret i32 [[RES]]
 ;
 entry:

diff  --git a/llvm/test/Transforms/GlobalOpt/malloc-promote-opaque-ptr.ll b/llvm/test/Transforms/GlobalOpt/malloc-promote-opaque-ptr.ll
new file mode 100644
index 0000000000000..caedf919e56ee
--- /dev/null
+++ b/llvm/test/Transforms/GlobalOpt/malloc-promote-opaque-ptr.ll
@@ -0,0 +1,77 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
+; RUN: opt -S -globalopt -opaque-pointers < %s | FileCheck %s
+
+ at g1 = internal global ptr null
+ at g2 = internal global ptr null
+ at g3 = internal global ptr null
+
+declare noalias i8* @malloc(i64)
+
+;.
+; CHECK: @[[G1_BODY_0:[a-zA-Z0-9_$"\\.-]+]] = internal unnamed_addr global i64 undef
+; CHECK: @[[G2_BODY_0:[a-zA-Z0-9_$"\\.-]+]] = internal unnamed_addr global i32 undef
+; CHECK: @[[G2_BODY_1:[a-zA-Z0-9_$"\\.-]+]] = internal unnamed_addr global i32 undef
+; CHECK: @[[G2_BODY_2:[a-zA-Z0-9_$"\\.-]+]] = internal unnamed_addr global i32 undef
+; CHECK: @[[G3_BODY:[a-zA-Z0-9_$"\\.-]+]] = internal unnamed_addr global [8 x i8] undef
+;.
+define void @test_store(i64 %a, i32 %b) {
+; CHECK-LABEL: @test_store(
+; CHECK-NEXT:    store i64 [[A:%.*]], ptr @g1.body.0, align 8
+; CHECK-NEXT:    store i32 [[B:%.*]], ptr @g2.body.0, align 4
+; CHECK-NEXT:    store i32 [[B]], ptr @g2.body.1, align 4
+; CHECK-NEXT:    store i32 [[B]], ptr @g2.body.2, align 4
+; CHECK-NEXT:    store i64 [[A]], ptr @g3.body, align 4
+; CHECK-NEXT:    store i32 [[B]], ptr @g3.body, align 4
+; CHECK-NEXT:    ret void
+;
+  %m1 = call ptr @malloc(i64 8)
+  store ptr %m1, ptr @g1
+  %a1 = load ptr, ptr @g1
+
+  %m2 = call ptr @malloc(i64 16)
+  store ptr %m2, ptr @g2
+  %a2 = load ptr, ptr @g2
+
+  %m3 = call ptr @malloc(i64 8)
+  store ptr %m3, ptr @g3
+  %a3 = load ptr, ptr @g3
+
+  store i64 %a, ptr %a1
+
+  ; Access types at 
diff erent offsets.
+  store i32 %b, ptr %a2
+  %a2.4 = getelementptr i8, ptr %a2, i64 4
+  store i32 %b, ptr %a2.4
+  %a2.10 = getelementptr i8, ptr %a2, i64 10
+  store i32 %b, ptr %a2.10, align 2
+
+  ; Access two 
diff erent types at the same offset.
+  store i64 %a, ptr %a3
+  store i32 %b, ptr %a3
+
+  ret void
+}
+
+define void @test_load() {
+; CHECK-LABEL: @test_load(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @g1.body.0, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr @g2.body.0, align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr @g2.body.1, align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr @g2.body.2, align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = load i64, ptr @g3.body, align 4
+; CHECK-NEXT:    ret void
+;
+  %a1 = load ptr, ptr @g1
+  load i64, ptr %a1
+
+  %a2 = load ptr, ptr @g2
+  load i32, ptr %a2
+  %a2.4 = getelementptr i8, ptr %a2, i64 4
+  load i32, ptr %a2.4
+  %a2.10 = getelementptr i8, ptr %a2, i64 10
+  load i32, ptr %a2.10, align 2
+
+  %a3 = load ptr, ptr @g3
+  load i64, ptr %a3
+  ret void
+}


        


More information about the llvm-commits mailing list