[llvm] c09fbbd - Reapply "[GlobalOpt][FIX] Do not embed initializers into AS!=0 globals""

Johannes Doerfert via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 10 13:23:10 PDT 2021


Author: Johannes Doerfert
Date: 2021-09-10T15:22:56-05:00
New Revision: c09fbbdcfb92c410ea85aee6d9ce976b6f78c4a4

URL: https://github.com/llvm/llvm-project/commit/c09fbbdcfb92c410ea85aee6d9ce976b6f78c4a4
DIFF: https://github.com/llvm/llvm-project/commit/c09fbbdcfb92c410ea85aee6d9ce976b6f78c4a4.diff

LOG: Reapply "[GlobalOpt][FIX] Do not embed initializers into AS!=0 globals""

This reapplies commit 7dbba3376f633cabcf4df568bc9ca95f44a35203, or, put
differently, this reverts commit d9a8d20827dcddad831751bc38ff178e70f0b2f5.

The test now requires the amdgpu and nvptx backend explicitly as it
won't work without properly.

Added: 
    llvm/test/Transforms/GlobalOpt/address_space_initializer.ll

Modified: 
    llvm/include/llvm/Analysis/TargetTransformInfo.h
    llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
    llvm/include/llvm/Transforms/Utils/GlobalStatus.h
    llvm/lib/Analysis/TargetTransformInfo.cpp
    llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
    llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
    llvm/lib/Transforms/IPO/GlobalOpt.cpp
    llvm/lib/Transforms/Utils/GlobalStatus.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 0afd43caa1da..d999ac861dba 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -383,6 +383,10 @@ class TargetTransformInfo {
 
   bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const;
 
+  /// Return true if globals in this address space can have initializers other
+  /// than `undef`.
+  bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const;
+
   unsigned getAssumedAddrSpace(const Value *V) const;
 
   /// Rewrite intrinsic call \p II such that \p OldV will be replaced with \p
@@ -1457,6 +1461,8 @@ class TargetTransformInfo::Concept {
   virtual bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                           Intrinsic::ID IID) const = 0;
   virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0;
+  virtual bool
+  canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const = 0;
   virtual unsigned getAssumedAddrSpace(const Value *V) const = 0;
   virtual Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
                                                   Value *OldV,
@@ -1782,6 +1788,11 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
     return Impl.isNoopAddrSpaceCast(FromAS, ToAS);
   }
 
+  bool
+  canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const override {
+    return Impl.canHaveNonUndefGlobalInitializerInAddressSpace(AS);
+  }
+
   unsigned getAssumedAddrSpace(const Value *V) const override {
     return Impl.getAssumedAddrSpace(V);
   }

diff  --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 0e92518e1171..295fc355137e 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -105,6 +105,9 @@ class TargetTransformInfoImplBase {
   }
 
   bool isNoopAddrSpaceCast(unsigned, unsigned) const { return false; }
+  bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const {
+    return AS == 0;
+  };
 
   unsigned getAssumedAddrSpace(const Value *V) const { return -1; }
 

diff  --git a/llvm/include/llvm/Transforms/Utils/GlobalStatus.h b/llvm/include/llvm/Transforms/Utils/GlobalStatus.h
index 519593c96766..78d7845c4353 100644
--- a/llvm/include/llvm/Transforms/Utils/GlobalStatus.h
+++ b/llvm/include/llvm/Transforms/Utils/GlobalStatus.h
@@ -9,6 +9,7 @@
 #ifndef LLVM_TRANSFORMS_UTILS_GLOBALSTATUS_H
 #define LLVM_TRANSFORMS_UTILS_GLOBALSTATUS_H
 
+#include "llvm/IR/Instructions.h"
 #include "llvm/Support/AtomicOrdering.h"
 
 namespace llvm {
@@ -45,7 +46,7 @@ struct GlobalStatus {
 
     /// This global is stored to, but only its initializer and one other value
     /// is ever stored to it.  If this global isStoredOnce, we track the value
-    /// stored to it in StoredOnceValue below.  This is only tracked for scalar
+    /// stored to it via StoredOnceStore below.  This is only tracked for scalar
     /// globals.
     StoredOnce,
 
@@ -55,8 +56,16 @@ struct GlobalStatus {
   } StoredType = NotStored;
 
   /// If only one value (besides the initializer constant) is ever stored to
-  /// this global, keep track of what value it is.
-  Value *StoredOnceValue = nullptr;
+  /// this global, keep track of what value it is via the store instruction.
+  const StoreInst *StoredOnceStore = nullptr;
+
+  /// If only one value (besides the initializer constant) is ever stored to
+  /// this global return the stored value.
+  Value *getStoredOnceValue() const {
+    return (StoredType == StoredOnce && StoredOnceStore)
+               ? StoredOnceStore->getOperand(0)
+               : nullptr;
+  }
 
   /// These start out null/false.  When the first accessing function is noticed,
   /// it is recorded. When a second 
diff erent accessing function is noticed,

diff  --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 822d606a2d67..800896fa6a05 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -259,6 +259,11 @@ bool TargetTransformInfo::isNoopAddrSpaceCast(unsigned FromAS,
   return TTIImpl->isNoopAddrSpaceCast(FromAS, ToAS);
 }
 
+bool TargetTransformInfo::canHaveNonUndefGlobalInitializerInAddressSpace(
+    unsigned AS) const {
+  return TTIImpl->canHaveNonUndefGlobalInitializerInAddressSpace(AS);
+}
+
 unsigned TargetTransformInfo::getAssumedAddrSpace(const Value *V) const {
   return TTIImpl->getAssumedAddrSpace(V);
 }

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index 30f5848f1d67..b1148fd0068f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -182,6 +182,12 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
 
   bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                   Intrinsic::ID IID) const;
+
+  bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const {
+    return AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS &&
+           AS != AMDGPUAS::PRIVATE_ADDRESS;
+  }
+
   Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                           Value *NewV) const;
 

diff  --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
index 2cb1405dccb5..de92671f8c5b 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
@@ -48,6 +48,11 @@ class NVPTXTTIImpl : public BasicTTIImplBase<NVPTXTTIImpl> {
     return AddressSpace::ADDRESS_SPACE_GENERIC;
   }
 
+  bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const {
+    return AS != AddressSpace::ADDRESS_SPACE_SHARED &&
+           AS != AddressSpace::ADDRESS_SPACE_LOCAL && AS != ADDRESS_SPACE_PARAM;
+  }
+
   Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                                IntrinsicInst &II) const;
 

diff  --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index 2c5fec93410d..5a3cec61cea0 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -1507,6 +1507,7 @@ static void makeAllConstantUsesInstructions(Constant *C) {
 /// it if possible.  If we make a change, return true.
 static bool
 processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS,
+                      function_ref<TargetTransformInfo &(Function &)> GetTTI,
                       function_ref<TargetLibraryInfo &(Function &)> GetTLI,
                       function_ref<DominatorTree &(Function &)> LookupDomTree) {
   auto &DL = GV->getParent()->getDataLayout();
@@ -1605,18 +1606,26 @@ processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS,
     if (SRAGlobal(GV, DL))
       return true;
   }
-  if (GS.StoredType == GlobalStatus::StoredOnce && GS.StoredOnceValue) {
+  Value *StoredOnceValue = GS.getStoredOnceValue();
+  if (GS.StoredType == GlobalStatus::StoredOnce && StoredOnceValue) {
     // Avoid speculating constant expressions that might trap (div/rem).
-    auto *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue);
+    auto *SOVConstant = dyn_cast<Constant>(StoredOnceValue);
     if (SOVConstant && SOVConstant->canTrap())
       return Changed;
 
+    Function &StoreFn =
+        const_cast<Function &>(*GS.StoredOnceStore->getFunction());
     // If the initial value for the global was an undef value, and if only
     // one other value was stored into it, we can just change the
     // initializer to be the stored value, then delete all stores to the
     // global.  This allows us to mark it constant.
+    // This is restricted to address spaces that allow globals to have
+    // initializers. NVPTX, for example, does not support initializers for
+    // shared memory (AS 3).
     if (SOVConstant && SOVConstant->getType() == GV->getValueType() &&
-        isa<UndefValue>(GV->getInitializer())) {
+        isa<UndefValue>(GV->getInitializer()) &&
+        GetTTI(StoreFn).canHaveNonUndefGlobalInitializerInAddressSpace(
+            GV->getType()->getAddressSpace())) {
       // Change the initial value here.
       GV->setInitializer(SOVConstant);
 
@@ -1635,8 +1644,7 @@ processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS,
 
     // Try to optimize globals based on the knowledge that only one value
     // (besides its initializer) is ever stored to the global.
-    if (optimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GS.Ordering, DL,
-                                 GetTLI))
+    if (optimizeOnceStoredGlobal(GV, StoredOnceValue, GS.Ordering, DL, GetTLI))
       return true;
 
     // Otherwise, if the global was not a boolean, we can shrink it to be a
@@ -1656,6 +1664,7 @@ processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS,
 /// make a change, return true.
 static bool
 processGlobal(GlobalValue &GV,
+              function_ref<TargetTransformInfo &(Function &)> GetTTI,
               function_ref<TargetLibraryInfo &(Function &)> GetTLI,
               function_ref<DominatorTree &(Function &)> LookupDomTree) {
   if (GV.getName().startswith("llvm."))
@@ -1688,7 +1697,8 @@ processGlobal(GlobalValue &GV,
   if (GVar->isConstant() || !GVar->hasInitializer())
     return Changed;
 
-  return processInternalGlobal(GVar, GS, GetTLI, LookupDomTree) || Changed;
+  return processInternalGlobal(GVar, GS, GetTTI, GetTLI, LookupDomTree) ||
+         Changed;
 }
 
 /// Walk all of the direct calls of the specified function, changing them to
@@ -1991,7 +2001,7 @@ OptimizeFunctions(Module &M,
       }
     }
 
-    Changed |= processGlobal(*F, GetTLI, LookupDomTree);
+    Changed |= processGlobal(*F, GetTTI, GetTLI, LookupDomTree);
 
     if (!F->hasLocalLinkage())
       continue;
@@ -2060,6 +2070,7 @@ OptimizeFunctions(Module &M,
 
 static bool
 OptimizeGlobalVars(Module &M,
+                   function_ref<TargetTransformInfo &(Function &)> GetTTI,
                    function_ref<TargetLibraryInfo &(Function &)> GetTLI,
                    function_ref<DominatorTree &(Function &)> LookupDomTree,
                    SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) {
@@ -2088,7 +2099,7 @@ OptimizeGlobalVars(Module &M,
       continue;
     }
 
-    Changed |= processGlobal(*GV, GetTLI, LookupDomTree);
+    Changed |= processGlobal(*GV, GetTTI, GetTLI, LookupDomTree);
   }
   return Changed;
 }
@@ -2666,8 +2677,8 @@ static bool optimizeGlobalsInModule(
     });
 
     // Optimize non-address-taken globals.
-    LocalChange |=
-        OptimizeGlobalVars(M, GetTLI, LookupDomTree, NotDiscardableComdats);
+    LocalChange |= OptimizeGlobalVars(M, GetTTI, GetTLI, LookupDomTree,
+                                      NotDiscardableComdats);
 
     // Resolve aliases, when possible.
     LocalChange |= OptimizeGlobalAliases(M, NotDiscardableComdats);

diff  --git a/llvm/lib/Transforms/Utils/GlobalStatus.cpp b/llvm/lib/Transforms/Utils/GlobalStatus.cpp
index 4c3e16414e40..9bfc73e4ba6c 100644
--- a/llvm/lib/Transforms/Utils/GlobalStatus.cpp
+++ b/llvm/lib/Transforms/Utils/GlobalStatus.cpp
@@ -127,9 +127,9 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,
                 GS.StoredType = GlobalStatus::InitializerStored;
             } else if (GS.StoredType < GlobalStatus::StoredOnce) {
               GS.StoredType = GlobalStatus::StoredOnce;
-              GS.StoredOnceValue = StoredVal;
+              GS.StoredOnceStore = SI;
             } else if (GS.StoredType == GlobalStatus::StoredOnce &&
-                       GS.StoredOnceValue == StoredVal) {
+                       GS.getStoredOnceValue() == StoredVal) {
               // noop.
             } else {
               GS.StoredType = GlobalStatus::Stored;

diff  --git a/llvm/test/Transforms/GlobalOpt/address_space_initializer.ll b/llvm/test/Transforms/GlobalOpt/address_space_initializer.ll
new file mode 100644
index 000000000000..ffce653c73a5
--- /dev/null
+++ b/llvm/test/Transforms/GlobalOpt/address_space_initializer.ll
@@ -0,0 +1,44 @@
+; RUN: opt -passes=globalopt < %s -S | FileCheck %s
+; RUN: opt -passes=globalopt --mtriple=nvptx64 < %s -S | FileCheck %s --check-prefix=GPU
+; RUN: opt -passes=globalopt --mtriple=amdgcn < %s -S | FileCheck %s --check-prefix=GPU
+; REQUIRES: amdgpu-registered-target, nvptx-registered-target
+
+; Check that we don't try to set a global initializer for non AS(0) globals.
+
+ at g0 = internal global i16 undef
+ at g1 = internal addrspace(3) global i16 undef
+ at g2 = internal addrspace(1) global i16 undef
+; CHECK: internal unnamed_addr constant i16 3
+; CHECK: internal unnamed_addr addrspace(3) global i16 undef
+; CHECK: internal unnamed_addr addrspace(1) global i16 undef
+; GPU: internal unnamed_addr constant i16 3
+; GPU: internal unnamed_addr addrspace(3) global i16 undef
+; GPU: internal unnamed_addr addrspace(1) constant i16 7
+
+define void @a() {
+  store i16 3, i16* @g0, align 8
+  store i16 5, i16* addrspacecast (i16 addrspace(3)* @g1 to i16*), align 8
+  store i16 7, i16* addrspacecast (i16 addrspace(1)* @g2 to i16*), align 8
+  ret void
+}
+
+define i8 @get0() {
+  %bc = bitcast i16* @g0 to i8*
+  %gep = getelementptr i8, i8* %bc, i64 1
+  %r = load i8, i8* %gep
+  ret i8 %r
+}
+define i8 @get1() {
+  %ac = addrspacecast i16 addrspace(3)* @g1 to i16*
+  %bc = bitcast i16* %ac to i8*
+  %gep = getelementptr i8, i8* %bc, i64 1
+  %r = load i8, i8* %gep
+  ret i8 %r
+}
+define i8 @get2() {
+  %ac = addrspacecast i16 addrspace(1)* @g2 to i16*
+  %bc = bitcast i16* %ac to i8*
+  %gep = getelementptr i8, i8* %bc, i64 1
+  %r = load i8, i8* %gep
+  ret i8 %r
+}


        


More information about the llvm-commits mailing list