[llvm] [TargetMachine] Add `getFlatAddressSpace` to `TargetMachine` (PR #108594)
Shilei Tian via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 13 11:15:40 PDT 2024
https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/108594
>From 8785aefc5bbf68a284e68df51fc84feefa507352 Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Fri, 13 Sep 2024 11:58:33 -0400
Subject: [PATCH] [TargetMachine] Add `getFlatAddressSpace` to `TargetMachine`
Currently `getFlatAddressSpace` is a member function of TTI, which is
inconvenient because it cannot be used without a function (and thus without a
TTI instance). However, it should really be a `TargetMachine` function. This
patch simply adds it there.
Note that the TTI one was not removed, because `InferAddressSpacePass` relies
on it to determine whether it is necessary to run the pass. This might not be
the best approach, but it can be addressed in a follow-up patch if desired.
---
.../llvm/Analysis/TargetTransformInfo.h | 21 ++++++-------------
.../llvm/Analysis/TargetTransformInfoImpl.h | 2 +-
llvm/include/llvm/CodeGen/BasicTTIImpl.h | 5 +----
llvm/include/llvm/Target/TargetMachine.h | 18 ++++++++++++++++
.../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 4 ++++
llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h | 2 ++
.../AMDGPU/AMDGPUTargetTransformInfo.cpp | 4 ++++
.../Target/AMDGPU/AMDGPUTargetTransformInfo.h | 9 ++------
llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp | 4 ++++
llvm/lib/Target/NVPTX/NVPTXTargetMachine.h | 2 ++
.../Target/NVPTX/NVPTXTargetTransformInfo.h | 8 +++----
11 files changed, 48 insertions(+), 31 deletions(-)
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index b2124c6106198e..463ec8f1d968ad 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -452,21 +452,12 @@ class TargetTransformInfo {
bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const;
/// Returns the address space ID for a target's 'flat' address space. Note
- /// this is not necessarily the same as addrspace(0), which LLVM sometimes
- /// refers to as the generic address space. The flat address space is a
- /// generic address space that can be used access multiple segments of memory
- /// with different address spaces. Access of a memory location through a
- /// pointer with this address space is expected to be legal but slower
- /// compared to the same memory location accessed through a pointer with a
- /// different address space.
- //
- /// This is for targets with different pointer representations which can
- /// be converted with the addrspacecast instruction. If a pointer is converted
- /// to this address space, optimizations should attempt to replace the access
- /// with the source address space.
- ///
- /// \returns ~0u if the target does not have such a flat address space to
- /// optimize away.
+ /// that this is almost the same as \p TargetMachine::getFlatAddressSpace. The
+ /// only difference is that it can still return ~0U even if the target has a
+ /// flat address space, when the associated function doesn't really have one.
+ /// For example, for AMDGPU, functions with certain calling conventions don't
+ /// have a flat address space. This provides fine-grained control for cases
+ /// such as whether we want to run InferAddressSpacePass for the associated
+ /// function.
unsigned getFlatAddressSpace() const;
/// Return any intrinsic address operand indexes which may be rewritten if
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 90eef93a2a54d5..d5f604387f4d8f 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -115,7 +115,7 @@ class TargetTransformInfoImplBase {
return true;
}
- unsigned getFlatAddressSpace() const { return -1; }
+ unsigned getFlatAddressSpace() const { return ~0U; }
bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
Intrinsic::ID IID) const {
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 50dc7d5c54c54a..a4f2cbb2a08362 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -292,10 +292,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
return true;
}
- unsigned getFlatAddressSpace() {
- // Return an invalid address space.
- return -1;
- }
+ unsigned getFlatAddressSpace() { return ~0U; }
bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
Intrinsic::ID IID) const {
diff --git a/llvm/include/llvm/Target/TargetMachine.h b/llvm/include/llvm/Target/TargetMachine.h
index c3e9d41315f617..610abf2d79f8fa 100644
--- a/llvm/include/llvm/Target/TargetMachine.h
+++ b/llvm/include/llvm/Target/TargetMachine.h
@@ -434,6 +434,24 @@ class TargetMachine {
function_ref<void(std::unique_ptr<Module> MPart)> ModuleCallback) {
return false;
}
+
+ /// Returns the address space ID for a target's 'flat' address space. Note
+ /// this is not necessarily the same as addrspace(0), which LLVM sometimes
+ /// refers to as the generic address space. The flat address space is a
+ /// generic address space that can be used access multiple segments of memory
+ /// with different address spaces. Access of a memory location through a
+ /// pointer with this address space is expected to be legal but slower
+ /// compared to the same memory location accessed through a pointer with a
+ /// different address space.
+  ///
+ /// This is for targets with different pointer representations which can
+ /// be converted with the addrspacecast instruction. If a pointer is converted
+ /// to this address space, optimizations should attempt to replace the access
+ /// with the source address space.
+ ///
+ /// \returns ~0U if the target does not have such a flat address space to
+ /// optimize away.
+ virtual unsigned getFlatAddressSpace() const { return ~0U; }
};
/// This class describes a target machine that is implemented with the LLVM
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index f860b139945122..6499a295e4559b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -887,6 +887,10 @@ bool AMDGPUTargetMachine::splitModule(
return true;
}
+unsigned AMDGPUTargetMachine::getFlatAddressSpace() const {
+ return AMDGPUAS::FLAT_ADDRESS;
+}
+
//===----------------------------------------------------------------------===//
// GCN Target Machine (SI+)
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index 5b7257ddb36f1e..50e27075bbd96d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -71,6 +71,8 @@ class AMDGPUTargetMachine : public LLVMTargetMachine {
bool splitModule(Module &M, unsigned NumParts,
function_ref<void(std::unique_ptr<Module> MPart)>
ModuleCallback) override;
+
+ unsigned getFlatAddressSpace() const override;
};
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 4cf7733a260ff0..820ca80f7c44b5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -300,6 +300,10 @@ GCNTTIImpl::GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
HasFP32Denormals = Mode.FP32Denormals != DenormalMode::getPreserveSign();
HasFP64FP16Denormals =
Mode.FP64FP16Denormals != DenormalMode::getPreserveSign();
+ // Don't bother running InferAddressSpaces pass on graphics shaders which
+ // don't use flat addressing.
+ if (!IsGraphics)
+ FlatAddressSpace = TM->getFlatAddressSpace();
}
bool GCNTTIImpl::hasBranchDivergence(const Function *F) const {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index 01df2e6caaba1d..9dac778eb5d2e7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -71,6 +71,7 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
bool IsGraphics;
bool HasFP32Denormals;
bool HasFP64FP16Denormals;
+ unsigned FlatAddressSpace = ~0U;
static constexpr bool InlinerVectorBonusPercent = 0;
static const FeatureBitset InlineFeatureIgnoreList;
@@ -200,13 +201,7 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
return AMDGPU::addrspacesMayAlias(AS0, AS1);
}
- unsigned getFlatAddressSpace() const {
- // Don't bother running InferAddressSpaces pass on graphics shaders which
- // don't use flat addressing.
- if (IsGraphics)
- return -1;
- return AMDGPUAS::FLAT_ADDRESS;
- }
+ unsigned getFlatAddressSpace() const { return FlatAddressSpace; }
bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
Intrinsic::ID IID) const;
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 57b7fa783c14a7..7b12d7d6c1e24b 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -265,6 +265,10 @@ NVPTXTargetMachine::getPredicatedAddrSpace(const Value *V) const {
return std::make_pair(nullptr, -1);
}
+unsigned NVPTXTargetMachine::getFlatAddressSpace() const {
+ return AddressSpace::ADDRESS_SPACE_GENERIC;
+}
+
void NVPTXPassConfig::addEarlyCSEOrGVNPass() {
if (getOptLevel() == CodeGenOptLevel::Aggressive)
addPass(createGVNPass());
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h
index 2b88da67a50f95..e5e66eaf65ef32 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h
@@ -76,6 +76,8 @@ class NVPTXTargetMachine : public LLVMTargetMachine {
std::pair<const Value *, unsigned>
getPredicatedAddrSpace(const Value *V) const override;
+
+ unsigned getFlatAddressSpace() const override;
}; // NVPTXTargetMachine.
class NVPTXTargetMachine32 : public NVPTXTargetMachine {
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
index 4160f5f6bfae76..b9cb3706c43aef 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
@@ -32,6 +32,7 @@ class NVPTXTTIImpl : public BasicTTIImplBase<NVPTXTTIImpl> {
const NVPTXSubtarget *ST;
const NVPTXTargetLowering *TLI;
+ unsigned FlatAddressSpace = ~0U;
const NVPTXSubtarget *getST() const { return ST; };
const NVPTXTargetLowering *getTLI() const { return TLI; };
@@ -39,15 +40,14 @@ class NVPTXTTIImpl : public BasicTTIImplBase<NVPTXTTIImpl> {
public:
explicit NVPTXTTIImpl(const NVPTXTargetMachine *TM, const Function &F)
: BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl()),
- TLI(ST->getTargetLowering()) {}
+ TLI(ST->getTargetLowering()),
+ FlatAddressSpace(TM->getFlatAddressSpace()) {}
bool hasBranchDivergence(const Function *F = nullptr) { return true; }
bool isSourceOfDivergence(const Value *V);
- unsigned getFlatAddressSpace() const {
- return AddressSpace::ADDRESS_SPACE_GENERIC;
- }
+ unsigned getFlatAddressSpace() const { return FlatAddressSpace; }
bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const {
return AS != AddressSpace::ADDRESS_SPACE_SHARED &&
More information about the llvm-commits
mailing list