[llvm] [NVPTX] Add TTI support for folding isspacep in InferAS (PR #114486)

Alex MacLean via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 7 15:02:39 PST 2024


https://github.com/AlexMaclean updated https://github.com/llvm/llvm-project/pull/114486

>From ab58488b42bf45b53123ad74672338231c92ec31 Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Mon, 4 Nov 2024 17:09:53 +0000
Subject: [PATCH 1/4] [NVPTX][docs] Add isspacep.* to usage doc

---
 llvm/docs/NVPTXUsage.rst | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/llvm/docs/NVPTXUsage.rst b/llvm/docs/NVPTXUsage.rst
index f225b9e8bd268b..eed68155c73193 100644
--- a/llvm/docs/NVPTXUsage.rst
+++ b/llvm/docs/NVPTXUsage.rst
@@ -250,6 +250,36 @@ The ``@llvm.nvvm.fence.proxy.tensormap_generic.*`` is a uni-directional fence us
 
 The address operand ``addr`` and the operand ``size`` together specify the memory range ``[addr, addr+size)`` on which the ordering guarantees on the memory accesses across the proxies is to be provided. The only supported value for the ``size`` operand is ``128`` and must be an immediate. Generic Addressing is used unconditionally, and the address specified by the operand addr must fall within the ``.global`` state space. Otherwise, the behavior is undefined. For more information, see `PTX ISA <https://docs.nvidia.com/cuda/parallel-thread-execution/#parallel-synchronization-and-communication-instructions-membar>`_.
 
+Address Space Intrinsics
+------------------------
+
+'``llvm.nvvm.isspacep.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+.. code-block:: llvm
+
+    declare i1 @llvm.nvvm.isspacep.const(ptr %p)
+    declare i1 @llvm.nvvm.isspacep.global(ptr %p)
+    declare i1 @llvm.nvvm.isspacep.local(ptr %p)
+    declare i1 @llvm.nvvm.isspacep.shared(ptr %p)
+    declare i1 @llvm.nvvm.isspacep.shared.cluster(ptr %p)
+
+Overview:
+"""""""""
+
+The '``llvm.nvvm.isspacep.*``' intrinsics determine whether the provided generic
+pointer references memory which falls within a particular address space.
+
+Semantics:
+""""""""""
+
+If the given pointer in the generic address space refers to memory which falls
+within the state space of the intrinsic (and therefore could safely be
+addrspacecast to this space), 1 is returned, otherwise 0 is returned.
+
 Arithmetic Intrinsics
 ---------------------
 

>From b832e93a6b6aa1a15946e09a41066e86d878b807 Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Thu, 31 Oct 2024 20:25:07 +0000
Subject: [PATCH 2/4] [NVPTX] Add TTI support for folding isspacep in InferAS

---
 .../Target/NVPTX/NVPTXTargetTransformInfo.cpp |  83 +++++++---
 .../Target/NVPTX/NVPTXTargetTransformInfo.h   |   6 +
 .../InferAddressSpaces/NVPTX/isspacep.ll      | 144 ++++++++++++++++++
 3 files changed, 209 insertions(+), 24 deletions(-)
 create mode 100644 llvm/test/Transforms/InferAddressSpaces/NVPTX/isspacep.ll

diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index 3507573df1869f..5df211726882ec 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -416,33 +416,34 @@ static Instruction *convertNvvmIntrinsicToLlvm(InstCombiner &IC,
   llvm_unreachable("All SpecialCase enumerators should be handled in switch.");
 }
 
+  // Returns true/false when we know the answer, nullopt otherwise.
+static std::optional<bool> evaluateIsSpace(Intrinsic::ID IID, unsigned AS) {
+  if (AS == NVPTXAS::ADDRESS_SPACE_GENERIC ||
+      AS == NVPTXAS::ADDRESS_SPACE_PARAM)
+    return std::nullopt; // Got to check at run-time.
+  switch (IID) {
+  case Intrinsic::nvvm_isspacep_global:
+    return AS == NVPTXAS::ADDRESS_SPACE_GLOBAL;
+  case Intrinsic::nvvm_isspacep_local:
+    return AS == NVPTXAS::ADDRESS_SPACE_LOCAL;
+  case Intrinsic::nvvm_isspacep_shared:
+    return AS == NVPTXAS::ADDRESS_SPACE_SHARED;
+  case Intrinsic::nvvm_isspacep_shared_cluster:
+    // We can't tell shared from shared_cluster at compile time from AS alone,
+    // but it can't be either if AS is not shared.
+    return AS == NVPTXAS::ADDRESS_SPACE_SHARED ? std::nullopt
+                                               : std::optional{false};
+  case Intrinsic::nvvm_isspacep_const:
+    return AS == NVPTXAS::ADDRESS_SPACE_CONST;
+  default:
+    llvm_unreachable("Unexpected intrinsic");
+  }
+}
+
 // Returns an instruction pointer (may be nullptr if we do not know the answer).
 // Returns nullopt if `II` is not one of the `isspacep` intrinsics.
 static std::optional<Instruction *>
 handleSpaceCheckIntrinsics(InstCombiner &IC, IntrinsicInst &II) {
-  // Returns true/false when we know the answer, nullopt otherwise.
-  auto CheckASMatch = [](unsigned IID, unsigned AS) -> std::optional<bool> {
-    if (AS == NVPTXAS::ADDRESS_SPACE_GENERIC ||
-        AS == NVPTXAS::ADDRESS_SPACE_PARAM)
-      return std::nullopt; // Got to check at run-time.
-    switch (IID) {
-    case Intrinsic::nvvm_isspacep_global:
-      return AS == NVPTXAS::ADDRESS_SPACE_GLOBAL;
-    case Intrinsic::nvvm_isspacep_local:
-      return AS == NVPTXAS::ADDRESS_SPACE_LOCAL;
-    case Intrinsic::nvvm_isspacep_shared:
-      return AS == NVPTXAS::ADDRESS_SPACE_SHARED;
-    case Intrinsic::nvvm_isspacep_shared_cluster:
-      // We can't tell shared from shared_cluster at compile time from AS alone,
-      // but it can't be either is AS is not shared.
-      return AS == NVPTXAS::ADDRESS_SPACE_SHARED ? std::nullopt
-                                                 : std::optional{false};
-    case Intrinsic::nvvm_isspacep_const:
-      return AS == NVPTXAS::ADDRESS_SPACE_CONST;
-    default:
-      llvm_unreachable("Unexpected intrinsic");
-    }
-  };
 
   switch (auto IID = II.getIntrinsicID()) {
   case Intrinsic::nvvm_isspacep_global:
@@ -458,7 +459,7 @@ handleSpaceCheckIntrinsics(InstCombiner &IC, IntrinsicInst &II) {
       if (auto *ASCO = dyn_cast<AddrSpaceCastOperator>(Op0))
         AS = ASCO->getOperand(0)->getType()->getPointerAddressSpace();
 
-    if (std::optional<bool> Answer = CheckASMatch(IID, AS))
+    if (std::optional<bool> Answer = evaluateIsSpace(IID, AS))
       return IC.replaceInstUsesWith(II,
                                     ConstantInt::get(II.getType(), *Answer));
     return nullptr; // Don't know the answer, got to check at run time.
@@ -525,3 +526,37 @@ void NVPTXTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                                          TTI::PeelingPreferences &PP) {
   BaseT::getPeelingPreferences(L, SE, PP);
 }
+
+bool NVPTXTTIImpl::collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
+                                              Intrinsic::ID IID) const {
+  switch (IID) {
+  case Intrinsic::nvvm_isspacep_const:
+  case Intrinsic::nvvm_isspacep_global:
+  case Intrinsic::nvvm_isspacep_local:
+  case Intrinsic::nvvm_isspacep_shared:
+  case Intrinsic::nvvm_isspacep_shared_cluster: {
+    OpIndexes.push_back(0);
+    return true;
+  }
+  }
+  return false;
+}
+
+Value *NVPTXTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
+                                                      Value *OldV,
+                                                      Value *NewV) const {
+  const Intrinsic::ID IID = II->getIntrinsicID();
+  switch (IID) {
+  case Intrinsic::nvvm_isspacep_const:
+  case Intrinsic::nvvm_isspacep_global:
+  case Intrinsic::nvvm_isspacep_local:
+  case Intrinsic::nvvm_isspacep_shared:
+  case Intrinsic::nvvm_isspacep_shared_cluster: {
+    const unsigned NewAS = NewV->getType()->getPointerAddressSpace();
+    if (const auto R = evaluateIsSpace(IID, NewAS))
+      return ConstantInt::get(II->getType(), *R);
+    return nullptr;
+  }
+  }
+  return nullptr;
+}
\ No newline at end of file
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
index 86140daa7be489..0f4fb280b2d996 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
@@ -123,6 +123,12 @@ class NVPTXTTIImpl : public BasicTTIImplBase<NVPTXTTIImpl> {
       return true;
     }
   }
+
+  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
+                                  Intrinsic::ID IID) const;
+
+  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
+                                          Value *NewV) const;
 };
 
 } // end namespace llvm
diff --git a/llvm/test/Transforms/InferAddressSpaces/NVPTX/isspacep.ll b/llvm/test/Transforms/InferAddressSpaces/NVPTX/isspacep.ll
new file mode 100644
index 00000000000000..348fa688770df6
--- /dev/null
+++ b/llvm/test/Transforms/InferAddressSpaces/NVPTX/isspacep.ll
@@ -0,0 +1,144 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=nvptx64-nvidia-cuda -passes=infer-address-spaces,bdce %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-p3:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+declare i1 @llvm.nvvm.isspacep.const(ptr) readnone noinline
+declare i1 @llvm.nvvm.isspacep.global(ptr) readnone noinline
+declare i1 @llvm.nvvm.isspacep.local(ptr) readnone noinline
+declare i1 @llvm.nvvm.isspacep.shared(ptr) readnone noinline
+declare i1 @llvm.nvvm.isspacep.shared.cluster(ptr) readnone noinline
+
+define i1 @test_isspacep_const_true(ptr addrspace(4) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_const_true(
+; CHECK-SAME: ptr addrspace(4) [[ADDR:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret i1 true
+;
+entry:
+  %addr0 = addrspacecast ptr addrspace(4) %addr to ptr
+  %addr1 = getelementptr i8, ptr %addr0, i32 10
+  %val = call i1 @llvm.nvvm.isspacep.const(ptr %addr1)
+  ret i1 %val
+}
+
+define i1 @test_isspacep_const_false(ptr addrspace(1) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_const_false(
+; CHECK-SAME: ptr addrspace(1) [[ADDR:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret i1 false
+;
+entry:
+  %addr0 = addrspacecast ptr addrspace(1) %addr to ptr
+  %addr1 = getelementptr i8, ptr %addr0, i32 10
+  %val = call i1 @llvm.nvvm.isspacep.const(ptr %addr1)
+  ret i1 %val
+}
+
+define i1 @test_isspacep_global_true(ptr addrspace(1) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_global_true(
+; CHECK-SAME: ptr addrspace(1) [[ADDR:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret i1 true
+;
+entry:
+  %addr0 = addrspacecast ptr addrspace(1) %addr to ptr
+  %addr1 = getelementptr i8, ptr %addr0, i32 10
+  %val = call i1 @llvm.nvvm.isspacep.global(ptr %addr1)
+  ret i1 %val
+}
+
+define i1 @test_isspacep_global_false(ptr addrspace(4) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_global_false(
+; CHECK-SAME: ptr addrspace(4) [[ADDR:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret i1 false
+;
+entry:
+  %addr0 = addrspacecast ptr addrspace(4) %addr to ptr
+  %addr1 = getelementptr i8, ptr %addr0, i32 10
+  %val = call i1 @llvm.nvvm.isspacep.global(ptr %addr1)
+  ret i1 %val
+}
+
+define i1 @test_isspacep_local_true(ptr addrspace(5) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_local_true(
+; CHECK-SAME: ptr addrspace(5) [[ADDR:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret i1 true
+;
+entry:
+  %addr0 = addrspacecast ptr addrspace(5) %addr to ptr
+  %addr1 = getelementptr i8, ptr %addr0, i32 10
+  %val = call i1 @llvm.nvvm.isspacep.local(ptr %addr1)
+  ret i1 %val
+}
+
+define i1 @test_isspacep_local_false(ptr addrspace(1) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_local_false(
+; CHECK-SAME: ptr addrspace(1) [[ADDR:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret i1 false
+;
+entry:
+  %addr0 = addrspacecast ptr addrspace(1) %addr to ptr
+  %addr1 = getelementptr i8, ptr %addr0, i32 10
+  %val = call i1 @llvm.nvvm.isspacep.local(ptr %addr1)
+  ret i1 %val
+}
+
+define i1 @test_isspacep_shared_true(ptr addrspace(3) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_shared_true(
+; CHECK-SAME: ptr addrspace(3) [[ADDR:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret i1 true
+;
+entry:
+  %addr0 = addrspacecast ptr addrspace(3) %addr to ptr
+  %addr1 = getelementptr i8, ptr %addr0, i32 10
+  %val = call i1 @llvm.nvvm.isspacep.shared(ptr %addr1)
+  ret i1 %val
+}
+
+define i1 @test_isspacep_shared_false(ptr addrspace(1) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_shared_false(
+; CHECK-SAME: ptr addrspace(1) [[ADDR:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret i1 false
+;
+entry:
+  %addr0 = addrspacecast ptr addrspace(1) %addr to ptr
+  %addr1 = getelementptr i8, ptr %addr0, i32 10
+  %val = call i1 @llvm.nvvm.isspacep.shared(ptr %addr1)
+  ret i1 %val
+}
+
+define i1 @test_isspacep_cluster_shared_unsure(ptr addrspace(3) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_cluster_shared_unsure(
+; CHECK-SAME: ptr addrspace(3) [[ADDR:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[ADDR1:%.*]] = getelementptr i8, ptr addrspace(3) [[ADDR]], i32 10
+; CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast ptr addrspace(3) [[ADDR1]] to ptr
+; CHECK-NEXT:    [[VAL:%.*]] = call i1 @llvm.nvvm.isspacep.shared.cluster(ptr [[TMP0]])
+; CHECK-NEXT:    ret i1 [[VAL]]
+;
+entry:
+  %addr0 = addrspacecast ptr addrspace(3) %addr to ptr
+  %addr1 = getelementptr i8, ptr %addr0, i32 10
+  %val = call i1 @llvm.nvvm.isspacep.shared.cluster(ptr %addr1)
+  ret i1 %val
+}
+
+define i1 @test_isspacep_cluster_shared_false(ptr addrspace(1) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_cluster_shared_false(
+; CHECK-SAME: ptr addrspace(1) [[ADDR:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret i1 false
+;
+entry:
+  %addr0 = addrspacecast ptr addrspace(1) %addr to ptr
+  %addr1 = getelementptr i8, ptr %addr0, i32 10
+  %val = call i1 @llvm.nvvm.isspacep.shared.cluster(ptr %addr1)
+  ret i1 %val
+}

>From 431f4f771441dbf1d02e15f8ad3b6f0ecd51c658 Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Thu, 31 Oct 2024 23:25:48 +0000
Subject: [PATCH 3/4] address formatting

---
 llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index 5df211726882ec..46c909295ade71 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -416,7 +416,7 @@ static Instruction *convertNvvmIntrinsicToLlvm(InstCombiner &IC,
   llvm_unreachable("All SpecialCase enumerators should be handled in switch.");
 }
 
-  // Returns true/false when we know the answer, nullopt otherwise.
+// Returns true/false when we know the answer, nullopt otherwise.
 static std::optional<bool> evaluateIsSpace(Intrinsic::ID IID, unsigned AS) {
   if (AS == NVPTXAS::ADDRESS_SPACE_GENERIC ||
       AS == NVPTXAS::ADDRESS_SPACE_PARAM)

>From 05f4e4ed6e03c8935bb44e631d07bc284bf913c4 Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Thu, 7 Nov 2024 23:02:08 +0000
Subject: [PATCH 4/4] address comment

---
 llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp       | 2 ++
 llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp | 4 ++++
 2 files changed, 6 insertions(+)

diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index a5a147da8da1c5..31eb0b4fd7b721 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -302,6 +302,8 @@ void NVPTXPassConfig::addAddressSpaceInferencePasses() {
   // be eliminated by SROA.
   addPass(createSROAPass());
   addPass(createNVPTXLowerAllocaPass());
+  // TODO: Consider running InferAddressSpaces during opt, earlier in the
+  // compilation flow.
   addPass(createInferAddressSpacesPass());
   addPass(createNVPTXAtomicLowerPass());
 }
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index 46c909295ade71..8d482ffb27b143 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -442,6 +442,10 @@ static std::optional<bool> evaluateIsSpace(Intrinsic::ID IID, unsigned AS) {
 
 // Returns an instruction pointer (may be nullptr if we do not know the answer).
 // Returns nullopt if `II` is not one of the `isspacep` intrinsics.
+//
+// TODO: If InferAddressSpaces were run early enough in the pipeline this could
+// be removed in favor of the constant folding that occurs there through
+// rewriteIntrinsicWithAddressSpace
 static std::optional<Instruction *>
 handleSpaceCheckIntrinsics(InstCombiner &IC, IntrinsicInst &II) {
 



More information about the llvm-commits mailing list