[llvm] [NVPTX] Add TTI support for folding isspacep in InferAS (PR #114486)

Alex MacLean via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 31 16:18:52 PDT 2024


https://github.com/AlexMaclean created https://github.com/llvm/llvm-project/pull/114486

This change enables constant folding of '`@llvm.nvvm.isspacep.*`' intrinsics if the address space can be propagated in InferAddressSpaces.

>From 5b5dd06df1a35f8c45e7f0a7d54c64ecb1ce16f5 Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Thu, 31 Oct 2024 20:25:07 +0000
Subject: [PATCH] [NVPTX] Add TTI support for folding isspacep in InferAS

---
 .../Target/NVPTX/NVPTXTargetTransformInfo.cpp |  83 +++++++---
 .../Target/NVPTX/NVPTXTargetTransformInfo.h   |   6 +
 .../InferAddressSpaces/NVPTX/isspacep.ll      | 144 ++++++++++++++++++
 3 files changed, 209 insertions(+), 24 deletions(-)
 create mode 100644 llvm/test/Transforms/InferAddressSpaces/NVPTX/isspacep.ll

diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index 3507573df1869f..5df211726882ec 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -416,33 +416,34 @@ static Instruction *convertNvvmIntrinsicToLlvm(InstCombiner &IC,
   llvm_unreachable("All SpecialCase enumerators should be handled in switch.");
 }
 
+// Returns true/false when we know the answer, nullopt otherwise.
+static std::optional<bool> evaluateIsSpace(Intrinsic::ID IID, unsigned AS) {
+  if (AS == NVPTXAS::ADDRESS_SPACE_GENERIC ||
+      AS == NVPTXAS::ADDRESS_SPACE_PARAM)
+    return std::nullopt; // Got to check at run-time.
+  switch (IID) {
+  case Intrinsic::nvvm_isspacep_global:
+    return AS == NVPTXAS::ADDRESS_SPACE_GLOBAL;
+  case Intrinsic::nvvm_isspacep_local:
+    return AS == NVPTXAS::ADDRESS_SPACE_LOCAL;
+  case Intrinsic::nvvm_isspacep_shared:
+    return AS == NVPTXAS::ADDRESS_SPACE_SHARED;
+  case Intrinsic::nvvm_isspacep_shared_cluster:
+    // We can't tell shared from shared_cluster at compile time from AS alone,
+    // but it can't be either if AS is not shared.
+    return AS == NVPTXAS::ADDRESS_SPACE_SHARED ? std::nullopt
+                                               : std::optional{false};
+  case Intrinsic::nvvm_isspacep_const:
+    return AS == NVPTXAS::ADDRESS_SPACE_CONST;
+  default:
+    llvm_unreachable("Unexpected intrinsic");
+  }
+}
+
 // Returns an instruction pointer (may be nullptr if we do not know the answer).
 // Returns nullopt if `II` is not one of the `isspacep` intrinsics.
 static std::optional<Instruction *>
 handleSpaceCheckIntrinsics(InstCombiner &IC, IntrinsicInst &II) {
-  // Returns true/false when we know the answer, nullopt otherwise.
-  auto CheckASMatch = [](unsigned IID, unsigned AS) -> std::optional<bool> {
-    if (AS == NVPTXAS::ADDRESS_SPACE_GENERIC ||
-        AS == NVPTXAS::ADDRESS_SPACE_PARAM)
-      return std::nullopt; // Got to check at run-time.
-    switch (IID) {
-    case Intrinsic::nvvm_isspacep_global:
-      return AS == NVPTXAS::ADDRESS_SPACE_GLOBAL;
-    case Intrinsic::nvvm_isspacep_local:
-      return AS == NVPTXAS::ADDRESS_SPACE_LOCAL;
-    case Intrinsic::nvvm_isspacep_shared:
-      return AS == NVPTXAS::ADDRESS_SPACE_SHARED;
-    case Intrinsic::nvvm_isspacep_shared_cluster:
-      // We can't tell shared from shared_cluster at compile time from AS alone,
-      // but it can't be either is AS is not shared.
-      return AS == NVPTXAS::ADDRESS_SPACE_SHARED ? std::nullopt
-                                                 : std::optional{false};
-    case Intrinsic::nvvm_isspacep_const:
-      return AS == NVPTXAS::ADDRESS_SPACE_CONST;
-    default:
-      llvm_unreachable("Unexpected intrinsic");
-    }
-  };
 
   switch (auto IID = II.getIntrinsicID()) {
   case Intrinsic::nvvm_isspacep_global:
@@ -458,7 +459,7 @@ handleSpaceCheckIntrinsics(InstCombiner &IC, IntrinsicInst &II) {
       if (auto *ASCO = dyn_cast<AddrSpaceCastOperator>(Op0))
         AS = ASCO->getOperand(0)->getType()->getPointerAddressSpace();
 
-    if (std::optional<bool> Answer = CheckASMatch(IID, AS))
+    if (std::optional<bool> Answer = evaluateIsSpace(IID, AS))
       return IC.replaceInstUsesWith(II,
                                     ConstantInt::get(II.getType(), *Answer));
     return nullptr; // Don't know the answer, got to check at run time.
@@ -525,3 +526,37 @@ void NVPTXTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                                          TTI::PeelingPreferences &PP) {
   BaseT::getPeelingPreferences(L, SE, PP);
 }
+
+bool NVPTXTTIImpl::collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
+                                              Intrinsic::ID IID) const {
+  switch (IID) {
+  case Intrinsic::nvvm_isspacep_const:
+  case Intrinsic::nvvm_isspacep_global:
+  case Intrinsic::nvvm_isspacep_local:
+  case Intrinsic::nvvm_isspacep_shared:
+  case Intrinsic::nvvm_isspacep_shared_cluster: {
+    OpIndexes.push_back(0);
+    return true;
+  }
+  }
+  return false;
+}
+
+Value *NVPTXTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
+                                                      Value *OldV,
+                                                      Value *NewV) const {
+  const Intrinsic::ID IID = II->getIntrinsicID();
+  switch (IID) {
+  case Intrinsic::nvvm_isspacep_const:
+  case Intrinsic::nvvm_isspacep_global:
+  case Intrinsic::nvvm_isspacep_local:
+  case Intrinsic::nvvm_isspacep_shared:
+  case Intrinsic::nvvm_isspacep_shared_cluster: {
+    const unsigned NewAS = NewV->getType()->getPointerAddressSpace();
+    if (const auto R = evaluateIsSpace(IID, NewAS))
+      return ConstantInt::get(II->getType(), *R);
+    return nullptr;
+  }
+  }
+  return nullptr;
+}
\ No newline at end of file
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
index 86140daa7be489..0f4fb280b2d996 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
@@ -123,6 +123,12 @@ class NVPTXTTIImpl : public BasicTTIImplBase<NVPTXTTIImpl> {
       return true;
     }
   }
+
+  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
+                                  Intrinsic::ID IID) const;
+
+  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
+                                          Value *NewV) const;
 };
 
 } // end namespace llvm
diff --git a/llvm/test/Transforms/InferAddressSpaces/NVPTX/isspacep.ll b/llvm/test/Transforms/InferAddressSpaces/NVPTX/isspacep.ll
new file mode 100644
index 00000000000000..348fa688770df6
--- /dev/null
+++ b/llvm/test/Transforms/InferAddressSpaces/NVPTX/isspacep.ll
@@ -0,0 +1,144 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=nvptx64-nvidia-cuda -passes=infer-address-spaces,bdce %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-p3:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+declare i1 @llvm.nvvm.isspacep.const(ptr) readnone noinline
+declare i1 @llvm.nvvm.isspacep.global(ptr) readnone noinline
+declare i1 @llvm.nvvm.isspacep.local(ptr) readnone noinline
+declare i1 @llvm.nvvm.isspacep.shared(ptr) readnone noinline
+declare i1 @llvm.nvvm.isspacep.shared.cluster(ptr) readnone noinline
+
+define i1 @test_isspacep_const_true(ptr addrspace(4) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_const_true(
+; CHECK-SAME: ptr addrspace(4) [[ADDR:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret i1 true
+;
+entry:
+  %addr0 = addrspacecast ptr addrspace(4) %addr to ptr
+  %addr1 = getelementptr i8, ptr %addr0, i32 10
+  %val = call i1 @llvm.nvvm.isspacep.const(ptr %addr1)
+  ret i1 %val
+}
+
+define i1 @test_isspacep_const_false(ptr addrspace(1) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_const_false(
+; CHECK-SAME: ptr addrspace(1) [[ADDR:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret i1 false
+;
+entry:
+  %addr0 = addrspacecast ptr addrspace(1) %addr to ptr
+  %addr1 = getelementptr i8, ptr %addr0, i32 10
+  %val = call i1 @llvm.nvvm.isspacep.const(ptr %addr1)
+  ret i1 %val
+}
+
+define i1 @test_isspacep_global_true(ptr addrspace(1) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_global_true(
+; CHECK-SAME: ptr addrspace(1) [[ADDR:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret i1 true
+;
+entry:
+  %addr0 = addrspacecast ptr addrspace(1) %addr to ptr
+  %addr1 = getelementptr i8, ptr %addr0, i32 10
+  %val = call i1 @llvm.nvvm.isspacep.global(ptr %addr1)
+  ret i1 %val
+}
+
+define i1 @test_isspacep_global_false(ptr addrspace(4) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_global_false(
+; CHECK-SAME: ptr addrspace(4) [[ADDR:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret i1 false
+;
+entry:
+  %addr0 = addrspacecast ptr addrspace(4) %addr to ptr
+  %addr1 = getelementptr i8, ptr %addr0, i32 10
+  %val = call i1 @llvm.nvvm.isspacep.global(ptr %addr1)
+  ret i1 %val
+}
+
+define i1 @test_isspacep_local_true(ptr addrspace(5) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_local_true(
+; CHECK-SAME: ptr addrspace(5) [[ADDR:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret i1 true
+;
+entry:
+  %addr0 = addrspacecast ptr addrspace(5) %addr to ptr
+  %addr1 = getelementptr i8, ptr %addr0, i32 10
+  %val = call i1 @llvm.nvvm.isspacep.local(ptr %addr1)
+  ret i1 %val
+}
+
+define i1 @test_isspacep_local_false(ptr addrspace(1) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_local_false(
+; CHECK-SAME: ptr addrspace(1) [[ADDR:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret i1 false
+;
+entry:
+  %addr0 = addrspacecast ptr addrspace(1) %addr to ptr
+  %addr1 = getelementptr i8, ptr %addr0, i32 10
+  %val = call i1 @llvm.nvvm.isspacep.local(ptr %addr1)
+  ret i1 %val
+}
+
+define i1 @test_isspacep_shared_true(ptr addrspace(3) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_shared_true(
+; CHECK-SAME: ptr addrspace(3) [[ADDR:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret i1 true
+;
+entry:
+  %addr0 = addrspacecast ptr addrspace(3) %addr to ptr
+  %addr1 = getelementptr i8, ptr %addr0, i32 10
+  %val = call i1 @llvm.nvvm.isspacep.shared(ptr %addr1)
+  ret i1 %val
+}
+
+define i1 @test_isspacep_shared_false(ptr addrspace(1) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_shared_false(
+; CHECK-SAME: ptr addrspace(1) [[ADDR:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret i1 false
+;
+entry:
+  %addr0 = addrspacecast ptr addrspace(1) %addr to ptr
+  %addr1 = getelementptr i8, ptr %addr0, i32 10
+  %val = call i1 @llvm.nvvm.isspacep.shared(ptr %addr1)
+  ret i1 %val
+}
+
+define i1 @test_isspacep_cluster_shared_unsure(ptr addrspace(3) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_cluster_shared_unsure(
+; CHECK-SAME: ptr addrspace(3) [[ADDR:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[ADDR1:%.*]] = getelementptr i8, ptr addrspace(3) [[ADDR]], i32 10
+; CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast ptr addrspace(3) [[ADDR1]] to ptr
+; CHECK-NEXT:    [[VAL:%.*]] = call i1 @llvm.nvvm.isspacep.shared.cluster(ptr [[TMP0]])
+; CHECK-NEXT:    ret i1 [[VAL]]
+;
+entry:
+  %addr0 = addrspacecast ptr addrspace(3) %addr to ptr
+  %addr1 = getelementptr i8, ptr %addr0, i32 10
+  %val = call i1 @llvm.nvvm.isspacep.shared.cluster(ptr %addr1)
+  ret i1 %val
+}
+
+define i1 @test_isspacep_cluster_shared_false(ptr addrspace(1) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_cluster_shared_false(
+; CHECK-SAME: ptr addrspace(1) [[ADDR:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret i1 false
+;
+entry:
+  %addr0 = addrspacecast ptr addrspace(1) %addr to ptr
+  %addr1 = getelementptr i8, ptr %addr0, i32 10
+  %val = call i1 @llvm.nvvm.isspacep.shared.cluster(ptr %addr1)
+  ret i1 %val
+}



More information about the llvm-commits mailing list