[llvm] [NVPTX] Add TTI support for folding isspacep in InferAS (PR #114486)
Alex MacLean via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 7 15:02:39 PST 2024
https://github.com/AlexMaclean updated https://github.com/llvm/llvm-project/pull/114486
>From ab58488b42bf45b53123ad74672338231c92ec31 Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Mon, 4 Nov 2024 17:09:53 +0000
Subject: [PATCH 1/4] [NVPTX][docs] Add isspacep.* to usage doc
---
llvm/docs/NVPTXUsage.rst | 30 ++++++++++++++++++++++++++++++
1 file changed, 30 insertions(+)
diff --git a/llvm/docs/NVPTXUsage.rst b/llvm/docs/NVPTXUsage.rst
index f225b9e8bd268b..eed68155c73193 100644
--- a/llvm/docs/NVPTXUsage.rst
+++ b/llvm/docs/NVPTXUsage.rst
@@ -250,6 +250,36 @@ The ``@llvm.nvvm.fence.proxy.tensormap_generic.*`` is a uni-directional fence us
The address operand ``addr`` and the operand ``size`` together specify the memory range ``[addr, addr+size)`` on which the ordering guarantees on the memory accesses across the proxies is to be provided. The only supported value for the ``size`` operand is ``128`` and must be an immediate. Generic Addressing is used unconditionally, and the address specified by the operand addr must fall within the ``.global`` state space. Otherwise, the behavior is undefined. For more information, see `PTX ISA <https://docs.nvidia.com/cuda/parallel-thread-execution/#parallel-synchronization-and-communication-instructions-membar>`_.
+Address Space Intrinsics
+------------------------
+
+'``llvm.nvvm.isspacep.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+.. code-block:: llvm
+
+ declare i1 @llvm.nvvm.isspacep.const(ptr %p)
+ declare i1 @llvm.nvvm.isspacep.global(ptr %p)
+ declare i1 @llvm.nvvm.isspacep.local(ptr %p)
+ declare i1 @llvm.nvvm.isspacep.shared(ptr %p)
+ declare i1 @llvm.nvvm.isspacep.shared.cluster(ptr %p)
+
+Overview:
+"""""""""
+
+The '``llvm.nvvm.isspacep.*``' intrinsics determine whether the provided generic
+pointer references memory which falls within a particular address space.
+
+Semantics:
+""""""""""
+
+If the given pointer in the generic address space refers to memory which falls
+within the state space of the intrinsic (and therefore could safely be address
+space cast to this space), 1 is returned; otherwise 0 is returned.
+
Arithmetic Intrinsics
---------------------
>From b832e93a6b6aa1a15946e09a41066e86d878b807 Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Thu, 31 Oct 2024 20:25:07 +0000
Subject: [PATCH 2/4] [NVPTX] Add TTI support for folding isspacep in InferAS
---
.../Target/NVPTX/NVPTXTargetTransformInfo.cpp | 83 +++++++---
.../Target/NVPTX/NVPTXTargetTransformInfo.h | 6 +
.../InferAddressSpaces/NVPTX/isspacep.ll | 144 ++++++++++++++++++
3 files changed, 209 insertions(+), 24 deletions(-)
create mode 100644 llvm/test/Transforms/InferAddressSpaces/NVPTX/isspacep.ll
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index 3507573df1869f..5df211726882ec 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -416,33 +416,34 @@ static Instruction *convertNvvmIntrinsicToLlvm(InstCombiner &IC,
llvm_unreachable("All SpecialCase enumerators should be handled in switch.");
}
+ // Returns true/false when we know the answer, nullopt otherwise.
+static std::optional<bool> evaluateIsSpace(Intrinsic::ID IID, unsigned AS) {
+ if (AS == NVPTXAS::ADDRESS_SPACE_GENERIC ||
+ AS == NVPTXAS::ADDRESS_SPACE_PARAM)
+ return std::nullopt; // Got to check at run-time.
+ switch (IID) {
+ case Intrinsic::nvvm_isspacep_global:
+ return AS == NVPTXAS::ADDRESS_SPACE_GLOBAL;
+ case Intrinsic::nvvm_isspacep_local:
+ return AS == NVPTXAS::ADDRESS_SPACE_LOCAL;
+ case Intrinsic::nvvm_isspacep_shared:
+ return AS == NVPTXAS::ADDRESS_SPACE_SHARED;
+ case Intrinsic::nvvm_isspacep_shared_cluster:
+    // We can't tell shared from shared_cluster at compile time from AS alone,
+    // but it can't be either if AS is not shared.
+ return AS == NVPTXAS::ADDRESS_SPACE_SHARED ? std::nullopt
+ : std::optional{false};
+ case Intrinsic::nvvm_isspacep_const:
+ return AS == NVPTXAS::ADDRESS_SPACE_CONST;
+ default:
+ llvm_unreachable("Unexpected intrinsic");
+ }
+}
+
// Returns an instruction pointer (may be nullptr if we do not know the answer).
// Returns nullopt if `II` is not one of the `isspacep` intrinsics.
static std::optional<Instruction *>
handleSpaceCheckIntrinsics(InstCombiner &IC, IntrinsicInst &II) {
- // Returns true/false when we know the answer, nullopt otherwise.
- auto CheckASMatch = [](unsigned IID, unsigned AS) -> std::optional<bool> {
- if (AS == NVPTXAS::ADDRESS_SPACE_GENERIC ||
- AS == NVPTXAS::ADDRESS_SPACE_PARAM)
- return std::nullopt; // Got to check at run-time.
- switch (IID) {
- case Intrinsic::nvvm_isspacep_global:
- return AS == NVPTXAS::ADDRESS_SPACE_GLOBAL;
- case Intrinsic::nvvm_isspacep_local:
- return AS == NVPTXAS::ADDRESS_SPACE_LOCAL;
- case Intrinsic::nvvm_isspacep_shared:
- return AS == NVPTXAS::ADDRESS_SPACE_SHARED;
- case Intrinsic::nvvm_isspacep_shared_cluster:
- // We can't tell shared from shared_cluster at compile time from AS alone,
- // but it can't be either is AS is not shared.
- return AS == NVPTXAS::ADDRESS_SPACE_SHARED ? std::nullopt
- : std::optional{false};
- case Intrinsic::nvvm_isspacep_const:
- return AS == NVPTXAS::ADDRESS_SPACE_CONST;
- default:
- llvm_unreachable("Unexpected intrinsic");
- }
- };
switch (auto IID = II.getIntrinsicID()) {
case Intrinsic::nvvm_isspacep_global:
@@ -458,7 +459,7 @@ handleSpaceCheckIntrinsics(InstCombiner &IC, IntrinsicInst &II) {
if (auto *ASCO = dyn_cast<AddrSpaceCastOperator>(Op0))
AS = ASCO->getOperand(0)->getType()->getPointerAddressSpace();
- if (std::optional<bool> Answer = CheckASMatch(IID, AS))
+ if (std::optional<bool> Answer = evaluateIsSpace(IID, AS))
return IC.replaceInstUsesWith(II,
ConstantInt::get(II.getType(), *Answer));
return nullptr; // Don't know the answer, got to check at run time.
@@ -525,3 +526,37 @@ void NVPTXTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
TTI::PeelingPreferences &PP) {
BaseT::getPeelingPreferences(L, SE, PP);
}
+
+bool NVPTXTTIImpl::collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
+ Intrinsic::ID IID) const {
+ switch (IID) {
+ case Intrinsic::nvvm_isspacep_const:
+ case Intrinsic::nvvm_isspacep_global:
+ case Intrinsic::nvvm_isspacep_local:
+ case Intrinsic::nvvm_isspacep_shared:
+ case Intrinsic::nvvm_isspacep_shared_cluster: {
+ OpIndexes.push_back(0);
+ return true;
+ }
+ }
+ return false;
+}
+
+Value *NVPTXTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
+ Value *OldV,
+ Value *NewV) const {
+ const Intrinsic::ID IID = II->getIntrinsicID();
+ switch (IID) {
+ case Intrinsic::nvvm_isspacep_const:
+ case Intrinsic::nvvm_isspacep_global:
+ case Intrinsic::nvvm_isspacep_local:
+ case Intrinsic::nvvm_isspacep_shared:
+ case Intrinsic::nvvm_isspacep_shared_cluster: {
+ const unsigned NewAS = NewV->getType()->getPointerAddressSpace();
+ if (const auto R = evaluateIsSpace(IID, NewAS))
+ return ConstantInt::get(II->getType(), *R);
+ return nullptr;
+ }
+ }
+ return nullptr;
+}
\ No newline at end of file
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
index 86140daa7be489..0f4fb280b2d996 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
@@ -123,6 +123,12 @@ class NVPTXTTIImpl : public BasicTTIImplBase<NVPTXTTIImpl> {
return true;
}
}
+
+ bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
+ Intrinsic::ID IID) const;
+
+ Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
+ Value *NewV) const;
};
} // end namespace llvm
diff --git a/llvm/test/Transforms/InferAddressSpaces/NVPTX/isspacep.ll b/llvm/test/Transforms/InferAddressSpaces/NVPTX/isspacep.ll
new file mode 100644
index 00000000000000..348fa688770df6
--- /dev/null
+++ b/llvm/test/Transforms/InferAddressSpaces/NVPTX/isspacep.ll
@@ -0,0 +1,144 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=nvptx64-nvidia-cuda -passes=infer-address-spaces,bdce %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-p3:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+declare i1 @llvm.nvvm.isspacep.const(ptr) readnone noinline
+declare i1 @llvm.nvvm.isspacep.global(ptr) readnone noinline
+declare i1 @llvm.nvvm.isspacep.local(ptr) readnone noinline
+declare i1 @llvm.nvvm.isspacep.shared(ptr) readnone noinline
+declare i1 @llvm.nvvm.isspacep.shared.cluster(ptr) readnone noinline
+
+define i1 @test_isspacep_const_true(ptr addrspace(4) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_const_true(
+; CHECK-SAME: ptr addrspace(4) [[ADDR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret i1 true
+;
+entry:
+ %addr0 = addrspacecast ptr addrspace(4) %addr to ptr
+ %addr1 = getelementptr i8, ptr %addr0, i32 10
+ %val = call i1 @llvm.nvvm.isspacep.const(ptr %addr1)
+ ret i1 %val
+}
+
+define i1 @test_isspacep_const_false(ptr addrspace(1) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_const_false(
+; CHECK-SAME: ptr addrspace(1) [[ADDR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret i1 false
+;
+entry:
+ %addr0 = addrspacecast ptr addrspace(1) %addr to ptr
+ %addr1 = getelementptr i8, ptr %addr0, i32 10
+ %val = call i1 @llvm.nvvm.isspacep.const(ptr %addr1)
+ ret i1 %val
+}
+
+define i1 @test_isspacep_global_true(ptr addrspace(1) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_global_true(
+; CHECK-SAME: ptr addrspace(1) [[ADDR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret i1 true
+;
+entry:
+ %addr0 = addrspacecast ptr addrspace(1) %addr to ptr
+ %addr1 = getelementptr i8, ptr %addr0, i32 10
+ %val = call i1 @llvm.nvvm.isspacep.global(ptr %addr1)
+ ret i1 %val
+}
+
+define i1 @test_isspacep_global_false(ptr addrspace(4) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_global_false(
+; CHECK-SAME: ptr addrspace(4) [[ADDR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret i1 false
+;
+entry:
+ %addr0 = addrspacecast ptr addrspace(4) %addr to ptr
+ %addr1 = getelementptr i8, ptr %addr0, i32 10
+ %val = call i1 @llvm.nvvm.isspacep.global(ptr %addr1)
+ ret i1 %val
+}
+
+define i1 @test_isspacep_local_true(ptr addrspace(5) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_local_true(
+; CHECK-SAME: ptr addrspace(5) [[ADDR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret i1 true
+;
+entry:
+ %addr0 = addrspacecast ptr addrspace(5) %addr to ptr
+ %addr1 = getelementptr i8, ptr %addr0, i32 10
+ %val = call i1 @llvm.nvvm.isspacep.local(ptr %addr1)
+ ret i1 %val
+}
+
+define i1 @test_isspacep_local_false(ptr addrspace(1) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_local_false(
+; CHECK-SAME: ptr addrspace(1) [[ADDR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret i1 false
+;
+entry:
+ %addr0 = addrspacecast ptr addrspace(1) %addr to ptr
+ %addr1 = getelementptr i8, ptr %addr0, i32 10
+ %val = call i1 @llvm.nvvm.isspacep.local(ptr %addr1)
+ ret i1 %val
+}
+
+define i1 @test_isspacep_shared_true(ptr addrspace(3) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_shared_true(
+; CHECK-SAME: ptr addrspace(3) [[ADDR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret i1 true
+;
+entry:
+ %addr0 = addrspacecast ptr addrspace(3) %addr to ptr
+ %addr1 = getelementptr i8, ptr %addr0, i32 10
+ %val = call i1 @llvm.nvvm.isspacep.shared(ptr %addr1)
+ ret i1 %val
+}
+
+define i1 @test_isspacep_shared_false(ptr addrspace(1) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_shared_false(
+; CHECK-SAME: ptr addrspace(1) [[ADDR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret i1 false
+;
+entry:
+ %addr0 = addrspacecast ptr addrspace(1) %addr to ptr
+ %addr1 = getelementptr i8, ptr %addr0, i32 10
+ %val = call i1 @llvm.nvvm.isspacep.shared(ptr %addr1)
+ ret i1 %val
+}
+
+define i1 @test_isspacep_cluster_shared_unsure(ptr addrspace(3) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_cluster_shared_unsure(
+; CHECK-SAME: ptr addrspace(3) [[ADDR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ADDR1:%.*]] = getelementptr i8, ptr addrspace(3) [[ADDR]], i32 10
+; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(3) [[ADDR1]] to ptr
+; CHECK-NEXT: [[VAL:%.*]] = call i1 @llvm.nvvm.isspacep.shared.cluster(ptr [[TMP0]])
+; CHECK-NEXT: ret i1 [[VAL]]
+;
+entry:
+ %addr0 = addrspacecast ptr addrspace(3) %addr to ptr
+ %addr1 = getelementptr i8, ptr %addr0, i32 10
+ %val = call i1 @llvm.nvvm.isspacep.shared.cluster(ptr %addr1)
+ ret i1 %val
+}
+
+define i1 @test_isspacep_cluster_shared_false(ptr addrspace(1) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_cluster_shared_false(
+; CHECK-SAME: ptr addrspace(1) [[ADDR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret i1 false
+;
+entry:
+ %addr0 = addrspacecast ptr addrspace(1) %addr to ptr
+ %addr1 = getelementptr i8, ptr %addr0, i32 10
+ %val = call i1 @llvm.nvvm.isspacep.shared.cluster(ptr %addr1)
+ ret i1 %val
+}
>From 431f4f771441dbf1d02e15f8ad3b6f0ecd51c658 Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Thu, 31 Oct 2024 23:25:48 +0000
Subject: [PATCH 3/4] address formatting
---
llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index 5df211726882ec..46c909295ade71 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -416,7 +416,7 @@ static Instruction *convertNvvmIntrinsicToLlvm(InstCombiner &IC,
llvm_unreachable("All SpecialCase enumerators should be handled in switch.");
}
- // Returns true/false when we know the answer, nullopt otherwise.
+// Returns true/false when we know the answer, nullopt otherwise.
static std::optional<bool> evaluateIsSpace(Intrinsic::ID IID, unsigned AS) {
if (AS == NVPTXAS::ADDRESS_SPACE_GENERIC ||
AS == NVPTXAS::ADDRESS_SPACE_PARAM)
>From 05f4e4ed6e03c8935bb44e631d07bc284bf913c4 Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Thu, 7 Nov 2024 23:02:08 +0000
Subject: [PATCH 4/4] address comment
---
llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp | 2 ++
llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp | 4 ++++
2 files changed, 6 insertions(+)
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index a5a147da8da1c5..31eb0b4fd7b721 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -302,6 +302,8 @@ void NVPTXPassConfig::addAddressSpaceInferencePasses() {
// be eliminated by SROA.
addPass(createSROAPass());
addPass(createNVPTXLowerAllocaPass());
+ // TODO: Consider running InferAddressSpaces during opt, earlier in the
+ // compilation flow.
addPass(createInferAddressSpacesPass());
addPass(createNVPTXAtomicLowerPass());
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index 46c909295ade71..8d482ffb27b143 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -442,6 +442,10 @@ static std::optional<bool> evaluateIsSpace(Intrinsic::ID IID, unsigned AS) {
// Returns an instruction pointer (may be nullptr if we do not know the answer).
// Returns nullopt if `II` is not one of the `isspacep` intrinsics.
+//
+// TODO: If InferAddressSpaces were run early enough in the pipeline this could
+// be removed in favor of the constant folding that occurs there through
+// rewriteIntrinsicWithAddressSpace
static std::optional<Instruction *>
handleSpaceCheckIntrinsics(InstCombiner &IC, IntrinsicInst &II) {
More information about the llvm-commits
mailing list