[llvm] [NVPTX] Add TTI support for folding isspacep in InferAS (PR #114486)
Alex MacLean via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 31 16:18:52 PDT 2024
https://github.com/AlexMaclean created https://github.com/llvm/llvm-project/pull/114486
This change enables constant folding of '`@llvm.nvvm.isspacep.*`' intrinsics if the address space can be propagated in InferAddressSpaces.
>From 5b5dd06df1a35f8c45e7f0a7d54c64ecb1ce16f5 Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Thu, 31 Oct 2024 20:25:07 +0000
Subject: [PATCH] [NVPTX] Add TTI support for folding isspacep in InferAS
---
.../Target/NVPTX/NVPTXTargetTransformInfo.cpp | 83 +++++++---
.../Target/NVPTX/NVPTXTargetTransformInfo.h | 6 +
.../InferAddressSpaces/NVPTX/isspacep.ll | 144 ++++++++++++++++++
3 files changed, 209 insertions(+), 24 deletions(-)
create mode 100644 llvm/test/Transforms/InferAddressSpaces/NVPTX/isspacep.ll
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index 3507573df1869f..5df211726882ec 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -416,33 +416,34 @@ static Instruction *convertNvvmIntrinsicToLlvm(InstCombiner &IC,
llvm_unreachable("All SpecialCase enumerators should be handled in switch.");
}
+ // Returns true/false when we know the answer, nullopt otherwise.
+static std::optional<bool> evaluateIsSpace(Intrinsic::ID IID, unsigned AS) {
+ if (AS == NVPTXAS::ADDRESS_SPACE_GENERIC ||
+ AS == NVPTXAS::ADDRESS_SPACE_PARAM)
+ return std::nullopt; // Got to check at run-time.
+ switch (IID) {
+ case Intrinsic::nvvm_isspacep_global:
+ return AS == NVPTXAS::ADDRESS_SPACE_GLOBAL;
+ case Intrinsic::nvvm_isspacep_local:
+ return AS == NVPTXAS::ADDRESS_SPACE_LOCAL;
+ case Intrinsic::nvvm_isspacep_shared:
+ return AS == NVPTXAS::ADDRESS_SPACE_SHARED;
+ case Intrinsic::nvvm_isspacep_shared_cluster:
+ // We can't tell shared from shared_cluster at compile time from AS alone,
+ // but it can't be either if AS is not shared.
+ return AS == NVPTXAS::ADDRESS_SPACE_SHARED ? std::nullopt
+ : std::optional{false};
+ case Intrinsic::nvvm_isspacep_const:
+ return AS == NVPTXAS::ADDRESS_SPACE_CONST;
+ default:
+ llvm_unreachable("Unexpected intrinsic");
+ }
+}
+
// Returns an instruction pointer (may be nullptr if we do not know the answer).
// Returns nullopt if `II` is not one of the `isspacep` intrinsics.
static std::optional<Instruction *>
handleSpaceCheckIntrinsics(InstCombiner &IC, IntrinsicInst &II) {
- // Returns true/false when we know the answer, nullopt otherwise.
- auto CheckASMatch = [](unsigned IID, unsigned AS) -> std::optional<bool> {
- if (AS == NVPTXAS::ADDRESS_SPACE_GENERIC ||
- AS == NVPTXAS::ADDRESS_SPACE_PARAM)
- return std::nullopt; // Got to check at run-time.
- switch (IID) {
- case Intrinsic::nvvm_isspacep_global:
- return AS == NVPTXAS::ADDRESS_SPACE_GLOBAL;
- case Intrinsic::nvvm_isspacep_local:
- return AS == NVPTXAS::ADDRESS_SPACE_LOCAL;
- case Intrinsic::nvvm_isspacep_shared:
- return AS == NVPTXAS::ADDRESS_SPACE_SHARED;
- case Intrinsic::nvvm_isspacep_shared_cluster:
- // We can't tell shared from shared_cluster at compile time from AS alone,
- // but it can't be either is AS is not shared.
- return AS == NVPTXAS::ADDRESS_SPACE_SHARED ? std::nullopt
- : std::optional{false};
- case Intrinsic::nvvm_isspacep_const:
- return AS == NVPTXAS::ADDRESS_SPACE_CONST;
- default:
- llvm_unreachable("Unexpected intrinsic");
- }
- };
switch (auto IID = II.getIntrinsicID()) {
case Intrinsic::nvvm_isspacep_global:
@@ -458,7 +459,7 @@ handleSpaceCheckIntrinsics(InstCombiner &IC, IntrinsicInst &II) {
if (auto *ASCO = dyn_cast<AddrSpaceCastOperator>(Op0))
AS = ASCO->getOperand(0)->getType()->getPointerAddressSpace();
- if (std::optional<bool> Answer = CheckASMatch(IID, AS))
+ if (std::optional<bool> Answer = evaluateIsSpace(IID, AS))
return IC.replaceInstUsesWith(II,
ConstantInt::get(II.getType(), *Answer));
return nullptr; // Don't know the answer, got to check at run time.
@@ -525,3 +526,37 @@ void NVPTXTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
TTI::PeelingPreferences &PP) {
BaseT::getPeelingPreferences(L, SE, PP);
}
+
+bool NVPTXTTIImpl::collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
+ Intrinsic::ID IID) const {
+ switch (IID) {
+ case Intrinsic::nvvm_isspacep_const:
+ case Intrinsic::nvvm_isspacep_global:
+ case Intrinsic::nvvm_isspacep_local:
+ case Intrinsic::nvvm_isspacep_shared:
+ case Intrinsic::nvvm_isspacep_shared_cluster: {
+ OpIndexes.push_back(0);
+ return true;
+ }
+ }
+ return false;
+}
+
+Value *NVPTXTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
+ Value *OldV,
+ Value *NewV) const {
+ const Intrinsic::ID IID = II->getIntrinsicID();
+ switch (IID) {
+ case Intrinsic::nvvm_isspacep_const:
+ case Intrinsic::nvvm_isspacep_global:
+ case Intrinsic::nvvm_isspacep_local:
+ case Intrinsic::nvvm_isspacep_shared:
+ case Intrinsic::nvvm_isspacep_shared_cluster: {
+ const unsigned NewAS = NewV->getType()->getPointerAddressSpace();
+ if (const auto R = evaluateIsSpace(IID, NewAS))
+ return ConstantInt::get(II->getType(), *R);
+ return nullptr;
+ }
+ }
+ return nullptr;
+}
\ No newline at end of file
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
index 86140daa7be489..0f4fb280b2d996 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
@@ -123,6 +123,12 @@ class NVPTXTTIImpl : public BasicTTIImplBase<NVPTXTTIImpl> {
return true;
}
}
+
+ bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
+ Intrinsic::ID IID) const;
+
+ Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
+ Value *NewV) const;
};
} // end namespace llvm
diff --git a/llvm/test/Transforms/InferAddressSpaces/NVPTX/isspacep.ll b/llvm/test/Transforms/InferAddressSpaces/NVPTX/isspacep.ll
new file mode 100644
index 00000000000000..348fa688770df6
--- /dev/null
+++ b/llvm/test/Transforms/InferAddressSpaces/NVPTX/isspacep.ll
@@ -0,0 +1,144 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=nvptx64-nvidia-cuda -passes=infer-address-spaces,bdce %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-p3:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+declare i1 @llvm.nvvm.isspacep.const(ptr) readnone noinline
+declare i1 @llvm.nvvm.isspacep.global(ptr) readnone noinline
+declare i1 @llvm.nvvm.isspacep.local(ptr) readnone noinline
+declare i1 @llvm.nvvm.isspacep.shared(ptr) readnone noinline
+declare i1 @llvm.nvvm.isspacep.shared.cluster(ptr) readnone noinline
+
+define i1 @test_isspacep_const_true(ptr addrspace(4) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_const_true(
+; CHECK-SAME: ptr addrspace(4) [[ADDR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret i1 true
+;
+entry:
+ %addr0 = addrspacecast ptr addrspace(4) %addr to ptr
+ %addr1 = getelementptr i8, ptr %addr0, i32 10
+ %val = call i1 @llvm.nvvm.isspacep.const(ptr %addr1)
+ ret i1 %val
+}
+
+define i1 @test_isspacep_const_false(ptr addrspace(1) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_const_false(
+; CHECK-SAME: ptr addrspace(1) [[ADDR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret i1 false
+;
+entry:
+ %addr0 = addrspacecast ptr addrspace(1) %addr to ptr
+ %addr1 = getelementptr i8, ptr %addr0, i32 10
+ %val = call i1 @llvm.nvvm.isspacep.const(ptr %addr1)
+ ret i1 %val
+}
+
+define i1 @test_isspacep_global_true(ptr addrspace(1) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_global_true(
+; CHECK-SAME: ptr addrspace(1) [[ADDR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret i1 true
+;
+entry:
+ %addr0 = addrspacecast ptr addrspace(1) %addr to ptr
+ %addr1 = getelementptr i8, ptr %addr0, i32 10
+ %val = call i1 @llvm.nvvm.isspacep.global(ptr %addr1)
+ ret i1 %val
+}
+
+define i1 @test_isspacep_global_false(ptr addrspace(4) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_global_false(
+; CHECK-SAME: ptr addrspace(4) [[ADDR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret i1 false
+;
+entry:
+ %addr0 = addrspacecast ptr addrspace(4) %addr to ptr
+ %addr1 = getelementptr i8, ptr %addr0, i32 10
+ %val = call i1 @llvm.nvvm.isspacep.global(ptr %addr1)
+ ret i1 %val
+}
+
+define i1 @test_isspacep_local_true(ptr addrspace(5) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_local_true(
+; CHECK-SAME: ptr addrspace(5) [[ADDR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret i1 true
+;
+entry:
+ %addr0 = addrspacecast ptr addrspace(5) %addr to ptr
+ %addr1 = getelementptr i8, ptr %addr0, i32 10
+ %val = call i1 @llvm.nvvm.isspacep.local(ptr %addr1)
+ ret i1 %val
+}
+
+define i1 @test_isspacep_local_false(ptr addrspace(1) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_local_false(
+; CHECK-SAME: ptr addrspace(1) [[ADDR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret i1 false
+;
+entry:
+ %addr0 = addrspacecast ptr addrspace(1) %addr to ptr
+ %addr1 = getelementptr i8, ptr %addr0, i32 10
+ %val = call i1 @llvm.nvvm.isspacep.local(ptr %addr1)
+ ret i1 %val
+}
+
+define i1 @test_isspacep_shared_true(ptr addrspace(3) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_shared_true(
+; CHECK-SAME: ptr addrspace(3) [[ADDR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret i1 true
+;
+entry:
+ %addr0 = addrspacecast ptr addrspace(3) %addr to ptr
+ %addr1 = getelementptr i8, ptr %addr0, i32 10
+ %val = call i1 @llvm.nvvm.isspacep.shared(ptr %addr1)
+ ret i1 %val
+}
+
+define i1 @test_isspacep_shared_false(ptr addrspace(1) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_shared_false(
+; CHECK-SAME: ptr addrspace(1) [[ADDR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret i1 false
+;
+entry:
+ %addr0 = addrspacecast ptr addrspace(1) %addr to ptr
+ %addr1 = getelementptr i8, ptr %addr0, i32 10
+ %val = call i1 @llvm.nvvm.isspacep.shared(ptr %addr1)
+ ret i1 %val
+}
+
+define i1 @test_isspacep_cluster_shared_unsure(ptr addrspace(3) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_cluster_shared_unsure(
+; CHECK-SAME: ptr addrspace(3) [[ADDR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[ADDR1:%.*]] = getelementptr i8, ptr addrspace(3) [[ADDR]], i32 10
+; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(3) [[ADDR1]] to ptr
+; CHECK-NEXT: [[VAL:%.*]] = call i1 @llvm.nvvm.isspacep.shared.cluster(ptr [[TMP0]])
+; CHECK-NEXT: ret i1 [[VAL]]
+;
+entry:
+ %addr0 = addrspacecast ptr addrspace(3) %addr to ptr
+ %addr1 = getelementptr i8, ptr %addr0, i32 10
+ %val = call i1 @llvm.nvvm.isspacep.shared.cluster(ptr %addr1)
+ ret i1 %val
+}
+
+define i1 @test_isspacep_cluster_shared_false(ptr addrspace(1) %addr) {
+; CHECK-LABEL: define i1 @test_isspacep_cluster_shared_false(
+; CHECK-SAME: ptr addrspace(1) [[ADDR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: ret i1 false
+;
+entry:
+ %addr0 = addrspacecast ptr addrspace(1) %addr to ptr
+ %addr1 = getelementptr i8, ptr %addr0, i32 10
+ %val = call i1 @llvm.nvvm.isspacep.shared.cluster(ptr %addr1)
+ ret i1 %val
+}
More information about the llvm-commits
mailing list