[clang] [llvm] [llvm][opt][Transforms][SPIR-V] Enable `InferAddressSpaces` for SPIR-V (PR #110897)
via cfe-commits
cfe-commits at lists.llvm.org
Wed Oct 2 10:20:15 PDT 2024
llvmbot wrote:
@llvm/pr-subscribers-clang
Author: Alex Voicu (AlexVlx)
<details>
<summary>Changes</summary>
Although it is not currently enabled, the InferAddressSpaces pass is desirable and profitable for SPIR-V, as it can exploit address-space information that might subsequently be lost as transforms are applied to the IR / resulting SPIR-V. This patch enables the pass for all SPIR-V targets; the implementation is modelled after the AMDGPU one.
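For illustration, here is a minimal before/after sketch of the kind of rewrite the pass performs under the SPIR-V address-space mapping used in this patch (Generic = addrspace(4), CrossWorkgroup = addrspace(1)). The kernel and value names are hypothetical; the result mirrors what the updated amdgpu-kernel-arg-pointer-type.cu checks below expect.

```llvm
; Input: a CrossWorkgroup (global) kernel argument is cast to the Generic
; address space and all accesses go through the flat pointer.
define spir_kernel void @before(ptr addrspace(1) %x) {
entry:
  %flat = addrspacecast ptr addrspace(1) %x to ptr addrspace(4)
  %v = load i32, ptr addrspace(4) %flat, align 4
  %inc = add nsw i32 %v, 1
  store i32 %inc, ptr addrspace(4) %flat, align 4
  ret void
}

; What infer-address-spaces produces: the cast is gone and the accesses use
; the known CrossWorkgroup address space (addrspace(1)) directly.
define spir_kernel void @after(ptr addrspace(1) %x) {
entry:
  %v = load i32, ptr addrspace(1) %x, align 4
  %inc = add nsw i32 %v, 1
  store i32 %inc, ptr addrspace(1) %x, align 4
  ret void
}
```

The new tests exercise the pass standalone via `opt -S -mtriple=spirv64-- -passes=infer-address-spaces`, while the `registerPassBuilderCallbacks` and `addIRPasses` hooks below schedule it automatically for optimized builds.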
---
Patch is 93.17 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/110897.diff
20 Files Affected:
- (modified) clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu (+27-35)
- (modified) llvm/lib/Target/SPIRV/CMakeLists.txt (+2)
- (modified) llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp (+92)
- (modified) llvm/lib/Target/SPIRV/SPIRVTargetMachine.h (+7)
- (modified) llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h (+4)
- (added) llvm/test/Transforms/InferAddressSpaces/SPIRV/assumed-addrspace.ll (+31)
- (added) llvm/test/Transforms/InferAddressSpaces/SPIRV/basic.ll (+236)
- (added) llvm/test/Transforms/InferAddressSpaces/SPIRV/infer-address-space.ll (+211)
- (added) llvm/test/Transforms/InferAddressSpaces/SPIRV/infer-addrspacecast.ll (+65)
- (added) llvm/test/Transforms/InferAddressSpaces/SPIRV/infer-getelementptr.ll (+108)
- (added) llvm/test/Transforms/InferAddressSpaces/SPIRV/insert-pos-assert.ll (+158)
- (added) llvm/test/Transforms/InferAddressSpaces/SPIRV/is.constant.ll (+57)
- (added) llvm/test/Transforms/InferAddressSpaces/SPIRV/lit.local.cfg (+2)
- (added) llvm/test/Transforms/InferAddressSpaces/SPIRV/mem-intrinsics.ll (+145)
- (added) llvm/test/Transforms/InferAddressSpaces/SPIRV/multiple-uses-of-val.ll (+70)
- (added) llvm/test/Transforms/InferAddressSpaces/SPIRV/prefetch.ll (+60)
- (added) llvm/test/Transforms/InferAddressSpaces/SPIRV/preserving-debugloc-addrspacecast.ll (+48)
- (added) llvm/test/Transforms/InferAddressSpaces/SPIRV/redundant-addrspacecast.ll (+28)
- (added) llvm/test/Transforms/InferAddressSpaces/SPIRV/self-phi.ll (+29)
- (added) llvm/test/Transforms/InferAddressSpaces/SPIRV/volatile.ll (+187)
``````````diff
diff --git a/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu b/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
index b295bbbdaaf955..15c8b46d278ea1 100644
--- a/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
@@ -58,13 +58,11 @@
// OPT-NEXT: ret void
//
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel1Pi(
-// OPT-SPIRV-SAME: ptr addrspace(1) noundef [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0:[0-9]+]] {
+// OPT-SPIRV-SAME: ptr addrspace(1) nocapture noundef [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0:[0-9]+]] {
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
-// OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[X_COERCE]] to i64
-// OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4)
-// OPT-SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4
-// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1
-// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP1]], align 4
+// OPT-SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) [[X_COERCE]], align 4
+// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
+// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(1) [[X_COERCE]], align 4
// OPT-SPIRV-NEXT: ret void
//
// HOST-LABEL: define dso_local void @_Z22__device_stub__kernel1Pi(
@@ -126,13 +124,11 @@ __global__ void kernel1(int *x) {
// OPT-NEXT: ret void
//
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel2Ri(
-// OPT-SPIRV-SAME: ptr addrspace(1) noundef align 4 dereferenceable(4) [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
+// OPT-SPIRV-SAME: ptr addrspace(1) nocapture noundef align 4 dereferenceable(4) [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
-// OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[X_COERCE]] to i64
-// OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4)
-// OPT-SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4
-// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1
-// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP1]], align 4
+// OPT-SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) [[X_COERCE]], align 4
+// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
+// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(1) [[X_COERCE]], align 4
// OPT-SPIRV-NEXT: ret void
//
// HOST-LABEL: define dso_local void @_Z22__device_stub__kernel2Ri(
@@ -195,7 +191,7 @@ __global__ void kernel2(int &x) {
// OPT-NEXT: ret void
//
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel3PU3AS2iPU3AS1i(
-// OPT-SPIRV-SAME: ptr addrspace(2) nocapture noundef readonly [[X:%.*]], ptr addrspace(1) nocapture noundef writeonly [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR1:[0-9]+]] {
+// OPT-SPIRV-SAME: ptr addrspace(2) nocapture noundef readonly [[X:%.*]], ptr addrspace(1) nocapture noundef writeonly [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(2) [[X]], align 4
// OPT-SPIRV-NEXT: store i32 [[TMP0]], ptr addrspace(1) [[Y]], align 4
@@ -261,7 +257,7 @@ __global__ void kernel3(__attribute__((address_space(2))) int *x,
// OPT-NEXT: ret void
//
// OPT-SPIRV-LABEL: define spir_func void @_Z4funcPi(
-// OPT-SPIRV-SAME: ptr addrspace(4) nocapture noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2:[0-9]+]] {
+// OPT-SPIRV-SAME: ptr addrspace(4) nocapture noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR1:[0-9]+]] {
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) [[X]], align 4
// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
@@ -343,7 +339,7 @@ struct S {
// OPT-NEXT: ret void
//
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel41S(
-// OPT-SPIRV-SAME: [[STRUCT_S:%.*]] [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
+// OPT-SPIRV-SAME: [[STRUCT_S:%.*]] [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2:[0-9]+]] {
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_S]] [[S_COERCE]], 0
// OPT-SPIRV-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_S]] [[S_COERCE]], 1
@@ -446,19 +442,17 @@ __global__ void kernel4(struct S s) {
// OPT-NEXT: ret void
//
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel5P1S(
-// OPT-SPIRV-SAME: ptr addrspace(1) noundef [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
+// OPT-SPIRV-SAME: ptr addrspace(1) nocapture noundef readonly [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2]] {
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
-// OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[S_COERCE]] to i64
-// OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4)
-// OPT-SPIRV-NEXT: [[TMP2:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[TMP1]], align 8
-// OPT-SPIRV-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[TMP2]], align 4
-// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1
-// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP2]], align 4
-// OPT-SPIRV-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP1]], i64 8
-// OPT-SPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[Y]], align 8
-// OPT-SPIRV-NEXT: [[TMP5:%.*]] = load float, ptr addrspace(4) [[TMP4]], align 4
-// OPT-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP5]], 1.000000e+00
-// OPT-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[TMP4]], align 4
+// OPT-SPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(1) [[S_COERCE]], align 8
+// OPT-SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[TMP0]], align 4
+// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1
+// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP0]], align 4
+// OPT-SPIRV-NEXT: [[Y:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[S_COERCE]], i64 8
+// OPT-SPIRV-NEXT: [[TMP2:%.*]] = load ptr addrspace(4), ptr addrspace(1) [[Y]], align 8
+// OPT-SPIRV-NEXT: [[TMP3:%.*]] = load float, ptr addrspace(4) [[TMP2]], align 4
+// OPT-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP3]], 1.000000e+00
+// OPT-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[TMP2]], align 4
// OPT-SPIRV-NEXT: ret void
//
// HOST-LABEL: define dso_local void @_Z22__device_stub__kernel5P1S(
@@ -551,7 +545,7 @@ struct T {
// OPT-NEXT: ret void
//
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel61T(
-// OPT-SPIRV-SAME: [[STRUCT_T:%.*]] [[T_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
+// OPT-SPIRV-SAME: [[STRUCT_T:%.*]] [[T_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2]] {
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_T]] [[T_COERCE]], 0
// OPT-SPIRV-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x ptr addrspace(4)] [[TMP0]], 0
@@ -631,13 +625,11 @@ __global__ void kernel6(struct T t) {
// OPT-NEXT: ret void
//
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel7Pi(
-// OPT-SPIRV-SAME: ptr addrspace(1) noalias noundef [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
+// OPT-SPIRV-SAME: ptr addrspace(1) noalias nocapture noundef [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
-// OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[X_COERCE]] to i64
-// OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4)
-// OPT-SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4
-// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1
-// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP1]], align 4
+// OPT-SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) [[X_COERCE]], align 4
+// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
+// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(1) [[X_COERCE]], align 4
// OPT-SPIRV-NEXT: ret void
//
// HOST-LABEL: define dso_local void @_Z22__device_stub__kernel7Pi(
@@ -700,7 +692,7 @@ struct SS {
// OPT-NEXT: ret void
//
// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel82SS(
-// OPT-SPIRV-SAME: [[STRUCT_SS:%.*]] [[A_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
+// OPT-SPIRV-SAME: [[STRUCT_SS:%.*]] [[A_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2]] {
// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
// OPT-SPIRV-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_SS]] [[A_COERCE]], 0
// OPT-SPIRV-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(4) [[TMP0]], align 4
diff --git a/llvm/lib/Target/SPIRV/CMakeLists.txt b/llvm/lib/Target/SPIRV/CMakeLists.txt
index 326343ae278148..0ae292498e4636 100644
--- a/llvm/lib/Target/SPIRV/CMakeLists.txt
+++ b/llvm/lib/Target/SPIRV/CMakeLists.txt
@@ -52,6 +52,8 @@ add_llvm_target(SPIRVCodeGen
Core
Demangle
GlobalISel
+ Passes
+ Scalar
SPIRVAnalysis
MC
SPIRVDesc
diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
index e5384b2eb2c2c1..91bcd68813fc55 100644
--- a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
@@ -26,9 +26,15 @@
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/InitializePasses.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Pass.h"
+#include "llvm/Passes/OptimizationLevel.h"
+#include "llvm/Passes/PassBuilder.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/InferAddressSpaces.h"
#include "llvm/Transforms/Utils.h"
#include <optional>
@@ -91,6 +97,89 @@ SPIRVTargetMachine::SPIRVTargetMachine(const Target &T, const Triple &TT,
setRequiresStructuredCFG(false);
}
+namespace {
+ enum AddressSpace {
+ Function = storageClassToAddressSpace(SPIRV::StorageClass::Function),
+ CrossWorkgroup =
+ storageClassToAddressSpace(SPIRV::StorageClass::CrossWorkgroup),
+ UniformConstant =
+ storageClassToAddressSpace(SPIRV::StorageClass::UniformConstant),
+ Workgroup = storageClassToAddressSpace(SPIRV::StorageClass::Workgroup),
+ Generic = storageClassToAddressSpace(SPIRV::StorageClass::Generic)
+ };
+}
+
+unsigned SPIRVTargetMachine::getAssumedAddrSpace(const Value *V) const {
+ const auto *LD = dyn_cast<LoadInst>(V);
+ if (!LD)
+ return UINT32_MAX;
+
+ // It must be a load from a pointer to Generic.
+ assert(V->getType()->isPointerTy() &&
+ V->getType()->getPointerAddressSpace() == AddressSpace::Generic);
+
+ const auto *Ptr = LD->getPointerOperand();
+ if (Ptr->getType()->getPointerAddressSpace() != AddressSpace::UniformConstant)
+ return UINT32_MAX;
+// For a load from a pointer to UniformConstant, we can infer CrossWorkgroup
+ // storage, as this could only have been legally initialised with a
+ // CrossWorkgroup (aka device) constant pointer.
+ return AddressSpace::CrossWorkgroup;
+}
+
+std::pair<const Value *, unsigned>
+SPIRVTargetMachine::getPredicatedAddrSpace(const Value *V) const {
+ using namespace PatternMatch;
+
+ if (auto *II = dyn_cast<IntrinsicInst>(V)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::amdgcn_is_shared:
+ return std::pair(II->getArgOperand(0), AddressSpace::Workgroup);
+ case Intrinsic::amdgcn_is_private:
+ return std::pair(II->getArgOperand(0), AddressSpace::Function);
+ default:
+ break;
+ }
+ return std::pair(nullptr, UINT32_MAX);
+ }
+ // Check the global pointer predication based on
+ // (!is_shared(p) && !is_private(p)). Note that logical 'and' is commutative and
+ // the order of 'is_shared' and 'is_private' is not significant.
+ Value *Ptr;
+ if (getTargetTriple().getVendor() == Triple::VendorType::AMD &&
+ match(
+ const_cast<Value *>(V),
+ m_c_And(m_Not(m_Intrinsic<Intrinsic::amdgcn_is_shared>(m_Value(Ptr))),
+ m_Not(m_Intrinsic<Intrinsic::amdgcn_is_private>(m_Deferred(Ptr))))))
+ return std::pair(Ptr, AddressSpace::CrossWorkgroup);
+
+ return std::pair(nullptr, UINT32_MAX);
+}
+
+bool SPIRVTargetMachine::isNoopAddrSpaceCast(unsigned SrcAS,
+ unsigned DestAS) const {
+ if (SrcAS != AddressSpace::Generic && SrcAS != AddressSpace::CrossWorkgroup)
+ return false;
+ return DestAS == AddressSpace::Generic ||
+ DestAS == AddressSpace::CrossWorkgroup;
+}
+
+void SPIRVTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
+ PB.registerCGSCCOptimizerLateEPCallback([](CGSCCPassManager &PM,
+ OptimizationLevel Level) {
+ if (Level == OptimizationLevel::O0)
+ return;
+
+ FunctionPassManager FPM;
+
+ // Add infer address spaces pass to the opt pipeline after inlining
+ // but before SROA to increase SROA opportunities.
+ FPM.addPass(InferAddressSpacesPass(AddressSpace::Generic));
+
+ PM.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM)));
+ });
+}
+
namespace {
// SPIR-V Code Generator Pass Configuration Options.
class SPIRVPassConfig : public TargetPassConfig {
@@ -178,6 +267,9 @@ void SPIRVPassConfig::addIRPasses() {
addPass(createSPIRVStructurizerPass());
}
+ if (TM.getOptLevel() > CodeGenOptLevel::None)
+ addPass(createInferAddressSpacesPass(AddressSpace::Generic));
+
addPass(createSPIRVRegularizerPass());
addPass(createSPIRVPrepareFunctionsPass(TM));
addPass(createSPIRVStripConvergenceIntrinsicsPass());
diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.h b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.h
index a1a9f26846153b..24b09febb9d184 100644
--- a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.h
+++ b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.h
@@ -43,6 +43,13 @@ class SPIRVTargetMachine : public LLVMTargetMachine {
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF.get();
}
+
+ unsigned getAssumedAddrSpace(const Value *V) const override;
+ std::pair<const Value *, unsigned>
+ getPredicatedAddrSpace(const Value *V) const override;
+ bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DstAS) const override;
+
+ void registerPassBuilderCallbacks(PassBuilder &PB) override;
};
} // namespace llvm
diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h b/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h
index 24047f31fab290..295c0ceeade839 100644
--- a/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h
+++ b/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h
@@ -39,6 +39,10 @@ class SPIRVTTIImpl : public BasicTTIImplBase<SPIRVTTIImpl> {
: BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)),
TLI(ST->getTargetLowering()) {}
+ unsigned getFlatAddressSpace() const {
+ return storageClassToAddressSpace(SPIRV::StorageClass::Generic);
+ }
+
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
// SPIR-V natively supports OpBitcount, per 3.53.14 in the spec, as such it
// is reasonable to assume the Op is fast / preferable to the expanded loop.
diff --git a/llvm/test/Transforms/InferAddressSpaces/SPIRV/assumed-addrspace.ll b/llvm/test/Transforms/InferAddressSpaces/SPIRV/assumed-addrspace.ll
new file mode 100644
index 00000000000000..9b65ff44f288f2
--- /dev/null
+++ b/llvm/test/Transforms/InferAddressSpaces/SPIRV/assumed-addrspace.ll
@@ -0,0 +1,31 @@
+; RUN: opt -S -mtriple=spirv32-- -passes=infer-address-spaces -o - %s | FileCheck %s
+; RUN: opt -S -mtriple=spirv64-- -passes=infer-address-spaces -o - %s | FileCheck %s
+
+ at c0 = addrspace(2) global ptr undef
+
+; CHECK-LABEL: @generic_ptr_from_constant
+; CHECK: addrspacecast ptr addrspace(4) %p to ptr addrspace(1)
+; CHECK-NEXT: load float, ptr addrspace(1)
+define spir_func float @generic_ptr_from_constant() {
+ %p = load ptr addrspace(4), ptr addrspace(2) @c0
+ %v = load float, ptr addrspace(4) %p
+ ret float %v
+}
+
+%struct.S = type { ptr addrspace(4), ptr addrspace(4) }
+
+; CHECK-LABEL: @generic_ptr_from_aggregate_argument
+; CHECK: addrspacecast ptr addrspace(4) %p0 to ptr addrspace(1)
+; CHECK: addrspacecast ptr addrspace(4) %p1 to ptr addrspace(1)
+; CHECK: load i32, ptr addrspace(1)
+; CHECK: store float %v1, ptr addrspace(1)
+; CHECK: ret
+define spir_kernel void @generic_ptr_from_aggregate_argument(ptr addrspace(2) byval(%struct.S) align 8 %0) {
+ %p0 = load ptr addrspace(4), ptr addrspace(2) %0
+ %f1 = getelementptr inbounds %struct.S, ptr addrspace(2) %0, i64 0, i32 1
+ %p1 = load ptr addrspace(4), ptr addrspace(2) %f1
+ %v0 = load i32, ptr addrspace(4) %p0
+ %v1 = sitofp i32 %v0 to float
+ store float %v1, ptr addrspace(4) %p1
+ ret void
+}
diff --git a/llvm/test/Transforms/InferAddressSpaces/SPIRV/basic.ll b/llvm/test/Transforms/InferAddressSpaces/SPIRV/basic.ll
new file mode 100644
index 00000000000000..75b23aa30349af
--- /dev/null
+++ b/llvm/test/Transforms/InferAddressSpaces/SPIRV/basic.ll
@@ -0,0 +1,236 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=spirv32-- -passes=infer-address-spaces %s | FileCheck %s
+; RUN: opt -S -mtriple=spirv64-- -passes=infer-address-spaces %s | FileCheck %s
+
+; Trivial optimization of generic addressing
+
+define float @load_global_from_flat(ptr addrspace(4) %generic_scalar) #0 {
+; CHECK-LABEL: define float @load_global_from_flat(
+; CHECK-SAME: ptr addrspace(4) [[GENERIC_SCALAR:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[GENERIC_SCALAR]] to ptr addrspace(1)
+; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[TMP0]], align 4
+; CHECK-NEXT: ret float [[TMP1]]
+;
+ %tmp0 = addrspacecast ptr addrspace(4) %generic_scalar to ptr addrspace(1)
+ %tmp1 = load float, ptr addrspace(1) %tmp0
+ ret float %tmp1
+}
+
+define float @load_group_from_flat(ptr addrspace(4) %generic_scalar) #0 {
+; CHECK-LABEL: define float @load_group_from_flat(
+; CHECK-SAME: ptr addrspace(4) [[GENERIC_SCALAR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[GENERIC_SCALAR]] to ptr addrspace(3)
+; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(3) [[TMP0]], align 4
+; CHECK-NEXT: ret float [[TMP1]]
+;
+ %tmp0 = addrspacecast ptr addrspace(4) %generic_scalar to ptr addrspace(3)
+ %tmp1 = load float, ptr addrspace(3) %tmp0
+ ret float %tmp1
+}
+
+define float @load_private_from_flat(ptr addrspace(4) %generic_scalar) #0 {
+; CHECK-LABEL: define float @load_private_from_flat(
+; CHECK-SAME: ptr addrspace(4) [[GENERIC_SCALAR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[GENERIC_SCALAR]] to ptr
+; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[TMP0]], align 4
+; CHECK-NEXT: ret float [[TMP1]]
+;
+ %tmp0 = addrspacecast ptr addrspace(4) %generic_scalar to ptr
+ %tmp1 = load float, ptr %tmp0
+ ret float %tmp1
+}
+
+define spir_kernel void @store_global_from_flat(ptr addrspace(4) %generic_scalar) #0 {
+; CHECK-LABEL: define spir_kernel void @store_global_from_flat(
+; CHECK-SAME: ptr addrspace(4) [[GENERIC_SCALAR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[GENERIC_SCALAR]] to ptr addrspace(1)
+; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(1) [[TMP0]], align 4
+; CHECK-NEXT: ret void
+;
+ %tmp0 = addrspacecast ptr addrspace(4) %generic_scalar to ptr addrspace(1)
+ store float 0.0, ptr addrspace(1) %tmp0
+ ret void
+}
+
+define spir_kernel void @store_group_from_flat(ptr addrspace(4) %generic_scalar) #0 {
+; CHECK-LABEL: define spir_kernel void @store_group_from_flat(
+; CHECK-SAME: ptr addrspace(4) [[GENERIC_SCALAR:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[GENERIC_SCALAR]] to ptr addrspace(3)
+; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(3) [[TMP0]], align 4
+; CHECK-NEXT: ret void
+;
+ %tmp0 = addrspacecast ptr addrspace(4) %generic_scalar to ptr addrspace(3)
+ store float 0.0, ptr addrspace(3) %tmp0
+ ret void
+}
+
+define spir_kernel void @store_private_from_flat(ptr addrspace(4) %generic_scalar) #0 {
+; CHECK-LABEL: de...
[truncated]
``````````
</details>
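One note on `getAssumedAddrSpace`, since it is the least obvious hook: a Generic pointer loaded from UniformConstant (addrspace(2)) memory is assumed to point to CrossWorkgroup storage, so the pass casts it to addrspace(1) and rewrites its users, which is what the assumed-addrspace.ll test above checks. A minimal sketch, with hypothetical names:

```llvm
; A Generic (addrspace(4)) pointer held in UniformConstant (addrspace(2))
; memory can only legally contain a CrossWorkgroup address.
@table = addrspace(2) global ptr addrspace(4) null

define spir_func float @load_through_constant_table() {
  %p = load ptr addrspace(4), ptr addrspace(2) @table
  ; The pass rewrites the use below to:
  ;   %g = addrspacecast ptr addrspace(4) %p to ptr addrspace(1)
  ;   %v = load float, ptr addrspace(1) %g
  %v = load float, ptr addrspace(4) %p
  ret float %v
}
```

This matches the rationale in the patch comment: such a pointer could only have been legally initialised with a CrossWorkgroup (device) constant pointer.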
https://github.com/llvm/llvm-project/pull/110897
More information about the cfe-commits mailing list