[llvm] f375885 - [InferAddrSpace] Teach to handle assumed address space.
Michael Liao via llvm-commits
llvm-commits@lists.llvm.org
Mon Nov 16 14:06:47 PST 2020
Author: Michael Liao
Date: 2020-11-16T17:06:33-05:00
New Revision: f375885ab86d1b3e82269725c8e9aa49f347b4a7
URL: https://github.com/llvm/llvm-project/commit/f375885ab86d1b3e82269725c8e9aa49f347b4a7
DIFF: https://github.com/llvm/llvm-project/commit/f375885ab86d1b3e82269725c8e9aa49f347b4a7.diff
LOG: [InferAddrSpace] Teach to handle assumed address space.
- In certain cases, a generic pointer can be assumed to point to the
global memory space or another specific address space. With a dedicated
target hook to query that assumed address space for a given value, the
infer-address-spaces pass can infer it and propagate it to all of the
value's users.
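As an illustrative sketch (hypothetical IR, mirroring the new AMDGPU test
added below, where @c0 is a constant-address-space global and %g is a
made-up value name), the pass now rewrites a generic pointer loaded from
constant memory roughly as follows:

  ; Before: %p is a flat (generic) pointer loaded from constant memory.
  %p = load float*, float* addrspace(4)* @c0
  %v = load float, float* %p

  ; After: %p is assumed to be a global pointer; an addrspacecast makes
  ; that explicit and the access is rewritten to use addrspace(1).
  %p = load float*, float* addrspace(4)* @c0
  %g = addrspacecast float* %p to float addrspace(1)*
  %v = load float, float addrspace(1)* %g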
Differential Revision: https://reviews.llvm.org/D91121
Added:
llvm/test/Transforms/InferAddressSpaces/AMDGPU/assumed-addrspace.ll
Modified:
clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
llvm/docs/AMDGPUUsage.rst
llvm/include/llvm/Analysis/TargetTransformInfo.h
llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
llvm/include/llvm/CodeGen/BasicTTIImpl.h
llvm/include/llvm/Target/TargetMachine.h
llvm/lib/Analysis/TargetTransformInfo.cpp
llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
Removed:
################################################################################
diff --git a/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu b/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
index dc4659856026..da1f4b65f719 100644
--- a/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
@@ -56,20 +56,24 @@ struct S {
int *x;
float *y;
};
-// `by-val` struct will be coerced into a similar struct with all generic
-// pointers lowerd into global ones.
+// `by-val` struct is passed by-indirect-alias (a mix of by-ref and indirect
+// by-val). However, the enhanced address inferring pass should be able to
+// assume they are global pointers.
+//
// HOST: define void @_Z22__device_stub__kernel41S(i32* %s.coerce0, float* %s.coerce1)
// COMMON-LABEL: define amdgpu_kernel void @_Z7kernel41S(%struct.S addrspace(4)*{{.*}} byref(%struct.S) align 8 %0)
// OPT: [[R0:%.*]] = getelementptr inbounds %struct.S, %struct.S addrspace(4)* %0, i64 0, i32 0
// OPT: [[P0:%.*]] = load i32*, i32* addrspace(4)* [[R0]], align 8
+// OPT: [[G0:%.*]] = addrspacecast i32* [[P0]] to i32 addrspace(1)*
// OPT: [[R1:%.*]] = getelementptr inbounds %struct.S, %struct.S addrspace(4)* %0, i64 0, i32 1
// OPT: [[P1:%.*]] = load float*, float* addrspace(4)* [[R1]], align 8
-// OPT: [[V0:%.*]] = load i32, i32* [[P0]], align 4
+// OPT: [[G1:%.*]] = addrspacecast float* [[P1]] to float addrspace(1)*
+// OPT: [[V0:%.*]] = load i32, i32 addrspace(1)* [[G0]], align 4
// OPT: [[INC:%.*]] = add nsw i32 [[V0]], 1
-// OPT: store i32 [[INC]], i32* [[P0]], align 4
-// OPT: [[V1:%.*]] = load float, float* [[P1]], align 4
+// OPT: store i32 [[INC]], i32 addrspace(1)* [[G0]], align 4
+// OPT: [[V1:%.*]] = load float, float addrspace(1)* [[G1]], align 4
// OPT: [[ADD:%.*]] = fadd contract float [[V1]], 1.000000e+00
-// OPT: store float [[ADD]], float* [[P1]], align 4
+// OPT: store float [[ADD]], float addrspace(1)* [[G1]], align 4
// OPT: ret void
__global__ void kernel4(struct S s) {
s.x[0]++;
@@ -87,19 +91,24 @@ __global__ void kernel5(struct S *s) {
struct T {
float *x[2];
};
-// `by-val` array is also coerced.
+// `by-val` array is passed by-indirect-alias (a mix of by-ref and indirect
+// by-val). However, the enhanced address inferring pass should be able to
+// assume they are global pointers.
+//
// HOST: define void @_Z22__device_stub__kernel61T(float* %t.coerce0, float* %t.coerce1)
// COMMON-LABEL: define amdgpu_kernel void @_Z7kernel61T(%struct.T addrspace(4)*{{.*}} byref(%struct.T) align 8 %0)
// OPT: [[R0:%.*]] = getelementptr inbounds %struct.T, %struct.T addrspace(4)* %0, i64 0, i32 0, i64 0
// OPT: [[P0:%.*]] = load float*, float* addrspace(4)* [[R0]], align 8
+// OPT: [[G0:%.*]] = addrspacecast float* [[P0]] to float addrspace(1)*
// OPT: [[R1:%.*]] = getelementptr inbounds %struct.T, %struct.T addrspace(4)* %0, i64 0, i32 0, i64 1
// OPT: [[P1:%.*]] = load float*, float* addrspace(4)* [[R1]], align 8
-// OPT: [[V0:%.*]] = load float, float* [[P0]], align 4
+// OPT: [[G1:%.*]] = addrspacecast float* [[P1]] to float addrspace(1)*
+// OPT: [[V0:%.*]] = load float, float addrspace(1)* [[G0]], align 4
// OPT: [[ADD0:%.*]] = fadd contract float [[V0]], 1.000000e+00
-// OPT: store float [[ADD0]], float* [[P0]], align 4
-// OPT: [[V1:%.*]] = load float, float* [[P1]], align 4
+// OPT: store float [[ADD0]], float addrspace(1)* [[G0]], align 4
+// OPT: [[V1:%.*]] = load float, float addrspace(1)* [[G1]], align 4
// OPT: [[ADD1:%.*]] = fadd contract float [[V1]], 2.000000e+00
-// OPT: store float [[ADD1]], float* [[P1]], align 4
+// OPT: store float [[ADD1]], float addrspace(1)* [[G1]], align 4
// OPT: ret void
__global__ void kernel6(struct T t) {
t.x[0][0] += 1.f;
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index 5b9e1b86df64..750779342466 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -465,9 +465,12 @@ supported for the ``amdgcn`` target.
Using the constant address space indicates that the data will not change
during the execution of the kernel. This allows scalar read instructions to
- be used. The vector and scalar L1 caches are invalidated of volatile data
- before each kernel dispatch execution to allow constant memory to change
- values between kernel dispatches.
+ be used. As the constant address space could only be modified on the host
+ side, a generic pointer loaded from the constant address space is safe to be
+ assumed as a global pointer since only the device global memory is visible
+ and managed on the host side. The vector and scalar L1 caches are invalidated
+ of volatile data before each kernel dispatch execution to allow constant
+ memory to change values between kernel dispatches.
**Region**
The region address space uses the hardware Global Data Store (GDS). All
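For illustration only (a minimal IR fragment based on the new test added by
this patch, not normative documentation; the %g0 name is made up), a pointer
member loaded from a constant-address-space kernel argument is now assumed
to be a global pointer:

  %f0 = getelementptr inbounds %struct.S, %struct.S addrspace(4)* %0, i64 0, i32 0
  %p0 = load i32*, i32* addrspace(4)* %f0
  ; infer-address-spaces inserts the cast and rewrites %p0's users:
  %g0 = addrspacecast i32* %p0 to i32 addrspace(1)*
  %v0 = load i32, i32 addrspace(1)* %g0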
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 2b9dc2bf129d..9baea335d581 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -387,6 +387,8 @@ class TargetTransformInfo {
bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const;
+ unsigned getAssumedAddrSpace(const Value *V) const;
+
/// Rewrite intrinsic call \p II such that \p OldV will be replaced with \p
/// NewV, which has a different address space. This should happen for every
/// operand index that collectFlatAddressOperands returned for the intrinsic.
@@ -1384,6 +1386,7 @@ class TargetTransformInfo::Concept {
virtual bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
Intrinsic::ID IID) const = 0;
virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0;
+ virtual unsigned getAssumedAddrSpace(const Value *V) const = 0;
virtual Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
Value *OldV,
Value *NewV) const = 0;
@@ -1677,6 +1680,10 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
return Impl.isNoopAddrSpaceCast(FromAS, ToAS);
}
+ unsigned getAssumedAddrSpace(const Value *V) const override {
+ return Impl.getAssumedAddrSpace(V);
+ }
+
Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
Value *NewV) const override {
return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 86cab647c603..34e26db8d4f0 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -89,6 +89,8 @@ class TargetTransformInfoImplBase {
bool isNoopAddrSpaceCast(unsigned, unsigned) const { return false; }
+ unsigned getAssumedAddrSpace(const Value *V) const { return -1; }
+
Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
Value *NewV) const {
return nullptr;
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index cd9b85279c19..a2ee5d6835f5 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -224,6 +224,10 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
return getTLI()->getTargetMachine().isNoopAddrSpaceCast(FromAS, ToAS);
}
+ unsigned getAssumedAddrSpace(const Value *V) const {
+ return getTLI()->getTargetMachine().getAssumedAddrSpace(V);
+ }
+
Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
Value *NewV) const {
return nullptr;
diff --git a/llvm/include/llvm/Target/TargetMachine.h b/llvm/include/llvm/Target/TargetMachine.h
index ebf0d8b35b77..3b41b97e0c89 100644
--- a/llvm/include/llvm/Target/TargetMachine.h
+++ b/llvm/include/llvm/Target/TargetMachine.h
@@ -284,6 +284,14 @@ class TargetMachine {
return false;
}
+ /// If the specified generic pointer could be assumed as a pointer to a
+ /// specific address space, return that address space.
+ ///
+ /// Under offloading programming, the offloading target may be passed with
+ /// values only prepared on the host side and could assume certain
+ /// properties.
+ virtual unsigned getAssumedAddrSpace(const Value *V) const { return -1; }
+
/// Get a \c TargetIRAnalysis appropriate for the target.
///
/// This is used to construct the new pass manager's target IR analysis pass,
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 6443aad1cffb..315e0060834a 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -297,6 +297,10 @@ bool TargetTransformInfo::isNoopAddrSpaceCast(unsigned FromAS,
return TTIImpl->isNoopAddrSpaceCast(FromAS, ToAS);
}
+unsigned TargetTransformInfo::getAssumedAddrSpace(const Value *V) const {
+ return TTIImpl->getAssumedAddrSpace(V);
+}
+
Value *TargetTransformInfo::rewriteIntrinsicWithAddressSpace(
IntrinsicInst *II, Value *OldV, Value *NewV) const {
return TTIImpl->rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 8a8a3796fa77..d97d6eaaa2aa 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -527,6 +527,25 @@ bool AMDGPUTargetMachine::isNoopAddrSpaceCast(unsigned SrcAS,
AMDGPU::isFlatGlobalAddrSpace(DestAS);
}
+unsigned AMDGPUTargetMachine::getAssumedAddrSpace(const Value *V) const {
+ const auto *LD = dyn_cast<LoadInst>(V);
+ if (!LD)
+ return AMDGPUAS::UNKNOWN_ADDRESS_SPACE;
+
+ // It must be a generic pointer loaded.
+ assert(V->getType()->isPointerTy() &&
+ V->getType()->getPointerAddressSpace() == AMDGPUAS::FLAT_ADDRESS);
+
+ const auto *Ptr = LD->getPointerOperand();
+ if (Ptr->getType()->getPointerAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
+ return AMDGPUAS::UNKNOWN_ADDRESS_SPACE;
+ // For a generic pointer loaded from the constant memory, it could be assumed
+ // as a global pointer since the constant memory is only populated on the
+ // host side. As implied by the offload programming model, only global
+ // pointers could be referenced on the host side.
+ return AMDGPUAS::GLOBAL_ADDRESS;
+}
+
TargetTransformInfo
R600TargetMachine::getTargetTransformInfo(const Function &F) {
return TargetTransformInfo(R600TTIImpl(this, F));
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index aedcaf3fe414..56d3237832be 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -64,6 +64,8 @@ class AMDGPUTargetMachine : public LLVMTargetMachine {
}
bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
+
+ unsigned getAssumedAddrSpace(const Value *V) const override;
};
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
index 0ed6b593a91c..0753a0c259ab 100644
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -286,7 +286,8 @@ static bool isAddressExpression(const Value &V, const DataLayout &DL,
case Instruction::IntToPtr:
return isNoopPtrIntCastPair(Op, DL, TTI);
default:
- return false;
+ // That value is an address expression if it has an assumed address space.
+ return TTI->getAssumedAddrSpace(&V) != UninitializedAddressSpace;
}
}
@@ -394,8 +395,8 @@ void InferAddressSpaces::appendsFlatAddressExpressionToPostorderStack(
return;
}
- if (isAddressExpression(*V, *DL, TTI) &&
- V->getType()->getPointerAddressSpace() == FlatAddrSpace) {
+ if (V->getType()->getPointerAddressSpace() == FlatAddrSpace &&
+ isAddressExpression(*V, *DL, TTI)) {
if (Visited.insert(V).second) {
PostorderStack.emplace_back(V, false);
@@ -478,9 +479,12 @@ InferAddressSpaces::collectFlatAddressExpressions(Function &F) const {
}
// Otherwise, adds its operands to the stack and explores them.
PostorderStack.back().setInt(true);
- for (Value *PtrOperand : getPointerOperands(*TopVal, *DL, TTI)) {
- appendsFlatAddressExpressionToPostorderStack(PtrOperand, PostorderStack,
- Visited);
+ // Skip values with an assumed address space.
+ if (TTI->getAssumedAddrSpace(TopVal) == UninitializedAddressSpace) {
+ for (Value *PtrOperand : getPointerOperands(*TopVal, *DL, TTI)) {
+ appendsFlatAddressExpressionToPostorderStack(PtrOperand, PostorderStack,
+ Visited);
+ }
}
}
return Postorder;
@@ -555,6 +559,16 @@ Value *InferAddressSpaces::cloneInstructionWithNewAddressSpace(
return nullptr;
}
+ unsigned AS = TTI->getAssumedAddrSpace(I);
+ if (AS != UninitializedAddressSpace) {
+ // For the assumed address space, insert an `addrspacecast` to make that
+ // explicit.
+ auto *NewPtrTy = I->getType()->getPointerElementType()->getPointerTo(AS);
+ auto *NewI = new AddrSpaceCastInst(I, NewPtrTy);
+ NewI->insertAfter(I);
+ return NewI;
+ }
+
// Computes the converted pointer operands.
SmallVector<Value *, 4> NewPointerOperands;
for (const Use &OperandUse : I->operands()) {
@@ -700,8 +714,8 @@ Value *InferAddressSpaces::cloneValueWithNewAddressSpace(
const ValueToValueMapTy &ValueWithNewAddrSpace,
SmallVectorImpl<const Use *> *UndefUsesToFix) const {
// All values in Postorder are flat address expressions.
- assert(isAddressExpression(*V, *DL, TTI) &&
- V->getType()->getPointerAddressSpace() == FlatAddrSpace);
+ assert(V->getType()->getPointerAddressSpace() == FlatAddrSpace &&
+ isAddressExpression(*V, *DL, TTI));
if (Instruction *I = dyn_cast<Instruction>(V)) {
Value *NewV = cloneInstructionWithNewAddressSpace(
@@ -848,15 +862,24 @@ Optional<unsigned> InferAddressSpaces::updateAddressSpace(
else
NewAS = joinAddressSpaces(Src0AS, Src1AS);
} else {
- for (Value *PtrOperand : getPointerOperands(V, *DL, TTI)) {
- auto I = InferredAddrSpace.find(PtrOperand);
- unsigned OperandAS = I != InferredAddrSpace.end() ?
- I->second : PtrOperand->getType()->getPointerAddressSpace();
-
- // join(flat, *) = flat. So we can break if NewAS is already flat.
- NewAS = joinAddressSpaces(NewAS, OperandAS);
- if (NewAS == FlatAddrSpace)
- break;
+ unsigned AS = TTI->getAssumedAddrSpace(&V);
+ if (AS != UninitializedAddressSpace) {
+ // Use the assumed address space directly.
+ NewAS = AS;
+ } else {
+ // Otherwise, infer the address space from its pointer operands.
+ for (Value *PtrOperand : getPointerOperands(V, *DL, TTI)) {
+ auto I = InferredAddrSpace.find(PtrOperand);
+ unsigned OperandAS =
+ I != InferredAddrSpace.end()
+ ? I->second
+ : PtrOperand->getType()->getPointerAddressSpace();
+
+ // join(flat, *) = flat. So we can break if NewAS is already flat.
+ NewAS = joinAddressSpaces(NewAS, OperandAS);
+ if (NewAS == FlatAddrSpace)
+ break;
+ }
}
}
@@ -1068,6 +1091,9 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces(
}
User *CurUser = U.getUser();
+ // Skip if the current user is the new value itself.
+ if (CurUser == NewV)
+ continue;
// Handle more complex cases like intrinsic that need to be remangled.
if (auto *MI = dyn_cast<MemIntrinsic>(CurUser)) {
if (!MI->isVolatile() && handleMemIntrinsicPtrUse(MI, V, NewV))
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
index bf1f0ccbc2e2..4e531badb0d7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
@@ -138,35 +138,34 @@ define void @constrained_if_register_class() {
; CHECK-NEXT: s_cselect_b32 s4, 1, 0
; CHECK-NEXT: s_and_b32 s4, s4, 1
; CHECK-NEXT: s_cmp_lg_u32 s4, 0
-; CHECK-NEXT: s_cbranch_scc1 BB4_6
+; CHECK-NEXT: s_cbranch_scc1 BB4_4
; CHECK-NEXT: ; %bb.1: ; %bb2
; CHECK-NEXT: s_getpc_b64 s[6:7]
; CHECK-NEXT: s_add_u32 s6, s6, const.ptr@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s7, s7, const.ptr@gotpcrel32@hi+12
; CHECK-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0
+; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: s_mov_b32 s4, -1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-NEXT: v_mov_b32_e32 v0, s6
-; CHECK-NEXT: v_mov_b32_e32 v1, s7
-; CHECK-NEXT: flat_load_dword v0, v[0:1]
-; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; CHECK-NEXT: v_cmp_ngt_f32_e32 vcc, 1.0, v0
-; CHECK-NEXT: s_and_saveexec_b64 s[6:7], vcc
+; CHECK-NEXT: global_load_dword v0, v0, s[6:7]
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: v_cmp_gt_f32_e32 vcc, 1.0, v0
+; CHECK-NEXT: s_cbranch_vccnz BB4_3
; CHECK-NEXT: ; %bb.2: ; %bb7
; CHECK-NEXT: s_mov_b32 s4, 0
-; CHECK-NEXT: ; %bb.3: ; %bb8
-; CHECK-NEXT: s_or_b64 exec, exec, s[6:7]
-; CHECK-NEXT: v_cmp_eq_u32_e64 s[6:7], s4, 0
-; CHECK-NEXT: s_and_saveexec_b64 s[4:5], s[6:7]
-; CHECK-NEXT: s_cbranch_execz BB4_5
-; CHECK-NEXT: ; %bb.4: ; %bb11
+; CHECK-NEXT: BB4_3: ; %bb8
+; CHECK-NEXT: s_cmp_lg_u32 s4, 0
+; CHECK-NEXT: s_cselect_b32 s4, 1, 0
+; CHECK-NEXT: s_and_b32 s4, s4, 1
+; CHECK-NEXT: s_cmp_lg_u32 s4, 0
+; CHECK-NEXT: s_cbranch_scc0 BB4_5
+; CHECK-NEXT: BB4_4: ; %bb12
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+; CHECK-NEXT: BB4_5: ; %bb11
; CHECK-NEXT: v_mov_b32_e32 v0, 4.0
; CHECK-NEXT: buffer_store_dword v0, v0, s[0:3], 0 offen
-; CHECK-NEXT: BB4_5: ; %Flow
-; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
-; CHECK-NEXT: BB4_6: ; %bb12
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
bb:
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/assumed-addrspace.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/assumed-addrspace.ll
new file mode 100644
index 000000000000..8ce9ecf4281e
--- /dev/null
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/assumed-addrspace.ll
@@ -0,0 +1,31 @@
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -infer-address-spaces -o - %s | FileCheck %s
+
+@c0 = addrspace(4) global float* undef
+
+; CHECK-LABEL: @generic_ptr_from_constant
+; CHECK: addrspacecast float* %p to float addrspace(1)*
+; CHECK-NEXT: load float, float addrspace(1)*
+define float @generic_ptr_from_constant() {
+ %p = load float*, float* addrspace(4)* @c0
+ %v = load float, float* %p
+ ret float %v
+}
+
+%struct.S = type { i32*, float* }
+
+; CHECK-LABEL: @generic_ptr_from_aggregate_argument
+; CHECK: addrspacecast i32* %p0 to i32 addrspace(1)*
+; CHECK: addrspacecast float* %p1 to float addrspace(1)*
+; CHECK: load i32, i32 addrspace(1)*
+; CHECK: store float %v1, float addrspace(1)*
+; CHECK: ret
+define amdgpu_kernel void @generic_ptr_from_aggregate_argument(%struct.S addrspace(4)* byref(%struct.S) align 8 %0) {
+ %f0 = getelementptr inbounds %struct.S, %struct.S addrspace(4)* %0, i64 0, i32 0
+ %p0 = load i32*, i32* addrspace(4)* %f0
+ %f1 = getelementptr inbounds %struct.S, %struct.S addrspace(4)* %0, i64 0, i32 1
+ %p1 = load float*, float* addrspace(4)* %f1
+ %v0 = load i32, i32* %p0
+ %v1 = sitofp i32 %v0 to float
+ store float %v1, float* %p1
+ ret void
+}