[PATCH] D92394: [amdgpu] Teach one more case for assumed global pointers.
Michael Liao via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 1 07:34:46 PST 2020
hliao created this revision.
hliao added reviewers: yaxunl, arsenm, msearles.
Herald added subscribers: llvm-commits, kerbowa, hiraditya, t-tye, tpr, dstuttard, nhaehnle, jvesely, kzhuravl.
Herald added a project: LLVM.
hliao requested review of this revision.
Herald added a subscriber: wdng.
- If a generic pointer is loaded from a `readonly` and `noalias` kernel pointer argument, it can be assumed to be a global pointer.
  + `readonly` prevents any modification from the device side, and
  + `noalias` ensures that the pointer does not alias any other object during the whole kernel execution lifetime.
  + Taken together, it is safe to assume that the memory object can only be modified on the host side and therefore contains global pointers only.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D92394
Files:
llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
llvm/test/Transforms/InferAddressSpaces/AMDGPU/assumed-addrspace.ll
Index: llvm/test/Transforms/InferAddressSpaces/AMDGPU/assumed-addrspace.ll
===================================================================
--- llvm/test/Transforms/InferAddressSpaces/AMDGPU/assumed-addrspace.ll
+++ llvm/test/Transforms/InferAddressSpaces/AMDGPU/assumed-addrspace.ll
@@ -29,3 +29,33 @@
store float %v1, float* %p1
ret void
}
+
+%struct.arg = type { float* }
+
+; CHECK-LABEL: @generic_ptr_from_readonly_arg0
+; CHECK: addrspacecast i32* %p0 to i32 addrspace(1)*
+; CHECK: load i32, i32 addrspace(1)*
+; CHECK: store i32 %v0, i32 addrspace(1)*
+; CHECK: ret
+define amdgpu_kernel void @generic_ptr_from_readonly_arg0(%struct.arg addrspace(1)* noalias readonly %in, float addrspace(1)* nocapture %out) {
+ %f0 = bitcast %struct.arg addrspace(1)* %in to i32* addrspace(1)*
+ %p0 = load i32*, i32* addrspace(1)* %f0, align 8
+ %v0 = load i32, i32* %p0, align 4
+ %q0 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
+ store i32 %v0, i32 addrspace(1)* %q0, align 4
+ ret void
+}
+
+; CHECK-LABEL: @generic_ptr_from_readonly_arg1
+; CHECK-NOT: addrspacecast i32* %p0 to i32 addrspace(1)*
+; CHECK-NOT: load i32, i32 addrspace(1)*
+; CHECK: store i32 %v0, i32 addrspace(1)*
+; CHECK: ret
+define amdgpu_kernel void @generic_ptr_from_readonly_arg1(%struct.arg addrspace(1)* readonly %in, float addrspace(1)* nocapture %out) {
+ %f0 = bitcast %struct.arg addrspace(1)* %in to i32* addrspace(1)*
+ %p0 = load i32*, i32* addrspace(1)* %f0, align 8
+ %v0 = load i32, i32* %p0, align 4
+ %q0 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
+ store i32 %v0, i32 addrspace(1)* %q0, align 4
+ ret void
+}
Index: llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -29,6 +29,7 @@
#include "SIMachineFunctionInfo.h"
#include "SIMachineScheduler.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
@@ -537,13 +538,23 @@
V->getType()->getPointerAddressSpace() == AMDGPUAS::FLAT_ADDRESS);
const auto *Ptr = LD->getPointerOperand();
- if (Ptr->getType()->getPointerAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
- return AMDGPUAS::UNKNOWN_ADDRESS_SPACE;
- // For a generic pointer loaded from the constant memory, it could be assumed
- // as a global pointer since the constant memory is only populated on the
- // host side. As implied by the offload programming model, only global
- // pointers could be referenced on the host side.
- return AMDGPUAS::GLOBAL_ADDRESS;
+
+ // For a generic pointer loaded from the constant memory, it could be
+ // assumed as a global pointer since the constant memory is only populated
+ // on the host side. As implied by the offload programming model, only
+ // global pointers could be referenced on the host side.
+ if (Ptr->getType()->getPointerAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
+ return AMDGPUAS::GLOBAL_ADDRESS;
+
+ // For a generic pointer loaded from the readonly kernel function pointer
+ // arguments, it could be assumed as a global pointer since that memory is
+ // also only prepared on the host side.
+ const Argument *Arg = dyn_cast_or_null<Argument>(getUnderlyingObject(Ptr));
+ if (Arg && Arg->onlyReadsMemory() && Arg->hasNoAliasAttr() &&
+ Arg->getParent()->getCallingConv() == CallingConv::AMDGPU_KERNEL)
+ return AMDGPUAS::GLOBAL_ADDRESS;
+
+ return AMDGPUAS::UNKNOWN_ADDRESS_SPACE;
}
TargetTransformInfo
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D92394.308654.patch
Type: text/x-patch
Size: 3715 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20201201/dafb52f6/attachment.bin>
More information about the llvm-commits
mailing list