[PATCH] D92394: [amdgpu] Teach one more case for assumed global pointers.

Michael Liao via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 1 07:34:46 PST 2020


hliao created this revision.
hliao added reviewers: yaxunl, arsenm, msearles.
Herald added subscribers: llvm-commits, kerbowa, hiraditya, t-tye, tpr, dstuttard, nhaehnle, jvesely, kzhuravl.
Herald added a project: LLVM.
hliao requested review of this revision.
Herald added a subscriber: wdng.

- If a generic pointer is loaded from a `readonly` and `noalias` kernel pointer argument, it can be assumed to be a global pointer.
  + `readonly` prevents any modification from the device side, and
  + `noalias` ensures that the pointer does not alias any other object during the whole kernel execution lifetime.
  + Taken together, it is safe to assume that the memory object can only be modified on the host side and therefore contains global pointers only.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D92394

Files:
  llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
  llvm/test/Transforms/InferAddressSpaces/AMDGPU/assumed-addrspace.ll


Index: llvm/test/Transforms/InferAddressSpaces/AMDGPU/assumed-addrspace.ll
===================================================================
--- llvm/test/Transforms/InferAddressSpaces/AMDGPU/assumed-addrspace.ll
+++ llvm/test/Transforms/InferAddressSpaces/AMDGPU/assumed-addrspace.ll
@@ -29,3 +29,33 @@
   store float %v1, float* %p1
   ret void
 }
+
+%struct.arg = type { float* }
+
+; CHECK-LABEL: @generic_ptr_from_readonly_arg0
+; CHECK: addrspacecast i32* %p0 to i32 addrspace(1)*
+; CHECK: load i32, i32 addrspace(1)*
+; CHECK: store i32 %v0, i32 addrspace(1)*
+; CHECK: ret
+define amdgpu_kernel void @generic_ptr_from_readonly_arg0(%struct.arg addrspace(1)* noalias readonly %in, float addrspace(1)* nocapture %out) {
+  %f0 = bitcast %struct.arg addrspace(1)* %in to i32* addrspace(1)*
+  %p0 = load i32*, i32* addrspace(1)* %f0, align 8
+  %v0 = load i32, i32* %p0, align 4
+  %q0 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
+  store i32 %v0, i32 addrspace(1)* %q0, align 4
+  ret void
+}
+
+; CHECK-LABEL: @generic_ptr_from_readonly_arg1
+; CHECK-NOT: addrspacecast i32* %p0 to i32 addrspace(1)*
+; CHECK-NOT: load i32, i32 addrspace(1)*
+; CHECK: store i32 %v0, i32 addrspace(1)*
+; CHECK: ret
+define amdgpu_kernel void @generic_ptr_from_readonly_arg1(%struct.arg addrspace(1)* readonly %in, float addrspace(1)* nocapture %out) {
+  %f0 = bitcast %struct.arg addrspace(1)* %in to i32* addrspace(1)*
+  %p0 = load i32*, i32* addrspace(1)* %f0, align 8
+  %v0 = load i32, i32* %p0, align 4
+  %q0 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
+  store i32 %v0, i32 addrspace(1)* %q0, align 4
+  ret void
+}
Index: llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -29,6 +29,7 @@
 #include "SIMachineFunctionInfo.h"
 #include "SIMachineScheduler.h"
 #include "TargetInfo/AMDGPUTargetInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/GlobalISel/IRTranslator.h"
 #include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
 #include "llvm/CodeGen/GlobalISel/Legalizer.h"
@@ -537,13 +538,23 @@
          V->getType()->getPointerAddressSpace() == AMDGPUAS::FLAT_ADDRESS);
 
   const auto *Ptr = LD->getPointerOperand();
-  if (Ptr->getType()->getPointerAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
-    return AMDGPUAS::UNKNOWN_ADDRESS_SPACE;
-  // For a generic pointer loaded from the constant memory, it could be assumed
-  // as a global pointer since the constant memory is only populated on the
-  // host side. As implied by the offload programming model, only global
-  // pointers could be referenced on the host side.
-  return AMDGPUAS::GLOBAL_ADDRESS;
+
+  // For a generic pointer loaded from the constant memory, it could be
+  // assumed as a global pointer since the constant memory is only populated
+  // on the host side. As implied by the offload programming model, only
+  // global pointers could be referenced on the host side.
+  if (Ptr->getType()->getPointerAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
+    return AMDGPUAS::GLOBAL_ADDRESS;
+
+  // For a generic pointer loaded from the readonly kernel function pointer
+  // arguments, it could be assumed as a global pointer since that memory is
+  // also only prepared on the host side.
+  const Argument *Arg = dyn_cast_or_null<Argument>(getUnderlyingObject(Ptr));
+  if (Arg && Arg->onlyReadsMemory() && Arg->hasNoAliasAttr() &&
+      Arg->getParent()->getCallingConv() == CallingConv::AMDGPU_KERNEL)
+    return AMDGPUAS::GLOBAL_ADDRESS;
+
+  return AMDGPUAS::UNKNOWN_ADDRESS_SPACE;
 }
 
 TargetTransformInfo


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D92394.308654.patch
Type: text/x-patch
Size: 3715 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20201201/dafb52f6/attachment.bin>


More information about the llvm-commits mailing list