[PATCH] D54947: [OpenCL][CodeGen] Fix replacing memcpy with addrspacecast

Dmitry Sidorov via Phabricator via cfe-commits cfe-commits at lists.llvm.org
Tue Nov 27 05:34:48 PST 2018


sidorovd created this revision.
sidorovd added reviewers: yaxunl, Anastasia.
Herald added a subscriber: cfe-commits.

If a function argument is byval and RV is located in the default or alloca address
space, an optimization is performed that creates an addrspacecast instead of a memcpy.
That is not correct for OpenCL, where it can lead to an invalid address space cast,
e.g. from __global * to __private * as in the IR shown below. See the example:

  typedef struct {
    int x;
  } MyStruct;
  
  void foo(MyStruct val) {}
  
  kernel void KernelOneMember(__global MyStruct* x) {
    foo (*x);
  }

For this code, clang generated the following IR:
...
%0 = load %struct.MyStruct addrspace(1)*, %struct.MyStruct addrspace(1)** %x.addr, align 4
%1 = addrspacecast %struct.MyStruct addrspace(1)* %0 to %struct.MyStruct*
...

So the optimization is now disallowed for OpenCL when RV is located in an address space
different from that of the argument (address space 0).


Repository:
  rC Clang

https://reviews.llvm.org/D54947

Files:
  lib/CodeGen/CGCall.cpp
  test/CodeGenOpenCL/addr-space-struct-arg.cl


Index: test/CodeGenOpenCL/addr-space-struct-arg.cl
===================================================================
--- test/CodeGenOpenCL/addr-space-struct-arg.cl
+++ test/CodeGenOpenCL/addr-space-struct-arg.cl
@@ -1,6 +1,7 @@
 // RUN: %clang_cc1 %s -emit-llvm -o - -O0 -finclude-default-header -ffake-address-space-map -triple i686-pc-darwin | FileCheck -enable-var-scope -check-prefixes=COM,X86 %s
 // RUN: %clang_cc1 %s -emit-llvm -o - -O0 -finclude-default-header -triple amdgcn | FileCheck -enable-var-scope -check-prefixes=COM,AMDGCN %s
 // RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL2.0 -O0 -finclude-default-header -triple amdgcn | FileCheck -enable-var-scope -check-prefixes=COM,AMDGCN,AMDGCN20 %s
+// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL1.2 -O0 -finclude-default-header -triple spir-unknown-unknown-unknown | FileCheck -enable-var-scope -check-prefixes=SPIR %s
 
 typedef struct {
   int cells[9];
@@ -130,6 +131,12 @@
   FuncOneMember(u);
 }
 
+// SPIR: call void @llvm.memcpy.p0i8.p1i8.i32
+// SPIR-NOT: addrspacecast
+kernel void KernelOneMemberSpir(global struct StructOneMember* u) {
+  FuncOneMember(*u);
+}
+
 // AMDGCN-LABEL: define amdgpu_kernel void @KernelLargeOneMember(
 // AMDGCN:  %[[U:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5)
 // AMDGCN:  store %struct.LargeStructOneMember %u.coerce, %struct.LargeStructOneMember addrspace(5)* %[[U]], align 8
Index: lib/CodeGen/CGCall.cpp
===================================================================
--- lib/CodeGen/CGCall.cpp
+++ lib/CodeGen/CGCall.cpp
@@ -3954,15 +3954,30 @@
         } else if (I->hasLValue()) {
           auto LV = I->getKnownLValue();
           auto AS = LV.getAddressSpace();
+
           if ((!ArgInfo.getIndirectByVal() &&
                (LV.getAlignment() >=
-                getContext().getTypeAlignInChars(I->Ty))) ||
-              (ArgInfo.getIndirectByVal() &&
-               ((AS != LangAS::Default && AS != LangAS::opencl_private &&
-                 AS != CGM.getASTAllocaAddressSpace())))) {
+                getContext().getTypeAlignInChars(I->Ty)))) {
+            NeedCopy = true;
+          }
+          if (!getLangOpts().OpenCL) {
+            if ((ArgInfo.getIndirectByVal() &&
+                (AS != LangAS::Default &&
+                 AS != CGM.getASTAllocaAddressSpace()))) {
+              NeedCopy = true;
+            }
+          }
+          // For OpenCL even if RV is located in default or alloca address space
+          // we don't want to perform address space cast for it, since that
+          // leads to casting __private * (default addr space in OpenCL) to
+          // __global * which is not valid. Create memcpy call instead.
+          else if ((ArgInfo.getIndirectByVal() &&
+                    Addr.getType()->getAddressSpace() != IRFuncTy->
+                      getParamType(FirstIRArg)->getPointerAddressSpace())) {
             NeedCopy = true;
           }
         }
+
         if (NeedCopy) {
           // Create an aligned temporary, and copy to it.
           Address AI = CreateMemTempWithoutCast(


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D54947.175458.patch
Type: text/x-patch
Size: 3093 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20181127/065fcaed/attachment.bin>


More information about the cfe-commits mailing list