r306721 - CodeGen: Fix invalid bitcast for coerced function argument

Yaxun Liu via cfe-commits cfe-commits at lists.llvm.org
Thu Jun 29 11:47:45 PDT 2017


Author: yaxunl
Date: Thu Jun 29 11:47:45 2017
New Revision: 306721

URL: http://llvm.org/viewvc/llvm-project?rev=306721&view=rev
Log:
CodeGen: Fix invalid bitcast for coerced function argument

Clang assumes that a coerced function argument is in address space 0, which is not always true and results in invalid bitcasts.

This patch fixes a failure in the OpenCL conformance test api/get_kernel_arg_info with the amdgcn---amdgizcl triple, where a non-zero alloca address space is used.

Differential Revision: https://reviews.llvm.org/D34777

Modified:
    cfe/trunk/lib/CodeGen/CGCall.cpp
    cfe/trunk/test/CodeGenOpenCL/addr-space-struct-arg.cl

Modified: cfe/trunk/lib/CodeGen/CGCall.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCall.cpp?rev=306721&r1=306720&r2=306721&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGCall.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGCall.cpp Thu Jun 29 11:47:45 2017
@@ -1297,7 +1297,7 @@ static void CreateCoercedStore(llvm::Val
 
   // If store is legal, just bitcast the src pointer.
   if (SrcSize <= DstSize) {
-    Dst = CGF.Builder.CreateBitCast(Dst, llvm::PointerType::getUnqual(SrcTy));
+    Dst = CGF.Builder.CreateElementBitCast(Dst, SrcTy);
     BuildAggStore(CGF, Src, Dst, DstIsVolatile);
   } else {
     // Otherwise do coercion through memory. This is stupid, but
@@ -2412,8 +2412,7 @@ void CodeGenFunction::EmitFunctionProlog
 
         Address AddrToStoreInto = Address::invalid();
         if (SrcSize <= DstSize) {
-          AddrToStoreInto =
-            Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(STy));
+          AddrToStoreInto = Builder.CreateElementBitCast(Ptr, STy);
         } else {
           AddrToStoreInto =
             CreateTempAlloca(STy, Alloca.getAlignment(), "coerce");

Modified: cfe/trunk/test/CodeGenOpenCL/addr-space-struct-arg.cl
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/addr-space-struct-arg.cl?rev=306721&r1=306720&r2=306721&view=diff
==============================================================================
--- cfe/trunk/test/CodeGenOpenCL/addr-space-struct-arg.cl (original)
+++ cfe/trunk/test/CodeGenOpenCL/addr-space-struct-arg.cl Thu Jun 29 11:47:45 2017
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -ffake-address-space-map -triple i686-pc-darwin | FileCheck %s
+// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -finclude-default-header -ffake-address-space-map -triple i686-pc-darwin | FileCheck -check-prefixes=COM,X86 %s
+// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -finclude-default-header -triple amdgcn-amdhsa-amd-amdgizcl | FileCheck -check-prefixes=COM,AMD %s
 
 typedef struct {
   int cells[9];
@@ -8,16 +9,57 @@ typedef struct {
   int cells[16];
 } Mat4X4;
 
+struct StructOneMember {
+  int2 x;
+};
+
+struct StructTwoMember {
+  int2 x;
+  int2 y;
+};
+
+// COM-LABEL: define void @foo
 Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) {
   Mat4X4 out;
   return out;
 }
 
+// COM-LABEL: define {{.*}} void @ker
+// Expect two mem copies: one for the argument "in", and one for
+// the return value.
+// X86: call void @llvm.memcpy.p0i8.p1i8.i32(i8*
+// X86: call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)*
+// AMD: call void @llvm.memcpy.p5i8.p1i8.i64(i8 addrspace(5)*
+// AMD: call void @llvm.memcpy.p1i8.p5i8.i64(i8 addrspace(1)*
 kernel void ker(global Mat3X3 *in, global Mat4X4 *out) {
   out[0] = foo(in[1]);
 }
 
-// Expect two mem copies: one for the argument "in", and one for
-// the return value.
-// CHECK: call void @llvm.memcpy.p0i8.p1i8.i32(i8*
-// CHECK: call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)*
+// AMD-LABEL: define void @FuncOneMember(%struct.StructOneMember addrspace(5)* byval align 8 %u)
+void FuncOneMember(struct StructOneMember u) {
+  u.x = (int2)(0, 0);
+}
+
+// AMD-LABEL: define amdgpu_kernel void @KernelOneMember
+// AMD-SAME:  (<2 x i32> %[[u_coerce:.*]])
+// AMD:  %[[u:.*]] = alloca %struct.StructOneMember, align 8, addrspace(5)
+// AMD:  %[[coerce_dive:.*]] = getelementptr inbounds %struct.StructOneMember, %struct.StructOneMember addrspace(5)* %[[u]], i32 0, i32 0
+// AMD:  store <2 x i32> %[[u_coerce]], <2 x i32> addrspace(5)* %[[coerce_dive]]
+// AMD:  call void @FuncOneMember(%struct.StructOneMember addrspace(5)* byval align 8 %[[u]])
+kernel void KernelOneMember(struct StructOneMember u) {
+  FuncOneMember(u);
+}
+
+// AMD-LABEL: define void @FuncTwoMember(%struct.StructTwoMember addrspace(5)* byval align 8 %u)
+void FuncTwoMember(struct StructTwoMember u) {
+  u.x = (int2)(0, 0);
+}
+
+// AMD-LABEL: define amdgpu_kernel void @KernelTwoMember
+// AMD-SAME:  (%struct.StructTwoMember %[[u_coerce:.*]])
+// AMD:  %[[u:.*]] = alloca %struct.StructTwoMember, align 8, addrspace(5)
+// AMD:  store %struct.StructTwoMember %[[u_coerce]], %struct.StructTwoMember addrspace(5)* %[[u]]
+// AMD:  call void @FuncTwoMember(%struct.StructTwoMember addrspace(5)* byval align 8 %[[u]])
+kernel void KernelTwoMember(struct StructTwoMember u) {
+  FuncTwoMember(u);
+}




More information about the cfe-commits mailing list