[clang] 18834dc - [OpenCL] Mark kernel arguments as ABI aligned

Nikita Popov via cfe-commits cfe-commits at lists.llvm.org
Tue Feb 8 07:17:41 PST 2022


Author: Nikita Popov
Date: 2022-02-08T16:12:51+01:00
New Revision: 18834dca2d787fb46532c0b688e396613e132020

URL: https://github.com/llvm/llvm-project/commit/18834dca2d787fb46532c0b688e396613e132020
DIFF: https://github.com/llvm/llvm-project/commit/18834dca2d787fb46532c0b688e396613e132020.diff

LOG: [OpenCL] Mark kernel arguments as ABI aligned

Following the discussion on D118229, this marks pointer-typed kernel
arguments whose pointee type is complete and of constant size (so not,
for example, void pointers) as having ABI alignment, per section 6.3.5
of the OpenCL spec:

> For arguments to a __kernel function declared to be a pointer to
> a data type, the OpenCL compiler can assume that the pointee is
> always appropriately aligned as required by the data type.

Differential Revision: https://reviews.llvm.org/D118894

Added: 
    clang/test/CodeGenOpenCL/kernel-param-alignment.cl

Modified: 
    clang/lib/CodeGen/CGCall.cpp
    clang/test/CodeGenOpenCL/amdgpu-call-kernel.cl
    clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
    clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl
    clang/test/CodeGenOpenCL/spir-calling-conv.cl

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 7ee4cdbb6ef08..ab1aa6cc7649f 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -2485,6 +2485,20 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
       }
     }
 
+    // From OpenCL spec v3.0.10 section 6.3.5 Alignment of Types:
+    // > For arguments to a __kernel function declared to be a pointer to a
+    // > data type, the OpenCL compiler can assume that the pointee is always
+    // > appropriately aligned as required by the data type.
+    if (TargetDecl && TargetDecl->hasAttr<OpenCLKernelAttr>() &&
+        ParamType->isPointerType()) {
+      QualType PTy = ParamType->getPointeeType();
+      if (!PTy->isIncompleteType() && PTy->isConstantSizeType()) {
+        llvm::Align Alignment =
+            getNaturalPointeeTypeAlignment(ParamType).getAsAlign();
+        Attrs.addAlignmentAttr(Alignment);
+      }
+    }
+
     switch (FI.getExtParameterInfo(ArgNo).getABI()) {
     case ParameterABI::Ordinary:
       break;

diff  --git a/clang/test/CodeGenOpenCL/amdgpu-call-kernel.cl b/clang/test/CodeGenOpenCL/amdgpu-call-kernel.cl
index 757cc0bd577de..db7270d1c4bc2 100755
--- a/clang/test/CodeGenOpenCL/amdgpu-call-kernel.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-call-kernel.cl
@@ -1,6 +1,6 @@
 // REQUIRES: amdgpu-registered-target
 // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
-// CHECK: define{{.*}} amdgpu_kernel void @test_call_kernel(i32 addrspace(1)* nocapture noundef writeonly %out)
+// CHECK: define{{.*}} amdgpu_kernel void @test_call_kernel(i32 addrspace(1)* nocapture noundef writeonly align 4 %out)
 // CHECK: store i32 4, i32 addrspace(1)* %out, align 4
 
 kernel void test_kernel(global int *out)

diff  --git a/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl b/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
index fdbc0aaa6e263..f90c48ef0572b 100644
--- a/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
+++ b/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
@@ -34,7 +34,7 @@ void callee(int id, __global int *out) {
   out[id] = id;
 }
 
-// COMMON-LABEL: define{{.*}} spir_kernel void @device_side_enqueue(i32 addrspace(1)* %{{.*}}, i32 addrspace(1)* %b, i32 %i)
+// COMMON-LABEL: define{{.*}} spir_kernel void @device_side_enqueue(i32 addrspace(1)* align 4 %{{.*}}, i32 addrspace(1)* align 4 %b, i32 %i)
 kernel void device_side_enqueue(global int *a, global int *b, int i) {
   // COMMON: %default_queue = alloca %opencl.queue_t*
   queue_t default_queue;

diff  --git a/clang/test/CodeGenOpenCL/kernel-param-alignment.cl b/clang/test/CodeGenOpenCL/kernel-param-alignment.cl
new file mode 100644
index 0000000000000..862f0b62ca981
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/kernel-param-alignment.cl
@@ -0,0 +1,26 @@
+// RUN: %clang_cc1 %s -cl-std=CL1.2 -emit-llvm -triple x86_64-unknown-unknown -o - | FileCheck %s
+
+// Test that pointer arguments to kernels are assumed to be ABI aligned.
+
+struct __attribute__((packed, aligned(1))) packed {
+  int i32;
+};
+
+typedef __attribute__((ext_vector_type(4))) int int4;
+typedef __attribute__((ext_vector_type(2))) float float2;
+
+kernel void test(
+    global int *i32,
+    global long *i64,
+    global int4 *v4i32,
+    global float2 *v2f32,
+    global void *v,
+    global struct packed *p) {
+// CHECK-LABEL: spir_kernel void @test(
+// CHECK-SAME: i32* nocapture noundef align 4 %i32,
+// CHECK-SAME: i64* nocapture noundef align 8 %i64,
+// CHECK-SAME: <4 x i32>* nocapture noundef align 16 %v4i32,
+// CHECK-SAME: <2 x float>* nocapture noundef align 8 %v2f32,
+// CHECK-SAME: i8* nocapture noundef %v,
+// CHECK-SAME: %struct.packed* nocapture noundef align 1 %p)
+}

diff  --git a/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl b/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl
index 5797cd75f58ac..8c7592119cd60 100644
--- a/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl
+++ b/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl
@@ -28,7 +28,7 @@ kernel void test_single(int_single input, global int* output) {
 // CHECK: spir_kernel
 // AMDGCN: define{{.*}} amdgpu_kernel void @test_single
 // CHECK: struct.int_single* nocapture {{.*}} byval(%struct.int_single)
-// CHECK: i32* nocapture noundef writeonly %output
+// CHECK: i32* nocapture noundef writeonly align 4 %output
  output[0] = input.a;
 }
 
@@ -36,7 +36,7 @@ kernel void test_pair(int_pair input, global int* output) {
 // CHECK: spir_kernel
 // AMDGCN: define{{.*}} amdgpu_kernel void @test_pair
 // CHECK: struct.int_pair* nocapture {{.*}} byval(%struct.int_pair)
-// CHECK: i32* nocapture noundef writeonly %output
+// CHECK: i32* nocapture noundef writeonly align 4 %output
  output[0] = (int)input.a;
  output[1] = (int)input.b;
 }
@@ -45,7 +45,7 @@ kernel void test_kernel(test_struct input, global int* output) {
 // CHECK: spir_kernel
 // AMDGCN: define{{.*}} amdgpu_kernel void @test_kernel
 // CHECK: struct.test_struct* nocapture {{.*}} byval(%struct.test_struct)
-// CHECK: i32* nocapture noundef writeonly %output
+// CHECK: i32* nocapture noundef writeonly align 4 %output
  output[0] = input.elementA;
  output[1] = input.elementB;
  output[2] = (int)input.elementC;

diff  --git a/clang/test/CodeGenOpenCL/spir-calling-conv.cl b/clang/test/CodeGenOpenCL/spir-calling-conv.cl
index 9bd70f8cedb24..1d7645ad0fe13 100644
--- a/clang/test/CodeGenOpenCL/spir-calling-conv.cl
+++ b/clang/test/CodeGenOpenCL/spir-calling-conv.cl
@@ -5,14 +5,14 @@ int get_dummy_id(int D);
 kernel void bar(global int *A);
 
 kernel void foo(global int *A)
-// CHECK: define{{.*}} spir_kernel void @foo(i32 addrspace(1)* noundef %A)
+// CHECK: define{{.*}} spir_kernel void @foo(i32 addrspace(1)* noundef align 4 %A)
 {
   int id = get_dummy_id(0);
   // CHECK: %{{[a-z0-9_]+}} = tail call spir_func i32 @get_dummy_id(i32 noundef 0)
   A[id] = id;
   bar(A);
-  // CHECK: tail call spir_kernel void @bar(i32 addrspace(1)* noundef %A)
+  // CHECK: tail call spir_kernel void @bar(i32 addrspace(1)* noundef align 4 %A)
 }
 
 // CHECK: declare spir_func i32 @get_dummy_id(i32 noundef)
-// CHECK: declare spir_kernel void @bar(i32 addrspace(1)* noundef)
+// CHECK: declare spir_kernel void @bar(i32 addrspace(1)* noundef align 4)


        


More information about the cfe-commits mailing list