[llvm] 35f2584 - [amdgpu] Error, instead of miscompile, anonymous kernels using lds

Jon Chesterfield via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 28 08:40:03 PDT 2022


Author: Jon Chesterfield
Date: 2022-09-28T16:30:04+01:00
New Revision: 35f2584ef9897ec9e5455ad2660dc1be5171c38c

URL: https://github.com/llvm/llvm-project/commit/35f2584ef9897ec9e5455ad2660dc1be5171c38c
DIFF: https://github.com/llvm/llvm-project/commit/35f2584ef9897ec9e5455ad2660dc1be5171c38c.diff

LOG: [amdgpu] Error, instead of miscompile, anonymous kernels using lds

The association between kernel and struct is done by symbol name.
This doesn't work robustly for anonymous kernels as shown by the modified
test case.

An alternative association between function and struct can be constructed
if necessary, probably though metadata, but on the basis that we currently
miscompile anonymous kernels and that they are difficult to construct from
application code and difficult to call from the runtime, this patch makes
it a fatal error for now.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D134741

Added: 
    llvm/test/CodeGen/AMDGPU/lds-reject-anonymous-kernels.ll

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
    llvm/test/CodeGen/AMDGPU/lower-kernel-and-module-lds.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
index f0e32f456ee3..de56ab7d305a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
@@ -293,6 +293,16 @@ class AMDGPULowerModuleLDS : public ModulePass {
           AMDGPU::findLDSVariablesToLower(M, &F);
 
       if (!KernelUsedVariables.empty()) {
+        // The association between kernel function and LDS struct is done by
+        // symbol name, which only works if the function in question has a name
+        // This is not expected to be a problem in practice as kernels are
+        // called by name making anonymous ones (which are named by the backend)
+        // 
diff icult to use. This does mean that llvm test cases need
+        // to name the kernels.
+        if (!F.hasName()) {
+          report_fatal_error("Anonymous kernels cannot use LDS variables");
+        }
+
         std::string VarName =
             (Twine("llvm.amdgcn.kernel.") + F.getName() + ".lds").str();
         GlobalVariable *SGV;

diff  --git a/llvm/test/CodeGen/AMDGPU/lds-reject-anonymous-kernels.ll b/llvm/test/CodeGen/AMDGPU/lds-reject-anonymous-kernels.ll
new file mode 100644
index 000000000000..2c45c08dbaaa
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/lds-reject-anonymous-kernels.ll
@@ -0,0 +1,12 @@
+; RUN: not --crash opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds < %s 2>&1 | FileCheck %s
+; RUN: not --crash opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s 2>&1 | FileCheck %s
+
+ at var1 = addrspace(3) global i32 undef, align 8
+
+; CHECK: LLVM ERROR: Anonymous kernels cannot use LDS variables
+define amdgpu_kernel void @0() {
+  %val0 = load i32, i32 addrspace(3)* @var1
+  %val1 = add i32 %val0, 4
+  store i32 %val1, i32 addrspace(3)* @var1
+  ret void
+}

diff  --git a/llvm/test/CodeGen/AMDGPU/lower-kernel-and-module-lds.ll b/llvm/test/CodeGen/AMDGPU/lower-kernel-and-module-lds.ll
index 69e14724d3d8..efd5701b5ed6 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-kernel-and-module-lds.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-kernel-and-module-lds.ll
@@ -10,16 +10,16 @@
 ; CHECK: %llvm.amdgcn.module.lds.t = type { [8 x i8], [1 x i8] }
 ; CHECK: %llvm.amdgcn.kernel.k0.lds.t = type { [16 x i8], [4 x i8], [2 x i8] }
 ; CHECK: %llvm.amdgcn.kernel.k1.lds.t = type { [16 x i8], [4 x i8], [2 x i8] }
-; CHECK: %llvm.amdgcn.kernel..lds.t = type { [2 x i8] }
-; CHECK: %llvm.amdgcn.kernel..lds.t.0 = type { [4 x i8] }
+; CHECK: %llvm.amdgcn.kernel.k2.lds.t = type { [2 x i8] }
+; CHECK: %llvm.amdgcn.kernel.k3.lds.t = type { [4 x i8] }
 
 ;.
 ; CHECK: @llvm.amdgcn.module.lds = internal addrspace(3) global %llvm.amdgcn.module.lds.t undef, align 8
 ; CHECK: @llvm.compiler.used = appending global [1 x i8*] [i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 0, i32 0) to i8*)], section "llvm.metadata"
 ; CHECK: @llvm.amdgcn.kernel.k0.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k0.lds.t undef, align 16
 ; CHECK: @llvm.amdgcn.kernel.k1.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k1.lds.t undef, align 16
-; CHECK: @llvm.amdgcn.kernel..lds = internal addrspace(3) global %llvm.amdgcn.kernel..lds.t undef, align 2
-; CHECK: @llvm.amdgcn.kernel..lds.1 = internal addrspace(3) global %llvm.amdgcn.kernel..lds.t.0 undef, align 4
+; CHECK: @llvm.amdgcn.kernel.k2.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k2.lds.t undef, align 2
+; CHECK: @llvm.amdgcn.kernel.k3.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k3.lds.t undef, align 4
 ;.
 define amdgpu_kernel void @k0() #0 {
 ; CHECK-LABEL: @k0(
@@ -70,9 +70,9 @@ define amdgpu_kernel void @k1() #0 {
   ret void
 }
 
-define amdgpu_kernel void @0() #0 {
-; CHECK-LABEL: @0(
-; CHECK-NEXT:    %lds.size.2.align.2.bc = bitcast [2 x i8] addrspace(3)* getelementptr inbounds (%llvm.amdgcn.kernel..lds.t, %llvm.amdgcn.kernel..lds.t addrspace(3)* @llvm.amdgcn.kernel..lds, i32 0, i32 0) to i8 addrspace(3)*
+define amdgpu_kernel void @k2() #0 {
+; CHECK-LABEL: @k2(
+; CHECK-NEXT:    %lds.size.2.align.2.bc = bitcast [2 x i8] addrspace(3)* getelementptr inbounds (%llvm.amdgcn.kernel.k2.lds.t, %llvm.amdgcn.kernel.k2.lds.t addrspace(3)* @llvm.amdgcn.kernel.k2.lds, i32 0, i32 0) to i8 addrspace(3)*
 ; CHECK-NEXT:    store i8 2, i8 addrspace(3)* %lds.size.2.align.2.bc, align 2
 ; CHECK-NEXT:    ret void
 ;
@@ -82,9 +82,9 @@ define amdgpu_kernel void @0() #0 {
   ret void
 }
 
-define amdgpu_kernel void @1() #0 {
-; CHECK-LABEL: @1(
-; CHECK-NEXT:    %lds.size.4.align.4.bc = bitcast [4 x i8] addrspace(3)* getelementptr inbounds (%llvm.amdgcn.kernel..lds.t.0, %llvm.amdgcn.kernel..lds.t.0 addrspace(3)* @llvm.amdgcn.kernel..lds.1, i32 0, i32 0) to i8 addrspace(3)*
+define amdgpu_kernel void @k3() #0 {
+; CHECK-LABEL: @k3(
+; CHECK-NEXT:    %lds.size.4.align.4.bc = bitcast [4 x i8] addrspace(3)* getelementptr inbounds (%llvm.amdgcn.kernel.k3.lds.t, %llvm.amdgcn.kernel.k3.lds.t addrspace(3)* @llvm.amdgcn.kernel.k3.lds, i32 0, i32 0) to i8 addrspace(3)*
 ; CHECK-NEXT:    store i8 4, i8 addrspace(3)* %lds.size.4.align.4.bc, align 4
 ; CHECK-NEXT:    ret void
 ;


        


More information about the llvm-commits mailing list