[llvm] [AMDGPU][LTO] Introduce AMDGPUCloneModuleLDS (PR #89683)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Tue May 7 08:06:00 PDT 2024


================
@@ -0,0 +1,100 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -passes=amdgpu-clone-module-lds %s -S | FileCheck %s
+
+target triple = "amdgcn-amd-amdhsa"
+
+%struct.RT = type { i8, [10 x [20 x i32]], i8 }
+%struct.GV = type { i32, double, %struct.RT }
+
+; CHECK: [[GV_CLONE_0:@.*]] = internal addrspace(3) global %struct.GV poison, align 8
+; CHECK: [[GV:@.*]] = internal addrspace(3) global %struct.GV zeroinitializer, align 8
+@lds_gv = internal addrspace(3) global %struct.GV zeroinitializer, align 8
+
+define protected amdgpu_kernel void @kernel1(i32 %n) #3 {
+; CHECK-LABEL: define protected amdgpu_kernel void @kernel1(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 @lds_func(i32 [[N]], i1 false)
+; CHECK-NEXT:    [[CALL_CLONE_0:%.*]] = call i32 @lds_func.clone.0(i32 [[N]], i1 false)
+; CHECK-NEXT:    ret void
+;
+entry:
+  %call = call i32 @lds_func(i32 %n, i1 0)
----------------
arsenm wrote:

```suggestion
  %call = call i32 @lds_func(i32 %n, i1 false)
```

https://github.com/llvm/llvm-project/pull/89683


More information about the llvm-commits mailing list