[llvm] [AMDGPU][LTO] Introduce AMDGPUCloneModuleLDS (PR #89683)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 23 04:29:33 PDT 2024


================
@@ -0,0 +1,134 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -passes=amdgpu-clone-module-lds %s -S | FileCheck %s
+
+target triple = "amdgcn-amd-amdhsa"
+
+; Before transformation,                    After transformation,
+;  K1  K2    K3                              K1  K2    K3
+;  |  /      |                               |  /      |
+;  | /       |                               | /       |
+;  A --------+               ==>             A --------+
+;  |                                         |
+;  |                                         |
+;  B                                         B
+;  |                                       / | \
+;  X                                      X1 X2 X3
+;  |                                      \  |  /
+;  D                                       \ | /
+;                                            D
+; where X contains an LDS reference
+
+; CHECK: [[GV_CLONE_0:@.*]] = internal unnamed_addr addrspace(3) global [64 x i32] undef, align 16
+; CHECK: [[GV_CLONE_1:@.*]] = internal unnamed_addr addrspace(3) global [64 x i32] undef, align 16
+; CHECK: [[GV:@.*]] = internal unnamed_addr addrspace(3) global [64 x i32] undef, align 16
+@lds_gv = internal unnamed_addr addrspace(3) global [64 x i32] undef, align 16
+
+define protected amdgpu_kernel void @kernel1(i32 %n) { ; K1 in the call-tree diagram above: a kernel entry point whose only work is calling @A.
+; CHECK-LABEL: define protected amdgpu_kernel void @kernel1(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 @A(i32 [[N]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %call = call i32 @A(i32 %n) ; forwards the kernel argument to @A; the i32 result is not used
+  ret void ; the expectations above require this body to come out of the pass unchanged
+}
+
+define protected amdgpu_kernel void @kernel2(i32 %n) { ; K2 in the call-tree diagram above: a second kernel entry point that reaches @A.
+; CHECK-LABEL: define protected amdgpu_kernel void @kernel2(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 @A(i32 [[N]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %call = call i32 @A(i32 %n) ; same call as in @kernel1; the i32 result is not used
+  ret void ; the expectations above require this body to come out of the pass unchanged
+}
+
+define protected amdgpu_kernel void @kernel3(i32 %n) { ; K3 in the call-tree diagram above: a third kernel entry point that reaches @A.
+; CHECK-LABEL: define protected amdgpu_kernel void @kernel3(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 @A(i32 [[N]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %call = call i32 @A(i32 %n) ; same call as in @kernel1; the i32 result is not used
+  ret void ; the expectations above require this body to come out of the pass unchanged
+}
+
+define i32 @A(i32 %n) { ; A in the diagram: called by all three kernels as `i32 (i32)`, so it is declared with that type; it only forwards to @B.
+; CHECK-LABEL: define i32 @A(i32 %n) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RET:%.*]] = call i32 @B()
+; CHECK-NEXT:    ret i32 [[RET]]
+;
+entry:
+  %ret = call i32 @B() ; @B is defined as `i32 @B()`; the call-site type must match for this to be a direct call edge
+  ret i32 %ret ; hand @B's result back to the calling kernel; %n is intentionally unused
+}
+
+define i32 @B() { ; B in the diagram: the direct caller of @X, and the function whose body the expectations show being rewritten.
+; CHECK-LABEL: define i32 @B() {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[P:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 5, ptr [[P]], align 4
+; CHECK-NEXT:    [[RET_CLONE_0:%.*]] = call i32 @X.clone.0(ptr [[P]])
+; CHECK-NEXT:    [[RET_CLONE_1:%.*]] = call i32 @X.clone.1(ptr [[P]])
+; CHECK-NEXT:    [[RET:%.*]] = call i32 @X(ptr [[P]])
+; CHECK-NEXT:    ret i32 [[RET]]
+;
+entry:
+  %p = alloca i32 ; stack slot whose address is passed to @X
+  store i32 5, ptr %p ; the value @X reads back through its pointer argument
+  %ret = call i32 @X(ptr %p) ; expectations above: calls to @X.clone.0 and @X.clone.1 are emitted ahead of this original call
+  ret i32 %ret ; only the original @X call's result is returned, before and after the pass
+}
+
+define i32 @X(ptr %x) { ; X in the diagram: the function that contains the LDS reference (@lds_gv).
+; CHECK-LABEL: define i32 @X(
+; CHECK-SAME: ptr [[X:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[P:%.*]] = getelementptr inbounds [64 x i32], ptr addrspacecast (ptr addrspace(3) [[GV]] to ptr), i64 0, i64 0
+; CHECK-NEXT:    [[V:%.*]] = load i32, ptr [[X]], align 4
+; CHECK-NEXT:    call void @D(ptr [[P]])
+; CHECK-NEXT:    store i32 [[V]], ptr [[P]], align 4
+; CHECK-NEXT:    ret i32 [[V]]
+;
+entry:
+  %p = getelementptr inbounds [64 x i32], ptr addrspacecast (ptr addrspace(3) @lds_gv to ptr), i64 0, i64 0 ; address of element 0 of @lds_gv, cast from addrspace(3) to a generic ptr
+  %v = load i32, ptr %x ; read the value supplied by the caller
+  call void @D(ptr %p) ; @D stores 8 through this pointer
+  store i32 %v, ptr %p ; then element 0 is overwritten with the loaded value
+  ret i32 %v
+}
+
+define void @D(ptr %x) { ; D in the diagram: leaf function reached from @X (and, per the expectations at the end of the file, from each clone of @X).
+; CHECK-LABEL: define void @D(ptr %x) {
+; CHECK-NEXT:   entry:
+; CHECK-NEXT:     store i32 8, ptr %x, align 4
+; CHECK-NEXT:     ret void
+entry:
+  store i32 8, ptr %x ; writes the constant 8 through the caller-supplied pointer
+  ret void
+}
+
+; CHECK-LABEL: define i32 @X.clone.0(ptr %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %0 = addrspacecast ptr addrspace(3) [[GV_CLONE_0]] to ptr
+; CHECK-NEXT:   %p = getelementptr inbounds [64 x i32], ptr %0, i64 0, i64 0
+; CHECK-NEXT:   %v = load i32, ptr %x, align 4
+; CHECK-NEXT:   call void @D(ptr [[P]])
+; CHECK-NEXT:   store i32 %v, ptr %p, align 4
+; CHECK-NEXT:   ret i32 %v
+
+; CHECK-LABEL: define i32 @X.clone.1(ptr %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %0 = addrspacecast ptr addrspace(3) [[GV_CLONE_1]] to ptr
+; CHECK-NEXT:   %p = getelementptr inbounds [64 x i32], ptr %0, i64 0, i64 0
+; CHECK-NEXT:   %v = load i32, ptr %x, align 4
+; CHECK-NEXT:   call void @D(ptr [[P]])
+; CHECK-NEXT:   store i32 %v, ptr %p, align 4
+; CHECK-NEXT:   ret i32 %v
----------------
arsenm wrote:

End-of-file newline error (the file appears to be missing its trailing newline).

https://github.com/llvm/llvm-project/pull/89683


More information about the llvm-commits mailing list