[llvm] [AMDGPU][LTO] Introduce AMDGPUCloneModuleLDS (PR #89683)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 23 04:29:33 PDT 2024
================
@@ -0,0 +1,134 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -passes=amdgpu-clone-module-lds %s -S | FileCheck %s
+
+target triple = "amdgcn-amd-amdhsa"
+
+; Before transformation,                   After transformation,
+;  K1  K2    K3                              K1  K2    K3
+;  |  /      |                               |  /      |
+;  | /       |                               | /       |
+;  A --------+             ==>               A --------+
+;  |                                         |
+;  |                                         |
+;  B                                         B
+;  |                                       / | \
+;  X                                      X1 X2 X3
+;  |                                       \ | /
+;  D                                       \ | /
+;                                            D
+; where X contains an LDS reference
+
+; CHECK: [[GV_CLONE_0:@.*]] = internal unnamed_addr addrspace(3) global [64 x i32] undef, align 16
+; CHECK: [[GV_CLONE_1:@.*]] = internal unnamed_addr addrspace(3) global [64 x i32] undef, align 16
+; CHECK: [[GV:@.*]] = internal unnamed_addr addrspace(3) global [64 x i32] undef, align 16
+@lds_gv = internal unnamed_addr addrspace(3) global [64 x i32] undef, align 16
+
+; Kernel entry point K1 of the call graph above; its only action is to call @A.
+; The call is typed to match @A's definition (void, no arguments): calling a
+; function through a mismatched function type is undefined behavior, which
+; would make the expected output fragile. %n is intentionally left unused.
+define protected amdgpu_kernel void @kernel1(i32 %n) {
+; CHECK-LABEL: define protected amdgpu_kernel void @kernel1(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: call void @A()
+; CHECK-NEXT: ret void
+;
+entry:
+ call void @A()
+ ret void
+}
+
+; Kernel entry point K2 of the call graph above; its only action is to call @A.
+; The call is typed to match @A's definition (void, no arguments): calling a
+; function through a mismatched function type is undefined behavior, which
+; would make the expected output fragile. %n is intentionally left unused.
+define protected amdgpu_kernel void @kernel2(i32 %n) {
+; CHECK-LABEL: define protected amdgpu_kernel void @kernel2(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: call void @A()
+; CHECK-NEXT: ret void
+;
+entry:
+ call void @A()
+ ret void
+}
+
+; Kernel entry point K3 of the call graph above; its only action is to call @A.
+; The call is typed to match @A's definition (void, no arguments): calling a
+; function through a mismatched function type is undefined behavior, which
+; would make the expected output fragile. %n is intentionally left unused.
+define protected amdgpu_kernel void @kernel3(i32 %n) {
+; CHECK-LABEL: define protected amdgpu_kernel void @kernel3(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: call void @A()
+; CHECK-NEXT: ret void
+;
+entry:
+ call void @A()
+ ret void
+}
+
+; Function A of the call graph above; it only forwards to @B.
+; @B is defined as returning i32, so the call is typed i32 to match the callee
+; (a mismatched call type is undefined behavior); the result is deliberately
+; unused because @A itself returns void.
+define void @A() {
+; CHECK-LABEL: define void @A() {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @B()
+; CHECK-NEXT: ret void
+;
+entry:
+ %call = call i32 @B()
+ ret void
+}
+
+; Function B of the call graph above: writes the constant 5 into a stack slot,
+; passes that slot to @X and returns whatever @X returns.
+; The checks expect calls to @X.clone.0 and @X.clone.1 to appear ahead of the
+; original call to @X, with only the original call's result being returned.
+define i32 @B() {
+; CHECK-LABEL: define i32 @B() {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SLOT:%.*]] = alloca i32, align 4
+; CHECK-NEXT: store i32 5, ptr [[SLOT]], align 4
+; CHECK-NEXT: [[RES_CLONE_0:%.*]] = call i32 @X.clone.0(ptr [[SLOT]])
+; CHECK-NEXT: [[RES_CLONE_1:%.*]] = call i32 @X.clone.1(ptr [[SLOT]])
+; CHECK-NEXT: [[RES:%.*]] = call i32 @X(ptr [[SLOT]])
+; CHECK-NEXT: ret i32 [[RES]]
+;
+entry:
+ %slot = alloca i32, align 4
+ store i32 5, ptr %slot, align 4
+ %res = call i32 @X(ptr %slot)
+ ret i32 %res
+}
+
+; Function X of the call graph above: the function that references @lds_gv.
+; It forms a generic pointer to element 0 of the LDS array, loads an i32
+; through %x, passes the LDS pointer to @D, then stores the loaded value
+; through the LDS pointer and returns it.
+; The checks expect this original definition to refer to the global matched as
+; [[GV]], i.e. the last of the three globals matched at the top of the file.
+; NOTE(review): the @X.clone.N checks at the end of the file spell %x, %p and
+; %v literally, so renaming the locals here would presumably require updating
+; those checks as well - confirm against the pass output.
+define i32 @X(ptr %x) {
+; CHECK-LABEL: define i32 @X(
+; CHECK-SAME: ptr [[X:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P:%.*]] = getelementptr inbounds [64 x i32], ptr addrspacecast (ptr addrspace(3) [[GV]] to ptr), i64 0, i64 0
+; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[X]], align 4
+; CHECK-NEXT: call void @D(ptr [[P]])
+; CHECK-NEXT: store i32 [[V]], ptr [[P]], align 4
+; CHECK-NEXT: ret i32 [[V]]
+;
+entry:
+ %p = getelementptr inbounds [64 x i32], ptr addrspacecast (ptr addrspace(3) @lds_gv to ptr), i64 0, i64 0
+ %v = load i32, ptr %x
+ call void @D(ptr %p)
+ store i32 %v, ptr %p
+ ret i32 %v
+}
+
+; Function D of the call graph above: a leaf that stores the constant 8
+; through its pointer argument.
+; The checks follow the same autogenerated shape as the other functions in this
+; file (CHECK-SAME for the signature, a captured argument name, closing ';')
+; rather than spelling the value name %x literally.
+define void @D(ptr %x) {
+; CHECK-LABEL: define void @D(
+; CHECK-SAME: ptr [[X:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store i32 8, ptr [[X]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ store i32 8, ptr %x
+ ret void
+}
+
+; CHECK-LABEL: define i32 @X.clone.0(ptr %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %0 = addrspacecast ptr addrspace(3) [[GV_CLONE_0]] to ptr
+; CHECK-NEXT: %p = getelementptr inbounds [64 x i32], ptr %0, i64 0, i64 0
+; CHECK-NEXT: %v = load i32, ptr %x, align 4
+; CHECK-NEXT: call void @D(ptr %p)
+; CHECK-NEXT: store i32 %v, ptr %p, align 4
+; CHECK-NEXT: ret i32 %v
+
+; CHECK-LABEL: define i32 @X.clone.1(ptr %x) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %0 = addrspacecast ptr addrspace(3) [[GV_CLONE_1]] to ptr
+; CHECK-NEXT: %p = getelementptr inbounds [64 x i32], ptr %0, i64 0, i64 0
+; CHECK-NEXT: %v = load i32, ptr %x, align 4
+; CHECK-NEXT: call void @D(ptr %p)
+; CHECK-NEXT: store i32 %v, ptr %p, align 4
+; CHECK-NEXT: ret i32 %v
----------------
arsenm wrote:
end of file line error
https://github.com/llvm/llvm-project/pull/89683
More information about the llvm-commits
mailing list