[llvm] [AMDGPU] Correctly merge noalias scopes during lowering of LDS data. (PR #131664)
Sirish Pande via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 15 10:41:41 PDT 2025
srpande wrote:
> > I could possibly add more tests to show how LDS lowering pass is making aliasing more conservative.
>
> Yes, should have coverage showing the better / worse cases with merge
A little more context seems to be needed here.
Consider the following test:
```
@a = internal unnamed_addr addrspace(3) global [64 x i32] undef, align 4
@b = internal unnamed_addr addrspace(3) global [64 x i32] undef, align 4
@c = internal unnamed_addr addrspace(3) global [64 x i32] undef, align 4
define amdgpu_kernel void @ds_load_stores_aainfo(ptr addrspace(1) %arg, i32 %i) {
bb:
%gep.a = getelementptr inbounds [64 x i32], ptr addrspace(3) @a, i32 0, i32 %i
%gep.b = getelementptr inbounds [64 x i32], ptr addrspace(3) @b, i32 0, i32 %i
%val.a = load i64, ptr addrspace(3) %gep.a, align 4, !tbaa !0, !noalias !5
%val.b = load i64, ptr addrspace(3) %gep.b, align 4, !tbaa !0, !noalias !5
store i64 1, ptr addrspace(3) @c, align 4, !tbaa !0, !noalias !2
%val = add i64 %val.a, %val.b
store i64 %val, ptr addrspace(1) %arg, align 4
ret void
}
!0 = !{!"omnipotent char", !1, i64 0}
!1 = !{!1}
!2 = !{!3}
!3 = distinct !{!3, !4}
!4 = distinct !{!4}
!5 = !{!3}
```
All LDS variables have a scope of !3 and a domain of !4. Clearly they do not alias each other. The following check shows that the LDS variables do not alias each other:
`opt -S -aa-pipeline=basic-aa,scoped-noalias-aa -passes=aa-eval -evaluate-aa-metadata -print-all-alias-modref-info -disable-output`
After running the LDS lowering pass with `opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module`, the following IR is generated. You can see that the `%llvm.amdgcn.kernel.ds_load_stores_aainfo.lds.t` structure is created for the three LDS variables.
```
%llvm.amdgcn.kernel.ds_load_stores_aainfo.lds.t = type { [64 x i32], [64 x i32], [64 x i32] }
@llvm.amdgcn.kernel.ds_load_stores_aainfo.lds = internal addrspace(3) global %llvm.amdgcn.kernel.ds_load_stores_aainfo.lds.t poison, align 16, !absolute_symbol !0
define amdgpu_kernel void @ds_load_stores_aainfo(ptr addrspace(1) %arg, i32 %i) #0 {
bb:
%gep.a = getelementptr inbounds [64 x i32], ptr addrspace(3) @llvm.amdgcn.kernel.ds_load_stores_aainfo.lds, i32 0, i32 %i
%gep.b = getelementptr inbounds [64 x i32], ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.ds_load_stores_aainfo.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.ds_load_stores_aainfo.lds, i32 0, i32 1), i32 0, i32 %i
%val.a = load i64, ptr addrspace(3) %gep.a, align 4, !tbaa !1, !alias.scope !4, !noalias !7
%val.b = load i64, ptr addrspace(3) %gep.b, align 4, !tbaa !1, !alias.scope !8, !noalias !7
store i64 1, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.ds_load_stores_aainfo.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.ds_load_stores_aainfo.lds, i32 0, i32 2), align 16, !tbaa !1, !alias.scope !10, !noalias !7
%val = add i64 %val.a, %val.b
store i64 %val, ptr addrspace(1) %arg, align 4
ret void
}
attributes #0 = { "amdgpu-lds-size"="768" }
!0 = !{i32 0, i32 1}
!1 = !{!2, !2, i64 0, i64 0}
!2 = !{!"omnipotent char", !3}
!3 = distinct !{!3}
!4 = !{!5}
!5 = distinct !{!5, !6}
!6 = distinct !{!6}
**!7 = !{}**
!8 = !{!9}
!9 = distinct !{!9, !6}
!10 = !{!11}
!11 = distinct !{!11, !6}
```
The problem with this IR is that its aliasing information is now more conservative.
The load from the LDS global **a**, which did not alias the store to **c** while they were separate LDS variables, is now reported as aliasing it.
`%val.a = load i64, ptr addrspace(3) %gep.a, align 4, !tbaa !1, !alias.scope !4, !noalias !7 ` is now mayalias with `store i64 1, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.ds_load_stores_aainfo.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.ds_load_stores_aainfo.lds, i32 0, i32 2), align 16, !tbaa !1, !alias.scope !10, !noalias !7`
Similarly, the load from **b** is now also reported as MayAlias with the store to **c**:
` MayAlias: %val.b = load i64, ptr addrspace(3) %gep.b, align 4, !tbaa !1, !alias.scope !8, !noalias !7 <-> store i64 1, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.ds_load_stores_aainfo.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.ds_load_stores_aainfo.lds, i32 0, i32 2), align 16, !tbaa !1, !alias.scope !10, !noalias !7`
The reason is that the `!noalias` metadata **!7 = !{}** is an empty set, whereas the alias scopes **!4** and **!8** (from the loads) are both in domain **!6**.
The current LDS lowering pass could not disambiguate the domains. This patch looks at the domains of all the metadata and produces more precise aliasing information, as shown below.
```
%llvm.amdgcn.kernel.ds_load_stores_aainfo.lds.t = type { [64 x i32], [64 x i32], [64 x i32] }
@llvm.amdgcn.kernel.ds_load_stores_aainfo.lds = internal addrspace(3) global %llvm.amdgcn.kernel.ds_load_stores_aainfo.lds.t poison, align 16, !absolute_symbol !0
define amdgpu_kernel void @ds_load_stores_aainfo(ptr addrspace(1) %arg, i32 %i) #0 {
bb:
%gep.a = getelementptr inbounds [64 x i32], ptr addrspace(3) @llvm.amdgcn.kernel.ds_load_stores_aainfo.lds, i32 0, i32 %i
%gep.b = getelementptr inbounds [64 x i32], ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.ds_load_stores_aainfo.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.ds_load_stores_aainfo.lds, i32 0, i32 1), i32 0, i32 %i
%val.a = load i64, ptr addrspace(3) %gep.a, align 4, !tbaa !1, !alias.scope !4, !noalias !7
%val.b = load i64, ptr addrspace(3) %gep.b, align 4, !tbaa !1, !alias.scope !12, !noalias !13
store i64 1, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.ds_load_stores_aainfo.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.ds_load_stores_aainfo.lds, i32 0, i32 2), align 16, !tbaa !1, !alias.scope !14, !noalias !15
%val = add i64 %val.a, %val.b
store i64 %val, ptr addrspace(1) %arg, align 4
tail call void @llvm.amdgcn.sched.group.barrier(i32 256, i32 1, i32 0)
tail call void @llvm.amdgcn.sched.group.barrier(i32 512, i32 1, i32 0)
tail call void @llvm.amdgcn.sched.group.barrier(i32 256, i32 1, i32 0)
ret void
}
; Function Attrs: convergent nocallback nofree nounwind willreturn
declare void @llvm.amdgcn.sched.group.barrier(i32 immarg, i32 immarg, i32 immarg) #1
attributes #0 = { "amdgpu-lds-size"="768" }
attributes #1 = { convergent nocallback nofree nounwind willreturn }
!0 = !{i32 0, i32 1}
!1 = !{!2, !2, i64 0, i64 0}
!2 = !{!"omnipotent char", !3}
!3 = distinct !{!3}
!4 = !{!5}
!5 = distinct !{!5, !6}
!6 = distinct !{!6}
!7 = !{!8, !10, !11}
!8 = distinct !{!8, !9}
!9 = distinct !{!9}
!10 = distinct !{!10, !6}
!11 = distinct !{!11, !6}
!12 = !{!10}
!13 = !{!8, !5, !11}
!14 = !{!11}
!15 = !{!8, !5, !10}
```
https://github.com/llvm/llvm-project/pull/131664
More information about the llvm-commits
mailing list