[llvm] [AMDGPU] Correctly merge noalias scopes during lowering of LDS data. (PR #131664)

Sirish Pande via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 15 10:41:41 PDT 2025


srpande wrote:

> > I could possibly add more tests to show how LDS lowering pass is making aliasing more conservative.
> 
> Yes, should have coverage showing the better / worse cases with merge

A little more context seems to be needed here.

Consider the following test:

```
@a = internal unnamed_addr addrspace(3) global [64 x i32] undef, align 4
@b = internal unnamed_addr addrspace(3) global [64 x i32] undef, align 4
@c = internal unnamed_addr addrspace(3) global [64 x i32] undef, align 4

define amdgpu_kernel void @ds_load_stores_aainfo(ptr addrspace(1) %arg, i32 %i) {
bb:
  %gep.a = getelementptr inbounds [64 x i32], ptr addrspace(3) @a, i32 0, i32 %i
  %gep.b = getelementptr inbounds [64 x i32], ptr addrspace(3) @b, i32 0, i32 %i

  %val.a = load i64, ptr addrspace(3) %gep.a, align 4, !tbaa !0, !noalias !5
  %val.b = load i64, ptr addrspace(3) %gep.b, align 4, !tbaa !0, !noalias !5

  store i64 1, ptr addrspace(3) @c, align 4, !tbaa !0, !noalias !2

  %val = add i64 %val.a, %val.b
  store i64 %val, ptr addrspace(1) %arg, align 4
  ret void
}

  !0 = !{!"omnipotent char", !1, i64 0}
  !1 = !{!1}
  !2 = !{!3}
  !3 = distinct !{!3, !4}
  !4 = distinct !{!4}
  !5 = !{!3}
```
All LDS variables have scope !3 in domain !4, and clearly they do not alias each other. The following check shows that the LDS accesses do not alias each other:
`opt -S -aa-pipeline=basic-aa,scoped-noalias-aa -passes=aa-eval -evaluate-aa-metadata -print-all-alias-modref-info -disable-output`

After running the LDS lowering pass with `opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module`, the following IR is generated. You can see that the `%llvm.amdgcn.kernel.ds_load_stores_aainfo.lds.t` structure is created for the three LDS variables.

```
%llvm.amdgcn.kernel.ds_load_stores_aainfo.lds.t = type { [64 x i32], [64 x i32], [64 x i32] }

@llvm.amdgcn.kernel.ds_load_stores_aainfo.lds = internal addrspace(3) global %llvm.amdgcn.kernel.ds_load_stores_aainfo.lds.t poison, align 16, !absolute_symbol !0

define amdgpu_kernel void @ds_load_stores_aainfo(ptr addrspace(1) %arg, i32 %i) #0 {
bb:
  %gep.a = getelementptr inbounds [64 x i32], ptr addrspace(3) @llvm.amdgcn.kernel.ds_load_stores_aainfo.lds, i32 0, i32 %i
  %gep.b = getelementptr inbounds [64 x i32], ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.ds_load_stores_aainfo.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.ds_load_stores_aainfo.lds, i32 0, i32 1), i32 0, i32 %i
  %val.a = load i64, ptr addrspace(3) %gep.a, align 4, !tbaa !1, !alias.scope !4, !noalias !7
  %val.b = load i64, ptr addrspace(3) %gep.b, align 4, !tbaa !1, !alias.scope !8, !noalias !7
  store i64 1, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.ds_load_stores_aainfo.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.ds_load_stores_aainfo.lds, i32 0, i32 2), align 16, !tbaa !1, !alias.scope !10, !noalias !7
  %val = add i64 %val.a, %val.b
  store i64 %val, ptr addrspace(1) %arg, align 4
  ret void
}

attributes #0 = { "amdgpu-lds-size"="768" }

!0 = !{i32 0, i32 1}
!1 = !{!2, !2, i64 0, i64 0}
!2 = !{!"omnipotent char", !3}
!3 = distinct !{!3}
!4 = !{!5}
!5 = distinct !{!5, !6}
!6 = distinct !{!6}
!7 = !{}        ; <-- note: the !noalias set is empty
!8 = !{!9}
!9 = distinct !{!9, !6}
!10 = !{!11}
!11 = distinct !{!11, !6}
```
The aliasing information in this IR is now more conservative than in the original.

The load from LDS global **a**, which did not alias the store to **c** while they were separate LDS variables, is now reported as possibly aliasing it.
 
`%val.a = load i64, ptr addrspace(3) %gep.a, align 4, !tbaa !1, !alias.scope !4, !noalias !7` is now reported as MayAlias with `store i64 1, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.ds_load_stores_aainfo.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.ds_load_stores_aainfo.lds, i32 0, i32 2), align 16, !tbaa !1, !alias.scope !10, !noalias !7`

Similarly, the load from **b** is now also reported as MayAlias with the store:
`  MayAlias:   %val.b = load i64, ptr addrspace(3) %gep.b, align 4, !tbaa !1, !alias.scope !8, !noalias !7 <->   store i64 1, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.ds_load_stores_aainfo.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.ds_load_stores_aainfo.lds, i32 0, i32 2), align 16, !tbaa !1, !alias.scope !10, !noalias !7`

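The reason is that the `!noalias` metadata **!7 = !{}** is an empty set, whereas the alias scopes **!4** and **!8** (from the loads) and **!10** (from the store) are all in domain **!6**. With an empty `!noalias` list, none of these scopes is declared as not aliasing the others, so scoped-noalias-aa can only answer MayAlias.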

The current LDS lowering pass cannot disambiguate the domains when merging the metadata. This patch looks at the domains of all the metadata and produces more precise aliasing information, as shown below.
```

%llvm.amdgcn.kernel.ds_load_stores_aainfo.lds.t = type { [64 x i32], [64 x i32], [64 x i32] }

@llvm.amdgcn.kernel.ds_load_stores_aainfo.lds = internal addrspace(3) global %llvm.amdgcn.kernel.ds_load_stores_aainfo.lds.t poison, align 16, !absolute_symbol !0

define amdgpu_kernel void @ds_load_stores_aainfo(ptr addrspace(1) %arg, i32 %i) #0 {
bb:
  %gep.a = getelementptr inbounds [64 x i32], ptr addrspace(3) @llvm.amdgcn.kernel.ds_load_stores_aainfo.lds, i32 0, i32 %i
  %gep.b = getelementptr inbounds [64 x i32], ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.ds_load_stores_aainfo.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.ds_load_stores_aainfo.lds, i32 0, i32 1), i32 0, i32 %i
  %val.a = load i64, ptr addrspace(3) %gep.a, align 4, !tbaa !1, !alias.scope !4, !noalias !7
  %val.b = load i64, ptr addrspace(3) %gep.b, align 4, !tbaa !1, !alias.scope !12, !noalias !13
  store i64 1, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.ds_load_stores_aainfo.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.ds_load_stores_aainfo.lds, i32 0, i32 2), align 16, !tbaa !1, !alias.scope !14, !noalias !15
  %val = add i64 %val.a, %val.b
  store i64 %val, ptr addrspace(1) %arg, align 4
  tail call void @llvm.amdgcn.sched.group.barrier(i32 256, i32 1, i32 0)
  tail call void @llvm.amdgcn.sched.group.barrier(i32 512, i32 1, i32 0)
  tail call void @llvm.amdgcn.sched.group.barrier(i32 256, i32 1, i32 0)
  ret void
}

; Function Attrs: convergent nocallback nofree nounwind willreturn
declare void @llvm.amdgcn.sched.group.barrier(i32 immarg, i32 immarg, i32 immarg) #1

attributes #0 = { "amdgpu-lds-size"="768" }
attributes #1 = { convergent nocallback nofree nounwind willreturn }

!0 = !{i32 0, i32 1}
!1 = !{!2, !2, i64 0, i64 0}
!2 = !{!"omnipotent char", !3}
!3 = distinct !{!3}
!4 = !{!5}
!5 = distinct !{!5, !6}
!6 = distinct !{!6}
!7 = !{!8, !10, !11}
!8 = distinct !{!8, !9}
!9 = distinct !{!9}
!10 = distinct !{!10, !6}
!11 = distinct !{!11, !6}
!12 = !{!10}
!13 = !{!8, !5, !11}
!14 = !{!11}
!15 = !{!8, !5, !10}

```

https://github.com/llvm/llvm-project/pull/131664


More information about the llvm-commits mailing list