[llvm] [AMDGPU][Attributor] Infer `inreg` attribute in `AMDGPUAttributor` (PR #101609)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 24 06:12:02 PDT 2025
================
@@ -0,0 +1,257 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor %s -o - | FileCheck %s
+
+@g1 = protected addrspace(1) externally_initialized global i32 0, align 4
+@g2 = protected addrspace(1) externally_initialized global i32 0, align 4
+@g3 = protected addrspace(1) externally_initialized global i32 0, align 4
+@g4 = protected addrspace(1) externally_initialized global i32 0, align 4
+
+;.
+; CHECK: @g1 = protected addrspace(1) externally_initialized global i32 0, align 4
+; CHECK: @g2 = protected addrspace(1) externally_initialized global i32 0, align 4
+; CHECK: @g3 = protected addrspace(1) externally_initialized global i32 0, align 4
+; CHECK: @g4 = protected addrspace(1) externally_initialized global i32 0, align 4
+;.
+define internal fastcc void @callee_infer(ptr addrspace(1) %x, i32 %y) {
+; CHECK-LABEL: define {{[^@]+}}@callee_infer
+; CHECK-SAME: (ptr addrspace(1) inreg [[X:%.*]], i32 inreg [[Y:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[X_VAL:%.*]] = load i32, ptr addrspace(1) [[X]], align 4
+; CHECK-NEXT: store i32 [[X_VAL]], ptr addrspace(1) @g3, align 4
+; CHECK-NEXT: store i32 [[Y]], ptr addrspace(1) @g4, align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %x.val = load i32, ptr addrspace(1) %x, align 4
+ store i32 %x.val, ptr addrspace(1) @g3, align 4
+ store i32 %y, ptr addrspace(1) @g4, align 4
+ ret void
+}
+
+define amdgpu_kernel void @kernel_infer(ptr addrspace(1) %p1, ptr addrspace(1) %p2, i32 %x) {
+; CHECK-LABEL: define {{[^@]+}}@kernel_infer
+; CHECK-SAME: (ptr addrspace(1) [[P1:%.*]], ptr addrspace(1) [[P2:%.*]], i32 [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[X]], 0
+; CHECK-NEXT: [[P:%.*]] = select i1 [[CMP]], ptr addrspace(1) [[P1]], ptr addrspace(1) [[P2]]
+; CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(1) @llvm.amdgcn.readfirstlane.p1(ptr addrspace(1) @g1)
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[X]])
+; CHECK-NEXT: tail call fastcc void @callee_infer(ptr addrspace(1) [[TMP0]], i32 [[TMP1]])
+; CHECK-NEXT: [[TMP2:%.*]] = call ptr addrspace(1) @llvm.amdgcn.readfirstlane.p1(ptr addrspace(1) @g2)
+; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[X]])
+; CHECK-NEXT: tail call fastcc void @callee_infer(ptr addrspace(1) [[TMP2]], i32 [[TMP3]])
+; CHECK-NEXT: [[TMP4:%.*]] = call ptr addrspace(1) @llvm.amdgcn.readfirstlane.p1(ptr addrspace(1) @g1)
+; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 1)
+; CHECK-NEXT: tail call fastcc void @callee_infer(ptr addrspace(1) [[TMP4]], i32 [[TMP5]])
+; CHECK-NEXT: [[TMP6:%.*]] = call ptr addrspace(1) @llvm.amdgcn.readfirstlane.p1(ptr addrspace(1) @g2)
+; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 2)
+; CHECK-NEXT: tail call fastcc void @callee_infer(ptr addrspace(1) [[TMP6]], i32 [[TMP7]])
+; CHECK-NEXT: [[TMP8:%.*]] = call ptr addrspace(1) @llvm.amdgcn.readfirstlane.p1(ptr addrspace(1) [[P]])
+; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[X]])
+; CHECK-NEXT: tail call fastcc void @callee_infer(ptr addrspace(1) [[TMP8]], i32 [[TMP9]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %cmp = icmp sgt i32 %x, 0
+ %p = select i1 %cmp, ptr addrspace(1) %p1, ptr addrspace(1) %p2
+ tail call fastcc void @callee_infer(ptr addrspace(1) @g1, i32 %x)
+ tail call fastcc void @callee_infer(ptr addrspace(1) @g2, i32 %x)
+ tail call fastcc void @callee_infer(ptr addrspace(1) @g1, i32 1)
+ tail call fastcc void @callee_infer(ptr addrspace(1) @g2, i32 2)
+ tail call fastcc void @callee_infer(ptr addrspace(1) %p, i32 %x)
+ ret void
+}
+
+define amdgpu_kernel void @kernel_infer_indirect(ptr addrspace(1) %p1, ptr addrspace(1) %p2, i32 %x) {
+; CHECK-LABEL: define {{[^@]+}}@kernel_infer_indirect
+; CHECK-SAME: (ptr addrspace(1) [[P1:%.*]], ptr addrspace(1) [[P2:%.*]], i32 [[X:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[FN:%.*]] = alloca ptr, align 8, addrspace(5)
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[X]], 0
+; CHECK-NEXT: [[P:%.*]] = select i1 [[CMP]], ptr addrspace(1) [[P1]], ptr addrspace(1) [[P2]]
+; CHECK-NEXT: store ptr @kernel_infer, ptr addrspace(5) [[FN]], align 8
+; CHECK-NEXT: [[FN_CAST:%.*]] = addrspacecast ptr addrspace(5) [[FN]] to ptr
+; CHECK-NEXT: tail call fastcc void [[FN_CAST]](ptr addrspace(1) @g1, i32 [[X]])
+; CHECK-NEXT: tail call fastcc void [[FN_CAST]](ptr addrspace(1) @g2, i32 [[X]])
+; CHECK-NEXT: tail call fastcc void [[FN_CAST]](ptr addrspace(1) @g1, i32 1)
+; CHECK-NEXT: tail call fastcc void [[FN_CAST]](ptr addrspace(1) @g2, i32 2)
+; CHECK-NEXT: tail call fastcc void [[FN_CAST]](ptr addrspace(1) [[P]], i32 [[X]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %fn = alloca ptr, addrspace(5)
+ %cmp = icmp sgt i32 %x, 0
+ %p = select i1 %cmp, ptr addrspace(1) %p1, ptr addrspace(1) %p2
+ store ptr @kernel_infer, ptr addrspace(5) %fn
+ %fn.cast = addrspacecast ptr addrspace(5) %fn to ptr
+ tail call fastcc void %fn.cast(ptr addrspace(1) @g1, i32 %x)
+ tail call fastcc void %fn.cast(ptr addrspace(1) @g2, i32 %x)
+ tail call fastcc void %fn.cast(ptr addrspace(1) @g1, i32 1)
+ tail call fastcc void %fn.cast(ptr addrspace(1) @g2, i32 2)
+ tail call fastcc void %fn.cast(ptr addrspace(1) %p, i32 %x)
+ ret void
+}
+
+define internal fastcc void @callee_not_infer(ptr addrspace(1) %x, i32 %y) {
+; CHECK-LABEL: define {{[^@]+}}@callee_not_infer
+; CHECK-SAME: (ptr addrspace(1) [[X:%.*]], i32 [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[X_VAL:%.*]] = load i32, ptr addrspace(1) [[X]], align 4
+; CHECK-NEXT: store i32 [[X_VAL]], ptr addrspace(1) @g3, align 4
+; CHECK-NEXT: store i32 [[Y]], ptr addrspace(1) @g4, align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %x.val = load i32, ptr addrspace(1) %x, align 4
+ store i32 %x.val, ptr addrspace(1) @g3, align 4
+ store i32 %y, ptr addrspace(1) @g4, align 4
+ ret void
+}
+
+define amdgpu_kernel void @kernel_not_infer(ptr addrspace(1) %q, ptr addrspace(1) %p1, ptr addrspace(1) %p2) {
+; CHECK-LABEL: define {{[^@]+}}@kernel_not_infer
+; CHECK-SAME: (ptr addrspace(1) [[Q:%.*]], ptr addrspace(1) [[P1:%.*]], ptr addrspace(1) [[P2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ID_X:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr addrspace(1) [[Q]], i32 [[ID_X]]
+; CHECK-NEXT: [[D:%.*]] = load i32, ptr addrspace(1) [[GEP]], align 4
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[D]], 0
+; CHECK-NEXT: [[P:%.*]] = select i1 [[CMP]], ptr addrspace(1) [[P1]], ptr addrspace(1) [[P2]]
+; CHECK-NEXT: tail call fastcc void @callee_not_infer(ptr addrspace(1) [[Q]], i32 [[ID_X]])
+; CHECK-NEXT: tail call fastcc void @callee_not_infer(ptr addrspace(1) [[P]], i32 [[ID_X]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %id.x = call i32 @llvm.amdgcn.workitem.id.x()
+ %gep = getelementptr i32, ptr addrspace(1) %q, i32 %id.x
+ %d = load i32, ptr addrspace(1) %gep
+ %cmp = icmp sgt i32 %d, 0
+ %p = select i1 %cmp, ptr addrspace(1) %p1, ptr addrspace(1) %p2
+ tail call fastcc void @callee_not_infer(ptr addrspace(1) %q, i32 %id.x)
+ tail call fastcc void @callee_not_infer(ptr addrspace(1) %p, i32 %id.x)
+ ret void
+}
+
+define amdgpu_kernel void @kernel_not_infer_indirect(ptr addrspace(1) %q, ptr addrspace(1) %p1, ptr addrspace(1) %p2) {
+; CHECK-LABEL: define {{[^@]+}}@kernel_not_infer_indirect
+; CHECK-SAME: (ptr addrspace(1) [[Q:%.*]], ptr addrspace(1) [[P1:%.*]], ptr addrspace(1) [[P2:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[FN:%.*]] = alloca ptr, align 8, addrspace(5)
+; CHECK-NEXT: [[ID_X:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr addrspace(1) [[Q]], i32 [[ID_X]]
+; CHECK-NEXT: [[D:%.*]] = load i32, ptr addrspace(1) [[GEP]], align 4
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[D]], 0
+; CHECK-NEXT: [[P:%.*]] = select i1 [[CMP]], ptr addrspace(1) [[P1]], ptr addrspace(1) [[P2]]
+; CHECK-NEXT: store ptr @kernel_not_infer, ptr addrspace(5) [[FN]], align 8
+; CHECK-NEXT: [[FN_CAST:%.*]] = addrspacecast ptr addrspace(5) [[FN]] to ptr
+; CHECK-NEXT: tail call fastcc void [[FN_CAST]](ptr addrspace(1) [[Q]], i32 [[ID_X]])
+; CHECK-NEXT: tail call fastcc void [[FN_CAST]](ptr addrspace(1) [[P]], i32 [[ID_X]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %fn = alloca ptr, addrspace(5)
+ %id.x = call i32 @llvm.amdgcn.workitem.id.x()
+ %gep = getelementptr i32, ptr addrspace(1) %q, i32 %id.x
+ %d = load i32, ptr addrspace(1) %gep
+ %cmp = icmp sgt i32 %d, 0
+ %p = select i1 %cmp, ptr addrspace(1) %p1, ptr addrspace(1) %p2
+ store ptr @kernel_not_infer, ptr addrspace(5) %fn
+ %fn.cast = addrspacecast ptr addrspace(5) %fn to ptr
+ tail call fastcc void %fn.cast(ptr addrspace(1) %q, i32 %id.x)
+ tail call fastcc void %fn.cast(ptr addrspace(1) %p, i32 %id.x)
+ ret void
+}
+
+define internal fastcc void @cs_callee_not_infer(ptr addrspace(1) %x, i32 %y) {
+; CHECK-LABEL: define {{[^@]+}}@cs_callee_not_infer
+; CHECK-SAME: (ptr addrspace(1) [[X:%.*]], i32 [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[X_VAL:%.*]] = load i32, ptr addrspace(1) [[X]], align 4
+; CHECK-NEXT: store i32 [[X_VAL]], ptr addrspace(1) @g3, align 4
+; CHECK-NEXT: store i32 [[Y]], ptr addrspace(1) @g4, align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %x.val = load i32, ptr addrspace(1) %x, align 4
+ store i32 %x.val, ptr addrspace(1) @g3, align 4
+ store i32 %y, ptr addrspace(1) @g4, align 4
+ ret void
+}
+
+define amdgpu_cs void @cs_kernel_not_infer(ptr addrspace(1) %q, ptr addrspace(1) %p1, ptr addrspace(1) %p2) {
+; CHECK-LABEL: define {{[^@]+}}@cs_kernel_not_infer
+; CHECK-SAME: (ptr addrspace(1) [[Q:%.*]], ptr addrspace(1) [[P1:%.*]], ptr addrspace(1) [[P2:%.*]]) #[[ATTR2:[0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ID_X:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr addrspace(1) [[Q]], i32 [[ID_X]]
+; CHECK-NEXT: [[D:%.*]] = load i32, ptr addrspace(1) [[GEP]], align 4
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[D]], 0
+; CHECK-NEXT: [[P:%.*]] = select i1 [[CMP]], ptr addrspace(1) [[P1]], ptr addrspace(1) [[P2]]
+; CHECK-NEXT: tail call fastcc void @cs_callee_not_infer(ptr addrspace(1) [[Q]], i32 [[ID_X]])
+; CHECK-NEXT: tail call fastcc void @cs_callee_not_infer(ptr addrspace(1) [[P]], i32 [[ID_X]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %id.x = call i32 @llvm.amdgcn.workitem.id.x()
+ %gep = getelementptr i32, ptr addrspace(1) %q, i32 %id.x
+ %d = load i32, ptr addrspace(1) %gep
+ %cmp = icmp sgt i32 %d, 0
+ %p = select i1 %cmp, ptr addrspace(1) %p1, ptr addrspace(1) %p2
+ tail call fastcc void @cs_callee_not_infer(ptr addrspace(1) %q, i32 %id.x)
+ tail call fastcc void @cs_callee_not_infer(ptr addrspace(1) %p, i32 %id.x)
+ ret void
+}
+
+define internal fastcc void @cs_callee_not_infer_indirect(ptr addrspace(1) %x, i32 %y) {
+; CHECK-LABEL: define {{[^@]+}}@cs_callee_not_infer_indirect
+; CHECK-SAME: (ptr addrspace(1) [[X:%.*]], i32 [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[X_VAL:%.*]] = load i32, ptr addrspace(1) [[X]], align 4
+; CHECK-NEXT: store i32 [[X_VAL]], ptr addrspace(1) @g3, align 4
+; CHECK-NEXT: store i32 [[Y]], ptr addrspace(1) @g4, align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %x.val = load i32, ptr addrspace(1) %x, align 4
+ store i32 %x.val, ptr addrspace(1) @g3, align 4
+ store i32 %y, ptr addrspace(1) @g4, align 4
+ ret void
+}
+
+
+define amdgpu_cs void @cs_kernel_not_infer_indirect(ptr addrspace(1) %q, ptr addrspace(1) %p1, ptr addrspace(1) %p2) {
+; CHECK-LABEL: define {{[^@]+}}@cs_kernel_not_infer_indirect
+; CHECK-SAME: (ptr addrspace(1) [[Q:%.*]], ptr addrspace(1) [[P1:%.*]], ptr addrspace(1) [[P2:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[FN:%.*]] = alloca ptr, align 8, addrspace(5)
+; CHECK-NEXT: [[ID_X:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr addrspace(1) [[Q]], i32 [[ID_X]]
+; CHECK-NEXT: [[D:%.*]] = load i32, ptr addrspace(1) [[GEP]], align 4
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[D]], 0
+; CHECK-NEXT: [[P:%.*]] = select i1 [[CMP]], ptr addrspace(1) [[P1]], ptr addrspace(1) [[P2]]
+; CHECK-NEXT: store ptr @cs_callee_not_infer_indirect, ptr addrspace(5) [[FN]], align 8
+; CHECK-NEXT: [[FN_CAST:%.*]] = addrspacecast ptr addrspace(5) [[FN]] to ptr
+; CHECK-NEXT: tail call fastcc void [[FN_CAST]](ptr addrspace(1) [[Q]], i32 [[ID_X]])
+; CHECK-NEXT: tail call fastcc void [[FN_CAST]](ptr addrspace(1) [[P]], i32 [[ID_X]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %fn = alloca ptr, addrspace(5)
+ %id.x = call i32 @llvm.amdgcn.workitem.id.x()
+ %gep = getelementptr i32, ptr addrspace(1) %q, i32 %id.x
+ %d = load i32, ptr addrspace(1) %gep
+ %cmp = icmp sgt i32 %d, 0
+ %p = select i1 %cmp, ptr addrspace(1) %p1, ptr addrspace(1) %p2
+ store ptr @cs_callee_not_infer_indirect, ptr addrspace(5) %fn
+ %fn.cast = addrspacecast ptr addrspace(5) %fn to ptr
+ tail call fastcc void %fn.cast(ptr addrspace(1) %q, i32 %id.x)
+ tail call fastcc void %fn.cast(ptr addrspace(1) %p, i32 %id.x)
+ ret void
+}
+;.
----------------
arsenm wrote:
Can you add some cases where the function already has the inreg attribute, with and without an existing readfirstlane?
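Something like this, for example (just a sketch; the function names are placeholders, and it reuses the globals from this test):

```llvm
declare i32 @llvm.amdgcn.readfirstlane.i32(i32)

; Callee that already carries `inreg` on both parameters.
define internal fastcc void @callee_already_inreg(ptr addrspace(1) inreg %x, i32 inreg %y) {
entry:
  %x.val = load i32, ptr addrspace(1) %x, align 4
  store i32 %x.val, ptr addrspace(1) @g3, align 4
  store i32 %y, ptr addrspace(1) @g4, align 4
  ret void
}

; Caller with no readfirstlane on the arguments yet.
define amdgpu_kernel void @kernel_call_already_inreg(i32 %x) {
entry:
  tail call fastcc void @callee_already_inreg(ptr addrspace(1) @g1, i32 %x)
  ret void
}

; Caller that already wraps the scalar argument in readfirstlane.
define amdgpu_kernel void @kernel_call_already_inreg_rfl(i32 %x) {
entry:
  %x.rfl = call i32 @llvm.amdgcn.readfirstlane.i32(i32 %x)
  tail call fastcc void @callee_already_inreg(ptr addrspace(1) @g1, i32 %x.rfl)
  ret void
}
```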
https://github.com/llvm/llvm-project/pull/101609