[llvm] [AMDGPU] Detect trivially uniform arguments in InstCombine (PR #129897)
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 7 01:49:25 PST 2025
https://github.com/jayfoad updated https://github.com/llvm/llvm-project/pull/129897
>From a2b805577ea4ee79e957949fd62abbcc12b6e21a Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Wed, 5 Mar 2025 16:27:29 +0000
Subject: [PATCH 1/3] [AMDGPU] Detect trivially uniform arguments in
InstCombine
Update one test to use an SGPR argument as the simplest way of getting a
uniform value.
---
.../AMDGPU/AMDGPUInstCombineIntrinsic.cpp | 2 +
.../InstCombine/AMDGPU/amdgcn-intrinsics.ll | 60 +++++++++----------
2 files changed, 32 insertions(+), 30 deletions(-)
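
For context, the whole predicate after this change looks roughly like the sketch below. It is reconstructed from the hunk that follows plus its two lines of surrounding context; the body of the IntrinsicInst case and the final return false are assumed from the existing structure rather than taken from this diff.

  static bool isTriviallyUniform(const Use &U) {
    Value *V = U.get();
    // Constants are uniform by definition.
    if (isa<Constant>(V))
      return true;
    // New in this patch: arguments that the calling convention passes in
    // SGPRs (all kernel arguments, inreg shader arguments) are uniform too.
    if (const auto *A = dyn_cast<Argument>(V))
      return AMDGPU::isArgPassedInSGPR(A);
    // The result of an always-uniform intrinsic can also qualify, subject
    // to the pre-existing checks that are elided here.
    if (const auto *II = dyn_cast<IntrinsicInst>(V)) {
      if (!AMDGPU::isIntrinsicAlwaysUniform(II->getIntrinsicID()))
        return false;
      // ... remainder of the existing IntrinsicInst handling, unchanged ...
    }
    return false;
  }

AMDGPU::isArgPassedInSGPR is the existing helper that already encodes the per-calling-convention rules exercised by the tests added in the second patch, so no new target logic is needed beyond this one check.
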
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index ebe740f884ea6..070e79f9c0ea3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -440,6 +440,8 @@ static bool isTriviallyUniform(const Use &U) {
Value *V = U.get();
if (isa<Constant>(V))
return true;
+ if (const auto *A = dyn_cast<Argument>(V))
+ return AMDGPU::isArgPassedInSGPR(A);
if (const auto *II = dyn_cast<IntrinsicInst>(V)) {
if (!AMDGPU::isIntrinsicAlwaysUniform(II->getIntrinsicID()))
return false;
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
index 3605c4a929c58..811ab862e2091 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
@@ -66,7 +66,7 @@ define double @test_constant_fold_rcp_f64_43() nounwind {
define float @test_constant_fold_rcp_f32_43_strictfp() nounwind strictfp {
; CHECK-LABEL: @test_constant_fold_rcp_f32_43_strictfp(
-; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) #[[ATTR14:[0-9]+]]
+; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) #[[ATTR17:[0-9]+]]
; CHECK-NEXT: ret float [[VAL]]
;
%val = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) strictfp nounwind readnone
@@ -115,7 +115,7 @@ define half @test_constant_fold_sqrt_f16_0() nounwind {
define float @test_constant_fold_sqrt_f32_0() nounwind {
; CHECK-LABEL: @test_constant_fold_sqrt_f32_0(
-; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.sqrt.f32(float 0.000000e+00) #[[ATTR15:[0-9]+]]
+; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.sqrt.f32(float 0.000000e+00) #[[ATTR18:[0-9]+]]
; CHECK-NEXT: ret float [[VAL]]
;
%val = call float @llvm.amdgcn.sqrt.f32(float 0.0) nounwind readnone
@@ -124,7 +124,7 @@ define float @test_constant_fold_sqrt_f32_0() nounwind {
define double @test_constant_fold_sqrt_f64_0() nounwind {
; CHECK-LABEL: @test_constant_fold_sqrt_f64_0(
-; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.sqrt.f64(double 0.000000e+00) #[[ATTR15]]
+; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.sqrt.f64(double 0.000000e+00) #[[ATTR18]]
; CHECK-NEXT: ret double [[VAL]]
;
%val = call double @llvm.amdgcn.sqrt.f64(double 0.0) nounwind readnone
@@ -141,7 +141,7 @@ define half @test_constant_fold_sqrt_f16_neg0() nounwind {
define float @test_constant_fold_sqrt_f32_neg0() nounwind {
; CHECK-LABEL: @test_constant_fold_sqrt_f32_neg0(
-; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.sqrt.f32(float -0.000000e+00) #[[ATTR15]]
+; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.sqrt.f32(float -0.000000e+00) #[[ATTR18]]
; CHECK-NEXT: ret float [[VAL]]
;
%val = call float @llvm.amdgcn.sqrt.f32(float -0.0) nounwind readnone
@@ -150,7 +150,7 @@ define float @test_constant_fold_sqrt_f32_neg0() nounwind {
define double @test_constant_fold_sqrt_f64_neg0() nounwind {
; CHECK-LABEL: @test_constant_fold_sqrt_f64_neg0(
-; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.sqrt.f64(double -0.000000e+00) #[[ATTR15]]
+; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.sqrt.f64(double -0.000000e+00) #[[ATTR18]]
; CHECK-NEXT: ret double [[VAL]]
;
%val = call double @llvm.amdgcn.sqrt.f64(double -0.0) nounwind readnone
@@ -718,7 +718,7 @@ define i1 @test_class_isnan_f32(float %x) nounwind {
define i1 @test_class_isnan_f32_strict(float %x) nounwind strictfp {
; CHECK-LABEL: @test_class_isnan_f32_strict(
-; CHECK-NEXT: [[VAL:%.*]] = call i1 @llvm.is.fpclass.f32(float [[X:%.*]], i32 3) #[[ATTR16:[0-9]+]]
+; CHECK-NEXT: [[VAL:%.*]] = call i1 @llvm.is.fpclass.f32(float [[X:%.*]], i32 3) #[[ATTR19:[0-9]+]]
; CHECK-NEXT: ret i1 [[VAL]]
;
%val = call i1 @llvm.amdgcn.class.f32(float %x, i32 3) strictfp
@@ -736,7 +736,7 @@ define i1 @test_class_is_p0_n0_f32(float %x) nounwind {
define i1 @test_class_is_p0_n0_f32_strict(float %x) nounwind strictfp {
; CHECK-LABEL: @test_class_is_p0_n0_f32_strict(
-; CHECK-NEXT: [[VAL:%.*]] = call i1 @llvm.is.fpclass.f32(float [[X:%.*]], i32 96) #[[ATTR16]]
+; CHECK-NEXT: [[VAL:%.*]] = call i1 @llvm.is.fpclass.f32(float [[X:%.*]], i32 96) #[[ATTR19]]
; CHECK-NEXT: ret i1 [[VAL]]
;
%val = call i1 @llvm.amdgcn.class.f32(float %x, i32 96) strictfp
@@ -2000,7 +2000,7 @@ define i64 @icmp_constant_inputs_false() {
define i64 @icmp_constant_inputs_true() {
; CHECK-LABEL: @icmp_constant_inputs_true(
-; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0:![0-9]+]]) #[[ATTR17:[0-9]+]]
+; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0:![0-9]+]]) #[[ATTR20:[0-9]+]]
; CHECK-NEXT: ret i64 [[RESULT]]
;
%result = call i64 @llvm.amdgcn.icmp.i64.i32(i32 9, i32 8, i32 34)
@@ -2707,7 +2707,7 @@ define i64 @fcmp_constant_inputs_false() {
define i64 @fcmp_constant_inputs_true() {
; CHECK-LABEL: @fcmp_constant_inputs_true(
-; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0]]) #[[ATTR17]]
+; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0]]) #[[ATTR20]]
; CHECK-NEXT: ret i64 [[RESULT]]
;
%result = call i64 @llvm.amdgcn.fcmp.i64.f32(float 2.0, float 4.0, i32 4)
@@ -2845,7 +2845,7 @@ declare i32 @llvm.amdgcn.readfirstlane(i32)
@gv = constant i32 0
-define amdgpu_kernel void @readfirstlane_constant(i32 %arg, ptr %ptr) {
+define amdgpu_cs void @readfirstlane_constant(i32 %arg, ptr %ptr) {
; CHECK-LABEL: @readfirstlane_constant(
; CHECK-NEXT: [[VAR:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[ARG:%.*]])
; CHECK-NEXT: store volatile i32 [[VAR]], ptr [[PTR:%.*]], align 4
@@ -2931,7 +2931,7 @@ bb1:
declare i32 @llvm.amdgcn.readlane(i32, i32)
-define amdgpu_kernel void @readlane_constant(i32 %arg, i32 %lane, ptr %ptr) {
+define amdgpu_cs void @readlane_constant(i32 %arg, i32 %lane, ptr %ptr) {
; CHECK-LABEL: @readlane_constant(
; CHECK-NEXT: [[VAR:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[ARG:%.*]], i32 7)
; CHECK-NEXT: store volatile i32 [[VAR]], ptr [[PTR:%.*]], align 4
@@ -3143,14 +3143,12 @@ define amdgpu_kernel void @permlanex16_fetch_invalid_bound_ctrl(ptr addrspace(1)
; llvm.amdgcn.permlane64
; --------------------------------------------------------------------
-define amdgpu_kernel void @permlane64_uniform(ptr addrspace(1) %out, i32 %src0) {
+define amdgpu_kernel void @permlane64_uniform(ptr addrspace(1) %out, i32 %src) {
; CHECK-LABEL: @permlane64_uniform(
-; CHECK-NEXT: [[SRC1:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[SRC0:%.*]])
-; CHECK-NEXT: store i32 [[SRC1]], ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT: store i32 [[SRC1:%.*]], ptr addrspace(1) [[OUT:%.*]], align 4
; CHECK-NEXT: ret void
;
- %src1 = call i32 @llvm.amdgcn.readfirstlane(i32 %src0)
- %res = call i32 @llvm.amdgcn.permlane64(i32 %src1)
+ %res = call i32 @llvm.amdgcn.permlane64(i32 %src)
store i32 %res, ptr addrspace(1) %out
ret void
}
@@ -5845,7 +5843,7 @@ define double @trig_preop_constfold_neg32_segment() {
define double @trig_preop_constfold_strictfp() strictfp {
; CHECK-LABEL: @trig_preop_constfold_strictfp(
-; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double 3.454350e+02, i32 5) #[[ATTR16]]
+; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double 3.454350e+02, i32 5) #[[ATTR19]]
; CHECK-NEXT: ret double [[VAL]]
;
%val = call double @llvm.amdgcn.trig.preop.f64(double 3.454350e+02, i32 5) strictfp
@@ -6214,7 +6212,7 @@ define half @test_constant_fold_log_f16_neg10() {
define float @test_constant_fold_log_f32_qnan_strictfp() strictfp {
; CHECK-LABEL: @test_constant_fold_log_f32_qnan_strictfp(
-; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.log.f32(float 0x7FF8000000000000) #[[ATTR16]]
+; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.log.f32(float 0x7FF8000000000000) #[[ATTR19]]
; CHECK-NEXT: ret float [[VAL]]
;
%val = call float @llvm.amdgcn.log.f32(float 0x7FF8000000000000) strictfp
@@ -6223,7 +6221,7 @@ define float @test_constant_fold_log_f32_qnan_strictfp() strictfp {
define float @test_constant_fold_log_f32_0_strictfp() strictfp {
; CHECK-LABEL: @test_constant_fold_log_f32_0_strictfp(
-; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.log.f32(float 0.000000e+00) #[[ATTR16]]
+; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.log.f32(float 0.000000e+00) #[[ATTR19]]
; CHECK-NEXT: ret float [[VAL]]
;
%val = call float @llvm.amdgcn.log.f32(float 0.0) strictfp
@@ -6232,7 +6230,7 @@ define float @test_constant_fold_log_f32_0_strictfp() strictfp {
define float @test_constant_fold_log_f32_neg0_strictfp() strictfp {
; CHECK-LABEL: @test_constant_fold_log_f32_neg0_strictfp(
-; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.log.f32(float -0.000000e+00) #[[ATTR16]]
+; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.log.f32(float -0.000000e+00) #[[ATTR19]]
; CHECK-NEXT: ret float [[VAL]]
;
%val = call float @llvm.amdgcn.log.f32(float -0.0) strictfp
@@ -6241,7 +6239,7 @@ define float @test_constant_fold_log_f32_neg0_strictfp() strictfp {
define float @test_constant_fold_log_f32_neg_strictfp() strictfp {
; CHECK-LABEL: @test_constant_fold_log_f32_neg_strictfp(
-; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.log.f32(float -1.000000e+01) #[[ATTR16]]
+; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.log.f32(float -1.000000e+01) #[[ATTR19]]
; CHECK-NEXT: ret float [[VAL]]
;
%val = call float @llvm.amdgcn.log.f32(float -10.0) strictfp
@@ -6258,7 +6256,7 @@ define float @test_constant_fold_log_f32_pinf_strictfp() strictfp {
define float @test_constant_fold_log_f32_ninf_strictfp() strictfp {
; CHECK-LABEL: @test_constant_fold_log_f32_ninf_strictfp(
-; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.log.f32(float 0xFFF0000000000000) #[[ATTR16]]
+; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.log.f32(float 0xFFF0000000000000) #[[ATTR19]]
; CHECK-NEXT: ret float [[VAL]]
;
%val = call float @llvm.amdgcn.log.f32(float 0xFFF0000000000000) strictfp
@@ -6460,7 +6458,7 @@ define half @test_constant_fold_exp2_f16_neg10() {
define float @test_constant_fold_exp2_f32_qnan_strictfp() strictfp {
; CHECK-LABEL: @test_constant_fold_exp2_f32_qnan_strictfp(
-; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float 0x7FF8000000000000) #[[ATTR16]]
+; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float 0x7FF8000000000000) #[[ATTR19]]
; CHECK-NEXT: ret float [[VAL]]
;
%val = call float @llvm.amdgcn.exp2.f32(float 0x7FF8000000000000) strictfp
@@ -6469,7 +6467,7 @@ define float @test_constant_fold_exp2_f32_qnan_strictfp() strictfp {
define float @test_constant_fold_exp2_f32_0_strictfp() strictfp {
; CHECK-LABEL: @test_constant_fold_exp2_f32_0_strictfp(
-; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float 0.000000e+00) #[[ATTR16]]
+; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float 0.000000e+00) #[[ATTR19]]
; CHECK-NEXT: ret float [[VAL]]
;
%val = call float @llvm.amdgcn.exp2.f32(float 0.0) strictfp
@@ -6478,7 +6476,7 @@ define float @test_constant_fold_exp2_f32_0_strictfp() strictfp {
define float @test_constant_fold_exp2_f32_neg0_strictfp() strictfp {
; CHECK-LABEL: @test_constant_fold_exp2_f32_neg0_strictfp(
-; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float -0.000000e+00) #[[ATTR16]]
+; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float -0.000000e+00) #[[ATTR19]]
; CHECK-NEXT: ret float [[VAL]]
;
%val = call float @llvm.amdgcn.exp2.f32(float -0.0) strictfp
@@ -6487,7 +6485,7 @@ define float @test_constant_fold_exp2_f32_neg0_strictfp() strictfp {
define float @test_constant_fold_exp2_f32_1_strictfp() strictfp {
; CHECK-LABEL: @test_constant_fold_exp2_f32_1_strictfp(
-; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float 1.000000e+00) #[[ATTR16]]
+; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float 1.000000e+00) #[[ATTR19]]
; CHECK-NEXT: ret float [[VAL]]
;
%val = call float @llvm.amdgcn.exp2.f32(float 1.0) strictfp
@@ -6496,7 +6494,7 @@ define float @test_constant_fold_exp2_f32_1_strictfp() strictfp {
define float @test_constant_fold_exp2_f32_neg1_strictfp() strictfp {
; CHECK-LABEL: @test_constant_fold_exp2_f32_neg1_strictfp(
-; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float -1.000000e+00) #[[ATTR16]]
+; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float -1.000000e+00) #[[ATTR19]]
; CHECK-NEXT: ret float [[VAL]]
;
%val = call float @llvm.amdgcn.exp2.f32(float -1.0) strictfp
@@ -6505,7 +6503,7 @@ define float @test_constant_fold_exp2_f32_neg1_strictfp() strictfp {
define float @test_constant_fold_exp2_f32_2_strictfp() strictfp {
; CHECK-LABEL: @test_constant_fold_exp2_f32_2_strictfp(
-; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float 2.000000e+00) #[[ATTR16]]
+; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float 2.000000e+00) #[[ATTR19]]
; CHECK-NEXT: ret float [[VAL]]
;
%val = call float @llvm.amdgcn.exp2.f32(float 2.0) strictfp
@@ -6514,7 +6512,7 @@ define float @test_constant_fold_exp2_f32_2_strictfp() strictfp {
define float @test_constant_fold_exp2_f32_neg2_strictfp() strictfp {
; CHECK-LABEL: @test_constant_fold_exp2_f32_neg2_strictfp(
-; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float -2.000000e+00) #[[ATTR16]]
+; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float -2.000000e+00) #[[ATTR19]]
; CHECK-NEXT: ret float [[VAL]]
;
%val = call float @llvm.amdgcn.exp2.f32(float -2.0) strictfp
@@ -6523,7 +6521,7 @@ define float @test_constant_fold_exp2_f32_neg2_strictfp() strictfp {
define float @test_constant_fold_exp2_f32_neg_strictfp() strictfp {
; CHECK-LABEL: @test_constant_fold_exp2_f32_neg_strictfp(
-; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float -1.000000e+01) #[[ATTR16]]
+; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.exp2.f32(float -1.000000e+01) #[[ATTR19]]
; CHECK-NEXT: ret float [[VAL]]
;
%val = call float @llvm.amdgcn.exp2.f32(float -10.0) strictfp
@@ -6571,6 +6569,7 @@ declare i32 @llvm.amdgcn.prng.b32(i32)
define i32 @prng_undef_i32() {
; CHECK-LABEL: @prng_undef_i32(
; CHECK-NEXT: ret i32 undef
+;
%prng = call i32 @llvm.amdgcn.prng.b32(i32 undef)
ret i32 %prng
}
@@ -6578,6 +6577,7 @@ define i32 @prng_undef_i32() {
define i32 @prng_poison_i32() {
; CHECK-LABEL: @prng_poison_i32(
; CHECK-NEXT: ret i32 poison
+;
%prng = call i32 @llvm.amdgcn.prng.b32(i32 poison)
ret i32 %prng
}
>From 10e186494430cfa4e53258353872f8e85e3bb034 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Wed, 5 Mar 2025 16:58:58 +0000
Subject: [PATCH 2/3] Add trivial uniformity tests
---
.../InstCombine/AMDGPU/trivially-uniform.ll | 123 ++++++++++++++++++
1 file changed, 123 insertions(+)
create mode 100644 llvm/test/Transforms/InstCombine/AMDGPU/trivially-uniform.ll
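
To make the new test file easier to read: readfirstlane is used throughout because its combine consults isTriviallyUniform on the source operand and forwards the operand when it is uniform, so a folded-away call is the observable signal that the input was deemed trivially uniform. The snippet below is only a minimal approximation of that fold written against the generic InstCombiner API, not the actual case in AMDGPUInstCombineIntrinsic.cpp; the switch framing is assumed.

  case Intrinsic::amdgcn_readfirstlane: {
    // If the source is already uniform (a constant, an SGPR-passed argument,
    // or the result of an always-uniform intrinsic), the call is a no-op and
    // can be replaced by its operand.
    const Use &Src = II.getOperandUse(0);
    if (isTriviallyUniform(Src))
      return IC.replaceInstUsesWith(II, Src.get());
    break;
  }

With the new Argument check in place, this is the fold that turns e.g. test_compute_i32 into a plain store of %x and test_graphics_i32 into ret i32 %x, while the non-inreg negative tests keep the readfirstlane call.
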
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/trivially-uniform.ll b/llvm/test/Transforms/InstCombine/AMDGPU/trivially-uniform.ll
new file mode 100644
index 0000000000000..e1c51ea5fc36d
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/trivially-uniform.ll
@@ -0,0 +1,123 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -mtriple=amdgcn -mcpu=gfx1010 -passes=instcombine -S < %s | FileCheck %s
+
+; Use readfirstlane to demonstrate when InstCombine deems an input to
+; be trivially uniform.
+
+; Constants are trivially uniform.
+define i32 @test_constant() {
+; CHECK-LABEL: define i32 @test_constant(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: ret i32 7
+;
+ %r = call i32 @llvm.amdgcn.readfirstlane(i32 7)
+ ret i32 %r
+}
+
+; The result of an AlwaysUniform intrinsic is trivially uniform.
+define i32 @test_intrinsic(i32 %x) {
+; CHECK-LABEL: define i32 @test_intrinsic(
+; CHECK-SAME: i32 [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[Y:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[X]])
+; CHECK-NEXT: ret i32 [[Y]]
+;
+ %y = call i32 @llvm.amdgcn.readfirstlane(i32 %x)
+ %r = call i32 @llvm.amdgcn.readfirstlane(i32 %y)
+ ret i32 %r
+}
+
+; In compute kernels, all arguments are trivially uniform.
+
+define amdgpu_kernel void @test_compute_i32(ptr %out, i32 %x) {
+; CHECK-LABEL: define amdgpu_kernel void @test_compute_i32(
+; CHECK-SAME: ptr [[OUT:%.*]], i32 [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: store i32 [[X]], ptr [[OUT]], align 4
+; CHECK-NEXT: ret void
+;
+ %r = call i32 @llvm.amdgcn.readfirstlane(i32 %x)
+ store i32 %r, ptr %out
+ ret void
+}
+
+define amdgpu_kernel void @test_compute_i1(ptr %out, i1 %x) {
+; CHECK-LABEL: define amdgpu_kernel void @test_compute_i1(
+; CHECK-SAME: ptr [[OUT:%.*]], i1 [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: store i1 [[X]], ptr [[OUT]], align 1
+; CHECK-NEXT: ret void
+;
+ %r = call i1 @llvm.amdgcn.readfirstlane(i1 %x)
+ store i1 %r, ptr %out
+ ret void
+}
+
+define amdgpu_kernel void @test_compute_v32i1(ptr %out, <32 x i1> %x) {
+; CHECK-LABEL: define amdgpu_kernel void @test_compute_v32i1(
+; CHECK-SAME: ptr [[OUT:%.*]], <32 x i1> [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: store <32 x i1> [[X]], ptr [[OUT]], align 4
+; CHECK-NEXT: ret void
+;
+ %r = call <32 x i1> @llvm.amdgcn.readfirstlane(<32 x i1> %x)
+ store <32 x i1> %r, ptr %out
+ ret void
+}
+
+; In graphics shaders, inreg arguments are trivially uniform.
+
+define amdgpu_ps i32 @test_graphics_i32(i32 inreg %x) {
+; CHECK-LABEL: define amdgpu_ps i32 @test_graphics_i32(
+; CHECK-SAME: i32 inreg [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %r = call i32 @llvm.amdgcn.readfirstlane(i32 %x)
+ ret i32 %r
+}
+
+define amdgpu_ps i1 @test_graphics_i1(i1 inreg %x) {
+; CHECK-LABEL: define amdgpu_ps i1 @test_graphics_i1(
+; CHECK-SAME: i1 inreg [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: ret i1 [[X]]
+;
+ %r = call i1 @llvm.amdgcn.readfirstlane(i1 %x)
+ ret i1 %r
+}
+
+define amdgpu_ps <32 x i1> @test_graphics_v32i1(<32 x i1> inreg %x) {
+; CHECK-LABEL: define amdgpu_ps <32 x i1> @test_graphics_v32i1(
+; CHECK-SAME: <32 x i1> inreg [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: ret <32 x i1> [[X]]
+;
+ %r = call <32 x i1> @llvm.amdgcn.readfirstlane(<32 x i1> %x)
+ ret <32 x i1> %r
+}
+
+; In graphics shaders, non-inreg arguments are not trivially uniform.
+
+define amdgpu_ps i32 @test_graphics_i32_negative(i32 %x) {
+; CHECK-LABEL: define amdgpu_ps i32 @test_graphics_i32_negative(
+; CHECK-SAME: i32 [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[X]])
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %r = call i32 @llvm.amdgcn.readfirstlane(i32 %x)
+ ret i32 %r
+}
+
+define amdgpu_ps i1 @test_graphics_i1_negative(i1 %x) {
+; CHECK-LABEL: define amdgpu_ps i1 @test_graphics_i1_negative(
+; CHECK-SAME: i1 [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call i1 @llvm.amdgcn.readfirstlane.i1(i1 [[X]])
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %r = call i1 @llvm.amdgcn.readfirstlane(i1 %x)
+ ret i1 %r
+}
+
+define amdgpu_ps <32 x i1> @test_graphics_v32i1_negative(<32 x i1> %x) {
+; CHECK-LABEL: define amdgpu_ps <32 x i1> @test_graphics_v32i1_negative(
+; CHECK-SAME: <32 x i1> [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <32 x i1> @llvm.amdgcn.readfirstlane.v32i1(<32 x i1> [[X]])
+; CHECK-NEXT: ret <32 x i1> [[R]]
+;
+ %r = call <32 x i1> @llvm.amdgcn.readfirstlane(<32 x i1> %x)
+ ret <32 x i1> %r
+}
>From f39a6d2395e39dc1457109e9f4c0795e4f289c41 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Fri, 7 Mar 2025 09:49:13 +0000
Subject: [PATCH 3/3] Test amdgpu_gfx i1 cases
---
.../InstCombine/AMDGPU/trivially-uniform.ll | 40 +++++++++++++++++++
1 file changed, 40 insertions(+)
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/trivially-uniform.ll b/llvm/test/Transforms/InstCombine/AMDGPU/trivially-uniform.ll
index e1c51ea5fc36d..2a3e3929d71b2 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/trivially-uniform.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/trivially-uniform.ll
@@ -121,3 +121,43 @@ define amdgpu_ps <32 x i1> @test_graphics_v32i1_negative(<32 x i1> %x) {
%r = call <32 x i1> @llvm.amdgcn.readfirstlane(<32 x i1> %x)
ret <32 x i1> %r
}
+
+; Test i1 arguments in non-entry functions.
+
+define amdgpu_gfx i1 @test_callable_i1(i1 inreg %x) {
+; CHECK-LABEL: define amdgpu_gfx i1 @test_callable_i1(
+; CHECK-SAME: i1 inreg [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: ret i1 [[X]]
+;
+ %r = call i1 @llvm.amdgcn.readfirstlane(i1 %x)
+ ret i1 %r
+}
+
+define amdgpu_gfx <32 x i1> @test_callable_v32i1(<32 x i1> inreg %x) {
+; CHECK-LABEL: define amdgpu_gfx <32 x i1> @test_callable_v32i1(
+; CHECK-SAME: <32 x i1> inreg [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: ret <32 x i1> [[X]]
+;
+ %r = call <32 x i1> @llvm.amdgcn.readfirstlane(<32 x i1> %x)
+ ret <32 x i1> %r
+}
+
+define amdgpu_gfx i1 @test_callable_i1_negative(i1 %x) {
+; CHECK-LABEL: define amdgpu_gfx i1 @test_callable_i1_negative(
+; CHECK-SAME: i1 [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call i1 @llvm.amdgcn.readfirstlane.i1(i1 [[X]])
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %r = call i1 @llvm.amdgcn.readfirstlane(i1 %x)
+ ret i1 %r
+}
+
+define amdgpu_gfx <32 x i1> @test_callable_v32i1_negative(<32 x i1> %x) {
+; CHECK-LABEL: define amdgpu_gfx <32 x i1> @test_callable_v32i1_negative(
+; CHECK-SAME: <32 x i1> [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[R:%.*]] = call <32 x i1> @llvm.amdgcn.readfirstlane.v32i1(<32 x i1> [[X]])
+; CHECK-NEXT: ret <32 x i1> [[R]]
+;
+ %r = call <32 x i1> @llvm.amdgcn.readfirstlane(<32 x i1> %x)
+ ret <32 x i1> %r
+}
More information about the llvm-commits mailing list