[llvm] [AMDGPU] Set Convergent property for image.(getlod/sample*) intrinsics that use WQM (PR #122908)
Mariusz Sikora via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 14 06:32:55 PST 2025
https://github.com/mariusz-sikora-at-amd created https://github.com/llvm/llvm-project/pull/122908
This change adds the IntrConvergent property to the image.getlod intrinsic and to several image.sample intrinsics, because these intrinsics use WQM. All image.sample intrinsics except the LOD (_L), Level 0 (_LZ), and Derivative (_D) variants will be marked as Convergent.
From be94d5850a2226b73282b886e74d66112f88cdbc Mon Sep 17 00:00:00 2001
From: Mariusz Sikora <mariusz.sikora at amd.com>
Date: Tue, 14 Jan 2025 03:30:35 -0500
Subject: [PATCH] [AMDGPU] Set Convergent property for image.(getlod/sample*)
intrinsics that use WQM
This change adds the IntrConvergent property to the image.getlod intrinsic
and to several image.sample intrinsics. All image.sample intrinsics except
the LOD (_L), Level 0 (_LZ), and Derivative (_D) variants will be marked as Convergent.
---
llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 11 ++++++++---
llvm/test/CodeGen/AMDGPU/sink-image-sample.ll | 4 ++--
2 files changed, 10 insertions(+), 5 deletions(-)
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index b930d6983e2251..b529642a558710 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -876,6 +876,8 @@ class AMDGPUSampleVariant<string ucmod, string lcmod, list<AMDGPUArg> extra_addr
// Name of the {lod} or {clamp} argument that is appended to the coordinates,
// if any.
string LodOrClamp = "";
+
+ bit UsesWQM = false;
}
// AMDGPUSampleVariants: all variants supported by IMAGE_SAMPLE
@@ -905,8 +907,9 @@ defset list<AMDGPUSampleVariant> AMDGPUSampleVariants = {
}
defset list<AMDGPUSampleVariant> AMDGPUSampleVariantsNoGradients = {
+ let UsesWQM = true in
defm AMDGPUSample : AMDGPUSampleHelper_Clamp<"", "", []>;
- let Bias = true in
+ let Bias = true, UsesWQM = true in
defm AMDGPUSample : AMDGPUSampleHelper_Clamp<
"_B", "_b", [AMDGPUArg<llvm_anyfloat_ty, "bias">]>;
let LodOrClamp = "lod" in
@@ -1172,7 +1175,8 @@ defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimIntrinsics = {
foreach dim = AMDGPUDims.NoMsaa in {
def !strconcat(NAME, "_", dim.Name) : AMDGPUImageDimIntrinsic<
AMDGPUDimSampleProfile<opmod, dim, sample>,
- !if(NoMem, [IntrNoMem], [IntrReadMem]),
+ !listconcat(!if(NoMem, [IntrNoMem], [IntrReadMem]),
+ !if(sample.UsesWQM, [IntrConvergent], [])),
!if(NoMem, [], [SDNPMemOperand])>;
}
}
@@ -1188,7 +1192,8 @@ defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimIntrinsics = {
foreach dim = AMDGPUDims.NoMsaa in {
def !strconcat(NAME, "_", dim.Name, "_nortn") : AMDGPUImageDimIntrinsic<
AMDGPUDimSampleNoReturnProfile<opmod, dim, sample>,
- [IntrWillReturn], [SDNPMemOperand]>;
+ !listconcat([IntrWillReturn], !if(sample.UsesWQM, [IntrConvergent], [])),
+ [SDNPMemOperand]>;
}
}
foreach sample = AMDGPUSampleVariants in {
diff --git a/llvm/test/CodeGen/AMDGPU/sink-image-sample.ll b/llvm/test/CodeGen/AMDGPU/sink-image-sample.ll
index e1273e1a4bcd08..8a3b544ac2b80f 100644
--- a/llvm/test/CodeGen/AMDGPU/sink-image-sample.ll
+++ b/llvm/test/CodeGen/AMDGPU/sink-image-sample.ll
@@ -11,7 +11,7 @@
define amdgpu_ps float @sinking_img_sample() {
main_body:
- %i = call <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 7, float undef, float undef, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0)
+ %i = call <3 x float> @llvm.amdgcn.image.sample.l.2d.v3f32.f32(i32 7, float undef, float undef, float undef, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0)
br i1 undef, label %endif1, label %if1
if1: ; preds = %main_body
@@ -28,7 +28,7 @@ exit: ; preds = %endif1, %if1
ret float %i24
}
; Function Attrs: nounwind readonly willreturn
-declare <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #3
+declare <3 x float> @llvm.amdgcn.image.sample.l.2d.v3f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #3
; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
declare float @llvm.fma.f32(float, float, float) #2
More information about the llvm-commits
mailing list