[llvm] [AMDGPU] Add UniformBinFrag to SALU fminimum/fmaximum patterns. NFCI. (PR #142169)
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Fri May 30 08:26:00 PDT 2025
https://github.com/jayfoad created https://github.com/llvm/llvm-project/pull/142169
SALU patterns should have UniformBinFrag because they can only handle
uniform inputs. VALU patterns do not need DivergentBinFrag because they
work for both uniform and divergent inputs; instead we can use
AddedComplexity to ensure that SALU patterns are preferred.
>From f31359c051314d8aae2af586f03dba486616f53d Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Fri, 30 May 2025 16:20:23 +0100
Subject: [PATCH] [AMDGPU] Add UniformBinFrag to SALU fminimum/fmaximum
patterns. NFCI.
SALU patterns should have UniformBinFrag because they can only handle
uniform inputs. VALU patterns do not need DivergentBinFrag because they
work for both uniform and divergent inputs; instead we can use
AddedComplexity to ensure that SALU patterns are preferred.
---
llvm/lib/Target/AMDGPU/SOPInstructions.td | 10 +++++-----
llvm/lib/Target/AMDGPU/VOP3Instructions.td | 8 ++++----
2 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 40b3dfb94ce2f..e0a36758534d5 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -922,11 +922,11 @@ let SubtargetPredicate = HasSALUFloatInsts, mayRaiseFPException = 1,
// On GFX12 MIN/MAX instructions do not read MODE register.
let SubtargetPredicate = isGFX12Plus, mayRaiseFPException = 1, isCommutable = 1,
- isReMaterializable = 1, SchedRW = [WriteSFPU], AddedComplexity = 17 in {
- def S_MINIMUM_F32 : SOP2_F32_Inst<"s_minimum_f32", fminimum>;
- def S_MAXIMUM_F32 : SOP2_F32_Inst<"s_maximum_f32", fmaximum>;
- def S_MINIMUM_F16 : SOP2_F16_Inst<"s_minimum_f16", fminimum>;
- def S_MAXIMUM_F16 : SOP2_F16_Inst<"s_maximum_f16", fmaximum>;
+ isReMaterializable = 1, SchedRW = [WriteSFPU], AddedComplexity = 25 in {
+ def S_MINIMUM_F32 : SOP2_F32_Inst<"s_minimum_f32", UniformBinFrag<fminimum>>;
+ def S_MAXIMUM_F32 : SOP2_F32_Inst<"s_maximum_f32", UniformBinFrag<fmaximum>>;
+ def S_MINIMUM_F16 : SOP2_F16_Inst<"s_minimum_f16", UniformBinFrag<fminimum>>;
+ def S_MAXIMUM_F16 : SOP2_F16_Inst<"s_maximum_f16", UniformBinFrag<fmaximum>>;
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 0252c4f1b0929..594b37bb6e21a 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -168,10 +168,10 @@ defm V_MUL_HI_I32 : VOP3Inst <"v_mul_hi_i32", V_MUL_PROF<VOP_I32_I32_I32>, mulhs
} // End SchedRW = [WriteIntMul]
let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0, AddedComplexity = 1 in {
-defm V_MINIMUM_F32 : VOP3Inst <"v_minimum_f32", VOP3_Profile<VOP_F32_F32_F32>, DivergentBinFrag<fminimum>>;
-defm V_MAXIMUM_F32 : VOP3Inst <"v_maximum_f32", VOP3_Profile<VOP_F32_F32_F32>, DivergentBinFrag<fmaximum>>;
-defm V_MINIMUM_F16 : VOP3Inst_t16 <"v_minimum_f16", VOP_F16_F16_F16, DivergentBinFrag<fminimum>>;
-defm V_MAXIMUM_F16 : VOP3Inst_t16 <"v_maximum_f16", VOP_F16_F16_F16, DivergentBinFrag<fmaximum>>;
+defm V_MINIMUM_F32 : VOP3Inst <"v_minimum_f32", VOP3_Profile<VOP_F32_F32_F32>, fminimum>;
+defm V_MAXIMUM_F32 : VOP3Inst <"v_maximum_f32", VOP3_Profile<VOP_F32_F32_F32>, fmaximum>;
+defm V_MINIMUM_F16 : VOP3Inst_t16 <"v_minimum_f16", VOP_F16_F16_F16, fminimum>;
+defm V_MAXIMUM_F16 : VOP3Inst_t16 <"v_maximum_f16", VOP_F16_F16_F16, fmaximum>;
let SchedRW = [WriteDoubleAdd] in {
defm V_MINIMUM_F64 : VOP3Inst <"v_minimum_f64", VOP3_Profile<VOP_F64_F64_F64>, fminimum>;
More information about the llvm-commits
mailing list