[llvm] 912df60 - AMDGPU: Handle minimumnum/maximumnum in fneg combines (#139133)
via llvm-commits
llvm-commits at lists.llvm.org
Thu May 8 23:07:04 PDT 2025
Author: Matt Arsenault
Date: 2025-05-09T08:07:01+02:00
New Revision: 912df60b08f6cbeaa1a5a27f7bc3628726842885
URL: https://github.com/llvm/llvm-project/commit/912df60b08f6cbeaa1a5a27f7bc3628726842885
DIFF: https://github.com/llvm/llvm-project/commit/912df60b08f6cbeaa1a5a27f7bc3628726842885.diff
LOG: AMDGPU: Handle minimumnum/maximumnum in fneg combines (#139133)
Added: (none)
Modified:
    llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
    llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
Removed: (none)
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index d4091ed6e770c..107a789523435 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -661,6 +661,8 @@ static bool fnegFoldsIntoOpcode(unsigned Opc) {
case ISD::FMAXNUM_IEEE:
case ISD::FMINIMUM:
case ISD::FMAXIMUM:
+ case ISD::FMINIMUMNUM:
+ case ISD::FMAXIMUMNUM:
case ISD::SELECT:
case ISD::FSIN:
case ISD::FTRUNC:
@@ -4807,10 +4809,14 @@ static unsigned inverseMinMax(unsigned Opc) {
return ISD::FMINIMUM;
case ISD::FMINIMUM:
return ISD::FMAXIMUM;
+ case ISD::FMAXIMUMNUM:
+ return ISD::FMINIMUMNUM;
+ case ISD::FMINIMUMNUM:
+ return ISD::FMAXIMUMNUM;
case AMDGPUISD::FMAX_LEGACY:
return AMDGPUISD::FMIN_LEGACY;
case AMDGPUISD::FMIN_LEGACY:
- return AMDGPUISD::FMAX_LEGACY;
+ return AMDGPUISD::FMAX_LEGACY;
default:
llvm_unreachable("invalid min/max opcode");
}
@@ -4932,6 +4938,8 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
case ISD::FMINNUM_IEEE:
case ISD::FMINIMUM:
case ISD::FMAXIMUM:
+ case ISD::FMINIMUMNUM:
+ case ISD::FMAXIMUMNUM:
case AMDGPUISD::FMAX_LEGACY:
case AMDGPUISD::FMIN_LEGACY: {
// fneg (fmaxnum x, y) -> fminnum (fneg x), (fneg y)
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
index 30e685f974599..46da9d33639b6 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
@@ -2167,8 +2167,8 @@ define half @v_fneg_inv2pi_minimumnum_f16(half %a) #0 {
; SI-LABEL: v_fneg_inv2pi_minimumnum_f16:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
-; SI-NEXT: v_cvt_f32_f16_e64 v0, -v0
+; SI-NEXT: v_cvt_f16_f32_e64 v0, -v0
+; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-NEXT: v_max_f32_e32 v0, 0xbe230000, v0
; SI-NEXT: s_setpc_b64 s[30:31]
;
@@ -2188,8 +2188,8 @@ define half @v_fneg_neg_inv2pi_minimumnum_f16(half %a) #0 {
; SI-LABEL: v_fneg_neg_inv2pi_minimumnum_f16:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
-; SI-NEXT: v_cvt_f32_f16_e64 v0, -v0
+; SI-NEXT: v_cvt_f16_f32_e64 v0, -v0
+; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-NEXT: v_max_f32_e32 v0, 0x3e230000, v0
; SI-NEXT: s_setpc_b64 s[30:31]
;
@@ -2208,10 +2208,10 @@ define double @v_fneg_inv2pi_minimumnum_f64(double %a) #0 {
; SI-LABEL: v_fneg_inv2pi_minimumnum_f64:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; SI-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
; SI-NEXT: s_mov_b32 s4, 0x6dc9c882
; SI-NEXT: s_mov_b32 s5, 0xbfc45f30
-; SI-NEXT: v_max_f64 v[0:1], -v[0:1], s[4:5]
+; SI-NEXT: v_max_f64 v[0:1], v[0:1], s[4:5]
; SI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: v_fneg_inv2pi_minimumnum_f64:
@@ -2230,17 +2230,17 @@ define double @v_fneg_neg_inv2pi_minimumnum_f64(double %a) #0 {
; SI-LABEL: v_fneg_neg_inv2pi_minimumnum_f64:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; SI-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
; SI-NEXT: s_mov_b32 s4, 0x6dc9c882
; SI-NEXT: s_mov_b32 s5, 0x3fc45f30
-; SI-NEXT: v_max_f64 v[0:1], -v[0:1], s[4:5]
+; SI-NEXT: v_max_f64 v[0:1], v[0:1], s[4:5]
; SI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: v_fneg_neg_inv2pi_minimumnum_f64:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; VI-NEXT: v_max_f64 v[0:1], -v[0:1], 0.15915494309189532
+; VI-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
+; VI-NEXT: v_max_f64 v[0:1], v[0:1], 0.15915494309189532
; VI-NEXT: s_setpc_b64 s[30:31]
%min = call double @llvm.minimumnum.f64(double 0xbfc45f306dc9c882, double %a)
%fneg = fneg double %min
@@ -2313,9 +2313,9 @@ define { float, float } @v_fneg_minimumnum_multi_use_minimumnum_f32_ieee(float %
; GCN-LABEL: v_fneg_minimumnum_multi_use_minimumnum_f32_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v1, 1.0, v1
-; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GCN-NEXT: v_max_f32_e64 v0, -v0, -v1
+; GCN-NEXT: v_mul_f32_e32 v1, -1.0, v1
+; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
+; GCN-NEXT: v_max_f32_e32 v0, v0, v1
; GCN-NEXT: v_mul_f32_e32 v1, -4.0, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
%min = call float @llvm.minimumnum.f32(float %a, float %b)
@@ -2330,9 +2330,9 @@ define <2 x float> @v_fneg_minimumnum_multi_use_minimumnum_f32_no_ieee(float %a,
; GCN-LABEL: v_fneg_minimumnum_multi_use_minimumnum_f32_no_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v1, 1.0, v1
-; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GCN-NEXT: v_max_f32_e64 v0, -v0, -v1
+; GCN-NEXT: v_mul_f32_e32 v1, -1.0, v1
+; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
+; GCN-NEXT: v_max_f32_e32 v0, v0, v1
; GCN-NEXT: v_mul_f32_e32 v1, -4.0, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
%min = call float @llvm.minimumnum.f32(float %a, float %b)
@@ -2513,9 +2513,9 @@ define { float, float } @v_fneg_maximumnum_multi_use_maximumnum_f32_ieee(float %
; GCN-LABEL: v_fneg_maximumnum_multi_use_maximumnum_f32_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v1, 1.0, v1
-; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GCN-NEXT: v_min_f32_e64 v0, -v0, -v1
+; GCN-NEXT: v_mul_f32_e32 v1, -1.0, v1
+; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
+; GCN-NEXT: v_min_f32_e32 v0, v0, v1
; GCN-NEXT: v_mul_f32_e32 v1, -4.0, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
%max = call float @llvm.maximumnum.f32(float %a, float %b)
@@ -2530,9 +2530,9 @@ define <2 x float> @v_fneg_maximumnum_multi_use_maximumnum_f32_no_ieee(float %a,
; GCN-LABEL: v_fneg_maximumnum_multi_use_maximumnum_f32_no_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v1, 1.0, v1
-; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
-; GCN-NEXT: v_min_f32_e64 v0, -v0, -v1
+; GCN-NEXT: v_mul_f32_e32 v1, -1.0, v1
+; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
+; GCN-NEXT: v_min_f32_e32 v0, v0, v1
; GCN-NEXT: v_mul_f32_e32 v1, -4.0, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
%max = call float @llvm.maximumnum.f32(float %a, float %b)
More information about the llvm-commits mailing list