[llvm] 36cfe26 - AMDGPU: Try to unfold fneg source when matching legacy fmin/fmax

Thu Feb 2 18:50:30 PST 2023

Author: Matt Arsenault
Date: 2023-02-02T22:50:23-04:00
New Revision: 36cfe26a5288d99e66c75d82989d154874999b98

URL: https://github.com/llvm/llvm-project/commit/36cfe26a5288d99e66c75d82989d154874999b98
DIFF: https://github.com/llvm/llvm-project/commit/36cfe26a5288d99e66c75d82989d154874999b98.diff

LOG: AMDGPU: Try to unfold fneg source when matching legacy fmin/fmax

This is NFC as it stands, since other combines will effectively
prevent this from being reachable. This will avoid regressions in a
future change which tries to make better use of select source
modifiers.

Didn't bother with the GlobalISel part for now, since the baseline
combine doesn't seem to work on the existing test.

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
    llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
    llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
    llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
    llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 7dda63d2ce417..62c331e6bcdda 100644

--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -1406,15 +1406,16 @@ SDValue AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
   return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Args);
 }
 
-/// Generate Min/Max node
-SDValue AMDGPUTargetLowering::combineFMinMaxLegacy(const SDLoc &DL, EVT VT,
-                                                   SDValue LHS, SDValue RHS,
-                                                   SDValue True, SDValue False,
-                                                   SDValue CC,
-                                                   DAGCombinerInfo &DCI) const {
-  if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
-    return SDValue();
+// TODO: Handle fabs too
+static SDValue peekFNeg(SDValue Val) {
+  if (Val.getOpcode() == ISD::FNEG)
+    return Val.getOperand(0);
 
+  return Val;
+}
+SDValue AMDGPUTargetLowering::combineFMinMaxLegacyImpl(
+    const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True,
+    SDValue False, SDValue CC, DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
   ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
   switch (CCOpcode) {
@@ -1480,6 +1481,45 @@ SDValue AMDGPUTargetLowering::combineFMinMaxLegacy(const SDLoc &DL, EVT VT,
   return SDValue();
 }
 
+/// Generate Min/Max node
+SDValue AMDGPUTargetLowering::combineFMinMaxLegacy(const SDLoc &DL, EVT VT,
+                                                   SDValue LHS, SDValue RHS,
+                                                   SDValue True, SDValue False,
+                                                   SDValue CC,
+                                                   DAGCombinerInfo &DCI) const {
+  if ((LHS == True && RHS == False) || (LHS == False && RHS == True))
+    return combineFMinMaxLegacyImpl(DL, VT, LHS, RHS, True, False, CC, DCI);
+
+  SelectionDAG &DAG = DCI.DAG;
+
+  // If we can't directly match this, try to see if we can fold an fneg to
+  // match.
+
+  ConstantFPSDNode *CRHS = dyn_cast<ConstantFPSDNode>(RHS);
+  ConstantFPSDNode *CFalse = dyn_cast<ConstantFPSDNode>(False);
+  SDValue NegTrue = peekFNeg(True);
+
+  // Undo the combine foldFreeOpFromSelect does if it helps us match the
+  // fmin/fmax.
+  //
+  // select (fcmp olt (lhs, K)), (fneg lhs), -K
+  // -> fneg (fmin_legacy lhs, K)
+  //
+  // TODO: Use getNegatedExpression
+  if (LHS == NegTrue && CFalse && CRHS) {
+    APFloat NegRHS = neg(CRHS->getValueAPF());
+    if (NegRHS == CFalse->getValueAPF()) {
+      SDValue Combined =
+          combineFMinMaxLegacyImpl(DL, VT, LHS, RHS, NegTrue, False, CC, DCI);
+      if (Combined)
+        return DAG.getNode(ISD::FNEG, DL, VT, Combined);
+      return SDValue();
+    }
+  }
+
+  return SDValue();
+}
+
 std::pair<SDValue, SDValue>
 AMDGPUTargetLowering::split64BitValue(SDValue Op, SelectionDAG &DAG) const {
   SDLoc SL(Op);

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index e5efa1362664d..0264a7ab950f3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -229,6 +229,10 @@ class AMDGPUTargetLowering : public TargetLowering {
                           SmallVectorImpl<SDValue> &Results,
                           SelectionDAG &DAG) const override;
 
+  SDValue combineFMinMaxLegacyImpl(const SDLoc &DL, EVT VT, SDValue LHS,
+                                   SDValue RHS, SDValue True, SDValue False,
+                                   SDValue CC, DAGCombinerInfo &DCI) const;
+
   SDValue combineFMinMaxLegacy(const SDLoc &DL, EVT VT, SDValue LHS,
                                SDValue RHS, SDValue True, SDValue False,
                                SDValue CC, DAGCombinerInfo &DCI) const;

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
index 9c04df0b36837..d3e6d3acec56e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
@@ -91,6 +91,8 @@ bool AMDGPUPostLegalizerCombinerHelper::matchFMinFMaxLegacy(
   Info.True = MI.getOperand(2).getReg();
   Info.False = MI.getOperand(3).getReg();
 
+  // TODO: Handle case where the the selected value is an fneg and the compared
+  // constant is the negation of the selected value.
   if (!(Info.LHS == Info.True && Info.RHS == Info.False) &&
       !(Info.LHS == Info.False && Info.RHS == Info.True))
     return false;

diff  --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
index 852e6bbe2d2d6..a4cc9522a759d 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
@@ -628,13 +628,12 @@ define amdgpu_ps half @fneg_fadd_0_nsz_f16(half inreg %tmp2, half inreg %tmp6, <
 ; SI-SAFE-LABEL: fneg_fadd_0_nsz_f16:
 ; SI-SAFE:       ; %bb.0: ; %.entry
 ; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v0, s0
-; SI-SAFE-NEXT:    v_bfrev_b32_e32 v1, 1
-; SI-SAFE-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
+; SI-SAFE-NEXT:    s_brev_b32 s0, 1
+; SI-SAFE-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
 ; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; SI-SAFE-NEXT:    v_cmp_nlt_f32_e32 vcc, 0, v0
-; SI-SAFE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
-; SI-SAFE-NEXT:    v_cmp_nlt_f32_e32 vcc, 0, v0
-; SI-SAFE-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
+; SI-SAFE-NEXT:    v_min_legacy_f32_e32 v0, 0, v0
+; SI-SAFE-NEXT:    v_cmp_ngt_f32_e32 vcc, s0, v0
+; SI-SAFE-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
 ; SI-SAFE-NEXT:    ; return to shader part epilog
 ;
 ; SI-NSZ-LABEL: fneg_fadd_0_nsz_f16:

diff  --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
index 53502cb214fde..3e066cfc81d6c 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
@@ -291,12 +291,10 @@ define amdgpu_ps float @fneg_fadd_0_f32(float inreg %tmp2, float inreg %tmp6, <4
 define amdgpu_ps float @fneg_fadd_0_nsz_f32(float inreg %tmp2, float inreg %tmp6, <4 x i32> %arg) #2 {
 ; SI-SAFE-LABEL: fneg_fadd_0_nsz_f32:
 ; SI-SAFE:       ; %bb.0: ; %.entry
-; SI-SAFE-NEXT:    v_bfrev_b32_e32 v0, 1
-; SI-SAFE-NEXT:    v_mov_b32_e32 v1, s0
-; SI-SAFE-NEXT:    v_cmp_ngt_f32_e64 vcc, s0, 0
-; SI-SAFE-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; SI-SAFE-NEXT:    v_min_legacy_f32_e64 v0, 0, s0
+; SI-SAFE-NEXT:    s_brev_b32 s0, 1
 ; SI-SAFE-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
-; SI-SAFE-NEXT:    v_cmp_nlt_f32_e32 vcc, 0, v0
+; SI-SAFE-NEXT:    v_cmp_ngt_f32_e32 vcc, s0, v0
 ; SI-SAFE-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
 ; SI-SAFE-NEXT:    ; return to shader part epilog
 ;