[llvm] db6bc2a - [AMDGPU][GlobalISel] Fold G_FNEG above when users cannot fold mods

Mirko Brkusanin via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 17 05:27:37 PST 2021


Author: Mirko Brkusanin
Date: 2021-11-17T14:25:13+01:00
New Revision: db6bc2ab51371f69569c25514c20c3786398a032

URL: https://github.com/llvm/llvm-project/commit/db6bc2ab51371f69569c25514c20c3786398a032
DIFF: https://github.com/llvm/llvm-project/commit/db6bc2ab51371f69569c25514c20c3786398a032.diff

LOG: [AMDGPU][GlobalISel] Fold G_FNEG above when users cannot fold mods

If possible, fold fneg into the instruction above it when its users cannot fold
source modifiers and we know the fold will decrease the instruction count.
Follows the same logic as the SDAG combiner in choosing combine opportunities.
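
For illustration, the G_FMINNUM case from the added MIR test: the fneg is
folded into its source by negating the operands and inverting min to max,
and the resulting G_FNEGs are expected to fold into source modifiers during
instruction selection.

    %2:_(s32) = G_FMINNUM %0, %1
    %3:_(s32) = G_FNEG %2
  ==>
    %4:_(s32) = G_FNEG %0
    %5:_(s32) = G_FNEG %1
    %3:_(s32) = G_FMAXNUM %4, %5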

Differential Revision: https://reviews.llvm.org/D112827

Added: 
    llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp
    llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h
    llvm/test/CodeGen/AMDGPU/GlobalISel/combine-foldable-fneg.mir

Modified: 
    llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
    llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
    llvm/lib/Target/AMDGPU/AMDGPUCombine.td
    llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
    llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
    llvm/lib/Target/AMDGPU/CMakeLists.txt
    llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/BUILD.gn

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 120e90c9022e8..ff4ad4b726366 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -136,6 +136,10 @@ class CombinerHelper {
   void replaceRegOpWith(MachineRegisterInfo &MRI, MachineOperand &FromRegOp,
                         Register ToReg) const;
 
+  /// Replace the opcode in the instruction with a new opcode and inform the
+  /// observer of the changes.
+  void replaceOpcodeWith(MachineInstr &FromMI, unsigned ToOpcode) const;
+
   /// Get the register bank of \p Reg.
   /// If Reg has not been assigned a register, a register class,
   /// or a register bank, then this returns nullptr.

diff  --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 9eac2d8867948..3a52959d54bfe 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -158,6 +158,15 @@ void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI,
   Observer.changedInstr(*FromRegOp.getParent());
 }
 
+void CombinerHelper::replaceOpcodeWith(MachineInstr &FromMI,
+                                       unsigned ToOpcode) const {
+  Observer.changingInstr(FromMI);
+
+  FromMI.setDesc(Builder.getTII().get(ToOpcode));
+
+  Observer.changedInstr(FromMI);
+}
+
 const RegisterBank *CombinerHelper::getRegBank(Register Reg) const {
   return RBI->getRegBank(Reg, MRI, *TRI);
 }

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
index 28946435af467..df2f9a0fa3a96 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -64,19 +64,29 @@ def remove_fcanonicalize : GICombineRule<
          [{ return PostLegalizerHelper.matchRemoveFcanonicalize(*${fcanonicalize}, ${matchinfo}); }]),
   (apply [{ Helper.replaceSingleDefInstWithReg(*${fcanonicalize}, ${matchinfo}); }])>;
 
+def foldable_fneg_matchdata : GIDefMatchData<"MachineInstr *">;
+
+def foldable_fneg : GICombineRule<
+  (defs root:$ffn, foldable_fneg_matchdata:$matchinfo),
+  (match (wip_match_opcode G_FNEG):$ffn,
+         [{ return Helper.matchFoldableFneg(*${ffn}, ${matchinfo}); }]),
+  (apply [{ Helper.applyFoldableFneg(*${ffn}, ${matchinfo}); }])>;
+
 // Combines which should only apply on SI/VI
 def gfx6gfx7_combines : GICombineGroup<[fcmp_select_to_fmin_fmax_legacy]>;
 
 def AMDGPUPreLegalizerCombinerHelper: GICombinerHelper<
-  "AMDGPUGenPreLegalizerCombinerHelper", [all_combines, clamp_i64_to_i16]> {
+  "AMDGPUGenPreLegalizerCombinerHelper",
+  [all_combines, clamp_i64_to_i16, foldable_fneg]> {
   let DisableRuleOption = "amdgpuprelegalizercombiner-disable-rule";
   let StateClass = "AMDGPUPreLegalizerCombinerHelperState";
+  let AdditionalArguments = [];
 }
 
 def AMDGPUPostLegalizerCombinerHelper: GICombinerHelper<
   "AMDGPUGenPostLegalizerCombinerHelper",
   [all_combines, gfx6gfx7_combines,
-   uchar_to_float, cvt_f32_ubyteN, remove_fcanonicalize]> {
+   uchar_to_float, cvt_f32_ubyteN, remove_fcanonicalize, foldable_fneg]> {
   let DisableRuleOption = "amdgpupostlegalizercombiner-disable-rule";
   let StateClass = "AMDGPUPostLegalizerCombinerHelperState";
   let AdditionalArguments = [];

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp
new file mode 100644
index 0000000000000..d808a9ee2b8f9
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp
@@ -0,0 +1,382 @@
+//=== lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUCombinerHelper.h"
+#include "GCNSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+using namespace MIPatternMatch;
+
+LLVM_READNONE
+static bool fnegFoldsIntoMI(const MachineInstr &MI) {
+  switch (MI.getOpcode()) {
+  case AMDGPU::G_FADD:
+  case AMDGPU::G_FSUB:
+  case AMDGPU::G_FMUL:
+  case AMDGPU::G_FMA:
+  case AMDGPU::G_FMAD:
+  case AMDGPU::G_FMINNUM:
+  case AMDGPU::G_FMAXNUM:
+  case AMDGPU::G_FMINNUM_IEEE:
+  case AMDGPU::G_FMAXNUM_IEEE:
+  case AMDGPU::G_FSIN:
+  case AMDGPU::G_FPEXT:
+  case AMDGPU::G_INTRINSIC_TRUNC:
+  case AMDGPU::G_FPTRUNC:
+  case AMDGPU::G_FRINT:
+  case AMDGPU::G_FNEARBYINT:
+  case AMDGPU::G_INTRINSIC_ROUND:
+  case AMDGPU::G_INTRINSIC_ROUNDEVEN:
+  case AMDGPU::G_FCANONICALIZE:
+  case AMDGPU::G_AMDGPU_RCP_IFLAG:
+  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
+  case AMDGPU::G_AMDGPU_FMAX_LEGACY:
+    return true;
+  case AMDGPU::G_INTRINSIC: {
+    unsigned IntrinsicID = MI.getIntrinsicID();
+    switch (IntrinsicID) {
+    case Intrinsic::amdgcn_rcp:
+    case Intrinsic::amdgcn_rcp_legacy:
+    case Intrinsic::amdgcn_sin:
+    case Intrinsic::amdgcn_fmul_legacy:
+    case Intrinsic::amdgcn_fmed3:
+    case Intrinsic::amdgcn_fma_legacy:
+      return true;
+    default:
+      return false;
+    }
+  }
+  default:
+    return false;
+  }
+}
+
+/// \returns true if the operation will definitely need to use a 64-bit
+/// encoding, and thus will use a VOP3 encoding regardless of the source
+/// modifiers.
+LLVM_READONLY
+static bool opMustUseVOP3Encoding(const MachineInstr &MI,
+                                  const MachineRegisterInfo &MRI) {
+  return MI.getNumOperands() >
+             (MI.getOpcode() == AMDGPU::G_INTRINSIC ? 4 : 3) ||
+         MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits() == 64;
+}
+
+// Most FP instructions support source modifiers.
+LLVM_READONLY
+static bool hasSourceMods(const MachineInstr &MI) {
+  if (!MI.memoperands().empty())
+    return false;
+
+  switch (MI.getOpcode()) {
+  case AMDGPU::COPY:
+  case AMDGPU::G_SELECT:
+  case AMDGPU::G_FDIV:
+  case AMDGPU::G_FREM:
+  case TargetOpcode::INLINEASM:
+  case TargetOpcode::INLINEASM_BR:
+  case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
+  case AMDGPU::G_BITCAST:
+  case AMDGPU::G_ANYEXT:
+  case AMDGPU::G_BUILD_VECTOR:
+  case AMDGPU::G_BUILD_VECTOR_TRUNC:
+  case AMDGPU::G_PHI:
+    return false;
+  case AMDGPU::G_INTRINSIC: {
+    unsigned IntrinsicID = MI.getIntrinsicID();
+    switch (IntrinsicID) {
+    case Intrinsic::amdgcn_interp_p1:
+    case Intrinsic::amdgcn_interp_p2:
+    case Intrinsic::amdgcn_interp_mov:
+    case Intrinsic::amdgcn_interp_p1_f16:
+    case Intrinsic::amdgcn_interp_p2_f16:
+    case Intrinsic::amdgcn_div_scale:
+      return false;
+    default:
+      return true;
+    }
+  }
+  default:
+    return true;
+  }
+}
+
+static bool allUsesHaveSourceMods(MachineInstr &MI, MachineRegisterInfo &MRI,
+                                  unsigned CostThreshold = 4) {
+  // Some users (such as 3-operand FMA/MAD) must use a VOP3 encoding, and thus
+  // it is truly free to use a source modifier in all cases. If there are
+  // multiple users but for each one will necessitate using VOP3, there will be
+  // a code size increase. Try to avoid increasing code size unless we know it
+  // will save on the instruction count.
+  unsigned NumMayIncreaseSize = 0;
+  Register Dst = MI.getOperand(0).getReg();
+  for (const MachineInstr &Use : MRI.use_nodbg_instructions(Dst)) {
+    if (!hasSourceMods(Use))
+      return false;
+
+    if (!opMustUseVOP3Encoding(Use, MRI)) {
+      if (++NumMayIncreaseSize > CostThreshold)
+        return false;
+    }
+  }
+  return true;
+}
+
+static bool mayIgnoreSignedZero(MachineInstr &MI) {
+  const TargetOptions &Options = MI.getMF()->getTarget().Options;
+  return Options.NoSignedZerosFPMath || MI.getFlag(MachineInstr::MIFlag::FmNsz);
+}
+
+static bool isInv2Pi(const APFloat &APF) {
+  static const APFloat KF16(APFloat::IEEEhalf(), APInt(16, 0x3118));
+  static const APFloat KF32(APFloat::IEEEsingle(), APInt(32, 0x3e22f983));
+  static const APFloat KF64(APFloat::IEEEdouble(),
+                            APInt(64, 0x3fc45f306dc9c882));
+
+  return APF.bitwiseIsEqual(KF16) || APF.bitwiseIsEqual(KF32) ||
+         APF.bitwiseIsEqual(KF64);
+}
+
+// 0 and 1.0 / (0.5 * pi) do not have inline immediates, so there is an
+// additional cost to negate them.
+static bool isConstantCostlierToNegate(MachineInstr &MI, Register Reg,
+                                       MachineRegisterInfo &MRI) {
+  Optional<FPValueAndVReg> FPValReg;
+  if (mi_match(Reg, MRI, m_GFCstOrSplat(FPValReg))) {
+    if (FPValReg->Value.isZero() && !FPValReg->Value.isNegative())
+      return true;
+
+    const GCNSubtarget &ST = MI.getMF()->getSubtarget<GCNSubtarget>();
+    if (ST.hasInv2PiInlineImm() && isInv2Pi(FPValReg->Value))
+      return true;
+  }
+  return false;
+}
+
+static unsigned inverseMinMax(unsigned Opc) {
+  switch (Opc) {
+  case AMDGPU::G_FMAXNUM:
+    return AMDGPU::G_FMINNUM;
+  case AMDGPU::G_FMINNUM:
+    return AMDGPU::G_FMAXNUM;
+  case AMDGPU::G_FMAXNUM_IEEE:
+    return AMDGPU::G_FMINNUM_IEEE;
+  case AMDGPU::G_FMINNUM_IEEE:
+    return AMDGPU::G_FMAXNUM_IEEE;
+  case AMDGPU::G_AMDGPU_FMAX_LEGACY:
+    return AMDGPU::G_AMDGPU_FMIN_LEGACY;
+  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
+    return AMDGPU::G_AMDGPU_FMAX_LEGACY;
+  default:
+    llvm_unreachable("invalid min/max opcode");
+  }
+}
+
+bool AMDGPUCombinerHelper::matchFoldableFneg(MachineInstr &MI,
+                                             MachineInstr *&MatchInfo) {
+  Register Src = MI.getOperand(1).getReg();
+  MatchInfo = MRI.getVRegDef(Src);
+
+  // If the input has multiple uses and we can either fold the negate down, or
+  // the other uses cannot, give up. This both prevents unprofitable
+  // transformations and infinite loops: we won't repeatedly try to fold around
+  // a negate that has no 'good' form.
+  if (MRI.hasOneNonDBGUse(Src)) {
+    if (allUsesHaveSourceMods(MI, MRI, 0))
+      return false;
+  } else {
+    if (fnegFoldsIntoMI(*MatchInfo) &&
+        (allUsesHaveSourceMods(MI, MRI) ||
+         !allUsesHaveSourceMods(*MatchInfo, MRI)))
+      return false;
+  }
+
+  switch (MatchInfo->getOpcode()) {
+  case AMDGPU::G_FMINNUM:
+  case AMDGPU::G_FMAXNUM:
+  case AMDGPU::G_FMINNUM_IEEE:
+  case AMDGPU::G_FMAXNUM_IEEE:
+  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
+  case AMDGPU::G_AMDGPU_FMAX_LEGACY:
+    // 0 doesn't have a negated inline immediate.
+    return !isConstantCostlierToNegate(*MatchInfo,
+                                       MatchInfo->getOperand(2).getReg(), MRI);
+  case AMDGPU::G_FADD:
+  case AMDGPU::G_FSUB:
+  case AMDGPU::G_FMA:
+  case AMDGPU::G_FMAD:
+    return mayIgnoreSignedZero(*MatchInfo);
+  case AMDGPU::G_FMUL:
+  case AMDGPU::G_FPEXT:
+  case AMDGPU::G_INTRINSIC_TRUNC:
+  case AMDGPU::G_FPTRUNC:
+  case AMDGPU::G_FRINT:
+  case AMDGPU::G_FNEARBYINT:
+  case AMDGPU::G_INTRINSIC_ROUND:
+  case AMDGPU::G_INTRINSIC_ROUNDEVEN:
+  case AMDGPU::G_FSIN:
+  case AMDGPU::G_FCANONICALIZE:
+  case AMDGPU::G_AMDGPU_RCP_IFLAG:
+    return true;
+  case AMDGPU::G_INTRINSIC: {
+    unsigned IntrinsicID = MatchInfo->getIntrinsicID();
+    switch (IntrinsicID) {
+    case Intrinsic::amdgcn_rcp:
+    case Intrinsic::amdgcn_rcp_legacy:
+    case Intrinsic::amdgcn_sin:
+    case Intrinsic::amdgcn_fmul_legacy:
+    case Intrinsic::amdgcn_fmed3:
+      return true;
+    case Intrinsic::amdgcn_fma_legacy:
+      return mayIgnoreSignedZero(*MatchInfo);
+    default:
+      return false;
+    }
+  }
+  default:
+    return false;
+  }
+}
+
+void AMDGPUCombinerHelper::applyFoldableFneg(MachineInstr &MI,
+                                             MachineInstr *&MatchInfo) {
+  // Transform:
+  // %A = inst %Op1, ...
+  // %B = fneg %A
+  //
+  // into:
+  //
+  // (if %A has one use, specifically the fneg above)
+  // %B = inst (maybe fneg %Op1), ...
+  //
+  // (if %A has multiple uses)
+  // %B = inst (maybe fneg %Op1), ...
+  // %A = fneg %B
+
+  // Replace the operand's register with a register holding the negated value.
+  auto NegateOperand = [&](MachineOperand &Op) {
+    Register Reg = Op.getReg();
+    if (!mi_match(Reg, MRI, m_GFNeg(m_Reg(Reg))))
+      Reg = Builder.buildFNeg(MRI.getType(Reg), Reg).getReg(0);
+    replaceRegOpWith(MRI, Op, Reg);
+  };
+
+  // Replace one of the two operands with a register holding the negated value.
+  auto NegateEitherOperand = [&](MachineOperand &X, MachineOperand &Y) {
+    Register XReg = X.getReg();
+    Register YReg = Y.getReg();
+    if (mi_match(XReg, MRI, m_GFNeg(m_Reg(XReg))))
+      replaceRegOpWith(MRI, X, XReg);
+    else if (mi_match(YReg, MRI, m_GFNeg(m_Reg(YReg))))
+      replaceRegOpWith(MRI, Y, YReg);
+    else {
+      YReg = Builder.buildFNeg(MRI.getType(YReg), YReg).getReg(0);
+      replaceRegOpWith(MRI, Y, YReg);
+    }
+  };
+
+  Builder.setInstrAndDebugLoc(*MatchInfo);
+
+  // Negate the appropriate operands so that the resulting value of MatchInfo
+  // is negated.
+  switch (MatchInfo->getOpcode()) {
+  case AMDGPU::G_FADD:
+  case AMDGPU::G_FSUB:
+    NegateOperand(MatchInfo->getOperand(1));
+    NegateOperand(MatchInfo->getOperand(2));
+    break;
+  case AMDGPU::G_FMUL:
+    NegateEitherOperand(MatchInfo->getOperand(1), MatchInfo->getOperand(2));
+    break;
+  case AMDGPU::G_FMINNUM:
+  case AMDGPU::G_FMAXNUM:
+  case AMDGPU::G_FMINNUM_IEEE:
+  case AMDGPU::G_FMAXNUM_IEEE:
+  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
+  case AMDGPU::G_AMDGPU_FMAX_LEGACY: {
+    NegateOperand(MatchInfo->getOperand(1));
+    NegateOperand(MatchInfo->getOperand(2));
+    unsigned Opposite = inverseMinMax(MatchInfo->getOpcode());
+    replaceOpcodeWith(*MatchInfo, Opposite);
+    break;
+  }
+  case AMDGPU::G_FMA:
+  case AMDGPU::G_FMAD:
+    NegateEitherOperand(MatchInfo->getOperand(1), MatchInfo->getOperand(2));
+    NegateOperand(MatchInfo->getOperand(3));
+    break;
+  case AMDGPU::G_FPEXT:
+  case AMDGPU::G_INTRINSIC_TRUNC:
+  case AMDGPU::G_FRINT:
+  case AMDGPU::G_FNEARBYINT:
+  case AMDGPU::G_INTRINSIC_ROUND:
+  case AMDGPU::G_INTRINSIC_ROUNDEVEN:
+  case AMDGPU::G_FSIN:
+  case AMDGPU::G_FCANONICALIZE:
+  case AMDGPU::G_AMDGPU_RCP_IFLAG:
+  case AMDGPU::G_FPTRUNC:
+    NegateOperand(MatchInfo->getOperand(1));
+    break;
+  case AMDGPU::G_INTRINSIC: {
+    unsigned IntrinsicID = MatchInfo->getIntrinsicID();
+    switch (IntrinsicID) {
+    case Intrinsic::amdgcn_rcp:
+    case Intrinsic::amdgcn_rcp_legacy:
+    case Intrinsic::amdgcn_sin:
+      NegateOperand(MatchInfo->getOperand(2));
+      break;
+    case Intrinsic::amdgcn_fmul_legacy:
+      NegateEitherOperand(MatchInfo->getOperand(2), MatchInfo->getOperand(3));
+      break;
+    case Intrinsic::amdgcn_fmed3:
+      NegateOperand(MatchInfo->getOperand(2));
+      NegateOperand(MatchInfo->getOperand(3));
+      NegateOperand(MatchInfo->getOperand(4));
+      break;
+    case Intrinsic::amdgcn_fma_legacy:
+      NegateEitherOperand(MatchInfo->getOperand(2), MatchInfo->getOperand(3));
+      NegateOperand(MatchInfo->getOperand(4));
+      break;
+    default:
+      llvm_unreachable("folding fneg not supported for this intrinsic");
+    }
+    break;
+  }
+  default:
+    llvm_unreachable("folding fneg not supported for this instruction");
+  }
+
+  Register Dst = MI.getOperand(0).getReg();
+  Register MatchInfoDst = MatchInfo->getOperand(0).getReg();
+
+  if (MRI.hasOneNonDBGUse(MatchInfoDst)) {
+    // MatchInfo now has the negated value, so use it instead of the old Dst.
+    replaceRegWith(MRI, Dst, MatchInfoDst);
+  } else {
+    // We want to swap all uses of Dst with uses of MatchInfoDst and vice versa
+    // but replaceRegWith will replace defs as well. It is easier to replace one
+    // def with a new register.
+    LLT Type = MRI.getType(Dst);
+    Register NegatedMatchInfo = MRI.createGenericVirtualRegister(Type);
+    replaceRegOpWith(MRI, MatchInfo->getOperand(0), NegatedMatchInfo);
+
+    // MatchInfo now has the negated value, so use it instead of the old Dst.
+    replaceRegWith(MRI, Dst, NegatedMatchInfo);
+
+    // Recreate the non-negated value for the other uses of the old MatchInfoDst.
+    Builder.setInstrAndDebugLoc(MI);
+    Builder.buildFNeg(MatchInfoDst, NegatedMatchInfo, MI.getFlags());
+  }
+
+  MI.eraseFromParent();
+  return;
+}

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h
new file mode 100644
index 0000000000000..1d4747136bf72
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h
@@ -0,0 +1,26 @@
+//=== lib/Target/AMDGPU/AMDGPUCombinerHelper.h ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This contains AMDGPU-specific combine transformations used by the AMDGPU
+/// combiner passes.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/Combiner.h"
+#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
+
+using namespace llvm;
+
+class AMDGPUCombinerHelper : public CombinerHelper {
+public:
+  using CombinerHelper::CombinerHelper;
+
+  bool matchFoldableFneg(MachineInstr &MI, MachineInstr *&MatchInfo);
+  void applyFoldableFneg(MachineInstr &MI, MachineInstr *&MatchInfo);
+};

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
index 728be811afaea..fc984d2dda648 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "AMDGPU.h"
+#include "AMDGPUCombinerHelper.h"
 #include "AMDGPULegalizerInfo.h"
 #include "GCNSubtarget.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
@@ -34,10 +35,11 @@ class AMDGPUPostLegalizerCombinerHelper {
   MachineIRBuilder &B;
   MachineFunction &MF;
   MachineRegisterInfo &MRI;
-  CombinerHelper &Helper;
+  AMDGPUCombinerHelper &Helper;
 
 public:
-  AMDGPUPostLegalizerCombinerHelper(MachineIRBuilder &B, CombinerHelper &Helper)
+  AMDGPUPostLegalizerCombinerHelper(MachineIRBuilder &B,
+                                    AMDGPUCombinerHelper &Helper)
       : B(B), MF(B.getMF()), MRI(*B.getMRI()), Helper(Helper){};
 
   struct FMinFMaxLegacyInfo {
@@ -257,12 +259,12 @@ bool AMDGPUPostLegalizerCombinerHelper::matchRemoveFcanonicalize(
 
 class AMDGPUPostLegalizerCombinerHelperState {
 protected:
-  CombinerHelper &Helper;
+  AMDGPUCombinerHelper &Helper;
   AMDGPUPostLegalizerCombinerHelper &PostLegalizerHelper;
 
 public:
   AMDGPUPostLegalizerCombinerHelperState(
-      CombinerHelper &Helper,
+      AMDGPUCombinerHelper &Helper,
       AMDGPUPostLegalizerCombinerHelper &PostLegalizerHelper)
       : Helper(Helper), PostLegalizerHelper(PostLegalizerHelper) {}
 };
@@ -300,7 +302,7 @@ class AMDGPUPostLegalizerCombinerInfo final : public CombinerInfo {
 bool AMDGPUPostLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
                                               MachineInstr &MI,
                                               MachineIRBuilder &B) const {
-  CombinerHelper Helper(Observer, B, KB, MDT, LInfo);
+  AMDGPUCombinerHelper Helper(Observer, B, KB, MDT, LInfo);
   AMDGPUPostLegalizerCombinerHelper PostLegalizerHelper(B, Helper);
   AMDGPUGenPostLegalizerCombinerHelper Generated(GeneratedRuleCfg, Helper,
                                                  PostLegalizerHelper);

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
index ba08af2ecfcbd..c029046ab65ff 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "AMDGPU.h"
+#include "AMDGPUCombinerHelper.h"
 #include "AMDGPULegalizerInfo.h"
 #include "GCNSubtarget.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
@@ -34,10 +35,11 @@ class AMDGPUPreLegalizerCombinerHelper {
   MachineIRBuilder &B;
   MachineFunction &MF;
   MachineRegisterInfo &MRI;
-  CombinerHelper &Helper;
+  AMDGPUCombinerHelper &Helper;
 
 public:
-  AMDGPUPreLegalizerCombinerHelper(MachineIRBuilder &B, CombinerHelper &Helper)
+  AMDGPUPreLegalizerCombinerHelper(MachineIRBuilder &B,
+                                   AMDGPUCombinerHelper &Helper)
       : B(B), MF(B.getMF()), MRI(*B.getMRI()), Helper(Helper){};
 
   struct ClampI64ToI16MatchInfo {
@@ -154,12 +156,12 @@ void AMDGPUPreLegalizerCombinerHelper::applyClampI64ToI16(
 
 class AMDGPUPreLegalizerCombinerHelperState {
 protected:
-  CombinerHelper &Helper;
+  AMDGPUCombinerHelper &Helper;
   AMDGPUPreLegalizerCombinerHelper &PreLegalizerHelper;
 
 public:
   AMDGPUPreLegalizerCombinerHelperState(
-      CombinerHelper &Helper,
+      AMDGPUCombinerHelper &Helper,
       AMDGPUPreLegalizerCombinerHelper &PreLegalizerHelper)
       : Helper(Helper), PreLegalizerHelper(PreLegalizerHelper) {}
 };
@@ -196,12 +198,12 @@ class AMDGPUPreLegalizerCombinerInfo final : public CombinerInfo {
 bool AMDGPUPreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
                                               MachineInstr &MI,
                                               MachineIRBuilder &B) const {
-  CombinerHelper Helper(Observer, B, KB, MDT);
+  AMDGPUCombinerHelper Helper(Observer, B, KB, MDT);
   AMDGPUPreLegalizerCombinerHelper PreLegalizerHelper(B, Helper);
   AMDGPUGenPreLegalizerCombinerHelper Generated(GeneratedRuleCfg, Helper,
                                                 PreLegalizerHelper);
 
-  if (Generated.tryCombineAll(Observer, MI, B, Helper))
+  if (Generated.tryCombineAll(Observer, MI, B))
     return true;
 
   switch (MI.getOpcode()) {

diff  --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt
index 6dd10affdfc9a..ca5208355db96 100644
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -51,6 +51,7 @@ add_llvm_target(AMDGPUCodeGen
   AMDGPUAttributor.cpp
   AMDGPUCallLowering.cpp
   AMDGPUCodeGenPrepare.cpp
+  AMDGPUCombinerHelper.cpp
   AMDGPUCtorDtorLowering.cpp
   AMDGPUExportClustering.cpp
   AMDGPUFixFunctionBitcasts.cpp

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-foldable-fneg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-foldable-fneg.mir
new file mode 100644
index 0000000000000..093cd0d5c73c6
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-foldable-fneg.mir
@@ -0,0 +1,779 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=amdgpu-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=amdgpu-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name:            test_fminnum
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; CHECK-LABEL: name: test_fminnum
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
+    ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[COPY1]]
+    ; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:_(s32) = G_FMAXNUM [[FNEG]], [[FNEG1]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[FMAXNUM]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s32) = G_FMINNUM %0, %1
+    %3:_(s32) = G_FNEG %2
+    $vgpr0 = COPY %3(s32)
+
+...
+---
+name:            test_fmaxnum
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; CHECK-LABEL: name: test_fmaxnum
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
+    ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[COPY1]]
+    ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:_(s32) = G_FMINNUM [[FNEG]], [[FNEG1]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[FMINNUM]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s32) = G_FMAXNUM %0, %1
+    %3:_(s32) = G_FNEG %2
+    $vgpr0 = COPY %3(s32)
+
+...
+---
+name:            test_fminnum_ieee
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; CHECK-LABEL: name: test_fminnum_ieee
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
+    ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[COPY1]]
+    ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FNEG]], [[FNEG1]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s32) = G_FMINNUM_IEEE %0, %1
+    %3:_(s32) = G_FNEG %2
+    $vgpr0 = COPY %3(s32)
+
+...
+---
+name:            test_fmaxnum_ieee
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; CHECK-LABEL: name: test_fmaxnum_ieee
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
+    ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[COPY1]]
+    ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FNEG]], [[FNEG1]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s32) = G_FMAXNUM_IEEE %0, %1
+    %3:_(s32) = G_FNEG %2
+    $vgpr0 = COPY %3(s32)
+
+...
+---
+name:            test_amdgpu_fmin_legacy
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; CHECK-LABEL: name: test_amdgpu_fmin_legacy
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
+    ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[COPY1]]
+    ; CHECK-NEXT: [[AMDGPU_FMAX_LEGACY:%[0-9]+]]:_(s32) = G_AMDGPU_FMAX_LEGACY [[FNEG]], [[FNEG1]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMAX_LEGACY]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s32) = G_AMDGPU_FMIN_LEGACY %0, %1
+    %3:_(s32) = G_FNEG %2
+    $vgpr0 = COPY %3(s32)
+
+...
+---
+name:            test_amdgpu_fmax_legacy
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; CHECK-LABEL: name: test_amdgpu_fmax_legacy
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
+    ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[COPY1]]
+    ; CHECK-NEXT: [[AMDGPU_FMIN_LEGACY:%[0-9]+]]:_(s32) = G_AMDGPU_FMIN_LEGACY [[FNEG]], [[FNEG1]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMIN_LEGACY]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s32) = G_AMDGPU_FMAX_LEGACY %0, %1
+    %3:_(s32) = G_FNEG %2
+    $vgpr0 = COPY %3(s32)
+
+...
+---
+name:            test_fadd
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; CHECK-LABEL: name: test_fadd
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
+    ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = nsz G_FSUB [[FNEG]], [[COPY1]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[FSUB]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s32) = nsz G_FADD %0, %1
+    %3:_(s32) = G_FNEG %2
+    $vgpr0 = COPY %3(s32)
+
+...
+---
+name:            test_fsub
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; CHECK-LABEL: name: test_fsub
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = nsz G_FSUB [[COPY1]], [[COPY]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[FSUB]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s32) = nsz G_FSUB %0, %1
+    %3:_(s32) = G_FNEG %2
+    $vgpr0 = COPY %3(s32)
+
+...
+---
+name:            test_fma
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2
+
+    ; CHECK-LABEL: name: test_fma
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY1]]
+    ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[COPY2]]
+    ; CHECK-NEXT: [[FMA:%[0-9]+]]:_(s32) = nsz G_FMA [[COPY]], [[FNEG]], [[FNEG1]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[FMA]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s32) = COPY $vgpr2
+    %3:_(s32) = nsz G_FMA %0, %1, %2
+    %4:_(s32) = G_FNEG %3
+    $vgpr0 = COPY %4(s32)
+
+...
+---
+name:            test_fmad
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2
+
+    ; CHECK-LABEL: name: test_fmad
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY1]]
+    ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[COPY2]]
+    ; CHECK-NEXT: [[FMAD:%[0-9]+]]:_(s32) = nsz G_FMAD [[COPY]], [[FNEG]], [[FNEG1]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[FMAD]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s32) = COPY $vgpr2
+    %3:_(s32) = nsz G_FMAD %0, %1, %2
+    %4:_(s32) = G_FNEG %3
+    $vgpr0 = COPY %4(s32)
+
+...
+---
+name:            test_fmul
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; CHECK-LABEL: name: test_fmul
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY1]]
+    ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[FNEG]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[FMUL]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s32) = G_FMUL %0, %1
+    %3:_(s32) = G_FNEG %2
+    $vgpr0 = COPY %3(s32)
+
+...
+---
+name:            test_fpext
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_fpext
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC]]
+    ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG]](s16)
+    ; CHECK-NEXT: $vgpr0 = COPY [[FPEXT]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s16) = G_TRUNC %0(s32)
+    %2:_(s32) = G_FPEXT %1(s16)
+    %3:_(s32) = G_FNEG %2
+    $vgpr0 = COPY %3(s32)
+
+...
+---
+name:            test_intrinsic_trunc
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_intrinsic_trunc
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
+    ; CHECK-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FNEG]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[INTRINSIC_TRUNC]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = G_INTRINSIC_TRUNC %0
+    %2:_(s32) = G_FNEG %1
+    $vgpr0 = COPY %2(s32)
+
+...
+---
+name:            test_frint
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_frint
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
+    ; CHECK-NEXT: [[FRINT:%[0-9]+]]:_(s32) = G_FRINT [[FNEG]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[FRINT]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = G_FRINT %0
+    %2:_(s32) = G_FNEG %1
+    $vgpr0 = COPY %2(s32)
+
+...
+---
+name:            test_fnearbyint
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_fnearbyint
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
+    ; CHECK-NEXT: [[FNEARBYINT:%[0-9]+]]:_(s32) = G_FNEARBYINT [[FNEG]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[FNEARBYINT]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = G_FNEARBYINT %0
+    %2:_(s32) = G_FNEG %1
+    $vgpr0 = COPY %2(s32)
+
+...
+---
+name:            test_intrinsic_round
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_intrinsic_round
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
+    ; CHECK-NEXT: [[INTRINSIC_ROUND:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUND [[FNEG]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[INTRINSIC_ROUND]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = G_INTRINSIC_ROUND %0
+    %2:_(s32) = G_FNEG %1
+    $vgpr0 = COPY %2(s32)
+
+...
+---
+name:            test_intrinsic_roundeven
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_intrinsic_roundeven
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
+    ; CHECK-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FNEG]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[INTRINSIC_ROUNDEVEN]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = G_INTRINSIC_ROUNDEVEN %0
+    %2:_(s32) = G_FNEG %1
+    $vgpr0 = COPY %2(s32)
+
+...
+---
+name:            test_fsin
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_fsin
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
+    ; CHECK-NEXT: [[FSIN:%[0-9]+]]:_(s32) = G_FSIN [[FNEG]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[FSIN]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = G_FSIN %0
+    %2:_(s32) = G_FNEG %1
+    $vgpr0 = COPY %2(s32)
+
+...
+---
+name:            test_fcanonicalize
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_fcanonicalize
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
+    ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FNEG]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[FCANONICALIZE]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = G_FCANONICALIZE %0
+    %2:_(s32) = G_FNEG %1
+    $vgpr0 = COPY %2(s32)
+
+...
+---
+name:            test_amdgcn_rcp_iflag
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_amdgcn_rcp_iflag
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
+    ; CHECK-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FNEG]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_RCP_IFLAG]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = G_AMDGPU_RCP_IFLAG %0
+    %2:_(s32) = G_FNEG %1
+    $vgpr0 = COPY %2(s32)
+
+...
+---
+name:            test_fptrunc
+body:             |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+    ; CHECK-LABEL: name: test_fptrunc
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+    ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY]]
+    ; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:_(s32) = G_FPTRUNC [[FNEG]](s64)
+    ; CHECK-NEXT: $vgpr0 = COPY [[FPTRUNC]](s32)
+    %0:_(s64) = COPY $vgpr0_vgpr1
+    %1:_(s32) = G_FPTRUNC %0:_(s64)
+    %2:_(s32) = G_FNEG %1:_
+    $vgpr0 = COPY %2:_(s32)
+
+...
+---
+name:            test_amdgcn_rcp
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_amdgcn_rcp
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
+    ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FNEG]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[INT]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %0(s32)
+    %2:_(s32) = G_FNEG %1
+    $vgpr0 = COPY %2(s32)
+
+...
+---
+name:            test_amdgcn_rcp_legacy
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_amdgcn_rcp_legacy
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
+    ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp.legacy), [[FNEG]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[INT]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp.legacy), %0(s32)
+    %2:_(s32) = G_FNEG %1
+    $vgpr0 = COPY %2(s32)
+
+...
+---
+name:            test_amdgcn_sin
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_amdgcn_sin
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
+    ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FNEG]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[INT]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), %0(s32)
+    %2:_(s32) = G_FNEG %1
+    $vgpr0 = COPY %2(s32)
+
+...
+---
+name:            test_fmul_legacy
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; CHECK-LABEL: name: test_fmul_legacy
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY1]]
+    ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), [[COPY]](s32), [[FNEG]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[INT]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmul.legacy), %0(s32), %1(s32)
+    %3:_(s32) = G_FNEG %2
+    $vgpr0 = COPY %3(s32)
+
+...
+---
+name:            test_fmed3
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2
+
+    ; CHECK-LABEL: name: test_fmed3
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
+    ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[COPY1]]
+    ; CHECK-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[COPY2]]
+    ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), [[FNEG]](s32), [[FNEG1]](s32), [[FNEG2]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[INT]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s32) = COPY $vgpr2
+    %3:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %0(s32), %1(s32), %2(s32)
+    %4:_(s32) = G_FNEG %3
+    $vgpr0 = COPY %4(s32)
+
+...
+---
+name:            test_amdgcn_fma_legacy
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2
+
+    ; CHECK-LABEL: name: test_amdgcn_fma_legacy
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY1]]
+    ; CHECK-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[COPY2]]
+    ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = nsz G_INTRINSIC intrinsic(@llvm.amdgcn.fma.legacy), [[COPY]](s32), [[FNEG]](s32), [[FNEG1]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[INT]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s32) = COPY $vgpr2
+    %3:_(s32) = nsz G_INTRINSIC intrinsic(@llvm.amdgcn.fma.legacy), %0(s32), %1(s32), %2(s32)
+    %4:_(s32) = G_FNEG %3
+    $vgpr0 = COPY %4(s32)
+
+...
+
+# Don't fold fneg for fadd, fsub, fma, fmad or fma_legacy without nsz
+---
+name:            test_fadd_sz
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; CHECK-LABEL: name: test_fadd_sz
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FADD]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[FNEG]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s32) = G_FADD %0, %1
+    %3:_(s32) = G_FNEG %2
+    $vgpr0 = COPY %3(s32)
+
+...
+---
+name:            test_fsub_sz
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; CHECK-LABEL: name: test_fsub_sz
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FSUB]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[FNEG]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s32) = G_FSUB %0, %1
+    %3:_(s32) = G_FNEG %2
+    $vgpr0 = COPY %3(s32)
+
+...
+---
+name:            test_fma_sz
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2
+
+    ; CHECK-LABEL: name: test_fma_sz
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
+    ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FMA]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[FNEG]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s32) = COPY $vgpr2
+    %3:_(s32) = G_FMA %0, %1, %2
+    %4:_(s32) = G_FNEG %3
+    $vgpr0 = COPY %4(s32)
+
+...
+---
+name:            test_fmad_sz
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2
+
+    ; CHECK-LABEL: name: test_fmad_sz
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[COPY]], [[COPY1]], [[COPY2]]
+    ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FMAD]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[FNEG]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s32) = COPY $vgpr2
+    %3:_(s32) = G_FMAD %0, %1, %2
+    %4:_(s32) = G_FNEG %3
+    $vgpr0 = COPY %4(s32)
+
+...
+---
+name:            test_amdgcn_fma_legacy_sz
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2
+
+    ; CHECK-LABEL: name: test_amdgcn_fma_legacy_sz
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fma.legacy), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+    ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[FNEG]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s32) = COPY $vgpr2
+    %3:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fma.legacy), %0(s32), %1(s32), %2(s32)
+    %4:_(s32) = G_FNEG %3
+    $vgpr0 = COPY %4(s32)
+
+...
+
+# Don't negate 0 for minnum, maxnum
+---
+name:            test_fminnum_zero
+body:             |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: test_fminnum_zero
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+    ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:_(s32) = G_FMINNUM [[COPY]], [[C]]
+    ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FMINNUM]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[FNEG]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = G_FCONSTANT float 0.000000e+00
+    %2:_(s32) = G_FMINNUM %0:_, %1:_
+    %3:_(s32) = G_FNEG %2:_
+    $vgpr0 = COPY %3:_(s32)
+
+...
+
+# On VI and above don't negate 1.0 / (0.5 * pi)
+---
+name:            test_fminnum_inv2pi_half
+body:             |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: test_fminnum_inv2pi_half
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3118
+    ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:_(s16) = G_FMINNUM [[TRUNC]], [[C]]
+    ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[FMINNUM]]
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FNEG]](s16)
+    ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s16) = G_TRUNC %0:_(s32)
+    %2:_(s16) = G_FCONSTANT half 0xH3118
+    %3:_(s16) = G_FMINNUM %1:_, %2:_
+    %4:_(s16) = G_FNEG %3:_
+    %5:_(s32) = G_ANYEXT %4:_(s16)
+    $vgpr0 = COPY %5:_(s32)
+
+...
+---
+name:            test_fminnum_inv2pi_float
+body:             |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: test_fminnum_inv2pi_float
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FC45F3060000000
+    ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:_(s32) = G_FMINNUM [[COPY]], [[C]]
+    ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FMINNUM]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[FNEG]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = G_FCONSTANT float 0x3FC45F3060000000
+    %2:_(s32) = G_FMINNUM %0:_, %1:_
+    %3:_(s32) = G_FNEG %2:_
+    $vgpr0 = COPY %3:_(s32)
+
+...
+---
+name:            test_fminnum_inv2pi_double
+body:             |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+    ; CHECK-LABEL: name: test_fminnum_inv2pi_double
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3FC45F306DC9C882
+    ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:_(s64) = G_FMINNUM [[COPY]], [[C]]
+    ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[FMINNUM]]
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[FNEG]](s64)
+    %0:_(s64) = COPY $vgpr0_vgpr1
+    %1:_(s64) = G_FCONSTANT double 0x3FC45F306DC9C882
+    %2:_(s64) = G_FMINNUM %0:_, %1:_
+    %3:_(s64) = G_FNEG %2:_
+    $vgpr0_vgpr1 = COPY %3:_(s64)
+
+...
+
+# Don't fold when the instruction count will not decrease.
+---
+name:            test_use_both
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2
+
+    ; CHECK-LABEL: name: test_use_both
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FMUL]]
+    ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FNEG]], [[COPY2]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[FMUL]](s32)
+    ; CHECK-NEXT: $vgpr1 = COPY [[FNEG]](s32)
+    ; CHECK-NEXT: $vgpr2 = COPY [[FMUL1]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s32) = COPY $vgpr2
+    %3:_(s32) = G_FMUL %0, %1
+    %4:_(s32) = G_FNEG %3
+    %5:_(s32) = G_FMUL %4, %2
+    $vgpr0 = COPY %3:_(s32)
+    $vgpr1 = COPY %4:_(s32)
+    $vgpr2 = COPY %5:_(s32)
+
+...
+
+# Don't fold when the instruction count will not decrease.
+---
+name:            test_use_both2
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; CHECK-LABEL: name: test_use_both2
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FMUL]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[FMUL]](s32)
+    ; CHECK-NEXT: $vgpr1 = COPY [[FNEG]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s32) = G_FMUL %0, %1
+    %3:_(s32) = G_FNEG %2
+    $vgpr0 = COPY %2:_(s32)
+    $vgpr1 = COPY %3:_(s32)
+
+...
+
+---
+name:            multiple_uses_of_fneg
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+
+    ; CHECK-LABEL: name: multiple_uses_of_fneg
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+    ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY1]]
+    ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[FNEG]]
+    ; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[COPY2]]
+    ; CHECK-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[COPY3]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[FMUL]](s32)
+    ; CHECK-NEXT: $vgpr1 = COPY [[FMUL1]](s32)
+    ; CHECK-NEXT: $vgpr2 = COPY [[FMUL2]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s32) = COPY $vgpr2
+    %3:_(s32) = COPY $vgpr3
+
+    %4:_(s32) = G_FMUL %0, %1
+    %5:_(s32) = G_FNEG %4
+    %6:_(s32) = G_FMUL %5, %2
+    %7:_(s32) = G_FMUL %5, %3
+
+    $vgpr0 = COPY %5:_(s32)
+    $vgpr1 = COPY %6:_(s32)
+    $vgpr2 = COPY %7:_(s32)
+
+...

diff  --git a/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/BUILD.gn
index 78d0f3c2ddc11..47a1dcf02c9b5 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/BUILD.gn
@@ -131,6 +131,7 @@ static_library("LLVMAMDGPUCodeGen") {
     "AMDGPUAttributor.cpp",
     "AMDGPUCallLowering.cpp",
     "AMDGPUCodeGenPrepare.cpp",
+    "AMDGPUCombinerHelper.cpp"
     "AMDGPUCtorDtorLowering.cpp",
     "AMDGPUExportClustering.cpp",
     "AMDGPUFixFunctionBitcasts.cpp",


        

