[PATCH] D154947: [AArch64] Split lowerVectorFCMP combine

Pierre van Houtryve via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 12 04:13:51 PDT 2023


This revision was automatically updated to reflect the committed changes.
Pierre-vh marked an inline comment as done.
Closed by commit rGaf67b6760bdb: [AArch64] Split lowerVectorFCMP combine (authored by Pierre-vh).

Changed prior to commit:
  https://reviews.llvm.org/D154947?vs=539010&id=539490#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D154947/new/

https://reviews.llvm.org/D154947

Files:
  llvm/lib/Target/AArch64/AArch64Combine.td
  llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp


Index: llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -949,29 +949,45 @@
 }
 
 /// Try to lower a vector G_FCMP \p MI into an AArch64-specific pseudo.
-bool lowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
-                     MachineIRBuilder &MIB) {
+bool matchLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
+                          MachineIRBuilder &MIB) {
   assert(MI.getOpcode() == TargetOpcode::G_FCMP);
   const auto &ST = MI.getMF()->getSubtarget<AArch64Subtarget>();
+
   Register Dst = MI.getOperand(0).getReg();
   LLT DstTy = MRI.getType(Dst);
   if (!DstTy.isVector() || !ST.hasNEON())
     return false;
-  const auto Pred =
-      static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
   Register LHS = MI.getOperand(2).getReg();
   unsigned EltSize = MRI.getType(LHS).getScalarSizeInBits();
   if (EltSize == 16 && !ST.hasFullFP16())
     return false;
   if (EltSize != 16 && EltSize != 32 && EltSize != 64)
     return false;
-  Register RHS = MI.getOperand(3).getReg();
+
+  return true;
+}
+
+/// Lower a vector G_FCMP \p MI into an AArch64-specific pseudo.
+void applyLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
+                          MachineIRBuilder &MIB) {
+  assert(MI.getOpcode() == TargetOpcode::G_FCMP);
+  const auto &ST = MI.getMF()->getSubtarget<AArch64Subtarget>();
+
+  const auto &CmpMI = cast<GFCmp>(MI);
+
+  Register Dst = CmpMI.getReg(0);
+  CmpInst::Predicate Pred = CmpMI.getCond();
+  Register LHS = CmpMI.getLHSReg();
+  Register RHS = CmpMI.getRHSReg();
+
+  LLT DstTy = MRI.getType(Dst);
+
   auto Splat = getAArch64VectorSplat(*MRI.getVRegDef(RHS), MRI);
 
   // Compares against 0 have special target-specific pseudos.
   bool IsZero = Splat && Splat->isCst() && Splat->getCst() == 0;
 
-
   bool Invert = false;
   AArch64CC::CondCode CC, CC2 = AArch64CC::AL;
   if (Pred == CmpInst::Predicate::FCMP_ORD && IsZero) {
@@ -984,10 +1000,12 @@
   } else
     changeVectorFCMPPredToAArch64CC(Pred, CC, CC2, Invert);
 
-  bool NoNans = ST.getTargetLowering()->getTargetMachine().Options.NoNaNsFPMath;
-
   // Instead of having an apply function, just build here to simplify things.
   MIB.setInstrAndDebugLoc(MI);
+
+  const bool NoNans =
+      ST.getTargetLowering()->getTargetMachine().Options.NoNaNsFPMath;
+
   auto Cmp = getVectorFCMP(CC, LHS, RHS, IsZero, NoNans, MRI);
   Register CmpRes;
   if (CC2 == AArch64CC::AL)
@@ -1002,7 +1020,6 @@
     CmpRes = MIB.buildNot(DstTy, CmpRes).getReg(0);
   MRI.replaceRegWith(Dst, CmpRes);
   MI.eraseFromParent();
-  return true;
 }
 
 bool matchFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
Index: llvm/lib/Target/AArch64/AArch64Combine.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64Combine.td
+++ llvm/lib/Target/AArch64/AArch64Combine.td
@@ -165,8 +165,8 @@
 def lower_vector_fcmp : GICombineRule<
   (defs root:$root),
   (match (wip_match_opcode G_FCMP):$root,
-    [{ return lowerVectorFCMP(*${root}, MRI, B); }]),
-  (apply [{}])>;
+    [{ return matchLowerVectorFCMP(*${root}, MRI, B); }]),
+  (apply [{ applyLowerVectorFCMP(*${root}, MRI, B); }])>;
 
 def form_truncstore_matchdata : GIDefMatchData<"Register">;
 def form_truncstore : GICombineRule<


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D154947.539490.patch
Type: text/x-patch
Size: 3509 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230712/f9a6a1d4/attachment.bin>


More information about the llvm-commits mailing list