[llvm] 5b15809 - [AArch64][GlobalISel] Create a new minimal combiner pass just for -O0.

Amara Emerson via llvm-commits llvm-commits at lists.llvm.org
Fri May 7 17:07:31 PDT 2021


Author: Amara Emerson
Date: 2021-05-07T17:01:27-07:00
New Revision: 5b158093e2469dec16a070019c6432d26bf7be9b

URL: https://github.com/llvm/llvm-project/commit/5b158093e2469dec16a070019c6432d26bf7be9b
DIFF: https://github.com/llvm/llvm-project/commit/5b158093e2469dec16a070019c6432d26bf7be9b.diff

LOG: [AArch64][GlobalISel] Create a new minimal combiner pass just for -O0.

We never bothered to have a separate set of combines for -O0 in the prelegalizer
before. This results in some minor compile-time hits for a mode where the performance
of the generated code isn't a concern (although not regressing code size significantly
is still preferable).

This also removes the CSE option since we don't need it for -O0.

Through experiments, I've arrived at a set of combines that gets the most code
size improvement at -O0, while reducing the amount of time spent in the combiner
by around 35% give or take.

Differential Revision: https://reviews.llvm.org/D102038

Added: 
    llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp

Modified: 
    llvm/include/llvm/Target/GlobalISel/Combine.td
    llvm/lib/Target/AArch64/AArch64.h
    llvm/lib/Target/AArch64/AArch64Combine.td
    llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
    llvm/lib/Target/AArch64/CMakeLists.txt
    llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
    llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.h
    llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
    llvm/test/CodeGen/AArch64/GlobalISel/gisel-commandline-option.ll
    llvm/test/CodeGen/AArch64/O0-pipeline.ll

Removed: 
    llvm/test/CodeGen/AArch64/combine-loads.ll


################################################################################
diff  --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index a8c64e0be6bd..3c28faa687d7 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -658,3 +658,10 @@ def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
     const_combines, xor_of_and_with_same_reg, ptr_add_with_zero,
     shift_immed_chain, shift_of_shifted_logic_chain, load_or_combine,
     div_rem_to_divrem, funnel_shift_combines]>;
+
+// A combine group used for prelegalizer combiners at -O0. The combines in
+// this group have been selected based on experiments to balance code size and
+// compile time performance.
+def optnone_combines : GICombineGroup<[trivial_combines,
+    ptr_add_immed_chain, combines_for_extload,
+    not_cmp_fold, opt_brcond_by_inverting_cond]>;

diff  --git a/llvm/lib/Target/AArch64/AArch64.h b/llvm/lib/Target/AArch64/AArch64.h
index e304ce94f439..658d44771e8d 100644
--- a/llvm/lib/Target/AArch64/AArch64.h
+++ b/llvm/lib/Target/AArch64/AArch64.h
@@ -59,7 +59,8 @@ ModulePass *createSVEIntrinsicOptsPass();
 InstructionSelector *
 createAArch64InstructionSelector(const AArch64TargetMachine &,
                                  AArch64Subtarget &, AArch64RegisterBankInfo &);
-FunctionPass *createAArch64PreLegalizerCombiner(bool IsOptNone);
+FunctionPass *createAArch64O0PreLegalizerCombiner();
+FunctionPass *createAArch64PreLegalizerCombiner();
 FunctionPass *createAArch64PostLegalizerCombiner(bool IsOptNone);
 FunctionPass *createAArch64PostLegalizerLowering();
 FunctionPass *createAArch64PostSelectOptimize();
@@ -82,6 +83,7 @@ void initializeAArch64SpeculationHardeningPass(PassRegistry&);
 void initializeAArch64LoadStoreOptPass(PassRegistry&);
 void initializeAArch64LowerHomogeneousPrologEpilogPass(PassRegistry &);
 void initializeAArch64SIMDInstrOptPass(PassRegistry&);
+void initializeAArch64O0PreLegalizerCombinerPass(PassRegistry &);
 void initializeAArch64PreLegalizerCombinerPass(PassRegistry&);
 void initializeAArch64PostLegalizerCombinerPass(PassRegistry &);
 void initializeAArch64PostLegalizerLoweringPass(PassRegistry &);

diff  --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 6449b26bb699..ec9618b2e082 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -43,6 +43,13 @@ def AArch64PreLegalizerCombinerHelper: GICombinerHelper<
   let AdditionalArguments = [];
 }
 
+def AArch64O0PreLegalizerCombinerHelper: GICombinerHelper<
+  "AArch64GenO0PreLegalizerCombinerHelper", [optnone_combines]> {
+  let DisableRuleOption = "aarch64O0prelegalizercombiner-disable-rule";
+  let StateClass = "AArch64O0PreLegalizerCombinerHelperState";
+  let AdditionalArguments = [];
+}
+
 // Matchdata for combines which replace a G_SHUFFLE_VECTOR with a
 // target-specific opcode.
 def shuffle_matchdata : GIDefMatchData<"ShuffleVectorPseudo">;

diff  --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index d2364d3c34a9..ed02a9eb0833 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -184,6 +184,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64Target() {
   initializeAArch64ExpandPseudoPass(*PR);
   initializeAArch64LoadStoreOptPass(*PR);
   initializeAArch64SIMDInstrOptPass(*PR);
+  initializeAArch64O0PreLegalizerCombinerPass(*PR);
   initializeAArch64PreLegalizerCombinerPass(*PR);
   initializeAArch64PostLegalizerCombinerPass(*PR);
   initializeAArch64PostLegalizerLoweringPass(*PR);
@@ -562,8 +563,10 @@ bool AArch64PassConfig::addIRTranslator() {
 }
 
 void AArch64PassConfig::addPreLegalizeMachineIR() {
-  bool IsOptNone = getOptLevel() == CodeGenOpt::None;
-  addPass(createAArch64PreLegalizerCombiner(IsOptNone));
+  if (getOptLevel() == CodeGenOpt::None)
+    addPass(createAArch64O0PreLegalizerCombiner());
+  else
+    addPass(createAArch64PreLegalizerCombiner());
 }
 
 bool AArch64PassConfig::addLegalizeMachineIR() {

diff  --git a/llvm/lib/Target/AArch64/CMakeLists.txt b/llvm/lib/Target/AArch64/CMakeLists.txt
index 4bf8da0380a8..a77a66bacc4c 100644
--- a/llvm/lib/Target/AArch64/CMakeLists.txt
+++ b/llvm/lib/Target/AArch64/CMakeLists.txt
@@ -10,6 +10,8 @@ tablegen(LLVM AArch64GenDAGISel.inc -gen-dag-isel)
 tablegen(LLVM AArch64GenDisassemblerTables.inc -gen-disassembler)
 tablegen(LLVM AArch64GenFastISel.inc -gen-fast-isel)
 tablegen(LLVM AArch64GenGlobalISel.inc -gen-global-isel)
+tablegen(LLVM AArch64GenO0PreLegalizeGICombiner.inc -gen-global-isel-combiner
+              -combiners="AArch64O0PreLegalizerCombinerHelper")
 tablegen(LLVM AArch64GenPreLegalizeGICombiner.inc -gen-global-isel-combiner
               -combiners="AArch64PreLegalizerCombinerHelper")
 tablegen(LLVM AArch64GenPostLegalizeGICombiner.inc -gen-global-isel-combiner
@@ -32,6 +34,7 @@ add_llvm_target(AArch64CodeGen
   GISel/AArch64GlobalISelUtils.cpp
   GISel/AArch64InstructionSelector.cpp
   GISel/AArch64LegalizerInfo.cpp
+  GISel/AArch64O0PreLegalizerCombiner.cpp
   GISel/AArch64PreLegalizerCombiner.cpp
   GISel/AArch64PostLegalizerCombiner.cpp
   GISel/AArch64PostLegalizerLowering.cpp

diff  --git a/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
index a1392ccb59e6..826adddb96fb 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
@@ -11,6 +11,7 @@
 #include "AArch64GlobalISelUtils.h"
 #include "AArch64InstrInfo.h"
 #include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
@@ -57,3 +58,38 @@ bool AArch64GISelUtils::isCMN(const MachineInstr *MaybeSub,
       getConstantVRegValWithLookThrough(MaybeSub->getOperand(1).getReg(), MRI);
   return MaybeZero && MaybeZero->Value.getZExtValue() == 0;
 }
+
+bool AArch64GISelUtils::tryEmitBZero(MachineInstr &MI,
+                                     MachineIRBuilder &MIRBuilder,
+                                     bool MinSize) {
+  assert(MI.getOpcode() == TargetOpcode::G_MEMSET);
+  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
+  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
+  if (!TLI.getLibcallName(RTLIB::BZERO))
+    return false;
+  auto Zero = getConstantVRegValWithLookThrough(MI.getOperand(1).getReg(), MRI);
+  if (!Zero || Zero->Value.getSExtValue() != 0)
+    return false;
+
+  // It's not faster to use bzero rather than memset for sizes <= 256.
+  // However, it *does* save us a mov from wzr, so if we're going for
+  // minsize, use bzero even if it's slower.
+  if (!MinSize) {
+    // If the size is known, check it. If it is not known, assume using bzero is
+    // better.
+    if (auto Size =
+            getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI)) {
+      if (Size->Value.getSExtValue() <= 256)
+        return false;
+    }
+  }
+
+  MIRBuilder.setInstrAndDebugLoc(MI);
+  MIRBuilder
+      .buildInstr(TargetOpcode::G_BZERO, {},
+                  {MI.getOperand(0), MI.getOperand(2)})
+      .addImm(MI.getOperand(3).getImm())
+      .addMemOperand(*MI.memoperands_begin());
+  MI.eraseFromParent();
+  return true;
+}

diff  --git a/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.h b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.h
index 142d999ef05a..cddaf33e3085 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.h
+++ b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.h
@@ -13,6 +13,7 @@
 #define LLVM_LIB_TARGET_AARCH64_GISEL_AARCH64GLOBALISELUTILS_H
 
 #include "llvm/ADT/Optional.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/Register.h"
 #include "MCTargetDesc/AArch64AddressingModes.h"
@@ -44,6 +45,14 @@ Optional<int64_t> getAArch64VectorSplatScalar(const MachineInstr &MI,
 bool isCMN(const MachineInstr *MaybeSub, const CmpInst::Predicate &Pred,
            const MachineRegisterInfo &MRI);
 
+/// Replace a G_MEMSET with a value of 0 with a G_BZERO instruction if it is
+/// supported and beneficial to do so.
+///
+/// \note This only applies on Darwin.
+///
+/// \returns true if \p MI was replaced with a G_BZERO.
+bool tryEmitBZero(MachineInstr &MI, MachineIRBuilder &MIRBuilder, bool MinSize);
+
 } // namespace AArch64GISelUtils
 } // namespace llvm
 

diff  --git a/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp
new file mode 100644
index 000000000000..ad92f84d4141
--- /dev/null
+++ b/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp
@@ -0,0 +1,171 @@
+//=== lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass does combining of machine instructions at the generic MI level,
+// before the legalizer.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64GlobalISelUtils.h"
+#include "AArch64TargetMachine.h"
+#include "llvm/CodeGen/GlobalISel/Combiner.h"
+#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
+#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
+#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "aarch64-O0-prelegalizer-combiner"
+
+using namespace llvm;
+using namespace MIPatternMatch;
+
+class AArch64O0PreLegalizerCombinerHelperState {
+protected:
+  CombinerHelper &Helper;
+
+public:
+  AArch64O0PreLegalizerCombinerHelperState(CombinerHelper &Helper)
+      : Helper(Helper) {}
+};
+
+#define AARCH64O0PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
+#include "AArch64GenO0PreLegalizeGICombiner.inc"
+#undef AARCH64O0PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
+
+namespace {
+#define AARCH64O0PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
+#include "AArch64GenO0PreLegalizeGICombiner.inc"
+#undef AARCH64O0PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
+
+class AArch64O0PreLegalizerCombinerInfo : public CombinerInfo {
+  GISelKnownBits *KB;
+  MachineDominatorTree *MDT;
+  AArch64GenO0PreLegalizerCombinerHelperRuleConfig GeneratedRuleCfg;
+
+public:
+  AArch64O0PreLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
+                                    GISelKnownBits *KB,
+                                    MachineDominatorTree *MDT)
+      : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
+                     /*LegalizerInfo*/ nullptr, EnableOpt, OptSize, MinSize),
+        KB(KB), MDT(MDT) {
+    if (!GeneratedRuleCfg.parseCommandLineOption())
+      report_fatal_error("Invalid rule identifier");
+  }
+
+  virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
+                       MachineIRBuilder &B) const override;
+};
+
+bool AArch64O0PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
+                                                MachineInstr &MI,
+                                                MachineIRBuilder &B) const {
+  CombinerHelper Helper(Observer, B, KB, MDT);
+  AArch64GenO0PreLegalizerCombinerHelper Generated(GeneratedRuleCfg, Helper);
+
+  if (Generated.tryCombineAll(Observer, MI, B))
+    return true;
+
+  unsigned Opc = MI.getOpcode();
+  switch (Opc) {
+  case TargetOpcode::G_CONCAT_VECTORS:
+    return Helper.tryCombineConcatVectors(MI);
+  case TargetOpcode::G_SHUFFLE_VECTOR:
+    return Helper.tryCombineShuffleVector(MI);
+  case TargetOpcode::G_MEMCPY:
+  case TargetOpcode::G_MEMMOVE:
+  case TargetOpcode::G_MEMSET: {
+    // At -O0 set a maxlen of 32 to inline.
+    unsigned MaxLen = 32;
+    // Try to inline memcpy type calls, subject to the MaxLen limit above.
+    if (Helper.tryCombineMemCpyFamily(MI, MaxLen))
+      return true;
+    if (Opc == TargetOpcode::G_MEMSET)
+      return llvm::AArch64GISelUtils::tryEmitBZero(MI, B, EnableMinSize);
+    return false;
+  }
+  }
+
+  return false;
+}
+
+#define AARCH64O0PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
+#include "AArch64GenO0PreLegalizeGICombiner.inc"
+#undef AARCH64O0PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
+
+// Pass boilerplate
+// ================
+
+class AArch64O0PreLegalizerCombiner : public MachineFunctionPass {
+public:
+  static char ID;
+
+  AArch64O0PreLegalizerCombiner();
+
+  StringRef getPassName() const override {
+    return "AArch64O0PreLegalizerCombiner";
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+};
+} // end anonymous namespace
+
+void AArch64O0PreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<TargetPassConfig>();
+  AU.setPreservesCFG();
+  getSelectionDAGFallbackAnalysisUsage(AU);
+  AU.addRequired<GISelKnownBitsAnalysis>();
+  AU.addPreserved<GISelKnownBitsAnalysis>();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+AArch64O0PreLegalizerCombiner::AArch64O0PreLegalizerCombiner()
+    : MachineFunctionPass(ID) {
+  initializeAArch64O0PreLegalizerCombinerPass(*PassRegistry::getPassRegistry());
+}
+
+bool AArch64O0PreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
+  if (MF.getProperties().hasProperty(
+          MachineFunctionProperties::Property::FailedISel))
+    return false;
+  auto &TPC = getAnalysis<TargetPassConfig>();
+
+  const Function &F = MF.getFunction();
+  GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
+  AArch64O0PreLegalizerCombinerInfo PCInfo(
+      false, F.hasOptSize(), F.hasMinSize(), KB, nullptr /* MDT */);
+  Combiner C(PCInfo, &TPC);
+  return C.combineMachineInstrs(MF, nullptr /* CSEInfo */);
+}
+
+char AArch64O0PreLegalizerCombiner::ID = 0;
+INITIALIZE_PASS_BEGIN(AArch64O0PreLegalizerCombiner, DEBUG_TYPE,
+                      "Combine AArch64 machine instrs before legalization",
+                      false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
+INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass)
+INITIALIZE_PASS_END(AArch64O0PreLegalizerCombiner, DEBUG_TYPE,
+                    "Combine AArch64 machine instrs before legalization", false,
+                    false)
+
+namespace llvm {
+FunctionPass *createAArch64O0PreLegalizerCombiner() {
+  return new AArch64O0PreLegalizerCombiner();
+}
+} // end namespace llvm

diff  --git a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
index a87dc0d5583b..97820cf7d57d 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
@@ -11,6 +11,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "AArch64GlobalISelUtils.h"
 #include "AArch64TargetMachine.h"
 #include "llvm/CodeGen/GlobalISel/Combiner.h"
 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
@@ -219,46 +220,6 @@ static bool applyFoldGlobalOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
   return true;
 }
 
-/// Replace a G_MEMSET with a value of 0 with a G_BZERO instruction if it is
-/// supported and beneficial to do so.
-///
-/// \note This only applies on Darwin.
-///
-/// \returns true if \p MI was replaced with a G_BZERO.
-static bool tryEmitBZero(MachineInstr &MI, MachineIRBuilder &MIRBuilder,
-                         bool MinSize) {
-  assert(MI.getOpcode() == TargetOpcode::G_MEMSET);
-  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
-  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
-  if (!TLI.getLibcallName(RTLIB::BZERO))
-    return false;
-  auto Zero = getConstantVRegValWithLookThrough(MI.getOperand(1).getReg(), MRI);
-  if (!Zero || Zero->Value.getSExtValue() != 0)
-    return false;
-
-  // It's not faster to use bzero rather than memset for sizes <= 256.
-  // However, it *does* save us a mov from wzr, so if we're going for
-  // minsize, use bzero even if it's slower.
-  if (!MinSize) {
-    // If the size is known, check it. If it is not known, assume using bzero is
-    // better.
-    if (auto Size =
-            getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI)) {
-      if (Size->Value.getSExtValue() <= 256)
-        return false;
-    }
-  }
-
-  MIRBuilder.setInstrAndDebugLoc(MI);
-  MIRBuilder
-      .buildInstr(TargetOpcode::G_BZERO, {},
-                  {MI.getOperand(0), MI.getOperand(2)})
-      .addImm(MI.getOperand(3).getImm())
-      .addMemOperand(*MI.memoperands_begin());
-  MI.eraseFromParent();
-  return true;
-}
-
 class AArch64PreLegalizerCombinerHelperState {
 protected:
   CombinerHelper &Helper;
@@ -321,7 +282,7 @@ bool AArch64PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
     if (!EnableMinSize && Helper.tryCombineMemCpyFamily(MI, MaxLen))
       return true;
     if (Opc == TargetOpcode::G_MEMSET)
-      return tryEmitBZero(MI, B, EnableMinSize);
+      return llvm::AArch64GISelUtils::tryEmitBZero(MI, B, EnableMinSize);
     return false;
   }
   }
@@ -340,15 +301,13 @@ class AArch64PreLegalizerCombiner : public MachineFunctionPass {
 public:
   static char ID;
 
-  AArch64PreLegalizerCombiner(bool IsOptNone = false);
+  AArch64PreLegalizerCombiner();
 
   StringRef getPassName() const override { return "AArch64PreLegalizerCombiner"; }
 
   bool runOnMachineFunction(MachineFunction &MF) override;
 
   void getAnalysisUsage(AnalysisUsage &AU) const override;
-private:
-  bool IsOptNone;
 };
 } // end anonymous namespace
 
@@ -358,17 +317,15 @@ void AArch64PreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
   getSelectionDAGFallbackAnalysisUsage(AU);
   AU.addRequired<GISelKnownBitsAnalysis>();
   AU.addPreserved<GISelKnownBitsAnalysis>();
-  if (!IsOptNone) {
-    AU.addRequired<MachineDominatorTree>();
-    AU.addPreserved<MachineDominatorTree>();
-  }
+  AU.addRequired<MachineDominatorTree>();
+  AU.addPreserved<MachineDominatorTree>();
   AU.addRequired<GISelCSEAnalysisWrapperPass>();
   AU.addPreserved<GISelCSEAnalysisWrapperPass>();
   MachineFunctionPass::getAnalysisUsage(AU);
 }
 
-AArch64PreLegalizerCombiner::AArch64PreLegalizerCombiner(bool IsOptNone)
-    : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
+AArch64PreLegalizerCombiner::AArch64PreLegalizerCombiner()
+    : MachineFunctionPass(ID) {
   initializeAArch64PreLegalizerCombinerPass(*PassRegistry::getPassRegistry());
 }
 
@@ -387,8 +344,7 @@ bool AArch64PreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
   bool EnableOpt =
       MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);
   GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
-  MachineDominatorTree *MDT =
-      IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
+  MachineDominatorTree *MDT = &getAnalysis<MachineDominatorTree>();
   AArch64PreLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
                                          F.hasMinSize(), KB, MDT);
   Combiner C(PCInfo, &TPC);
@@ -408,7 +364,7 @@ INITIALIZE_PASS_END(AArch64PreLegalizerCombiner, DEBUG_TYPE,
 
 
 namespace llvm {
-FunctionPass *createAArch64PreLegalizerCombiner(bool IsOptNone) {
-  return new AArch64PreLegalizerCombiner(IsOptNone);
+FunctionPass *createAArch64PreLegalizerCombiner() {
+  return new AArch64PreLegalizerCombiner();
 }
 } // end namespace llvm

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/gisel-commandline-option.ll b/llvm/test/CodeGen/AArch64/GlobalISel/gisel-commandline-option.ll
index 48538424895f..d67f1cb739b9 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/gisel-commandline-option.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/gisel-commandline-option.ll
@@ -56,9 +56,11 @@
 ; VERIFY-NEXT:   Verify generated machine code
 ; ENABLED-NEXT:  Analysis for ComputingKnownBits
 ; ENABLED-O1-NEXT:  MachineDominator Tree Construction
-; ENABLED-NEXT:  Analysis containing CSE Info
-; ENABLED-NEXT:  PreLegalizerCombiner
+; ENABLED-O1-NEXT:  Analysis containing CSE Info
+; ENABLED-O1-NEXT:  PreLegalizerCombiner
+; VERIFY-O0-NEXT:  AArch64O0PreLegalizerCombiner
 ; VERIFY-NEXT:   Verify generated machine code
+; VERIFY-O0-NEXT:  Analysis containing CSE Info
 ; ENABLED-NEXT:  Legalizer
 ; VERIFY-NEXT:   Verify generated machine code
 ; ENABLED:  RegBankSelect

diff  --git a/llvm/test/CodeGen/AArch64/O0-pipeline.ll b/llvm/test/CodeGen/AArch64/O0-pipeline.ll
index 647edcc8defb..3e0fd31d9841 100644
--- a/llvm/test/CodeGen/AArch64/O0-pipeline.ll
+++ b/llvm/test/CodeGen/AArch64/O0-pipeline.ll
@@ -34,8 +34,8 @@
 ; CHECK-NEXT:       Analysis containing CSE Info
 ; CHECK-NEXT:       IRTranslator
 ; CHECK-NEXT:       Analysis for ComputingKnownBits
+; CHECK-NEXT:       AArch64O0PreLegalizerCombiner
 ; CHECK-NEXT:       Analysis containing CSE Info
-; CHECK-NEXT:       AArch64PreLegalizerCombiner
 ; CHECK-NEXT:       Legalizer
 ; CHECK-NEXT:       AArch64PostLegalizerLowering
 ; CHECK-NEXT:       RegBankSelect

diff  --git a/llvm/test/CodeGen/AArch64/combine-loads.ll b/llvm/test/CodeGen/AArch64/combine-loads.ll
deleted file mode 100644
index be2501b1ab5b..000000000000
--- a/llvm/test/CodeGen/AArch64/combine-loads.ll
+++ /dev/null
@@ -1,21 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -O0 -mtriple=aarch64-unknown-unknown | FileCheck %s
-
-define <2 x i64> @z(i64* nocapture nonnull readonly %p) {
-; CHECK-LABEL: z:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldr d1, [x0]
-; CHECK-NEXT:    ldr d2, [x0, #8]
-; CHECK-NEXT:    // implicit-def: $q0
-; CHECK-NEXT:    mov v0.16b, v1.16b
-; CHECK-NEXT:    // implicit-def: $q1
-; CHECK-NEXT:    mov v1.16b, v2.16b
-; CHECK-NEXT:    mov v0.d[1], v1.d[0]
-; CHECK-NEXT:    ret
-  %b = load i64, i64* %p
-  %p2 = getelementptr i64, i64* %p, i64 1
-  %bb = load i64, i64* %p2
-  %r1 = insertelement <2 x i64> zeroinitializer, i64 %b, i32 0
-  %r2 = insertelement <2 x i64> %r1, i64 %bb, i32 1
-  ret <2 x i64> %r2
-}


        


More information about the llvm-commits mailing list