[llvm] r363632 - [GlobalISel][Localizer] Rewrite localizer to run in 2 phases, inter & intra block.

Amara Emerson via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 17 16:20:29 PDT 2019


Author: aemerson
Date: Mon Jun 17 16:20:29 2019
New Revision: 363632

URL: http://llvm.org/viewvc/llvm-project?rev=363632&view=rev
Log:
[GlobalISel][Localizer] Rewrite localizer to run in 2 phases, inter & intra block.

Inter-block localization is the same as what currently happens, except now it
only runs on the entry block because that's where the problematic constants with
long live ranges come from.

The second phase is a new intra-block localization phase which attempts to
re-sink the already localized instructions further, to right before the first
of their multiple uses in the block.

One additional change is to also localize G_GLOBAL_VALUE instructions, as they
are constants too. However, on some targets like arm64 it takes multiple
instructions to materialize the value, so some additional heuristics with a TTI
hook have been introduced to try to prevent code size regressions when
localizing these.

Overall, these changes improve CTMark code size on arm64 by 1.2%.

Full code size results:

Program                                         baseline       new       diff
------------------------------------------------------------------------------
 test-suite...-typeset/consumer-typeset.test    1249984      1217216     -2.6%
 test-suite...:: CTMark/ClamAV/clamscan.test    1264928      1232152     -2.6%
 test-suite :: CTMark/SPASS/SPASS.test          1394092      1361316     -2.4%
 test-suite...Mark/mafft/pairlocalalign.test    731320       714928      -2.2%
 test-suite :: CTMark/lencod/lencod.test        1340592      1324200     -1.2%
 test-suite :: CTMark/kimwitu++/kc.test         3853512      3820420     -0.9%
 test-suite :: CTMark/Bullet/bullet.test        3406036      3389652     -0.5%
 test-suite...ark/tramp3d-v4/tramp3d-v4.test    8017000      8016992     -0.0%
 test-suite...TMark/7zip/7zip-benchmark.test    2856588      2856588      0.0%
 test-suite...:: CTMark/sqlite3/sqlite3.test    765704       765704       0.0%
 Geomean difference                                                      -1.2%

Differential Revision: https://reviews.llvm.org/D63303

Added:
    llvm/trunk/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll
Modified:
    llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h
    llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h
    llvm/trunk/include/llvm/CodeGen/GlobalISel/Localizer.h
    llvm/trunk/lib/Analysis/TargetTransformInfo.cpp
    llvm/trunk/lib/CodeGen/GlobalISel/Localizer.cpp
    llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.h
    llvm/trunk/test/CodeGen/AArch64/GlobalISel/localizer.mir

Modified: llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h?rev=363632&r1=363631&r2=363632&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h (original)
+++ llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h Mon Jun 17 16:20:29 2019
@@ -1053,6 +1053,11 @@ public:
   /// \returns True if the target wants to expand the given reduction intrinsic
   /// into a shuffle sequence.
   bool shouldExpandReduction(const IntrinsicInst *II) const;
+
+  /// \returns the size cost of rematerializing a GlobalValue address relative
+  /// to a stack reload.
+  unsigned getGISelRematGlobalCost() const;
+
   /// @}
 
 private:
@@ -1269,6 +1274,7 @@ public:
   virtual bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
                                      ReductionFlags) const = 0;
   virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
+  virtual unsigned getGISelRematGlobalCost() const = 0;
   virtual int getInstructionLatency(const Instruction *I) = 0;
 };
 
@@ -1701,6 +1707,11 @@ public:
   bool shouldExpandReduction(const IntrinsicInst *II) const override {
     return Impl.shouldExpandReduction(II);
   }
+
+  unsigned getGISelRematGlobalCost() const override {
+    return Impl.getGISelRematGlobalCost();
+  }
+
   int getInstructionLatency(const Instruction *I) override {
     return Impl.getInstructionLatency(I);
   }

Modified: llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h?rev=363632&r1=363631&r2=363632&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h (original)
+++ llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h Mon Jun 17 16:20:29 2019
@@ -572,6 +572,10 @@ public:
     return true;
   }
 
+  unsigned getGISelRematGlobalCost() const {
+    return 1;
+  }
+
 protected:
   // Obtain the minimum required size to hold the value (without the sign)
   // In case of a vector it returns the min required size for one element.

Modified: llvm/trunk/include/llvm/CodeGen/GlobalISel/Localizer.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/GlobalISel/Localizer.h?rev=363632&r1=363631&r2=363632&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/GlobalISel/Localizer.h (original)
+++ llvm/trunk/include/llvm/CodeGen/GlobalISel/Localizer.h Mon Jun 17 16:20:29 2019
@@ -27,6 +27,7 @@
 namespace llvm {
 // Forward declarations.
 class MachineRegisterInfo;
+class TargetTransformInfo;
 
 /// This pass implements the localization mechanism described at the
 /// top of this file. One specificity of the implementation is that
@@ -43,9 +44,11 @@ private:
   /// MRI contains all the register class/bank information that this
   /// pass uses and updates.
   MachineRegisterInfo *MRI;
+  /// TTI used for getting remat costs for instructions.
+  TargetTransformInfo *TTI;
 
   /// Check whether or not \p MI needs to be moved close to its uses.
-  static bool shouldLocalize(const MachineInstr &MI);
+  bool shouldLocalize(const MachineInstr &MI);
 
   /// Check if \p MOUse is used in the same basic block as \p Def.
   /// If the use is in the same block, we say it is local.
@@ -57,6 +60,13 @@ private:
   /// Initialize the field members using \p MF.
   void init(MachineFunction &MF);
 
+  /// Do inter-block localization from the entry block.
+  bool localizeInterBlock(MachineFunction &MF,
+                          SmallPtrSetImpl<MachineInstr *> &LocalizedInstrs);
+
+  /// Do intra-block localization of already localized instructions.
+  bool localizeIntraBlock(SmallPtrSetImpl<MachineInstr *> &LocalizedInstrs);
+
 public:
   Localizer();
 

Modified: llvm/trunk/lib/Analysis/TargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/TargetTransformInfo.cpp?rev=363632&r1=363631&r2=363632&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/TargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Analysis/TargetTransformInfo.cpp Mon Jun 17 16:20:29 2019
@@ -724,6 +724,10 @@ bool TargetTransformInfo::shouldExpandRe
   return TTIImpl->shouldExpandReduction(II);
 }
 
+unsigned TargetTransformInfo::getGISelRematGlobalCost() const {
+  return TTIImpl->getGISelRematGlobalCost();
+}
+
 int TargetTransformInfo::getInstructionLatency(const Instruction *I) const {
   return TTIImpl->getInstructionLatency(I);
 }

Modified: llvm/trunk/lib/CodeGen/GlobalISel/Localizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/GlobalISel/Localizer.cpp?rev=363632&r1=363631&r2=363632&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/GlobalISel/Localizer.cpp (original)
+++ llvm/trunk/lib/CodeGen/GlobalISel/Localizer.cpp Mon Jun 17 16:20:29 2019
@@ -10,6 +10,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/GlobalISel/Localizer.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -20,17 +21,55 @@
 using namespace llvm;
 
 char Localizer::ID = 0;
-INITIALIZE_PASS(Localizer, DEBUG_TYPE,
-                "Move/duplicate certain instructions close to their use", false,
-                false)
+INITIALIZE_PASS_BEGIN(Localizer, DEBUG_TYPE,
+                      "Move/duplicate certain instructions close to their use",
+                      false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_END(Localizer, DEBUG_TYPE,
+                    "Move/duplicate certain instructions close to their use",
+                    false, false)
 
 Localizer::Localizer() : MachineFunctionPass(ID) {
   initializeLocalizerPass(*PassRegistry::getPassRegistry());
 }
 
-void Localizer::init(MachineFunction &MF) { MRI = &MF.getRegInfo(); }
+void Localizer::init(MachineFunction &MF) {
+  MRI = &MF.getRegInfo();
+  TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(MF.getFunction());
+}
 
 bool Localizer::shouldLocalize(const MachineInstr &MI) {
+  // Assuming a spill and reload of a value has a cost of 1 instruction each,
+  // this helper function computes the maximum number of uses we should consider
+  // for remat. E.g. on arm64 global addresses take 2 insts to materialize. We
+  // break even in terms of code size when the original MI has 2 users vs
+  // choosing to potentially spill. With more than 2 users we have a net code
+  // size increase. This doesn't take into account register pressure though.
+  auto maxUses = [](unsigned RematCost) {
+    // A cost of 1 means remats are basically free.
+    if (RematCost == 1)
+      return UINT_MAX;
+    if (RematCost == 2)
+      return 2U;
+
+    // Remat is too expensive, only sink if there's one user.
+    if (RematCost > 2)
+      return 1U;
+    llvm_unreachable("Unexpected remat cost");
+  };
+
+  // Helper to walk through uses and stop once we've reached a limit. Saves us
+  // traversing all uses when we only need to know if there are > MaxUses.
+  auto isUsesAtMost = [&](unsigned Reg, unsigned MaxUses) {
+    unsigned NumUses = 0;
+    auto UI = MRI->use_instr_nodbg_begin(Reg), UE = MRI->use_instr_nodbg_end();
+    for (; UI != UE && NumUses < MaxUses; ++UI) {
+      NumUses++;
+    }
+    // If we haven't reached the end yet then there are more than MaxUses users.
+    return UI == UE;
+  };
+
   switch (MI.getOpcode()) {
   default:
     return false;
@@ -40,10 +79,20 @@ bool Localizer::shouldLocalize(const Mac
   case TargetOpcode::G_FCONSTANT:
   case TargetOpcode::G_FRAME_INDEX:
     return true;
+  case TargetOpcode::G_GLOBAL_VALUE: {
+    unsigned RematCost = TTI->getGISelRematGlobalCost();
+    unsigned Reg = MI.getOperand(0).getReg();
+    unsigned MaxUses = maxUses(RematCost);
+    if (MaxUses == UINT_MAX)
+      return true; // Remats are "free" so always localize.
+    bool B = isUsesAtMost(Reg, MaxUses);
+    return B;
+  }
   }
 }
 
 void Localizer::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<TargetTransformInfoWrapperPass>();
   getSelectionDAGFallbackAnalysisUsage(AU);
   MachineFunctionPass::getAnalysisUsage(AU);
 }
@@ -57,6 +106,106 @@ bool Localizer::isLocalUse(MachineOperan
   return InsertMBB == Def.getParent();
 }
 
+bool Localizer::localizeInterBlock(
+    MachineFunction &MF, SmallPtrSetImpl<MachineInstr *> &LocalizedInstrs) {
+  bool Changed = false;
+  DenseMap<std::pair<MachineBasicBlock *, unsigned>, unsigned> MBBWithLocalDef;
+
+  // Since the IRTranslator only emits constants into the entry block, and the
+  // rest of the GISel pipeline generally emits constants close to their users,
+  // we only localize instructions in the entry block here. This might change if
+  // we start doing CSE across blocks.
+  auto &MBB = MF.front();
+  for (MachineInstr &MI : MBB) {
+    if (LocalizedInstrs.count(&MI) || !shouldLocalize(MI))
+      continue;
+    LLVM_DEBUG(dbgs() << "Should localize: " << MI);
+    assert(MI.getDesc().getNumDefs() == 1 &&
+           "More than one definition not supported yet");
+    unsigned Reg = MI.getOperand(0).getReg();
+    // Check if all the users of MI are local.
+    // We are going to invalidate the list of use operands, so we
+    // can't use a range iterator.
+    for (auto MOIt = MRI->use_begin(Reg), MOItEnd = MRI->use_end();
+         MOIt != MOItEnd;) {
+      MachineOperand &MOUse = *MOIt++;
+      // Check if the use is already local.
+      MachineBasicBlock *InsertMBB;
+      LLVM_DEBUG(MachineInstr &MIUse = *MOUse.getParent();
+                 dbgs() << "Checking use: " << MIUse
+                        << " #Opd: " << MIUse.getOperandNo(&MOUse) << '\n');
+      if (isLocalUse(MOUse, MI, InsertMBB))
+        continue;
+      LLVM_DEBUG(dbgs() << "Fixing non-local use\n");
+      Changed = true;
+      auto MBBAndReg = std::make_pair(InsertMBB, Reg);
+      auto NewVRegIt = MBBWithLocalDef.find(MBBAndReg);
+      if (NewVRegIt == MBBWithLocalDef.end()) {
+        // Create the localized instruction.
+        MachineInstr *LocalizedMI = MF.CloneMachineInstr(&MI);
+        LocalizedInstrs.insert(LocalizedMI);
+        MachineInstr &UseMI = *MOUse.getParent();
+        if (MRI->hasOneUse(Reg) && !UseMI.isPHI())
+          InsertMBB->insert(InsertMBB->SkipPHIsAndLabels(UseMI), LocalizedMI);
+        else
+          InsertMBB->insert(InsertMBB->SkipPHIsAndLabels(InsertMBB->begin()),
+                            LocalizedMI);
+
+        // Set a new register for the definition.
+        unsigned NewReg = MRI->createGenericVirtualRegister(MRI->getType(Reg));
+        MRI->setRegClassOrRegBank(NewReg, MRI->getRegClassOrRegBank(Reg));
+        LocalizedMI->getOperand(0).setReg(NewReg);
+        NewVRegIt =
+            MBBWithLocalDef.insert(std::make_pair(MBBAndReg, NewReg)).first;
+        LLVM_DEBUG(dbgs() << "Inserted: " << *LocalizedMI);
+      }
+      LLVM_DEBUG(dbgs() << "Update use with: " << printReg(NewVRegIt->second)
+                        << '\n');
+      // Update the user reg.
+      MOUse.setReg(NewVRegIt->second);
+    }
+  }
+  return Changed;
+}
+
+bool Localizer::localizeIntraBlock(
+    SmallPtrSetImpl<MachineInstr *> &LocalizedInstrs) {
+  bool Changed = false;
+
+  // For each already-localized instruction which has multiple users, we scan
+  // the block top down from the current position until we hit one of them.
+
+  // FIXME: Consider doing inst duplication if live ranges are very long due to
+  // many users, but this case may be better served by regalloc improvements.
+
+  for (MachineInstr *MI : LocalizedInstrs) {
+    unsigned Reg = MI->getOperand(0).getReg();
+    MachineBasicBlock &MBB = *MI->getParent();
+    // If the instruction has a single use, we would have already moved it right
+    // before its user in localizeInterBlock().
+    if (MRI->hasOneUse(Reg))
+      continue;
+
+    // All of the user MIs of this reg.
+    SmallPtrSet<MachineInstr *, 32> Users;
+    for (MachineInstr &UseMI : MRI->use_nodbg_instructions(Reg))
+      Users.insert(&UseMI);
+
+    MachineBasicBlock::iterator II(MI);
+    ++II;
+    while (II != MBB.end() && !Users.count(&*II))
+      ++II;
+
+    LLVM_DEBUG(dbgs() << "Intra-block: moving " << *MI << " before " << *&*II
+                      << "\n");
+    assert(II != MBB.end() && "Didn't find the user in the MBB");
+    MI->removeFromParent();
+    MBB.insert(II, MI);
+    Changed = true;
+  }
+  return Changed;
+}
+
 bool Localizer::runOnMachineFunction(MachineFunction &MF) {
   // If the ISel pipeline failed, do not bother running that pass.
   if (MF.getProperties().hasProperty(
@@ -67,62 +216,10 @@ bool Localizer::runOnMachineFunction(Mac
 
   init(MF);
 
-  bool Changed = false;
-  // Keep track of the instructions we localized.
-  // We won't need to process them if we see them later in the CFG.
-  SmallPtrSet<MachineInstr *, 16> LocalizedInstrs;
-  DenseMap<std::pair<MachineBasicBlock *, unsigned>, unsigned> MBBWithLocalDef;
-  // TODO: Do bottom up traversal.
-  for (MachineBasicBlock &MBB : MF) {
-    for (MachineInstr &MI : MBB) {
-      if (LocalizedInstrs.count(&MI) || !shouldLocalize(MI))
-        continue;
-      LLVM_DEBUG(dbgs() << "Should localize: " << MI);
-      assert(MI.getDesc().getNumDefs() == 1 &&
-             "More than one definition not supported yet");
-      unsigned Reg = MI.getOperand(0).getReg();
-      // Check if all the users of MI are local.
-      // We are going to invalidation the list of use operands, so we
-      // can't use range iterator.
-      for (auto MOIt = MRI->use_begin(Reg), MOItEnd = MRI->use_end();
-           MOIt != MOItEnd;) {
-        MachineOperand &MOUse = *MOIt++;
-        // Check if the use is already local.
-        MachineBasicBlock *InsertMBB;
-        LLVM_DEBUG(MachineInstr &MIUse = *MOUse.getParent();
-                   dbgs() << "Checking use: " << MIUse
-                          << " #Opd: " << MIUse.getOperandNo(&MOUse) << '\n');
-        if (isLocalUse(MOUse, MI, InsertMBB))
-          continue;
-        LLVM_DEBUG(dbgs() << "Fixing non-local use\n");
-        Changed = true;
-        auto MBBAndReg = std::make_pair(InsertMBB, Reg);
-        auto NewVRegIt = MBBWithLocalDef.find(MBBAndReg);
-        if (NewVRegIt == MBBWithLocalDef.end()) {
-          // Create the localized instruction.
-          MachineInstr *LocalizedMI = MF.CloneMachineInstr(&MI);
-          LocalizedInstrs.insert(LocalizedMI);
-          // Don't try to be smart for the insertion point.
-          // There is no guarantee that the first seen use is the first
-          // use in the block.
-          InsertMBB->insert(InsertMBB->SkipPHIsAndLabels(InsertMBB->begin()),
-                            LocalizedMI);
+  // Keep track of the instructions we localized. We'll do a second pass of
+  // intra-block localization to further reduce live ranges.
+  SmallPtrSet<MachineInstr *, 32> LocalizedInstrs;
 
-          // Set a new register for the definition.
-          unsigned NewReg =
-              MRI->createGenericVirtualRegister(MRI->getType(Reg));
-          MRI->setRegClassOrRegBank(NewReg, MRI->getRegClassOrRegBank(Reg));
-          LocalizedMI->getOperand(0).setReg(NewReg);
-          NewVRegIt =
-              MBBWithLocalDef.insert(std::make_pair(MBBAndReg, NewReg)).first;
-          LLVM_DEBUG(dbgs() << "Inserted: " << *LocalizedMI);
-        }
-        LLVM_DEBUG(dbgs() << "Update use with: " << printReg(NewVRegIt->second)
-                          << '\n');
-        // Update the user reg.
-        MOUse.setReg(NewVRegIt->second);
-      }
-    }
-  }
-  return Changed;
+  bool Changed = localizeInterBlock(MF, LocalizedInstrs);
+  return Changed |= localizeIntraBlock(LocalizedInstrs);
 }

Modified: llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.h?rev=363632&r1=363631&r2=363632&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.h (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.h Mon Jun 17 16:20:29 2019
@@ -165,6 +165,10 @@ public:
     return false;
   }
 
+  unsigned getGISelRematGlobalCost() const {
+    return 2;
+  }
+
   bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
                              TTI::ReductionFlags Flags) const;
 

Added: llvm/trunk/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll?rev=363632&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll (added)
+++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll Mon Jun 17 16:20:29 2019
@@ -0,0 +1,62 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc -o - -verify-machineinstrs -O0 -global-isel -stop-after=localizer %s | FileCheck %s
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-ios5.0.0"
+
+@var1 = common global i32 0, align 4
+@var2 = common global i32 0, align 4
+@var3 = common global i32 0, align 4
+@var4 = common global i32 0, align 4
+
+; This is an ll test instead of MIR because -run-pass doesn't seem to support
+; initializing the target TTI which we need for this test.
+
+; Some of the instructions in entry block are dead after this pass so don't
+; strictly need to be checked for.
+
+define i32 @foo() {
+  ; CHECK-LABEL: name: foo
+  ; CHECK: bb.1.entry:
+  ; CHECK:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
+  ; CHECK:   [[GV:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var1
+  ; CHECK:   [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 1
+  ; CHECK:   [[C1:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 2
+  ; CHECK:   [[GV1:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var2
+  ; CHECK:   [[C2:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 3
+  ; CHECK:   [[GV2:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var3
+  ; CHECK:   [[C3:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 0
+  ; CHECK:   [[LOAD:%[0-9]+]]:gpr(s32) = G_LOAD [[GV]](p0) :: (load 4 from @var1)
+  ; CHECK:   [[ICMP:%[0-9]+]]:gpr(s32) = G_ICMP intpred(eq), [[LOAD]](s32), [[C]]
+  ; CHECK:   [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[ICMP]](s32)
+  ; CHECK:   G_BRCOND [[TRUNC]](s1), %bb.2
+  ; CHECK:   G_BR %bb.3
+  ; CHECK: bb.2.if.then:
+  ; CHECK:   successors: %bb.3(0x80000000)
+  ; CHECK:   [[GV3:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var2
+  ; CHECK:   [[C4:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 2
+  ; CHECK:   G_STORE [[C4]](s32), [[GV3]](p0) :: (store 4 into @var2)
+  ; CHECK:   [[C5:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 3
+  ; CHECK:   G_STORE [[C5]](s32), [[GV]](p0) :: (store 4 into @var1)
+  ; CHECK:   [[GV4:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var3
+  ; CHECK:   G_STORE [[C4]](s32), [[GV4]](p0) :: (store 4 into @var3)
+  ; CHECK:   G_STORE [[C5]](s32), [[GV]](p0) :: (store 4 into @var1)
+  ; CHECK: bb.3.if.end:
+  ; CHECK:   [[C6:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 0
+  ; CHECK:   $w0 = COPY [[C6]](s32)
+  ; CHECK:   RET_ReallyLR implicit $w0
+entry:
+  %0 = load i32, i32* @var1, align 4
+  %cmp = icmp eq i32 %0, 1
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  store i32 2, i32* @var2, align 4
+  store i32 3, i32* @var1, align 4
+  store i32 2, i32* @var3, align 4
+  store i32 3, i32* @var1, align 4
+  br label %if.end
+
+if.end:
+  ret i32 0
+}
+

Modified: llvm/trunk/test/CodeGen/AArch64/GlobalISel/localizer.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/GlobalISel/localizer.mir?rev=363632&r1=363631&r2=363632&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/GlobalISel/localizer.mir (original)
+++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/localizer.mir Mon Jun 17 16:20:29 2019
@@ -15,6 +15,29 @@
   define void @float_non_local_phi_use_followed_by_use_fi() { ret void }
   define void @non_local_phi() { ret void }
   define void @non_local_label() { ret void }
+
+  @var1 = common global i32 0, align 4
+  @var2 = common global i32 0, align 4
+  @var3 = common global i32 0, align 4
+  @var4 = common global i32 0, align 4
+
+  define i32 @intrablock_with_globalvalue() {
+  entry:
+    %0 = load i32, i32* @var1, align 4
+    %cmp = icmp eq i32 %0, 1
+    br i1 %cmp, label %if.then, label %if.end
+
+  if.then:
+    store i32 2, i32* @var2, align 4
+    store i32 3, i32* @var1, align 4
+    store i32 2, i32* @var3, align 4
+    store i32 3, i32* @var1, align 4
+    br label %if.end
+
+  if.end:
+    ret i32 0
+  }
+
 ...
 
 ---
@@ -301,3 +324,67 @@ body:             |
     %2:fpr(s32) = G_FADD %0, %1
     G_BR %bb.1
 ...
+---
+name:            intrablock_with_globalvalue
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: intrablock_with_globalvalue
+  ; CHECK: bb.0.entry:
+  ; CHECK:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK:   [[GV:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var1
+  ; CHECK:   [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 1
+  ; CHECK:   [[C1:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 2
+  ; CHECK:   [[GV1:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var2
+  ; CHECK:   [[C2:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 3
+  ; CHECK:   [[GV2:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var3
+  ; CHECK:   [[C3:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 0
+  ; CHECK:   [[LOAD:%[0-9]+]]:gpr(s32) = G_LOAD [[GV]](p0) :: (load 4 from @var1)
+  ; CHECK:   [[ICMP:%[0-9]+]]:gpr(s32) = G_ICMP intpred(eq), [[LOAD]](s32), [[C]]
+  ; CHECK:   [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[ICMP]](s32)
+  ; CHECK:   G_BRCOND [[TRUNC]](s1), %bb.1
+  ; CHECK:   G_BR %bb.2
+  ; CHECK: bb.1.if.then:
+  ; CHECK:   successors: %bb.2(0x80000000)
+  ; CHECK:   [[GV3:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var2
+  ; CHECK:   [[C4:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 2
+  ; CHECK:   G_STORE [[C4]](s32), [[GV3]](p0) :: (store 4 into @var2)
+  ; CHECK:   [[GV4:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var1
+  ; CHECK:   [[C5:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 3
+  ; CHECK:   G_STORE [[C5]](s32), [[GV4]](p0) :: (store 4 into @var1)
+  ; CHECK:   [[GV5:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var3
+  ; CHECK:   G_STORE [[C4]](s32), [[GV5]](p0) :: (store 4 into @var3)
+  ; CHECK:   G_STORE [[C5]](s32), [[GV4]](p0) :: (store 4 into @var1)
+  ; CHECK: bb.2.if.end:
+  ; CHECK:   [[C6:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 0
+  ; CHECK:   $w0 = COPY [[C6]](s32)
+  ; CHECK:   RET_ReallyLR implicit $w0
+
+  ; Some of these instructions are dead. We're checking that the other instructions are
+  ; sunk immediately before their first user in the if.then block or as close as possible.
+  bb.1.entry:
+    %1:gpr(p0) = G_GLOBAL_VALUE @var1
+    %2:gpr(s32) = G_CONSTANT i32 1
+    %4:gpr(s32) = G_CONSTANT i32 2
+    %5:gpr(p0) = G_GLOBAL_VALUE @var2
+    %6:gpr(s32) = G_CONSTANT i32 3
+    %7:gpr(p0) = G_GLOBAL_VALUE @var3
+    %8:gpr(s32) = G_CONSTANT i32 0
+    %0:gpr(s32) = G_LOAD %1(p0) :: (load 4 from @var1)
+    %9:gpr(s32) = G_ICMP intpred(eq), %0(s32), %2
+    %3:gpr(s1) = G_TRUNC %9(s32)
+    G_BRCOND %3(s1), %bb.2
+    G_BR %bb.3
+
+  bb.2.if.then:
+    G_STORE %4(s32), %5(p0) :: (store 4 into @var2)
+    G_STORE %6(s32), %1(p0) :: (store 4 into @var1)
+    G_STORE %4(s32), %7(p0) :: (store 4 into @var3)
+    G_STORE %6(s32), %1(p0) :: (store 4 into @var1)
+
+  bb.3.if.end:
+    $w0 = COPY %8(s32)
+    RET_ReallyLR implicit $w0
+
+...




More information about the llvm-commits mailing list