[llvm-commits] [llvm] r93278 - in /llvm/trunk: include/llvm/CodeGen/Passes.h include/llvm/Target/TargetInstrInfo.h lib/CodeGen/LLVMTargetMachine.cpp lib/CodeGen/OptimizeExts.cpp lib/Target/X86/X86InstrInfo.cpp lib/Target/X86/X86InstrInfo.h test/CodeGen/X86/2008-08-05-SpillerBug.ll test/CodeGen/X86/sext-subreg.ll test/CodeGen/X86/stack-color-with-reg.ll

Evan Cheng evan.cheng at apple.com
Tue Jan 12 16:30:23 PST 2010


Author: evancheng
Date: Tue Jan 12 18:30:23 2010
New Revision: 93278

URL: http://llvm.org/viewvc/llvm-project?rev=93278&view=rev
Log:
Add a quick pass to optimize sign / zero extension instructions. For targets where the pre-extension values are available in the subreg of the result of the extension, replace the uses of the pre-extension value with the result + extract_subreg.

For now, this pass is fairly conservative. It only performs the replacement when both the pre- and post-extension values are used in the block. It will miss cases where the post-extension values are live, but not used.

Added:
    llvm/trunk/lib/CodeGen/OptimizeExts.cpp
    llvm/trunk/test/CodeGen/X86/sext-subreg.ll
Modified:
    llvm/trunk/include/llvm/CodeGen/Passes.h
    llvm/trunk/include/llvm/Target/TargetInstrInfo.h
    llvm/trunk/lib/CodeGen/LLVMTargetMachine.cpp
    llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
    llvm/trunk/lib/Target/X86/X86InstrInfo.h
    llvm/trunk/test/CodeGen/X86/2008-08-05-SpillerBug.ll
    llvm/trunk/test/CodeGen/X86/stack-color-with-reg.ll

Modified: llvm/trunk/include/llvm/CodeGen/Passes.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/Passes.h?rev=93278&r1=93277&r2=93278&view=diff

==============================================================================
--- llvm/trunk/include/llvm/CodeGen/Passes.h (original)
+++ llvm/trunk/include/llvm/CodeGen/Passes.h Tue Jan 12 18:30:23 2010
@@ -170,6 +170,10 @@
   /// instructions.
   FunctionPass *createMachineSinkingPass();
 
+  /// createOptimizeExtsPass - This pass performs sign / zero extension
+  /// optimization by increasing uses of extended values.
+  FunctionPass *createOptimizeExtsPass();
+
   /// createStackSlotColoringPass - This pass performs stack slot coloring.
   FunctionPass *createStackSlotColoringPass(bool);
 

Modified: llvm/trunk/include/llvm/Target/TargetInstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetInstrInfo.h?rev=93278&r1=93277&r2=93278&view=diff

==============================================================================
--- llvm/trunk/include/llvm/Target/TargetInstrInfo.h (original)
+++ llvm/trunk/include/llvm/Target/TargetInstrInfo.h Tue Jan 12 18:30:23 2010
@@ -149,16 +149,15 @@
     return false;
   }
 
-  /// isCoalescableInstr - Return true if the instruction is "coalescable". That
-  /// is, it's like a copy where it's legal for the source to overlap the
-  /// destination. e.g. X86::MOVSX64rr32.
-  virtual bool isCoalescableInstr(const MachineInstr &MI, bool &isCopy,
-                               unsigned &SrcReg, unsigned &DstReg,
-                               unsigned &SrcSubIdx, unsigned &DstSubIdx) const {
-    if (isMoveInstr(MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) {
-      isCopy = true;
-      return true;
-    }
+  /// isCoalescableExtInstr - Return true if the instruction is a "coalescable"
+  /// extension instruction. That is, it's like a copy where it's legal for the
+  /// source to overlap the destination. e.g. X86::MOVSX64rr32. If this returns
+  /// true, then it's expected the pre-extension value is available as a subreg
+  /// of the result register. This also returns the sub-register index in
+  /// SubIdx.
+  virtual bool isCoalescableExtInstr(const MachineInstr &MI,
+                                     unsigned &SrcReg, unsigned &DstReg,
+                                     unsigned &SubIdx) const {
     return false;
   }
 

Modified: llvm/trunk/lib/CodeGen/LLVMTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/LLVMTargetMachine.cpp?rev=93278&r1=93277&r2=93278&view=diff

==============================================================================
--- llvm/trunk/lib/CodeGen/LLVMTargetMachine.cpp (original)
+++ llvm/trunk/lib/CodeGen/LLVMTargetMachine.cpp Tue Jan 12 18:30:23 2010
@@ -62,6 +62,10 @@
     cl::desc("Verify generated machine code"),
     cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL));
 
+#if 1
+static cl::opt<bool> XX("xx", cl::Hidden);
+#endif
+
 // Enable or disable FastISel. Both options are needed, because
 // FastISel is enabled by default with -fast, and we wish to be
 // able to enable or disable fast-isel independently from -O0.
@@ -324,6 +328,7 @@
                  /* allowDoubleDefs= */ true);
 
   if (OptLevel != CodeGenOpt::None) {
+    PM.add(createOptimizeExtsPass());
     if (!DisableMachineLICM)
       PM.add(createMachineLICMPass());
     if (!DisableMachineSink)

Added: llvm/trunk/lib/CodeGen/OptimizeExts.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/OptimizeExts.cpp?rev=93278&view=auto

==============================================================================
--- llvm/trunk/lib/CodeGen/OptimizeExts.cpp (added)
+++ llvm/trunk/lib/CodeGen/OptimizeExts.cpp Tue Jan 12 18:30:23 2010
@@ -0,0 +1,149 @@
+//===-- OptimizeExts.cpp - Optimize sign / zero extension instrs -----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ext-opt"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+static cl::opt<bool> Aggressive("aggressive-ext-opt", cl::Hidden,
+                                cl::desc("Aggressive extension optimization"));
+
+STATISTIC(NumReuse, "Number of extension results reused");
+
+namespace {
+  class OptimizeExts : public MachineFunctionPass {
+    const TargetMachine   *TM;
+    const TargetInstrInfo *TII;
+    MachineRegisterInfo *MRI;
+    MachineDominatorTree *DT;   // Machine dominator tree
+
+  public:
+    static char ID; // Pass identification
+    OptimizeExts() : MachineFunctionPass(&ID) {}
+
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesCFG();
+      MachineFunctionPass::getAnalysisUsage(AU);
+      AU.addRequired<MachineDominatorTree>();
+      AU.addPreserved<MachineDominatorTree>();
+    }
+  };
+}
+
+char OptimizeExts::ID = 0;
+static RegisterPass<OptimizeExts>
+X("opt-exts", "Optimize sign / zero extensions");
+
+FunctionPass *llvm::createOptimizeExtsPass() { return new OptimizeExts(); }
+
+bool OptimizeExts::runOnMachineFunction(MachineFunction &MF) {
+  TM = &MF.getTarget();
+  TII = TM->getInstrInfo();
+  MRI = &MF.getRegInfo();
+  DT = &getAnalysis<MachineDominatorTree>();
+
+  bool Changed = false;
+
+  SmallPtrSet<MachineInstr*, 8> LocalMIs;
+  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+    MachineBasicBlock *MBB = &*I;
+    for (MachineBasicBlock::iterator MII = I->begin(), ME = I->end(); MII != ME;
+         ++MII) {
+      MachineInstr *MI = &*MII;
+      LocalMIs.insert(MI);
+
+      unsigned SrcReg, DstReg, SubIdx;
+      if (TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx)) {
+        if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
+            TargetRegisterInfo::isPhysicalRegister(SrcReg))
+          continue;
+
+        MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg);
+        if (++UI == MRI->use_end())
+          // No other uses.
+          continue;
+
+        // Ok, the source has other uses. See if we can replace the other uses
+        // with use of the result of the extension.
+        
+        SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs;
+        UI = MRI->use_begin(DstReg);
+        for (MachineRegisterInfo::use_iterator UE = MRI->use_end(); UI != UE;
+             ++UI)
+          ReachedBBs.insert(UI->getParent());
+
+        bool ExtendLife = true;
+        SmallVector<MachineOperand*, 8> Uses;
+        SmallVector<MachineOperand*, 8> ExtendedUses;
+
+        UI = MRI->use_begin(SrcReg);
+        for (MachineRegisterInfo::use_iterator UE = MRI->use_end(); UI != UE;
+             ++UI) {
+          MachineOperand &UseMO = UI.getOperand();
+          MachineInstr *UseMI = &*UI;
+          if (UseMI == MI)
+            continue;
+          MachineBasicBlock *UseMBB = UseMI->getParent();
+          if (UseMBB == MBB) {
+            // Local uses that come after the extension.
+            if (!LocalMIs.count(UseMI))
+              Uses.push_back(&UseMO);
+          } else if (ReachedBBs.count(UseMBB))
+            // Non-local uses where the result of extension is used. Always
+            // replace these.
+            Uses.push_back(&UseMO);
+          else if (Aggressive && DT->dominates(MBB, UseMBB))
+            // We may want to extend live range of the extension result in order
+            // to replace these uses.
+            ExtendedUses.push_back(&UseMO);
+          else {
+            // Both will be live out of the def MBB anyway. Don't extend live
+            // range of the extension result.
+            ExtendLife = false;
+            break;
+          }
+        }
+
+        if (ExtendLife && !ExtendedUses.empty())
+          // Ok, we'll extend the liveness of the extension result.
+          std::copy(ExtendedUses.begin(), ExtendedUses.end(),
+                    std::back_inserter(Uses));
+
+        // Now replace all uses.
+        if (!Uses.empty()) {
+          const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
+          for (unsigned i = 0, e = Uses.size(); i != e; ++i) {
+            MachineOperand *UseMO = Uses[i];
+            MachineInstr *UseMI = UseMO->getParent();
+            MachineBasicBlock *UseMBB = UseMI->getParent();
+            unsigned NewVR = MRI->createVirtualRegister(RC);
+            BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
+                    TII->get(TargetInstrInfo::EXTRACT_SUBREG), NewVR)
+              .addReg(DstReg).addImm(SubIdx);
+            UseMO->setReg(NewVR);
+            ++NumReuse;
+            Changed = true;
+          }
+        }
+      }
+    }
+  }
+
+  return Changed;
+}

Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=93278&r1=93277&r2=93278&view=diff

==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Tue Jan 12 18:30:23 2010
@@ -713,9 +713,9 @@
 }
 
 bool
-X86InstrInfo::isCoalescableInstr(const MachineInstr &MI, bool &isCopy,
-                               unsigned &SrcReg, unsigned &DstReg,
-                               unsigned &SrcSubIdx, unsigned &DstSubIdx) const {
+X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
+                                    unsigned &SrcReg, unsigned &DstReg,
+                                    unsigned &SubIdx) const {
   switch (MI.getOpcode()) {
   default: break;
   case X86::MOVSX16rr8:
@@ -733,10 +733,8 @@
     if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg())
       // Be conservative.
       return false;
-    isCopy = false;
     SrcReg = MI.getOperand(1).getReg();
     DstReg = MI.getOperand(0).getReg();
-    DstSubIdx = 0;
     switch (MI.getOpcode()) {
     default:
       llvm_unreachable(0);
@@ -747,22 +745,23 @@
     case X86::MOVZX32rr8:
     case X86::MOVSX64rr8:
     case X86::MOVZX64rr8:
-      SrcSubIdx = 1;
+      SubIdx = 1;
       break;
     case X86::MOVSX32rr16:
     case X86::MOVZX32rr16:
     case X86::MOVSX64rr16:
     case X86::MOVZX64rr16:
-      SrcSubIdx = 3;
+      SubIdx = 3;
       break;
     case X86::MOVSX64rr32:
     case X86::MOVZX64rr32:
-      SrcSubIdx = 4;
+      SubIdx = 4;
       break;
     }
+    return true;
   }
   }
-  return isMoveInstr(MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx);
+  return false;
 }
 
 /// isFrameOperand - Return true and the FrameIndex if the specified

Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.h?rev=93278&r1=93277&r2=93278&view=diff

==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.h Tue Jan 12 18:30:23 2010
@@ -448,13 +448,15 @@
                            unsigned &SrcReg, unsigned &DstReg,
                            unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
 
-  /// isCoalescableInstr - Return true if the instruction is "coalescable". That
-  /// is, it's like a copy where it's legal for the source to overlap the
-  /// destination. e.g. X86::MOVSX64rr32.
-  virtual bool isCoalescableInstr(const MachineInstr &MI, bool &isCopy,
-                                unsigned &SrcReg, unsigned &DstReg,
-                                unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
-
+  /// isCoalescableExtInstr - Return true if the instruction is a "coalescable"
+  /// extension instruction. That is, it's like a copy where it's legal for the
+  /// source to overlap the destination. e.g. X86::MOVSX64rr32. If this returns
+  /// true, then it's expected the pre-extension value is available as a subreg
+  /// of the result register. This also returns the sub-register index in
+  /// SubIdx.
+  virtual bool isCoalescableExtInstr(const MachineInstr &MI,
+                                     unsigned &SrcReg, unsigned &DstReg,
+                                     unsigned &SubIdx) const;
 
   unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
   /// isLoadFromStackSlotPostFE - Check for post-frame ptr elimination

Modified: llvm/trunk/test/CodeGen/X86/2008-08-05-SpillerBug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2008-08-05-SpillerBug.ll?rev=93278&r1=93277&r2=93278&view=diff

==============================================================================
--- llvm/trunk/test/CodeGen/X86/2008-08-05-SpillerBug.ll (original)
+++ llvm/trunk/test/CodeGen/X86/2008-08-05-SpillerBug.ll Tue Jan 12 18:30:23 2010
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -disable-fp-elim -stats |& grep asm-printer | grep 58
+; RUN: llc < %s -mtriple=i386-apple-darwin -disable-fp-elim -stats |& grep asm-printer | grep 57
 ; PR2568
 
 @g_3 = external global i16		; <i16*> [#uses=1]

Added: llvm/trunk/test/CodeGen/X86/sext-subreg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sext-subreg.ll?rev=93278&view=auto

==============================================================================
--- llvm/trunk/test/CodeGen/X86/sext-subreg.ll (added)
+++ llvm/trunk/test/CodeGen/X86/sext-subreg.ll Tue Jan 12 18:30:23 2010
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+; rdar://7529457
+
+define i64 @t(i64 %A, i64 %B, i32* %P, i64 *%P2) nounwind {
+; CHECK: t:
+; CHECK: movslq %e{{.*}}, %rax
+; CHECK: movq %rax
+; CHECK: movl %eax
+  %C = add i64 %A, %B
+  %D = trunc i64 %C to i32
+  volatile store i32 %D, i32* %P
+  %E = shl i64 %C, 32
+  %F = ashr i64 %E, 32  
+  volatile store i64 %F, i64 *%P2
+  volatile store i32 %D, i32* %P
+  ret i64 undef
+}

Modified: llvm/trunk/test/CodeGen/X86/stack-color-with-reg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/stack-color-with-reg.ll?rev=93278&r1=93277&r2=93278&view=diff

==============================================================================
--- llvm/trunk/test/CodeGen/X86/stack-color-with-reg.ll (original)
+++ llvm/trunk/test/CodeGen/X86/stack-color-with-reg.ll Tue Jan 12 18:30:23 2010
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -relocation-model=pic -disable-fp-elim -color-ss-with-regs -stats -info-output-file - > %t
-; RUN:   grep stackcoloring %t | grep "stack slot refs replaced with reg refs"  | grep 6
+; RUN:   grep stackcoloring %t | grep "stack slot refs replaced with reg refs"  | grep 9
 
 	type { [62 x %struct.Bitvec*] }		; type %0
 	type { i8* }		; type %1





More information about the llvm-commits mailing list