[llvm-commits] CVS: llvm/lib/Target/X86/X86ISelDAGToDAG.cpp X86InstrInfo.cpp

Mon Mar 19 23:08:46 PDT 2007

Changes in directory llvm/lib/Target/X86:

X86ISelDAGToDAG.cpp updated: 1.145 -> 1.146
X86InstrInfo.cpp updated: 1.78 -> 1.79
---
Log message:

Two changes:
1) Codegen a shift of a register as a shift, not an LEA.
2) Teach the register allocator (RA) to convert a shift to an LEA instruction
   if it wants something in three-address form.

This gives us asm diffs like:

-       leal (,%eax,4), %eax
+       shll $2, %eax

which is faster on some processors and smaller on all of them.

and, more interestingly:

-       movl 24(%esi), %eax
-       leal (,%eax,4), %edi
+       movl 24(%esi), %edi
+       shll $2, %edi

Without #2, #1 was a significant pessimization in some cases.

This implements CodeGen/X86/shift-codegen.ll



---
Diffs of the changes:  (+39 -13)

 X86ISelDAGToDAG.cpp |    7 +++----
 X86InstrInfo.cpp    |   45 ++++++++++++++++++++++++++++++++++++---------
 2 files changed, 39 insertions(+), 13 deletions(-)


Index: llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
diff -u llvm/lib/Target/X86/X86ISelDAGToDAG.cpp:1.145 llvm/lib/Target/X86/X86ISelDAGToDAG.cpp:1.146

--- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp:1.145	Sun Feb  4 14:18:17 2007
+++ llvm/lib/Target/X86/X86ISelDAGToDAG.cpp	Tue Mar 20 01:08:29 2007
@@ -886,10 +886,9 @@
   else
     AM.IndexReg = CurDAG->getRegister(0, VT);
 
-  if (AM.Scale > 2) 
-    Complexity += 2;
-  // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg
-  else if (AM.Scale > 1)
+  // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with
+  // a simple shift.
+  if (AM.Scale > 1)
     Complexity++;
 
   // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA


Index: llvm/lib/Target/X86/X86InstrInfo.cpp
diff -u llvm/lib/Target/X86/X86InstrInfo.cpp:1.78 llvm/lib/Target/X86/X86InstrInfo.cpp:1.79
--- llvm/lib/Target/X86/X86InstrInfo.cpp:1.78	Thu Mar  8 16:09:11 2007
+++ llvm/lib/Target/X86/X86InstrInfo.cpp	Tue Mar 20 01:08:29 2007
@@ -132,29 +132,57 @@
 
   MachineInstr *NewMI = NULL;
   // FIXME: 16-bit LEA's are really slow on Athlons, but not bad on P4's.  When
-  // we have subtarget support, enable the 16-bit LEA generation here.
+  // we have better subtarget support, enable the 16-bit LEA generation here.
   bool DisableLEA16 = true;
 
   switch (MI->getOpcode()) {
-  default: break;
+  default: return 0;
   case X86::SHUFPSrri: {
     assert(MI->getNumOperands() == 4 && "Unknown shufps instruction!");
-    const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
+    if (!TM.getSubtarget<X86Subtarget>().hasSSE2()) return 0;
+    
     unsigned A = MI->getOperand(0).getReg();
     unsigned B = MI->getOperand(1).getReg();
     unsigned C = MI->getOperand(2).getReg();
-    unsigned M = MI->getOperand(3).getImmedValue();
-    if (!Subtarget->hasSSE2() || B != C) return 0;
+    unsigned M = MI->getOperand(3).getImm();
+    if (B != C) return 0;
     NewMI = BuildMI(get(X86::PSHUFDri), A).addReg(B).addImm(M);
-    goto Done;
+    break;
+  }
+  case X86::SHL32ri: {
+    assert(MI->getNumOperands() == 3 && "Unknown shift instruction!");
+    // NOTE: LEA doesn't produce flags like shift does, but LLVM never uses
+    // the flags produced by a shift yet, so this is safe.
+    unsigned Dest = MI->getOperand(0).getReg();
+    unsigned Src = MI->getOperand(1).getReg();
+    unsigned ShAmt = MI->getOperand(2).getImm();
+    if (ShAmt == 0 || ShAmt >= 4) return 0;
+    
+    NewMI = BuildMI(get(X86::LEA32r), Dest)
+      .addReg(0).addImm(1 << ShAmt).addReg(Src).addImm(0);
+    break;
+  }
+  case X86::SHL16ri: {
+    assert(MI->getNumOperands() == 3 && "Unknown shift instruction!");
+    if (DisableLEA16) return 0;
+    
+    // NOTE: LEA doesn't produce flags like shift does, but LLVM never uses
+    // the flags produced by a shift yet, so this is safe.
+    unsigned Dest = MI->getOperand(0).getReg();
+    unsigned Src = MI->getOperand(1).getReg();
+    unsigned ShAmt = MI->getOperand(2).getImm();
+    if (ShAmt == 0 || ShAmt >= 4) return 0;
+    
+    NewMI = BuildMI(get(X86::LEA16r), Dest)
+      .addReg(0).addImm(1 << ShAmt).addReg(Src).addImm(0);
+    break;
   }
   }
 
   // FIXME: None of these instructions are promotable to LEAs without
   // additional information.  In particular, LEA doesn't set the flags that
   // add and inc do.  :(
-  return 0;
-
+  if (0)
   switch (MI->getOpcode()) {
   case X86::INC32r:
   case X86::INC64_32r:
@@ -220,7 +248,6 @@
     break;
   }
 
-Done:
   if (NewMI) {
     NewMI->copyKillDeadInfo(MI);
     LV.instructionChanged(MI, NewMI);  // Update live variables