[llvm-commits] discouraging use of x86_64 R12 and R13

Mon Nov 17 16:08:19 PST 2008

A trivial patch for a mild (and unproven) performance improvement.
(<rdar://problem/6351057>)

The x86_64 R12 and R13 register encodings share their low three bits
with RSP and RBP, respectively, and thus require slightly longer
instructions than the other R8..R15 registers when used as a base
address.  Specifically, just like [RSP], x86_64 cannot encode [R12]
without an SIB byte.  In like manner, [R13] resembles [RBP]: it cannot
be encoded directly and must be expressed as [R13+0], with an explicit
zero displacement byte.

Since R14 and R15 don't have these drawbacks, here is a patch for LLVM  
to prefer them over R12 & R13.  By "prefer," we really mean "allocate  
R14 and R15 first."

This has passed DejaGNU with no regressions, and some experimentation  
with a hacky testcase suggests that it works, at least for 32-bit  
integers.  I haven't tested all the permutations (bytes, shorts, long  
longs).

If this is acceptable, would someone kindly commit this?

stuart

Index: llvm.regorder/lib/Target/X86/X86RegisterInfo.td
===================================================================

--- llvm.regorder/lib/Target/X86/X86RegisterInfo.td	(revision 59450)
+++ llvm.regorder/lib/Target/X86/X86RegisterInfo.td	(working copy)
@@ -181,9 +181,9 @@ def x86_subreg_16bit   : PatLeaf<(i32 2)
  def x86_subreg_32bit   : PatLeaf<(i32 3)>;

  def : SubRegSet<1, [AX, CX, DX, BX, SP,  BP,  SI,  DI,
-                    R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W],
+                    R8W, R9W, R10W, R11W, R14W, R15W, R12W, R13W],
                     [AL, CL, DL, BL, SPL, BPL, SIL, DIL,
-                    R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>;
+                    R8B, R9B, R10B, R11B, R14B, R15B, R12B, R13B]>;

  // It's unclear if this subreg set is safe, given that not all registers
  // in the class have an 'H' subreg.
@@ -191,30 +191,30 @@ def : SubRegSet<1, [AX, CX, DX, BX, SP,
  //                    [AH, CH, DH, BH]>;

  def : SubRegSet<1, [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI,
-                    R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D],
+                    R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D],
                     [AL, CL, DL, BL, SPL, BPL, SIL, DIL,
-                    R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>;
+                    R8B, R9B, R10B, R11B, R14B, R15B, R12B, R13B]>;

  def : SubRegSet<2, [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI,
-                    R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D],
+                    R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D],
                     [AX,  CX,  DX,  BX,  SP,  BP,  SI,  DI,
-                    R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W]>;
+                    R8W, R9W, R10W, R11W, R14W, R15W, R12W, R13W]>;


  def : SubRegSet<1, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI,
-                    R8,  R9,  R10, R11, R12, R13, R14, R15],
+                    R8,  R9,  R10, R11, R14, R15, R12, R13],
                     [AL, CL, DL, BL, SPL, BPL, SIL, DIL,
-                    R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>;
+                    R8B, R9B, R10B, R11B, R14B, R15B, R12B, R13B]>;

  def : SubRegSet<2, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI,
-                    R8,  R9,  R10, R11, R12, R13, R14, R15],
+                    R8,  R9,  R10, R11, R14, R15, R12, R13],
                     [AX,  CX,  DX,  BX,  SP,  BP,  SI,  DI,
-                    R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W]>;
+                    R8W, R9W, R10W, R11W, R14W, R15W, R12W, R13W]>;

  def : SubRegSet<3, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI,
-                    R8,  R9,  R10, R11, R12, R13, R14, R15],
+                    R8,  R9,  R10, R11, R14, R15, R12, R13],
                     [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI,
-                    R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D]>;
+                    R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D]>;

  //===----------------------------------------------------------------------===//
  // Register Class Definitions... now that we have all of the pieces, define the
@@ -229,7 +229,7 @@ def : SubRegSet<3, [RAX, RCX, RDX, RBX,
  // FIXME: Allow AH, CH, DH, BH in 64-mode for non-REX instructions,
  def GR8 : RegisterClass<"X86", [i8],  8,
                          [AL, CL, DL, BL, AH, CH, DH, BH, SIL, DIL,  
BPL, SPL,
-                         R8B, R9B, R10B, R11B, R12B, R13B, R14B,  
R15B]> {
+                         R8B, R9B, R10B, R11B, R14B, R15B, R12B,  
R13B]> {
    let MethodProtos = [{
      iterator allocation_order_begin(const MachineFunction &MF) const;
      iterator allocation_order_end(const MachineFunction &MF) const;
@@ -240,12 +240,12 @@ def GR8 : RegisterClass<"X86", [i8],  8,
        static const unsigned X86_GR8_AO_64_fp[] =
        {X86::AL,   X86::CL,   X86::DL,   X86::SIL, X86::DIL,
         X86::R8B,  X86::R9B,  X86::R10B, X86::R11B,
-       X86::BL,   X86::R12B, X86::R13B, X86::R14B, X86::R15B};
+       X86::BL,   X86::R14B, X86::R15B, X86::R12B, X86::R13B};
        // If not, just don't allocate SPL.
        static const unsigned X86_GR8_AO_64[] =
        {X86::AL,   X86::CL,   X86::DL,   X86::SIL, X86::DIL,
         X86::R8B,  X86::R9B,  X86::R10B, X86::R11B,
-       X86::BL,   X86::R12B, X86::R13B, X86::R14B, X86::R15B,  
X86::BPL};
+       X86::BL,   X86::R14B, X86::R15B, X86::R12B, X86::R13B,  
X86::BPL};
        // In 32-mode, none of the 8-bit registers aliases EBP or ESP.
        static const unsigned X86_GR8_AO_32[] =
        {X86::AL, X86::CL, X86::DL, X86::AH, X86::CH, X86::DH,  
X86::BL, X86::BH};
@@ -281,7 +281,7 @@ def GR8 : RegisterClass<"X86", [i8],  8,

  def GR16 : RegisterClass<"X86", [i16], 16,
                           [AX, CX, DX, SI, DI, BX, BP, SP,
-                          R8W, R9W, R10W, R11W, R12W, R13W, R14W,  
R15W]> {
+                          R8W, R9W, R10W, R11W, R14W, R15W, R12W,  
R13W]> {
    let SubRegClassList = [GR8];
    let MethodProtos = [{
      iterator allocation_order_begin(const MachineFunction &MF) const;
@@ -293,14 +293,14 @@ def GR16 : RegisterClass<"X86", [i16], 1
        static const unsigned X86_GR16_AO_64_fp[] =
        {X86::AX,  X86::CX,   X86::DX,   X86::SI,   X86::DI,
         X86::R8W, X86::R9W,  X86::R10W, X86::R11W,
-       X86::BX,  X86::R12W, X86::R13W, X86::R14W, X86::R15W};
+       X86::BX, X86::R14W, X86::R15W,  X86::R12W, X86::R13W};
        static const unsigned X86_GR16_AO_32_fp[] =
        {X86::AX, X86::CX, X86::DX, X86::SI, X86::DI, X86::BX};
        // If not, just don't allocate SPL.
        static const unsigned X86_GR16_AO_64[] =
        {X86::AX,  X86::CX,   X86::DX,   X86::SI,   X86::DI,
         X86::R8W, X86::R9W,  X86::R10W, X86::R11W,
-       X86::BX,  X86::R12W, X86::R13W, X86::R14W, X86::R15W, X86::BP};
+       X86::BX, X86::R14W, X86::R15W,  X86::R12W, X86::R13W, X86::BP};
        static const unsigned X86_GR16_AO_32[] =
        {X86::AX, X86::CX, X86::DX, X86::SI, X86::DI, X86::BX, X86::BP};

@@ -345,7 +345,7 @@ def GR16 : RegisterClass<"X86", [i16], 1

  def GR32 : RegisterClass<"X86", [i32], 32,
                           [EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP,
-                          R8D, R9D, R10D, R11D, R12D, R13D, R14D,  
R15D]> {
+                          R8D, R9D, R10D, R11D, R14D, R15D, R12D,  
R13D]> {
    let SubRegClassList = [GR8, GR16];
    let MethodProtos = [{
      iterator allocation_order_begin(const MachineFunction &MF) const;
@@ -357,14 +357,14 @@ def GR32 : RegisterClass<"X86", [i32], 3
        static const unsigned X86_GR32_AO_64_fp[] =
        {X86::EAX, X86::ECX,  X86::EDX,  X86::ESI,  X86::EDI,
         X86::R8D, X86::R9D,  X86::R10D, X86::R11D,
-       X86::EBX, X86::R12D, X86::R13D, X86::R14D, X86::R15D};
+       X86::EBX, X86::R14D, X86::R15D, X86::R12D, X86::R13D};
        static const unsigned X86_GR32_AO_32_fp[] =
        {X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, X86::EBX};
        // If not, just don't allocate SPL.
        static const unsigned X86_GR32_AO_64[] =
        {X86::EAX, X86::ECX,  X86::EDX,  X86::ESI,  X86::EDI,
         X86::R8D, X86::R9D,  X86::R10D, X86::R11D,
-       X86::EBX, X86::R12D, X86::R13D, X86::R14D, X86::R15D, X86::EBP};
+       X86::EBX, X86::R14D, X86::R15D, X86::R12D, X86::R13D, X86::EBP};
        static const unsigned X86_GR32_AO_32[] =
        {X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, X86::EBX,  
X86::EBP};

@@ -409,7 +409,7 @@ def GR32 : RegisterClass<"X86", [i32], 3

  def GR64 : RegisterClass<"X86", [i64], 64,
                           [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
-                          RBX, R12, R13, R14, R15, RBP, RSP]> {
+                          RBX, R14, R15, R12, R13, RBP, RSP]> {
    let SubRegClassList = [GR8, GR16, GR32];
    let MethodProtos = [{
      iterator allocation_order_end(const MachineFunction &MF) const;