[llvm-commits] discouraging use of x86_64 R12 and R13

Evan Cheng evan.cheng at apple.com
Mon Nov 17 18:08:31 PST 2008


On Nov 17, 2008, at 4:08 PM, Stuart Hastings wrote:

> A trivial patch for a mild (and unproven) performance improvement.
> (<rdar://problem/6351057>)
>
> The x86_64 R12 and R13 register encodings are related to the RSP and
> RBP encodings, respectively, and thus require slightly longer
> instructions than the other R8..R15 registers.  Specifically, just
> like [RSP], [R12] requires an SIB byte.  In like manner, [R13]
> resembles [RBP]: the x86_64 cannot encode [R13] directly, so it is
> expressed as [R13+0].
>
> Since R14 and R15 don't have these drawbacks, here is a patch for LLVM
> to prefer them over R12 & R13.  By "prefer," we really mean "allocate
> R14 and R15 first."
>
> This has passed DejaGNU with no regressions, and some experimentation
> with a hacky testcase suggests that it works, at least for 32-bit
> integers.  I haven't tested all the permutations (bytes, shorts, long
> longs).
>
> If this is acceptable, would someone kindly commit this?

Thanks. Comments below.

>
>
> stuart
>
> Index: llvm.regorder/lib/Target/X86/X86RegisterInfo.td
> ===================================================================
> --- llvm.regorder/lib/Target/X86/X86RegisterInfo.td	(revision 59450)
> +++ llvm.regorder/lib/Target/X86/X86RegisterInfo.td	(working copy)
> @@ -181,9 +181,9 @@ def x86_subreg_16bit   : PatLeaf<(i32 2)
>  def x86_subreg_32bit   : PatLeaf<(i32 3)>;
>

You don't need to touch these SubRegSet defs. They have nothing to do  
with allocation orders.

Otherwise, the patch is fine. Please avoid inlining the patch though.  
It makes it difficult to extract it out of the email message.

Thanks,

Evan


>  def : SubRegSet<1, [AX, CX, DX, BX, SP,  BP,  SI,  DI,
> -                    R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W],
> +                    R8W, R9W, R10W, R11W, R14W, R15W, R12W, R13W],
>                     [AL, CL, DL, BL, SPL, BPL, SIL, DIL,
> -                    R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>;
> +                    R8B, R9B, R10B, R11B, R14B, R15B, R12B, R13B]>;
>
>  // It's unclear if this subreg set is safe, given that not all registers
>  // in the class have an 'H' subreg.
> @@ -191,30 +191,30 @@ def : SubRegSet<1, [AX, CX, DX, BX, SP,
>  //                    [AH, CH, DH, BH]>;
>
>  def : SubRegSet<1, [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI,
> -                    R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D],
> +                    R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D],
>                     [AL, CL, DL, BL, SPL, BPL, SIL, DIL,
> -                    R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>;
> +                    R8B, R9B, R10B, R11B, R14B, R15B, R12B, R13B]>;
>
>  def : SubRegSet<2, [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI,
> -                    R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D],
> +                    R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D],
>                     [AX,  CX,  DX,  BX,  SP,  BP,  SI,  DI,
> -                    R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W]>;
> +                    R8W, R9W, R10W, R11W, R14W, R15W, R12W, R13W]>;
>
>
>  def : SubRegSet<1, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI,
> -                    R8,  R9,  R10, R11, R12, R13, R14, R15],
> +                    R8,  R9,  R10, R11, R14, R15, R12, R13],
>                     [AL, CL, DL, BL, SPL, BPL, SIL, DIL,
> -                    R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>;
> +                    R8B, R9B, R10B, R11B, R14B, R15B, R12B, R13B]>;
>
>  def : SubRegSet<2, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI,
> -                    R8,  R9,  R10, R11, R12, R13, R14, R15],
> +                    R8,  R9,  R10, R11, R14, R15, R12, R13],
>                     [AX,  CX,  DX,  BX,  SP,  BP,  SI,  DI,
> -                    R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W]>;
> +                    R8W, R9W, R10W, R11W, R14W, R15W, R12W, R13W]>;
>
>  def : SubRegSet<3, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI,
> -                    R8,  R9,  R10, R11, R12, R13, R14, R15],
> +                    R8,  R9,  R10, R11, R14, R15, R12, R13],
>                     [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI,
> -                    R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D]>;
> +                    R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D]>;
>
>  //===----------------------------------------------------------------------===//
>  // Register Class Definitions... now that we have all of the pieces, define the
> @@ -229,7 +229,7 @@ def : SubRegSet<3, [RAX, RCX, RDX, RBX,
>  // FIXME: Allow AH, CH, DH, BH in 64-mode for non-REX instructions,
>  def GR8 : RegisterClass<"X86", [i8],  8,
>                          [AL, CL, DL, BL, AH, CH, DH, BH, SIL, DIL,
> BPL, SPL,
> -                         R8B, R9B, R10B, R11B, R12B, R13B, R14B,
> R15B]> {
> +                         R8B, R9B, R10B, R11B, R14B, R15B, R12B,
> R13B]> {
>    let MethodProtos = [{
>      iterator allocation_order_begin(const MachineFunction &MF) const;
>      iterator allocation_order_end(const MachineFunction &MF) const;
> @@ -240,12 +240,12 @@ def GR8 : RegisterClass<"X86", [i8],  8,
>        static const unsigned X86_GR8_AO_64_fp[] =
>        {X86::AL,   X86::CL,   X86::DL,   X86::SIL, X86::DIL,
>         X86::R8B,  X86::R9B,  X86::R10B, X86::R11B,
> -       X86::BL,   X86::R12B, X86::R13B, X86::R14B, X86::R15B};
> +       X86::BL,   X86::R14B, X86::R15B, X86::R12B, X86::R13B};
>        // If not, just don't allocate SPL.
>        static const unsigned X86_GR8_AO_64[] =
>        {X86::AL,   X86::CL,   X86::DL,   X86::SIL, X86::DIL,
>         X86::R8B,  X86::R9B,  X86::R10B, X86::R11B,
> -       X86::BL,   X86::R12B, X86::R13B, X86::R14B, X86::R15B,
> X86::BPL};
> +       X86::BL,   X86::R14B, X86::R15B, X86::R12B, X86::R13B,
> X86::BPL};
>        // In 32-mode, none of the 8-bit registers aliases EBP or ESP.
>        static const unsigned X86_GR8_AO_32[] =
>        {X86::AL, X86::CL, X86::DL, X86::AH, X86::CH, X86::DH,
> X86::BL, X86::BH};
> @@ -281,7 +281,7 @@ def GR8 : RegisterClass<"X86", [i8],  8,
>
>  def GR16 : RegisterClass<"X86", [i16], 16,
>                           [AX, CX, DX, SI, DI, BX, BP, SP,
> -                          R8W, R9W, R10W, R11W, R12W, R13W, R14W,
> R15W]> {
> +                          R8W, R9W, R10W, R11W, R14W, R15W, R12W,
> R13W]> {
>    let SubRegClassList = [GR8];
>    let MethodProtos = [{
>      iterator allocation_order_begin(const MachineFunction &MF) const;
> @@ -293,14 +293,14 @@ def GR16 : RegisterClass<"X86", [i16], 1
>        static const unsigned X86_GR16_AO_64_fp[] =
>        {X86::AX,  X86::CX,   X86::DX,   X86::SI,   X86::DI,
>         X86::R8W, X86::R9W,  X86::R10W, X86::R11W,
> -       X86::BX,  X86::R12W, X86::R13W, X86::R14W, X86::R15W};
> +       X86::BX, X86::R14W, X86::R15W,  X86::R12W, X86::R13W};
>        static const unsigned X86_GR16_AO_32_fp[] =
>        {X86::AX, X86::CX, X86::DX, X86::SI, X86::DI, X86::BX};
>        // If not, just don't allocate SPL.
>        static const unsigned X86_GR16_AO_64[] =
>        {X86::AX,  X86::CX,   X86::DX,   X86::SI,   X86::DI,
>         X86::R8W, X86::R9W,  X86::R10W, X86::R11W,
> -       X86::BX,  X86::R12W, X86::R13W, X86::R14W, X86::R15W,  
> X86::BP};
> +       X86::BX, X86::R14W, X86::R15W,  X86::R12W, X86::R13W,  
> X86::BP};
>        static const unsigned X86_GR16_AO_32[] =
>        {X86::AX, X86::CX, X86::DX, X86::SI, X86::DI, X86::BX,  
> X86::BP};
>
> @@ -345,7 +345,7 @@ def GR16 : RegisterClass<"X86", [i16], 1
>
>  def GR32 : RegisterClass<"X86", [i32], 32,
>                           [EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP,
> -                          R8D, R9D, R10D, R11D, R12D, R13D, R14D,
> R15D]> {
> +                          R8D, R9D, R10D, R11D, R14D, R15D, R12D,
> R13D]> {
>    let SubRegClassList = [GR8, GR16];
>    let MethodProtos = [{
>      iterator allocation_order_begin(const MachineFunction &MF) const;
> @@ -357,14 +357,14 @@ def GR32 : RegisterClass<"X86", [i32], 3
>        static const unsigned X86_GR32_AO_64_fp[] =
>        {X86::EAX, X86::ECX,  X86::EDX,  X86::ESI,  X86::EDI,
>         X86::R8D, X86::R9D,  X86::R10D, X86::R11D,
> -       X86::EBX, X86::R12D, X86::R13D, X86::R14D, X86::R15D};
> +       X86::EBX, X86::R14D, X86::R15D, X86::R12D, X86::R13D};
>        static const unsigned X86_GR32_AO_32_fp[] =
>        {X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, X86::EBX};
>        // If not, just don't allocate SPL.
>        static const unsigned X86_GR32_AO_64[] =
>        {X86::EAX, X86::ECX,  X86::EDX,  X86::ESI,  X86::EDI,
>         X86::R8D, X86::R9D,  X86::R10D, X86::R11D,
> -       X86::EBX, X86::R12D, X86::R13D, X86::R14D, X86::R15D,  
> X86::EBP};
> +       X86::EBX, X86::R14D, X86::R15D, X86::R12D, X86::R13D,  
> X86::EBP};
>        static const unsigned X86_GR32_AO_32[] =
>        {X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, X86::EBX,
> X86::EBP};
>
> @@ -409,7 +409,7 @@ def GR32 : RegisterClass<"X86", [i32], 3
>
>  def GR64 : RegisterClass<"X86", [i64], 64,
>                           [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
> -                          RBX, R12, R13, R14, R15, RBP, RSP]> {
> +                          RBX, R14, R15, R12, R13, RBP, RSP]> {
>    let SubRegClassList = [GR8, GR16, GR32];
>    let MethodProtos = [{
>      iterator allocation_order_end(const MachineFunction &MF) const;
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits




More information about the llvm-commits mailing list