[llvm-commits] discouraging use of x86_64 R12 and R13
Stuart Hastings
stuart at apple.com
Mon Nov 17 16:08:19 PST 2008
A trivial patch for a mild (and unproven) performance improvement. (<rdar://problem/6351057>)
The x86_64 R12 and R13 register encodings are related to the RSP and
RBP encodings, respectively, and thus require slightly longer
instructions than the other R8..R15 registers. Specifically, just like
[RBP], the x86_64 cannot encode [R13] directly; this is expressed as
[R13+0]. In like manner, [R12] resembles [RSP], and requires an SIB byte.
Since R14 and R15 don't have these drawbacks, here is a patch for LLVM
to prefer them over R12 & R13. By "prefer," we really mean "allocate
R14 and R15 first."
This has passed DejaGNU with no regressions, and some experimentation
with a hacky testcase suggests that it works, at least for 32-bit
integers. I haven't tested all the permutations (bytes, shorts, long
longs).
If this is acceptable, would someone kindly commit this?
stuart
Index: llvm.regorder/lib/Target/X86/X86RegisterInfo.td
===================================================================
--- llvm.regorder/lib/Target/X86/X86RegisterInfo.td (revision 59450)
+++ llvm.regorder/lib/Target/X86/X86RegisterInfo.td (working copy)
@@ -181,9 +181,9 @@ def x86_subreg_16bit : PatLeaf<(i32 2)
def x86_subreg_32bit : PatLeaf<(i32 3)>;
def : SubRegSet<1, [AX, CX, DX, BX, SP, BP, SI, DI,
- R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W],
+ R8W, R9W, R10W, R11W, R14W, R15W, R12W, R13W],
[AL, CL, DL, BL, SPL, BPL, SIL, DIL,
- R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>;
+ R8B, R9B, R10B, R11B, R14B, R15B, R12B, R13B]>;
// It's unclear if this subreg set is safe, given that not all registers
// in the class have an 'H' subreg.
@@ -191,30 +191,30 @@ def : SubRegSet<1, [AX, CX, DX, BX, SP,
// [AH, CH, DH, BH]>;
def : SubRegSet<1, [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI,
- R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D],
+ R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D],
[AL, CL, DL, BL, SPL, BPL, SIL, DIL,
- R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>;
+ R8B, R9B, R10B, R11B, R14B, R15B, R12B, R13B]>;
def : SubRegSet<2, [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI,
- R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D],
+ R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D],
[AX, CX, DX, BX, SP, BP, SI, DI,
- R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W]>;
+ R8W, R9W, R10W, R11W, R14W, R15W, R12W, R13W]>;
def : SubRegSet<1, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI,
- R8, R9, R10, R11, R12, R13, R14, R15],
+ R8, R9, R10, R11, R14, R15, R12, R13],
[AL, CL, DL, BL, SPL, BPL, SIL, DIL,
- R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>;
+ R8B, R9B, R10B, R11B, R14B, R15B, R12B, R13B]>;
def : SubRegSet<2, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI,
- R8, R9, R10, R11, R12, R13, R14, R15],
+ R8, R9, R10, R11, R14, R15, R12, R13],
[AX, CX, DX, BX, SP, BP, SI, DI,
- R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W]>;
+ R8W, R9W, R10W, R11W, R14W, R15W, R12W, R13W]>;
def : SubRegSet<3, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI,
- R8, R9, R10, R11, R12, R13, R14, R15],
+ R8, R9, R10, R11, R14, R15, R12, R13],
[EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI,
- R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D]>;
+ R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D]>;
//===----------------------------------------------------------------------===//
// Register Class Definitions... now that we have all of the pieces, define the
@@ -229,7 +229,7 @@ def : SubRegSet<3, [RAX, RCX, RDX, RBX,
// FIXME: Allow AH, CH, DH, BH in 64-mode for non-REX instructions,
def GR8 : RegisterClass<"X86", [i8], 8,
[AL, CL, DL, BL, AH, CH, DH, BH, SIL, DIL,
BPL, SPL,
- R8B, R9B, R10B, R11B, R12B, R13B, R14B,
R15B]> {
+ R8B, R9B, R10B, R11B, R14B, R15B, R12B,
R13B]> {
let MethodProtos = [{
iterator allocation_order_begin(const MachineFunction &MF) const;
iterator allocation_order_end(const MachineFunction &MF) const;
@@ -240,12 +240,12 @@ def GR8 : RegisterClass<"X86", [i8], 8,
static const unsigned X86_GR8_AO_64_fp[] =
{X86::AL, X86::CL, X86::DL, X86::SIL, X86::DIL,
X86::R8B, X86::R9B, X86::R10B, X86::R11B,
- X86::BL, X86::R12B, X86::R13B, X86::R14B, X86::R15B};
+ X86::BL, X86::R14B, X86::R15B, X86::R12B, X86::R13B};
// If not, just don't allocate SPL.
static const unsigned X86_GR8_AO_64[] =
{X86::AL, X86::CL, X86::DL, X86::SIL, X86::DIL,
X86::R8B, X86::R9B, X86::R10B, X86::R11B,
- X86::BL, X86::R12B, X86::R13B, X86::R14B, X86::R15B,
X86::BPL};
+ X86::BL, X86::R14B, X86::R15B, X86::R12B, X86::R13B,
X86::BPL};
// In 32-mode, none of the 8-bit registers aliases EBP or ESP.
static const unsigned X86_GR8_AO_32[] =
{X86::AL, X86::CL, X86::DL, X86::AH, X86::CH, X86::DH,
X86::BL, X86::BH};
@@ -281,7 +281,7 @@ def GR8 : RegisterClass<"X86", [i8], 8,
def GR16 : RegisterClass<"X86", [i16], 16,
[AX, CX, DX, SI, DI, BX, BP, SP,
- R8W, R9W, R10W, R11W, R12W, R13W, R14W,
R15W]> {
+ R8W, R9W, R10W, R11W, R14W, R15W, R12W,
R13W]> {
let SubRegClassList = [GR8];
let MethodProtos = [{
iterator allocation_order_begin(const MachineFunction &MF) const;
@@ -293,14 +293,14 @@ def GR16 : RegisterClass<"X86", [i16], 1
static const unsigned X86_GR16_AO_64_fp[] =
{X86::AX, X86::CX, X86::DX, X86::SI, X86::DI,
X86::R8W, X86::R9W, X86::R10W, X86::R11W,
- X86::BX, X86::R12W, X86::R13W, X86::R14W, X86::R15W};
+ X86::BX, X86::R14W, X86::R15W, X86::R12W, X86::R13W};
static const unsigned X86_GR16_AO_32_fp[] =
{X86::AX, X86::CX, X86::DX, X86::SI, X86::DI, X86::BX};
// If not, just don't allocate SPL.
static const unsigned X86_GR16_AO_64[] =
{X86::AX, X86::CX, X86::DX, X86::SI, X86::DI,
X86::R8W, X86::R9W, X86::R10W, X86::R11W,
- X86::BX, X86::R12W, X86::R13W, X86::R14W, X86::R15W, X86::BP};
+ X86::BX, X86::R14W, X86::R15W, X86::R12W, X86::R13W, X86::BP};
static const unsigned X86_GR16_AO_32[] =
{X86::AX, X86::CX, X86::DX, X86::SI, X86::DI, X86::BX, X86::BP};
@@ -345,7 +345,7 @@ def GR16 : RegisterClass<"X86", [i16], 1
def GR32 : RegisterClass<"X86", [i32], 32,
[EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP,
- R8D, R9D, R10D, R11D, R12D, R13D, R14D,
R15D]> {
+ R8D, R9D, R10D, R11D, R14D, R15D, R12D,
R13D]> {
let SubRegClassList = [GR8, GR16];
let MethodProtos = [{
iterator allocation_order_begin(const MachineFunction &MF) const;
@@ -357,14 +357,14 @@ def GR32 : RegisterClass<"X86", [i32], 3
static const unsigned X86_GR32_AO_64_fp[] =
{X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI,
X86::R8D, X86::R9D, X86::R10D, X86::R11D,
- X86::EBX, X86::R12D, X86::R13D, X86::R14D, X86::R15D};
+ X86::EBX, X86::R14D, X86::R15D, X86::R12D, X86::R13D};
static const unsigned X86_GR32_AO_32_fp[] =
{X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, X86::EBX};
// If not, just don't allocate SPL.
static const unsigned X86_GR32_AO_64[] =
{X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI,
X86::R8D, X86::R9D, X86::R10D, X86::R11D,
- X86::EBX, X86::R12D, X86::R13D, X86::R14D, X86::R15D, X86::EBP};
+ X86::EBX, X86::R14D, X86::R15D, X86::R12D, X86::R13D, X86::EBP};
static const unsigned X86_GR32_AO_32[] =
{X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, X86::EBX,
X86::EBP};
@@ -409,7 +409,7 @@ def GR32 : RegisterClass<"X86", [i32], 3
def GR64 : RegisterClass<"X86", [i64], 64,
[RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
- RBX, R12, R13, R14, R15, RBP, RSP]> {
+ RBX, R14, R15, R12, R13, RBP, RSP]> {
let SubRegClassList = [GR8, GR16, GR32];
let MethodProtos = [{
iterator allocation_order_end(const MachineFunction &MF) const;
More information about the llvm-commits
mailing list