[llvm] r255656 - [X86] Smaller code for materializing 32-bit 1 and -1 constants
Hans Wennborg via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 15 09:10:28 PST 2015
Author: hans
Date: Tue Dec 15 11:10:28 2015
New Revision: 255656
URL: http://llvm.org/viewvc/llvm-project?rev=255656&view=rev
Log:
[X86] Smaller code for materializing 32-bit 1 and -1 constants
"movl $-1, %eax" is 5 bytes, "xorl %eax, %eax; decl %eax" is 3 bytes.
This commit makes LLVM use the latter when optimizing for size.
Differential Revision: http://reviews.llvm.org/D14971
Added:
llvm/trunk/test/CodeGen/X86/materialize-one.ll
Modified:
llvm/trunk/lib/Target/X86/X86InstrCompiler.td
llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
Modified: llvm/trunk/lib/Target/X86/X86InstrCompiler.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrCompiler.td?rev=255656&r1=255655&r2=255656&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrCompiler.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrCompiler.td Tue Dec 15 11:10:28 2015
@@ -262,6 +262,22 @@ def : Pat<(i64 0), (SUBREG_TO_REG (i64 0
let AddedComplexity = 20;
}
+let Predicates = [OptForSize, NotSlowIncDec, Not64BitMode],
+ AddedComplexity = 1 in {
+ // Pseudo instructions for materializing 1 and -1 using XOR+INC/DEC,
+ // which only require 3 bytes compared to MOV32ri which requires 5.
+ let Defs = [EFLAGS], isReMaterializable = 1, isPseudo = 1 in {
+ def MOV32r1 : I<0, Pseudo, (outs GR32:$dst), (ins), "",
+ [(set GR32:$dst, 1)]>;
+ def MOV32r_1 : I<0, Pseudo, (outs GR32:$dst), (ins), "",
+ [(set GR32:$dst, -1)]>;
+ }
+
+ // MOV16ri is 4 bytes, so the instructions above are smaller.
+ def : Pat<(i16 1), (EXTRACT_SUBREG (MOV32r1), sub_16bit)>;
+ def : Pat<(i16 -1), (EXTRACT_SUBREG (MOV32r_1), sub_16bit)>;
+}
+
// Materialize i64 constant where top 32-bits are zero. This could theoretically
// use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however
// that would make it more difficult to rematerialize.
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=255656&r1=255655&r2=255656&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Tue Dec 15 11:10:28 2015
@@ -2472,13 +2472,29 @@ void X86InstrInfo::reMaterialize(Machine
unsigned DestReg, unsigned SubIdx,
const MachineInstr *Orig,
const TargetRegisterInfo &TRI) const {
- // MOV32r0 is implemented with a xor which clobbers condition code.
- // Re-materialize it as movri instructions to avoid side effects.
- unsigned Opc = Orig->getOpcode();
- if (Opc == X86::MOV32r0 && !isSafeToClobberEFLAGS(MBB, I)) {
+ bool ClobbersEFLAGS = false;
+ for (const MachineOperand &MO : Orig->operands()) {
+ if (MO.isReg() && MO.isDef() && MO.getReg() == X86::EFLAGS) {
+ ClobbersEFLAGS = true;
+ break;
+ }
+ }
+
+ if (ClobbersEFLAGS && !isSafeToClobberEFLAGS(MBB, I)) {
+ // The instruction clobbers EFLAGS. Re-materialize as MOV32ri to avoid side
+ // effects.
+ int Value;
+ switch (Orig->getOpcode()) {
+ case X86::MOV32r0: Value = 0; break;
+ case X86::MOV32r1: Value = 1; break;
+ case X86::MOV32r_1: Value = -1; break;
+ default:
+ llvm_unreachable("Unexpected instruction!");
+ }
+
DebugLoc DL = Orig->getDebugLoc();
BuildMI(MBB, I, DL, get(X86::MOV32ri)).addOperand(Orig->getOperand(0))
- .addImm(0);
+ .addImm(Value);
} else {
MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
MBB.insert(I, MI);
@@ -5262,6 +5278,24 @@ static bool Expand2AddrUndef(MachineInst
return true;
}
+static bool expandMOV32r1(MachineInstrBuilder &MIB, const TargetInstrInfo &TII,
+ bool MinusOne) {
+ MachineBasicBlock &MBB = *MIB->getParent();
+ DebugLoc DL = MIB->getDebugLoc();
+ unsigned Reg = MIB->getOperand(0).getReg();
+
+ // Insert the XOR.
+ BuildMI(MBB, MIB.getInstr(), DL, TII.get(X86::XOR32rr), Reg)
+ .addReg(Reg, RegState::Undef)
+ .addReg(Reg, RegState::Undef);
+
+ // Turn the pseudo into an INC or DEC.
+ MIB->setDesc(TII.get(MinusOne ? X86::DEC32r : X86::INC32r));
+ MIB.addReg(Reg);
+
+ return true;
+}
+
// LoadStackGuard has so far only been implemented for 64-bit MachO. Different
// code sequence is needed for other targets.
static void expandLoadStackGuard(MachineInstrBuilder &MIB,
@@ -5290,6 +5324,10 @@ bool X86InstrInfo::expandPostRAPseudo(Ma
switch (MI->getOpcode()) {
case X86::MOV32r0:
return Expand2AddrUndef(MIB, get(X86::XOR32rr));
+ case X86::MOV32r1:
+ return expandMOV32r1(MIB, *this, /*MinusOne=*/ false);
+ case X86::MOV32r_1:
+ return expandMOV32r1(MIB, *this, /*MinusOne=*/ true);
case X86::SETB_C8r:
return Expand2AddrUndef(MIB, get(X86::SBB8rr));
case X86::SETB_C16r:
Added: llvm/trunk/test/CodeGen/X86/materialize-one.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/materialize-one.ll?rev=255656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/materialize-one.ll (added)
+++ llvm/trunk/test/CodeGen/X86/materialize-one.ll Tue Dec 15 11:10:28 2015
@@ -0,0 +1,100 @@
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+cmov %s -o - | FileCheck %s --check-prefix=CHECK32
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+cmov %s -o - | FileCheck %s --check-prefix=CHECK64
+
+define i32 @one32() optsize {
+entry:
+ ret i32 1
+
+; CHECK32-LABEL: one32
+; CHECK32: xorl %eax, %eax
+; CHECK32-NEXT: incl %eax
+; CHECK32-NEXT: ret
+
+; FIXME: Figure out the best approach in 64-bit mode.
+; CHECK64-LABEL: one32
+; CHECK64: movl $1, %eax
+; CHECK64-NEXT: retq
+}
+
+define i32 @minus_one32() optsize {
+entry:
+ ret i32 -1
+
+; CHECK32-LABEL: minus_one32
+; CHECK32: xorl %eax, %eax
+; CHECK32-NEXT: decl %eax
+; CHECK32-NEXT: ret
+}
+
+define i16 @one16() optsize {
+entry:
+ ret i16 1
+
+; CHECK32-LABEL: one16
+; CHECK32: xorl %eax, %eax
+; CHECK32-NEXT: incl %eax
+; CHECK32-NEXT: retl
+}
+
+define i16 @minus_one16() optsize {
+entry:
+ ret i16 -1
+
+; CHECK32-LABEL: minus_one16
+; CHECK32: xorl %eax, %eax
+; CHECK32-NEXT: decl %eax
+; CHECK32-NEXT: retl
+}
+
+define i32 @test_rematerialization() optsize {
+entry:
+ ; Materialize -1 (thiscall forces it into %ecx).
+ tail call x86_thiscallcc void @f(i32 -1)
+
+ ; Clobber all registers except %esp, leaving nowhere to store the -1 besides
+ ; spilling it to the stack.
+ tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{edi},~{esi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
+
+ ; -1 should be re-materialized here instead of getting spilled above.
+ ret i32 -1
+
+; CHECK32-LABEL: test_rematerialization
+; CHECK32: xorl %ecx, %ecx
+; CHECK32-NEXT: decl %ecx
+; CHECK32: calll
+; CHECK32: xorl %eax, %eax
+; CHECK32-NEXT: decl %eax
+; CHECK32-NOT: %eax
+; CHECK32: retl
+}
+
+define i32 @test_rematerialization2(i32 %x) optsize {
+entry:
+ ; Materialize -1 (thiscall forces it into %ecx).
+ tail call x86_thiscallcc void @f(i32 -1)
+
+ ; Clobber all registers except %esp, leaving nowhere to store the -1 besides
+ ; spilling it to the stack.
+ tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{edi},~{esi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
+
+ ; Define eflags.
+ %a = icmp ne i32 %x, 123
+ %b = zext i1 %a to i32
+ ; Cause -1 to be rematerialized right in front of the cmov, which needs eflags.
+ ; It must therefore not use the xor-dec lowering.
+ %c = select i1 %a, i32 %b, i32 -1
+ ret i32 %c
+
+; CHECK32-LABEL: test_rematerialization2
+; CHECK32: xorl %ecx, %ecx
+; CHECK32-NEXT: decl %ecx
+; CHECK32: calll
+; CHECK32: cmpl
+; CHECK32: setne
+; CHECK32-NOT: xorl
+; CHECK32: movl $-1
+; CHECK32: cmov
+; CHECK32: retl
+}
+
+declare x86_thiscallcc void @f(i32)
More information about the llvm-commits
mailing list