[llvm] r255936 - [X86] Use push-pop for materializing small constants under 'minsize'
David Majnemer via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 4 21:32:25 PST 2016
On Thu, Dec 17, 2015 at 3:18 PM, Hans Wennborg via llvm-commits <
llvm-commits at lists.llvm.org> wrote:
> Author: hans
> Date: Thu Dec 17 17:18:39 2015
> New Revision: 255936
>
> URL: http://llvm.org/viewvc/llvm-project?rev=255936&view=rev
> Log:
> [X86] Use push-pop for materializing small constants under 'minsize'
>
> Use the 3-byte (4 with REX prefix) push-pop sequence for materializing
> small constants. This is smaller than using a mov (5, 6 or 7 bytes
> depending on size and REX prefix), but it's likely to be slower, so
> only used for 'minsize'.
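To make the size argument concrete, here is a small illustration (mine, not part of the commit) in AT&T syntax, with the instruction encodings as comments:

    # mov with a 32-bit immediate: 5 bytes, 6 with a REX prefix (e.g. %r8d),
    # 7 for the sign-extended 64-bit form.
    movl  $42, %eax        # b8 2a 00 00 00
    movq  $42, %rax        # 48 c7 c0 2a 00 00 00
    # push of an 8-bit immediate plus a pop: 2 + 1 = 3 bytes,
    # 4 when the pop needs a REX prefix (popq %r8 is 41 58).
    pushq $42              # 6a 2a
    popq  %rax             # 58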
>
> This is a follow-up to r255656.
>
> Differential Revision: http://reviews.llvm.org/D15549
>
> Added:
> llvm/trunk/test/CodeGen/X86/materialize.ll
> Removed:
> llvm/trunk/test/CodeGen/X86/materialize-one.ll
> Modified:
> llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
> llvm/trunk/lib/Target/X86/X86InstrCompiler.td
> llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
> llvm/trunk/lib/Target/X86/X86InstrInfo.h
> llvm/trunk/lib/Target/X86/X86InstrInfo.td
> llvm/trunk/test/CodeGen/X86/powi.ll
>
> Modified: llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp?rev=255936&r1=255935&r2=255936&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp Thu Dec 17 17:18:39 2015
> @@ -157,9 +157,13 @@ namespace {
> /// performance.
> bool OptForSize;
>
> + /// If true, selector should try to optimize for minimum code size.
> + bool OptForMinSize;
> +
> public:
> explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
> - : SelectionDAGISel(tm, OptLevel), OptForSize(false) {}
> + : SelectionDAGISel(tm, OptLevel), OptForSize(false),
> + OptForMinSize(false) {}
>
> const char *getPassName() const override {
> return "X86 DAG->DAG Instruction Selection";
> @@ -531,8 +535,10 @@ static bool isCalleeLoad(SDValue Callee,
> }
>
> void X86DAGToDAGISel::PreprocessISelDAG() {
> - // OptForSize is used in pattern predicates that isel is matching.
> + // OptFor[Min]Size are used in pattern predicates that isel is matching.
> OptForSize = MF->getFunction()->optForSize();
> + OptForMinSize = MF->getFunction()->optForMinSize();
> + assert((!OptForMinSize || OptForSize) && "OptForMinSize implies OptForSize");
>
> for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
> E = CurDAG->allnodes_end(); I != E; ) {
>
> Modified: llvm/trunk/lib/Target/X86/X86InstrCompiler.td
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrCompiler.td?rev=255936&r1=255935&r2=255936&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86InstrCompiler.td (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrCompiler.td Thu Dec 17 17:18:39 2015
> @@ -250,7 +250,7 @@ def MORESTACK_RET_RESTORE_R10 : I<0, Pse
> // Alias instruction mapping movr0 to xor.
> // FIXME: remove when we can teach regalloc that xor reg, reg is ok.
> let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
> - isPseudo = 1 in
> + isPseudo = 1, AddedComplexity = 20 in
> def MOV32r0 : I<0, Pseudo, (outs GR32:$dst), (ins), "",
> [(set GR32:$dst, 0)], IIC_ALU_NONMEM>,
> Sched<[WriteZero]>;
>
> @@ -263,7 +263,7 @@ def : Pat<(i64 0), (SUBREG_TO_REG (i64 0
> }
>
> let Predicates = [OptForSize, NotSlowIncDec, Not64BitMode],
> - AddedComplexity = 1 in {
> + AddedComplexity = 15 in {
> // Pseudo instructions for materializing 1 and -1 using XOR+INC/DEC,
> // which only require 3 bytes compared to MOV32ri which requires 5.
> let Defs = [EFLAGS], isReMaterializable = 1, isPseudo = 1 in {
> @@ -278,6 +278,17 @@ let Predicates = [OptForSize, NotSlowInc
> def : Pat<(i16 -1), (EXTRACT_SUBREG (MOV32r_1), sub_16bit)>;
> }
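As an aside, a sketch of why the xor+inc/dec patterns above are limited to Not64BitMode (my reading, based on the encodings; the patch itself only states the byte counts):

    # 32-bit mode: 2 + 1 = 3 bytes versus 5 for movl $1, %eax.
    xorl %eax, %eax        # 31 c0
    incl %eax              # 40 -- the one-byte inc/dec forms
    # In 64-bit mode the bytes 40-4f are REX prefixes, so inc/dec fall back to
    # the two-byte ff /0 and ff /1 encodings, which erodes the size advantage.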
>
> +let isReMaterializable = 1, isPseudo = 1, AddedComplexity = 10 in {
> +// AddedComplexity higher than MOV64ri but lower than MOV32r0 and MOV32r1.
> +// FIXME: Add itinerary class and Schedule.
> +def MOV32ImmSExti8 : I<0, Pseudo, (outs GR32:$dst), (ins i32i8imm:$src), "",
> + [(set GR32:$dst, i32immSExt8:$src)]>,
> + Requires<[OptForMinSize]>;
> +def MOV64ImmSExti8 : I<0, Pseudo, (outs GR64:$dst), (ins i64i8imm:$src), "",
> + [(set GR64:$dst, i64immSExt8:$src)]>,
> + Requires<[OptForMinSize, NotWin64WithoutFP]>;
> +}
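Putting the AddedComplexity values together, here is how I would expect small 32-bit constants to be selected under minsize in 32-bit mode (the 0, 1 and -5 cases match the tests below; the last line is my extrapolation):

    xorl  %eax, %eax       # i32 0    -> MOV32r0         (AddedComplexity 20)
    xorl  %eax, %eax
    incl  %eax             # i32 1    -> MOV32r1         (AddedComplexity 15)
    pushl $-5
    popl  %eax             # i32 -5   -> MOV32ImmSExti8  (AddedComplexity 10)
    movl  $1000, %eax      # i32 1000 -> plain MOV32ri; it does not fit in imm8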
> +
> // Materialize i64 constant where top 32-bits are zero. This could theoretically
> // use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however
> // that would make it more difficult to rematerialize.
>
> Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=255936&r1=255935&r2=255936&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Thu Dec 17 17:18:39 2015
> @@ -23,6 +23,7 @@
> #include "llvm/CodeGen/MachineDominators.h"
> #include "llvm/CodeGen/MachineFrameInfo.h"
> #include "llvm/CodeGen/MachineInstrBuilder.h"
> +#include "llvm/CodeGen/MachineModuleInfo.h"
> #include "llvm/CodeGen/MachineRegisterInfo.h"
> #include "llvm/CodeGen/StackMaps.h"
> #include "llvm/IR/DerivedTypes.h"
> @@ -5297,6 +5298,50 @@ static bool expandMOV32r1(MachineInstrBu
> return true;
> }
>
> +bool X86InstrInfo::ExpandMOVImmSExti8(MachineInstrBuilder &MIB) const {
> + MachineBasicBlock &MBB = *MIB->getParent();
> + DebugLoc DL = MIB->getDebugLoc();
> + int64_t Imm = MIB->getOperand(1).getImm();
> + assert(Imm != 0 && "Using push/pop for 0 is not efficient.");
> + MachineBasicBlock::iterator I = MIB.getInstr();
> +
> + int StackAdjustment;
> +
> + if (Subtarget.is64Bit()) {
> + assert(MIB->getOpcode() == X86::MOV64ImmSExti8 ||
> + MIB->getOpcode() == X86::MOV32ImmSExti8);
> + // 64-bit mode doesn't have 32-bit push/pop, so use 64-bit operations and
> + // widen the register if necessary.
> + StackAdjustment = 8;
> + BuildMI(MBB, I, DL, get(X86::PUSH64i8)).addImm(Imm);
> + MIB->setDesc(get(X86::POP64r));
> + MIB->getOperand(0)
> + .setReg(getX86SubSuperRegister(MIB->getOperand(0).getReg(), MVT::i64));
> + } else {
> + assert(MIB->getOpcode() == X86::MOV32ImmSExti8);
> + StackAdjustment = 4;
> + BuildMI(MBB, I, DL, get(X86::PUSH32i8)).addImm(Imm);
> + MIB->setDesc(get(X86::POP32r));
> + }
> +
> + // Build CFI if necessary.
> + MachineFunction &MF = *MBB.getParent();
> + const X86FrameLowering *TFL = Subtarget.getFrameLowering();
> + bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
> + bool NeedsDwarfCFI =
> + !IsWin64Prologue &&
> + (MF.getMMI().hasDebugInfo() || MF.getFunction()->needsUnwindTableEntry());
> + bool EmitCFI = !TFL->hasFP(MF) && NeedsDwarfCFI;
> + if (EmitCFI) {
> + TFL->BuildCFI(MBB, I, DL,
> + MCCFIInstruction::createAdjustCfaOffset(nullptr, StackAdjustment));
> + TFL->BuildCFI(MBB, std::next(I), DL,
> + MCCFIInstruction::createAdjustCfaOffset(nullptr, -StackAdjustment));
> + }
> +
> + return true;
> +}
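In other words (my sketch of the post-RA expansion, mirroring the minus_five64 test below; the CFI directives appear only when there is no frame pointer and DWARF CFI is needed):

    # MOV64ImmSExti8 %rax, -5 becomes:
    pushq $-5
    .cfi_adjust_cfa_offset 8      # the push temporarily moves %rsp down by 8
    popq  %rax
    .cfi_adjust_cfa_offset -8     # the pop restores %rsp
    # In 32-bit mode the same expansion uses pushl/popl and an adjustment of 4.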
> +
> // LoadStackGuard has so far only been implemented for 64-bit MachO. Different
> // code sequence is needed for other targets.
> static void expandLoadStackGuard(MachineInstrBuilder &MIB,
> @@ -5329,6 +5374,9 @@ bool X86InstrInfo::expandPostRAPseudo(Ma
> return expandMOV32r1(MIB, *this, /*MinusOne=*/ false);
> case X86::MOV32r_1:
> return expandMOV32r1(MIB, *this, /*MinusOne=*/ true);
> + case X86::MOV32ImmSExti8:
> + case X86::MOV64ImmSExti8:
> + return ExpandMOVImmSExti8(MIB);
> case X86::SETB_C8r:
> return Expand2AddrUndef(MIB, get(X86::SBB8rr));
> case X86::SETB_C16r:
>
> Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.h?rev=255936&r1=255935&r2=255936&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86InstrInfo.h (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrInfo.h Thu Dec 17 17:18:39 2015
> @@ -23,6 +23,7 @@
> #include "X86GenInstrInfo.inc"
>
> namespace llvm {
> + class MachineInstrBuilder;
> class X86RegisterInfo;
> class X86Subtarget;
>
> @@ -564,6 +565,9 @@ private:
> /// operand and follow operands form a reference to the stack frame.
> bool isFrameOperand(const MachineInstr *MI, unsigned int Op,
> int &FrameIndex) const;
> +
> + /// Expand the MOVImmSExti8 pseudo-instructions.
> + bool ExpandMOVImmSExti8(MachineInstrBuilder &MIB) const;
> };
>
> } // End llvm namespace
>
> Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.td
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.td?rev=255936&r1=255935&r2=255936&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86InstrInfo.td (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrInfo.td Thu Dec 17 17:18:39 2015
> @@ -820,6 +820,8 @@ def In32BitMode : Predicate<"Subtarget-
> AssemblerPredicate<"Mode32Bit", "32-bit mode">;
> def IsWin64 : Predicate<"Subtarget->isTargetWin64()">;
> def NotWin64 : Predicate<"!Subtarget->isTargetWin64()">;
> +def NotWin64WithoutFP : Predicate<"!Subtarget->isTargetWin64() ||"
> + "Subtarget->getFrameLowering()->hasFP(*MF)">;
> def IsPS4 : Predicate<"Subtarget->isTargetPS4()">;
> def NotPS4 : Predicate<"!Subtarget->isTargetPS4()">;
> def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">;
> @@ -833,6 +835,7 @@ def NearData : Predicate<"TM.getCode
> def IsStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">;
> def IsNotPIC : Predicate<"TM.getRelocationModel() != Reloc::PIC_">;
> def OptForSize : Predicate<"OptForSize">;
> +def OptForMinSize : Predicate<"OptForMinSize">;
> def OptForSpeed : Predicate<"!OptForSize">;
> def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">;
> def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">;
>
> Removed: llvm/trunk/test/CodeGen/X86/materialize-one.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/materialize-one.ll?rev=255935&view=auto
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/materialize-one.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/materialize-one.ll (removed)
> @@ -1,100 +0,0 @@
> -; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+cmov %s -o - | FileCheck %s --check-prefix=CHECK32
> -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+cmov %s -o - | FileCheck %s --check-prefix=CHECK64
> -
> -define i32 @one32() optsize {
> -entry:
> - ret i32 1
> -
> -; CHECK32-LABEL: one32
> -; CHECK32: xorl %eax, %eax
> -; CHECK32-NEXT: incl %eax
> -; CHECK32-NEXT: ret
> -
> -; FIXME: Figure out the best approach in 64-bit mode.
> -; CHECK64-LABEL: one32
> -; CHECK64: movl $1, %eax
> -; CHECK64-NEXT: retq
> -}
> -
> -define i32 @minus_one32() optsize {
> -entry:
> - ret i32 -1
> -
> -; CHECK32-LABEL: minus_one32
> -; CHECK32: xorl %eax, %eax
> -; CHECK32-NEXT: decl %eax
> -; CHECK32-NEXT: ret
> -}
> -
> -define i16 @one16() optsize {
> -entry:
> - ret i16 1
> -
> -; CHECK32-LABEL: one16
> -; CHECK32: xorl %eax, %eax
> -; CHECK32-NEXT: incl %eax
> -; CHECK32-NEXT: retl
> -}
> -
> -define i16 @minus_one16() optsize {
> -entry:
> - ret i16 -1
> -
> -; CHECK32-LABEL: minus_one16
> -; CHECK32: xorl %eax, %eax
> -; CHECK32-NEXT: decl %eax
> -; CHECK32-NEXT: retl
> -}
> -
> -define i32 @test_rematerialization() optsize {
> -entry:
> - ; Materialize -1 (thiscall forces it into %ecx).
> - tail call x86_thiscallcc void @f(i32 -1)
> -
> - ; Clobber all registers except %esp, leaving nowhere to store the -1 besides
> - ; spilling it to the stack.
> - tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{edi},~{esi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
> -
> - ; -1 should be re-materialized here instead of getting spilled above.
> - ret i32 -1
> -
> -; CHECK32-LABEL: test_rematerialization
> -; CHECK32: xorl %ecx, %ecx
> -; CHECK32-NEXT: decl %ecx
> -; CHECK32: calll
> -; CHECK32: xorl %eax, %eax
> -; CHECK32-NEXT: decl %eax
> -; CHECK32-NOT: %eax
> -; CHECK32: retl
> -}
> -
> -define i32 @test_rematerialization2(i32 %x) optsize {
> -entry:
> - ; Materialize -1 (thiscall forces it into %ecx).
> - tail call x86_thiscallcc void @f(i32 -1)
> -
> - ; Clobber all registers except %esp, leaving nowhere to store the -1 besides
> - ; spilling it to the stack.
> - tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{edi},~{esi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
> -
> - ; Define eflags.
> - %a = icmp ne i32 %x, 123
> - %b = zext i1 %a to i32
> - ; Cause -1 to be rematerialized right in front of the cmov, which needs eflags.
> - ; It must therefore not use the xor-dec lowering.
> - %c = select i1 %a, i32 %b, i32 -1
> - ret i32 %c
> -
> -; CHECK32-LABEL: test_rematerialization2
> -; CHECK32: xorl %ecx, %ecx
> -; CHECK32-NEXT: decl %ecx
> -; CHECK32: calll
> -; CHECK32: cmpl
> -; CHECK32: setne
> -; CHECK32-NOT: xorl
> -; CHECK32: movl $-1
> -; CHECK32: cmov
> -; CHECK32: retl
> -}
> -
> -declare x86_thiscallcc void @f(i32)
>
> Added: llvm/trunk/test/CodeGen/X86/materialize.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/materialize.ll?rev=255936&view=auto
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/materialize.ll (added)
> +++ llvm/trunk/test/CodeGen/X86/materialize.ll Thu Dec 17 17:18:39 2015
> @@ -0,0 +1,184 @@
> +; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+cmov %s -o - | FileCheck %s --check-prefix=CHECK32
> +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+cmov %s -o - | FileCheck %s --check-prefix=CHECK64
> +; RUN: llc -mtriple=x86_64-pc-win32 -mattr=+cmov %s -o - | FileCheck %s --check-prefix=CHECKWIN64
> +
> +define i32 @one32_nooptsize() {
> +entry:
> + ret i32 1
> +
> +; When not optimizing for size, use mov.
> +; CHECK32-LABEL: one32_nooptsize:
> +; CHECK32: movl $1, %eax
> +; CHECK32-NEXT: retl
> +; CHECK64-LABEL: one32_nooptsize:
> +; CHECK64: movl $1, %eax
> +; CHECK64-NEXT: retq
> +}
> +
> +define i32 @one32() optsize {
> +entry:
> + ret i32 1
> +
> +; CHECK32-LABEL: one32:
> +; CHECK32: xorl %eax, %eax
> +; CHECK32-NEXT: incl %eax
> +; CHECK32-NEXT: retl
> +
> +; FIXME: Figure out the best approach in 64-bit mode.
> +; CHECK64-LABEL: one32:
> +; CHECK64: movl $1, %eax
> +; CHECK64-NEXT: retq
> +}
> +
> +define i32 @one32_minsize() minsize {
> +entry:
> + ret i32 1
> +
> +; On 32-bit, xor-inc is preferred over push-pop.
> +; CHECK32-LABEL: one32_minsize:
> +; CHECK32: xorl %eax, %eax
> +; CHECK32-NEXT: incl %eax
> +; CHECK32-NEXT: retl
> +
> +; On 64-bit we don't do xor-inc yet, so push-pop it is. Note that we have to
> +; pop into a 64-bit register even when we just need 32 bits.
> +; CHECK64-LABEL: one32_minsize:
> +; CHECK64: pushq $1
> +; CHECK64: .cfi_adjust_cfa_offset 8
> +; CHECK64: popq %rax
> +; CHECK64: .cfi_adjust_cfa_offset -8
> +; CHECK64-NEXT: retq
> +}
> +
> +define i64 @one64_minsize() minsize {
> +entry:
> + ret i64 1
> +; On 64-bit we don't do xor-inc yet, so push-pop it is.
> +; CHECK64-LABEL: one64_minsize:
> +; CHECK64: pushq $1
> +; CHECK64: .cfi_adjust_cfa_offset 8
> +; CHECK64: popq %rax
> +; CHECK64: .cfi_adjust_cfa_offset -8
> +; CHECK64-NEXT: retq
> +
> +; On Win64 we can't adjust the stack unless there's a frame pointer.
> +; CHECKWIN64-LABEL: one64_minsize:
> +; CHECKWIN64: movl $1, %eax
> +; CHECKWIN64-NEXT: retq
> +}
> +
> +define i32 @minus_one32() optsize {
> +entry:
> + ret i32 -1
> +
> +; CHECK32-LABEL: minus_one32:
> +; CHECK32: xorl %eax, %eax
> +; CHECK32-NEXT: decl %eax
> +; CHECK32-NEXT: retl
> +}
> +
> +define i32 @minus_one32_minsize() minsize {
> +entry:
> + ret i32 -1
> +
> +; xor-dec is preferred over push-pop.
> +; CHECK32-LABEL: minus_one32_minsize:
> +; CHECK32: xorl %eax, %eax
> +; CHECK32-NEXT: decl %eax
> +; CHECK32-NEXT: retl
> +}
> +
> +define i16 @one16() optsize {
> +entry:
> + ret i16 1
> +
> +; CHECK32-LABEL: one16:
> +; CHECK32: xorl %eax, %eax
> +; CHECK32-NEXT: incl %eax
> +; CHECK32-NEXT: retl
> +}
> +
> +define i16 @minus_one16() optsize {
> +entry:
> + ret i16 -1
> +
> +; CHECK32-LABEL: minus_one16:
> +; CHECK32: xorl %eax, %eax
> +; CHECK32-NEXT: decl %eax
> +; CHECK32-NEXT: retl
> +}
> +
> +define i32 @minus_five32() minsize {
> +entry:
> + ret i32 -5
> +
> +; CHECK32-LABEL: minus_five32:
> +; CHECK32: pushl $-5
> +; CHECK32: popl %eax
> +; CHECK32: retl
> +}
> +
> +define i64 @minus_five64() minsize {
> +entry:
> + ret i64 -5
> +
> +; CHECK64-LABEL: minus_five64:
> +; CHECK64: pushq $-5
> +; CHECK64: .cfi_adjust_cfa_offset 8
> +; CHECK64: popq %rax
> +; CHECK64: .cfi_adjust_cfa_offset -8
> +; CHECK64: retq
> +}
> +
> +define i32 @rematerialize_minus_one() optsize {
> +entry:
> + ; Materialize -1 (thiscall forces it into %ecx).
> + tail call x86_thiscallcc void @f(i32 -1)
> +
> + ; Clobber all registers except %esp, leaving nowhere to store the -1 besides
> + ; spilling it to the stack.
> + tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{edi},~{esi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
> +
> + ; -1 should be re-materialized here instead of getting spilled above.
> + ret i32 -1
> +
> +; CHECK32-LABEL: rematerialize_minus_one
> +; CHECK32: xorl %ecx, %ecx
> +; CHECK32-NEXT: decl %ecx
> +; CHECK32: calll
> +; CHECK32: xorl %eax, %eax
> +; CHECK32-NEXT: decl %eax
> +; CHECK32-NOT: %eax
> +; CHECK32: retl
> +}
> +
> +define i32 @rematerialize_minus_one_eflags(i32 %x) optsize {
> +entry:
> + ; Materialize -1 (thiscall forces it into %ecx).
> + tail call x86_thiscallcc void @f(i32 -1)
> +
> + ; Clobber all registers except %esp, leaving nowhere to store the -1 besides
> + ; spilling it to the stack.
> + tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{edi},~{esi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
> +
> + ; Define eflags.
> + %a = icmp ne i32 %x, 123
> + %b = zext i1 %a to i32
> + ; Cause -1 to be rematerialized right in front of the cmov, which needs eflags.
> + ; It must therefore not use the xor-dec lowering.
> + %c = select i1 %a, i32 %b, i32 -1
> + ret i32 %c
> +
> +; CHECK32-LABEL: rematerialize_minus_one_eflags
> +; CHECK32: xorl %ecx, %ecx
> +; CHECK32-NEXT: decl %ecx
> +; CHECK32: calll
> +; CHECK32: cmpl
> +; CHECK32: setne
> +; CHECK32-NOT: xorl
> +; CHECK32: movl $-1
> +; CHECK32: cmov
> +; CHECK32: retl
> +}
> +
> +declare x86_thiscallcc void @f(i32)
>
> Modified: llvm/trunk/test/CodeGen/X86/powi.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/powi.ll?rev=255936&r1=255935&r2=255936&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/powi.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/powi.ll Thu Dec 17 17:18:39 2015
> @@ -29,9 +29,9 @@ define double @pow_wrapper_optsize(doubl
> define double @pow_wrapper_minsize(double %a) minsize {
> ; CHECK-LABEL: pow_wrapper_minsize:
> ; CHECK: # BB#0:
> -; CHECK-NEXT: movl $15, %edi
> +; CHECK-NEXT: movl $128, %edi
> ; CHECK-NEXT: jmp
> - %ret = tail call double @llvm.powi.f64(double %a, i32 15) nounwind ; <double> [#uses=1]
> + %ret = tail call double @llvm.powi.f64(double %a, i32 128) nounwind ; <double> [#uses=1]
> ret double %ret
> }
>
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
This has since been reverted because it caused a regression; please see PR26023 for details.