[llvm-commits] [llvm] r160919 - in /llvm/trunk: include/llvm/Target/TargetInstrInfo.h lib/CodeGen/PeepholeOptimizer.cpp lib/Target/X86/X86InstrInfo.cpp lib/Target/X86/X86InstrInfo.h test/CodeGen/X86/2012-05-19-avx2-store.ll test/CodeGen/X86/break-sse-dep.ll test/CodeGen/X86/fold-load.ll test/CodeGen/X86/fold-pcmpeqd-1.ll test/CodeGen/X86/sse-minmax.ll test/CodeGen/X86/vec_compare.ll

Manman Ren mren at apple.com
Sat Jul 28 13:58:37 PDT 2012


This commit (r160919) seems to be breaking the dragonegg bots:
make[4]: *** [_divdc3.o] Error 1
../../../../gcc.src/libgcc/../gcc/libgcc2.c: In function ‘__divxc3’:
../../../../gcc.src/libgcc/../gcc/libgcc2.c:1944:1: error: too many outgoing branch edges from bb 98
../../../../gcc.src/libgcc/../gcc/libgcc2.c:1944:1: error: too many outgoing branch edges from bb 94
../../../../gcc.src/libgcc/../gcc/libgcc2.c:1944:1: error: too many outgoing branch edges from bb 85
../../../../gcc.src/libgcc/../gcc/libgcc2.c:1944:1: error: too many outgoing branch edges from bb 81
../../../../gcc.src/libgcc/../gcc/libgcc2.c:1944:1: error: too many outgoing branch edges from bb 77
../../../../gcc.src/libgcc/../gcc/libgcc2.c:1944:1: error: too many outgoing branch edges from bb 73
../../../../gcc.src/libgcc/../gcc/libgcc2.c:1944:1: error: too many outgoing branch edges from bb 69
../../../../gcc.src/libgcc/../gcc/libgcc2.c:1944:1: error: too many outgoing branch edges from bb 66
../../../../gcc.src/libgcc/../gcc/libgcc2.c:1944:1: error: too many outgoing branch edges from bb 57
../../../../gcc.src/libgcc/../gcc/libgcc2.c:1944:1: error: too many outgoing branch edges from bb 53
../../../../gcc.src/libgcc/../gcc/libgcc2.c:1944:1: error: too many outgoing branch edges from bb 44
../../../../gcc.src/libgcc/../gcc/libgcc2.c:1944:1: error: too many outgoing branch edges from bb 40
../../../../gcc.src/libgcc/../gcc/libgcc2.c:1944:1: error: too many outgoing branch edges from bb 36
../../../../gcc.src/libgcc/../gcc/libgcc2.c:1944:1: error: too many outgoing branch edges from bb 32
../../../../gcc.src/libgcc/../gcc/libgcc2.c:1944:1: error: too many outgoing branch edges from bb 28
../../../../gcc.src/libgcc/../gcc/libgcc2.c:1944:1: error: too many outgoing branch edges from bb 25
../../../../gcc.src/libgcc/../gcc/libgcc2.c:1944:1: error: too many outgoing branch edges from bb 18
../../../../gcc.src/libgcc/../gcc/libgcc2.c:1944:1: error: too many outgoing branch edges from bb 14
../../../../gcc.src/libgcc/../gcc/libgcc2.c:1944:1: error: too many outgoing branch edges from bb 8
../../../../gcc.src/libgcc/../gcc/libgcc2.c:1944:1: error: too many outgoing branch edges from bb 6
../../../../gcc.src/libgcc/../gcc/libgcc2.c:1944:1: internal compiler error: verify_flow_info failed
Please submit a full bug report,

I am not sure how to fix this. Thanks,

Manman

On Jul 28, 2012, at 9:48 AM, Manman Ren wrote:

> Author: mren
> Date: Sat Jul 28 11:48:01 2012
> New Revision: 160919
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=160919&view=rev
> Log:
> X86 Peephole: fold loads to the source register operand if possible.
> 
> Machine CSE and other optimizations can remove instructions so folding
> is possible at peephole while not possible at ISel.
> 
> rdar://10554090 and rdar://11873276
> 
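> For illustration only (not part of the original commit message): a minimal C
> sketch of the kind of fold this change enables. The before/after assembly is
> taken from the break-sse-dep.ll update further down in this diff; the C
> wrapper itself is hypothetical.
> 
>   #include <math.h>
> 
>   /* With this peephole, the load of *x is folded into its single use when
>      the defining load and the use live in the same basic block. */
>   double squirt(double *x) {
>     return sqrt(*x);
>   }
> 
>   /* before:  movsd  (%rdi), %xmm0
>               sqrtsd %xmm0, %xmm0
>      after:   sqrtsd (%rdi), %xmm0  */
> 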
> Modified:
>    llvm/trunk/include/llvm/Target/TargetInstrInfo.h
>    llvm/trunk/lib/CodeGen/PeepholeOptimizer.cpp
>    llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
>    llvm/trunk/lib/Target/X86/X86InstrInfo.h
>    llvm/trunk/test/CodeGen/X86/2012-05-19-avx2-store.ll
>    llvm/trunk/test/CodeGen/X86/break-sse-dep.ll
>    llvm/trunk/test/CodeGen/X86/fold-load.ll
>    llvm/trunk/test/CodeGen/X86/fold-pcmpeqd-1.ll
>    llvm/trunk/test/CodeGen/X86/sse-minmax.ll
>    llvm/trunk/test/CodeGen/X86/vec_compare.ll
> 
> Modified: llvm/trunk/include/llvm/Target/TargetInstrInfo.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetInstrInfo.h?rev=160919&r1=160918&r2=160919&view=diff
> ==============================================================================
> --- llvm/trunk/include/llvm/Target/TargetInstrInfo.h (original)
> +++ llvm/trunk/include/llvm/Target/TargetInstrInfo.h Sat Jul 28 11:48:01 2012
> @@ -14,6 +14,7 @@
> #ifndef LLVM_TARGET_TARGETINSTRINFO_H
> #define LLVM_TARGET_TARGETINSTRINFO_H
> 
> +#include "llvm/ADT/SmallSet.h"
> #include "llvm/MC/MCInstrInfo.h"
> #include "llvm/CodeGen/DFAPacketizer.h"
> #include "llvm/CodeGen/MachineFunction.h"
> @@ -693,6 +694,16 @@
>     return false;
>   }
> 
> +  /// optimizeLoadInstr - Try to remove the load by folding it to a register
> +  /// operand at the use. We fold the load instructions if and only if the
> +  /// def and use are in the same BB.
> +  virtual MachineInstr* optimizeLoadInstr(MachineInstr *MI,
> +                        const MachineRegisterInfo *MRI,
> +                        SmallSet<unsigned, 4> &FoldAsLoadDefRegs,
> +                        MachineInstr *&DefMI) const {
> +    return 0;
> +  }
> +
>   /// FoldImmediate - 'Reg' is known to be defined by a move immediate
>   /// instruction, try to fold the immediate into the use instruction.
>   virtual bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
> 
> Modified: llvm/trunk/lib/CodeGen/PeepholeOptimizer.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/PeepholeOptimizer.cpp?rev=160919&r1=160918&r2=160919&view=diff
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/PeepholeOptimizer.cpp (original)
> +++ llvm/trunk/lib/CodeGen/PeepholeOptimizer.cpp Sat Jul 28 11:48:01 2012
> @@ -78,6 +78,7 @@
> STATISTIC(NumBitcasts,   "Number of bitcasts eliminated");
> STATISTIC(NumCmps,       "Number of compares eliminated");
> STATISTIC(NumImmFold,    "Number of move immediate folded");
> +STATISTIC(NumLoadFold,   "Number of loads folded");
> 
> namespace {
>   class PeepholeOptimizer : public MachineFunctionPass {
> @@ -441,6 +442,7 @@
>   SmallPtrSet<MachineInstr*, 8> LocalMIs;
>   SmallSet<unsigned, 4> ImmDefRegs;
>   DenseMap<unsigned, MachineInstr*> ImmDefMIs;
> +  SmallSet<unsigned, 4> FoldAsLoadDefRegs;
>   for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
>     MachineBasicBlock *MBB = &*I;
> 
> @@ -448,6 +450,7 @@
>     LocalMIs.clear();
>     ImmDefRegs.clear();
>     ImmDefMIs.clear();
> +    FoldAsLoadDefRegs.clear();
> 
>     bool First = true;
>     MachineBasicBlock::iterator PMII;
> @@ -489,6 +492,25 @@
>           Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
>       }
> 
> +      MachineInstr *DefMI = 0;
> +      MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI, FoldAsLoadDefRegs,
> +                                                    DefMI);
> +      if (FoldMI) {
> +        // Update LocalMIs since we replaced MI with FoldMI and deleted DefMI.
> +        LocalMIs.erase(MI);
> +        LocalMIs.erase(DefMI);
> +        LocalMIs.insert(FoldMI);
> +        MI->eraseFromParent();
> +        DefMI->eraseFromParent();
> +        ++NumLoadFold;
> +
> +        // MI is replaced with FoldMI.
> +        Changed = true;
> +        PMII = FoldMI;
> +        MII = llvm::next(PMII);
> +        continue;
> +      }
> +
>       First = false;
>       PMII = MII;
>       ++MII;
> 
> Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=160919&r1=160918&r2=160919&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Sat Jul 28 11:48:01 2012
> @@ -3323,6 +3323,75 @@
>   return true;
> }
> 
> +/// optimizeLoadInstr - Try to remove the load by folding it to a register
> +/// operand at the use. We fold the load instructions if and only if the
> +/// def and use are in the same BB.
> +MachineInstr* X86InstrInfo::
> +optimizeLoadInstr(MachineInstr *MI, const MachineRegisterInfo *MRI,
> +                  SmallSet<unsigned, 4> &FoldAsLoadDefRegs,
> +                  MachineInstr *&DefMI) const {
> +  if (MI->mayStore() || MI->isCall())
> +    // To be conservative, we don't fold the loads if there is a store in
> +    // between.
> +    FoldAsLoadDefRegs.clear();
> +  // We only fold loads to a virtual register.
> +  if (MI->canFoldAsLoad()) {
> +    const MCInstrDesc &MCID = MI->getDesc();
> +    if (MCID.getNumDefs() == 1) {
> +      unsigned Reg = MI->getOperand(0).getReg();
> +      // To reduce compilation time, we check MRI->hasOneUse when inserting
> +      // loads. It should be checked when processing uses of the load, since
> +      // uses can be removed during peephole.
> +      if (TargetRegisterInfo::isVirtualRegister(Reg) && MRI->hasOneUse(Reg)) {
> +        FoldAsLoadDefRegs.insert(Reg);
> +        return 0;
> +      }
> +    }
> +  }
> +
> +  // Collect information about virtual register operands of MI.
> +  DenseMap<unsigned, unsigned> SrcVirtualRegToOp;
> +  SmallSet<unsigned, 4> DstVirtualRegs;
> +  for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
> +    MachineOperand &MO = MI->getOperand(i);
> +    if (!MO.isReg())
> +      continue;
> +    unsigned Reg = MO.getReg();
> +    if (!TargetRegisterInfo::isVirtualRegister(Reg))
> +      continue;
> +    if (MO.isDef())
> +      DstVirtualRegs.insert(Reg);
> +    else if (FoldAsLoadDefRegs.count(Reg)) {
> +      // Only handle the case where Reg is used in a single src operand.
> +      if (SrcVirtualRegToOp.find(Reg) != SrcVirtualRegToOp.end())
> +        SrcVirtualRegToOp.erase(Reg);
> +      else
> +        SrcVirtualRegToOp.insert(std::make_pair(Reg, i));
> +    }
> +  }
> +
> +  for (DenseMap<unsigned, unsigned>::iterator SI = SrcVirtualRegToOp.begin(),
> +       SE = SrcVirtualRegToOp.end(); SI != SE; SI++) {
> +    // If the virtual register is updated by MI, we can't fold the load.
> +    if (DstVirtualRegs.count(SI->first)) continue;
> +
> +    // Check whether we can fold the def into this operand.
> +    DefMI = MRI->getVRegDef(SI->first);
> +    assert(DefMI);
> +    bool SawStore = false;
> +    if (!DefMI->isSafeToMove(this, 0, SawStore))
> +       continue;
> +
> +    SmallVector<unsigned, 8> Ops;
> +    Ops.push_back(SI->second);
> +    MachineInstr *FoldMI = foldMemoryOperand(MI, Ops, DefMI);
> +    if (!FoldMI) continue;
> +    FoldAsLoadDefRegs.erase(SI->first);
> +    return FoldMI;
> +  }
> +  return 0;
> +}
> +
> /// Expand2AddrUndef - Expand a single-def pseudo instruction to a two-addr
> /// instruction with two undef reads of the register being defined.  This is
> /// used for mapping:
> 
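> A hedged note on the conservative handling above (an illustrative sketch, not
> part of the original patch): because optimizeLoadInstr clears
> FoldAsLoadDefRegs on any instruction that may store or is a call, a load
> separated from its single use by a store is deliberately left alone by this
> peephole. The C function g below is hypothetical:
> 
>   #include <math.h>
> 
>   double g(double *p, double *q) {
>     double v = *p;   /* load candidate, recorded in FoldAsLoadDefRegs */
>     *q = 1.0;        /* mayStore() clears the set conservatively      */
>     return sqrt(v);  /* so this pass does not fold the load here;
>                         whether the final code ends up with a folded
>                         load still depends on earlier ISel decisions  */
>   }
> 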
> Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.h?rev=160919&r1=160918&r2=160919&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86InstrInfo.h (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrInfo.h Sat Jul 28 11:48:01 2012
> @@ -387,6 +387,14 @@
>                                     unsigned SrcReg2, int CmpMask, int CmpValue,
>                                     const MachineRegisterInfo *MRI) const;
> 
> +  /// optimizeLoadInstr - Try to remove the load by folding it to a register
> +  /// operand at the use. We fold the load instructions if and only if the
> +  /// def and use are in the same BB.
> +  virtual MachineInstr* optimizeLoadInstr(MachineInstr *MI,
> +                        const MachineRegisterInfo *MRI,
> +                        SmallSet<unsigned, 4> &FoldAsLoadDefRegs,
> +                        MachineInstr *&DefMI) const;
> +
> private:
>   MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc,
>                                               MachineFunction::iterator &MFI,
> 
> Modified: llvm/trunk/test/CodeGen/X86/2012-05-19-avx2-store.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2012-05-19-avx2-store.ll?rev=160919&r1=160918&r2=160919&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/2012-05-19-avx2-store.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/2012-05-19-avx2-store.ll Sat Jul 28 11:48:01 2012
> @@ -3,8 +3,7 @@
> define void @double_save(<4 x i32>* %Ap, <4 x i32>* %Bp, <8 x i32>* %P) nounwind ssp {
> entry:
>   ; CHECK: vmovaps
> -  ; CHECK: vmovaps
> -  ; CHECK: vinsertf128
> +  ; CHECK: vinsertf128 $1, ([[A0:%rdi|%rsi]]),
>   ; CHECK: vmovups
>   %A = load <4 x i32>* %Ap
>   %B = load <4 x i32>* %Bp
> 
> Modified: llvm/trunk/test/CodeGen/X86/break-sse-dep.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/break-sse-dep.ll?rev=160919&r1=160918&r2=160919&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/break-sse-dep.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/break-sse-dep.ll Sat Jul 28 11:48:01 2012
> @@ -34,8 +34,7 @@
> define double @squirt(double* %x) nounwind {
> entry:
> ; CHECK: squirt:
> -; CHECK: movsd ([[A0]]), %xmm0
> -; CHECK: sqrtsd %xmm0, %xmm0
> +; CHECK: sqrtsd ([[A0]]), %xmm0
>   %z = load double* %x
>   %t = call double @llvm.sqrt.f64(double %z)
>   ret double %t
> 
> Modified: llvm/trunk/test/CodeGen/X86/fold-load.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fold-load.ll?rev=160919&r1=160918&r2=160919&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/fold-load.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/fold-load.ll Sat Jul 28 11:48:01 2012
> @@ -45,3 +45,29 @@
> 
> }
> 
> +; rdar://10554090
> +; xor in exit block will be CSE'ed and load will be folded to xor in entry.
> +define i1 @test3(i32* %P, i32* %Q) nounwind {
> +; CHECK: test3:
> +; CHECK: movl 8(%esp), %eax
> +; CHECK: xorl (%eax),
> +; CHECK: j
> +; CHECK-NOT: xor
> +entry:
> +  %0 = load i32* %P, align 4
> +  %1 = load i32* %Q, align 4
> +  %2 = xor i32 %0, %1
> +  %3 = and i32 %2, 65535
> +  %4 = icmp eq i32 %3, 0
> +  br i1 %4, label %exit, label %land.end
> +
> +exit:
> +  %shr.i.i19 = xor i32 %1, %0
> +  %5 = and i32 %shr.i.i19, 2147418112
> +  %6 = icmp eq i32 %5, 0
> +  br label %land.end
> +
> +land.end:
> +  %7 = phi i1 [ %6, %exit ], [ false, %entry ]
> +  ret i1 %7
> +}
> 
> Modified: llvm/trunk/test/CodeGen/X86/fold-pcmpeqd-1.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fold-pcmpeqd-1.ll?rev=160919&r1=160918&r2=160919&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/fold-pcmpeqd-1.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/fold-pcmpeqd-1.ll Sat Jul 28 11:48:01 2012
> @@ -1,11 +1,14 @@
> -; RUN: llc < %s -march=x86 -mattr=+sse2 > %t
> -; RUN: grep pcmpeqd %t | count 1
> -; RUN: grep xor %t | count 1
> -; RUN: not grep LCP %t
> +; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
> 
> define <2 x double> @foo() nounwind {
>   ret <2 x double> bitcast (<2 x i64><i64 -1, i64 -1> to <2 x double>)
> +; CHECK: foo:
> +; CHECK: pcmpeqd %xmm{{[0-9]+}}, %xmm{{[0-9]+}}
> +; CHECK-NEXT: ret
> }
> define <2 x double> @bar() nounwind {
>   ret <2 x double> bitcast (<2 x i64><i64 0, i64 0> to <2 x double>)
> +; CHECK: bar:
> +; CHECK: xorps %xmm{{[0-9]+}}, %xmm{{[0-9]+}}
> +; CHECK-NEXT: ret
> }
> 
> Modified: llvm/trunk/test/CodeGen/X86/sse-minmax.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse-minmax.ll?rev=160919&r1=160918&r2=160919&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/sse-minmax.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/sse-minmax.ll Sat Jul 28 11:48:01 2012
> @@ -137,16 +137,13 @@
> }
> 
> ; CHECK:      ogt_x:
> -; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
> -; CHECK-NEXT: maxsd %xmm1, %xmm0
> +; CHECK-NEXT: maxsd LCP{{.*}}(%rip), %xmm0
> ; CHECK-NEXT: ret
> ; UNSAFE:      ogt_x:
> -; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
> -; UNSAFE-NEXT: maxsd %xmm1, %xmm0
> +; UNSAFE-NEXT: maxsd LCP{{.*}}(%rip), %xmm0
> ; UNSAFE-NEXT: ret
> ; FINITE:      ogt_x:
> -; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
> -; FINITE-NEXT: maxsd %xmm1, %xmm0
> +; FINITE-NEXT: maxsd LCP{{.*}}(%rip), %xmm0
> ; FINITE-NEXT: ret
> define double @ogt_x(double %x) nounwind {
>   %c = fcmp ogt double %x, 0.000000e+00
> @@ -155,16 +152,13 @@
> }
> 
> ; CHECK:      olt_x:
> -; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
> -; CHECK-NEXT: minsd %xmm1, %xmm0
> +; CHECK-NEXT: minsd LCP{{.*}}(%rip), %xmm0
> ; CHECK-NEXT: ret
> ; UNSAFE:      olt_x:
> -; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
> -; UNSAFE-NEXT: minsd %xmm1, %xmm0
> +; UNSAFE-NEXT: minsd LCP{{.*}}(%rip), %xmm0
> ; UNSAFE-NEXT: ret
> ; FINITE:      olt_x:
> -; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
> -; FINITE-NEXT: minsd %xmm1, %xmm0
> +; FINITE-NEXT: minsd LCP{{.*}}(%rip), %xmm0
> ; FINITE-NEXT: ret
> define double @olt_x(double %x) nounwind {
>   %c = fcmp olt double %x, 0.000000e+00
> @@ -217,12 +211,10 @@
> ; CHECK:      oge_x:
> ; CHECK:      ucomisd %xmm1, %xmm0
> ; UNSAFE:      oge_x:
> -; UNSAFE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
> -; UNSAFE-NEXT: maxsd   %xmm1, %xmm0
> +; UNSAFE-NEXT: maxsd   LCP{{.*}}(%rip), %xmm0
> ; UNSAFE-NEXT: ret
> ; FINITE:      oge_x:
> -; FINITE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
> -; FINITE-NEXT: maxsd   %xmm1, %xmm0
> +; FINITE-NEXT: maxsd   LCP{{.*}}(%rip), %xmm0
> ; FINITE-NEXT: ret
> define double @oge_x(double %x) nounwind {
>   %c = fcmp oge double %x, 0.000000e+00
> @@ -233,12 +225,10 @@
> ; CHECK:      ole_x:
> ; CHECK:      ucomisd %xmm0, %xmm1
> ; UNSAFE:      ole_x:
> -; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
> -; UNSAFE-NEXT: minsd %xmm1, %xmm0
> +; UNSAFE-NEXT: minsd LCP{{.*}}(%rip), %xmm0
> ; UNSAFE-NEXT: ret
> ; FINITE:      ole_x:
> -; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
> -; FINITE-NEXT: minsd %xmm1, %xmm0
> +; FINITE-NEXT: minsd LCP{{.*}}(%rip), %xmm0
> ; FINITE-NEXT: ret
> define double @ole_x(double %x) nounwind {
>   %c = fcmp ole double %x, 0.000000e+00
> @@ -411,12 +401,10 @@
> ; CHECK:      ugt_x:
> ; CHECK:      ucomisd %xmm0, %xmm1
> ; UNSAFE:      ugt_x:
> -; UNSAFE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
> -; UNSAFE-NEXT: maxsd   %xmm1, %xmm0
> +; UNSAFE-NEXT: maxsd   LCP{{.*}}(%rip), %xmm0
> ; UNSAFE-NEXT: ret
> ; FINITE:      ugt_x:
> -; FINITE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
> -; FINITE-NEXT: maxsd   %xmm1, %xmm0
> +; FINITE-NEXT: maxsd   LCP{{.*}}(%rip), %xmm0
> ; FINITE-NEXT: ret
> define double @ugt_x(double %x) nounwind {
>   %c = fcmp ugt double %x, 0.000000e+00
> @@ -427,12 +415,10 @@
> ; CHECK:      ult_x:
> ; CHECK:      ucomisd %xmm1, %xmm0
> ; UNSAFE:      ult_x:
> -; UNSAFE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
> -; UNSAFE-NEXT: minsd   %xmm1, %xmm0
> +; UNSAFE-NEXT: minsd   LCP{{.*}}(%rip), %xmm0
> ; UNSAFE-NEXT: ret
> ; FINITE:      ult_x:
> -; FINITE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
> -; FINITE-NEXT: minsd   %xmm1, %xmm0
> +; FINITE-NEXT: minsd   LCP{{.*}}(%rip), %xmm0
> ; FINITE-NEXT: ret
> define double @ult_x(double %x) nounwind {
>   %c = fcmp ult double %x, 0.000000e+00
> @@ -482,12 +468,10 @@
> ; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0
> ; CHECK-NEXT: ret
> ; UNSAFE:      uge_x:
> -; UNSAFE-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
> -; UNSAFE-NEXT: maxsd  %xmm1, %xmm0
> +; UNSAFE-NEXT: maxsd  LCP{{.*}}(%rip), %xmm0
> ; UNSAFE-NEXT: ret
> ; FINITE:      uge_x:
> -; FINITE-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
> -; FINITE-NEXT: maxsd  %xmm1, %xmm0
> +; FINITE-NEXT: maxsd  LCP{{.*}}(%rip), %xmm0
> ; FINITE-NEXT: ret
> define double @uge_x(double %x) nounwind {
>   %c = fcmp uge double %x, 0.000000e+00
> @@ -501,12 +485,10 @@
> ; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0
> ; CHECK-NEXT: ret
> ; UNSAFE:      ule_x:
> -; UNSAFE-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
> -; UNSAFE-NEXT: minsd  %xmm1, %xmm0
> +; UNSAFE-NEXT: minsd  LCP{{.*}}(%rip), %xmm0
> ; UNSAFE-NEXT: ret
> ; FINITE:      ule_x:
> -; FINITE-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
> -; FINITE-NEXT: minsd  %xmm1, %xmm0
> +; FINITE-NEXT: minsd  LCP{{.*}}(%rip), %xmm0
> ; FINITE-NEXT: ret
> define double @ule_x(double %x) nounwind {
>   %c = fcmp ule double %x, 0.000000e+00
> @@ -515,8 +497,7 @@
> }
> 
> ; CHECK:      uge_inverse_x:
> -; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
> -; CHECK-NEXT: minsd %xmm1, %xmm0
> +; CHECK-NEXT: minsd LCP{{.*}}(%rip), %xmm0
> ; CHECK-NEXT: ret
> ; UNSAFE:      uge_inverse_x:
> ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
> @@ -535,8 +516,7 @@
> }
> 
> ; CHECK:      ule_inverse_x:
> -; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
> -; CHECK-NEXT: maxsd %xmm1, %xmm0
> +; CHECK-NEXT: maxsd LCP{{.*}}(%rip), %xmm0
> ; CHECK-NEXT: ret
> ; UNSAFE:      ule_inverse_x:
> ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
> 
> Modified: llvm/trunk/test/CodeGen/X86/vec_compare.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_compare.ll?rev=160919&r1=160918&r2=160919&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/vec_compare.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/vec_compare.ll Sat Jul 28 11:48:01 2012
> @@ -14,8 +14,8 @@
> define <4 x i32> @test2(<4 x i32> %A, <4 x i32> %B) nounwind {
> ; CHECK: test2:
> ; CHECK: pcmp
> -; CHECK: pcmp
> -; CHECK: pxor
> +; CHECK: pxor LCP
> +; CHECK: movdqa
> ; CHECK: ret
> 	%C = icmp sge <4 x i32> %A, %B
>         %D = sext <4 x i1> %C to <4 x i32>
> 
> 
